@@ -10664,7 +10664,9 @@ ix86_ssecom_setcc (const enum rtx_code comparison,
rtx_code_label *label = NULL;
/* NB: For ordered EQ or unordered NE, check ZF alone isn't sufficient
- with NAN operands. */
+ with NAN operands.
+ Under TARGET_AVX10_2_256, VCOMX/VUCOMX are generated instead of
+ COMI/UCOMI. VCOMX/VUCOMX will not set ZF for NAN operands. */
if (check_unordered)
{
gcc_assert (comparison == EQ || comparison == NE);
@@ -10703,7 +10705,7 @@ ix86_ssecom_setcc (const enum rtx_code comparison,
static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
- rtx target)
+ rtx target, bool comx_ok)
{
rtx pat, set_dst;
tree arg0 = CALL_EXPR_ARG (exp, 0);
@@ -10736,11 +10738,13 @@ ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
case GE:
break;
case EQ:
- check_unordered = true;
+ if (!TARGET_AVX10_2_256 || !comx_ok)
+ check_unordered = true;
mode = CCZmode;
break;
case NE:
- check_unordered = true;
+ if (!TARGET_AVX10_2_256 || !comx_ok)
+ check_unordered = true;
mode = CCZmode;
const_val = const1_rtx;
break;
@@ -10759,6 +10763,28 @@ ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
|| !insn_p->operand[1].predicate (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
+ if ((comparison == EQ || comparison == NE)
+ && TARGET_AVX10_2_256 && comx_ok)
+ {
+ switch (icode)
+ {
+ case CODE_FOR_sse_comi:
+ icode = CODE_FOR_avx10_2_comxsf;
+ break;
+ case CODE_FOR_sse_ucomi:
+ icode = CODE_FOR_avx10_2_ucomxsf;
+ break;
+ case CODE_FOR_sse2_comi:
+ icode = CODE_FOR_avx10_2_comxdf;
+ break;
+ case CODE_FOR_sse2_ucomi:
+ icode = CODE_FOR_avx10_2_ucomxdf;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
pat = GEN_FCN (icode) (op0, op1);
if (! pat)
return 0;
@@ -12253,7 +12279,7 @@ ix86_erase_embedded_rounding (rtx pat)
with rounding. */
static rtx
ix86_expand_sse_comi_round (const struct builtin_description *d,
- tree exp, rtx target)
+ tree exp, rtx target, bool comx_ok)
{
rtx pat, set_dst;
tree arg0 = CALL_EXPR_ARG (exp, 0);
@@ -12315,6 +12341,7 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
op1 = safe_vector_operand (op1, mode1);
enum rtx_code comparison = comparisons[INTVAL (op2)];
+ enum rtx_code orig_comp = comparison;
bool ordered = ordereds[INTVAL (op2)];
bool non_signaling = non_signalings[INTVAL (op2)];
rtx const_val = const0_rtx;
@@ -12326,10 +12353,21 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
case ORDERED:
if (!ordered)
{
- /* NB: Use CCSmode/NE for _CMP_TRUE_UQ/_CMP_TRUE_US. */
- if (!non_signaling)
- ordered = true;
- mode = CCSmode;
+ if (TARGET_AVX10_2_256 && comx_ok)
+ {
+ /* Unlike VCOMI{SH,SS,SD}, VCOMX{SH,SS,SD} will set SF
+ differently. So directly return true here. */
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const1_rtx);
+ return target;
+ }
+ else
+ {
+ /* NB: Use CCSmode/NE for _CMP_TRUE_UQ/_CMP_TRUE_US. */
+ if (!non_signaling)
+ ordered = true;
+ mode = CCSmode;
+ }
}
else
{
@@ -12343,10 +12381,21 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
case UNORDERED:
if (ordered)
{
- /* NB: Use CCSmode/EQ for _CMP_FALSE_OQ/_CMP_FALSE_OS. */
- if (non_signaling)
- ordered = false;
- mode = CCSmode;
+ if (TARGET_AVX10_2_256 && comx_ok)
+ {
+ /* Unlike VCOMI{SH,SS,SD}, VCOMX{SH,SS,SD} will set SF
+ differently. So directly return false here. */
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ return target;
+ }
+ else
+ {
+ /* NB: Use CCSmode/EQ for _CMP_FALSE_OQ/_CMP_FALSE_OS. */
+ if (non_signaling)
+ ordered = false;
+ mode = CCSmode;
+ }
}
else
{
@@ -12377,17 +12426,23 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
if (ordered == non_signaling)
ordered = !ordered;
break;
- case EQ:
/* NB: COMI/UCOMI will set ZF with NAN operands. Use CCZmode for
- _CMP_EQ_OQ/_CMP_EQ_OS. */
- check_unordered = true;
+ _CMP_EQ_OQ/_CMP_EQ_OS.
+ Under TARGET_AVX10_2_256, VCOMX/VUCOMX are always generated instead
+ of COMI/UCOMI, VCOMX/VUCOMX will not set ZF with NAN. */
+ case EQ:
+ if (!TARGET_AVX10_2_256 || !comx_ok)
+ check_unordered = true;
mode = CCZmode;
break;
case NE:
/* NB: COMI/UCOMI will set ZF with NAN operands. Use CCZmode for
- _CMP_NEQ_UQ/_CMP_NEQ_US. */
+ _CMP_NEQ_UQ/_CMP_NEQ_US.
+ Under TARGET_AVX10_2_256, VCOMX/VUCOMX are always generated instead
+ of COMI/UCOMI, VCOMX/VUCOMX will not set ZF with NAN. */
gcc_assert (!ordered);
- check_unordered = true;
+ if (!TARGET_AVX10_2_256 || !comx_ok)
+ check_unordered = true;
mode = CCZmode;
const_val = const1_rtx;
break;
@@ -12406,14 +12461,77 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
|| !insn_p->operand[1].predicate (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
+ /* Generate comx instead of comi when EQ/NE to avoid NAN checks.
+ Use orig_comp to exclude ORDERED/UNORDERED cases. */
+ if ((orig_comp == EQ || orig_comp == NE)
+ && TARGET_AVX10_2_256 && comx_ok)
+ {
+ switch (icode)
+ {
+ case CODE_FOR_avx512fp16_comi_round:
+ icode = CODE_FOR_avx10_2_comxhf_round;
+ break;
+ case CODE_FOR_sse_comi_round:
+ icode = CODE_FOR_avx10_2_comxsf_round;
+ break;
+ case CODE_FOR_sse2_comi_round:
+ icode = CODE_FOR_avx10_2_comxdf_round;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ /* Generate comi instead of comx when UNEQ/LTGT to avoid NAN checks. */
+ if ((comparison == UNEQ || comparison == LTGT)
+ && TARGET_AVX10_2_256 && comx_ok)
+ {
+ switch (icode)
+ {
+ case CODE_FOR_avx10_2_comxhf_round:
+ icode = CODE_FOR_avx512fp16_comi_round;
+ break;
+ case CODE_FOR_avx10_2_comxsf_round:
+ icode = CODE_FOR_sse_comi_round;
+ break;
+ case CODE_FOR_avx10_2_comxdf_round:
+ icode = CODE_FOR_sse2_comi_round;
+ break;
+
+ default:
+ break;
+ }
+ }
+
/*
- 1. COMI: ordered and signaling.
- 2. UCOMI: unordered and non-signaling.
+ 1. COMI/VCOMX: ordered and signaling.
+ 2. UCOMI/VUCOMX: unordered and non-signaling.
*/
if (non_signaling)
- icode = (icode == CODE_FOR_sse_comi_round
- ? CODE_FOR_sse_ucomi_round
- : CODE_FOR_sse2_ucomi_round);
+ switch (icode)
+ {
+ case CODE_FOR_sse_comi_round:
+ icode = CODE_FOR_sse_ucomi_round;
+ break;
+ case CODE_FOR_sse2_comi_round:
+ icode = CODE_FOR_sse2_ucomi_round;
+ break;
+ case CODE_FOR_avx512fp16_comi_round:
+ icode = CODE_FOR_avx512fp16_ucomi_round;
+ break;
+ case CODE_FOR_avx10_2_comxsf_round:
+ icode = CODE_FOR_avx10_2_ucomxsf_round;
+ break;
+ case CODE_FOR_avx10_2_comxhf_round:
+ icode = CODE_FOR_avx10_2_ucomxhf_round;
+ break;
+ case CODE_FOR_avx10_2_comxdf_round:
+ icode = CODE_FOR_avx10_2_ucomxdf_round;
+ break;
+ default:
+ gcc_unreachable ();
+ }
pat = GEN_FCN (icode) (op0, op1, op3);
if (! pat)
@@ -12550,7 +12668,7 @@ ix86_expand_round_builtin (const struct builtin_description *d,
break;
case INT_FTYPE_V4SF_V4SF_INT_INT:
case INT_FTYPE_V2DF_V2DF_INT_INT:
- return ix86_expand_sse_comi_round (d, exp, target);
+ return ix86_expand_sse_comi_round (d, exp, target, true);
case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI_INT:
case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
@@ -15691,7 +15809,7 @@ rdseed_step:
case IX86_BUILTIN_VCOMSBF16GE:
case IX86_BUILTIN_VCOMSBF16LT:
case IX86_BUILTIN_VCOMSBF16LE:
- return ix86_expand_sse_comi (bdesc_args + i, exp, target);
+ return ix86_expand_sse_comi (bdesc_args + i, exp, target, false);
case IX86_BUILTIN_FABSQ:
case IX86_BUILTIN_COPYSIGNQ:
if (!TARGET_SSE)
@@ -15707,7 +15825,7 @@ rdseed_step:
&& fcode <= IX86_BUILTIN__BDESC_COMI_LAST)
{
i = fcode - IX86_BUILTIN__BDESC_COMI_FIRST;
- return ix86_expand_sse_comi (bdesc_comi + i, exp, target);
+ return ix86_expand_sse_comi (bdesc_comi + i, exp, target, true);
}
if (fcode >= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST
@@ -139,6 +139,7 @@
UNSPEC_SCALEF
UNSPEC_PCMP
UNSPEC_CVTBFSF
+ UNSPEC_COMX
;; Generic math support
UNSPEC_IEEE_MIN ; not commutative
@@ -4692,6 +4692,22 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
+(define_insn "avx10_2_<unord>comx<mode><round_saeonly_name>"
+ [(set (reg:CCFP FLAGS_REG)
+ (unspec:CCFP
+ [(vec_select:MODEFH
+ (match_operand:<ssevecmode> 0 "register_operand" "v")
+ (parallel [(const_int 0)]))
+ (vec_select:MODEFH
+ (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
+ (parallel [(const_int 0)]))]
+ UNSPEC_COMX))]
+ "TARGET_AVX10_2_256"
+ "v<unord>comx<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "<sse>_<unord>comi<round_saeonly_name>"
[(set (reg:CCFP FLAGS_REG)
(compare:CCFP
@@ -4701,7 +4717,7 @@
(vec_select:MODEFH
(match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
(parallel [(const_int 0)]))))]
- "SSE_FLOAT_MODE_P (<MODE>mode)"
+ "SSE_FLOAT_MODE_P (<MODE>mode) || <MODE>mode == E_HFmode"
"%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
[(set_attr "type" "ssecomi")
(set_attr "prefix" "maybe_vex")
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-final { scan-assembler-times "vcomxsd\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcomxss\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vucomxsd\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vucomxss\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x3;
+volatile __m128d x4;
+volatile int a;
+
+void extern
+avx10_2_test (void)
+{
+ a = _mm_comi_round_sd (x4, x4, _CMP_EQ_OS, _MM_FROUND_NO_EXC);
+ a = _mm_comi_round_ss (x3, x3, _CMP_NEQ_US, _MM_FROUND_NO_EXC);
+ a = _mm_comi_round_sd (x4, x4, _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
+ a = _mm_comi_round_ss (x3, x3, _CMP_NEQ_UQ, _MM_FROUND_NO_EXC);
+}
From: "Zhang, Jun" <jun.zhang@intel.com> gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_ssecom_setcc): Mention behavior change on flags. (ix86_expand_sse_comi): Handle AVX10.2 behavior. (ix86_expand_sse_comi_round): Ditto. (ix86_expand_round_builtin): Ditto. (ix86_expand_builtin): Change function call. * config/i386/i386.md (UNSPEC_COMX): New unspec. * config/i386/sse.md (avx10_2_v<unord>comx<ssemodesuffix><round_saeonly_name>): New. (<sse>_<unord>comi<round_saeonly_name>): Add HFmode. gcc/testsuite/ChangeLog: * gcc.target/i386/avx10_2-compare-1.c: New test. Co-authored-by: Haochen Jiang <haochen.jiang@intel.com> Co-authored-by: Hongtao Liu <hongtao.liu@intel.com> --- gcc/config/i386/i386-expand.cc | 170 +++++++++++++++--- gcc/config/i386/i386.md | 1 + gcc/config/i386/sse.md | 18 +- .../gcc.target/i386/avx10_2-compare-1.c | 21 +++ 4 files changed, 183 insertions(+), 27 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-compare-1.c