Message ID | 20240515082054.3934069-4-hongyu.wang@intel.com |
---|---|
State | New |
Headers | show |
Series | Support Intel APX CCMP | expand |
On Wed, May 15, 2024 at 4:21 PM Hongyu Wang <hongyu.wang@intel.com> wrote: > > The ccmp insn itself doesn't support fp compare, but x86 has fp comi > insn that changes EFLAGS which can be the scc input to ccmp. Allow > scalar fp compare in ix86_gen_ccmp_first except ORDERED/UNORDERED > compare which cannot be identified in ccmp. Ok if the second patch (middle-end part) is approved. > > gcc/ChangeLog: > > * config/i386/i386-expand.cc (ix86_gen_ccmp_first): Add fp > compare and check the allowed fp compare type. > (ix86_gen_ccmp_next): Adjust compare_code input to ccmp for > fp compare. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/apx-ccmp-1.c: Add test for fp compare. > * gcc.target/i386/apx-ccmp-2.c: Likewise. > --- > gcc/config/i386/i386-expand.cc | 53 ++++++++++++++++++++-- > gcc/testsuite/gcc.target/i386/apx-ccmp-1.c | 45 +++++++++++++++++- > gcc/testsuite/gcc.target/i386/apx-ccmp-2.c | 47 +++++++++++++++++++ > 3 files changed, 138 insertions(+), 7 deletions(-) > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc > index f00525e449f..7507034dc91 100644 > --- a/gcc/config/i386/i386-expand.cc > +++ b/gcc/config/i386/i386-expand.cc > @@ -25571,18 +25571,58 @@ ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq, > if (op_mode == VOIDmode) > op_mode = GET_MODE (op1); > > + /* We only support the following scalar comparisons that use just 1 > + instruction: DI/SI/QI/HI/DF/SF/HF. > + Unordered/Ordered compare cannot be correctly identified by > + ccmp so they are not supported. */ > if (!(op_mode == DImode || op_mode == SImode || op_mode == HImode > - || op_mode == QImode)) > + || op_mode == QImode || op_mode == DFmode || op_mode == SFmode > + || op_mode == HFmode) > + || code == ORDERED > + || code == UNORDERED) > { > end_sequence (); > return NULL_RTX; > } > > /* Canonicalize the operands according to mode. 
*/ > - if (!nonimmediate_operand (op0, op_mode)) > - op0 = force_reg (op_mode, op0); > - if (!x86_64_general_operand (op1, op_mode)) > - op1 = force_reg (op_mode, op1); > + if (SCALAR_INT_MODE_P (op_mode)) > + { > + if (!nonimmediate_operand (op0, op_mode)) > + op0 = force_reg (op_mode, op0); > + if (!x86_64_general_operand (op1, op_mode)) > + op1 = force_reg (op_mode, op1); > + } > + else > + { > + /* op0/op1 can be canonicalized from expand_fp_compare, so > + just adjust the code to make it generate supported fp > + condition. */ > + if (ix86_fp_compare_code_to_integer (code) == UNKNOWN) > + { > + /* First try to split condition if we don't need to honor > + NaNs, as the ORDERED/UNORDERED check always falls > + through. */ > + if (!HONOR_NANS (op_mode)) > + { > + rtx_code first_code; > + split_comparison (code, op_mode, &first_code, &code); > + } > + /* Otherwise try to swap the operand order and check if > + the comparison is supported. */ > + else > + { > + code = swap_condition (code); > + std::swap (op0, op1); > + } > + > + if (ix86_fp_compare_code_to_integer (code) == UNKNOWN) > + { > + end_sequence (); > + return NULL_RTX; > + } > + } > + } > > *prep_seq = get_insns (); > end_sequence (); > @@ -25647,6 +25687,9 @@ ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, > dfv = ix86_get_flags_cc ((rtx_code) cmp_code); > > prev_code = GET_CODE (prev); > + /* Fixup FP compare code here. */ > + if (GET_MODE (XEXP (prev, 0)) == CCFPmode) > + prev_code = ix86_fp_compare_code_to_integer (prev_code); > > if (bit_code != AND) > prev_code = reverse_condition (prev_code); > diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c > index 5a2dad89f1f..e4e112f07e0 100644 > --- a/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c > +++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c > @@ -1,5 +1,5 @@ > /* { dg-do compile { target { ! 
ia32 } } } */ > -/* { dg-options "-O2 -mapx-features=ccmp" } */ > +/* { dg-options "-O2 -ffast-math -mapx-features=ccmp" } */ > > int > f1 (int a) > @@ -56,8 +56,49 @@ f9 (int a, int b) > return a == 3 || a == 0; > } > > +int > +f10 (float a, int b, float c) > +{ > + return a > c || b < 19; > +} > + > +int > +f11 (float a, int b) > +{ > + return a == 0.0 && b > 21; > +} > + > +int > +f12 (double a, int b) > +{ > + return a < 3.0 && b != 23; > +} > + > +int > +f13 (double a, double b, int c, int d) > +{ > + a += b; > + c += d; > + return a != b || c == d; > +} > + > +int > +f14 (double a, int b) > +{ > + return b != 0 && a < 1.5; > +} > + > +int > +f15 (double a, double b, int c, int d) > +{ > + return c != d || a <= b; > +} > + > /* { dg-final { scan-assembler-times "ccmpg" 2 } } */ > /* { dg-final { scan-assembler-times "ccmple" 2 } } */ > /* { dg-final { scan-assembler-times "ccmpne" 4 } } */ > -/* { dg-final { scan-assembler-times "ccmpe" 1 } } */ > +/* { dg-final { scan-assembler-times "ccmpe" 3 } } */ > +/* { dg-final { scan-assembler-times "ccmpbe" 1 } } */ > +/* { dg-final { scan-assembler-times "ccmpa" 1 } } */ > +/* { dg-final { scan-assembler-times "ccmpbl" 2 } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c > index 30a1c216c1b..0123a686d2c 100644 > --- a/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c > +++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c > @@ -42,6 +42,47 @@ int foo_noapx(int a, int b, int c, int d) > return sum; > } > > +__attribute__((noinline, noclone, > + optimize(("finite-math-only")), target("apxf"))) > +double foo_fp_apx(int a, double b, int c, double d) > +{ > + int sum = a; > + double sumd = b; > + > + if (a != c) > + { > + sum += a; > + if (a < c || sumd != d || sum > c) > + { > + c += a; > + sum += a + c; > + } > + } > + > + return sum + sumd; > +} > + > +__attribute__((noinline, noclone, > + optimize(("finite-math-only")), target("no-apxf"))) > +double foo_fp_noapx(int a, 
double b, int c, double d) > +{ > + int sum = a; > + double sumd = b; > + > + if (a != c) > + { > + sum += a; > + if (a < c || sumd != d || sum > c) > + { > + c += a; > + sum += a + c; > + } > + } > + > + return sum + sumd; > +} > + > + > int main (void) > { > if (!__builtin_cpu_supports ("apxf")) > @@ -53,5 +94,11 @@ int main (void) > if (val1 != val2) > __builtin_abort (); > > + double val3 = foo_fp_noapx (24, 7.5, 32, 2.0); > + double val4 = foo_fp_apx (24, 7.5, 32, 2.0); > + > + if (val3 != val4) > + __builtin_abort (); > + > return 0; > } > -- > 2.31.1 >
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index f00525e449f..7507034dc91 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -25571,18 +25571,58 @@ ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq, if (op_mode == VOIDmode) op_mode = GET_MODE (op1); + /* We only support the following scalar comparisons that use just 1 + instruction: DI/SI/QI/HI/DF/SF/HF. + Unordered/Ordered compare cannot be correctly identified by + ccmp so they are not supported. */ if (!(op_mode == DImode || op_mode == SImode || op_mode == HImode - || op_mode == QImode)) + || op_mode == QImode || op_mode == DFmode || op_mode == SFmode + || op_mode == HFmode) + || code == ORDERED + || code == UNORDERED) { end_sequence (); return NULL_RTX; } /* Canonicalize the operands according to mode. */ - if (!nonimmediate_operand (op0, op_mode)) - op0 = force_reg (op_mode, op0); - if (!x86_64_general_operand (op1, op_mode)) - op1 = force_reg (op_mode, op1); + if (SCALAR_INT_MODE_P (op_mode)) + { + if (!nonimmediate_operand (op0, op_mode)) + op0 = force_reg (op_mode, op0); + if (!x86_64_general_operand (op1, op_mode)) + op1 = force_reg (op_mode, op1); + } + else + { + /* op0/op1 can be canonicalized from expand_fp_compare, so + just adjust the code to make it generate supported fp + condition. */ + if (ix86_fp_compare_code_to_integer (code) == UNKNOWN) + { + /* First try to split condition if we don't need to honor + NaNs, as the ORDERED/UNORDERED check always falls + through. */ + if (!HONOR_NANS (op_mode)) + { + rtx_code first_code; + split_comparison (code, op_mode, &first_code, &code); + } + /* Otherwise try to swap the operand order and check if + the comparison is supported. 
*/ + else + { + code = swap_condition (code); + std::swap (op0, op1); + } + + if (ix86_fp_compare_code_to_integer (code) == UNKNOWN) + { + end_sequence (); + return NULL_RTX; + } + } + } *prep_seq = get_insns (); end_sequence (); @@ -25647,6 +25687,9 @@ ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, dfv = ix86_get_flags_cc ((rtx_code) cmp_code); prev_code = GET_CODE (prev); + /* Fixup FP compare code here. */ + if (GET_MODE (XEXP (prev, 0)) == CCFPmode) + prev_code = ix86_fp_compare_code_to_integer (prev_code); if (bit_code != AND) prev_code = reverse_condition (prev_code); diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c index 5a2dad89f1f..e4e112f07e0 100644 --- a/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c +++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { ! ia32 } } } */ -/* { dg-options "-O2 -mapx-features=ccmp" } */ +/* { dg-options "-O2 -ffast-math -mapx-features=ccmp" } */ int f1 (int a) @@ -56,8 +56,49 @@ f9 (int a, int b) return a == 3 || a == 0; } +int +f10 (float a, int b, float c) +{ + return a > c || b < 19; +} + +int +f11 (float a, int b) +{ + return a == 0.0 && b > 21; +} + +int +f12 (double a, int b) +{ + return a < 3.0 && b != 23; +} + +int +f13 (double a, double b, int c, int d) +{ + a += b; + c += d; + return a != b || c == d; +} + +int +f14 (double a, int b) +{ + return b != 0 && a < 1.5; +} + +int +f15 (double a, double b, int c, int d) +{ + return c != d || a <= b; +} + /* { dg-final { scan-assembler-times "ccmpg" 2 } } */ /* { dg-final { scan-assembler-times "ccmple" 2 } } */ /* { dg-final { scan-assembler-times "ccmpne" 4 } } */ -/* { dg-final { scan-assembler-times "ccmpe" 1 } } */ +/* { dg-final { scan-assembler-times "ccmpe" 3 } } */ +/* { dg-final { scan-assembler-times "ccmpbe" 1 } } */ +/* { dg-final { scan-assembler-times "ccmpa" 1 } } */ +/* { dg-final { scan-assembler-times "ccmpbl" 2 } } */ diff --git 
a/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c index 30a1c216c1b..0123a686d2c 100644 --- a/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c +++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c @@ -42,6 +42,47 @@ int foo_noapx(int a, int b, int c, int d) return sum; } +__attribute__((noinline, noclone, + optimize(("finite-math-only")), target("apxf"))) +double foo_fp_apx(int a, double b, int c, double d) +{ + int sum = a; + double sumd = b; + + if (a != c) + { + sum += a; + if (a < c || sumd != d || sum > c) + { + c += a; + sum += a + c; + } + } + + return sum + sumd; +} + +__attribute__((noinline, noclone, + optimize(("finite-math-only")), target("no-apxf"))) +double foo_fp_noapx(int a, double b, int c, double d) +{ + int sum = a; + double sumd = b; + + if (a != c) + { + sum += a; + if (a < c || sumd != d || sum > c) + { + c += a; + sum += a + c; + } + } + + return sum + sumd; +} + + int main (void) { if (!__builtin_cpu_supports ("apxf")) @@ -53,5 +94,11 @@ int main (void) if (val1 != val2) __builtin_abort (); + double val3 = foo_fp_noapx (24, 7.5, 32, 2.0); + double val4 = foo_fp_apx (24, 7.5, 32, 2.0); + + if (val3 != val4) + __builtin_abort (); + return 0; }