Message ID | 20230711091349.3376586-1-hongtao.liu@intel.com |
---|---|
State | New |
Headers | show |
Series | Add peephole to eliminate redundant comparison after cmpccxadd. | expand |
Ping. On Tue, Jul 11, 2023 at 5:16 PM liuhongt via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Similar like we did for CMPXCHG, but extended to all > ix86_comparison_int_operator since CMPCCXADD set EFLAGS exactly same > as CMP. > > When operand order in CMP insn is same as that in CMPCCXADD, > CMP insn can be eliminated directly. > > When operand order is swapped in CMP insn, only optimize > cmpccxadd + cmpl + jcc/setcc to cmpccxadd + jcc/setcc when FLAGS_REG is dead > after jcc/setcc plus adjusting code for jcc/setcc. > > gcc/ChangeLog: > > PR target/110591 > * config/i386/sync.md (cmpccxadd_<mode>): Adjust the pattern > to explicitly set FLAGS_REG like *cmp<mode>_1, also add extra > 3 define_peephole2 after the pattern. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr110591.c: New test. > * gcc.target/i386/pr110591-2.c: New test. > --- > gcc/config/i386/sync.md | 160 ++++++++++++++++++++- > gcc/testsuite/gcc.target/i386/pr110591-2.c | 90 ++++++++++++ > gcc/testsuite/gcc.target/i386/pr110591.c | 66 +++++++++ > 3 files changed, 315 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr110591-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr110591.c > > diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md > index e1fa1504deb..e84226cf895 100644 > --- a/gcc/config/i386/sync.md > +++ b/gcc/config/i386/sync.md > @@ -1093,7 +1093,9 @@ (define_insn "cmpccxadd_<mode>" > UNSPECV_CMPCCXADD)) > (set (match_dup 1) > (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) > - (clobber (reg:CC FLAGS_REG))] > + (set (reg:CC FLAGS_REG) > + (compare:CC (match_dup 1) > + (match_dup 2)))] > "TARGET_CMPCCXADD && TARGET_64BIT" > { > char buf[128]; > @@ -1105,3 +1107,159 @@ (define_insn "cmpccxadd_<mode>" > output_asm_insn (buf, operands); > return ""; > }) > + > +(define_peephole2 > + [(set (match_operand:SWI48x 0 "register_operand") > + (match_operand:SWI48x 1 "x86_64_general_operand")) > + (parallel [(set (match_dup 0) > + (unspec_volatile:SWI48x > + [(match_operand:SWI48x 2 "memory_operand") > + (match_dup 0) > + (match_operand:SWI48x 3 "register_operand") > + (match_operand:SI 4 "const_int_operand")] > + UNSPECV_CMPCCXADD)) > + (set (match_dup 2) > + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) > + (set (reg:CC FLAGS_REG) > + (compare:CC (match_dup 2) > + (match_dup 0)))]) > + (set (reg FLAGS_REG) > + (compare (match_operand:SWI48x 5 "register_operand") > + (match_operand:SWI48x 6 "x86_64_general_operand")))] > + "TARGET_CMPCCXADD && TARGET_64BIT > + && rtx_equal_p (operands[0], operands[5]) > + && rtx_equal_p (operands[1], operands[6])" > + [(set (match_dup 0) > + (match_dup 1)) > + (parallel [(set (match_dup 0) > + (unspec_volatile:SWI48x > + [(match_dup 2) > + (match_dup 0) > + (match_dup 3) > + (match_dup 4)] > + UNSPECV_CMPCCXADD)) > + (set (match_dup 2) > + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) > + (set (reg:CC FLAGS_REG) > + (compare:CC (match_dup 2) > + (match_dup 0)))]) > + (set (match_dup 7) > + (match_op_dup 8 > + [(match_dup 9) (const_int 0)]))]) > + > +(define_peephole2 > + [(set (match_operand:SWI48x 0 "register_operand") > + (match_operand:SWI48x 1 "x86_64_general_operand")) > + (parallel [(set (match_dup 0) > + (unspec_volatile:SWI48x > + [(match_operand:SWI48x 2 "memory_operand") > + (match_dup 0) > + (match_operand:SWI48x 3 "register_operand") > + (match_operand:SI 4 "const_int_operand")] > + UNSPECV_CMPCCXADD)) > + (set (match_dup 2) > + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) > + (set (reg:CC FLAGS_REG) > + (compare:CC (match_dup 2) > + (match_dup 0)))]) > + (set (reg FLAGS_REG) > + (compare (match_operand:SWI48x 5 "register_operand") > + (match_operand:SWI48x 6 "x86_64_general_operand"))) > + (set (match_operand:QI 7 "nonimmediate_operand") > + (match_operator:QI 8 "ix86_comparison_int_operator" > + [(reg FLAGS_REG) (const_int 0)]))] > + "TARGET_CMPCCXADD && TARGET_64BIT > + && rtx_equal_p (operands[0], operands[6]) > + && rtx_equal_p (operands[1], operands[5]) > + && peep2_regno_dead_p (4, FLAGS_REG)" > + [(set (match_dup 0) > + (match_dup 1)) > + (parallel [(set (match_dup 0) > + (unspec_volatile:SWI48x > + [(match_dup 2) > + (match_dup 0) > + (match_dup 3) > + (match_dup 4)] > + UNSPECV_CMPCCXADD)) > + (set (match_dup 2) > + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) > + (set (reg:CC FLAGS_REG) > + (compare:CC (match_dup 2) > + (match_dup 0)))]) > + (set (match_dup 7) > + (match_op_dup 8 > + [(match_dup 9) (const_int 0)]))] > +{ > + operands[9] = gen_rtx_REG (GET_MODE (XEXP (operands[8], 0)), FLAGS_REG); > + if (swap_condition (GET_CODE (operands[8])) != GET_CODE (operands[8])) > + { > + operands[8] = shallow_copy_rtx (operands[8]); > + enum rtx_code ccode = swap_condition (GET_CODE (operands[8])); > + PUT_CODE (operands[8], ccode); > + operands[9] = gen_rtx_REG (SELECT_CC_MODE (ccode, > + operands[6], > + operands[5]), > + FLAGS_REG); > + } > +}) > + > +(define_peephole2 > + [(set (match_operand:SWI48x 0 "register_operand") > + (match_operand:SWI48x 1 "x86_64_general_operand")) > + (parallel [(set (match_dup 0) > + (unspec_volatile:SWI48x > + [(match_operand:SWI48x 2 "memory_operand") > + (match_dup 0) > + (match_operand:SWI48x 3 "register_operand") > + (match_operand:SI 4 "const_int_operand")] > + UNSPECV_CMPCCXADD)) > + (set (match_dup 2) > + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) > + (set (reg:CC FLAGS_REG) > + (compare:CC (match_dup 2) > + (match_dup 0)))]) > + (set (reg FLAGS_REG) > + (compare (match_operand:SWI48x 5 "register_operand") > + (match_operand:SWI48x 6 "x86_64_general_operand"))) > + (set (pc) > + (if_then_else (match_operator 7 "ix86_comparison_int_operator" > + [(reg FLAGS_REG) (const_int 0)]) > + (label_ref (match_operand 8)) > + (pc)))] > + "TARGET_CMPCCXADD && TARGET_64BIT > + && rtx_equal_p (operands[0], operands[6]) > + && rtx_equal_p (operands[1], operands[5]) > + && peep2_regno_dead_p (4, FLAGS_REG)" > + [(set (match_dup 0) > + (match_dup 1)) > + (parallel [(set (match_dup 0) > + (unspec_volatile:SWI48x > + [(match_dup 2) > + (match_dup 0) > + (match_dup 3) > + (match_dup 4)] > + UNSPECV_CMPCCXADD)) > + (set (match_dup 2) > + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) > + (set (reg:CC FLAGS_REG) > + (compare:CC (match_dup 2) > + (match_dup 0)))]) > + (set (pc) > + (if_then_else > + (match_op_dup 7 > + [(match_dup 9) (const_int 0)]) > + (label_ref (match_dup 8)) > + (pc)))] > +{ > + operands[9] = gen_rtx_REG (GET_MODE (XEXP (operands[7], 0)), FLAGS_REG); > + if (swap_condition (GET_CODE (operands[7])) != GET_CODE (operands[7])) > + { > + operands[7] = shallow_copy_rtx (operands[7]); > + enum rtx_code ccode = swap_condition (GET_CODE (operands[7])); > + PUT_CODE (operands[7], ccode); > + operands[9] = gen_rtx_REG (SELECT_CC_MODE (ccode, > + operands[6], > + operands[5]), > + FLAGS_REG); > + } > +}) > diff --git a/gcc/testsuite/gcc.target/i386/pr110591-2.c b/gcc/testsuite/gcc.target/i386/pr110591-2.c > new file mode 100644 > index 00000000000..92ffdb97d62 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr110591-2.c > @@ -0,0 +1,90 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-mcmpccxadd -O2 -fno-if-conversion -fno-if-conversion2" } */ > +/* { dg-final { scan-assembler-not {cmp[lq]?[ \t]+} } } */ > +/* { dg-final { scan-assembler-times {cmpoxadd[ \t]+} 12 } } */ > + > +#include <immintrin.h> > + > +int foo_jg (int *ptr, int v) > +{ > + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) > v) > + return 100; > + return 200; > +} > + > +int foo_jl (int *ptr, int v) > +{ > + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) < v) > + return 300; > + return 100; > +} > + > +int foo_je(int *ptr, int v) > +{ > + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) == v) > + return 123; > + return 134; > +} > + > +int foo_jne(int *ptr, int v) > +{ > + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) != v) > + return 111; > + return 12; > +} > + > +int foo_jge(int *ptr, int v) > +{ > + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) >= v) > + return 413; > + return 23; > +} > + > +int foo_jle(int *ptr, int v) > +{ > + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) <= v) > + return 3141; > + return 341; > +} > + > +int fooq_jg (long long *ptr, long long v) > +{ > + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) > v) > + return 123; > + return 3; > +} > + > +int fooq_jl (long long *ptr, long long v) > +{ > + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) < v) > + return 313; > + return 5; > +} > + > +int fooq_je(long long *ptr, long long v) > +{ > + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) == v) > + return 1313; > + return 13; > +} > + > +int fooq_jne(long long *ptr, long long v) > +{ > + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) != v) > + return 1314; > + return 132; > +} > + > +int fooq_jge(long long *ptr, long long v) > +{ > + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) >= v) > + return 14314; > + return 434; > +} > + > +int fooq_jle(long long *ptr, long long v) > +{ > + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) <= v) > + return 14414; > + return 43; > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr110591.c b/gcc/testsuite/gcc.target/i386/pr110591.c > new file mode 100644 > index 00000000000..32a515b429e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr110591.c > @@ -0,0 +1,66 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-mcmpccxadd -O2" } */ > +/* { dg-final { scan-assembler-not {cmp[lq]?[ \t]+} } } */ > +/* { dg-final { scan-assembler-times {cmpoxadd[ \t]+} 12 } } */ > + > +#include <immintrin.h> > + > +_Bool foo_setg (int *ptr, int v) > +{ > + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) > v; > +} > + > +_Bool foo_setl (int *ptr, int v) > +{ > + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) < v; > +} > + > +_Bool foo_sete(int *ptr, int v) > +{ > + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) == v; > +} > + > +_Bool foo_setne(int *ptr, int v) > +{ > + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) != v; > +} > + > +_Bool foo_setge(int *ptr, int v) > +{ > + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) >= v; > +} > + > +_Bool foo_setle(int *ptr, int v) > +{ > + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) <= v; > +} > + > +_Bool fooq_setg (long long *ptr, long long v) > +{ > + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) > v; > +} > + > +_Bool fooq_setl (long long *ptr, long long v) > +{ > + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) < v; > +} > + > +_Bool fooq_sete(long long *ptr, long long v) > +{ > + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) == v; > +} > + > +_Bool fooq_setne(long long *ptr, long long v) > +{ > + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) != v; > +} > + > +_Bool fooq_setge(long long *ptr, long long v) > +{ > + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) >= v; > +} > + > +_Bool fooq_setle(long long *ptr, long long v) > +{ > + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) <= v; > +} > -- > 2.39.1.388.g2fc9e9ca3c >
On Mon, Jul 17, 2023 at 8:44 AM Hongtao Liu <crazylht@gmail.com> wrote: > > Ping. > > On Tue, Jul 11, 2023 at 5:16 PM liuhongt via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > Similar like we did for CMPXCHG, but extended to all > > ix86_comparison_int_operator since CMPCCXADD set EFLAGS exactly same > > as CMP. > > > > When operand order in CMP insn is same as that in CMPCCXADD, > > CMP insn can be eliminated directly. > > > > When operand order is swapped in CMP insn, only optimize > > cmpccxadd + cmpl + jcc/setcc to cmpccxadd + jcc/setcc when FLAGS_REG is dead > > after jcc/setcc plus adjusting code for jcc/setcc. > > > > gcc/ChangeLog: > > > > PR target/110591 > > * config/i386/sync.md (cmpccxadd_<mode>): Adjust the pattern > > to explicitly set FLAGS_REG like *cmp<mode>_1, also add extra > > 3 define_peephole2 after the pattern. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/i386/pr110591.c: New test. > > * gcc.target/i386/pr110591-2.c: New test. LGTM. Thanks, Uros. > > --- > > gcc/config/i386/sync.md | 160 ++++++++++++++++++++- > > gcc/testsuite/gcc.target/i386/pr110591-2.c | 90 ++++++++++++ > > gcc/testsuite/gcc.target/i386/pr110591.c | 66 +++++++++ > > 3 files changed, 315 insertions(+), 1 deletion(-) > > create mode 100644 gcc/testsuite/gcc.target/i386/pr110591-2.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr110591.c > > > > diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md > > index e1fa1504deb..e84226cf895 100644 > > --- a/gcc/config/i386/sync.md > > +++ b/gcc/config/i386/sync.md > > @@ -1093,7 +1093,9 @@ (define_insn "cmpccxadd_<mode>" > > UNSPECV_CMPCCXADD)) > > (set (match_dup 1) > > (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) > > - (clobber (reg:CC FLAGS_REG))] > > + (set (reg:CC FLAGS_REG) > > + (compare:CC (match_dup 1) > > + (match_dup 2)))] > > "TARGET_CMPCCXADD && TARGET_64BIT" > > { > > char buf[128]; > > @@ -1105,3 +1107,159 @@ (define_insn "cmpccxadd_<mode>" > > output_asm_insn (buf, operands); > > return ""; > > }) > > + > > +(define_peephole2 > > + [(set (match_operand:SWI48x 0 "register_operand") > > + (match_operand:SWI48x 1 "x86_64_general_operand")) > > + (parallel [(set (match_dup 0) > > + (unspec_volatile:SWI48x > > + [(match_operand:SWI48x 2 "memory_operand") > > + (match_dup 0) > > + (match_operand:SWI48x 3 "register_operand") > > + (match_operand:SI 4 "const_int_operand")] > > + UNSPECV_CMPCCXADD)) > > + (set (match_dup 2) > > + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) > > + (set (reg:CC FLAGS_REG) > > + (compare:CC (match_dup 2) > > + (match_dup 0)))]) > > + (set (reg FLAGS_REG) > > + (compare (match_operand:SWI48x 5 "register_operand") > > + (match_operand:SWI48x 6 "x86_64_general_operand")))] > > + "TARGET_CMPCCXADD && TARGET_64BIT > > + && rtx_equal_p (operands[0], operands[5]) > > + && rtx_equal_p (operands[1], operands[6])" > > + [(set (match_dup 0) > > + (match_dup 1)) > > + (parallel [(set (match_dup 0) > > + (unspec_volatile:SWI48x > > + [(match_dup 2) > > + (match_dup 0) > > + (match_dup 3) > > + (match_dup 4)] > > + UNSPECV_CMPCCXADD)) > > + (set (match_dup 2) > > + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) > > + (set (reg:CC FLAGS_REG) > > + (compare:CC (match_dup 2) > > + (match_dup 0)))]) > > + (set (match_dup 7) > > + (match_op_dup 8 > > + [(match_dup 9) (const_int 0)]))]) > > + > > +(define_peephole2 > > + [(set (match_operand:SWI48x 0 "register_operand") > > + (match_operand:SWI48x 1 "x86_64_general_operand")) > > + (parallel [(set (match_dup 0) > > + (unspec_volatile:SWI48x > > + [(match_operand:SWI48x 2 "memory_operand") > > + (match_dup 0) > > + (match_operand:SWI48x 3 "register_operand") > > + (match_operand:SI 4 "const_int_operand")] > > + UNSPECV_CMPCCXADD)) > > + (set (match_dup 2) > > + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) > > + (set (reg:CC FLAGS_REG) > > + (compare:CC (match_dup 2) > > + (match_dup 0)))]) > > + (set (reg FLAGS_REG) > > + (compare (match_operand:SWI48x 5 "register_operand") > > + (match_operand:SWI48x 6 "x86_64_general_operand"))) > > + (set (match_operand:QI 7 "nonimmediate_operand") > > + (match_operator:QI 8 "ix86_comparison_int_operator" > > + [(reg FLAGS_REG) (const_int 0)]))] > > + "TARGET_CMPCCXADD && TARGET_64BIT > > + && rtx_equal_p (operands[0], operands[6]) > > + && rtx_equal_p (operands[1], operands[5]) > > + && peep2_regno_dead_p (4, FLAGS_REG)" > > + [(set (match_dup 0) > > + (match_dup 1)) > > + (parallel [(set (match_dup 0) > > + (unspec_volatile:SWI48x > > + [(match_dup 2) > > + (match_dup 0) > > + (match_dup 3) > > + (match_dup 4)] > > + UNSPECV_CMPCCXADD)) > > + (set (match_dup 2) > > + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) > > + (set (reg:CC FLAGS_REG) > > + (compare:CC (match_dup 2) > > + (match_dup 0)))]) > > + (set (match_dup 7) > > + (match_op_dup 8 > > + [(match_dup 9) (const_int 0)]))] > > +{ > > + operands[9] = gen_rtx_REG (GET_MODE (XEXP (operands[8], 0)), FLAGS_REG); > > + if (swap_condition (GET_CODE (operands[8])) != GET_CODE (operands[8])) > > + { > > + operands[8] = shallow_copy_rtx (operands[8]); > > + enum rtx_code ccode = swap_condition (GET_CODE (operands[8])); > > + PUT_CODE (operands[8], ccode); > > + operands[9] = gen_rtx_REG (SELECT_CC_MODE (ccode, > > + operands[6], > > + operands[5]), > > + FLAGS_REG); > > + } > > +}) > > + > > +(define_peephole2 > > + [(set (match_operand:SWI48x 0 "register_operand") > > + (match_operand:SWI48x 1 "x86_64_general_operand")) > > + (parallel [(set (match_dup 0) > > + (unspec_volatile:SWI48x > > + [(match_operand:SWI48x 2 "memory_operand") > > + (match_dup 0) > > + (match_operand:SWI48x 3 "register_operand") > > + (match_operand:SI 4 "const_int_operand")] > > + UNSPECV_CMPCCXADD)) > > + (set (match_dup 2) > > + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) > > + (set (reg:CC FLAGS_REG) > > + (compare:CC (match_dup 2) > > + (match_dup 0)))]) > > + (set (reg FLAGS_REG) > > + (compare (match_operand:SWI48x 5 "register_operand") > > + (match_operand:SWI48x 6 "x86_64_general_operand"))) > > + (set (pc) > > + (if_then_else (match_operator 7 "ix86_comparison_int_operator" > > + [(reg FLAGS_REG) (const_int 0)]) > > + (label_ref (match_operand 8)) > > + (pc)))] > > + "TARGET_CMPCCXADD && TARGET_64BIT > > + && rtx_equal_p (operands[0], operands[6]) > > + && rtx_equal_p (operands[1], operands[5]) > > + && peep2_regno_dead_p (4, FLAGS_REG)" > > + [(set (match_dup 0) > > + (match_dup 1)) > > + (parallel [(set (match_dup 0) > > + (unspec_volatile:SWI48x > > + [(match_dup 2) > > + (match_dup 0) > > + (match_dup 3) > > + (match_dup 4)] > > + UNSPECV_CMPCCXADD)) > > + (set (match_dup 2) > > + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) > > + (set (reg:CC FLAGS_REG) > > + (compare:CC (match_dup 2) > > + (match_dup 0)))]) > > + (set (pc) > > + (if_then_else > > + (match_op_dup 7 > > + [(match_dup 9) (const_int 0)]) > > + (label_ref (match_dup 8)) > > + (pc)))] > > +{ > > + operands[9] = gen_rtx_REG (GET_MODE (XEXP (operands[7], 0)), FLAGS_REG); > > + if (swap_condition (GET_CODE (operands[7])) != GET_CODE (operands[7])) > > + { > > + operands[7] = shallow_copy_rtx (operands[7]); > > + enum rtx_code ccode = swap_condition (GET_CODE (operands[7])); > > + PUT_CODE (operands[7], ccode); > > + operands[9] = gen_rtx_REG (SELECT_CC_MODE (ccode, > > + operands[6], > > + operands[5]), > > + FLAGS_REG); > > + } > > +}) > > diff --git a/gcc/testsuite/gcc.target/i386/pr110591-2.c b/gcc/testsuite/gcc.target/i386/pr110591-2.c > > new file mode 100644 > > index 00000000000..92ffdb97d62 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr110591-2.c > > @@ -0,0 +1,90 @@ > > +/* { dg-do compile { target { ! ia32 } } } */ > > +/* { dg-options "-mcmpccxadd -O2 -fno-if-conversion -fno-if-conversion2" } */ > > +/* { dg-final { scan-assembler-not {cmp[lq]?[ \t]+} } } */ > > +/* { dg-final { scan-assembler-times {cmpoxadd[ \t]+} 12 } } */ > > + > > +#include <immintrin.h> > > + > > +int foo_jg (int *ptr, int v) > > +{ > > + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) > v) > > + return 100; > > + return 200; > > +} > > + > > +int foo_jl (int *ptr, int v) > > +{ > > + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) < v) > > + return 300; > > + return 100; > > +} > > + > > +int foo_je(int *ptr, int v) > > +{ > > + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) == v) > > + return 123; > > + return 134; > > +} > > + > > +int foo_jne(int *ptr, int v) > > +{ > > + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) != v) > > + return 111; > > + return 12; > > +} > > + > > +int foo_jge(int *ptr, int v) > > +{ > > + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) >= v) > > + return 413; > > + return 23; > > +} > > + > > +int foo_jle(int *ptr, int v) > > +{ > > + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) <= v) > > + return 3141; > > + return 341; > > +} > > + > > +int fooq_jg (long long *ptr, long long v) > > +{ > > + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) > v) > > + return 123; > > + return 3; > > +} > > + > > +int fooq_jl (long long *ptr, long long v) > > +{ > > + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) < v) > > + return 313; > > + return 5; > > +} > > + > > +int fooq_je(long long *ptr, long long v) > > +{ > > + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) == v) > > + return 1313; > > + return 13; > > +} > > + > > +int fooq_jne(long long *ptr, long long v) > > +{ > > + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) != v) > > + return 1314; > > + return 132; > > +} > > + > > +int fooq_jge(long long *ptr, long long v) > > +{ > > + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) >= v) > > + return 14314; > > + return 434; > > +} > > + > > +int fooq_jle(long long *ptr, long long v) > > +{ > > + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) <= v) > > + return 14414; > > + return 43; > > +} > > diff --git a/gcc/testsuite/gcc.target/i386/pr110591.c b/gcc/testsuite/gcc.target/i386/pr110591.c > > new file mode 100644 > > index 00000000000..32a515b429e > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr110591.c > > @@ -0,0 +1,66 @@ > > +/* { dg-do compile { target { ! ia32 } } } */ > > +/* { dg-options "-mcmpccxadd -O2" } */ > > +/* { dg-final { scan-assembler-not {cmp[lq]?[ \t]+} } } */ > > +/* { dg-final { scan-assembler-times {cmpoxadd[ \t]+} 12 } } */ > > + > > +#include <immintrin.h> > > + > > +_Bool foo_setg (int *ptr, int v) > > +{ > > + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) > v; > > +} > > + > > +_Bool foo_setl (int *ptr, int v) > > +{ > > + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) < v; > > +} > > + > > +_Bool foo_sete(int *ptr, int v) > > +{ > > + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) == v; > > +} > > + > > +_Bool foo_setne(int *ptr, int v) > > +{ > > + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) != v; > > +} > > + > > +_Bool foo_setge(int *ptr, int v) > > +{ > > + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) >= v; > > +} > > + > > +_Bool foo_setle(int *ptr, int v) > > +{ > > + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) <= v; > > +} > > + > > +_Bool fooq_setg (long long *ptr, long long v) > > +{ > > + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) > v; > > +} > > + > > +_Bool fooq_setl (long long *ptr, long long v) > > +{ > > + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) < v; > > +} > > + > > +_Bool fooq_sete(long long *ptr, long long v) > > +{ > > + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) == v; > > +} > > + > > +_Bool fooq_setne(long long *ptr, long long v) > > +{ > > + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) != v; > > +} > > + > > +_Bool fooq_setge(long long *ptr, long long v) > > +{ > > + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) >= v; > > +} > > + > > +_Bool fooq_setle(long long *ptr, long long v) > > +{ > > + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) <= v; > > +} > > -- > > 2.39.1.388.g2fc9e9ca3c > > > > > -- > BR, > Hongtao
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md index e1fa1504deb..e84226cf895 100644 --- a/gcc/config/i386/sync.md +++ b/gcc/config/i386/sync.md @@ -1093,7 +1093,9 @@ (define_insn "cmpccxadd_<mode>" UNSPECV_CMPCCXADD)) (set (match_dup 1) (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) - (clobber (reg:CC FLAGS_REG))] + (set (reg:CC FLAGS_REG) + (compare:CC (match_dup 1) + (match_dup 2)))] "TARGET_CMPCCXADD && TARGET_64BIT" { char buf[128]; @@ -1105,3 +1107,159 @@ (define_insn "cmpccxadd_<mode>" output_asm_insn (buf, operands); return ""; }) + +(define_peephole2 + [(set (match_operand:SWI48x 0 "register_operand") + (match_operand:SWI48x 1 "x86_64_general_operand")) + (parallel [(set (match_dup 0) + (unspec_volatile:SWI48x + [(match_operand:SWI48x 2 "memory_operand") + (match_dup 0) + (match_operand:SWI48x 3 "register_operand") + (match_operand:SI 4 "const_int_operand")] + UNSPECV_CMPCCXADD)) + (set (match_dup 2) + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) + (set (reg:CC FLAGS_REG) + (compare:CC (match_dup 2) + (match_dup 0)))]) + (set (reg FLAGS_REG) + (compare (match_operand:SWI48x 5 "register_operand") + (match_operand:SWI48x 6 "x86_64_general_operand")))] + "TARGET_CMPCCXADD && TARGET_64BIT + && rtx_equal_p (operands[0], operands[5]) + && rtx_equal_p (operands[1], operands[6])" + [(set (match_dup 0) + (match_dup 1)) + (parallel [(set (match_dup 0) + (unspec_volatile:SWI48x + [(match_dup 2) + (match_dup 0) + (match_dup 3) + (match_dup 4)] + UNSPECV_CMPCCXADD)) + (set (match_dup 2) + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) + (set (reg:CC FLAGS_REG) + (compare:CC (match_dup 2) + (match_dup 0)))]) + (set (match_dup 7) + (match_op_dup 8 + [(match_dup 9) (const_int 0)]))]) + +(define_peephole2 + [(set (match_operand:SWI48x 0 "register_operand") + (match_operand:SWI48x 1 "x86_64_general_operand")) + (parallel [(set (match_dup 0) + (unspec_volatile:SWI48x + [(match_operand:SWI48x 2 "memory_operand") + (match_dup 0) + (match_operand:SWI48x 3 "register_operand") + (match_operand:SI 4 "const_int_operand")] + UNSPECV_CMPCCXADD)) + (set (match_dup 2) + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) + (set (reg:CC FLAGS_REG) + (compare:CC (match_dup 2) + (match_dup 0)))]) + (set (reg FLAGS_REG) + (compare (match_operand:SWI48x 5 "register_operand") + (match_operand:SWI48x 6 "x86_64_general_operand"))) + (set (match_operand:QI 7 "nonimmediate_operand") + (match_operator:QI 8 "ix86_comparison_int_operator" + [(reg FLAGS_REG) (const_int 0)]))] + "TARGET_CMPCCXADD && TARGET_64BIT + && rtx_equal_p (operands[0], operands[6]) + && rtx_equal_p (operands[1], operands[5]) + && peep2_regno_dead_p (4, FLAGS_REG)" + [(set (match_dup 0) + (match_dup 1)) + (parallel [(set (match_dup 0) + (unspec_volatile:SWI48x + [(match_dup 2) + (match_dup 0) + (match_dup 3) + (match_dup 4)] + UNSPECV_CMPCCXADD)) + (set (match_dup 2) + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) + (set (reg:CC FLAGS_REG) + (compare:CC (match_dup 2) + (match_dup 0)))]) + (set (match_dup 7) + (match_op_dup 8 + [(match_dup 9) (const_int 0)]))] +{ + operands[9] = gen_rtx_REG (GET_MODE (XEXP (operands[8], 0)), FLAGS_REG); + if (swap_condition (GET_CODE (operands[8])) != GET_CODE (operands[8])) + { + operands[8] = shallow_copy_rtx (operands[8]); + enum rtx_code ccode = swap_condition (GET_CODE (operands[8])); + PUT_CODE (operands[8], ccode); + operands[9] = gen_rtx_REG (SELECT_CC_MODE (ccode, + operands[6], + operands[5]), + FLAGS_REG); + } +}) + +(define_peephole2 + [(set (match_operand:SWI48x 0 "register_operand") + (match_operand:SWI48x 1 "x86_64_general_operand")) + (parallel [(set (match_dup 0) + (unspec_volatile:SWI48x + [(match_operand:SWI48x 2 "memory_operand") + (match_dup 0) + (match_operand:SWI48x 3 "register_operand") + (match_operand:SI 4 "const_int_operand")] + UNSPECV_CMPCCXADD)) + (set (match_dup 2) + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) + (set (reg:CC FLAGS_REG) + (compare:CC (match_dup 2) + (match_dup 0)))]) + (set (reg FLAGS_REG) + (compare (match_operand:SWI48x 5 "register_operand") + (match_operand:SWI48x 6 "x86_64_general_operand"))) + (set (pc) + (if_then_else (match_operator 7 "ix86_comparison_int_operator" + [(reg FLAGS_REG) (const_int 0)]) + (label_ref (match_operand 8)) + (pc)))] + "TARGET_CMPCCXADD && TARGET_64BIT + && rtx_equal_p (operands[0], operands[6]) + && rtx_equal_p (operands[1], operands[5]) + && peep2_regno_dead_p (4, FLAGS_REG)" + [(set (match_dup 0) + (match_dup 1)) + (parallel [(set (match_dup 0) + (unspec_volatile:SWI48x + [(match_dup 2) + (match_dup 0) + (match_dup 3) + (match_dup 4)] + UNSPECV_CMPCCXADD)) + (set (match_dup 2) + (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) + (set (reg:CC FLAGS_REG) + (compare:CC (match_dup 2) + (match_dup 0)))]) + (set (pc) + (if_then_else + (match_op_dup 7 + [(match_dup 9) (const_int 0)]) + (label_ref (match_dup 8)) + (pc)))] +{ + operands[9] = gen_rtx_REG (GET_MODE (XEXP (operands[7], 0)), FLAGS_REG); + if (swap_condition (GET_CODE (operands[7])) != GET_CODE (operands[7])) + { + operands[7] = shallow_copy_rtx (operands[7]); + enum rtx_code ccode = swap_condition (GET_CODE (operands[7])); + PUT_CODE (operands[7], ccode); + operands[9] = gen_rtx_REG (SELECT_CC_MODE (ccode, + operands[6], + operands[5]), + FLAGS_REG); + } +}) diff --git a/gcc/testsuite/gcc.target/i386/pr110591-2.c b/gcc/testsuite/gcc.target/i386/pr110591-2.c new file mode 100644 index 00000000000..92ffdb97d62 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr110591-2.c @@ -0,0 +1,90 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mcmpccxadd -O2 -fno-if-conversion -fno-if-conversion2" } */ +/* { dg-final { scan-assembler-not {cmp[lq]?[ \t]+} } } */ +/* { dg-final { scan-assembler-times {cmpoxadd[ \t]+} 12 } } */ + +#include <immintrin.h> + +int foo_jg (int *ptr, int v) +{ + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) > v) + return 100; + return 200; +} + +int foo_jl (int *ptr, int v) +{ + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) < v) + return 300; + return 100; +} + +int foo_je(int *ptr, int v) +{ + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) == v) + return 123; + return 134; +} + +int foo_jne(int *ptr, int v) +{ + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) != v) + return 111; + return 12; +} + +int foo_jge(int *ptr, int v) +{ + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) >= v) + return 413; + return 23; +} + +int foo_jle(int *ptr, int v) +{ + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) <= v) + return 3141; + return 341; +} + +int fooq_jg (long long *ptr, long long v) +{ + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) > v) + return 123; + return 3; +} + +int fooq_jl (long long *ptr, long long v) +{ + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) < v) + return 313; + return 5; +} + +int fooq_je(long long *ptr, long long v) +{ + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) == v) + return 1313; + return 13; +} + +int fooq_jne(long long *ptr, long long v) +{ + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) != v) + return 1314; + return 132; +} + +int fooq_jge(long long *ptr, long long v) +{ + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) >= v) + return 14314; + return 434; +} + +int fooq_jle(long long *ptr, long long v) +{ + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) <= v) + return 14414; + return 43; +} diff --git a/gcc/testsuite/gcc.target/i386/pr110591.c b/gcc/testsuite/gcc.target/i386/pr110591.c new file mode 100644 index 00000000000..32a515b429e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr110591.c @@ -0,0 +1,66 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mcmpccxadd -O2" } */ +/* { dg-final { scan-assembler-not {cmp[lq]?[ \t]+} } } */ +/* { dg-final { scan-assembler-times {cmpoxadd[ \t]+} 12 } } */ + +#include <immintrin.h> + +_Bool foo_setg (int *ptr, int v) +{ + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) > v; +} + +_Bool foo_setl (int *ptr, int v) +{ + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) < v; +} + +_Bool foo_sete(int *ptr, int v) +{ + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) == v; +} + +_Bool foo_setne(int *ptr, int v) +{ + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) != v; +} + +_Bool foo_setge(int *ptr, int v) +{ + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) >= v; +} + +_Bool foo_setle(int *ptr, int v) +{ + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) <= v; +} + +_Bool fooq_setg (long long *ptr, long long v) +{ + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) > v; +} + +_Bool fooq_setl (long long *ptr, long long v) +{ + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) < v; +} + +_Bool fooq_sete(long long *ptr, long long v) +{ + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) == v; +} + +_Bool fooq_setne(long long *ptr, long long v) +{ + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) != v; +} + +_Bool fooq_setge(long long *ptr, long long v) +{ + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) >= v; +} + +_Bool fooq_setle(long long *ptr, long long v) +{ + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) <= v; +}