Message ID | 85f7e36e-4a24-0e9b-ad8e-56f85cabf5b5@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [v2,rs6000] Use CC for BCD operations [PR100736] | expand |
Hi, Gentle ping this: https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html Thanks. On 22/6/2022 下午 4:26, HAO CHEN GUI wrote: > Hi, > This patch uses CC instead of CCFP for all BCD operations. Thus, infinite > math flag has no impact on BCD operations. To support BCD overflow and > invalid coding, an UNSPEC is defined to move the bit to a general register. > The patterns of conditional branch and return with the overflow bit are defined so that > the UNSPEC and branch/return can be combined into one jump insn. The split > pattern of overflow bit extension is defined for optimization. > > This patch also replaces bcdadd with bcdsub for BCD invalid coding > expand. > > ChangeLog > 2022-06-22 Haochen Gui <guihaoc@linux.ibm.com> > > gcc/ > PR target/100736 > * config/rs6000/altivec.md (BCD_TEST): Remove unordered. > (bcd<bcd_add_sub>_<mode>): Replace CCFP with CC. > (*bcd<bcd_add_sub>_test_<mode>): Replace CCFP with CC. Generate > condition insn with CC mode. > (bcd<bcd_add_sub>_overflow_<mode>): New. > (*bcdoverflow_<mode>): New. > (*bcdinvalid_<mode>): Removed. > (bcdinvalid_<mode>): Implement by UNSPEC_BCDSUB and UNSPEC_BCD_OVERFLOW. > (nuun): New. > (*overflow_cbranch): New. > (*overflow_creturn): New. > (*overflow_extendsidi): New. > (bcdshift_v16qi): Replace CCFP with CC. > (bcdmul10_v16qi): Likewise. > (bcddiv10_v16qi): Likewise. > (peephole for bcd_add/sub): Likewise. > * config/rs6000/rs6000-builtins.def (__builtin_bcdadd_ov_v1ti): Set > pattern to bcdadd_overflow_v1ti. > (__builtin_bcdadd_ov_v16qi): Set pattern to bcdadd_overflow_v16qi. > (__builtin_bcdsub_ov_v1ti): Set pattern to bcdsub_overflow_v1ti. > (__builtin_bcdsub_ov_v16qi): Set pattern to bcdsub_overflow_v16qi. > > gcc/testsuite/ > PR target/100736 > * gcc.target/powerpc/bcd-4.c: Adjust number of bcdadd and bcdsub. > Scan no cror insns. 
> > patch.diff > diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md > index efc8ae35c2e..26f131e61ea 100644 > --- a/gcc/config/rs6000/altivec.md > +++ b/gcc/config/rs6000/altivec.md > @@ -4370,7 +4370,7 @@ (define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD UNSPEC_BCDSUB]) > (define_int_attr bcd_add_sub [(UNSPEC_BCDADD "add") > (UNSPEC_BCDSUB "sub")]) > > -(define_code_iterator BCD_TEST [eq lt le gt ge unordered]) > +(define_code_iterator BCD_TEST [eq lt le gt ge]) > (define_mode_iterator VBCD [V1TI V16QI]) > > (define_insn "bcd<bcd_add_sub>_<mode>" > @@ -4379,7 +4379,7 @@ (define_insn "bcd<bcd_add_sub>_<mode>" > (match_operand:VBCD 2 "register_operand" "v") > (match_operand:QI 3 "const_0_to_1_operand" "n")] > UNSPEC_BCD_ADD_SUB)) > - (clobber (reg:CCFP CR6_REGNO))] > + (clobber (reg:CC CR6_REGNO))] > "TARGET_P8_VECTOR" > "bcd<bcd_add_sub>. %0,%1,%2,%3" > [(set_attr "type" "vecsimple")]) > @@ -4389,9 +4389,9 @@ (define_insn "bcd<bcd_add_sub>_<mode>" > ;; UNORDERED test on an integer type (like V1TImode) is not defined. The type > ;; probably should be one that can go in the VMX (Altivec) registers, so we > ;; can't use DDmode or DFmode. 
> -(define_insn "*bcd<bcd_add_sub>_test_<mode>" > - [(set (reg:CCFP CR6_REGNO) > - (compare:CCFP > +(define_insn "bcd<bcd_add_sub>_test_<mode>" > + [(set (reg:CC CR6_REGNO) > + (compare:CC > (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v") > (match_operand:VBCD 2 "register_operand" "v") > (match_operand:QI 3 "const_0_to_1_operand" "i")] > @@ -4408,8 +4408,8 @@ (define_insn "*bcd<bcd_add_sub>_test2_<mode>" > (match_operand:VBCD 2 "register_operand" "v") > (match_operand:QI 3 "const_0_to_1_operand" "i")] > UNSPEC_BCD_ADD_SUB)) > - (set (reg:CCFP CR6_REGNO) > - (compare:CCFP > + (set (reg:CC CR6_REGNO) > + (compare:CC > (unspec:V2DF [(match_dup 1) > (match_dup 2) > (match_dup 3)] > @@ -4502,8 +4502,8 @@ (define_insn "vclrrb" > [(set_attr "type" "vecsimple")]) > > (define_expand "bcd<bcd_add_sub>_<code>_<mode>" > - [(parallel [(set (reg:CCFP CR6_REGNO) > - (compare:CCFP > + [(parallel [(set (reg:CC CR6_REGNO) > + (compare:CC > (unspec:V2DF [(match_operand:VBCD 1 "register_operand") > (match_operand:VBCD 2 "register_operand") > (match_operand:QI 3 "const_0_to_1_operand")] > @@ -4511,46 +4511,138 @@ (define_expand "bcd<bcd_add_sub>_<code>_<mode>" > (match_dup 4))) > (clobber (match_scratch:VBCD 5))]) > (set (match_operand:SI 0 "register_operand") > - (BCD_TEST:SI (reg:CCFP CR6_REGNO) > + (BCD_TEST:SI (reg:CC CR6_REGNO) > (const_int 0)))] > "TARGET_P8_VECTOR" > { > operands[4] = CONST0_RTX (V2DFmode); > + emit_insn (gen_bcd<bcd_add_sub>_test_<mode> (operands[0], operands[1], > + operands[2], operands[3], > + operands[4])); > + > + rtx cr6 = gen_rtx_REG (CCmode, CR6_REGNO); > + rtx condition_rtx = gen_rtx_<CODE> (SImode, cr6, const0_rtx); > + > + if (<CODE> == GE || <CODE> == LE) > + { > + rtx not_result = gen_reg_rtx (CCEQmode); > + rtx not_op, rev_cond_rtx; > + rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (SImode, <CODE>), > + SImode, XEXP (condition_rtx, 0), > + const0_rtx); > + not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx); > + 
emit_insn (gen_rtx_SET (not_result, not_op)); > + condition_rtx = gen_rtx_EQ (SImode, not_result, const0_rtx); > + } > + > + emit_insn (gen_rtx_SET (operands[0], condition_rtx)); > + DONE; > }) > > -(define_insn "*bcdinvalid_<mode>" > - [(set (reg:CCFP CR6_REGNO) > - (compare:CCFP > - (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v")] > - UNSPEC_BCDADD) > - (match_operand:V2DF 2 "zero_constant" "j"))) > - (clobber (match_scratch:VBCD 0 "=v"))] > +(define_expand "bcd<bcd_add_sub>_overflow_<mode>" > + [(parallel [(set (reg:CC CR6_REGNO) > + (compare:CC > + (unspec:V2DF [(match_operand:VBCD 1 "register_operand") > + (match_operand:VBCD 2 "register_operand") > + (match_operand:QI 3 "const_0_to_1_operand")] > + UNSPEC_BCD_ADD_SUB) > + (match_dup 4))) > + (clobber (match_scratch:VBCD 5))]) > + (set (match_operand:SI 0 "register_operand") > + (unspec:SI [(reg:CC CR6_REGNO) > + (const_int 0)] > + UNSPEC_BCD_OVERFLOW))] > "TARGET_P8_VECTOR" > - "bcdadd. %0,%1,%1,0" > +{ > + operands[4] = CONST0_RTX (V2DFmode); > +}) > + > +(define_insn "*bcdoverflow_<mode>" > + [(set (match_operand:SDI 0 "register_operand" "=r") > + (unspec:SDI [(reg:CC CR6_REGNO) > + (const_int 0)] > + UNSPEC_BCD_OVERFLOW))] > + "TARGET_P8_VECTOR" > + "mfcr %0,2\;rlwinm %0,%0,28,1" > [(set_attr "type" "vecsimple")]) > > (define_expand "bcdinvalid_<mode>" > - [(parallel [(set (reg:CCFP CR6_REGNO) > - (compare:CCFP > - (unspec:V2DF [(match_operand:VBCD 1 "register_operand")] > - UNSPEC_BCDADD) > + [(parallel [(set (reg:CC CR6_REGNO) > + (compare:CC > + (unspec:V2DF [(match_operand:VBCD 1 "register_operand") > + (match_dup 1) > + (const_int 0)] > + UNSPEC_BCDSUB) > (match_dup 2))) > (clobber (match_scratch:VBCD 3))]) > (set (match_operand:SI 0 "register_operand") > - (unordered:SI (reg:CCFP CR6_REGNO) > - (const_int 0)))] > + (unspec:SI [(reg:CC CR6_REGNO) > + (const_int 0)] > + UNSPEC_BCD_OVERFLOW))] > "TARGET_P8_VECTOR" > { > operands[2] = CONST0_RTX (V2DFmode); > }) > > +(define_code_attr nuun 
[(eq "nu") > + (ne "un")]) > + > +(define_insn "*overflow_cbranch" > + [(set (pc) > + (if_then_else (eqne > + (unspec:SI [(reg:CC CR6_REGNO) > + (const_int 0)] > + UNSPEC_BCD_OVERFLOW) > + (const_int 0)) > + (label_ref (match_operand 0)) > + (pc)))] > + "TARGET_P8_VECTOR" > + "b<nuun> 6,%l0" > + [(set_attr "type" "branch") > + (set (attr "length") > + (if_then_else (and (ge (minus (match_dup 0) (pc)) > + (const_int -32768)) > + (lt (minus (match_dup 0) (pc)) > + (const_int 32764))) > + (const_int 4) > + (const_int 8)))]) > + > +(define_insn "*overflow_creturn" > + [(set (pc) > + (if_then_else (eqne > + (unspec:SI [(reg:CC CR6_REGNO) > + (const_int 0)] > + UNSPEC_BCD_OVERFLOW) > + (const_int 0)) > + (simple_return) > + (pc)))] > + "TARGET_P8_VECTOR" > + "b<nuun>lr 6" > + [(set_attr "type" "jmpreg")]) > + > +(define_insn_and_split "*overflow_extendsidi" > + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (sign_extend:DI > + (unspec:SI [(reg:CC CR6_REGNO) > + (const_int 0)] > + UNSPEC_BCD_OVERFLOW)))] > + "TARGET_P8_VECTOR" > + "#" > + "&& 1" > + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (unspec:DI [(reg:CC CR6_REGNO) > + (const_int 0)] > + UNSPEC_BCD_OVERFLOW))] > + "" > + [(set_attr "type" "vecsimple")]) > + > (define_insn "bcdshift_v16qi" > [(set (match_operand:V16QI 0 "register_operand" "=v") > (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") > (match_operand:V16QI 2 "register_operand" "v") > (match_operand:QI 3 "const_0_to_1_operand" "n")] > UNSPEC_BCDSHIFT)) > - (clobber (reg:CCFP CR6_REGNO))] > + (clobber (reg:CC CR6_REGNO))] > "TARGET_P8_VECTOR" > "bcds. 
%0,%1,%2,%3" > [(set_attr "type" "vecsimple")]) > @@ -4559,7 +4651,7 @@ (define_expand "bcdmul10_v16qi" > [(set (match_operand:V16QI 0 "register_operand") > (unspec:V16QI [(match_operand:V16QI 1 "register_operand")] > UNSPEC_BCDSHIFT)) > - (clobber (reg:CCFP CR6_REGNO))] > + (clobber (reg:CC CR6_REGNO))] > "TARGET_P9_VECTOR" > { > rtx one = gen_reg_rtx (V16QImode); > @@ -4574,7 +4666,7 @@ (define_expand "bcddiv10_v16qi" > [(set (match_operand:V16QI 0 "register_operand") > (unspec:V16QI [(match_operand:V16QI 1 "register_operand")] > UNSPEC_BCDSHIFT)) > - (clobber (reg:CCFP CR6_REGNO))] > + (clobber (reg:CC CR6_REGNO))] > "TARGET_P9_VECTOR" > { > rtx one = gen_reg_rtx (V16QImode); > @@ -4598,9 +4690,9 @@ (define_peephole2 > (match_operand:V1TI 2 "register_operand") > (match_operand:QI 3 "const_0_to_1_operand")] > UNSPEC_BCD_ADD_SUB)) > - (clobber (reg:CCFP CR6_REGNO))]) > - (parallel [(set (reg:CCFP CR6_REGNO) > - (compare:CCFP > + (clobber (reg:CC CR6_REGNO))]) > + (parallel [(set (reg:CC CR6_REGNO) > + (compare:CC > (unspec:V2DF [(match_dup 1) > (match_dup 2) > (match_dup 3)] > @@ -4613,8 +4705,8 @@ (define_peephole2 > (match_dup 2) > (match_dup 3)] > UNSPEC_BCD_ADD_SUB)) > - (set (reg:CCFP CR6_REGNO) > - (compare:CCFP > + (set (reg:CC CR6_REGNO) > + (compare:CC > (unspec:V2DF [(match_dup 1) > (match_dup 2) > (match_dup 3)] > diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def > index f4a9f24bcc5..8e94fe5c438 100644 > --- a/gcc/config/rs6000/rs6000-builtins.def > +++ b/gcc/config/rs6000/rs6000-builtins.def > @@ -2371,10 +2371,10 @@ > BCDADD_LT_V16QI bcdadd_lt_v16qi {} > > const signed int __builtin_bcdadd_ov_v1ti (vsq, vsq, const int<1>); > - BCDADD_OV_V1TI bcdadd_unordered_v1ti {} > + BCDADD_OV_V1TI bcdadd_overflow_v1ti {} > > const signed int __builtin_bcdadd_ov_v16qi (vsc, vsc, const int<1>); > - BCDADD_OV_V16QI bcdadd_unordered_v16qi {} > + BCDADD_OV_V16QI bcdadd_overflow_v16qi {} > > const signed int 
__builtin_bcdinvalid_v1ti (vsq); > BCDINVALID_V1TI bcdinvalid_v1ti {} > @@ -2419,10 +2419,10 @@ > BCDSUB_LT_V16QI bcdsub_lt_v16qi {} > > const signed int __builtin_bcdsub_ov_v1ti (vsq, vsq, const int<1>); > - BCDSUB_OV_V1TI bcdsub_unordered_v1ti {} > + BCDSUB_OV_V1TI bcdsub_overflow_v1ti {} > > const signed int __builtin_bcdsub_ov_v16qi (vsc, vsc, const int<1>); > - BCDSUB_OV_V16QI bcdsub_unordered_v16qi {} > + BCDSUB_OV_V16QI bcdsub_overflow_v16qi {} > > const vuc __builtin_crypto_vpermxor_v16qi (vuc, vuc, vuc); > VPERMXOR_V16QI crypto_vpermxor_v16qi {} > diff --git a/gcc/testsuite/gcc.target/powerpc/bcd-4.c b/gcc/testsuite/gcc.target/powerpc/bcd-4.c > index 2c8554dfe82..3c25ed60e17 100644 > --- a/gcc/testsuite/gcc.target/powerpc/bcd-4.c > +++ b/gcc/testsuite/gcc.target/powerpc/bcd-4.c > @@ -2,10 +2,11 @@ > /* { dg-require-effective-target int128 } */ > /* { dg-require-effective-target power10_hw } */ > /* { dg-options "-mdejagnu-cpu=power10 -O2 -save-temps" } */ > -/* { dg-final { scan-assembler-times {\mbcdadd\M} 7 } } */ > -/* { dg-final { scan-assembler-times {\mbcdsub\M} 18 } } */ > +/* { dg-final { scan-assembler-times {\mbcdadd\M} 5 } } */ > +/* { dg-final { scan-assembler-times {\mbcdsub\M} 20 } } */ > /* { dg-final { scan-assembler-times {\mbcds\M} 2 } } */ > /* { dg-final { scan-assembler-times {\mdenbcdq\M} 1 } } */ > +/* { dg-final { scan-assembler-not {\mcror\M} 1 } } */ > > #include <altivec.h> >
Hi, Gentle ping this: https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html Thanks. On 4/7/2022 下午 2:33, HAO CHEN GUI wrote: > Hi, > Gentle ping this: > https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html > Thanks. > > On 22/6/2022 下午 4:26, HAO CHEN GUI wrote: >> Hi, >> This patch uses CC instead of CCFP for all BCD operations. Thus, infinite >> math flag has no impact on BCD operations. To support BCD overflow and >> invalid coding, an UNSPEC is defined to move the bit to a general register. >> The patterns of conditional branch and return with the overflow bit are defined so that >> the UNSPEC and branch/return can be combined into one jump insn. The split >> pattern of overflow bit extension is defined for optimization. >> >> This patch also replaces bcdadd with bcdsub for BCD invalid coding >> expand. >> >> ChangeLog >> 2022-06-22 Haochen Gui <guihaoc@linux.ibm.com> >> >> gcc/ >> PR target/100736 >> * config/rs6000/altivec.md (BCD_TEST): Remove unordered. >> (bcd<bcd_add_sub>_<mode>): Replace CCFP with CC. >> (*bcd<bcd_add_sub>_test_<mode>): Replace CCFP with CC. Generate >> condition insn with CC mode. >> (bcd<bcd_add_sub>_overflow_<mode>): New. >> (*bcdoverflow_<mode>): New. >> (*bcdinvalid_<mode>): Removed. >> (bcdinvalid_<mode>): Implement by UNSPEC_BCDSUB and UNSPEC_BCD_OVERFLOW. >> (nuun): New. >> (*overflow_cbranch): New. >> (*overflow_creturn): New. >> (*overflow_extendsidi): New. >> (bcdshift_v16qi): Replace CCFP with CC. >> (bcdmul10_v16qi): Likewise. >> (bcddiv10_v16qi): Likewise. >> (peephole for bcd_add/sub): Likewise. >> * config/rs6000/rs6000-builtins.def (__builtin_bcdadd_ov_v1ti): Set >> pattern to bcdadd_overflow_v1ti. >> (__builtin_bcdadd_ov_v16qi): Set pattern to bcdadd_overflow_v16qi. >> (__builtin_bcdsub_ov_v1ti): Set pattern to bcdsub_overflow_v1ti. >> (__builtin_bcdsub_ov_v16qi): Set pattern to bcdsub_overflow_v16qi. >> >> gcc/testsuite/ >> PR target/100736 >> * gcc.target/powerpc/bcd-4.c: Adjust number of bcdadd and bcdsub. 
>> Scan no cror insns. >> >> patch.diff >> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md >> index efc8ae35c2e..26f131e61ea 100644 >> --- a/gcc/config/rs6000/altivec.md >> +++ b/gcc/config/rs6000/altivec.md >> @@ -4370,7 +4370,7 @@ (define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD UNSPEC_BCDSUB]) >> (define_int_attr bcd_add_sub [(UNSPEC_BCDADD "add") >> (UNSPEC_BCDSUB "sub")]) >> >> -(define_code_iterator BCD_TEST [eq lt le gt ge unordered]) >> +(define_code_iterator BCD_TEST [eq lt le gt ge]) >> (define_mode_iterator VBCD [V1TI V16QI]) >> >> (define_insn "bcd<bcd_add_sub>_<mode>" >> @@ -4379,7 +4379,7 @@ (define_insn "bcd<bcd_add_sub>_<mode>" >> (match_operand:VBCD 2 "register_operand" "v") >> (match_operand:QI 3 "const_0_to_1_operand" "n")] >> UNSPEC_BCD_ADD_SUB)) >> - (clobber (reg:CCFP CR6_REGNO))] >> + (clobber (reg:CC CR6_REGNO))] >> "TARGET_P8_VECTOR" >> "bcd<bcd_add_sub>. %0,%1,%2,%3" >> [(set_attr "type" "vecsimple")]) >> @@ -4389,9 +4389,9 @@ (define_insn "bcd<bcd_add_sub>_<mode>" >> ;; UNORDERED test on an integer type (like V1TImode) is not defined. The type >> ;; probably should be one that can go in the VMX (Altivec) registers, so we >> ;; can't use DDmode or DFmode. 
>> -(define_insn "*bcd<bcd_add_sub>_test_<mode>" >> - [(set (reg:CCFP CR6_REGNO) >> - (compare:CCFP >> +(define_insn "bcd<bcd_add_sub>_test_<mode>" >> + [(set (reg:CC CR6_REGNO) >> + (compare:CC >> (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v") >> (match_operand:VBCD 2 "register_operand" "v") >> (match_operand:QI 3 "const_0_to_1_operand" "i")] >> @@ -4408,8 +4408,8 @@ (define_insn "*bcd<bcd_add_sub>_test2_<mode>" >> (match_operand:VBCD 2 "register_operand" "v") >> (match_operand:QI 3 "const_0_to_1_operand" "i")] >> UNSPEC_BCD_ADD_SUB)) >> - (set (reg:CCFP CR6_REGNO) >> - (compare:CCFP >> + (set (reg:CC CR6_REGNO) >> + (compare:CC >> (unspec:V2DF [(match_dup 1) >> (match_dup 2) >> (match_dup 3)] >> @@ -4502,8 +4502,8 @@ (define_insn "vclrrb" >> [(set_attr "type" "vecsimple")]) >> >> (define_expand "bcd<bcd_add_sub>_<code>_<mode>" >> - [(parallel [(set (reg:CCFP CR6_REGNO) >> - (compare:CCFP >> + [(parallel [(set (reg:CC CR6_REGNO) >> + (compare:CC >> (unspec:V2DF [(match_operand:VBCD 1 "register_operand") >> (match_operand:VBCD 2 "register_operand") >> (match_operand:QI 3 "const_0_to_1_operand")] >> @@ -4511,46 +4511,138 @@ (define_expand "bcd<bcd_add_sub>_<code>_<mode>" >> (match_dup 4))) >> (clobber (match_scratch:VBCD 5))]) >> (set (match_operand:SI 0 "register_operand") >> - (BCD_TEST:SI (reg:CCFP CR6_REGNO) >> + (BCD_TEST:SI (reg:CC CR6_REGNO) >> (const_int 0)))] >> "TARGET_P8_VECTOR" >> { >> operands[4] = CONST0_RTX (V2DFmode); >> + emit_insn (gen_bcd<bcd_add_sub>_test_<mode> (operands[0], operands[1], >> + operands[2], operands[3], >> + operands[4])); >> + >> + rtx cr6 = gen_rtx_REG (CCmode, CR6_REGNO); >> + rtx condition_rtx = gen_rtx_<CODE> (SImode, cr6, const0_rtx); >> + >> + if (<CODE> == GE || <CODE> == LE) >> + { >> + rtx not_result = gen_reg_rtx (CCEQmode); >> + rtx not_op, rev_cond_rtx; >> + rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (SImode, <CODE>), >> + SImode, XEXP (condition_rtx, 0), >> + const0_rtx); >> + not_op = 
gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx); >> + emit_insn (gen_rtx_SET (not_result, not_op)); >> + condition_rtx = gen_rtx_EQ (SImode, not_result, const0_rtx); >> + } >> + >> + emit_insn (gen_rtx_SET (operands[0], condition_rtx)); >> + DONE; >> }) >> >> -(define_insn "*bcdinvalid_<mode>" >> - [(set (reg:CCFP CR6_REGNO) >> - (compare:CCFP >> - (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v")] >> - UNSPEC_BCDADD) >> - (match_operand:V2DF 2 "zero_constant" "j"))) >> - (clobber (match_scratch:VBCD 0 "=v"))] >> +(define_expand "bcd<bcd_add_sub>_overflow_<mode>" >> + [(parallel [(set (reg:CC CR6_REGNO) >> + (compare:CC >> + (unspec:V2DF [(match_operand:VBCD 1 "register_operand") >> + (match_operand:VBCD 2 "register_operand") >> + (match_operand:QI 3 "const_0_to_1_operand")] >> + UNSPEC_BCD_ADD_SUB) >> + (match_dup 4))) >> + (clobber (match_scratch:VBCD 5))]) >> + (set (match_operand:SI 0 "register_operand") >> + (unspec:SI [(reg:CC CR6_REGNO) >> + (const_int 0)] >> + UNSPEC_BCD_OVERFLOW))] >> "TARGET_P8_VECTOR" >> - "bcdadd. 
%0,%1,%1,0" >> +{ >> + operands[4] = CONST0_RTX (V2DFmode); >> +}) >> + >> +(define_insn "*bcdoverflow_<mode>" >> + [(set (match_operand:SDI 0 "register_operand" "=r") >> + (unspec:SDI [(reg:CC CR6_REGNO) >> + (const_int 0)] >> + UNSPEC_BCD_OVERFLOW))] >> + "TARGET_P8_VECTOR" >> + "mfcr %0,2\;rlwinm %0,%0,28,1" >> [(set_attr "type" "vecsimple")]) >> >> (define_expand "bcdinvalid_<mode>" >> - [(parallel [(set (reg:CCFP CR6_REGNO) >> - (compare:CCFP >> - (unspec:V2DF [(match_operand:VBCD 1 "register_operand")] >> - UNSPEC_BCDADD) >> + [(parallel [(set (reg:CC CR6_REGNO) >> + (compare:CC >> + (unspec:V2DF [(match_operand:VBCD 1 "register_operand") >> + (match_dup 1) >> + (const_int 0)] >> + UNSPEC_BCDSUB) >> (match_dup 2))) >> (clobber (match_scratch:VBCD 3))]) >> (set (match_operand:SI 0 "register_operand") >> - (unordered:SI (reg:CCFP CR6_REGNO) >> - (const_int 0)))] >> + (unspec:SI [(reg:CC CR6_REGNO) >> + (const_int 0)] >> + UNSPEC_BCD_OVERFLOW))] >> "TARGET_P8_VECTOR" >> { >> operands[2] = CONST0_RTX (V2DFmode); >> }) >> >> +(define_code_attr nuun [(eq "nu") >> + (ne "un")]) >> + >> +(define_insn "*overflow_cbranch" >> + [(set (pc) >> + (if_then_else (eqne >> + (unspec:SI [(reg:CC CR6_REGNO) >> + (const_int 0)] >> + UNSPEC_BCD_OVERFLOW) >> + (const_int 0)) >> + (label_ref (match_operand 0)) >> + (pc)))] >> + "TARGET_P8_VECTOR" >> + "b<nuun> 6,%l0" >> + [(set_attr "type" "branch") >> + (set (attr "length") >> + (if_then_else (and (ge (minus (match_dup 0) (pc)) >> + (const_int -32768)) >> + (lt (minus (match_dup 0) (pc)) >> + (const_int 32764))) >> + (const_int 4) >> + (const_int 8)))]) >> + >> +(define_insn "*overflow_creturn" >> + [(set (pc) >> + (if_then_else (eqne >> + (unspec:SI [(reg:CC CR6_REGNO) >> + (const_int 0)] >> + UNSPEC_BCD_OVERFLOW) >> + (const_int 0)) >> + (simple_return) >> + (pc)))] >> + "TARGET_P8_VECTOR" >> + "b<nuun>lr 6" >> + [(set_attr "type" "jmpreg")]) >> + >> +(define_insn_and_split "*overflow_extendsidi" >> + [(set (match_operand:DI 0 
"gpc_reg_operand" "=r") >> + (sign_extend:DI >> + (unspec:SI [(reg:CC CR6_REGNO) >> + (const_int 0)] >> + UNSPEC_BCD_OVERFLOW)))] >> + "TARGET_P8_VECTOR" >> + "#" >> + "&& 1" >> + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") >> + (unspec:DI [(reg:CC CR6_REGNO) >> + (const_int 0)] >> + UNSPEC_BCD_OVERFLOW))] >> + "" >> + [(set_attr "type" "vecsimple")]) >> + >> (define_insn "bcdshift_v16qi" >> [(set (match_operand:V16QI 0 "register_operand" "=v") >> (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") >> (match_operand:V16QI 2 "register_operand" "v") >> (match_operand:QI 3 "const_0_to_1_operand" "n")] >> UNSPEC_BCDSHIFT)) >> - (clobber (reg:CCFP CR6_REGNO))] >> + (clobber (reg:CC CR6_REGNO))] >> "TARGET_P8_VECTOR" >> "bcds. %0,%1,%2,%3" >> [(set_attr "type" "vecsimple")]) >> @@ -4559,7 +4651,7 @@ (define_expand "bcdmul10_v16qi" >> [(set (match_operand:V16QI 0 "register_operand") >> (unspec:V16QI [(match_operand:V16QI 1 "register_operand")] >> UNSPEC_BCDSHIFT)) >> - (clobber (reg:CCFP CR6_REGNO))] >> + (clobber (reg:CC CR6_REGNO))] >> "TARGET_P9_VECTOR" >> { >> rtx one = gen_reg_rtx (V16QImode); >> @@ -4574,7 +4666,7 @@ (define_expand "bcddiv10_v16qi" >> [(set (match_operand:V16QI 0 "register_operand") >> (unspec:V16QI [(match_operand:V16QI 1 "register_operand")] >> UNSPEC_BCDSHIFT)) >> - (clobber (reg:CCFP CR6_REGNO))] >> + (clobber (reg:CC CR6_REGNO))] >> "TARGET_P9_VECTOR" >> { >> rtx one = gen_reg_rtx (V16QImode); >> @@ -4598,9 +4690,9 @@ (define_peephole2 >> (match_operand:V1TI 2 "register_operand") >> (match_operand:QI 3 "const_0_to_1_operand")] >> UNSPEC_BCD_ADD_SUB)) >> - (clobber (reg:CCFP CR6_REGNO))]) >> - (parallel [(set (reg:CCFP CR6_REGNO) >> - (compare:CCFP >> + (clobber (reg:CC CR6_REGNO))]) >> + (parallel [(set (reg:CC CR6_REGNO) >> + (compare:CC >> (unspec:V2DF [(match_dup 1) >> (match_dup 2) >> (match_dup 3)] >> @@ -4613,8 +4705,8 @@ (define_peephole2 >> (match_dup 2) >> (match_dup 3)] >> UNSPEC_BCD_ADD_SUB)) >> - (set (reg:CCFP 
CR6_REGNO) >> - (compare:CCFP >> + (set (reg:CC CR6_REGNO) >> + (compare:CC >> (unspec:V2DF [(match_dup 1) >> (match_dup 2) >> (match_dup 3)] >> diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def >> index f4a9f24bcc5..8e94fe5c438 100644 >> --- a/gcc/config/rs6000/rs6000-builtins.def >> +++ b/gcc/config/rs6000/rs6000-builtins.def >> @@ -2371,10 +2371,10 @@ >> BCDADD_LT_V16QI bcdadd_lt_v16qi {} >> >> const signed int __builtin_bcdadd_ov_v1ti (vsq, vsq, const int<1>); >> - BCDADD_OV_V1TI bcdadd_unordered_v1ti {} >> + BCDADD_OV_V1TI bcdadd_overflow_v1ti {} >> >> const signed int __builtin_bcdadd_ov_v16qi (vsc, vsc, const int<1>); >> - BCDADD_OV_V16QI bcdadd_unordered_v16qi {} >> + BCDADD_OV_V16QI bcdadd_overflow_v16qi {} >> >> const signed int __builtin_bcdinvalid_v1ti (vsq); >> BCDINVALID_V1TI bcdinvalid_v1ti {} >> @@ -2419,10 +2419,10 @@ >> BCDSUB_LT_V16QI bcdsub_lt_v16qi {} >> >> const signed int __builtin_bcdsub_ov_v1ti (vsq, vsq, const int<1>); >> - BCDSUB_OV_V1TI bcdsub_unordered_v1ti {} >> + BCDSUB_OV_V1TI bcdsub_overflow_v1ti {} >> >> const signed int __builtin_bcdsub_ov_v16qi (vsc, vsc, const int<1>); >> - BCDSUB_OV_V16QI bcdsub_unordered_v16qi {} >> + BCDSUB_OV_V16QI bcdsub_overflow_v16qi {} >> >> const vuc __builtin_crypto_vpermxor_v16qi (vuc, vuc, vuc); >> VPERMXOR_V16QI crypto_vpermxor_v16qi {} >> diff --git a/gcc/testsuite/gcc.target/powerpc/bcd-4.c b/gcc/testsuite/gcc.target/powerpc/bcd-4.c >> index 2c8554dfe82..3c25ed60e17 100644 >> --- a/gcc/testsuite/gcc.target/powerpc/bcd-4.c >> +++ b/gcc/testsuite/gcc.target/powerpc/bcd-4.c >> @@ -2,10 +2,11 @@ >> /* { dg-require-effective-target int128 } */ >> /* { dg-require-effective-target power10_hw } */ >> /* { dg-options "-mdejagnu-cpu=power10 -O2 -save-temps" } */ >> -/* { dg-final { scan-assembler-times {\mbcdadd\M} 7 } } */ >> -/* { dg-final { scan-assembler-times {\mbcdsub\M} 18 } } */ >> +/* { dg-final { scan-assembler-times {\mbcdadd\M} 5 } } */ >> +/* { dg-final 
{ scan-assembler-times {\mbcdsub\M} 20 } } */ >> /* { dg-final { scan-assembler-times {\mbcds\M} 2 } } */ >> /* { dg-final { scan-assembler-times {\mdenbcdq\M} 1 } } */ >> +/* { dg-final { scan-assembler-not {\mcror\M} 1 } } */ >> >> #include <altivec.h> >>
Hi, Gentle ping this: https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html Thanks. On 1/8/2022 上午 10:02, HAO CHEN GUI wrote: > Hi, > Gentle ping this: > https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html > Thanks. > > On 4/7/2022 下午 2:33, HAO CHEN GUI wrote: >> Hi, >> Gentle ping this: >> https://gcc.gnu.org/pipermail/gcc-patches/2022-June/597020.html >> Thanks. >> >> On 22/6/2022 下午 4:26, HAO CHEN GUI wrote: >>> Hi, >>> This patch uses CC instead of CCFP for all BCD operations. Thus, infinite >>> math flag has no impact on BCD operations. To support BCD overflow and >>> invalid coding, an UNSPEC is defined to move the bit to a general register. >>> The patterns of conditional branch and return with the overflow bit are defined so that >>> the UNSPEC and branch/return can be combined into one jump insn. The split >>> pattern of overflow bit extension is defined for optimization. >>> >>> This patch also replaces bcdadd with bcdsub for BCD invalid coding >>> expand. >>> >>> ChangeLog >>> 2022-06-22 Haochen Gui <guihaoc@linux.ibm.com> >>> >>> gcc/ >>> PR target/100736 >>> * config/rs6000/altivec.md (BCD_TEST): Remove unordered. >>> (bcd<bcd_add_sub>_<mode>): Replace CCFP with CC. >>> (*bcd<bcd_add_sub>_test_<mode>): Replace CCFP with CC. Generate >>> condition insn with CC mode. >>> (bcd<bcd_add_sub>_overflow_<mode>): New. >>> (*bcdoverflow_<mode>): New. >>> (*bcdinvalid_<mode>): Removed. >>> (bcdinvalid_<mode>): Implement by UNSPEC_BCDSUB and UNSPEC_BCD_OVERFLOW. >>> (nuun): New. >>> (*overflow_cbranch): New. >>> (*overflow_creturn): New. >>> (*overflow_extendsidi): New. >>> (bcdshift_v16qi): Replace CCFP with CC. >>> (bcdmul10_v16qi): Likewise. >>> (bcddiv10_v16qi): Likewise. >>> (peephole for bcd_add/sub): Likewise. >>> * config/rs6000/rs6000-builtins.def (__builtin_bcdadd_ov_v1ti): Set >>> pattern to bcdadd_overflow_v1ti. >>> (__builtin_bcdadd_ov_v16qi): Set pattern to bcdadd_overflow_v16qi. 
>>> (__builtin_bcdsub_ov_v1ti): Set pattern to bcdsub_overflow_v1ti. >>> (__builtin_bcdsub_ov_v16qi): Set pattern to bcdsub_overflow_v16qi. >>> >>> gcc/testsuite/ >>> PR target/100736 >>> * gcc.target/powerpc/bcd-4.c: Adjust number of bcdadd and bcdsub. >>> Scan no cror insns. >>> >>> patch.diff >>> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md >>> index efc8ae35c2e..26f131e61ea 100644 >>> --- a/gcc/config/rs6000/altivec.md >>> +++ b/gcc/config/rs6000/altivec.md >>> @@ -4370,7 +4370,7 @@ (define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD UNSPEC_BCDSUB]) >>> (define_int_attr bcd_add_sub [(UNSPEC_BCDADD "add") >>> (UNSPEC_BCDSUB "sub")]) >>> >>> -(define_code_iterator BCD_TEST [eq lt le gt ge unordered]) >>> +(define_code_iterator BCD_TEST [eq lt le gt ge]) >>> (define_mode_iterator VBCD [V1TI V16QI]) >>> >>> (define_insn "bcd<bcd_add_sub>_<mode>" >>> @@ -4379,7 +4379,7 @@ (define_insn "bcd<bcd_add_sub>_<mode>" >>> (match_operand:VBCD 2 "register_operand" "v") >>> (match_operand:QI 3 "const_0_to_1_operand" "n")] >>> UNSPEC_BCD_ADD_SUB)) >>> - (clobber (reg:CCFP CR6_REGNO))] >>> + (clobber (reg:CC CR6_REGNO))] >>> "TARGET_P8_VECTOR" >>> "bcd<bcd_add_sub>. %0,%1,%2,%3" >>> [(set_attr "type" "vecsimple")]) >>> @@ -4389,9 +4389,9 @@ (define_insn "bcd<bcd_add_sub>_<mode>" >>> ;; UNORDERED test on an integer type (like V1TImode) is not defined. The type >>> ;; probably should be one that can go in the VMX (Altivec) registers, so we >>> ;; can't use DDmode or DFmode. 
>>> -(define_insn "*bcd<bcd_add_sub>_test_<mode>" >>> - [(set (reg:CCFP CR6_REGNO) >>> - (compare:CCFP >>> +(define_insn "bcd<bcd_add_sub>_test_<mode>" >>> + [(set (reg:CC CR6_REGNO) >>> + (compare:CC >>> (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v") >>> (match_operand:VBCD 2 "register_operand" "v") >>> (match_operand:QI 3 "const_0_to_1_operand" "i")] >>> @@ -4408,8 +4408,8 @@ (define_insn "*bcd<bcd_add_sub>_test2_<mode>" >>> (match_operand:VBCD 2 "register_operand" "v") >>> (match_operand:QI 3 "const_0_to_1_operand" "i")] >>> UNSPEC_BCD_ADD_SUB)) >>> - (set (reg:CCFP CR6_REGNO) >>> - (compare:CCFP >>> + (set (reg:CC CR6_REGNO) >>> + (compare:CC >>> (unspec:V2DF [(match_dup 1) >>> (match_dup 2) >>> (match_dup 3)] >>> @@ -4502,8 +4502,8 @@ (define_insn "vclrrb" >>> [(set_attr "type" "vecsimple")]) >>> >>> (define_expand "bcd<bcd_add_sub>_<code>_<mode>" >>> - [(parallel [(set (reg:CCFP CR6_REGNO) >>> - (compare:CCFP >>> + [(parallel [(set (reg:CC CR6_REGNO) >>> + (compare:CC >>> (unspec:V2DF [(match_operand:VBCD 1 "register_operand") >>> (match_operand:VBCD 2 "register_operand") >>> (match_operand:QI 3 "const_0_to_1_operand")] >>> @@ -4511,46 +4511,138 @@ (define_expand "bcd<bcd_add_sub>_<code>_<mode>" >>> (match_dup 4))) >>> (clobber (match_scratch:VBCD 5))]) >>> (set (match_operand:SI 0 "register_operand") >>> - (BCD_TEST:SI (reg:CCFP CR6_REGNO) >>> + (BCD_TEST:SI (reg:CC CR6_REGNO) >>> (const_int 0)))] >>> "TARGET_P8_VECTOR" >>> { >>> operands[4] = CONST0_RTX (V2DFmode); >>> + emit_insn (gen_bcd<bcd_add_sub>_test_<mode> (operands[0], operands[1], >>> + operands[2], operands[3], >>> + operands[4])); >>> + >>> + rtx cr6 = gen_rtx_REG (CCmode, CR6_REGNO); >>> + rtx condition_rtx = gen_rtx_<CODE> (SImode, cr6, const0_rtx); >>> + >>> + if (<CODE> == GE || <CODE> == LE) >>> + { >>> + rtx not_result = gen_reg_rtx (CCEQmode); >>> + rtx not_op, rev_cond_rtx; >>> + rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (SImode, <CODE>), >>> + SImode, XEXP 
(condition_rtx, 0), >>> + const0_rtx); >>> + not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx); >>> + emit_insn (gen_rtx_SET (not_result, not_op)); >>> + condition_rtx = gen_rtx_EQ (SImode, not_result, const0_rtx); >>> + } >>> + >>> + emit_insn (gen_rtx_SET (operands[0], condition_rtx)); >>> + DONE; >>> }) >>> >>> -(define_insn "*bcdinvalid_<mode>" >>> - [(set (reg:CCFP CR6_REGNO) >>> - (compare:CCFP >>> - (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v")] >>> - UNSPEC_BCDADD) >>> - (match_operand:V2DF 2 "zero_constant" "j"))) >>> - (clobber (match_scratch:VBCD 0 "=v"))] >>> +(define_expand "bcd<bcd_add_sub>_overflow_<mode>" >>> + [(parallel [(set (reg:CC CR6_REGNO) >>> + (compare:CC >>> + (unspec:V2DF [(match_operand:VBCD 1 "register_operand") >>> + (match_operand:VBCD 2 "register_operand") >>> + (match_operand:QI 3 "const_0_to_1_operand")] >>> + UNSPEC_BCD_ADD_SUB) >>> + (match_dup 4))) >>> + (clobber (match_scratch:VBCD 5))]) >>> + (set (match_operand:SI 0 "register_operand") >>> + (unspec:SI [(reg:CC CR6_REGNO) >>> + (const_int 0)] >>> + UNSPEC_BCD_OVERFLOW))] >>> "TARGET_P8_VECTOR" >>> - "bcdadd. 
%0,%1,%1,0" >>> +{ >>> + operands[4] = CONST0_RTX (V2DFmode); >>> +}) >>> + >>> +(define_insn "*bcdoverflow_<mode>" >>> + [(set (match_operand:SDI 0 "register_operand" "=r") >>> + (unspec:SDI [(reg:CC CR6_REGNO) >>> + (const_int 0)] >>> + UNSPEC_BCD_OVERFLOW))] >>> + "TARGET_P8_VECTOR" >>> + "mfcr %0,2\;rlwinm %0,%0,28,1" >>> [(set_attr "type" "vecsimple")]) >>> >>> (define_expand "bcdinvalid_<mode>" >>> - [(parallel [(set (reg:CCFP CR6_REGNO) >>> - (compare:CCFP >>> - (unspec:V2DF [(match_operand:VBCD 1 "register_operand")] >>> - UNSPEC_BCDADD) >>> + [(parallel [(set (reg:CC CR6_REGNO) >>> + (compare:CC >>> + (unspec:V2DF [(match_operand:VBCD 1 "register_operand") >>> + (match_dup 1) >>> + (const_int 0)] >>> + UNSPEC_BCDSUB) >>> (match_dup 2))) >>> (clobber (match_scratch:VBCD 3))]) >>> (set (match_operand:SI 0 "register_operand") >>> - (unordered:SI (reg:CCFP CR6_REGNO) >>> - (const_int 0)))] >>> + (unspec:SI [(reg:CC CR6_REGNO) >>> + (const_int 0)] >>> + UNSPEC_BCD_OVERFLOW))] >>> "TARGET_P8_VECTOR" >>> { >>> operands[2] = CONST0_RTX (V2DFmode); >>> }) >>> >>> +(define_code_attr nuun [(eq "nu") >>> + (ne "un")]) >>> + >>> +(define_insn "*overflow_cbranch" >>> + [(set (pc) >>> + (if_then_else (eqne >>> + (unspec:SI [(reg:CC CR6_REGNO) >>> + (const_int 0)] >>> + UNSPEC_BCD_OVERFLOW) >>> + (const_int 0)) >>> + (label_ref (match_operand 0)) >>> + (pc)))] >>> + "TARGET_P8_VECTOR" >>> + "b<nuun> 6,%l0" >>> + [(set_attr "type" "branch") >>> + (set (attr "length") >>> + (if_then_else (and (ge (minus (match_dup 0) (pc)) >>> + (const_int -32768)) >>> + (lt (minus (match_dup 0) (pc)) >>> + (const_int 32764))) >>> + (const_int 4) >>> + (const_int 8)))]) >>> + >>> +(define_insn "*overflow_creturn" >>> + [(set (pc) >>> + (if_then_else (eqne >>> + (unspec:SI [(reg:CC CR6_REGNO) >>> + (const_int 0)] >>> + UNSPEC_BCD_OVERFLOW) >>> + (const_int 0)) >>> + (simple_return) >>> + (pc)))] >>> + "TARGET_P8_VECTOR" >>> + "b<nuun>lr 6" >>> + [(set_attr "type" "jmpreg")]) >>> + >>> 
+(define_insn_and_split "*overflow_extendsidi" >>> + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") >>> + (sign_extend:DI >>> + (unspec:SI [(reg:CC CR6_REGNO) >>> + (const_int 0)] >>> + UNSPEC_BCD_OVERFLOW)))] >>> + "TARGET_P8_VECTOR" >>> + "#" >>> + "&& 1" >>> + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") >>> + (unspec:DI [(reg:CC CR6_REGNO) >>> + (const_int 0)] >>> + UNSPEC_BCD_OVERFLOW))] >>> + "" >>> + [(set_attr "type" "vecsimple")]) >>> + >>> (define_insn "bcdshift_v16qi" >>> [(set (match_operand:V16QI 0 "register_operand" "=v") >>> (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") >>> (match_operand:V16QI 2 "register_operand" "v") >>> (match_operand:QI 3 "const_0_to_1_operand" "n")] >>> UNSPEC_BCDSHIFT)) >>> - (clobber (reg:CCFP CR6_REGNO))] >>> + (clobber (reg:CC CR6_REGNO))] >>> "TARGET_P8_VECTOR" >>> "bcds. %0,%1,%2,%3" >>> [(set_attr "type" "vecsimple")]) >>> @@ -4559,7 +4651,7 @@ (define_expand "bcdmul10_v16qi" >>> [(set (match_operand:V16QI 0 "register_operand") >>> (unspec:V16QI [(match_operand:V16QI 1 "register_operand")] >>> UNSPEC_BCDSHIFT)) >>> - (clobber (reg:CCFP CR6_REGNO))] >>> + (clobber (reg:CC CR6_REGNO))] >>> "TARGET_P9_VECTOR" >>> { >>> rtx one = gen_reg_rtx (V16QImode); >>> @@ -4574,7 +4666,7 @@ (define_expand "bcddiv10_v16qi" >>> [(set (match_operand:V16QI 0 "register_operand") >>> (unspec:V16QI [(match_operand:V16QI 1 "register_operand")] >>> UNSPEC_BCDSHIFT)) >>> - (clobber (reg:CCFP CR6_REGNO))] >>> + (clobber (reg:CC CR6_REGNO))] >>> "TARGET_P9_VECTOR" >>> { >>> rtx one = gen_reg_rtx (V16QImode); >>> @@ -4598,9 +4690,9 @@ (define_peephole2 >>> (match_operand:V1TI 2 "register_operand") >>> (match_operand:QI 3 "const_0_to_1_operand")] >>> UNSPEC_BCD_ADD_SUB)) >>> - (clobber (reg:CCFP CR6_REGNO))]) >>> - (parallel [(set (reg:CCFP CR6_REGNO) >>> - (compare:CCFP >>> + (clobber (reg:CC CR6_REGNO))]) >>> + (parallel [(set (reg:CC CR6_REGNO) >>> + (compare:CC >>> (unspec:V2DF [(match_dup 1) >>> (match_dup 2) >>> 
(match_dup 3)] >>> @@ -4613,8 +4705,8 @@ (define_peephole2 >>> (match_dup 2) >>> (match_dup 3)] >>> UNSPEC_BCD_ADD_SUB)) >>> - (set (reg:CCFP CR6_REGNO) >>> - (compare:CCFP >>> + (set (reg:CC CR6_REGNO) >>> + (compare:CC >>> (unspec:V2DF [(match_dup 1) >>> (match_dup 2) >>> (match_dup 3)] >>> diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def >>> index f4a9f24bcc5..8e94fe5c438 100644 >>> --- a/gcc/config/rs6000/rs6000-builtins.def >>> +++ b/gcc/config/rs6000/rs6000-builtins.def >>> @@ -2371,10 +2371,10 @@ >>> BCDADD_LT_V16QI bcdadd_lt_v16qi {} >>> >>> const signed int __builtin_bcdadd_ov_v1ti (vsq, vsq, const int<1>); >>> - BCDADD_OV_V1TI bcdadd_unordered_v1ti {} >>> + BCDADD_OV_V1TI bcdadd_overflow_v1ti {} >>> >>> const signed int __builtin_bcdadd_ov_v16qi (vsc, vsc, const int<1>); >>> - BCDADD_OV_V16QI bcdadd_unordered_v16qi {} >>> + BCDADD_OV_V16QI bcdadd_overflow_v16qi {} >>> >>> const signed int __builtin_bcdinvalid_v1ti (vsq); >>> BCDINVALID_V1TI bcdinvalid_v1ti {} >>> @@ -2419,10 +2419,10 @@ >>> BCDSUB_LT_V16QI bcdsub_lt_v16qi {} >>> >>> const signed int __builtin_bcdsub_ov_v1ti (vsq, vsq, const int<1>); >>> - BCDSUB_OV_V1TI bcdsub_unordered_v1ti {} >>> + BCDSUB_OV_V1TI bcdsub_overflow_v1ti {} >>> >>> const signed int __builtin_bcdsub_ov_v16qi (vsc, vsc, const int<1>); >>> - BCDSUB_OV_V16QI bcdsub_unordered_v16qi {} >>> + BCDSUB_OV_V16QI bcdsub_overflow_v16qi {} >>> >>> const vuc __builtin_crypto_vpermxor_v16qi (vuc, vuc, vuc); >>> VPERMXOR_V16QI crypto_vpermxor_v16qi {} >>> diff --git a/gcc/testsuite/gcc.target/powerpc/bcd-4.c b/gcc/testsuite/gcc.target/powerpc/bcd-4.c >>> index 2c8554dfe82..3c25ed60e17 100644 >>> --- a/gcc/testsuite/gcc.target/powerpc/bcd-4.c >>> +++ b/gcc/testsuite/gcc.target/powerpc/bcd-4.c >>> @@ -2,10 +2,11 @@ >>> /* { dg-require-effective-target int128 } */ >>> /* { dg-require-effective-target power10_hw } */ >>> /* { dg-options "-mdejagnu-cpu=power10 -O2 -save-temps" } */ >>> -/* { dg-final { 
scan-assembler-times {\mbcdadd\M} 7 } } */ >>> -/* { dg-final { scan-assembler-times {\mbcdsub\M} 18 } } */ >>> +/* { dg-final { scan-assembler-times {\mbcdadd\M} 5 } } */ >>> +/* { dg-final { scan-assembler-times {\mbcdsub\M} 20 } } */ >>> /* { dg-final { scan-assembler-times {\mbcds\M} 2 } } */ >>> /* { dg-final { scan-assembler-times {\mdenbcdq\M} 1 } } */ >>> +/* { dg-final { scan-assembler-not {\mcror\M} } } */ >>> >>> #include <altivec.h> >>>
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index efc8ae35c2e..26f131e61ea 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -4370,7 +4370,7 @@ (define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD UNSPEC_BCDSUB]) (define_int_attr bcd_add_sub [(UNSPEC_BCDADD "add") (UNSPEC_BCDSUB "sub")]) -(define_code_iterator BCD_TEST [eq lt le gt ge unordered]) +(define_code_iterator BCD_TEST [eq lt le gt ge]) (define_mode_iterator VBCD [V1TI V16QI]) (define_insn "bcd<bcd_add_sub>_<mode>" @@ -4379,7 +4379,7 @@ (define_insn "bcd<bcd_add_sub>_<mode>" (match_operand:VBCD 2 "register_operand" "v") (match_operand:QI 3 "const_0_to_1_operand" "n")] UNSPEC_BCD_ADD_SUB)) - (clobber (reg:CCFP CR6_REGNO))] + (clobber (reg:CC CR6_REGNO))] "TARGET_P8_VECTOR" "bcd<bcd_add_sub>. %0,%1,%2,%3" [(set_attr "type" "vecsimple")]) @@ -4389,9 +4389,9 @@ (define_insn "bcd<bcd_add_sub>_<mode>" ;; UNORDERED test on an integer type (like V1TImode) is not defined. The type ;; probably should be one that can go in the VMX (Altivec) registers, so we ;; can't use DDmode or DFmode. 
-(define_insn "*bcd<bcd_add_sub>_test_<mode>" - [(set (reg:CCFP CR6_REGNO) - (compare:CCFP +(define_insn "bcd<bcd_add_sub>_test_<mode>" + [(set (reg:CC CR6_REGNO) + (compare:CC (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v") (match_operand:VBCD 2 "register_operand" "v") (match_operand:QI 3 "const_0_to_1_operand" "i")] @@ -4408,8 +4408,8 @@ (define_insn "*bcd<bcd_add_sub>_test2_<mode>" (match_operand:VBCD 2 "register_operand" "v") (match_operand:QI 3 "const_0_to_1_operand" "i")] UNSPEC_BCD_ADD_SUB)) - (set (reg:CCFP CR6_REGNO) - (compare:CCFP + (set (reg:CC CR6_REGNO) + (compare:CC (unspec:V2DF [(match_dup 1) (match_dup 2) (match_dup 3)] @@ -4502,8 +4502,8 @@ (define_insn "vclrrb" [(set_attr "type" "vecsimple")]) (define_expand "bcd<bcd_add_sub>_<code>_<mode>" - [(parallel [(set (reg:CCFP CR6_REGNO) - (compare:CCFP + [(parallel [(set (reg:CC CR6_REGNO) + (compare:CC (unspec:V2DF [(match_operand:VBCD 1 "register_operand") (match_operand:VBCD 2 "register_operand") (match_operand:QI 3 "const_0_to_1_operand")] @@ -4511,46 +4511,138 @@ (define_expand "bcd<bcd_add_sub>_<code>_<mode>" (match_dup 4))) (clobber (match_scratch:VBCD 5))]) (set (match_operand:SI 0 "register_operand") - (BCD_TEST:SI (reg:CCFP CR6_REGNO) + (BCD_TEST:SI (reg:CC CR6_REGNO) (const_int 0)))] "TARGET_P8_VECTOR" { operands[4] = CONST0_RTX (V2DFmode); + emit_insn (gen_bcd<bcd_add_sub>_test_<mode> (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + + rtx cr6 = gen_rtx_REG (CCmode, CR6_REGNO); + rtx condition_rtx = gen_rtx_<CODE> (SImode, cr6, const0_rtx); + + if (<CODE> == GE || <CODE> == LE) + { + rtx not_result = gen_reg_rtx (CCEQmode); + rtx not_op, rev_cond_rtx; + rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (SImode, <CODE>), + SImode, XEXP (condition_rtx, 0), + const0_rtx); + not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx); + emit_insn (gen_rtx_SET (not_result, not_op)); + condition_rtx = gen_rtx_EQ (SImode, not_result, const0_rtx); + } + + 
emit_insn (gen_rtx_SET (operands[0], condition_rtx)); + DONE; }) -(define_insn "*bcdinvalid_<mode>" - [(set (reg:CCFP CR6_REGNO) - (compare:CCFP - (unspec:V2DF [(match_operand:VBCD 1 "register_operand" "v")] - UNSPEC_BCDADD) - (match_operand:V2DF 2 "zero_constant" "j"))) - (clobber (match_scratch:VBCD 0 "=v"))] +(define_expand "bcd<bcd_add_sub>_overflow_<mode>" + [(parallel [(set (reg:CC CR6_REGNO) + (compare:CC + (unspec:V2DF [(match_operand:VBCD 1 "register_operand") + (match_operand:VBCD 2 "register_operand") + (match_operand:QI 3 "const_0_to_1_operand")] + UNSPEC_BCD_ADD_SUB) + (match_dup 4))) + (clobber (match_scratch:VBCD 5))]) + (set (match_operand:SI 0 "register_operand") + (unspec:SI [(reg:CC CR6_REGNO) + (const_int 0)] + UNSPEC_BCD_OVERFLOW))] "TARGET_P8_VECTOR" - "bcdadd. %0,%1,%1,0" +{ + operands[4] = CONST0_RTX (V2DFmode); +}) + +(define_insn "*bcdoverflow_<mode>" + [(set (match_operand:SDI 0 "register_operand" "=r") + (unspec:SDI [(reg:CC CR6_REGNO) + (const_int 0)] + UNSPEC_BCD_OVERFLOW))] + "TARGET_P8_VECTOR" + "mfcr %0,2\;rlwinm %0,%0,28,1" [(set_attr "type" "vecsimple")]) (define_expand "bcdinvalid_<mode>" - [(parallel [(set (reg:CCFP CR6_REGNO) - (compare:CCFP - (unspec:V2DF [(match_operand:VBCD 1 "register_operand")] - UNSPEC_BCDADD) + [(parallel [(set (reg:CC CR6_REGNO) + (compare:CC + (unspec:V2DF [(match_operand:VBCD 1 "register_operand") + (match_dup 1) + (const_int 0)] + UNSPEC_BCDSUB) (match_dup 2))) (clobber (match_scratch:VBCD 3))]) (set (match_operand:SI 0 "register_operand") - (unordered:SI (reg:CCFP CR6_REGNO) - (const_int 0)))] + (unspec:SI [(reg:CC CR6_REGNO) + (const_int 0)] + UNSPEC_BCD_OVERFLOW))] "TARGET_P8_VECTOR" { operands[2] = CONST0_RTX (V2DFmode); }) +(define_code_attr nuun [(eq "nu") + (ne "un")]) + +(define_insn "*overflow_cbranch" + [(set (pc) + (if_then_else (eqne + (unspec:SI [(reg:CC CR6_REGNO) + (const_int 0)] + UNSPEC_BCD_OVERFLOW) + (const_int 0)) + (label_ref (match_operand 0)) + (pc)))] + "TARGET_P8_VECTOR" + 
"b<nuun> 6,%l0" + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 0) (pc)) + (const_int -32768)) + (lt (minus (match_dup 0) (pc)) + (const_int 32764))) + (const_int 4) + (const_int 8)))]) + +(define_insn "*overflow_creturn" + [(set (pc) + (if_then_else (eqne + (unspec:SI [(reg:CC CR6_REGNO) + (const_int 0)] + UNSPEC_BCD_OVERFLOW) + (const_int 0)) + (simple_return) + (pc)))] + "TARGET_P8_VECTOR" + "b<nuun>lr 6" + [(set_attr "type" "jmpreg")]) + +(define_insn_and_split "*overflow_extendsidi" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (sign_extend:DI + (unspec:SI [(reg:CC CR6_REGNO) + (const_int 0)] + UNSPEC_BCD_OVERFLOW)))] + "TARGET_P8_VECTOR" + "#" + "&& 1" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (unspec:DI [(reg:CC CR6_REGNO) + (const_int 0)] + UNSPEC_BCD_OVERFLOW))] + "" + [(set_attr "type" "vecsimple")]) + (define_insn "bcdshift_v16qi" [(set (match_operand:V16QI 0 "register_operand" "=v") (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") (match_operand:V16QI 2 "register_operand" "v") (match_operand:QI 3 "const_0_to_1_operand" "n")] UNSPEC_BCDSHIFT)) - (clobber (reg:CCFP CR6_REGNO))] + (clobber (reg:CC CR6_REGNO))] "TARGET_P8_VECTOR" "bcds. 
%0,%1,%2,%3" [(set_attr "type" "vecsimple")]) @@ -4559,7 +4651,7 @@ (define_expand "bcdmul10_v16qi" [(set (match_operand:V16QI 0 "register_operand") (unspec:V16QI [(match_operand:V16QI 1 "register_operand")] UNSPEC_BCDSHIFT)) - (clobber (reg:CCFP CR6_REGNO))] + (clobber (reg:CC CR6_REGNO))] "TARGET_P9_VECTOR" { rtx one = gen_reg_rtx (V16QImode); @@ -4574,7 +4666,7 @@ (define_expand "bcddiv10_v16qi" [(set (match_operand:V16QI 0 "register_operand") (unspec:V16QI [(match_operand:V16QI 1 "register_operand")] UNSPEC_BCDSHIFT)) - (clobber (reg:CCFP CR6_REGNO))] + (clobber (reg:CC CR6_REGNO))] "TARGET_P9_VECTOR" { rtx one = gen_reg_rtx (V16QImode); @@ -4598,9 +4690,9 @@ (define_peephole2 (match_operand:V1TI 2 "register_operand") (match_operand:QI 3 "const_0_to_1_operand")] UNSPEC_BCD_ADD_SUB)) - (clobber (reg:CCFP CR6_REGNO))]) - (parallel [(set (reg:CCFP CR6_REGNO) - (compare:CCFP + (clobber (reg:CC CR6_REGNO))]) + (parallel [(set (reg:CC CR6_REGNO) + (compare:CC (unspec:V2DF [(match_dup 1) (match_dup 2) (match_dup 3)] @@ -4613,8 +4705,8 @@ (define_peephole2 (match_dup 2) (match_dup 3)] UNSPEC_BCD_ADD_SUB)) - (set (reg:CCFP CR6_REGNO) - (compare:CCFP + (set (reg:CC CR6_REGNO) + (compare:CC (unspec:V2DF [(match_dup 1) (match_dup 2) (match_dup 3)] diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index f4a9f24bcc5..8e94fe5c438 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -2371,10 +2371,10 @@ BCDADD_LT_V16QI bcdadd_lt_v16qi {} const signed int __builtin_bcdadd_ov_v1ti (vsq, vsq, const int<1>); - BCDADD_OV_V1TI bcdadd_unordered_v1ti {} + BCDADD_OV_V1TI bcdadd_overflow_v1ti {} const signed int __builtin_bcdadd_ov_v16qi (vsc, vsc, const int<1>); - BCDADD_OV_V16QI bcdadd_unordered_v16qi {} + BCDADD_OV_V16QI bcdadd_overflow_v16qi {} const signed int __builtin_bcdinvalid_v1ti (vsq); BCDINVALID_V1TI bcdinvalid_v1ti {} @@ -2419,10 +2419,10 @@ BCDSUB_LT_V16QI bcdsub_lt_v16qi {} const signed 
int __builtin_bcdsub_ov_v1ti (vsq, vsq, const int<1>); - BCDSUB_OV_V1TI bcdsub_unordered_v1ti {} + BCDSUB_OV_V1TI bcdsub_overflow_v1ti {} const signed int __builtin_bcdsub_ov_v16qi (vsc, vsc, const int<1>); - BCDSUB_OV_V16QI bcdsub_unordered_v16qi {} + BCDSUB_OV_V16QI bcdsub_overflow_v16qi {} const vuc __builtin_crypto_vpermxor_v16qi (vuc, vuc, vuc); VPERMXOR_V16QI crypto_vpermxor_v16qi {} diff --git a/gcc/testsuite/gcc.target/powerpc/bcd-4.c b/gcc/testsuite/gcc.target/powerpc/bcd-4.c index 2c8554dfe82..3c25ed60e17 100644 --- a/gcc/testsuite/gcc.target/powerpc/bcd-4.c +++ b/gcc/testsuite/gcc.target/powerpc/bcd-4.c @@ -2,10 +2,11 @@ /* { dg-require-effective-target int128 } */ /* { dg-require-effective-target power10_hw } */ /* { dg-options "-mdejagnu-cpu=power10 -O2 -save-temps" } */ -/* { dg-final { scan-assembler-times {\mbcdadd\M} 7 } } */ -/* { dg-final { scan-assembler-times {\mbcdsub\M} 18 } } */ +/* { dg-final { scan-assembler-times {\mbcdadd\M} 5 } } */ +/* { dg-final { scan-assembler-times {\mbcdsub\M} 20 } } */ /* { dg-final { scan-assembler-times {\mbcds\M} 2 } } */ /* { dg-final { scan-assembler-times {\mdenbcdq\M} 1 } } */ +/* { dg-final { scan-assembler-not {\mcror\M} } } */ #include <altivec.h>