Message ID | 20240105073744.1800307-1-xujiahao@loongson.cn |
---|---|
State | New |
Headers | show |
Series | LoongArch: Optimize zero_extendqisi2 and zero_extendqidi2 patterns | expand |
Hi, Jiahao: In my tests here, the latencies of the two instructions are the same on both the 3A5000 and the 3A6000. This issue needs to be confirmed again. 在 2024/1/5 下午3:37, Jiahao Xu 写道: > For zero_extendqisi2 and zero_extendqidi2, use andi instead of bstrpick.w, > because andi is 6 times faster than bstrpick.w. > > gcc/ChangeLog: > > * config/loongarch/loongarch.md: > (zero_extend<SHORT:mode><GPR:mode>2): Rename to .. > (zero_extendhi<GPR:mode>2): .. this, use hi. > (zero_extendqihi2): Rename to .. > (zero_extendqi<HWD:mode>2): .. this, and extend to HWD. > (*zero_extend<GPR:mode>_trunc<SHORT:mode>): Rename to .. > (*zero_extend<GPR:mode>_trunchi): .. this, use hi. > (*zero_extendhi_truncqi): Rename to .. > (*zero_extend<HWD:mode>_truncqi): .. this, and extend to HWD. > > gcc/testsuite/ChangeLog: > > * gcc.target/loongarch/zeroextend-qi.c: New test. > > diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md > index d1f5b94f5d6..843dee77a60 100644 > --- a/gcc/config/loongarch/loongarch.md > +++ b/gcc/config/loongarch/loongarch.md > @@ -397,6 +397,9 @@ > ;; Likewise the 64-bit truncate-and-shift patterns. > (define_mode_iterator SUBDI [QI HI SI]) > > +;; Scalar fixed point modes but excludes QI. > +(define_mode_iterator HWD [HI SI (DI "TARGET_64BIT")]) > + > ;; Iterator for scalar fixed point modes. 
> (define_mode_iterator QHWD [QI HI SI (DI "TARGET_64BIT")]) > > @@ -1659,48 +1662,48 @@ > [(set_attr "move_type" "arith,load,load,load") > (set_attr "mode" "DI")]) > > -(define_insn "zero_extend<SHORT:mode><GPR:mode>2" > +(define_insn "zero_extendhi<GPR:mode>2" > [(set (match_operand:GPR 0 "register_operand" "=r,r,r") > (zero_extend:GPR > - (match_operand:SHORT 1 "nonimmediate_operand" "r,m,k")))] > + (match_operand:HI 1 "nonimmediate_operand" "r,m,k")))] > "" > "@ > - bstrpick.w\t%0,%1,<SHORT:7_or_15>,0 > - ld.<SHORT:size>u\t%0,%1 > - ldx.<SHORT:size>u\t%0,%1" > + bstrpick.w\t%0,%1,15,0 > + ld.hu\t%0,%1 > + ldx.hu\t%0,%1" > [(set_attr "move_type" "pick_ins,load,load") > (set_attr "mode" "<GPR:MODE>")]) > > -(define_insn "zero_extendqihi2" > - [(set (match_operand:HI 0 "register_operand" "=r,r,r") > - (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,k,m")))] > +(define_insn "zero_extendqi<HWD:mode>2" > + [(set (match_operand:HWD 0 "register_operand" "=r,r,r") > + (zero_extend:HWD (match_operand:QI 1 "nonimmediate_operand" "r,k,m")))] > "" > "@ > andi\t%0,%1,0xff > ldx.bu\t%0,%1 > ld.bu\t%0,%1" > [(set_attr "move_type" "andi,load,load") > - (set_attr "mode" "HI")]) > + (set_attr "mode" "<HWD:MODE>")]) > > ;; Combiner patterns to optimize truncate/zero_extend combinations. 
> > -(define_insn "*zero_extend<GPR:mode>_trunc<SHORT:mode>" > +(define_insn "*zero_extend<GPR:mode>_trunchi" > [(set (match_operand:GPR 0 "register_operand" "=r") > (zero_extend:GPR > - (truncate:SHORT (match_operand:DI 1 "register_operand" "r"))))] > + (truncate:HI (match_operand:DI 1 "register_operand" "r"))))] > "TARGET_64BIT" > - "bstrpick.w\t%0,%1,<SHORT:7_or_15>,0" > + "bstrpick.w\t%0,%1,15,0" > [(set_attr "move_type" "pick_ins") > (set_attr "mode" "<GPR:MODE>")]) > > -(define_insn "*zero_extendhi_truncqi" > - [(set (match_operand:HI 0 "register_operand" "=r") > - (zero_extend:HI > +(define_insn "*zero_extend<HWD:mode>_truncqi" > + [(set (match_operand:HWD 0 "register_operand" "=r") > + (zero_extend:HWD > (truncate:QI (match_operand:DI 1 "register_operand" "r"))))] > "TARGET_64BIT" > "andi\t%0,%1,0xff" > [(set_attr "alu_type" "and") > - (set_attr "mode" "HI")]) > + (set_attr "mode" "<HWD:MODE>")]) > > ;; > ;; .................... > diff --git a/gcc/testsuite/gcc.target/loongarch/zeroextend-qi.c b/gcc/testsuite/gcc.target/loongarch/zeroextend-qi.c > new file mode 100644 > index 00000000000..1da8cdad2ca > --- /dev/null > +++ b/gcc/testsuite/gcc.target/loongarch/zeroextend-qi.c > @@ -0,0 +1,11 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > +/* { dg-final { scan-assembler "andi" } } */ > + > +#include <stdint.h> > + > +uint8_t > +foo (uint64_t a, uint8_t b) > +{ > + return a + b; > +}
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index d1f5b94f5d6..843dee77a60 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -397,6 +397,9 @@ ;; Likewise the 64-bit truncate-and-shift patterns. (define_mode_iterator SUBDI [QI HI SI]) +;; Scalar fixed point modes but excludes QI. +(define_mode_iterator HWD [HI SI (DI "TARGET_64BIT")]) + ;; Iterator for scalar fixed point modes. (define_mode_iterator QHWD [QI HI SI (DI "TARGET_64BIT")]) @@ -1659,48 +1662,48 @@ [(set_attr "move_type" "arith,load,load,load") (set_attr "mode" "DI")]) -(define_insn "zero_extend<SHORT:mode><GPR:mode>2" +(define_insn "zero_extendhi<GPR:mode>2" [(set (match_operand:GPR 0 "register_operand" "=r,r,r") (zero_extend:GPR - (match_operand:SHORT 1 "nonimmediate_operand" "r,m,k")))] + (match_operand:HI 1 "nonimmediate_operand" "r,m,k")))] "" "@ - bstrpick.w\t%0,%1,<SHORT:7_or_15>,0 - ld.<SHORT:size>u\t%0,%1 - ldx.<SHORT:size>u\t%0,%1" + bstrpick.w\t%0,%1,15,0 + ld.hu\t%0,%1 + ldx.hu\t%0,%1" [(set_attr "move_type" "pick_ins,load,load") (set_attr "mode" "<GPR:MODE>")]) -(define_insn "zero_extendqihi2" - [(set (match_operand:HI 0 "register_operand" "=r,r,r") - (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,k,m")))] +(define_insn "zero_extendqi<HWD:mode>2" + [(set (match_operand:HWD 0 "register_operand" "=r,r,r") + (zero_extend:HWD (match_operand:QI 1 "nonimmediate_operand" "r,k,m")))] "" "@ andi\t%0,%1,0xff ldx.bu\t%0,%1 ld.bu\t%0,%1" [(set_attr "move_type" "andi,load,load") - (set_attr "mode" "HI")]) + (set_attr "mode" "<HWD:MODE>")]) ;; Combiner patterns to optimize truncate/zero_extend combinations. 
-(define_insn "*zero_extend<GPR:mode>_trunc<SHORT:mode>" +(define_insn "*zero_extend<GPR:mode>_trunchi" [(set (match_operand:GPR 0 "register_operand" "=r") (zero_extend:GPR - (truncate:SHORT (match_operand:DI 1 "register_operand" "r"))))] + (truncate:HI (match_operand:DI 1 "register_operand" "r"))))] "TARGET_64BIT" - "bstrpick.w\t%0,%1,<SHORT:7_or_15>,0" + "bstrpick.w\t%0,%1,15,0" [(set_attr "move_type" "pick_ins") (set_attr "mode" "<GPR:MODE>")]) -(define_insn "*zero_extendhi_truncqi" - [(set (match_operand:HI 0 "register_operand" "=r") - (zero_extend:HI +(define_insn "*zero_extend<HWD:mode>_truncqi" + [(set (match_operand:HWD 0 "register_operand" "=r") + (zero_extend:HWD (truncate:QI (match_operand:DI 1 "register_operand" "r"))))] "TARGET_64BIT" "andi\t%0,%1,0xff" [(set_attr "alu_type" "and") - (set_attr "mode" "HI")]) + (set_attr "mode" "<HWD:MODE>")]) ;; ;; .................... diff --git a/gcc/testsuite/gcc.target/loongarch/zeroextend-qi.c b/gcc/testsuite/gcc.target/loongarch/zeroextend-qi.c new file mode 100644 index 00000000000..1da8cdad2ca --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/zeroextend-qi.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler "andi" } } */ + +#include <stdint.h> + +uint8_t +foo (uint64_t a, uint8_t b) +{ + return a + b; +}