Message ID | 20240609134929.317546-1-xry111@xry111.site |
---|---|
State | New |
Headers | show |
Series | LoongArch: Use bstrins for "value & (-1u << const)" | expand |
LGTM! Thanks! 在 2024/6/9 下午9:48, Xi Ruoyao 写道: > A move/bstrins pair is as fast as a (addi.w|lu12i.w|lu32i.d|lu52i.d)/and > pair, and twice as fast as a srli/slli pair. When the src reg and the dst > reg happen to be the same, the move instruction can be optimized away. > > gcc/ChangeLog: > > * config/loongarch/predicates.md (high_bitmask_operand): New > predicate. > * config/loongarch/constraints.md (Yy): New constraint. > * config/loongarch/loongarch.md (and<mode>3_align): New > define_insn_and_split. > > gcc/testsuite/ChangeLog: > > * gcc.target/loongarch/bstrins-1.c: New test. > * gcc.target/loongarch/bstrins-2.c: New test. > --- > > Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk? > > gcc/config/loongarch/constraints.md | 5 +++++ > gcc/config/loongarch/loongarch.md | 17 +++++++++++++++++ > gcc/config/loongarch/predicates.md | 4 ++++ > gcc/testsuite/gcc.target/loongarch/bstrins-1.c | 9 +++++++++ > gcc/testsuite/gcc.target/loongarch/bstrins-2.c | 14 ++++++++++++++ > 5 files changed, 49 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-1.c > create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-2.c > > diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md > index f07d31650d2..12cf5e2924a 100644 > --- a/gcc/config/loongarch/constraints.md > +++ b/gcc/config/loongarch/constraints.md > @@ -94,6 +94,7 @@ > ;; "A constant @code{move_operand} that can be safely loaded using > ;; @code{la}." 
> ;; "Yx" > +;; "Yy" > ;; "Z" - > ;; "ZC" > ;; "A memory operand whose address is formed by a base register and offset > @@ -291,6 +292,10 @@ (define_constraint "Yx" > "@internal" > (match_operand 0 "low_bitmask_operand")) > > +(define_constraint "Yy" > + "@internal" > + (match_operand 0 "high_bitmask_operand")) > + > (define_constraint "YI" > "@internal > A replicated vector const in which the replicated value is in the range > diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md > index 5c80c169cbf..25c1d323ba0 100644 > --- a/gcc/config/loongarch/loongarch.md > +++ b/gcc/config/loongarch/loongarch.md > @@ -1542,6 +1542,23 @@ (define_insn "and<mode>3_extended" > [(set_attr "move_type" "pick_ins") > (set_attr "mode" "<MODE>")]) > > +(define_insn_and_split "and<mode>3_align" > + [(set (match_operand:GPR 0 "register_operand" "=r") > + (and:GPR (match_operand:GPR 1 "register_operand" "r") > + (match_operand:GPR 2 "high_bitmask_operand" "Yy")))] > + "" > + "#" > + "" > + [(set (match_dup 0) (match_dup 1)) > + (set (zero_extract:GPR (match_dup 0) (match_dup 2) (const_int 0)) > + (const_int 0))] > +{ > + int len; > + > + len = low_bitmask_len (<MODE>mode, ~INTVAL (operands[2])); > + operands[2] = GEN_INT (len); > +}) > + > (define_insn_and_split "*bstrins_<mode>_for_mask" > [(set (match_operand:GPR 0 "register_operand" "=r") > (and:GPR (match_operand:GPR 1 "register_operand" "r") > diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md > index eba7f246c84..58e406ea522 100644 > --- a/gcc/config/loongarch/predicates.md > +++ b/gcc/config/loongarch/predicates.md > @@ -293,6 +293,10 @@ (define_predicate "low_bitmask_operand" > (and (match_code "const_int") > (match_test "low_bitmask_len (mode, INTVAL (op)) > 12"))) > > +(define_predicate "high_bitmask_operand" > + (and (match_code "const_int") > + (match_test "low_bitmask_len (mode, ~INTVAL (op)) > 0"))) > + > (define_predicate "d_operand" > (and (match_code "reg") 
> (match_test "GP_REG_P (REGNO (op))"))) > diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-1.c b/gcc/testsuite/gcc.target/loongarch/bstrins-1.c > new file mode 100644 > index 00000000000..7cb3a952322 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/loongarch/bstrins-1.c > @@ -0,0 +1,9 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ > +/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r4,\\\$r0,4,0" } } */ > + > +long > +x (long a) > +{ > + return a & -32; > +} > diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-2.c b/gcc/testsuite/gcc.target/loongarch/bstrins-2.c > new file mode 100644 > index 00000000000..9777f502e5a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/loongarch/bstrins-2.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ > +/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r\[0-9\]+,\\\$r0,4,0" } } */ > + > +struct aligned_buffer { > + _Alignas(32) char x[1024]; > +}; > + > +extern int f(char *); > +int g(void) > +{ > + struct aligned_buffer buf; > + return f(buf.x); > +}
Xi Ruoyao <xry111@xry111.site> 于2024年6月9日周日 21:50写道: > > A move/bstrins pair is as fast as a (addi.w|lu12i.w|lu32i.d|lu52i.d)/and > pair, and twice as fast as a srli/slli pair. When the src reg and the dst Just curious: why not adjust the RTX cost of bstrins vs. srli/slli instead? That might benefit more cases. > reg happen to be the same, the move instruction can be optimized away. >
diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md index f07d31650d2..12cf5e2924a 100644 --- a/gcc/config/loongarch/constraints.md +++ b/gcc/config/loongarch/constraints.md @@ -94,6 +94,7 @@ ;; "A constant @code{move_operand} that can be safely loaded using ;; @code{la}." ;; "Yx" +;; "Yy" ;; "Z" - ;; "ZC" ;; "A memory operand whose address is formed by a base register and offset @@ -291,6 +292,10 @@ (define_constraint "Yx" "@internal" (match_operand 0 "low_bitmask_operand")) +(define_constraint "Yy" + "@internal" + (match_operand 0 "high_bitmask_operand")) + (define_constraint "YI" "@internal A replicated vector const in which the replicated value is in the range diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index 5c80c169cbf..25c1d323ba0 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -1542,6 +1542,23 @@ (define_insn "and<mode>3_extended" [(set_attr "move_type" "pick_ins") (set_attr "mode" "<MODE>")]) +(define_insn_and_split "and<mode>3_align" + [(set (match_operand:GPR 0 "register_operand" "=r") + (and:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand:GPR 2 "high_bitmask_operand" "Yy")))] + "" + "#" + "" + [(set (match_dup 0) (match_dup 1)) + (set (zero_extract:GPR (match_dup 0) (match_dup 2) (const_int 0)) + (const_int 0))] +{ + int len; + + len = low_bitmask_len (<MODE>mode, ~INTVAL (operands[2])); + operands[2] = GEN_INT (len); +}) + (define_insn_and_split "*bstrins_<mode>_for_mask" [(set (match_operand:GPR 0 "register_operand" "=r") (and:GPR (match_operand:GPR 1 "register_operand" "r") diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md index eba7f246c84..58e406ea522 100644 --- a/gcc/config/loongarch/predicates.md +++ b/gcc/config/loongarch/predicates.md @@ -293,6 +293,10 @@ (define_predicate "low_bitmask_operand" (and (match_code "const_int") (match_test "low_bitmask_len (mode, INTVAL (op)) > 
12"))) +(define_predicate "high_bitmask_operand" + (and (match_code "const_int") + (match_test "low_bitmask_len (mode, ~INTVAL (op)) > 0"))) + (define_predicate "d_operand" (and (match_code "reg") (match_test "GP_REG_P (REGNO (op))"))) diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-1.c b/gcc/testsuite/gcc.target/loongarch/bstrins-1.c new file mode 100644 index 00000000000..7cb3a952322 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/bstrins-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ +/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r4,\\\$r0,4,0" } } */ + +long +x (long a) +{ + return a & -32; +} diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-2.c b/gcc/testsuite/gcc.target/loongarch/bstrins-2.c new file mode 100644 index 00000000000..9777f502e5a --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/bstrins-2.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */ +/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r\[0-9\]+,\\\$r0,4,0" } } */ + +struct aligned_buffer { + _Alignas(32) char x[1024]; +}; + +extern int f(char *); +int g(void) +{ + struct aligned_buffer buf; + return f(buf.x); +}