diff mbox series

[v1,2/2] LoongArch: Provide ashr lshr and ashl RTL pattern for vectors.

Message ID 20240808064747.9012-2-chenglulu@loongson.cn
State New
Headers show
Series [v1,1/2] LoongArch: Drop vcond{,u} expanders. | expand

Commit Message

Lulu Cheng Aug. 8, 2024, 6:47 a.m. UTC
We already support the vashr, vlshr and vashl patterns.  However, r15-1638
added an optimization that turns x < 0 ? -1 : 0 into (signed) x >> 31 and
x < 0 ? 1 : 0 into (unsigned) x >> 31.  To support this optimization, the
vector ashr, lshr and ashl patterns need to be implemented.

gcc/ChangeLog:

	* config/loongarch/loongarch.md (insn): Add rotatert/rotr pair.
	* config/loongarch/simd.md (rotr<mode>3): Rename to ...
	(<optab><mode>3): ... this.

gcc/testsuite/ChangeLog:

	* g++.target/loongarch/vect-ashr-lshr.C: New test.
---
 gcc/config/loongarch/loongarch.md             |   1 +
 gcc/config/loongarch/simd.md                  |  13 +-
 .../g++.target/loongarch/vect-ashr-lshr.C     | 147 ++++++++++++++++++
 3 files changed, 155 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C

Comments

Lulu Cheng Aug. 12, 2024, 1:35 a.m. UTC | #1
Pushed to r15-2879.

在 2024/8/8 下午2:47, Lulu Cheng 写道:
> We already support the vashr, vlshr and vashl patterns.  However, r15-1638
> added an optimization that turns x < 0 ? -1 : 0 into (signed) x >> 31 and
> x < 0 ? 1 : 0 into (unsigned) x >> 31.  To support this optimization, the
> vector ashr, lshr and ashl patterns need to be implemented.
>
> gcc/ChangeLog:
>
> 	* config/loongarch/loongarch.md (insn): Add rotatert/rotr pair.
> 	* config/loongarch/simd.md (rotr<mode>3): Rename to ...
> 	(<optab><mode>3): ... this.
>
> gcc/testsuite/ChangeLog:
>
> 	* g++.target/loongarch/vect-ashr-lshr.C: New test.
> ---
>   gcc/config/loongarch/loongarch.md             |   1 +
>   gcc/config/loongarch/simd.md                  |  13 +-
>   .../g++.target/loongarch/vect-ashr-lshr.C     | 147 ++++++++++++++++++
>   3 files changed, 155 insertions(+), 6 deletions(-)
>   create mode 100644 gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C
>
> diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
> index ee0310f2bd6..1f105cbf891 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -559,6 +559,7 @@ (define_code_attr optab [(ashift "ashl")
>   (define_code_attr insn [(ashift "sll")
>   			(ashiftrt "sra")
>   			(lshiftrt "srl")
> +			(rotatert "rotr")
>   			(ior "or")
>   			(xor "xor")
>   			(and "and")
> diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
> index 00ff2823a4e..45ea114220e 100644
> --- a/gcc/config/loongarch/simd.md
> +++ b/gcc/config/loongarch/simd.md
> @@ -306,14 +306,15 @@ (define_expand "rotl<mode>3"
>       operands[4] = gen_reg_rtx (<MODE>mode);
>     });
>   
> -;; <x>vrotri.{b/h/w/d}
> +;; <x>v{rotr/sll/sra/srl}i.{b/h/w/d}
>   
> -(define_insn "rotr<mode>3"
> +(define_insn "<optab><mode>3"
>     [(set (match_operand:IVEC 0 "register_operand" "=f")
> -	(rotatert:IVEC (match_operand:IVEC 1 "register_operand" "f")
> -		       (match_operand:SI 2 "const_<bitimm>_operand")))]
> -  ""
> -  "<x>vrotri.<simdfmt>\t%<wu>0,%<wu>1,%2";
> +	(shift_w:IVEC
> +	  (match_operand:IVEC 1 "register_operand" "f")
> +	  (match_operand:SI 2 "const_<bitimm>_operand")))]
> +  "ISA_HAS_LSX"
> +  "<x>v<insn>i.<simdfmt>\t%<wu>0,%<wu>1,%2"
>     [(set_attr "type" "simd_int_arith")
>      (set_attr "mode" "<MODE>")])
>   
> diff --git a/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C b/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C
> new file mode 100644
> index 00000000000..bcef985fae2
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C
> @@ -0,0 +1,147 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mlasx -O2" } */
> +/* { dg-final { scan-assembler-times "vsrli.b" 2 } } */
> +/* { dg-final { scan-assembler-times "vsrli.h" 2 } } */
> +/* { dg-final { scan-assembler-times "vsrli.w" 2 } } */
> +/* { dg-final { scan-assembler-times "vsrli.d" 2 } } */
> +/* { dg-final { scan-assembler-times "vsrai.b" 2 } } */
> +/* { dg-final { scan-assembler-times "vsrai.h" 2 } } */
> +/* { dg-final { scan-assembler-times "vsrai.w" 2 } } */
> +/* { dg-final { scan-assembler-times "vsrai.d" 2 } } */
> +
> +typedef signed char v16qi __attribute__((vector_size(16)));
> +typedef signed char v32qi __attribute__((vector_size(32)));
> +typedef short v8hi __attribute__((vector_size(16)));
> +typedef short v16hi __attribute__((vector_size(32)));
> +typedef int v4si __attribute__((vector_size(16)));
> +typedef int v8si __attribute__((vector_size(32)));
> +typedef long long v2di __attribute__((vector_size(16)));
> +typedef long long v4di __attribute__((vector_size(32)));
> +
> +v16qi
> +foo (v16qi a)
> +{
> +  v16qi const1_op = __extension__(v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
> +  v16qi const0_op = __extension__(v16qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
> +  return a < const0_op ? const1_op : const0_op;
> +}
> +
> +v32qi
> +foo2 (v32qi a)
> +{
> +  v32qi const1_op = __extension__(v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
> +  v32qi const0_op = __extension__(v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
> +  return a < const0_op ? const1_op : const0_op;
> +}
> +
> +v8hi
> +foo3 (v8hi a)
> +{
> +  v8hi const1_op = __extension__(v8hi){1,1,1,1,1,1,1,1};
> +  v8hi const0_op = __extension__(v8hi){0,0,0,0,0,0,0,0};
> +  return a < const0_op ? const1_op : const0_op;
> +}
> +
> +v16hi
> +foo4 (v16hi a)
> +{
> +  v16hi const1_op = __extension__(v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
> +  v16hi const0_op = __extension__(v16hi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
> +  return a < const0_op ? const1_op : const0_op;
> +}
> +
> +v4si
> +foo5 (v4si a)
> +{
> +  v4si const1_op = __extension__(v4si){1,1,1,1};
> +  v4si const0_op = __extension__(v4si){0,0,0,0};
> +  return a < const0_op ? const1_op : const0_op;
> +}
> +
> +v8si
> +foo6 (v8si a)
> +{
> +  v8si const1_op = __extension__(v8si){1,1,1,1,1,1,1,1};
> +  v8si const0_op = __extension__(v8si){0,0,0,0,0,0,0,0};
> +  return a < const0_op ? const1_op : const0_op;
> +}
> +
> +v2di
> +foo7 (v2di a)
> +{
> +  v2di const1_op = __extension__(v2di){1,1};
> +  v2di const0_op = __extension__(v2di){0,0};
> +  return a < const0_op ? const1_op : const0_op;
> +}
> +
> +v4di
> +foo8 (v4di a)
> +{
> +  v4di const1_op = __extension__(v4di){1,1,1,1};
> +  v4di const0_op = __extension__(v4di){0,0,0,0};
> +  return a < const0_op ? const1_op : const0_op;
> +}
> +
> +v16qi
> +foo9 (v16qi a)
> +{
> +  v16qi const1_op = __extension__(v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
> +  v16qi const0_op = __extension__(v16qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
> +  return a < const0_op ? const1_op : const0_op;
> +}
> +
> +v32qi
> +foo10 (v32qi a)
> +{
> +  v32qi const1_op = __extension__(v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
> +  v32qi const0_op = __extension__(v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
> +  return a < const0_op ? const1_op : const0_op;
> +}
> +
> +v8hi
> +foo11 (v8hi a)
> +{
> +  v8hi const1_op = __extension__(v8hi){-1,-1,-1,-1,-1,-1,-1,-1};
> +  v8hi const0_op = __extension__(v8hi){0,0,0,0,0,0,0,0};
> +  return a < const0_op ? const1_op : const0_op;
> +}
> +
> +v16hi
> +foo12 (v16hi a)
> +{
> +  v16hi const1_op = __extension__(v16hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
> +  v16hi const0_op = __extension__(v16hi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
> +  return a < const0_op ? const1_op : const0_op;
> +}
> +
> +v4si
> +foo13 (v4si a)
> +{
> +  v4si const1_op = __extension__(v4si){-1,-1,-1,-1};
> +  v4si const0_op = __extension__(v4si){0,0,0,0};
> +  return a < const0_op ? const1_op : const0_op;
> +}
> +
> +v8si
> +foo14 (v8si a)
> +{
> +  v8si const1_op = __extension__(v8si){-1,-1,-1,-1,-1,-1,-1,-1};
> +  v8si const0_op = __extension__(v8si){0,0,0,0,0,0,0,0};
> +  return a < const0_op ? const1_op : const0_op;
> +}
> +
> +v2di
> +foo15 (v2di a)
> +{
> +  v2di const1_op = __extension__(v2di){-1,-1};
> +  v2di const0_op = __extension__(v2di){0,0};
> +  return a < const0_op ? const1_op : const0_op;
> +}
> +
> +v4di
> +foo16 (v4di a)
> +{
> +  v4di const1_op = __extension__(v4di){-1,-1,-1,-1};
> +  v4di const0_op = __extension__(v4di){0,0,0,0};
> +  return a < const0_op ? const1_op : const0_op;
> +}
diff mbox series

Patch

diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index ee0310f2bd6..1f105cbf891 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -559,6 +559,7 @@  (define_code_attr optab [(ashift "ashl")
 (define_code_attr insn [(ashift "sll")
 			(ashiftrt "sra")
 			(lshiftrt "srl")
+			(rotatert "rotr")
 			(ior "or")
 			(xor "xor")
 			(and "and")
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
index 00ff2823a4e..45ea114220e 100644
--- a/gcc/config/loongarch/simd.md
+++ b/gcc/config/loongarch/simd.md
@@ -306,14 +306,15 @@  (define_expand "rotl<mode>3"
     operands[4] = gen_reg_rtx (<MODE>mode);
   });
 
-;; <x>vrotri.{b/h/w/d}
+;; <x>v{rotr/sll/sra/srl}i.{b/h/w/d}
 
-(define_insn "rotr<mode>3"
+(define_insn "<optab><mode>3"
   [(set (match_operand:IVEC 0 "register_operand" "=f")
-	(rotatert:IVEC (match_operand:IVEC 1 "register_operand" "f")
-		       (match_operand:SI 2 "const_<bitimm>_operand")))]
-  ""
-  "<x>vrotri.<simdfmt>\t%<wu>0,%<wu>1,%2";
+	(shift_w:IVEC
+	  (match_operand:IVEC 1 "register_operand" "f")
+	  (match_operand:SI 2 "const_<bitimm>_operand")))]
+  "ISA_HAS_LSX"
+  "<x>v<insn>i.<simdfmt>\t%<wu>0,%<wu>1,%2"
   [(set_attr "type" "simd_int_arith")
    (set_attr "mode" "<MODE>")])
 
diff --git a/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C b/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C
new file mode 100644
index 00000000000..bcef985fae2
--- /dev/null
+++ b/gcc/testsuite/g++.target/loongarch/vect-ashr-lshr.C
@@ -0,0 +1,147 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mlasx -O2" } */
+/* { dg-final { scan-assembler-times "vsrli.b" 2 } } */
+/* { dg-final { scan-assembler-times "vsrli.h" 2 } } */
+/* { dg-final { scan-assembler-times "vsrli.w" 2 } } */
+/* { dg-final { scan-assembler-times "vsrli.d" 2 } } */
+/* { dg-final { scan-assembler-times "vsrai.b" 2 } } */
+/* { dg-final { scan-assembler-times "vsrai.h" 2 } } */
+/* { dg-final { scan-assembler-times "vsrai.w" 2 } } */
+/* { dg-final { scan-assembler-times "vsrai.d" 2 } } */
+
+typedef signed char v16qi __attribute__((vector_size(16)));
+typedef signed char v32qi __attribute__((vector_size(32)));
+typedef short v8hi __attribute__((vector_size(16)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef long long v2di __attribute__((vector_size(16)));
+typedef long long v4di __attribute__((vector_size(32)));
+
+v16qi
+foo (v16qi a)
+{
+  v16qi const1_op = __extension__(v16qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
+  v16qi const0_op = __extension__(v16qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v32qi
+foo2 (v32qi a)
+{
+  v32qi const1_op = __extension__(v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
+  v32qi const0_op = __extension__(v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v8hi
+foo3 (v8hi a)
+{
+  v8hi const1_op = __extension__(v8hi){1,1,1,1,1,1,1,1};
+  v8hi const0_op = __extension__(v8hi){0,0,0,0,0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v16hi
+foo4 (v16hi a)
+{
+  v16hi const1_op = __extension__(v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
+  v16hi const0_op = __extension__(v16hi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v4si
+foo5 (v4si a)
+{
+  v4si const1_op = __extension__(v4si){1,1,1,1};
+  v4si const0_op = __extension__(v4si){0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v8si
+foo6 (v8si a)
+{
+  v8si const1_op = __extension__(v8si){1,1,1,1,1,1,1,1};
+  v8si const0_op = __extension__(v8si){0,0,0,0,0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v2di
+foo7 (v2di a)
+{
+  v2di const1_op = __extension__(v2di){1,1};
+  v2di const0_op = __extension__(v2di){0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v4di
+foo8 (v4di a)
+{
+  v4di const1_op = __extension__(v4di){1,1,1,1};
+  v4di const0_op = __extension__(v4di){0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v16qi
+foo9 (v16qi a)
+{
+  v16qi const1_op = __extension__(v16qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
+  v16qi const0_op = __extension__(v16qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v32qi
+foo10 (v32qi a)
+{
+  v32qi const1_op = __extension__(v32qi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
+  v32qi const0_op = __extension__(v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v8hi
+foo11 (v8hi a)
+{
+  v8hi const1_op = __extension__(v8hi){-1,-1,-1,-1,-1,-1,-1,-1};
+  v8hi const0_op = __extension__(v8hi){0,0,0,0,0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v16hi
+foo12 (v16hi a)
+{
+  v16hi const1_op = __extension__(v16hi){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
+  v16hi const0_op = __extension__(v16hi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v4si
+foo13 (v4si a)
+{
+  v4si const1_op = __extension__(v4si){-1,-1,-1,-1};
+  v4si const0_op = __extension__(v4si){0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v8si
+foo14 (v8si a)
+{
+  v8si const1_op = __extension__(v8si){-1,-1,-1,-1,-1,-1,-1,-1};
+  v8si const0_op = __extension__(v8si){0,0,0,0,0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v2di
+foo15 (v2di a)
+{
+  v2di const1_op = __extension__(v2di){-1,-1};
+  v2di const0_op = __extension__(v2di){0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v4di
+foo16 (v4di a)
+{
+  v4di const1_op = __extension__(v4di){-1,-1,-1,-1};
+  v4di const0_op = __extension__(v4di){0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}