diff mbox series

[COMMITTED] i386: Add v<any_shift:insn>v4qi3 expander

Message ID CAFULd4YpKfOOwCwLH=qscZECsXcuyoN7qd7sFB0aQq-iAcBuHA@mail.gmail.com
State New
Headers show
Series [COMMITTED] i386: Add v<any_shift:insn>v4qi3 expander | expand

Commit Message

Uros Bizjak May 24, 2023, 2:20 p.m. UTC
Also, move the v<any_shift:insn>v8qi3 expander from sse.md to mmx.md and
enable it for TARGET_MMX_WITH_SSE instead of TARGET_64BIT.  Remove handling
of V8QImode from ix86_expand_vecop_qihi2, since all partial QI->HI vector
modes now expand via ix86_expand_vecop_qihi_partial.

gcc/ChangeLog:

    * config/i386/i386-expand.cc (ix86_expand_vecop_qihi2):
    Remove handling of V8QImode.
    * config/i386/mmx.md (v<insn>v8qi3): Move from sse.md.
    Call ix86_expand_vecop_qihi_partial.  Enable for TARGET_MMX_WITH_SSE.
    (v<insn>v4qi3): New expander.
    * config/i386/sse.md (v<insn>v8qi3): Remove.

gcc/testsuite/ChangeLog:

    * gcc.target/i386/vect-shiftv4qi.c (dg-options):
    Remove -ftree-vectorize.
    * gcc.target/i386/vect-shiftv8qi.c (dg-options): Ditto.
    * gcc.target/i386/vect-vshiftv4qi.c: New test.
    * gcc.target/i386/vect-vshiftv8qi.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index ff3d382f1b4..2e6e6585aeb 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -23132,9 +23132,10 @@  ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, rtx op1, rtx op2)
   /* vpmovwb only available under AVX512BW.  */
   if (!TARGET_AVX512BW)
     return false;
-  if ((qimode == V8QImode || qimode == V16QImode)
-      && !TARGET_AVX512VL)
+
+  if (qimode == V16QImode && !TARGET_AVX512VL)
     return false;
+
   /* Do not generate ymm/zmm instructions when
      target prefers 128/256 bit vector width.  */
   if ((qimode == V16QImode && TARGET_PREFER_AVX128)
@@ -23143,10 +23144,6 @@  ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, rtx op1, rtx op2)
 
   switch (qimode)
     {
-    case E_V8QImode:
-      himode = V8HImode;
-      gen_truncate = gen_truncv8hiv8qi2;
-      break;
     case E_V16QImode:
       himode = V16HImode;
       gen_truncate = gen_truncv16hiv16qi2;
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index a37bbbb811f..dbcb850ffde 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2734,6 +2734,30 @@  (define_insn_and_split "<insn>v2qi3"
   [(set_attr "type" "multi")
    (set_attr "mode" "QI")])
 
+(define_expand "v<insn>v8qi3"
+  [(set (match_operand:V8QI 0 "register_operand")
+	(any_shift:V8QI
+	  (match_operand:V8QI 1 "register_operand")
+	  (match_operand:V8QI 2 "register_operand")))]
+  "TARGET_AVX512BW && TARGET_AVX512VL && TARGET_MMX_WITH_SSE"
+{
+  ix86_expand_vecop_qihi_partial (<CODE>, operands[0],
+				  operands[1], operands[2]);
+  DONE;
+})
+
+(define_expand "v<insn>v4qi3"
+  [(set (match_operand:V4QI 0 "register_operand")
+	(any_shift:V4QI
+	  (match_operand:V4QI 1 "register_operand")
+	  (match_operand:V4QI 2 "register_operand")))]
+  "TARGET_AVX512BW && TARGET_AVX512VL"
+{
+  ix86_expand_vecop_qihi_partial (<CODE>, operands[0],
+				  operands[1], operands[2]);
+  DONE;
+})
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel integral comparisons
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 26dd0b1aa10..0656a5ce717 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -24564,17 +24564,6 @@  (define_expand "v<insn><mode>3"
     }
 })
 
-(define_expand "v<insn>v8qi3"
-  [(set (match_operand:V8QI 0 "register_operand")
-	(any_shift:V8QI
-	  (match_operand:V8QI 1 "register_operand")
-	  (match_operand:V8QI 2 "nonimmediate_operand")))]
-  "TARGET_AVX512BW && TARGET_AVX512VL && TARGET_64BIT"
-{
-  ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
-  DONE;
-})
-
 (define_expand "vlshr<mode>3"
   [(set (match_operand:VI48_512 0 "register_operand")
 	(lshiftrt:VI48_512
diff --git a/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c b/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c
index c06dfb87bd1..c6a63903604 100644
--- a/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c
+++ b/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+/* { dg-options "-O2 -msse2" } */
 
 #define N 4
 
diff --git a/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c b/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c
index f5e8925aa25..244b0dbd28a 100644
--- a/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c
+++ b/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+/* { dg-options "-O2 -msse2" } */
 
 #define N 8
 
diff --git a/gcc/testsuite/gcc.target/i386/vect-vshiftv4qi.c b/gcc/testsuite/gcc.target/i386/vect-vshiftv4qi.c
new file mode 100644
index 00000000000..c74cc991f59
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-vshiftv4qi.c
@@ -0,0 +1,28 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+
+#define N 4
+
+typedef unsigned char __vu __attribute__ ((__vector_size__ (N)));
+typedef signed char __vi __attribute__ ((__vector_size__ (N)));
+
+__vu sllv (__vu a, __vu b)
+{
+  return a << b;
+}
+
+/* { dg-final { scan-assembler-times "vpsllvw" 1 } } */
+
+__vu srlv (__vu a, __vu b)
+{
+  return a >> b;
+}
+
+/* { dg-final { scan-assembler-times "vpsrlvw" 1 } } */
+
+__vi srav (__vi a, __vi b)
+{
+  return a >> b;
+}
+
+/* { dg-final { scan-assembler-times "vpsravw" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-vshiftv8qi.c b/gcc/testsuite/gcc.target/i386/vect-vshiftv8qi.c
new file mode 100644
index 00000000000..1d838af07f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-vshiftv8qi.c
@@ -0,0 +1,28 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+
+#define N 8
+
+typedef unsigned char __vu __attribute__ ((__vector_size__ (N)));
+typedef signed char __vi __attribute__ ((__vector_size__ (N)));
+
+__vu vsll (__vu a, __vu b)
+{
+  return a << b;
+}
+
+/* { dg-final { scan-assembler-times "vpsllvw" 1 } } */
+
+__vu vsrl (__vu a, __vu b)
+{
+  return a >> b;
+}
+
+/* { dg-final { scan-assembler-times "vpsrlvw" 1 } } */
+
+__vi vsra (__vi a, __vi b)
+{
+  return a >> b;
+}
+
+/* { dg-final { scan-assembler-times "vpsravw" 1 } } */