diff mbox series

[x86] Optimize ashift >> 7 to vpcmpgtb for vector int8.

Message ID 20240515001842.1551438-1-hongtao.liu@intel.com
State New
Headers show
Series [x86] Optimize ashift >> 7 to vpcmpgtb for vector int8. | expand

Commit Message

liuhongt May 15, 2024, 12:18 a.m. UTC
Since there is no corresponding instruction, the shift operation for
vector int8 is implemented using the instructions for vector int16,
but for some special shift counts, it can be transformed into vpcmpgtb.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready push to trunk.

gcc/ChangeLog:

	PR target/114514
	* config/i386/i386-expand.cc
	(ix86_expand_vec_shift_qihi_constant): Optimize ashift >> 7 to
	vpcmpgtb.
	(ix86_expand_vecop_qihi_partial): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr114514-shift.c: New test.
---
 gcc/config/i386/i386-expand.cc                | 32 ++++++++++++
 .../gcc.target/i386/pr114514-shift.c          | 49 +++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr114514-shift.c
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 1ab22fe7973..ab6631f51e3 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -24182,6 +24182,28 @@  ix86_expand_vec_shift_qihi_constant (enum rtx_code code,
     return false;
 
   gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
+
+
+  if (shift_amount == 7
+      && code == ASHIFTRT)
+    {
+      if (qimode == V16QImode
+	  || qimode == V32QImode)
+	{
+	  rtx zero = gen_reg_rtx (qimode);
+	  emit_move_insn (zero, CONST0_RTX (qimode));
+	  emit_move_insn (dest, gen_rtx_fmt_ee (GT, qimode, zero, op1));
+	}
+      else
+	{
+	  gcc_assert (qimode == V64QImode);
+	  rtx kmask = gen_reg_rtx (DImode);
+	  emit_insn (gen_avx512bw_cvtb2maskv64qi (kmask, op1));
+	  emit_insn (gen_avx512bw_cvtmask2bv64qi (dest, kmask));
+	}
+      return true;
+    }
+
   /* Record sign bit.  */
   xor_constant = 1 << (8 - shift_amount - 1);
 
@@ -24292,6 +24314,16 @@  ix86_expand_vecop_qihi_partial (enum rtx_code code, rtx dest, rtx op1, rtx op2)
       return;
     }
 
+  if (CONST_INT_P (op2)
+      && code == ASHIFTRT
+      && INTVAL (op2) == 7)
+    {
+      rtx zero = gen_reg_rtx (qimode);
+      emit_move_insn (zero, CONST0_RTX (qimode));
+      emit_move_insn (dest, gen_rtx_fmt_ee (GT, qimode, zero, op1));
+      return;
+    }
+
   switch (code)
     {
     case MULT:
diff --git a/gcc/testsuite/gcc.target/i386/pr114514-shift.c b/gcc/testsuite/gcc.target/i386/pr114514-shift.c
new file mode 100644
index 00000000000..cf8b32b3b1d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr114514-shift.c
@@ -0,0 +1,49 @@ 
+/* { dg-do compile  } */
+/* { dg-options "-mavx512vl -mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "vpxor" 4 } } */
+/* { dg-final { scan-assembler-times "vpcmpgtb" 4 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpcmpgtb" 5 { target  ia32 } } } */
+/* { dg-final { scan-assembler-times "vpmovb2m" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovm2b" 1 } } */
+
+
+typedef char v16qi __attribute__((vector_size(16)));
+typedef char v32qi __attribute__((vector_size(32)));
+typedef char v64qi __attribute__((vector_size(64)));
+typedef char v8qi __attribute__((vector_size(8)));
+typedef char v4qi __attribute__((vector_size(4)));
+
+v4qi
+__attribute__((noipa))
+foo1 (v4qi a)
+{
+  return a >> 7;
+}
+
+v8qi
+__attribute__((noipa))
+foo2 (v8qi a)
+{
+  return a >> 7;
+}
+
+v16qi
+__attribute__((noipa))
+foo3 (v16qi a)
+{
+  return a >> 7;
+}
+
+v32qi
+__attribute__((noipa))
+foo4 (v32qi a)
+{
+  return a >> 7;
+}
+
+v64qi
+__attribute__((noipa))
+foo5 (v64qi a)
+{
+  return a >> 7;
+}