@@ -24182,6 +24182,28 @@ ix86_expand_vec_shift_qihi_constant (enum rtx_code code,
return false;
gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
+
+ /* Special case: arithmetic right shift by 7.  For V16QImode and V32QImode
+    DEST receives the vector comparison GT (zero, OP1); for V64QImode OP1 is
+    converted to a DImode mask and that mask is converted back into DEST.
+    Returns true here, before the sign-bit/xor handling that follows.
+    NOTE(review): presumably valid because a signed byte shifted right by 7
+    is all-ones exactly when the byte is negative -- confirm.  */
+ if (shift_amount == 7
+ && code == ASHIFTRT)
+ {
+ if (qimode == V16QImode
+ || qimode == V32QImode)
+ {
+ rtx zero = gen_reg_rtx (qimode);
+ emit_move_insn (zero, CONST0_RTX (qimode));
+ emit_move_insn (dest, gen_rtx_fmt_ee (GT, qimode, zero, op1));
+ }
+ else
+ {
+ gcc_assert (qimode == V64QImode);
+ rtx kmask = gen_reg_rtx (DImode);
+ emit_insn (gen_avx512bw_cvtb2maskv64qi (kmask, op1));
+ emit_insn (gen_avx512bw_cvtmask2bv64qi (dest, kmask));
+ }
+ return true;
+ }
+
/* Record sign bit. */
xor_constant = 1 << (8 - shift_amount - 1);
@@ -24292,6 +24314,16 @@ ix86_expand_vecop_qihi_partial (enum rtx_code code, rtx dest, rtx op1, rtx op2)
return;
}
+ if (CONST_INT_P (op2)
+ && code == ASHIFTRT
+ && INTVAL (op2) == 7)
+ { /* Constant arithmetic right shift by 7: store the vector comparison
+      GT (zero, OP1) in DEST and return before the switch on CODE below.
+      NOTE(review): presumably valid because a signed byte shifted right
+      by 7 is all-ones exactly when the byte is negative -- confirm.  */
+ rtx zero = gen_reg_rtx (qimode);
+ emit_move_insn (zero, CONST0_RTX (qimode));
+ emit_move_insn (dest, gen_rtx_fmt_ee (GT, qimode, zero, op1));
+ return;
+ }
+
switch (code)
{
case MULT:
new file mode 100644
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "vpxor" 4 } } */
+/* { dg-final { scan-assembler-times "vpcmpgtb" 4 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpcmpgtb" 5 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vpmovb2m" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovm2b" 1 } } */
+
+/* Byte vector types built with GCC's vector_size attribute; the attribute
+   argument is the total vector size in bytes (16, 32, 64, 8 and 4 here).
+   NOTE(review): the element type is plain char, so the shifts in this file
+   are arithmetic only where char is signed -- confirm for tested targets.  */
+typedef char v16qi __attribute__((vector_size(16)));
+typedef char v32qi __attribute__((vector_size(32)));
+typedef char v64qi __attribute__((vector_size(64)));
+typedef char v8qi __attribute__((vector_size(8)));
+typedef char v4qi __attribute__((vector_size(4)));
+/* Shift every element of a 4-byte vector right by 7.  Keep the expression
+   as a literal `a >> 7': the dg-final directives at the top of this file
+   scan the assembly generated for it.  */
+v4qi
+__attribute__((noipa))
+foo1 (v4qi a)
+{
+ return a >> 7;
+}
+/* Same shift by 7 as a literal `a >> 7', applied to an 8-byte vector.
+   The source form matters: the dg-final directives at the top of this
+   file scan the generated assembly.  */
+v8qi
+__attribute__((noipa))
+foo2 (v8qi a)
+{
+ return a >> 7;
+}
+/* Shift every element of a 16-byte vector right by 7.
+   NOTE(review): presumably one of the cases the vpxor/vpcmpgtb scan
+   counts at the top of this file are meant to cover -- confirm.  */
+v16qi
+__attribute__((noipa))
+foo3 (v16qi a)
+{
+ return a >> 7;
+}
+/* Shift every element of a 32-byte vector right by 7.
+   NOTE(review): presumably one of the cases the vpxor/vpcmpgtb scan
+   counts at the top of this file are meant to cover -- confirm.  */
+v32qi
+__attribute__((noipa))
+foo4 (v32qi a)
+{
+ return a >> 7;
+}
+/* Shift every element of a 64-byte vector right by 7.
+   NOTE(review): presumably the case the single vpmovb2m and vpmovm2b
+   scans at the top of this file are meant to match -- confirm.  */
+v64qi
+__attribute__((noipa))
+foo5 (v64qi a)
+{
+ return a >> 7;
+}