diff mbox series

i386: Optimize EQ/NE comparison between avx512 kmask and -1.

Message ID 20240528075756.293707-1-lin1.hu@intel.com
State New
Headers show
Series i386: Optimize EQ/NE comparison between avx512 kmask and -1. | expand

Commit Message

Hu, Lin1 May 28, 2024, 7:57 a.m. UTC
Hi all,

This patch aims to acheive EQ/NE comparison between avx512 kmask and -1
by using kxortest with checking CF.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,-m64}. Ok for trunk?

BRs,
Lin

gcc/ChangeLog:

	PR target/113609
	* config/i386/sse.md
	(*kortest_cmp<mode>_setcc): New define_insn_and_split.
	(*kortest_cmp<mode>_jcc): Ditto.

gcc/testsuite/ChangeLog:

	PR target/113609
	* gcc.target/i386/pr113609-1.c: New test.
	* gcc.target/i386/pr113609-2.c: Ditto.
---
 gcc/config/i386/sse.md                     |  67 +++++++
 gcc/testsuite/gcc.target/i386/pr113609-1.c | 194 +++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr113609-2.c | 161 +++++++++++++++++
 3 files changed, 422 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113609-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr113609-2.c

Comments

Hongtao Liu May 31, 2024, 4:47 a.m. UTC | #1
On Tue, May 28, 2024 at 4:00 PM Hu, Lin1 <lin1.hu@intel.com> wrote:
>
> Hi all,
>
> This patch aims to acheive EQ/NE comparison between avx512 kmask and -1
> by using kxortest with checking CF.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,-m64}. Ok for trunk?
Ok.
>
> BRs,
> Lin
>
> gcc/ChangeLog:
>
>         PR target/113609
>         * config/i386/sse.md
>         (*kortest_cmp<mode>_setcc): New define_insn_and_split.
>         (*kortest_cmp<mode>_jcc): Ditto.
>
> gcc/testsuite/ChangeLog:
>
>         PR target/113609
>         * gcc.target/i386/pr113609-1.c: New test.
>         * gcc.target/i386/pr113609-2.c: Ditto.
> ---
>  gcc/config/i386/sse.md                     |  67 +++++++
>  gcc/testsuite/gcc.target/i386/pr113609-1.c | 194 +++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr113609-2.c | 161 +++++++++++++++++
>  3 files changed, 422 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr113609-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr113609-2.c
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index b59c988fc31..34fd2e4afac 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -2201,6 +2201,73 @@ (define_expand "kortest<mode>"
>           UNSPEC_KORTEST))]
>    "TARGET_AVX512F")
>
> +;; Optimize cmp + setcc with mask register by kortest + setcc.
> +(define_insn_and_split "*kortest_cmp<mode>_setcc"
> +   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm, qm")
> +        (match_operator:QI 1 "bt_comparison_operator"
> +           [(match_operand:SWI1248_AVX512BWDQ_64 2 "register_operand" "?k, <r>")
> +            (const_int -1)]))
> +  (clobber (reg:CC FLAGS_REG))]
> +  "TARGET_AVX512BW"
> +  "#"
> +  "&& reload_completed"
> +  [(const_int 0)]
> +{
> +  if (MASK_REGNO_P (REGNO (operands[2])))
> +    {
> +      emit_insn (gen_kortest<mode>_ccc (operands[2], operands[2]));
> +      operands[4] = gen_rtx_REG (CCCmode, FLAGS_REG);
> +    }
> +  else
> +    {
> +      operands[4] = gen_rtx_REG (CCZmode, FLAGS_REG);
> +      emit_insn (gen_rtx_SET (operands[4],
> +                             gen_rtx_COMPARE (CCZmode,
> +                                              operands[2],
> +                                              constm1_rtx)));
> +    }
> +  ix86_expand_setcc (operands[0],
> +                    GET_CODE (operands[1]),
> +                    operands[4],
> +                    const0_rtx);
> +  DONE;
> +})
> +
> +;; Optimize cmp + jcc with mask register by kortest + jcc.
> +(define_insn_and_split "*kortest_cmp<mode>_jcc"
> +   [(set (pc)
> +      (if_then_else
> +       (match_operator 0 "bt_comparison_operator"
> +         [(match_operand:SWI1248_AVX512BWDQ_64 1 "register_operand" "?k, <r>")
> +          (const_int -1)])
> +         (label_ref (match_operand 2))
> +      (pc)))
> +  (clobber (reg:CC FLAGS_REG))]
> +  "TARGET_AVX512BW"
> +  "#"
> +  "&& reload_completed"
> +  [(const_int 0)]
> +{
> +  if (MASK_REGNO_P (REGNO (operands[1])))
> +    {
> +      emit_insn (gen_kortest<mode>_ccc (operands[1], operands[1]));
> +      operands[4] = gen_rtx_REG (CCCmode, FLAGS_REG);
> +    }
> +  else
> +    {
> +      operands[4] = gen_rtx_REG (CCZmode, FLAGS_REG);
> +      emit_insn (gen_rtx_SET (operands[4],
> +                             gen_rtx_COMPARE (CCZmode,
> +                                              operands[1],
> +                                              constm1_rtx)));
> +    }
> +  ix86_expand_branch (GET_CODE (operands[0]),
> +                     operands[4],
> +                     const0_rtx,
> +                     operands[2]);
> +  DONE;
> +})
> +
>  (define_insn "kunpckhi"
>    [(set (match_operand:HI 0 "register_operand" "=k")
>         (ior:HI
> diff --git a/gcc/testsuite/gcc.target/i386/pr113609-1.c b/gcc/testsuite/gcc.target/i386/pr113609-1.c
> new file mode 100644
> index 00000000000..f0639b8500a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr113609-1.c
> @@ -0,0 +1,194 @@
> +/* PR target/113609 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=x86-64-v4" } */
> +/* { dg-final { scan-assembler-not "^cmp" } } */
> +/* { dg-final { scan-assembler-not "\[ \\t\]+sete" { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-not "\[ \\t\]+setne" { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-not "\[ \\t\]+je" { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-not "\[ \\t\]+jne" { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "\[ \\t\]+sete" 1 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "\[ \\t\]+setne" 1 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "\[ \\t\]+je" 1 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "\[ \\t\]+jne" 2 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "kortest" 12 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "kortest" 17 { target { ! ia32 } } } } */
> +
> +#include <immintrin.h>
> +
> +unsigned int
> +cmp_vector_sete_mask8(__m128i a, __m128i b)
> +{
> +    __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
> +    if (k == (__mmask8) -1)
> +      return 1;
> +    else
> +      return 0;
> +}
> +
> +unsigned int
> +cmp_vector_sete_mask16(__m128i a, __m128i b)
> +{
> +    __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
> +    if (k == (__mmask16) -1)
> +      return 1;
> +    else
> +      return 0;
> +}
> +
> +unsigned int
> +cmp_vector_sete_mask32(__m256i a, __m256i b)
> +{
> +    __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
> +    if (k == (__mmask32) -1)
> +      return 1;
> +    else
> +      return 0;
> +}
> +
> +unsigned int
> +cmp_vector_sete_mask64(__m512i a, __m512i b)
> +{
> +    __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
> +    if (k == (__mmask64) -1)
> +      return 1;
> +    else
> +      return 0;
> +}
> +
> +unsigned int
> +cmp_vector_setne_mask8(__m128i a, __m128i b)
> +{
> +    __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
> +    if (k != (__mmask8) -1)
> +      return 1;
> +    else
> +      return 0;
> +}
> +
> +unsigned int
> +cmp_vector_setne_mask16(__m128i a, __m128i b)
> +{
> +    __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
> +    if (k != (__mmask16) -1)
> +      return 1;
> +    else
> +      return 0;
> +}
> +
> +unsigned int
> +cmp_vector_setne_mask32(__m256i a, __m256i b)
> +{
> +    __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
> +    if (k != (__mmask32) -1)
> +      return 1;
> +    else
> +      return 0;
> +}
> +
> +unsigned int
> +cmp_vector_setne_mask64(__m512i a, __m512i b)
> +{
> +    __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
> +    if (k != (__mmask64) -1)
> +      return 1;
> +    else
> +      return 0;
> +}
> +
> +__m128i
> +cmp_vector_je_mask8(__m128i a, __m128i b) {
> +    __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
> +    if (k == (__mmask8) -1) {
> +       a[0] = a[0] + 1;
> +    }
> +    else {
> +       a[0] = a[0] - 1;
> +    }
> +    return a;
> +}
> +
> +__m128i
> +cmp_vector_je_mask16(__m128i a, __m128i b) {
> +    __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
> +    if (k == (__mmask16) -1) {
> +       a[0] = a[0] + 1;
> +    }
> +    else {
> +       a[0] = a[0] - 1;
> +    }
> +    return a;
> +}
> +
> +__m256i
> +cmp_vector_je_mask32(__m256i a, __m256i b) {
> +    __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
> +    if (k == (__mmask32) -1) {
> +       a[0] = a[0] + 1;
> +    }
> +    else {
> +       a[0] = a[0] - 1;
> +    }
> +    return a;
> +}
> +
> +__m512i
> +cmp_vector_je_mask64(__m512i a, __m512i b) {
> +    __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
> +    if (k == (__mmask64) -1) {
> +       a[0] = a[0] + 1;
> +    }
> +    else {
> +       a[0] = a[0] - 5;
> +    }
> +    return a;
> +}
> +
> +__m128i
> +cmp_vector_jne_mask8(__m128i a, __m128i b) {
> +    __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
> +    if (k == (__mmask8) -1) {
> +       a[0] = a[0] + 1;
> +    }
> +    a[0] = a[0] - 4;
> +    return a;
> +}
> +
> +__m128i
> +cmp_vector_jne_mask16(__m128i a, __m128i b) {
> +    __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
> +    if (k == (__mmask16) -1) {
> +       a[0] = a[0] + 1;
> +    }
> +    a[0] = a[0] - 4;
> +    return a;
> +}
> +
> +__m256i
> +cmp_vector_jne_mask32(__m256i a, __m256i b) {
> +    __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
> +    if (k == (__mmask32) -1) {
> +       a[0] = a[0] + 1;
> +    }
> +    a[0] = a[0] - 4;
> +    return a;
> +}
> +
> +__m512i
> +cmp_vector_jne_mask64(__m512i a, __m512i b) {
> +    __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
> +    if (k == (__mmask64) -1) {
> +       a[0] = a[0] + 1;
> +    }
> +    a[0] = a[0] - 4;
> +    return a;
> +}
> +
> +__m512i
> +mask_cmp_vector_jne_mask64(__m512i a, __m512i b) {
> +    __mmask64 k = _mm512_mask_cmpeq_epi8_mask ((__mmask64)0xffffffefffffffff, a, b);
> +    if (k == (__mmask64) -1) {
> +       a[0] = a[0] + 1;
> +    }
> +    a[0] = a[0] - 4;
> +    return a;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr113609-2.c b/gcc/testsuite/gcc.target/i386/pr113609-2.c
> new file mode 100644
> index 00000000000..e9503f51538
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr113609-2.c
> @@ -0,0 +1,161 @@
> +/* PR target/113609 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=x86-64-v4" } */
> +/* { dg-final { scan-assembler-times "\[ \\t\]+sete" 4 } } */
> +/* { dg-final { scan-assembler-times "\[ \\t\]+setne" 4 } } */
> +/* { dg-final { scan-assembler-times "\[ \\t\]+je" 4 } } */
> +/* { dg-final { scan-assembler-times "\[ \\t\]+jne" 4 } } */
> +
> +#include <immintrin.h>
> +
> +unsigned int
> +cmp_pi8_setcc(char a)
> +{
> +    if (a == -1)
> +      return 1;
> +    else
> +      return 0;
> +}
> +
> +unsigned int
> +cmp_pi16_setcc(short a)
> +{
> +    if (a == -1)
> +      return 1;
> +    else
> +      return 0;
> +}
> +
> +unsigned int
> +cmp_pi32_setcc(int a)
> +{
> +    if (a == -1)
> +      return 1;
> +    else
> +      return 0;
> +}
> +
> +unsigned int
> +cmp_pi64_setcc(long long a)
> +{
> +    if (a == -1)
> +      return 1;
> +    else
> +      return 0;
> +}
> +
> +unsigned int
> +cmp_pi8_setne(char a)
> +{
> +    if (a != -1)
> +      return 1;
> +    else
> +      return 0;
> +}
> +
> +unsigned int
> +cmp_pi16_setne(short a)
> +{
> +    if (a != -1)
> +      return 1;
> +    else
> +      return 0;
> +}
> +
> +unsigned int
> +cmp_pi32_setne(int a)
> +{
> +    if (a != -1)
> +      return 1;
> +    else
> +      return 0;
> +}
> +
> +unsigned int
> +cmp_pi64_setne(long long a)
> +{
> +    if (a != -1)
> +      return 1;
> +    else
> +      return 0;
> +}
> +
> +__m128i
> +cmp_pi8_je(__m128i a, char b) {
> +    if (b == -1) {
> +       a[0] = a[0] + 1;
> +    }
> +    else {
> +       a[0] = a[0] - 1;
> +    }
> +    return a;
> +}
> +
> +__m128i
> +cmp_pi16_je(__m128i a, short b) {
> +    if (b == -1) {
> +       a[0] = a[0] + 1;
> +    }
> +    else {
> +       a[0] = a[0] - 1;
> +    }
> +    return a;
> +}
> +
> +__m128i
> +cmp_pi32_je(__m128i a, int b) {
> +    if (b == -1) {
> +       a[0] = a[0] + 1;
> +    }
> +    else {
> +       a[0] = a[0] - 1;
> +    }
> +    return a;
> +}
> +
> +__m128i
> +cmp_pi64_je(__m128i a, long long b) {
> +    if (b == -1) {
> +       a[0] = a[0] + 1;
> +    }
> +    else {
> +       a[0] = a[0] - 1;
> +    }
> +    return a;
> +}
> +
> +__m128i
> +cmp_pi8_jne(__m128i a, char b) {
> +    if (b == -1) {
> +       a[0] = a[0] + 1;
> +    }
> +    a[0] = a[0] - 4;
> +    return a;
> +}
> +
> +__m128i
> +cmp_pi16_jne(__m128i a, short b) {
> +    if (b == -1) {
> +       a[0] = a[0] + 1;
> +    }
> +    a[0] = a[0] - 4;
> +    return a;
> +}
> +
> +__m128i
> +cmp_pi32_jne(__m128i a, int b) {
> +    if (b == -1) {
> +       a[0] = a[0] + 1;
> +    }
> +    a[0] = a[0] - 4;
> +    return a;
> +}
> +
> +__m128i
> +cmp_pi64_jne(__m128i a, long long b) {
> +    if (b == -1) {
> +       a[0] = a[0] + 1;
> +    }
> +    a[0] = a[0] - 4;
> +    return a;
> +}
> --
> 2.31.1
>
diff mbox series

Patch

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b59c988fc31..34fd2e4afac 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2201,6 +2201,73 @@  (define_expand "kortest<mode>"
 	  UNSPEC_KORTEST))]
   "TARGET_AVX512F")
 
+;; Optimize cmp + setcc with mask register by kortest + setcc.
+(define_insn_and_split "*kortest_cmp<mode>_setcc"
+   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm, qm")
+	 (match_operator:QI 1 "bt_comparison_operator"
+	    [(match_operand:SWI1248_AVX512BWDQ_64 2 "register_operand" "?k, <r>")
+	     (const_int -1)]))
+  (clobber (reg:CC FLAGS_REG))]
+  "TARGET_AVX512BW"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  if (MASK_REGNO_P (REGNO (operands[2])))
+    {
+      emit_insn (gen_kortest<mode>_ccc (operands[2], operands[2]));
+      operands[4] = gen_rtx_REG (CCCmode, FLAGS_REG);
+    }
+  else
+    {
+      operands[4] = gen_rtx_REG (CCZmode, FLAGS_REG);
+      emit_insn (gen_rtx_SET (operands[4],
+			      gen_rtx_COMPARE (CCZmode,
+					       operands[2],
+					       constm1_rtx)));
+    }
+  ix86_expand_setcc (operands[0],
+		     GET_CODE (operands[1]),
+		     operands[4],
+		     const0_rtx);
+  DONE;
+})
+
+;; Optimize cmp + jcc with mask register by kortest + jcc.
+(define_insn_and_split "*kortest_cmp<mode>_jcc"
+   [(set (pc)
+      (if_then_else
+	(match_operator 0 "bt_comparison_operator"
+	  [(match_operand:SWI1248_AVX512BWDQ_64 1 "register_operand" "?k, <r>")
+	   (const_int -1)])
+	  (label_ref (match_operand 2))
+      (pc)))
+  (clobber (reg:CC FLAGS_REG))]
+  "TARGET_AVX512BW"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  if (MASK_REGNO_P (REGNO (operands[1])))
+    {
+      emit_insn (gen_kortest<mode>_ccc (operands[1], operands[1]));
+      operands[4] = gen_rtx_REG (CCCmode, FLAGS_REG);
+    }
+  else
+    {
+      operands[4] = gen_rtx_REG (CCZmode, FLAGS_REG);
+      emit_insn (gen_rtx_SET (operands[4],
+			      gen_rtx_COMPARE (CCZmode,
+					       operands[1],
+					       constm1_rtx)));
+    }
+  ix86_expand_branch (GET_CODE (operands[0]),
+		      operands[4],
+		      const0_rtx,
+		      operands[2]);
+  DONE;
+})
+
 (define_insn "kunpckhi"
   [(set (match_operand:HI 0 "register_operand" "=k")
 	(ior:HI
diff --git a/gcc/testsuite/gcc.target/i386/pr113609-1.c b/gcc/testsuite/gcc.target/i386/pr113609-1.c
new file mode 100644
index 00000000000..f0639b8500a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113609-1.c
@@ -0,0 +1,194 @@ 
+/* PR target/113609 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4" } */
+/* { dg-final { scan-assembler-not "^cmp" } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+sete" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+setne" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+je" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+jne" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+sete" 1 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+setne" 1 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+je" 1 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+jne" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "kortest" 12 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "kortest" 17 { target { ! ia32 } } } } */
+
+#include <immintrin.h>
+
+unsigned int
+cmp_vector_sete_mask8(__m128i a, __m128i b)
+{
+    __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
+    if (k == (__mmask8) -1)
+      return 1;
+    else
+      return 0;
+}
+
+unsigned int
+cmp_vector_sete_mask16(__m128i a, __m128i b)
+{
+    __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
+    if (k == (__mmask16) -1)
+      return 1;
+    else
+      return 0;
+}
+
+unsigned int
+cmp_vector_sete_mask32(__m256i a, __m256i b)
+{
+    __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
+    if (k == (__mmask32) -1)
+      return 1;
+    else
+      return 0;
+}
+
+unsigned int
+cmp_vector_sete_mask64(__m512i a, __m512i b)
+{
+    __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+    if (k == (__mmask64) -1)
+      return 1;
+    else
+      return 0;
+}
+
+unsigned int
+cmp_vector_setne_mask8(__m128i a, __m128i b)
+{
+    __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
+    if (k != (__mmask8) -1)
+      return 1;
+    else
+      return 0;
+}
+
+unsigned int
+cmp_vector_setne_mask16(__m128i a, __m128i b)
+{
+    __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
+    if (k != (__mmask16) -1)
+      return 1;
+    else
+      return 0;
+}
+
+unsigned int
+cmp_vector_setne_mask32(__m256i a, __m256i b)
+{
+    __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
+    if (k != (__mmask32) -1)
+      return 1;
+    else
+      return 0;
+}
+
+unsigned int
+cmp_vector_setne_mask64(__m512i a, __m512i b)
+{
+    __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+    if (k != (__mmask64) -1)
+      return 1;
+    else
+      return 0;
+}
+
+__m128i
+cmp_vector_je_mask8(__m128i a, __m128i b) {
+    __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
+    if (k == (__mmask8) -1) {
+	a[0] = a[0] + 1;
+    }
+    else {
+	a[0] = a[0] - 1;
+    }
+    return a; 
+}
+
+__m128i
+cmp_vector_je_mask16(__m128i a, __m128i b) {
+    __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
+    if (k == (__mmask16) -1) {
+	a[0] = a[0] + 1;
+    }
+    else {
+	a[0] = a[0] - 1;
+    }
+    return a; 
+}
+
+__m256i
+cmp_vector_je_mask32(__m256i a, __m256i b) {
+    __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
+    if (k == (__mmask32) -1) {
+	a[0] = a[0] + 1;
+    }
+    else {
+	a[0] = a[0] - 1;
+    }
+    return a; 
+}
+
+__m512i
+cmp_vector_je_mask64(__m512i a, __m512i b) {
+    __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+    if (k == (__mmask64) -1) {
+	a[0] = a[0] + 1;
+    }
+    else {
+	a[0] = a[0] - 5;
+    }
+    return a; 
+}
+
+__m128i
+cmp_vector_jne_mask8(__m128i a, __m128i b) {
+    __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
+    if (k == (__mmask8) -1) {
+	a[0] = a[0] + 1;
+    }
+    a[0] = a[0] - 4;
+    return a; 
+}
+
+__m128i
+cmp_vector_jne_mask16(__m128i a, __m128i b) {
+    __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
+    if (k == (__mmask16) -1) {
+	a[0] = a[0] + 1;
+    }
+    a[0] = a[0] - 4;
+    return a; 
+}
+
+__m256i
+cmp_vector_jne_mask32(__m256i a, __m256i b) {
+    __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
+    if (k == (__mmask32) -1) {
+	a[0] = a[0] + 1;
+    }
+    a[0] = a[0] - 4;
+    return a; 
+}
+
+__m512i
+cmp_vector_jne_mask64(__m512i a, __m512i b) {
+    __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+    if (k == (__mmask64) -1) {
+	a[0] = a[0] + 1;
+    }
+    a[0] = a[0] - 4;
+    return a; 
+}
+
+__m512i
+mask_cmp_vector_jne_mask64(__m512i a, __m512i b) {
+    __mmask64 k = _mm512_mask_cmpeq_epi8_mask ((__mmask64)0xffffffefffffffff, a, b);
+    if (k == (__mmask64) -1) {
+	a[0] = a[0] + 1;
+    }
+    a[0] = a[0] - 4;
+    return a; 
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr113609-2.c b/gcc/testsuite/gcc.target/i386/pr113609-2.c
new file mode 100644
index 00000000000..e9503f51538
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113609-2.c
@@ -0,0 +1,161 @@ 
+/* PR target/113609 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4" } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+sete" 4 } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+setne" 4 } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+je" 4 } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+jne" 4 } } */
+
+#include <immintrin.h>
+
+unsigned int
+cmp_pi8_setcc(char a)
+{
+    if (a == -1)
+      return 1;
+    else
+      return 0;
+}
+
+unsigned int
+cmp_pi16_setcc(short a)
+{
+    if (a == -1)
+      return 1;
+    else
+      return 0;
+}
+
+unsigned int
+cmp_pi32_setcc(int a)
+{
+    if (a == -1)
+      return 1;
+    else
+      return 0;
+}
+
+unsigned int
+cmp_pi64_setcc(long long a)
+{
+    if (a == -1)
+      return 1;
+    else
+      return 0;
+}
+
+unsigned int
+cmp_pi8_setne(char a)
+{
+    if (a != -1)
+      return 1;
+    else
+      return 0;
+}
+
+unsigned int
+cmp_pi16_setne(short a)
+{
+    if (a != -1)
+      return 1;
+    else
+      return 0;
+}
+
+unsigned int
+cmp_pi32_setne(int a)
+{
+    if (a != -1)
+      return 1;
+    else
+      return 0;
+}
+
+unsigned int
+cmp_pi64_setne(long long a)
+{
+    if (a != -1)
+      return 1;
+    else
+      return 0;
+}
+
+__m128i
+cmp_pi8_je(__m128i a, char b) {
+    if (b == -1) {
+	a[0] = a[0] + 1;
+    }
+    else {
+	a[0] = a[0] - 1;
+    }
+    return a; 
+}
+
+__m128i
+cmp_pi16_je(__m128i a, short b) {
+    if (b == -1) {
+	a[0] = a[0] + 1;
+    }
+    else {
+	a[0] = a[0] - 1;
+    }
+    return a; 
+}
+
+__m128i
+cmp_pi32_je(__m128i a, int b) {
+    if (b == -1) {
+	a[0] = a[0] + 1;
+    }
+    else {
+	a[0] = a[0] - 1;
+    }
+    return a; 
+}
+
+__m128i
+cmp_pi64_je(__m128i a, long long b) {
+    if (b == -1) {
+	a[0] = a[0] + 1;
+    }
+    else {
+	a[0] = a[0] - 1;
+    }
+    return a; 
+}
+
+__m128i
+cmp_pi8_jne(__m128i a, char b) {
+    if (b == -1) {
+	a[0] = a[0] + 1;
+    }
+    a[0] = a[0] - 4;
+    return a; 
+}
+
+__m128i
+cmp_pi16_jne(__m128i a, short b) {
+    if (b == -1) {
+	a[0] = a[0] + 1;
+    }
+    a[0] = a[0] - 4;
+    return a; 
+}
+
+__m128i
+cmp_pi32_jne(__m128i a, int b) {
+    if (b == -1) {
+	a[0] = a[0] + 1;
+    }
+    a[0] = a[0] - 4;
+    return a; 
+}
+
+__m128i
+cmp_pi64_jne(__m128i a, long long b) {
+    if (b == -1) {
+	a[0] = a[0] + 1;
+    }
+    a[0] = a[0] - 4;
+    return a; 
+}