diff mbox series

middle-end: Enhance conditional reduction vectorization by re-association in ifcvt [PR109088]

Message ID 20240130111634.322503-1-juzhe.zhong@rivai.ai
State New
Headers show
Series middle-end: Enhance conditional reduction vectorization by re-association in ifcvt [PR109088] | expand

Commit Message

钟居哲 Jan. 30, 2024, 11:16 a.m. UTC
This patch targets GCC-15.

Consider the following case:

unsigned int
single_loop_with_if_condition (unsigned int *restrict a, unsigned int *restrict b,
			       unsigned int *restrict c, unsigned int loop_size)
{
  unsigned int result = 0;
  for (unsigned int i = 0; i < loop_size; i++)
    {
      if (a[i] > b[i])
	{
	  result += a[i] + 0xa - c[i];
	}
    }
  return result;
}

After investigating LLVM, I found that LLVM re-associates such cases to make
them easier to vectorize.

Take RISC-V assembly as an example.

Before this patch:

        beq     a3,zero,.L5
        slli    a5,a3,32
        srli    a3,a5,30
        mv      a4,a0
        add     a7,a0,a3
        li      a0,0
.L4:
        lw      a3,0(a4)
        addiw   a5,a0,10
        lw      a6,0(a1)
        addi    a4,a4,4
        addw    a5,a5,a3
        bgeu    a6,a3,.L3
        lw      a0,0(a2)
        subw    a0,a5,a0
.L3:
        addi    a1,a1,4
        addi    a2,a2,4
        bne     a7,a4,.L4
        ret
.L5:
        li      a0,0
        ret

After this patch:

	beq	a3,zero,.L4
	slli	a3,a3,32
	srli	a3,a3,32
	vsetvli	a5,zero,e32,m1,ta,ma
	vmv.v.i	v2,0
.L3:
	vsetvli	a5,a3,e32,m1,tu,mu
	slli	a4,a5,2
	sub	a3,a3,a5
	vle32.v	v3,0(a0)
	vle32.v	v0,0(a1)
	add	a0,a0,a4
	vmsgtu.vv	v0,v3,v0
	add	a1,a1,a4
	vle32.v	v1,0(a2),v0.t
	add	a2,a2,a4
	vadd.vi	v1,v1,-10
	vsub.vv	v1,v1,v3
	vadd.vv	v2,v2,v1,v0.t
	bne	a3,zero,.L3
	li	a5,0
	vsetivli	zero,1,e32,m1,ta,ma
	vmv.s.x	v1,a5
	vsetvli	a5,zero,e32,m1,ta,ma
	vredsum.vs	v2,v2,v1
	vmv.x.s	a0,v2
	ret

	PR middle-end/109088

gcc/ChangeLog:

	* tree-if-conv.cc (is_cond_scalar_reduction): Enhance conditional reduction.
	(convert_scalar_cond_reduction): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/pr109088-1.c: New test.
	* gcc.dg/vect/pr109088-2.c: New test.
	* gcc.dg/vect/pr109088-3.c: New test.
	* gcc.dg/vect/pr109088-4.c: New test.
	* gcc.dg/vect/pr109088-5.c: New test.

---
 gcc/testsuite/gcc.dg/vect/pr109088-1.c | 201 ++++++++++++++++
 gcc/testsuite/gcc.dg/vect/pr109088-2.c | 202 ++++++++++++++++
 gcc/testsuite/gcc.dg/vect/pr109088-3.c | 314 +++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/vect/pr109088-4.c |  84 +++++++
 gcc/testsuite/gcc.dg/vect/pr109088-5.c |  96 ++++++++
 gcc/tree-if-conv.cc                    | 150 +++++++++++-
 6 files changed, 1042 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr109088-1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr109088-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr109088-3.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr109088-4.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr109088-5.c
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/pr109088-1.c b/gcc/testsuite/gcc.dg/vect/pr109088-1.c
new file mode 100644
index 00000000000..6772e908535
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr109088-1.c
@@ -0,0 +1,201 @@ 
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+
+#include "tree-vect.h"
+
+#define N 27
+
+#define COND_REDUC(NAME, TYPE, OP)                                             \
+  TYPE __attribute__ ((noipa))                                                 \
+  cond_##NAME (TYPE *__restrict a, int *__restrict cond1,                      \
+	       int *__restrict cond2, TYPE init, int n)                        \
+  {                                                                            \
+    TYPE result = init;                                                        \
+    for (int i = 0; i < n; i++)                                                \
+      if (cond1[i] > cond2[i])                                                 \
+	result OP a[i];                                                        \
+    return result;                                                             \
+  }
+
+COND_REDUC (reduc_sum_char, char, +=)
+COND_REDUC (reduc_sum_short, short, +=)
+COND_REDUC (reduc_sum_int, int, +=)
+COND_REDUC (reduc_sum_long, long, +=)
+COND_REDUC (reduc_and_char, char, &=)
+COND_REDUC (reduc_and_short, short, &=)
+COND_REDUC (reduc_and_int, int, &=)
+COND_REDUC (reduc_and_long, long, &=)
+COND_REDUC (reduc_ior_char, char, |=)
+COND_REDUC (reduc_ior_short, short, |=)
+COND_REDUC (reduc_ior_int, int, |=)
+COND_REDUC (reduc_ior_long, long, |=)
+COND_REDUC (reduc_xor_char, char, ^=)
+COND_REDUC (reduc_xor_short, short, ^=)
+COND_REDUC (reduc_xor_int, int, ^=)
+COND_REDUC (reduc_xor_long, long, ^=)
+
+int
+main (void)
+{
+  check_vect ();
+  int cond1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1,  2,  3, 4,
+		  5,  6,  7,  8,  9,  10, 21, 22, 23, 24, 25, 26, 27};
+  int cond2[N] = {15, 5,  6,  7,  8,  9,  16, 17, 18, 19, 20, 11, 12, 10,
+		  21, 22, 23, 24, 25, 26, 27, 1,  2,  3,  4,  13, 14};
+
+  char a_char[N]
+    = {55, 66, 77,  -88, 111, -9,   109, 37,  23, -101, 22, 37, -56, 67,
+       70, 50, -95, 87,	 73,  -123, -59, 107, 91, 27,	72, 29, 58};
+  short a_short[N] = {551,   662,   773,  -881, 1113, -97, 1094, 378,  237,
+		      -1013, 224,   376,  -562, 673,  705, 508,	 -956, 877,
+		      734,   -1235, -590, 1071, 910,  270, 726,	 298,  589};
+  int a_int[N]
+    = {5510,   6626,   7738,  -8819, 11133, -974, 10947, 3789,	2373,
+       -10132, 2245,   3767,  -5627, 6738,  7059, 5081,	 -9567, 8777,
+       7345,   -12350, -5909, 10710, 9104,  2704, 7263,	 2987,	5898};
+  long a_long[N]
+    = {55106,	66266,	 77387,	 -88198, 111339, -9740, 109475, 37890,	23730,
+       -101326, 22457,	 37679,	 -56270, 67383,	 70593, 50813,	-95677, 87773,
+       73457,	-123501, -59091, 107101, 91049,	 27049, 72639,	29877,	58987};
+
+  char init_char = 7;
+  short init_short = 77;
+  int init_int = 777;
+  long init_long = 777;
+
+#define COND_REDUC_GOLDEN(NAME, TYPE, OP)                                      \
+  for (int i = 0; i < N; i++)                                                  \
+    if (cond1[i] > cond2[i])                                                   \
+      result_gold_##NAME OP a_##TYPE[i];
+
+  char result_reduc_sum_char
+    = cond_reduc_sum_char (a_char, cond1, cond2, init_char, N);
+  char result_gold_reduc_sum_char = init_char;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_sum_char, char, +=)
+  if (result_reduc_sum_char != result_gold_reduc_sum_char)
+    __builtin_abort ();
+
+  short result_reduc_sum_short
+    = cond_reduc_sum_short (a_short, cond1, cond2, init_short, N);
+  short result_gold_reduc_sum_short = init_short;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_sum_short, short, +=)
+  if (result_reduc_sum_short != result_gold_reduc_sum_short)
+    __builtin_abort ();
+
+  int result_reduc_sum_int
+    = cond_reduc_sum_int (a_int, cond1, cond2, init_int, N);
+  int result_gold_reduc_sum_int = init_int;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_sum_int, int, +=)
+  if (result_reduc_sum_int != result_gold_reduc_sum_int)
+    __builtin_abort ();
+
+  long result_reduc_sum_long
+    = cond_reduc_sum_long (a_long, cond1, cond2, init_long, N);
+  long result_gold_reduc_sum_long = init_long;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_sum_long, long, +=)
+  if (result_reduc_sum_long != result_gold_reduc_sum_long)
+    __builtin_abort ();
+
+  char result_reduc_and_char
+    = cond_reduc_and_char (a_char, cond1, cond2, init_char, N);
+  char result_gold_reduc_and_char = init_char;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_and_char, char, &=)
+  if (result_reduc_and_char != result_gold_reduc_and_char)
+    __builtin_abort ();
+
+  short result_reduc_and_short
+    = cond_reduc_and_short (a_short, cond1, cond2, init_short, N);
+  short result_gold_reduc_and_short = init_short;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_and_short, short, &=)
+  if (result_reduc_and_short != result_gold_reduc_and_short)
+    __builtin_abort ();
+
+  int result_reduc_and_int
+    = cond_reduc_and_int (a_int, cond1, cond2, init_int, N);
+  int result_gold_reduc_and_int = init_int;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_and_int, int, &=)
+  if (result_reduc_and_int != result_gold_reduc_and_int)
+    __builtin_abort ();
+
+  long result_reduc_and_long
+    = cond_reduc_and_long (a_long, cond1, cond2, init_long, N);
+  long result_gold_reduc_and_long = init_long;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_and_long, long, &=)
+  if (result_reduc_and_long != result_gold_reduc_and_long)
+    __builtin_abort ();
+
+  char result_reduc_ior_char
+    = cond_reduc_ior_char (a_char, cond1, cond2, init_char, N);
+  char result_gold_reduc_ior_char = init_char;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_ior_char, char, |=)
+  if (result_reduc_ior_char != result_gold_reduc_ior_char)
+    __builtin_abort ();
+
+  short result_reduc_ior_short
+    = cond_reduc_ior_short (a_short, cond1, cond2, init_short, N);
+  short result_gold_reduc_ior_short = init_short;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_ior_short, short, |=)
+  if (result_reduc_ior_short != result_gold_reduc_ior_short)
+    __builtin_abort ();
+
+  int result_reduc_ior_int
+    = cond_reduc_ior_int (a_int, cond1, cond2, init_int, N);
+  int result_gold_reduc_ior_int = init_int;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_ior_int, int, |=)
+  if (result_reduc_ior_int != result_gold_reduc_ior_int)
+    __builtin_abort ();
+
+  long result_reduc_ior_long
+    = cond_reduc_ior_long (a_long, cond1, cond2, init_long, N);
+  long result_gold_reduc_ior_long = init_long;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_ior_long, long, |=)
+  if (result_reduc_ior_long != result_gold_reduc_ior_long)
+    __builtin_abort ();
+
+  char result_reduc_xor_char
+    = cond_reduc_xor_char (a_char, cond1, cond2, init_char, N);
+  char result_gold_reduc_xor_char = init_char;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_xor_char, char, ^=)
+  if (result_reduc_xor_char != result_gold_reduc_xor_char)
+    __builtin_abort ();
+
+  short result_reduc_xor_short
+    = cond_reduc_xor_short (a_short, cond1, cond2, init_short, N);
+  short result_gold_reduc_xor_short = init_short;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_xor_short, short, ^=)
+  if (result_reduc_xor_short != result_gold_reduc_xor_short)
+    __builtin_abort ();
+
+  int result_reduc_xor_int
+    = cond_reduc_xor_int (a_int, cond1, cond2, init_int, N);
+  int result_gold_reduc_xor_int = init_int;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_xor_int, int, ^=)
+  if (result_reduc_xor_int != result_gold_reduc_xor_int)
+    __builtin_abort ();
+
+  long result_reduc_xor_long
+    = cond_reduc_xor_long (a_long, cond1, cond2, init_long, N);
+  long result_gold_reduc_xor_long = init_long;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_xor_long, long, ^=)
+  if (result_reduc_xor_long != result_gold_reduc_xor_long)
+    __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 16 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr109088-2.c b/gcc/testsuite/gcc.dg/vect/pr109088-2.c
new file mode 100644
index 00000000000..94da43b1fdb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr109088-2.c
@@ -0,0 +1,202 @@ 
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-additional-options "-fwrapv -fno-trapv" } */
+
+#include "tree-vect.h"
+
+#define N 27
+
+#define COND_REDUC(NAME, TYPE, OP1, OP2)                                       \
+  TYPE __attribute__ ((noipa))                                                 \
+  cond_##NAME (TYPE *__restrict a, int *__restrict cond1,                      \
+	       int *__restrict cond2, TYPE init, int n)                        \
+  {                                                                            \
+    TYPE result = init;                                                        \
+    for (int i = 0; i < n; i++)                                                \
+      if (cond1[i] > cond2[i])                                                 \
+	result OP1 a[i] OP2 init;                                              \
+    return result;                                                             \
+  }
+
+COND_REDUC (reduc_sum_char, char, +=, +)
+COND_REDUC (reduc_sum_short, short, +=, +)
+COND_REDUC (reduc_sum_int, int, +=, +)
+COND_REDUC (reduc_sum_long, long, +=, +)
+COND_REDUC (reduc_and_char, char, &=, &)
+COND_REDUC (reduc_and_short, short, &=, &)
+COND_REDUC (reduc_and_int, int, &=, &)
+COND_REDUC (reduc_and_long, long, &=, &)
+COND_REDUC (reduc_ior_char, char, |=, |)
+COND_REDUC (reduc_ior_short, short, |=, |)
+COND_REDUC (reduc_ior_int, int, |=, |)
+COND_REDUC (reduc_ior_long, long, |=, |)
+COND_REDUC (reduc_xor_char, char, ^=, ^)
+COND_REDUC (reduc_xor_short, short, ^=, ^)
+COND_REDUC (reduc_xor_int, int, ^=, ^)
+COND_REDUC (reduc_xor_long, long, ^=, ^)
+
+int
+main (void)
+{
+  check_vect ();
+  int cond1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1,  2,  3, 4,
+		  5,  6,  7,  8,  9,  10, 21, 22, 23, 24, 25, 26, 27};
+  int cond2[N] = {15, 5,  6,  7,  8,  9,  16, 17, 18, 19, 20, 11, 12, 10,
+		  21, 22, 23, 24, 25, 26, 27, 1,  2,  3,  4,  13, 14};
+
+  char a_char[N]
+    = {55, 66, 77,  -88, 111, -9,   109, 37,  23, -101, 22, 37, -56, 67,
+       70, 50, -95, 87,	 73,  -123, -59, 107, 91, 27,	72, 29, 58};
+  short a_short[N] = {551,   662,   773,  -881, 1113, -97, 1094, 378,  237,
+		      -1013, 224,   376,  -562, 673,  705, 508,	 -956, 877,
+		      734,   -1235, -590, 1071, 910,  270, 726,	 298,  589};
+  int a_int[N]
+    = {5510,   6626,   7738,  -8819, 11133, -974, 10947, 3789,	2373,
+       -10132, 2245,   3767,  -5627, 6738,  7059, 5081,	 -9567, 8777,
+       7345,   -12350, -5909, 10710, 9104,  2704, 7263,	 2987,	5898};
+  long a_long[N]
+    = {55106,	66266,	 77387,	 -88198, 111339, -9740, 109475, 37890,	23730,
+       -101326, 22457,	 37679,	 -56270, 67383,	 70593, 50813,	-95677, 87773,
+       73457,	-123501, -59091, 107101, 91049,	 27049, 72639,	29877,	58987};
+
+  char init_char = 7;
+  short init_short = 77;
+  int init_int = 777;
+  long init_long = 777;
+
+#define COND_REDUC_GOLDEN(NAME, TYPE, OP1, OP2)                                \
+  for (int i = 0; i < N; i++)                                                  \
+    if (cond1[i] > cond2[i])                                                   \
+      result_gold_##NAME OP1 a_##TYPE[i] OP2 init_##TYPE;
+
+  char result_reduc_sum_char
+    = cond_reduc_sum_char (a_char, cond1, cond2, init_char, N);
+  char result_gold_reduc_sum_char = init_char;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_sum_char, char, +=, +)
+  if (result_reduc_sum_char != result_gold_reduc_sum_char)
+    __builtin_abort ();
+
+  short result_reduc_sum_short
+    = cond_reduc_sum_short (a_short, cond1, cond2, init_short, N);
+  short result_gold_reduc_sum_short = init_short;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_sum_short, short, +=, +)
+  if (result_reduc_sum_short != result_gold_reduc_sum_short)
+    __builtin_abort ();
+
+  int result_reduc_sum_int
+    = cond_reduc_sum_int (a_int, cond1, cond2, init_int, N);
+  int result_gold_reduc_sum_int = init_int;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_sum_int, int, +=, +)
+  if (result_reduc_sum_int != result_gold_reduc_sum_int)
+    __builtin_abort ();
+
+  long result_reduc_sum_long
+    = cond_reduc_sum_long (a_long, cond1, cond2, init_long, N);
+  long result_gold_reduc_sum_long = init_long;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_sum_long, long, +=, +)
+  if (result_reduc_sum_long != result_gold_reduc_sum_long)
+    __builtin_abort ();
+
+  char result_reduc_and_char
+    = cond_reduc_and_char (a_char, cond1, cond2, init_char, N);
+  char result_gold_reduc_and_char = init_char;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_and_char, char, &=, &)
+  if (result_reduc_and_char != result_gold_reduc_and_char)
+    __builtin_abort ();
+
+  short result_reduc_and_short
+    = cond_reduc_and_short (a_short, cond1, cond2, init_short, N);
+  short result_gold_reduc_and_short = init_short;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_and_short, short, &=, &)
+  if (result_reduc_and_short != result_gold_reduc_and_short)
+    __builtin_abort ();
+
+  int result_reduc_and_int
+    = cond_reduc_and_int (a_int, cond1, cond2, init_int, N);
+  int result_gold_reduc_and_int = init_int;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_and_int, int, &=, &)
+  if (result_reduc_and_int != result_gold_reduc_and_int)
+    __builtin_abort ();
+
+  long result_reduc_and_long
+    = cond_reduc_and_long (a_long, cond1, cond2, init_long, N);
+  long result_gold_reduc_and_long = init_long;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_and_long, long, &=, &)
+  if (result_reduc_and_long != result_gold_reduc_and_long)
+    __builtin_abort ();
+
+  char result_reduc_ior_char
+    = cond_reduc_ior_char (a_char, cond1, cond2, init_char, N);
+  char result_gold_reduc_ior_char = init_char;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_ior_char, char, |=, |)
+  if (result_reduc_ior_char != result_gold_reduc_ior_char)
+    __builtin_abort ();
+
+  short result_reduc_ior_short
+    = cond_reduc_ior_short (a_short, cond1, cond2, init_short, N);
+  short result_gold_reduc_ior_short = init_short;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_ior_short, short, |=, |)
+  if (result_reduc_ior_short != result_gold_reduc_ior_short)
+    __builtin_abort ();
+
+  int result_reduc_ior_int
+    = cond_reduc_ior_int (a_int, cond1, cond2, init_int, N);
+  int result_gold_reduc_ior_int = init_int;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_ior_int, int, |=, |)
+  if (result_reduc_ior_int != result_gold_reduc_ior_int)
+    __builtin_abort ();
+
+  long result_reduc_ior_long
+    = cond_reduc_ior_long (a_long, cond1, cond2, init_long, N);
+  long result_gold_reduc_ior_long = init_long;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_ior_long, long, |=, |)
+  if (result_reduc_ior_long != result_gold_reduc_ior_long)
+    __builtin_abort ();
+
+  char result_reduc_xor_char
+    = cond_reduc_xor_char (a_char, cond1, cond2, init_char, N);
+  char result_gold_reduc_xor_char = init_char;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_xor_char, char, ^=, ^)
+  if (result_reduc_xor_char != result_gold_reduc_xor_char)
+    __builtin_abort ();
+
+  short result_reduc_xor_short
+    = cond_reduc_xor_short (a_short, cond1, cond2, init_short, N);
+  short result_gold_reduc_xor_short = init_short;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_xor_short, short, ^=, ^)
+  if (result_reduc_xor_short != result_gold_reduc_xor_short)
+    __builtin_abort ();
+
+  int result_reduc_xor_int
+    = cond_reduc_xor_int (a_int, cond1, cond2, init_int, N);
+  int result_gold_reduc_xor_int = init_int;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_xor_int, int, ^=, ^)
+  if (result_reduc_xor_int != result_gold_reduc_xor_int)
+    __builtin_abort ();
+
+  long result_reduc_xor_long
+    = cond_reduc_xor_long (a_long, cond1, cond2, init_long, N);
+  long result_gold_reduc_xor_long = init_long;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_xor_long, long, ^=, ^)
+  if (result_reduc_xor_long != result_gold_reduc_xor_long)
+    __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 16 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr109088-3.c b/gcc/testsuite/gcc.dg/vect/pr109088-3.c
new file mode 100644
index 00000000000..31bc64ec0d8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr109088-3.c
@@ -0,0 +1,314 @@ 
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-additional-options "-fwrapv -fno-trapv" } */
+
+#include "tree-vect.h"
+
+#define N 27
+
+#define COND_REDUC(NAME, TYPE, OP1, OP2)                                       \
+  TYPE __attribute__ ((noipa))                                                 \
+  cond_##NAME (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,     \
+	       TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f,     \
+	       TYPE *__restrict g, int *__restrict cond1,                      \
+	       int *__restrict cond2, TYPE init, int n)                        \
+  {                                                                            \
+    TYPE result = init;                                                        \
+    for (int i = 0; i < n; i++)                                                \
+      if (cond1[i] > cond2[i])                                                 \
+	result OP1 a[i] OP2 init OP2                                           \
+	  b[i] OP2 c[i] OP2 d[i] OP2 e[i] OP2 f[i] OP2 g[i];                   \
+    return result;                                                             \
+  }
+
+COND_REDUC (reduc_sum_char, char, +=, +)
+COND_REDUC (reduc_sum_short, short, +=, +)
+COND_REDUC (reduc_sum_int, int, +=, +)
+COND_REDUC (reduc_sum_long, long, +=, +)
+COND_REDUC (reduc_and_char, char, &=, &)
+COND_REDUC (reduc_and_short, short, &=, &)
+COND_REDUC (reduc_and_int, int, &=, &)
+COND_REDUC (reduc_and_long, long, &=, &)
+COND_REDUC (reduc_ior_char, char, |=, |)
+COND_REDUC (reduc_ior_short, short, |=, |)
+COND_REDUC (reduc_ior_int, int, |=, |)
+COND_REDUC (reduc_ior_long, long, |=, |)
+COND_REDUC (reduc_xor_char, char, ^=, ^)
+COND_REDUC (reduc_xor_short, short, ^=, ^)
+COND_REDUC (reduc_xor_int, int, ^=, ^)
+COND_REDUC (reduc_xor_long, long, ^=, ^)
+
+int
+main (void)
+{
+  check_vect ();
+  int cond1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1,  2,  3, 4,
+		  5,  6,  7,  8,  9,  10, 21, 22, 23, 24, 25, 26, 27};
+  int cond2[N] = {15, 5,  6,  7,  8,  9,  16, 17, 18, 19, 20, 11, 12, 10,
+		  21, 22, 23, 24, 25, 26, 27, 1,  2,  3,  4,  13, 14};
+
+  char a_char[N]
+    = {55, 66, 77,  -88, 111, -9,   109, 37,  23, -101, 22, 37, -56, 67,
+       70, 50, -95, 87,	 73,  -123, -59, 107, 91, 27,	72, 29, 58};
+  short a_short[N] = {551,   662,   773,  -881, 1113, -97, 1094, 378,  237,
+		      -1013, 224,   376,  -562, 673,  705, 508,	 -956, 877,
+		      734,   -1235, -590, 1071, 910,  270, 726,	 298,  589};
+  int a_int[N]
+    = {5510,   6626,   7738,  -8819, 11133, -974, 10947, 3789,	2373,
+       -10132, 2245,   3767,  -5627, 6738,  7059, 5081,	 -9567, 8777,
+       7345,   -12350, -5909, 10710, 9104,  2704, 7263,	 2987,	5898};
+  long a_long[N]
+    = {55106,	66266,	 77387,	 -88198, 111339, -9740, 109475, 37890,	23730,
+       -101326, 22457,	 37679,	 -56270, 67383,	 70593, 50813,	-95677, 87773,
+       73457,	-123501, -59091, 107101, 91049,	 27049, 72639,	29877,	58987};
+
+  char b_char[N]
+    = {55, 66, 77,  -88, 111, -9,   109, 37,  23, -101, 22, 37, -56, 67,
+       70, 50, -95, 87,	 73,  -123, -59, 107, 91, 27,	72, 29, 58};
+  short b_short[N] = {551,   662,   773,  -881, 1113, -97, 1094, 378,  237,
+		      -1013, 224,   376,  -562, 673,  705, 508,	 -956, 877,
+		      734,   -1235, -590, 1071, 910,  270, 726,	 298,  589};
+  int b_int[N]
+    = {5510,   6626,   7738,  -8819, 11133, -974, 10947, 3789,	2373,
+       -10132, 2245,   3767,  -5627, 6738,  7059, 5081,	 -9567, 8777,
+       7345,   -12350, -5909, 10710, 9104,  2704, 7263,	 2987,	5898};
+  long b_long[N]
+    = {55106,	66266,	 77387,	 -88198, 111339, -9740, 109475, 37890,	23730,
+       -101326, 22457,	 37679,	 -56270, 67383,	 70593, 50813,	-95677, 87773,
+       73457,	-123501, -59091, 107101, 91049,	 27049, 72639,	29877,	58987};
+
+  char c_char[N]
+    = {55, 66, 77,  -88, 111, -9,   109, 37,  23, -101, 22, 37, -56, 67,
+       70, 50, -95, 87,	 73,  -123, -59, 107, 91, 27,	72, 29, 58};
+  short c_short[N] = {551,   662,   773,  -881, 1113, -97, 1094, 378,  237,
+		      -1013, 224,   376,  -562, 673,  705, 508,	 -956, 877,
+		      734,   -1235, -590, 1071, 910,  270, 726,	 298,  589};
+  int c_int[N]
+    = {5510,   6626,   7738,  -8819, 11133, -974, 10947, 3789,	2373,
+       -10132, 2245,   3767,  -5627, 6738,  7059, 5081,	 -9567, 8777,
+       7345,   -12350, -5909, 10710, 9104,  2704, 7263,	 2987,	5898};
+  long c_long[N]
+    = {55106,	66266,	 77387,	 -88198, 111339, -9740, 109475, 37890,	23730,
+       -101326, 22457,	 37679,	 -56270, 67383,	 70593, 50813,	-95677, 87773,
+       73457,	-123501, -59091, 107101, 91049,	 27049, 72639,	29877,	58987};
+
+  char d_char[N]
+    = {55, 66, 77,  -88, 111, -9,   109, 37,  23, -101, 22, 37, -56, 67,
+       70, 50, -95, 87,	 73,  -123, -59, 107, 91, 27,	72, 29, 58};
+  short d_short[N] = {551,   662,   773,  -881, 1113, -97, 1094, 378,  237,
+		      -1013, 224,   376,  -562, 673,  705, 508,	 -956, 877,
+		      734,   -1235, -590, 1071, 910,  270, 726,	 298,  589};
+  int d_int[N]
+    = {5510,   6626,   7738,  -8819, 11133, -974, 10947, 3789,	2373,
+       -10132, 2245,   3767,  -5627, 6738,  7059, 5081,	 -9567, 8777,
+       7345,   -12350, -5909, 10710, 9104,  2704, 7263,	 2987,	5898};
+  long d_long[N]
+    = {55106,	66266,	 77387,	 -88198, 111339, -9740, 109475, 37890,	23730,
+       -101326, 22457,	 37679,	 -56270, 67383,	 70593, 50813,	-95677, 87773,
+       73457,	-123501, -59091, 107101, 91049,	 27049, 72639,	29877,	58987};
+
+  char e_char[N]
+    = {55, 66, 77,  -88, 111, -9,   109, 37,  23, -101, 22, 37, -56, 67,
+       70, 50, -95, 87,	 73,  -123, -59, 107, 91, 27,	72, 29, 58};
+  short e_short[N] = {551,   662,   773,  -881, 1113, -97, 1094, 378,  237,
+		      -1013, 224,   376,  -562, 673,  705, 508,	 -956, 877,
+		      734,   -1235, -590, 1071, 910,  270, 726,	 298,  589};
+  int e_int[N]
+    = {5510,   6626,   7738,  -8819, 11133, -974, 10947, 3789,	2373,
+       -10132, 2245,   3767,  -5627, 6738,  7059, 5081,	 -9567, 8777,
+       7345,   -12350, -5909, 10710, 9104,  2704, 7263,	 2987,	5898};
+  long e_long[N]
+    = {55106,	66266,	 77387,	 -88198, 111339, -9740, 109475, 37890,	23730,
+       -101326, 22457,	 37679,	 -56270, 67383,	 70593, 50813,	-95677, 87773,
+       73457,	-123501, -59091, 107101, 91049,	 27049, 72639,	29877,	58987};
+
+  char f_char[N]
+    = {55, 66, 77,  -88, 111, -9,   109, 37,  23, -101, 22, 37, -56, 67,
+       70, 50, -95, 87,	 73,  -123, -59, 107, 91, 27,	72, 29, 58};
+  short f_short[N] = {551,   662,   773,  -881, 1113, -97, 1094, 378,  237,
+		      -1013, 224,   376,  -562, 673,  705, 508,	 -956, 877,
+		      734,   -1235, -590, 1071, 910,  270, 726,	 298,  589};
+  int f_int[N]
+    = {5510,   6626,   7738,  -8819, 11133, -974, 10947, 3789,	2373,
+       -10132, 2245,   3767,  -5627, 6738,  7059, 5081,	 -9567, 8777,
+       7345,   -12350, -5909, 10710, 9104,  2704, 7263,	 2987,	5898};
+  long f_long[N]
+    = {55106,	66266,	 77387,	 -88198, 111339, -9740, 109475, 37890,	23730,
+       -101326, 22457,	 37679,	 -56270, 67383,	 70593, 50813,	-95677, 87773,
+       73457,	-123501, -59091, 107101, 91049,	 27049, 72639,	29877,	58987};
+
+  char g_char[N]
+    = {55, 66, 77,  -88, 111, -9,   109, 37,  23, -101, 22, 37, -56, 67,
+       70, 50, -95, 87,	 73,  -123, -59, 107, 91, 27,	72, 29, 58};
+  short g_short[N] = {551,   662,   773,  -881, 1113, -97, 1094, 378,  237,
+		      -1013, 224,   376,  -562, 673,  705, 508,	 -956, 877,
+		      734,   -1235, -590, 1071, 910,  270, 726,	 298,  589};
+  int g_int[N]
+    = {5510,   6626,   7738,  -8819, 11133, -974, 10947, 3789,	2373,
+       -10132, 2245,   3767,  -5627, 6738,  7059, 5081,	 -9567, 8777,
+       7345,   -12350, -5909, 10710, 9104,  2704, 7263,	 2987,	5898};
+  long g_long[N]
+    = {55106,	66266,	 77387,	 -88198, 111339, -9740, 109475, 37890,	23730,
+       -101326, 22457,	 37679,	 -56270, 67383,	 70593, 50813,	-95677, 87773,
+       73457,	-123501, -59091, 107101, 91049,	 27049, 72639,	29877,	58987};
+
+  char init_char = 7;
+  short init_short = 77;
+  int init_int = 777;
+  long init_long = 777;
+
+#define COND_REDUC_GOLDEN(NAME, TYPE, OP1, OP2)                                \
+  for (int i = 0; i < N; i++)                                                  \
+    if (cond1[i] > cond2[i])                                                   \
+      result_gold_##NAME OP1 a_##TYPE[i] OP2 init_##TYPE OP2                   \
+	b_##TYPE[i] OP2 c_##TYPE[i] OP2 d_##TYPE[i] OP2 e_##TYPE[i] OP2        \
+	  f_##TYPE[i] OP2 g_##TYPE[i];
+
+  char result_reduc_sum_char
+    = cond_reduc_sum_char (a_char, b_char, c_char, d_char, e_char, f_char,
+			   g_char, cond1, cond2, init_char, N);
+  char result_gold_reduc_sum_char = init_char;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_sum_char, char, +=, +)
+  if (result_reduc_sum_char != result_gold_reduc_sum_char)
+    __builtin_abort ();
+
+  short result_reduc_sum_short
+    = cond_reduc_sum_short (a_short, b_short, c_short, d_short, e_short,
+			    f_short, g_short, cond1, cond2, init_short, N);
+  short result_gold_reduc_sum_short = init_short;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_sum_short, short, +=, +)
+  if (result_reduc_sum_short != result_gold_reduc_sum_short)
+    __builtin_abort ();
+
+  int result_reduc_sum_int
+    = cond_reduc_sum_int (a_int, b_int, c_int, d_int, e_int, f_int, g_int,
+			  cond1, cond2, init_int, N);
+  int result_gold_reduc_sum_int = init_int;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_sum_int, int, +=, +)
+  if (result_reduc_sum_int != result_gold_reduc_sum_int)
+    __builtin_abort ();
+
+  long result_reduc_sum_long
+    = cond_reduc_sum_long (a_long, b_long, c_long, d_long, e_long, f_long,
+			   g_long, cond1, cond2, init_long, N);
+  long result_gold_reduc_sum_long = init_long;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_sum_long, long, +=, +)
+  if (result_reduc_sum_long != result_gold_reduc_sum_long)
+    __builtin_abort ();
+
+  char result_reduc_and_char
+    = cond_reduc_and_char (a_char, b_char, c_char, d_char, e_char, f_char,
+			   g_char, cond1, cond2, init_char, N);
+  char result_gold_reduc_and_char = init_char;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_and_char, char, &=, &)
+  if (result_reduc_and_char != result_gold_reduc_and_char)
+    __builtin_abort ();
+
+  short result_reduc_and_short
+    = cond_reduc_and_short (a_short, b_short, c_short, d_short, e_short,
+			    f_short, g_short, cond1, cond2, init_short, N);
+  short result_gold_reduc_and_short = init_short;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_and_short, short, &=, &)
+  if (result_reduc_and_short != result_gold_reduc_and_short)
+    __builtin_abort ();
+
+  int result_reduc_and_int
+    = cond_reduc_and_int (a_int, b_int, c_int, d_int, e_int, f_int, g_int,
+			  cond1, cond2, init_int, N);
+  int result_gold_reduc_and_int = init_int;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_and_int, int, &=, &)
+  if (result_reduc_and_int != result_gold_reduc_and_int)
+    __builtin_abort ();
+
+  long result_reduc_and_long
+    = cond_reduc_and_long (a_long, b_long, c_long, d_long, e_long, f_long,
+			   g_long, cond1, cond2, init_long, N);
+  long result_gold_reduc_and_long = init_long;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_and_long, long, &=, &)
+  if (result_reduc_and_long != result_gold_reduc_and_long)
+    __builtin_abort ();
+
+  char result_reduc_ior_char
+    = cond_reduc_ior_char (a_char, b_char, c_char, d_char, e_char, f_char,
+			   g_char, cond1, cond2, init_char, N);
+  char result_gold_reduc_ior_char = init_char;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_ior_char, char, |=, |)
+  if (result_reduc_ior_char != result_gold_reduc_ior_char)
+    __builtin_abort ();
+
+  short result_reduc_ior_short
+    = cond_reduc_ior_short (a_short, b_short, c_short, d_short, e_short,
+			    f_short, g_short, cond1, cond2, init_short, N);
+  short result_gold_reduc_ior_short = init_short;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_ior_short, short, |=, |)
+  if (result_reduc_ior_short != result_gold_reduc_ior_short)
+    __builtin_abort ();
+
+  int result_reduc_ior_int
+    = cond_reduc_ior_int (a_int, b_int, c_int, d_int, e_int, f_int, g_int,
+			  cond1, cond2, init_int, N);
+  int result_gold_reduc_ior_int = init_int;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_ior_int, int, |=, |)
+  if (result_reduc_ior_int != result_gold_reduc_ior_int)
+    __builtin_abort ();
+
+  long result_reduc_ior_long
+    = cond_reduc_ior_long (a_long, b_long, c_long, d_long, e_long, f_long,
+			   g_long, cond1, cond2, init_long, N);
+  long result_gold_reduc_ior_long = init_long;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_ior_long, long, |=, |)
+  if (result_reduc_ior_long != result_gold_reduc_ior_long)
+    __builtin_abort ();
+
+  char result_reduc_xor_char
+    = cond_reduc_xor_char (a_char, b_char, c_char, d_char, e_char, f_char,
+			   g_char, cond1, cond2, init_char, N);
+  char result_gold_reduc_xor_char = init_char;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_xor_char, char, ^=, ^)
+  if (result_reduc_xor_char != result_gold_reduc_xor_char)
+    __builtin_abort ();
+
+  short result_reduc_xor_short
+    = cond_reduc_xor_short (a_short, b_short, c_short, d_short, e_short,
+			    f_short, g_short, cond1, cond2, init_short, N);
+  short result_gold_reduc_xor_short = init_short;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_xor_short, short, ^=, ^)
+  if (result_reduc_xor_short != result_gold_reduc_xor_short)
+    __builtin_abort ();
+
+  int result_reduc_xor_int
+    = cond_reduc_xor_int (a_int, b_int, c_int, d_int, e_int, f_int, g_int,
+			  cond1, cond2, init_int, N);
+  int result_gold_reduc_xor_int = init_int;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_xor_int, int, ^=, ^)
+  if (result_reduc_xor_int != result_gold_reduc_xor_int)
+    __builtin_abort ();
+
+  long result_reduc_xor_long
+    = cond_reduc_xor_long (a_long, b_long, c_long, d_long, e_long, f_long,
+			   g_long, cond1, cond2, init_long, N);
+  long result_gold_reduc_xor_long = init_long;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_xor_long, long, ^=, ^)
+  if (result_reduc_xor_long != result_gold_reduc_xor_long)
+    __builtin_abort ();
+  return 0;
+}
+
+/* FIXME: The expected count should be '16' instead of '15', but the 'reduc_sum_char' case is not vectorized yet.  */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 15 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr109088-4.c b/gcc/testsuite/gcc.dg/vect/pr109088-4.c
new file mode 100644
index 00000000000..eb5bbbac1f0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr109088-4.c
@@ -0,0 +1,84 @@ 
+/* { dg-require-effective-target vect_float } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-additional-options "-ffast-math" } */
+
+#include "tree-vect.h"
+
+#define N 27
+
+#define COND_REDUC(NAME, TYPE, OP1, OP2)                                       \
+  TYPE __attribute__ ((noipa))                                                 \
+  cond_##NAME (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,     \
+	       TYPE *__restrict d, TYPE *__restrict e, TYPE *__restrict f,     \
+	       TYPE *__restrict g, int *__restrict cond1,                      \
+	       int *__restrict cond2, TYPE init, int n)                        \
+  {                                                                            \
+    TYPE result = init;                                                        \
+    for (int i = 0; i < n; i++)                                                \
+      if (cond1[i] > cond2[i])                                                 \
+	result OP1 a[i] OP2 init OP2                                           \
+	  b[i] OP2 c[i] OP2 d[i] OP2 e[i] OP2 f[i] OP2 g[i];                   \
+    return result;                                                             \
+  }
+
+COND_REDUC (reduc_sum_float, float, +=, +)
+
+int
+main (void)
+{
+  check_vect ();
+  int cond1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1,  2,  3, 4,
+		  5,  6,  7,  8,  9,  10, 21, 22, 23, 24, 25, 26, 27};
+  int cond2[N] = {15, 5,  6,  7,  8,  9,  16, 17, 18, 19, 20, 11, 12, 10,
+		  21, 22, 23, 24, 25, 26, 27, 1,  2,  3,  4,  13, 14};
+
+  float a_float[N]
+    = {55, 66, 77,  -88, 111, -9,   109, 37,  23, -101, 22, 37, -56, 67,
+       70, 50, -95, 87,	 73,  -123, -59, 107, 91, 27,	72, 29, 58};
+
+  float b_float[N]
+    = {55, 66, 77,  -88, 111, -9,   109, 37,  23, -101, 22, 37, -56, 67,
+       70, 50, -95, 87,	 73,  -123, -59, 107, 91, 27,	72, 29, 58};
+
+  float c_float[N]
+    = {55, 66, 77,  -88, 111, -9,   109, 37,  23, -101, 22, 37, -56, 67,
+       70, 50, -95, 87,	 73,  -123, -59, 107, 91, 27,	72, 29, 58};
+
+  float d_float[N]
+    = {55, 66, 77,  -88, 111, -9,   109, 37,  23, -101, 22, 37, -56, 67,
+       70, 50, -95, 87,	 73,  -123, -59, 107, 91, 27,	72, 29, 58};
+
+  float e_float[N]
+    = {55, 66, 77,  -88, 111, -9,   109, 37,  23, -101, 22, 37, -56, 67,
+       70, 50, -95, 87,	 73,  -123, -59, 107, 91, 27,	72, 29, 58};
+
+  float f_float[N]
+    = {55, 66, 77,  -88, 111, -9,   109, 37,  23, -101, 22, 37, -56, 67,
+       70, 50, -95, 87,	 73,  -123, -59, 107, 91, 27,	72, 29, 58};
+
+  float g_float[N]
+    = {55, 66, 77,  -88, 111, -9,   109, 37,  23, -101, 22, 37, -56, 67,
+       70, 50, -95, 87,	 73,  -123, -59, 107, 91, 27,	72, 29, 58};
+
+  float init_float = 7;
+
+#define COND_REDUC_GOLDEN(NAME, TYPE, OP1, OP2)                                \
+  for (int i = 0; i < N; i++)                                                  \
+    if (cond1[i] > cond2[i])                                                   \
+      result_gold_##NAME OP1 a_##TYPE[i] OP2 init_##TYPE OP2                   \
+	b_##TYPE[i] OP2 c_##TYPE[i] OP2 d_##TYPE[i] OP2 e_##TYPE[i] OP2        \
+	  f_##TYPE[i] OP2 g_##TYPE[i];
+
+  float result_reduc_sum_float
+    = cond_reduc_sum_float (a_float, b_float, c_float, d_float, e_float, f_float,
+			   g_float, cond1, cond2, init_float, N);
+  float result_gold_reduc_sum_float = init_float;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_sum_float, float, +=, +)
+  if (result_reduc_sum_float != result_gold_reduc_sum_float)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr109088-5.c b/gcc/testsuite/gcc.dg/vect/pr109088-5.c
new file mode 100644
index 00000000000..99cc2d65a4c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr109088-5.c
@@ -0,0 +1,96 @@ 
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-additional-options "-fwrapv -fno-trapv" } */
+
+#include "tree-vect.h"
+
+#define N 27
+
+#define COND_REDUC(NAME, TYPE, OP1, OP2)                                       \
+  TYPE __attribute__ ((noipa))                                                 \
+  cond_##NAME (TYPE *__restrict a, int *__restrict cond1,                      \
+	       int *__restrict cond2, TYPE init, int n)                        \
+  {                                                                            \
+    TYPE result = init;                                                        \
+    for (int i = 0; i < n; i++)                                                \
+      if (cond1[i] > cond2[i])                                                 \
+	result OP1 a[i] OP2 init;                                              \
+    return result;                                                             \
+  }
+
+COND_REDUC (reduc_sum_char, char, -=, +)
+COND_REDUC (reduc_sum_short, short, -=, +)
+COND_REDUC (reduc_sum_int, int, -=, +)
+COND_REDUC (reduc_sum_long, long, -=, +)
+
+
+int
+main (void)
+{
+  check_vect ();
+  int cond1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1,  2,  3, 4,
+		  5,  6,  7,  8,  9,  10, 21, 22, 23, 24, 25, 26, 27};
+  int cond2[N] = {15, 5,  6,  7,  8,  9,  16, 17, 18, 19, 20, 11, 12, 10,
+		  21, 22, 23, 24, 25, 26, 27, 1,  2,  3,  4,  13, 14};
+
+  char a_char[N]
+    = {55, 66, 77,  -88, 111, -9,   109, 37,  23, -101, 22, 37, -56, 67,
+       70, 50, -95, 87,	 73,  -123, -59, 107, 91, 27,	72, 29, 58};
+  short a_short[N] = {551,   662,   773,  -881, 1113, -97, 1094, 378,  237,
+		      -1013, 224,   376,  -562, 673,  705, 508,	 -956, 877,
+		      734,   -1235, -590, 1071, 910,  270, 726,	 298,  589};
+  int a_int[N]
+    = {5510,   6626,   7738,  -8819, 11133, -974, 10947, 3789,	2373,
+       -10132, 2245,   3767,  -5627, 6738,  7059, 5081,	 -9567, 8777,
+       7345,   -12350, -5909, 10710, 9104,  2704, 7263,	 2987,	5898};
+  long a_long[N]
+    = {55106,	66266,	 77387,	 -88198, 111339, -9740, 109475, 37890,	23730,
+       -101326, 22457,	 37679,	 -56270, 67383,	 70593, 50813,	-95677, 87773,
+       73457,	-123501, -59091, 107101, 91049,	 27049, 72639,	29877,	58987};
+
+  char init_char = 7;
+  short init_short = 77;
+  int init_int = 777;
+  long init_long = 777;
+
+#define COND_REDUC_GOLDEN(NAME, TYPE, OP1, OP2)                                \
+  for (int i = 0; i < N; i++)                                                  \
+    if (cond1[i] > cond2[i])                                                   \
+      result_gold_##NAME OP1 a_##TYPE[i] OP2 init_##TYPE;
+
+  char result_reduc_sum_char
+    = cond_reduc_sum_char (a_char, cond1, cond2, init_char, N);
+  char result_gold_reduc_sum_char = init_char;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_sum_char, char, -=, +)
+  if (result_reduc_sum_char != result_gold_reduc_sum_char)
+    __builtin_abort ();
+
+  short result_reduc_sum_short
+    = cond_reduc_sum_short (a_short, cond1, cond2, init_short, N);
+  short result_gold_reduc_sum_short = init_short;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_sum_short, short, -=, +)
+  if (result_reduc_sum_short != result_gold_reduc_sum_short)
+    __builtin_abort ();
+
+  int result_reduc_sum_int
+    = cond_reduc_sum_int (a_int, cond1, cond2, init_int, N);
+  int result_gold_reduc_sum_int = init_int;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_sum_int, int, -=, +)
+  if (result_reduc_sum_int != result_gold_reduc_sum_int)
+    __builtin_abort ();
+
+  long result_reduc_sum_long
+    = cond_reduc_sum_long (a_long, cond1, cond2, init_long, N);
+  long result_gold_reduc_sum_long = init_long;
+#pragma GCC novector
+  COND_REDUC_GOLDEN (reduc_sum_long, long, -=, +)
+  if (result_reduc_sum_long != result_gold_reduc_sum_long)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 8e79362f96a..53da65d6ae8 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -1788,14 +1788,118 @@  is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
   r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
 
   /* Make R_OP1 to hold reduction variable.  */
+  gimple *reassociate_stmt = NULL;
   if (r_nop2 == PHI_RESULT (header_phi)
       && commutative_tree_code (reduction_op))
     {
       std::swap (r_op1, r_op2);
       std::swap (r_nop1, r_nop2);
     }
-  else if (r_nop1 != PHI_RESULT (header_phi))
-    return false;
+  else if (r_nop1 == PHI_RESULT (header_phi))
+    ;
+  /* Analyze the statement chain of STMT so that we can generate a
+     better if-conversion code sequence.  We are trying to catch the
+     following situation:
+
+       loop-header:
+	 reduc_1 = PHI <0, reduc_2>
+
+	 ...
+	 if (...)
+	 tmp1 = reduc_1 + rhs1;
+	 tmp2 = tmp1 + rhs2;
+	 tmp3 = tmp2 + rhs3;
+	 ...
+	 reduc_3 = tmpN-1 + rhsN-1;
+
+	 reduc_2 = PHI <reduc_1, reduc_3>
+
+       and re-associate it to:
+
+	 reduc_1 = PHI <0, reduc_2>
+
+	 tmp1 = rhs1;
+	 tmp2 = tmp1 + rhs2;
+	 tmp3 = tmp2 + rhs3;
+	 ...
+	 reduc_3 = tmpN-1 + rhsN-1;
+
+	 ifcvt = cond_expr ? reduc_3 : 0;
+	 reduc_2 = reduc_1 +/- ifcvt;  */
+  else
+    {
+      /* We only re-associate when the header PHI result has exactly 2 uses.
+	 One is a simple assignment (e.g. PLUS_EXPR or MINUS_EXPR),
+	 the other is the current PHI.  That is:
+
+	 reduc_1 = PHI <..., reduc_2>     ---> Header PHI.
+	 ...
+	 if (...)
+	   tmp1 = reduc_1 + rhs1;         ---> First use.
+	 ...
+	 reduc_2 = PHI <reduc_1, reduc_3> ---> Last use.
+	 ...
+
+	 TODO: We can relax the check here in the future when we see there
+	 are more cases to be optimized.  */
+      if (num_imm_uses (PHI_RESULT (header_phi)) != 2
+	  || EDGE_COUNT (gimple_bb (stmt)->succs) != 1)
+	return false;
+
+      /* For TYPE_OVERFLOW_UNDEFINED the ops would have to be converted to
+	 unsigned to avoid spurious undefined overflow; punt for now.  */
+      if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (PHI_RESULT (phi)))
+	  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (PHI_RESULT (phi))))
+	return false;
+
+      /* Do not re-associate a floating-point reduction unless associative
+	 math is enabled and signed zeros, rounding and NaNs need no care.  */
+      if (FLOAT_TYPE_P (TREE_TYPE (PHI_RESULT (phi)))
+	  && (!flag_associative_math
+	      || HONOR_SIGNED_ZEROS (TREE_TYPE (PHI_RESULT (phi)))
+	      || HONOR_SIGN_DEPENDENT_ROUNDING (TREE_TYPE (PHI_RESULT (phi)))
+	      || HONOR_NANS (TREE_TYPE (PHI_RESULT (phi)))))
+	return false;
+
+      /* The first immediate use should be the PHI that we are visiting.  */
+      gimple *first_use_stmt = USE_STMT (
+	first_readonly_imm_use (&imm_iter, PHI_RESULT (header_phi)));
+      if (first_use_stmt != phi
+	  /* This first use should be located in the fallthrough block.  */
+	  || gimple_bb (first_use_stmt) == gimple_bb (stmt)
+	  || !flow_bb_inside_loop_p (gimple_bb (stmt)->loop_father,
+				     gimple_bb (first_use_stmt))
+	  || !find_fallthru_edge (gimple_bb (stmt)->succs)
+	  || gimple_bb (first_use_stmt)
+	       != FALLTHRU_EDGE (gimple_bb (stmt))->dest)
+	return false;
+
+      /* The last use STMT should be a simple assign STMT
+	 that has an SSA_NAME lhs.  */
+      gimple *last_use_stmt = USE_STMT (next_readonly_imm_use (&imm_iter));
+      if (gimple_code (last_use_stmt) != GIMPLE_ASSIGN
+	  || TREE_CODE (gimple_assign_lhs (last_use_stmt)) != SSA_NAME
+	  /* The last use STMT lhs should be single use and in the same
+	     block as the current STMT.  */
+	  || !has_single_use (gimple_assign_lhs (last_use_stmt))
+	  || gimple_bb (last_use_stmt) != gimple_bb (stmt))
+	return false;
+
+      r_op1 = *has_nop ? gimple_assign_lhs (last_use_stmt)
+		       : PHI_RESULT (header_phi);
+      r_op2 = gimple_assign_lhs (stmt);
+      r_nop1 = *has_nop ? PHI_RESULT (header_phi) : NULL_TREE;
+      r_nop2 = *has_nop ? gimple_assign_lhs (last_use_stmt) : NULL_TREE;
+      reassociate_stmt = last_use_stmt;
+      tree_code reassociate_op = gimple_assign_rhs_code (reassociate_stmt);
+      if (reassociate_op != PLUS_EXPR
+	  && reassociate_op != MINUS_EXPR
+	  && reassociate_op != MULT_EXPR
+	  && reassociate_op != BIT_IOR_EXPR
+	  && reassociate_op != BIT_XOR_EXPR
+	  && reassociate_op != BIT_AND_EXPR)
+	return false;
+    }
 
   if (*has_nop)
     {
@@ -1820,12 +1924,43 @@  is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
 	continue;
       if (use_stmt == stmt)
 	continue;
+      if (use_stmt == reassociate_stmt)
+	continue;
       if (gimple_code (use_stmt) != GIMPLE_PHI)
 	return false;
     }
 
   *op0 = r_op1; *op1 = r_op2;
   *reduc = stmt;
+
+  if (reassociate_stmt)
+    {
+      /* Transform:
+
+	if (...)
+	   tmp1 = reduc_1 + rhs1;
+	   tmp2 = tmp1 + rhs2;
+	   tmp3 = tmp2 + rhs3;
+
+	into:
+
+	   tmp1 = rhs1;       ---> reduc_1 is dropped from the chain.
+	   tmp2 = tmp1 + rhs2;
+	   tmp3 = tmp2 + rhs3;
+	   ...
+	   reduc_3 = tmpN-1 + rhsN-1;
+	   ifcvt = cond_expr ? reduc_3 : 0;  */
+      gimple_stmt_iterator gsi = gsi_for_stmt (reassociate_stmt);
+      gimple *new_stmt;
+      if (gimple_assign_rhs1 (reassociate_stmt) == r_op1)
+	new_stmt = gimple_build_assign (gimple_assign_lhs (reassociate_stmt),
+					gimple_assign_rhs2 (reassociate_stmt));
+      else if (gimple_assign_rhs2 (reassociate_stmt) == r_op1)
+	new_stmt = gimple_build_assign (gimple_assign_lhs (reassociate_stmt),
+					gimple_assign_rhs1 (reassociate_stmt));
+
+      gsi_replace (&gsi, new_stmt, true);
+    }
   return true;
 }
 
@@ -1912,12 +2047,17 @@  convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
       gsi_remove (&stmt_it, true);
       release_defs (nop_reduc);
     }
+
   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
 
   /* Delete original reduction stmt.  */
-  stmt_it = gsi_for_stmt (reduc);
-  gsi_remove (&stmt_it, true);
-  release_defs (reduc);
+  if (op1 != gimple_assign_lhs (reduc))
+    {
+      stmt_it = gsi_for_stmt (reduc);
+      gsi_remove (&stmt_it, true);
+      release_defs (reduc);
+    }
+
   return rhs;
 }