@@ -1436,3 +1436,36 @@ (define_insn_and_split "*n<optab><mode>"
DONE;
}
[(set_attr "type" "vmalu")])
+
+;; Combine pattern for early break auto-vectorization: a VLMAX popcount fed
+;; by vcond_mask_len (mask, ones, zeros, len, bias) is split into a single
+;; non-VLMAX popcount (mask, len), i.e. operand 4 becomes the vector length.
+(define_insn_and_split "*vcond_mask_len_popcount_<VB_VLS:mode><P:mode>"
+ [(set (match_operand:P 0 "register_operand")
+ (popcount:P
+ (unspec:VB_VLS [
+ (unspec:VB_VLS [
+ (match_operand:VB_VLS 1 "register_operand")
+ (match_operand:VB_VLS 2 "const_1_operand")
+ (match_operand:VB_VLS 3 "const_0_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK)
+ (match_operand 6 "autovec_length_operand") ; outer length; the split passes operand 4 instead
+ (const_int 1)
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))]
+ "TARGET_VECTOR
+ && can_create_pseudo_p ()
+ && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS (<VB_VLS:MODE>mode)).exists ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_nonvlmax_insn (
+ code_for_pred_popcount (<VB_VLS:MODE>mode, Pmode),
+ riscv_vector::CPOP_OP,
+ operands, operands[4]); /* operands[4] is len of the inner vcond_mask_len.  */
+ DONE;
+ }
+ [(set_attr "type" "vector")]
+)
@@ -2612,3 +2612,63 @@ (define_expand "rawmemchr<ANYI:mode>"
DONE;
}
)
+
+;; =========================================================================
+;; == Early break auto-vectorization patterns
+;; =========================================================================
+
+;; vcond_mask_len: op0 = op1 with every lane at index >= len (op4) cleared.
+(define_insn_and_split "vcond_mask_len_<mode>"
+  [(set (match_operand:VB 0 "register_operand")
+    (unspec:VB [
+      (match_operand:VB 1 "register_operand")
+      (match_operand:VB 2 "const_1_operand")
+      (match_operand:VB 3 "const_0_operand")
+      (match_operand 4 "autovec_length_operand")
+      (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK))]
+  "TARGET_VECTOR
+   && can_create_pseudo_p ()
+   && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS (<MODE>mode)).exists ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    machine_mode mode = riscv_vector::get_vector_mode (Pmode,
+                          GET_MODE_NUNITS (<MODE>mode)).require ();
+    rtx reg = gen_reg_rtx (mode); /* Receives the lane indices 0, 1, 2, ...  */
+    riscv_vector::expand_vec_series (reg, const0_rtx, const1_rtx);
+    rtx dup_rtx = gen_rtx_VEC_DUPLICATE (mode, operands[4]);
+    rtx cmp = gen_rtx_fmt_ee (LTU, <MODE>mode, reg, dup_rtx); /* index <u len  */
+    rtx ops[] = {operands[0], operands[1], operands[1], cmp, reg, operands[4]};
+    riscv_vector::emit_vlmax_insn (code_for_pred_cmp_scalar (mode),
+                                   riscv_vector::COMPARE_OP_MU, ops); /* op1: mask and merge.  */
+    DONE;
+  }
+  [(set_attr "type" "vector")])
+
+;; cbranch<mode>4: branch to operand 3 on an equality test of two mask vectors.
+;; The vector test is reduced to a scalar one: popcount (op1 ^ op2) against 0.
+(define_expand "cbranch<mode>4"
+  [(set (pc)
+        (if_then_else
+          (match_operator 0 "equality_operator"
+            [(match_operand:VB_VLS 1 "register_operand")
+             (match_operand:VB_VLS 2 "reg_or_0_operand")])
+          (label_ref (match_operand 3 ""))
+          (pc)))]
+  "TARGET_VECTOR"
+  {
+    /* A zero op2 needs no XOR: op1 itself already marks the differing lanes.  */
+    rtx pred = operands[1];
+    if (operands[2] != CONST0_RTX (<MODE>mode))
+      pred = expand_binop (<MODE>mode, xor_optab, operands[1], operands[2],
+                           NULL_RTX, 0, OPTAB_DIRECT);
+    rtx reg = gen_reg_rtx (Pmode);
+    rtx cpop_ops[] = {reg, pred};
+    riscv_vector::emit_vlmax_insn (code_for_pred_popcount (<MODE>mode, Pmode),
+                                   riscv_vector::CPOP_OP, cpop_ops);
+    /* No DONE: the template above is emitted with these scalar operands.  */
+    operands[1] = reg;
+    operands[2] = const0_rtx;
+  }
+)
@@ -102,6 +102,7 @@ (define_c_enum "unspec" [
UNSPEC_WREDUC_SUMU
UNSPEC_WREDUC_SUM_ORDERED
UNSPEC_WREDUC_SUM_UNORDERED
+ UNSPEC_SELECT_MASK
])
(define_c_enum "unspecv" [
@@ -6121,21 +6121,21 @@ (define_insn "@pred_not<mode>"
(set_attr "vl_op_idx" "4")
(set (attr "avl_type_idx") (const_int 5))])
-(define_insn "@pred_popcount<VB:mode><P:mode>"
- [(set (match_operand:P 0 "register_operand" "=r")
+(define_insn "@pred_popcount<VB_VLS:mode><P:mode>" ; vcpop.m: op0 = popcount (op1 & op2)
+ [(set (match_operand:P 0 "register_operand" "=r")
(popcount:P
- (unspec:VB
- [(and:VB
- (match_operand:VB 1 "vector_mask_operand" "vmWc1")
- (match_operand:VB 2 "register_operand" " vr"))
- (match_operand 3 "vector_length_operand" " rK")
- (match_operand 4 "const_int_operand" " i")
+ (unspec:VB_VLS
+ [(and:VB_VLS
+ (match_operand:VB_VLS 1 "vector_mask_operand" "vmWc1")
+ (match_operand:VB_VLS 2 "register_operand" " vr"))
+ (match_operand 3 "vector_length_operand" " rK")
+ (match_operand 4 "const_int_operand" " i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))]
"TARGET_VECTOR"
"vcpop.m\t%0,%2%p1"
[(set_attr "type" "vmpop")
- (set_attr "mode" "<VB:MODE>")])
+ (set_attr "mode" "<VB_VLS:MODE>")])
(define_insn "@pred_ffs<VB:mode><P:mode>"
[(set (match_operand:P 0 "register_operand" "=r")
new file mode 100644
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -fdump-tree-vect-details" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 803
+
+unsigned vect_a[N];
+unsigned vect_b[N];
+
+/*
+** test:
+** ...
+** vmsltu\.vv\s+v[0-9]+\s*,v[0-9]+,\s*v[0-9]+
+** vcpop\.m\s+[atx][0-9]+\s*,v[0-9]+
+** ...
+*/
+unsigned test (unsigned x, int n)
+{
+ unsigned ret = 0; /* Never reassigned: the function always returns 0.  */
+
+ for (int i = 0; i < n; i++)
+ {
+ vect_b[i] = x + i; /* Store performed before the exit test.  */
+
+ if (vect_a[i] > x) /* Data-dependent early exit.  */
+ break;
+
+ vect_a[i] = x;
+ }
+
+ return ret;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
new file mode 100644
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -fdump-tree-vect-details" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 1728
+
+unsigned vect_a[N];
+unsigned vect_b[N];
+
+/*
+** test:
+** ...
+** vmsltu\.vv\s+v[0-9]+\s*,v[0-9]+,\s*v[0-9]+
+** vcpop\.m\s+[atx][0-9]+\s*,v[0-9]+
+** ...
+*/
+unsigned test (unsigned limit, int n)
+{
+ unsigned ret = 0;
+
+ for (int i = 0; i < n; i++)
+ {
+ vect_b[i] = limit + i;
+
+ if (vect_a[i] > limit) /* Data-dependent early exit.  */
+ {
+ ret = vect_b[i]; /* Value stored just above: limit + i.  */
+ return ret;
+ }
+
+ vect_a[i] = limit;
+ }
+
+ return ret; /* Still 0: no element exceeded limit.  */
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */