@@ -11883,6 +11883,15 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
|| TARGET_HAVE_MVE)
&& simd_immediate_valid_for_move (x, mode, NULL, NULL))
*cost = COSTS_N_INSNS (1);
+ else if (TARGET_HAVE_MVE
+ && outer_code == COMPARE
+ && VALID_MVE_PRED_MODE (mode))
+ /* MVE allows very limited instructions on VPT.P0, however comparisons
+	 to 0 do not require us to materialize this constant or require a
+ predicate comparison as we can go through SImode. For that reason
+ allow P0 CMP 0 as a cheap operation such that the 0 isn't forced to
+ registers as we can't compare two predicates. */
+ *cost = COSTS_N_INSNS (1);
else
*cost = COSTS_N_INSNS (4);
return true;
@@ -6880,6 +6880,21 @@ (define_expand "vcond_mask_<mode><MVE_vpred>"
DONE;
})
+(define_expand "cbranch<mode>4"
+ [(set (pc) (if_then_else
+ (match_operator 0 "expandable_comparison_operator"
+ [(match_operand:MVE_7 1 "register_operand")
+ (match_operand:MVE_7 2 "zero_operand")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_HAVE_MVE"
+{
+ rtx val = gen_reg_rtx (SImode);
+ emit_move_insn (val, gen_lowpart (SImode, operands[1]));
+ emit_jump_insn (gen_cbranchsi4 (operands[0], val, const0_rtx, operands[3]));
+ DONE;
+})
+
;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
(define_expand "@arm_mve_reinterpret<mode>"
[(set (match_operand:MVE_vecs 0 "register_operand")
new file mode 100644
@@ -0,0 +1,117 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 640
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** f1:
+** ...
+** vcmp.s32 gt, q[0-9]+, q[0-9]+
+** vmrs r[0-9]+, p0 @ movhi
+** cbnz r[0-9]+, \.L[0-9]+
+** ...
+*/
+void f1 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] > 0)
+ break;
+ }
+}
+
+/*
+** f2:
+** ...
+** vcmp.s32 ge, q[0-9]+, q[0-9]+
+** vmrs r[0-9]+, p0 @ movhi
+** cbnz r[0-9]+, \.L[0-9]+
+** ...
+*/
+void f2 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] >= 0)
+ break;
+ }
+}
+
+/*
+** f3:
+** ...
+** vcmp.i32 eq, q[0-9]+, q[0-9]+
+** vmrs r[0-9]+, p0 @ movhi
+** cbnz r[0-9]+, \.L[0-9]+
+** ...
+*/
+void f3 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] == 0)
+ break;
+ }
+}
+
+/*
+** f4:
+** ...
+** vcmp.i32 ne, q[0-9]+, q[0-9]+
+** vmrs r[0-9]+, p0 @ movhi
+** cbnz r[0-9]+, \.L[0-9]+
+** ...
+*/
+void f4 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] != 0)
+ break;
+ }
+}
+
+/*
+** f5:
+** ...
+** vcmp.s32 lt, q[0-9]+, q[0-9]+
+** vmrs r[0-9]+, p0 @ movhi
+** cbnz r[0-9]+, \.L[0-9]+
+** ...
+*/
+void f5 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] < 0)
+ break;
+ }
+}
+
+/*
+** f6:
+** ...
+** vcmp.s32 le, q[0-9]+, q[0-9]+
+** vmrs r[0-9]+, p0 @ movhi
+** cbnz r[0-9]+, \.L[0-9]+
+** ...
+*/
+void f6 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] <= 0)
+ break;
+ }
+}
@@ -3785,6 +3785,8 @@ proc check_effective_target_vect_early_break { } {
expr {
[istarget aarch64*-*-*]
|| [check_effective_target_arm_neon_ok]
+ || ([check_effective_target_arm_v8_1m_mve_fp_ok]
+ && [check_effective_target_arm_little_endian])
}}]
}
# Return 1 if the target supports hardware vectorization of complex additions of