@@ -5109,6 +5109,20 @@ (define_expand "@cond_<optab><mode>"
"TARGET_SVE"
)
+;; Unpredicated complex-add optab for the auto-vectorizer.  It is implemented
+;; with the predicated FCADD unspec, governed by a ptrue predicate that the
+;; expander supplies itself as operand 3.
+(define_expand "@cadd<rot><mode>3"
+  [(set (match_operand:SVE_FULL_F 0 "register_operand")
+        (unspec:SVE_FULL_F
+          [(match_dup 3)
+           (const_int SVE_RELAXED_GP)
+           (match_operand:SVE_FULL_F 1 "register_operand")
+           (match_operand:SVE_FULL_F 2 "register_operand")]
+          SVE_COND_FCADD))]
+  "TARGET_SVE"
+{
+  /* Callers only pass operands 0-2; fill in the governing predicate that
+     the (match_dup 3) in the template refers to.  */
+  rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
+  operands[3] = ptrue;
+})
+
+
;; Predicated FCADD, merging with the first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
@@ -6554,6 +6568,62 @@ (define_insn "@aarch64_pred_<optab><mode>"
[(set_attr "movprfx" "*,yes")]
)
+;; Unpredicated optab pattern for the auto-vectorizer.
+;; The complex mla/mls operations always need to expand to two FCMLA
+;; instructions.  The first does half the computation and the second does
+;; the remainder by accumulating onto the result of the first.  Because of
+;; this, expand early.
+;;
+;; Following the RTL template, operands 1 and 2 are the multiplicands and
+;; operand 3 is the value accumulated into.  The FCMLA generator is called
+;; as (dest, predicate, mul1, mul2, accumulator, strictness), which is the
+;; argument order the cmul expander in this file relies on as well.
+(define_expand "cml<fcmac1><rot_op><mode>4"
+  [(set (match_operand:SVE_FULL_F 0 "register_operand")
+        (unspec:SVE_FULL_F
+          [(match_dup 4)
+           (match_dup 5)
+           (match_operand:SVE_FULL_F 1 "register_operand")
+           (match_operand:SVE_FULL_F 2 "register_operand")
+           (match_operand:SVE_FULL_F 3 "register_operand")]
+          FCMLA_OP))]
+  "TARGET_SVE"
+{
+  /* Governing ptrue predicate and relaxed-strictness flag shared by both
+     halves of the expansion.  */
+  operands[4] = aarch64_ptrue_reg (<VPRED>mode);
+  operands[5] = gen_int_mode (SVE_RELAXED_GP, SImode);
+
+  /* Hold the partial result in a fresh pseudo so that the first FCMLA can
+     never overwrite an input that happens to overlap operands[0].  */
+  rtx tmp = gen_reg_rtx (<MODE>mode);
+
+  /* First half: multiply operands 1 and 2 and accumulate onto operand 3.  */
+  emit_insn (
+    gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[4],
+                                            operands[1], operands[2],
+                                            operands[3], operands[5]));
+
+  /* Second half: same multiplicands with the other rotation, accumulated
+     onto the partial result rather than onto the original accumulator.  */
+  emit_insn (
+    gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], operands[4],
+                                            operands[1], operands[2],
+                                            tmp, operands[5]));
+  DONE;
+})
+
+
+;; Unpredicated optab pattern for the auto-vectorizer.
+;; The complex mul operations always need to expand to two FCMLA
+;; instructions.  The first does half the computation, accumulating onto
+;; zero, and the second does the remainder by accumulating onto the result
+;; of the first.  Because of this, expand early.
+(define_expand "cmul<rot_op><mode>3"
+  [(set (match_operand:SVE_FULL_F 0 "register_operand")
+        (unspec:SVE_FULL_F
+          [(match_dup 3)
+           (match_dup 4)
+           (match_operand:SVE_FULL_F 1 "register_operand")
+           (match_operand:SVE_FULL_F 2 "register_operand")
+           (match_dup 5)]
+          FCMUL_OP))]
+  "TARGET_SVE"
+{
+  /* Governing ptrue predicate, relaxed-strictness flag and an all-zero
+     vector used as the initial accumulator.  */
+  operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+  operands[4] = gen_int_mode (SVE_RELAXED_GP, SImode);
+  operands[5] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
+
+  /* Hold the partial product in a fresh pseudo.  Writing it straight into
+     operands[0] would clobber operands[1] or operands[2] before the second
+     FCMLA reads them if the destination overlaps an input.  */
+  rtx tmp = gen_reg_rtx (<MODE>mode);
+
+  /* First half: multiply the inputs and accumulate onto zero.  */
+  emit_insn (
+    gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[3], operands[1],
+                                            operands[2], operands[5],
+                                            operands[4]));
+
+  /* Second half: same inputs with the other rotation, accumulated onto the
+     partial product.  */
+  emit_insn (
+    gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], operands[3],
+                                            operands[1], operands[2], tmp,
+                                            operands[4]));
+  DONE;
+})
+
+
;; Predicated FCMLA with merging.
(define_expand "@cond_<optab><mode>"
[(set (match_operand:SVE_FULL_F 0 "register_operand")
@@ -3443,6 +3443,35 @@ (define_int_attr rotsplit2 [(UNSPEC_FCMLA "90")
(UNSPEC_FCMLS "180")
(UNSPEC_FCMLS180 "180")])
+;; The SVE FCMLA patterns are named slightly differently from the NEON ones,
+;; so the rotation attributes have to be split out for SVE.  sve_rot1 is the
+;; rotation suffix spliced into the name of the first FCMLA generator that
+;; the two-instruction expanders call; "" yields the unsuffixed name.
+(define_int_attr sve_rot1
+  [(UNSPEC_FCMLA "")    (UNSPEC_FCMLA180 "")
+   (UNSPEC_FCMUL "")    (UNSPEC_FCMUL180 "")
+   (UNSPEC_FCMLS "270") (UNSPEC_FCMLS180 "90")
+   (UNSPEC_CMLA "")     (UNSPEC_CMLA180 "")
+   (UNSPEC_CMUL "")     (UNSPEC_CMUL180 "")
+   (UNSPEC_CMLS "270")  (UNSPEC_CMLS180 "90")])
+
+;; Rotation suffix spliced into the name of the second FCMLA generator that
+;; the two-instruction SVE expanders call.
+(define_int_attr sve_rot2
+  [(UNSPEC_FCMLA "90")  (UNSPEC_FCMLA180 "270")
+   (UNSPEC_FCMUL "90")  (UNSPEC_FCMUL180 "270")
+   (UNSPEC_FCMLS "180") (UNSPEC_FCMLS180 "180")
+   (UNSPEC_CMLA "90")   (UNSPEC_CMLA180 "270")
+   (UNSPEC_CMUL "90")   (UNSPEC_CMUL180 "270")
+   (UNSPEC_CMLS "180")  (UNSPEC_CMLS180 "180")])
+
+
(define_int_attr fcmac1 [(UNSPEC_FCMLA "a") (UNSPEC_FCMLA180 "a")
(UNSPEC_FCMLS "s") (UNSPEC_FCMLS180 "s")
(UNSPEC_CMLA "a") (UNSPEC_CMLA180 "a")