@@ -389,26 +389,6 @@ (define_insn "mul<mode>3<vczle><vczbe>"
[(set_attr "type" "neon_mul_<Vetype><q>")]
)
-;; Advanced SIMD does not support vector DImode MUL, but SVE does.
-;; Make use of the overlap between Z and V registers to implement the V2DI
-;; optab for TARGET_SVE. The mulvnx2di3 expander can
-;; handle the TARGET_SVE2 case transparently.
-(define_expand "mulv2di3"
- [(set (match_operand:V2DI 0 "register_operand")
- (mult:V2DI (match_operand:V2DI 1 "register_operand")
- (match_operand:V2DI 2 "aarch64_sve_vsm_operand")))]
- "TARGET_SVE"
- {
- machine_mode sve_mode = VNx2DImode;
- rtx sve_op0 = simplify_gen_subreg (sve_mode, operands[0], V2DImode, 0);
- rtx sve_op1 = simplify_gen_subreg (sve_mode, operands[1], V2DImode, 0);
- rtx sve_op2 = simplify_gen_subreg (sve_mode, operands[2], V2DImode, 0);
-
- emit_insn (gen_mulvnx2di3 (sve_op0, sve_op1, sve_op2));
- DONE;
- }
-)
-
(define_insn "bswap<mode>2"
[(set (match_operand:VDQHSD 0 "register_operand" "=w")
(bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
@@ -2678,27 +2658,6 @@ (define_insn "*div<mode>3<vczle><vczbe>"
[(set_attr "type" "neon_fp_div_<stype><q>")]
)
-;; SVE has vector integer divisions, unlike Advanced SIMD.
-;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
-;; optabs to the midend.
-(define_expand "<su_optab>div<mode>3"
- [(set (match_operand:VQDIV 0 "register_operand")
- (ANY_DIV:VQDIV
- (match_operand:VQDIV 1 "register_operand")
- (match_operand:VQDIV 2 "register_operand")))]
- "TARGET_SVE"
- {
- machine_mode sve_mode
- = aarch64_full_sve_mode (GET_MODE_INNER (<MODE>mode)).require ();
- rtx sve_op0 = simplify_gen_subreg (sve_mode, operands[0], <MODE>mode, 0);
- rtx sve_op1 = simplify_gen_subreg (sve_mode, operands[1], <MODE>mode, 0);
- rtx sve_op2 = simplify_gen_subreg (sve_mode, operands[2], <MODE>mode, 0);
-
- emit_insn (gen_<su_optab>div<vnx>3 (sve_op0, sve_op1, sve_op2));
- DONE;
- }
-)
-
(define_insn "neg<mode>2<vczle><vczbe>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
@@ -3789,16 +3789,35 @@ (define_expand "<optab><mode>3"
[(set (match_operand:SVE_I 0 "register_operand")
(unspec:SVE_I
[(match_dup 3)
- (SVE_INT_BINARY_IMM:SVE_I
+ (SVE_INT_BINARY_MULTI:SVE_I
(match_operand:SVE_I 1 "register_operand")
(match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
UNSPEC_PRED_X))]
"TARGET_SVE"
+ {
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ }
+)
+
+;; Unpredicated integer binary operations that have an immediate form.
+;; Advanced SIMD does not support vector DImode MUL, but SVE does.
+;; Make use of the overlap between Z and V registers to implement the V2DI
+;; optab for TARGET_SVE.  This expander handles the TARGET_SVE2 case
+;; transparently.
+(define_expand "mul<mode>3"
+ [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
+ (unspec:SVE_I_SIMD_DI
+ [(match_dup 3)
+ (mult:SVE_I_SIMD_DI
+ (match_operand:SVE_I_SIMD_DI 1 "register_operand")
+ (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_vsm_operand"))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE"
{
/* SVE2 supports the MUL (vectors, unpredicated) form. Emit the simple
pattern for it here rather than splitting off the MULT expander
separately. */
- if (TARGET_SVE2 && <CODE> == MULT)
+ if (TARGET_SVE2)
{
emit_move_insn (operands[0], gen_rtx_MULT (<MODE>mode,
operands[1], operands[2]));
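For illustration only (my sketch, not part of the patch): on an SVE target
without SVE2, a GNU C V2DI multiply should now reach the mul<mode>3 expander
above and be emitted as a predicated MUL on the Z views of the Neon input
registers, much like the second new test below:

    /* Hypothetical example; assumes -O2 on SVE without SVE2.  */
    typedef unsigned long long __attribute__((__vector_size__ (16))) v2di;

    v2di
    mul_v2di (v2di a, v2di b)
    {
      /* Expected to expand into something like:
           ptrue  p0.b, all
           mul    z0.d, p0/m, z0.d, z1.d  */
      return a * b;
    }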
@@ -3814,26 +3833,26 @@ (define_expand "<optab><mode>3"
;; and would make the instruction seem less uniform to the register
;; allocator.
(define_insn_and_split "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_I 0 "register_operand")
- (unspec:SVE_I
+ [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
+ (unspec:SVE_I_SIMD_DI
[(match_operand:<VPRED> 1 "register_operand")
- (SVE_INT_BINARY_IMM:SVE_I
- (match_operand:SVE_I 2 "register_operand")
- (match_operand:SVE_I 3 "aarch64_sve_<sve_imm_con>_operand"))]
+ (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI
+ (match_operand:SVE_I_SIMD_DI 2 "register_operand")
+ (match_operand:SVE_I_SIMD_DI 3 "aarch64_sve_<sve_imm_con>_operand"))]
UNSPEC_PRED_X))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , Upl , %0 , <sve_imm_con> ; * ] #
- [ w , Upl , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ [ w , Upl , 0 , w ; * ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
[ ?&w , Upl , w , <sve_imm_con> ; yes ] #
- [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ [ ?&w , Upl , w , w ; yes ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
}
; Split the unpredicated form after reload, so that we don't have
; the unnecessary PTRUE.
"&& reload_completed
&& !register_operand (operands[3], <MODE>mode)"
[(set (match_dup 0)
- (SVE_INT_BINARY_IMM:SVE_I (match_dup 2) (match_dup 3)))]
+ (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI (match_dup 2) (match_dup 3)))]
""
)
@@ -3841,14 +3860,14 @@ (define_insn_and_split "@aarch64_pred_<optab><mode>"
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
(define_insn "*post_ra_<optab><mode>3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
- (SVE_INT_BINARY_IMM:SVE_I
- (match_operand:SVE_I 1 "register_operand" "0, w")
- (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
+ [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand" "=w, ?&w")
+ (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI
+ (match_operand:SVE_I_SIMD_DI 1 "register_operand" "0, w")
+ (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_<sve_imm_con>_immediate")))]
"TARGET_SVE && reload_completed"
"@
- <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2
- movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2"
+ <sve_int_op>\t%Z0.<Vetype>, %Z0.<Vetype>, #%<sve_imm_prefix>2
+ movprfx\t%Z0, %Z1\;<sve_int_op>\t%Z0.<Vetype>, %Z0.<Vetype>, #%<sve_imm_prefix>2"
[(set_attr "movprfx" "*,yes")]
)
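The immediate alternatives above now also cover the Neon V2DI mode included
in SVE_I_SIMD_DI.  A hedged sketch (mine, not from the patch) of a multiply
that should stay in the MUL (immediate) form accepted by
aarch64_sve_vsm_operand:

    /* Hypothetical example; assumes -O2 and SVE; the multiplier must fit
       the signed 8-bit MUL immediate range.  */
    typedef long long __attribute__((__vector_size__ (16))) v2di;

    v2di
    mul_by_3 (v2di a)
    {
      return a * 3;   /* expected: mul z0.d, z0.d, #3  */
    }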
@@ -4458,13 +4477,16 @@ (define_insn "*cond_<optab><mode>_z"
;; -------------------------------------------------------------------------
;; Unpredicated integer division.
+;; SVE has vector integer division, unlike Advanced SIMD.
+;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
+;; optabs to the midend.
(define_expand "<optab><mode>3"
- [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
- (unspec:SVE_FULL_SDI
+ [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
+ (unspec:SVE_FULL_SDI_SIMD
[(match_dup 3)
- (SVE_INT_BINARY_SD:SVE_FULL_SDI
- (match_operand:SVE_FULL_SDI 1 "register_operand")
- (match_operand:SVE_FULL_SDI 2 "register_operand"))]
+ (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
+ (match_operand:SVE_FULL_SDI_SIMD 1 "register_operand")
+ (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand"))]
UNSPEC_PRED_X))]
"TARGET_SVE"
{
@@ -4474,18 +4496,18 @@ (define_expand "<optab><mode>3"
;; Integer division predicated with a PTRUE.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
- (unspec:SVE_FULL_SDI
+ [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
+ (unspec:SVE_FULL_SDI_SIMD
[(match_operand:<VPRED> 1 "register_operand")
- (SVE_INT_BINARY_SD:SVE_FULL_SDI
- (match_operand:SVE_FULL_SDI 2 "register_operand")
- (match_operand:SVE_FULL_SDI 3 "register_operand"))]
+ (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
+ (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand")
+ (match_operand:SVE_FULL_SDI_SIMD 3 "register_operand"))]
UNSPEC_PRED_X))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , Upl , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- [ w , Upl , w , 0 ; * ] <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ [ w , Upl , 0 , w ; * ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
+ [ w , Upl , w , 0 ; * ] <sve_int_op>r\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z2.<Vetype>
+ [ ?&w , Upl , w , w ; yes ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
}
)
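For the division side, a hedged sketch (mine, not part of the patch) of a
V4SI unsigned division that the widened SVE_FULL_SDI_SIMD patterns are meant
to catch; the first new test below checks the V2DI analogue:

    /* Hypothetical example; assumes -O2 and SVE.  */
    typedef unsigned int __attribute__((__vector_size__ (16))) v4si;

    v4si
    udiv_v4si (v4si a, v4si b)
    {
      /* Expected: a ptrue plus
           udiv  z0.s, p0/m, z0.s, z1.s  */
      return a / b;
    }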
@@ -615,29 +615,29 @@ (define_insn "@aarch64_sve_<su>clamp_single<mode>"
;; -------------------------------------------------------------------------
(define_insn "@aarch64_mul_lane_<mode>"
- [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
- (mult:SVE_FULL_HSDI
- (unspec:SVE_FULL_HSDI
- [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
+ [(set (match_operand:SVE_FULL_HSDI_SIMD_DI 0 "register_operand" "=w")
+ (mult:SVE_FULL_HSDI_SIMD_DI
+ (unspec:SVE_FULL_HSDI_SIMD_DI
+ [(match_operand:SVE_FULL_HSDI_SIMD_DI 2 "register_operand" "<sve_lane_con>")
(match_operand:SI 3 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)
- (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")))]
+ (match_operand:SVE_FULL_HSDI_SIMD_DI 1 "register_operand" "w")))]
"TARGET_SVE2"
- "mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
+ "mul\t%Z0.<Vetype>, %Z1.<Vetype>, %Z2.<Vetype>[%3]"
)
;; The 2nd and 3rd alternatives are valid for just TARGET_SVE as well but
;; we include them here to allow matching simpler, unpredicated RTL.
(define_insn "*aarch64_mul_unpredicated_<mode>"
- [(set (match_operand:SVE_I 0 "register_operand")
- (mult:SVE_I
- (match_operand:SVE_I 1 "register_operand")
- (match_operand:SVE_I 2 "aarch64_sve_vsm_operand")))]
+ [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
+ (mult:SVE_I_SIMD_DI
+ (match_operand:SVE_I_SIMD_DI 1 "register_operand")
+ (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_vsm_operand")))]
"TARGET_SVE2"
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
- [ w , w , w ; * ] mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
- [ w , 0 , vsm ; * ] mul\t%0.<Vetype>, %0.<Vetype>, #%2
- [ ?&w , w , vsm ; yes ] movprfx\t%0, %1\;mul\t%0.<Vetype>, %0.<Vetype>, #%2
+ [ w , w , w ; * ] mul\t%Z0.<Vetype>, %Z1.<Vetype>, %Z2.<Vetype>
+ [ w , 0 , vsm ; * ] mul\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
+ [ ?&w , w , vsm ; yes ] movprfx\t%Z0, %Z1\;mul\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
}
)
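With SVE2 the unpredicated pattern above matches the same source directly,
so no ptrue is needed; compare the third new test below.  A hedged sketch
(mine, not from the patch):

    /* Hypothetical example; assumes -O2 and SVE2.  */
    typedef unsigned long long __attribute__((__vector_size__ (16))) v2di;

    v2di
    mul_v2di_sve2 (v2di a, v2di b)
    {
      return a * b;   /* expected: mul z0.d, z0.d, z1.d  */
    }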
@@ -108,9 +108,6 @@ (define_mode_iterator DREG [V8QI V4HI V4HF V2SI V2SF DF])
;; Copy of the above.
(define_mode_iterator DREG2 [DREG])
-;; Advanced SIMD modes for integer divides.
-(define_mode_iterator VQDIV [V4SI V2DI])
-
;; All modes suitable to store/load pair (2 elements) using STP/LDP.
(define_mode_iterator VP_2E [V2SI V2SF V2DI V2DF])
@@ -471,6 +468,10 @@ (define_mode_iterator SVE_FULL_HSD [VNx8HI VNx4SI VNx2DI
;; elements.
(define_mode_iterator SVE_FULL_HSDI [VNx8HI VNx4SI VNx2DI])
+;; Fully-packed SVE integer vector modes that have 16-bit, 32-bit or 64-bit
+;; elements, plus the fully-packed Advanced SIMD mode with 64-bit elements.
+(define_mode_iterator SVE_FULL_HSDI_SIMD_DI [VNx8HI VNx4SI VNx2DI V2DI])
+
;; Fully-packed SVE integer vector modes that have 16-bit or 32-bit
;; elements.
(define_mode_iterator SVE_FULL_HSI [VNx8HI VNx4SI])
@@ -488,6 +489,14 @@ (define_mode_iterator SVE_FULL_SD [VNx4SI VNx2DI VNx4SF VNx2DF])
;; Fully-packed SVE integer vector modes that have 32-bit or 64-bit elements.
(define_mode_iterator SVE_FULL_SDI [VNx4SI VNx2DI])
+;; Fully-packed SVE and Advanced SIMD integer vector modes that have 32-bit or
+;; 64-bit elements.
+(define_mode_iterator SVE_FULL_SDI_SIMD [VNx4SI VNx2DI V4SI V2DI])
+
+;; Fully-packed SVE integer vector modes that have 32-bit or 64-bit elements,
+;; plus the Advanced SIMD mode with 64-bit elements.
+(define_mode_iterator SVE_FULL_SDI_SIMD_DI [VNx4SI VNx2DI V2DI])
+
;; 2x and 4x tuples of the above, excluding 2x DI.
(define_mode_iterator SVE_FULL_SIx2_SDIx4 [VNx8SI VNx16SI VNx8DI])
@@ -550,6 +559,13 @@ (define_mode_iterator SVE_I [VNx16QI VNx8QI VNx4QI VNx2QI
VNx4SI VNx2SI
VNx2DI])
+;; All SVE integer vector modes, plus the Advanced SIMD vector mode with
+;; 64-bit elements.
+(define_mode_iterator SVE_I_SIMD_DI [VNx16QI VNx8QI VNx4QI VNx2QI
+ VNx8HI VNx4HI VNx2HI
+ VNx4SI VNx2SI
+ VNx2DI V2DI])
+
;; SVE integer vector modes whose elements are 16 bits or wider.
(define_mode_iterator SVE_HSDI [VNx8HI VNx4HI VNx2HI
VNx4SI VNx2SI
@@ -2268,7 +2284,8 @@ (define_mode_attr VPRED [(VNx16QI "VNx16BI") (VNx8QI "VNx8BI")
(VNx32HI "VNx8BI") (VNx32HF "VNx8BI")
(VNx32BF "VNx8BI")
(VNx16SI "VNx4BI") (VNx16SF "VNx4BI")
- (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")])
+ (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")
+ (V4SI "VNx4BI") (V2DI "VNx2BI")])
;; ...and again in lower case.
(define_mode_attr vpred [(VNx16QI "vnx16bi") (VNx8QI "vnx8bi")
@@ -2370,6 +2387,7 @@ (define_mode_attr narrower_mask [(VNx8HI "0x81") (VNx4HI "0x41")
;; The constraint to use for an SVE [SU]DOT, FMUL, FMLA or FMLS lane index.
(define_mode_attr sve_lane_con [(VNx8HI "y") (VNx4SI "y") (VNx2DI "x")
+ (V2DI "x")
(VNx8HF "y") (VNx4SF "y") (VNx2DF "x")])
;; The constraint to use for an SVE FCMLA lane index.
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-additional-options "-O -mtune=a64fx" } */
+
+typedef unsigned long long __attribute__((__vector_size__ (16))) V;
+typedef unsigned long long __attribute__((__vector_size__ (32))) W;
+
+extern void bar (V v);
+
+void foo (V v, W w)
+{
+ bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) / v));
+}
+
+/* { dg-final { scan-assembler {udiv\tz[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d} } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-additional-options "-O -mcpu=a64fx" } */
+
+typedef unsigned long long __attribute__((__vector_size__ (16))) V;
+typedef unsigned long long __attribute__((__vector_size__ (32))) W;
+
+extern void bar (V v);
+
+void foom (V v, W w)
+{
+ bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) * v));
+}
+
+/* { dg-final { scan-assembler {mul\tz[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d} } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-additional-options "-O -mtune=a64fx" } */
+
+typedef unsigned long long __attribute__((__vector_size__ (16))) V;
+typedef unsigned long long __attribute__((__vector_size__ (32))) W;
+
+extern void bar (V v);
+
+void foom (V v, W w)
+{
+ bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) * v));
+}
+
+/* { dg-final { scan-assembler {mul\tz[0-9]+.d, z[0-9]+.d, z[0-9]+.d} } } */