@@ -2611,11 +2611,18 @@ (define_insn_and_split "*vec_duplicate<mode>_reg"
)
;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
+;;
+;; The addressing mode range of LD1RQ does not match the addressing mode
+;; range of LDR Qn. If the predicate enforced the LD1RQ range, we would
+;; not be able to combine LDR Qns outside that range. The predicate
+;; therefore accepts all memory operands, with only the constraints
+;; enforcing the actual restrictions. If the instruction is split before
+;; register allocation, addresses invalid for LD1RQ go into a temporary.
(define_insn_and_split "@aarch64_vec_duplicate_vq<mode>_le"
[(set (match_operand:SVE_FULL 0 "register_operand" "=w, w")
(vec_duplicate:SVE_FULL
- (match_operand:<V128> 1 "aarch64_sve_dup_ld1rq_operand" "w, UtQ")))
+ (match_operand:<V128> 1 "nonimmediate_operand" "w, UtQ")))
(clobber (match_scratch:VNx16BI 2 "=X, Upl"))]
"TARGET_SVE && !BYTES_BIG_ENDIAN"
{
@@ -2633,6 +2640,12 @@ (define_insn_and_split "@aarch64_vec_duplicate_vq<mode>_le"
"&& MEM_P (operands[1])"
[(const_int 0)]
{
+ if (can_create_pseudo_p ()
+ && !aarch64_sve_ld1rq_operand (operands[1], <V128>mode))
+ {
+ rtx addr = force_reg (Pmode, XEXP (operands[1], 0));
+ operands[1] = replace_equiv_address (operands[1], addr);
+ }
if (GET_CODE (operands[2]) == SCRATCH)
operands[2] = gen_reg_rtx (VNx16BImode);
emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
@@ -732,10 +732,6 @@ (define_predicate "aarch64_sve_dup_operand"
(ior (match_operand 0 "register_operand")
(match_operand 0 "aarch64_sve_ld1r_operand")))
-(define_predicate "aarch64_sve_dup_ld1rq_operand"
- (ior (match_operand 0 "register_operand")
- (match_operand 0 "aarch64_sve_ld1rq_operand")))
-
(define_predicate "aarch64_sve_ptrue_svpattern_immediate"
(and (match_code "const")
(match_test "aarch64_sve_ptrue_svpattern_p (op, NULL)")))
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-options "-O2" } */
+
+#include <arm_sve.h>
+
+#define TEST_OFFSET(TYPE, SUFFIX, OFFSET) \
+ sv##TYPE##_t \
+ test_##TYPE##_##SUFFIX (TYPE##_t *ptr) \
+ { \
+ return svld1rq(svptrue_b8(), ptr + OFFSET); \
+ }
+
+#define TEST(TYPE) \
+ TEST_OFFSET (TYPE, 0, 0) \
+ TEST_OFFSET (TYPE, 1, 1) \
+ TEST_OFFSET (TYPE, 2, 2) \
+ TEST_OFFSET (TYPE, 16, 16) \
+ TEST_OFFSET (TYPE, 0x10000, 0x10000) \
+ TEST_OFFSET (TYPE, 0x10001, 0x10001) \
+ TEST_OFFSET (TYPE, m1, -1) \
+ TEST_OFFSET (TYPE, m2, -2) \
+ TEST_OFFSET (TYPE, m16, -16) \
+ TEST_OFFSET (TYPE, m0x10000, -0x10000) \
+ TEST_OFFSET (TYPE, m0x10001, -0x10001)
+
+TEST (int8)
+TEST (int16)
+TEST (uint32)
+TEST (uint64)
+
+/* { dg-final { scan-assembler-times {\tld1rqb\t} 11 { target aarch64_little_endian } } } */
+/* { dg-final { scan-assembler-times {\tld1rqh\t} 11 { target aarch64_little_endian } } } */
+/* { dg-final { scan-assembler-times {\tld1rqw\t} 11 { target aarch64_little_endian } } } */
+/* { dg-final { scan-assembler-times {\tld1rqd\t} 11 { target aarch64_little_endian } } } */