@@ -1060,8 +1060,7 @@ is_stride_candidate (rtx_insn *insn)
return false;
auto stride_type = get_attr_stride_type (insn);
- return (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE
- || stride_type == STRIDE_TYPE_LD1_CONSECUTIVE
+ return (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE
|| stride_type == STRIDE_TYPE_ST1_CONSECUTIVE);
}
@@ -3212,8 +3211,7 @@ early_ra::maybe_convert_to_strided_access (rtx_insn *insn)
auto stride_type = get_attr_stride_type (insn);
rtx pat = PATTERN (insn);
rtx op;
- if (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE
- || stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
+ if (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
op = SET_DEST (pat);
else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
op = XVECEXP (SET_SRC (pat), 0, 1);
@@ -3263,20 +3261,6 @@ early_ra::maybe_convert_to_strided_access (rtx_insn *insn)
XVECEXP (SET_SRC (pat), 0, XVECLEN (SET_SRC (pat), 0) - 1)
= *recog_data.dup_loc[0];
}
- else if (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE)
- {
- auto bits = INTVAL (XVECEXP (SET_SRC (pat), 0, 4));
- if (range.count == 2)
- pat = gen_aarch64_sme_lut_strided2 (bits, single_mode,
- regs[0], regs[1],
- recog_data.operand[1],
- recog_data.operand[2]);
- else
- pat = gen_aarch64_sme_lut_strided4 (bits, single_mode,
- regs[0], regs[1], regs[2], regs[3],
- recog_data.operand[1],
- recog_data.operand[2]);
- }
else
gcc_unreachable ();
PATTERN (insn) = pat;
@@ -1939,74 +1939,4 @@ (define_insn "@aarch64_sme_lut<LUTI_BITS><mode>"
"TARGET_STREAMING_SME2
&& !(<LUTI_BITS> == 4 && <vector_count> == 4 && <elem_bits> == 8)"
"luti<LUTI_BITS>\t%0, zt0, %1[%2]"
- [(set_attr "stride_type" "luti_consecutive")]
-)
-
-(define_insn "@aarch64_sme_lut<LUTI_BITS><mode>_strided2"
- [(set (match_operand:SVE_FULL_BHS 0 "aarch64_simd_register" "=Uwd")
- (unspec:SVE_FULL_BHS
- [(reg:V8DI ZT0_REGNUM)
- (reg:DI SME_STATE_REGNUM)
- (match_operand:VNx16QI 2 "register_operand" "w")
- (match_operand:DI 3 "const_int_operand")
- (const_int LUTI_BITS)
- (const_int 0)]
- UNSPEC_SME_LUTI))
- (set (match_operand:SVE_FULL_BHS 1 "aarch64_simd_register" "=w")
- (unspec:SVE_FULL_BHS
- [(reg:V8DI ZT0_REGNUM)
- (reg:DI SME_STATE_REGNUM)
- (match_dup 2)
- (match_dup 3)
- (const_int LUTI_BITS)
- (const_int 1)]
- UNSPEC_SME_LUTI))]
- "TARGET_STREAMING_SME2
- && aarch64_strided_registers_p (operands, 2, 8)"
- "luti<LUTI_BITS>\t{%0.<Vetype>, %1.<Vetype>}, zt0, %2[%3]"
- [(set_attr "stride_type" "luti_strided")]
-)
-
-(define_insn "@aarch64_sme_lut<LUTI_BITS><mode>_strided4"
- [(set (match_operand:SVE_FULL_BHS 0 "aarch64_simd_register" "=Uwt")
- (unspec:SVE_FULL_BHS
- [(reg:V8DI ZT0_REGNUM)
- (reg:DI SME_STATE_REGNUM)
- (match_operand:VNx16QI 4 "register_operand" "w")
- (match_operand:DI 5 "const_int_operand")
- (const_int LUTI_BITS)
- (const_int 0)]
- UNSPEC_SME_LUTI))
- (set (match_operand:SVE_FULL_BHS 1 "aarch64_simd_register" "=w")
- (unspec:SVE_FULL_BHS
- [(reg:V8DI ZT0_REGNUM)
- (reg:DI SME_STATE_REGNUM)
- (match_dup 4)
- (match_dup 5)
- (const_int LUTI_BITS)
- (const_int 1)]
- UNSPEC_SME_LUTI))
- (set (match_operand:SVE_FULL_BHS 2 "aarch64_simd_register" "=w")
- (unspec:SVE_FULL_BHS
- [(reg:V8DI ZT0_REGNUM)
- (reg:DI SME_STATE_REGNUM)
- (match_dup 4)
- (match_dup 5)
- (const_int LUTI_BITS)
- (const_int 2)]
- UNSPEC_SME_LUTI))
- (set (match_operand:SVE_FULL_BHS 3 "aarch64_simd_register" "=w")
- (unspec:SVE_FULL_BHS
- [(reg:V8DI ZT0_REGNUM)
- (reg:DI SME_STATE_REGNUM)
- (match_dup 4)
- (match_dup 5)
- (const_int LUTI_BITS)
- (const_int 3)]
- UNSPEC_SME_LUTI))]
- "TARGET_STREAMING_SME2
- && !(<LUTI_BITS> == 4 && <elem_bits> == 8)
- && aarch64_strided_registers_p (operands, 4, 4)"
- "luti<LUTI_BITS>\t{%0.<Vetype>, %1.<Vetype>, %2.<Vetype>, %3.<Vetype>}, zt0, %4[%5]"
- [(set_attr "stride_type" "luti_strided")]
)
@@ -553,8 +553,7 @@ (define_attr "speculation_barrier" "true,false" (const_string "false"))
;; The RTL mapping therefore applies at LD1 granularity, rather than
;; being broken down into individual types of load.
(define_attr "stride_type"
- "none,ld1_consecutive,ld1_strided,st1_consecutive,st1_strided,
- luti_consecutive,luti_strided"
+ "none,ld1_consecutive,ld1_strided,st1_consecutive,st1_strided"
(const_string "none"))
;; Attribute used to identify load pair and store pair instructions.
@@ -180,61 +180,6 @@ void test4(int32_t *dest, int32_t *src) __arm_streaming
svget4(l2, 3), svget4(l3, 3)));
}
-/*
-** test5:
-** ptrue [^\n]+
-** ld1b [^\n]+
-** ld1b [^\n]+
-** ptrue ([^\n]+)\.s
-** ld1w [^\n]+, \1/z, \[x0\]
-** luti4 {z16\.s, z20\.s, z24\.s, z28\.s}, zt0, z[0-9]+\[0\]
-** luti4 {z17\.s, z21\.s, z25\.s, z29\.s}, zt0, z[0-9]+\[1\]
-** luti4 {z18\.s, z22\.s, z26\.s, z30\.s}, zt0, z[0-9]+\[0\]
-** luti4 {z19\.s, z23\.s, z27\.s, z31\.s}, zt0, z[0-9]+\[1\]
-** uclamp {z16\.s - z19\.s}, z[0-9]+\.s, z[0-9]+\.s
-** uclamp {z20\.s - z23\.s}, z[0-9]+\.s, z[0-9]+\.s
-** uclamp {z24\.s - z27\.s}, z[0-9]+\.s, z[0-9]+\.s
-** uclamp {z28\.s - z31\.s}, z[0-9]+\.s, z[0-9]+\.s
-** st1w {z16\.s - z19\.s}, \1, \[x0\]
-** st1w {z20\.s - z23\.s}, \1, \[x0, #4, mul vl\]
-** st1w {z24\.s - z27\.s}, \1, \[x0, #8, mul vl\]
-** st1w {z28\.s - z31\.s}, \1, \[x0, #12, mul vl\]
-** ret
-*/
-void test5(uint32_t *dest, uint8_t *indices)
- __arm_streaming __arm_preserves("za") __arm_inout("zt0")
-{
- svuint8_t indices1 = svld1_vnum(svptrue_b8(), indices, 0);
- svuint8_t indices2 = svld1_vnum(svptrue_b8(), indices, 2);
-
- svcount_t pg = svptrue_c32();
- svuint32x4_t bounds = svld1_x4(pg, dest);
-
- svuint32x4_t x0 = svluti4_lane_zt_u32_x4(0, indices1, 0);
- svuint32x4_t x1 = svluti4_lane_zt_u32_x4(0, indices1, 1);
- svuint32x4_t x2 = svluti4_lane_zt_u32_x4(0, indices2, 0);
- svuint32x4_t x3 = svluti4_lane_zt_u32_x4(0, indices2, 1);
-
- svuint32x4_t y0 = svcreate4(svget4(x0, 0), svget4(x1, 0),
- svget4(x2, 0), svget4(x3, 0));
- svuint32x4_t y1 = svcreate4(svget4(x0, 1), svget4(x1, 1),
- svget4(x2, 1), svget4(x3, 1));
- svuint32x4_t y2 = svcreate4(svget4(x0, 2), svget4(x1, 2),
- svget4(x2, 2), svget4(x3, 2));
- svuint32x4_t y3 = svcreate4(svget4(x0, 3), svget4(x1, 3),
- svget4(x2, 3), svget4(x3, 3));
-
- y0 = svclamp(y0, svget4(bounds, 0), svget4(bounds, 1));
- y1 = svclamp(y1, svget4(bounds, 2), svget4(bounds, 3));
- y2 = svclamp(y2, svget4(bounds, 0), svget4(bounds, 1));
- y3 = svclamp(y3, svget4(bounds, 2), svget4(bounds, 3));
-
- svst1_vnum(pg, dest, 0, y0);
- svst1_vnum(pg, dest, 4, y1);
- svst1_vnum(pg, dest, 8, y2);
- svst1_vnum(pg, dest, 12, y3);
-}
-
/*
** test6:
** ptrue [^\n]+