[pushed] aarch64: Remove SME2.1 forms of LUTI2/4

Message ID	mpt7cigftwh.fsf@arm.com
State	New
Headers	show Return-Path: <gcc-patches-bounces+incoming=patchwork.ozlabs.org@gcc.gnu.org> DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org F2F1B3858D20 From: Richard Sandiford <richard.sandiford@arm.com> To: gcc-patches@gcc.gnu.org Mail-Followup-To: gcc-patches@gcc.gnu.org, richard.sandiford@arm.com Subject: [pushed] aarch64: Remove SME2.1 forms of LUTI2/4 Date: Tue, 05 Mar 2024 17:52:30 +0000 Message-ID: <mpt7cigftwh.fsf@arm.com> User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/26.3 (gnu/linux) MIME-Version: 1.0 Content-Type: text/plain Precedence: list Errors-To: gcc-patches-bounces+incoming=patchwork.ozlabs.org@gcc.gnu.org
Series	[pushed] aarch64: Remove SME2.1 forms of LUTI2/4

diff --git a/gcc/config/aarch64/aarch64-early-ra.cc b/gcc/config/aarch64/aarch64-early-ra.cc index 8530b0ae41e..1e2c823cb2e 100644 --- a/gcc/config/aarch64/aarch64-early-ra.cc +++ b/gcc/config/aarch64/aarch64-early-ra.cc @@ -1060,8 +1060,7 @@ is_stride_candidate (rtx_insn *insn) return false; auto stride_type = get_attr_stride_type (insn); - return (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE - || stride_type == STRIDE_TYPE_LD1_CONSECUTIVE + return (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE || stride_type == STRIDE_TYPE_ST1_CONSECUTIVE); } @@ -3212,8 +3211,7 @@ early_ra::maybe_convert_to_strided_access (rtx_insn *insn) auto stride_type = get_attr_stride_type (insn); rtx pat = PATTERN (insn); rtx op; - if (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE - || stride_type == STRIDE_TYPE_LD1_CONSECUTIVE) + if (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE) op = SET_DEST (pat); else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE) op = XVECEXP (SET_SRC (pat), 0, 1); @@ -3263,20 +3261,6 @@ early_ra::maybe_convert_to_strided_access (rtx_insn *insn) XVECEXP (SET_SRC (pat), 0, XVECLEN (SET_SRC (pat), 0) - 1) = *recog_data.dup_loc[0]; } - else if (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE) - { - auto bits = INTVAL (XVECEXP (SET_SRC (pat), 0, 4)); - if (range.count == 2) - pat = gen_aarch64_sme_lut_strided2 (bits, single_mode, - regs[0], regs[1], - recog_data.operand[1], - recog_data.operand[2]); - else - pat = gen_aarch64_sme_lut_strided4 (bits, single_mode, - regs[0], regs[1], regs[2], regs[3], - recog_data.operand[1], - recog_data.operand[2]); - } else gcc_unreachable (); PATTERN (insn) = pat; diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md index c95d4aa696c..78ad2fc699f 100644 --- a/gcc/config/aarch64/aarch64-sme.md +++ b/gcc/config/aarch64/aarch64-sme.md @@ -1939,74 +1939,4 @@ (define_insn "@aarch64_sme_lut<LUTI_BITS><mode>" "TARGET_STREAMING_SME2 && !(<LUTI_BITS> == 4 && <vector_count> == 4 && <elem_bits> == 8)" "luti<LUTI_BITS>\t%0, zt0, 
%1[%2]" - [(set_attr "stride_type" "luti_consecutive")] -) - -(define_insn "@aarch64_sme_lut<LUTI_BITS><mode>_strided2" - [(set (match_operand:SVE_FULL_BHS 0 "aarch64_simd_register" "=Uwd") - (unspec:SVE_FULL_BHS - [(reg:V8DI ZT0_REGNUM) - (reg:DI SME_STATE_REGNUM) - (match_operand:VNx16QI 2 "register_operand" "w") - (match_operand:DI 3 "const_int_operand") - (const_int LUTI_BITS) - (const_int 0)] - UNSPEC_SME_LUTI)) - (set (match_operand:SVE_FULL_BHS 1 "aarch64_simd_register" "=w") - (unspec:SVE_FULL_BHS - [(reg:V8DI ZT0_REGNUM) - (reg:DI SME_STATE_REGNUM) - (match_dup 2) - (match_dup 3) - (const_int LUTI_BITS) - (const_int 1)] - UNSPEC_SME_LUTI))] - "TARGET_STREAMING_SME2 - && aarch64_strided_registers_p (operands, 2, 8)" - "luti<LUTI_BITS>\t{%0.<Vetype>, %1.<Vetype>}, zt0, %2[%3]" - [(set_attr "stride_type" "luti_strided")] -) - -(define_insn "@aarch64_sme_lut<LUTI_BITS><mode>_strided4" - [(set (match_operand:SVE_FULL_BHS 0 "aarch64_simd_register" "=Uwt") - (unspec:SVE_FULL_BHS - [(reg:V8DI ZT0_REGNUM) - (reg:DI SME_STATE_REGNUM) - (match_operand:VNx16QI 4 "register_operand" "w") - (match_operand:DI 5 "const_int_operand") - (const_int LUTI_BITS) - (const_int 0)] - UNSPEC_SME_LUTI)) - (set (match_operand:SVE_FULL_BHS 1 "aarch64_simd_register" "=w") - (unspec:SVE_FULL_BHS - [(reg:V8DI ZT0_REGNUM) - (reg:DI SME_STATE_REGNUM) - (match_dup 4) - (match_dup 5) - (const_int LUTI_BITS) - (const_int 1)] - UNSPEC_SME_LUTI)) - (set (match_operand:SVE_FULL_BHS 2 "aarch64_simd_register" "=w") - (unspec:SVE_FULL_BHS - [(reg:V8DI ZT0_REGNUM) - (reg:DI SME_STATE_REGNUM) - (match_dup 4) - (match_dup 5) - (const_int LUTI_BITS) - (const_int 2)] - UNSPEC_SME_LUTI)) - (set (match_operand:SVE_FULL_BHS 3 "aarch64_simd_register" "=w") - (unspec:SVE_FULL_BHS - [(reg:V8DI ZT0_REGNUM) - (reg:DI SME_STATE_REGNUM) - (match_dup 4) - (match_dup 5) - (const_int LUTI_BITS) - (const_int 3)] - UNSPEC_SME_LUTI))] - "TARGET_STREAMING_SME2 - && !(<LUTI_BITS> == 4 && <elem_bits> == 8) - && 
aarch64_strided_registers_p (operands, 4, 4)" - "luti<LUTI_BITS>\t{%0.<Vetype>, %1.<Vetype>, %2.<Vetype>, %3.<Vetype>}, zt0, %4[%5]" - [(set_attr "stride_type" "luti_strided")] ) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 33fbe1b2e8d..7d51d923bf6 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -553,8 +553,7 @@ (define_attr "speculation_barrier" "true,false" (const_string "false")) ;; The RTL mapping therefore applies at LD1 granularity, rather than ;; being broken down into individual types of load. (define_attr "stride_type" - "none,ld1_consecutive,ld1_strided,st1_consecutive,st1_strided, - luti_consecutive,luti_strided" + "none,ld1_consecutive,ld1_strided,st1_consecutive,st1_strided" (const_string "none")) ;; Attribute used to identify load pair and store pair instructions. diff --git a/gcc/testsuite/gcc.target/aarch64/sme/strided_1.c b/gcc/testsuite/gcc.target/aarch64/sme/strided_1.c index 3620fff3668..73aac0683ea 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/strided_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/strided_1.c @@ -180,61 +180,6 @@ void test4(int32_t *dest, int32_t *src) __arm_streaming svget4(l2, 3), svget4(l3, 3))); } -/* -** test5: -** ptrue [^\n]+ -** ld1b [^\n]+ -** ld1b [^\n]+ -** ptrue ([^\n]+)\.s -** ld1w [^\n]+, \1/z, \[x0\] -** luti4 {z16\.s, z20\.s, z24\.s, z28\.s}, zt0, z[0-9]+\[0\] -** luti4 {z17\.s, z21\.s, z25\.s, z29\.s}, zt0, z[0-9]+\[1\] -** luti4 {z18\.s, z22\.s, z26\.s, z30\.s}, zt0, z[0-9]+\[0\] -** luti4 {z19\.s, z23\.s, z27\.s, z31\.s}, zt0, z[0-9]+\[1\] -** uclamp {z16\.s - z19\.s}, z[0-9]+\.s, z[0-9]+\.s -** uclamp {z20\.s - z23\.s}, z[0-9]+\.s, z[0-9]+\.s -** uclamp {z24\.s - z27\.s}, z[0-9]+\.s, z[0-9]+\.s -** uclamp {z28\.s - z31\.s}, z[0-9]+\.s, z[0-9]+\.s -** st1w {z16\.s - z19\.s}, \1, \[x0\] -** st1w {z20\.s - z23\.s}, \1, \[x0, #4, mul vl\] -** st1w {z24\.s - z27\.s}, \1, \[x0, #8, mul vl\] -** st1w {z28\.s - z31\.s}, \1, \[x0, #12, mul vl\] 
-** ret -*/ -void test5(uint32_t *dest, uint8_t *indices) - __arm_streaming __arm_preserves("za") __arm_inout("zt0") -{ - svuint8_t indices1 = svld1_vnum(svptrue_b8(), indices, 0); - svuint8_t indices2 = svld1_vnum(svptrue_b8(), indices, 2); - - svcount_t pg = svptrue_c32(); - svuint32x4_t bounds = svld1_x4(pg, dest); - - svuint32x4_t x0 = svluti4_lane_zt_u32_x4(0, indices1, 0); - svuint32x4_t x1 = svluti4_lane_zt_u32_x4(0, indices1, 1); - svuint32x4_t x2 = svluti4_lane_zt_u32_x4(0, indices2, 0); - svuint32x4_t x3 = svluti4_lane_zt_u32_x4(0, indices2, 1); - - svuint32x4_t y0 = svcreate4(svget4(x0, 0), svget4(x1, 0), - svget4(x2, 0), svget4(x3, 0)); - svuint32x4_t y1 = svcreate4(svget4(x0, 1), svget4(x1, 1), - svget4(x2, 1), svget4(x3, 1)); - svuint32x4_t y2 = svcreate4(svget4(x0, 2), svget4(x1, 2), - svget4(x2, 2), svget4(x3, 2)); - svuint32x4_t y3 = svcreate4(svget4(x0, 3), svget4(x1, 3), - svget4(x2, 3), svget4(x3, 3)); - - y0 = svclamp(y0, svget4(bounds, 0), svget4(bounds, 1)); - y1 = svclamp(y1, svget4(bounds, 2), svget4(bounds, 3)); - y2 = svclamp(y2, svget4(bounds, 0), svget4(bounds, 1)); - y3 = svclamp(y3, svget4(bounds, 2), svget4(bounds, 3)); - - svst1_vnum(pg, dest, 0, y0); - svst1_vnum(pg, dest, 4, y1); - svst1_vnum(pg, dest, 8, y2); - svst1_vnum(pg, dest, 12, y3); -} - /* ** test6: ** ptrue [^\n]+

[pushed] aarch64: Remove SME2.1 forms of LUTI2/4

Commit Message

Patch