@@ -624,6 +624,7 @@ enum mask_policy
enum tail_policy get_prefer_tail_policy ();
enum mask_policy get_prefer_mask_policy ();
rtx get_avl_type_rtx (enum avl_type);
+opt_machine_mode get_lmul_mode (scalar_mode, int);
opt_machine_mode get_vector_mode (scalar_mode, poly_uint64);
opt_machine_mode get_tuple_mode (machine_mode, unsigned int);
bool simm5_p (rtx);
@@ -672,7 +673,7 @@ bool slide1_sew64_helper (int, machine_mode, machine_mode,
machine_mode, rtx *);
rtx gen_avl_for_scalar_move (rtx);
void expand_tuple_move (rtx *);
-bool expand_block_move (rtx, rtx, rtx);
+bool expand_block_move (rtx, rtx, rtx, bool);
machine_mode preferred_simd_mode (scalar_mode);
machine_mode get_mask_mode (machine_mode);
void expand_vec_series (rtx, rtx, rtx, rtx = 0);
@@ -966,7 +966,7 @@ riscv_expand_block_move_scalar (rtx dest, rtx src, rtx length)
/* This function delegates block-move expansion to either the vector
implementation or the scalar one. Return TRUE if successful or FALSE
- otherwise. */
+ otherwise. Assume that the memory regions do not overlap. */
bool
riscv_expand_block_move (rtx dest, rtx src, rtx length)
@@ -974,7 +974,7 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
if ((TARGET_VECTOR && !TARGET_XTHEADVECTOR)
&& stringop_strategy & STRATEGY_VECTOR)
{
- bool ok = riscv_vector::expand_block_move (dest, src, length);
+ bool ok = riscv_vector::expand_block_move (dest, src, length, false);
if (ok)
return true;
}
@@ -1054,7 +1054,7 @@ namespace riscv_vector {
/* Used by cpymemsi in riscv.md . */
bool
-expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
+expand_block_move (rtx dst_in, rtx src_in, rtx length_in, bool movmem_p)
{
/*
memcpy:
@@ -1085,10 +1085,9 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
{
HOST_WIDE_INT length = INTVAL (length_in);
- /* By using LMUL=8, we can copy as many bytes in one go as there
- are bits in a vector register. If the entire block thus fits,
- we don't need a loop. */
- if (length <= TARGET_MIN_VLEN)
+      /* If the VLEN and preferred LMUL allow the entire block to be copied
+	 in one go, then no loop is needed.  */
+ if (known_le (length, BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL))
{
need_loop = false;
@@ -1114,19 +1113,32 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
for small element widths, we might allow larger element widths for
loops too. */
if (need_loop)
- potential_ew = 1;
+ {
+	if (movmem_p)
+	  /* Inlining a general memmove is a pessimisation: we can't avoid
+	     having to decide at runtime which direction to copy in, which
+	     is costly in instruction count.  However, when the entire move
+	     fits in a single vector operation we can do all the reads
+	     before any of the writes, so overlap is not a concern; only
+	     generate the inline vector code in that case.  */
+	  return false;
+ potential_ew = 1;
+ }
for (; potential_ew; potential_ew >>= 1)
{
scalar_int_mode elem_mode;
unsigned HOST_WIDE_INT bits = potential_ew * BITS_PER_UNIT;
- unsigned HOST_WIDE_INT per_iter;
- HOST_WIDE_INT nunits;
+ poly_uint64 per_iter;
+ poly_int64 nunits;
if (need_loop)
- per_iter = TARGET_MIN_VLEN;
+ per_iter = BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL;
else
per_iter = length;
- nunits = per_iter / potential_ew;
+      /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL may not be evenly divisible
+	 by this potential_ew; if not, try the next smaller element width.  */
+ if (!multiple_p (per_iter, potential_ew, &nunits))
+ continue;
/* Unless we get an implementation that's slow for small element
size / non-word-aligned accesses, we assume that the hardware
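
For illustration only (not part of the patch): the movmem_p early-out above
means a memmove is expanded inline only when the whole move fits in a single
vector operation, since that expansion performs all of its loads before any
store and so is safe for overlapping buffers without a runtime direction
check.  A hedged C sketch in the style of the memmove test updated later in
this patch, where MIN_VECTOR_BYTES is assumed to be the byte width of one
vector register:

    char *
    move_one_vector (char *a, char const *b)
    {
      /* Fits in one vector operation: every byte is read before any byte
	 is written, so a possible overlap needs no direction check and the
	 move can be expanded inline as one vector load/store pair.  */
      return __builtin_memmove (a, b, MIN_VECTOR_BYTES);
    }
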
@@ -1137,6 +1149,8 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
if (length % potential_ew != 0
|| !int_mode_for_size (bits, 0).exists (&elem_mode))
continue;
+
+ poly_uint64 mode_units;
/* Find the mode to use for the copy inside the loop - or the
sole copy, if there is no loop. */
if (!need_loop)
@@ -1152,12 +1166,12 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
pointless.
Still, by choosing a lower LMUL factor that still allows
an entire transfer, we can reduce register pressure. */
- for (unsigned lmul = 1; lmul <= 4; lmul <<= 1)
- if (length * BITS_PER_UNIT <= TARGET_MIN_VLEN * lmul
- && multiple_p (BYTES_PER_RISCV_VECTOR * lmul, potential_ew)
+ for (unsigned lmul = 1; lmul < TARGET_MAX_LMUL; lmul <<= 1)
+ if (known_le (length * BITS_PER_UNIT, TARGET_MIN_VLEN * lmul)
+ && multiple_p (BYTES_PER_RISCV_VECTOR * lmul, potential_ew,
+ &mode_units)
&& (riscv_vector::get_vector_mode
- (elem_mode, exact_div (BYTES_PER_RISCV_VECTOR * lmul,
- potential_ew)).exists (&vmode)))
+ (elem_mode, mode_units).exists (&vmode)))
break;
}
@@ -1165,15 +1179,12 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
if (vmode != VOIDmode)
break;
- /* The RVVM8?I modes are notionally 8 * BYTES_PER_RISCV_VECTOR bytes
- wide. BYTES_PER_RISCV_VECTOR can't be evenly divided by
- the sizes of larger element types; the LMUL factor of 8 can at
- the moment be divided by the SEW, with SEW of up to 8 bytes,
- but there are reserved encodings so there might be larger
- SEW in the future. */
- if (riscv_vector::get_vector_mode
- (elem_mode, exact_div (BYTES_PER_RISCV_VECTOR * 8,
- potential_ew)).exists (&vmode))
+      /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL is always divisible by a
+	 potential_ew of 1, so this is guaranteed to succeed eventually.  */
+ if (multiple_p (BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL,
+ potential_ew, &mode_units)
+ && riscv_vector::get_vector_mode (elem_mode,
+ mode_units).exists (&vmode))
break;
/* We may get here if we tried an element size that's larger than
@@ -1186,7 +1197,7 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
}
else
{
- vmode = E_RVVM8QImode;
+      vmode = get_lmul_mode (QImode, TARGET_MAX_LMUL).require ();
}
/* A memcpy libcall in the worst case takes 3 instructions to prepare the
@@ -1890,6 +1890,18 @@ get_mask_mode (machine_mode mode)
return get_vector_mode (BImode, nunits).require ();
}
+/* Return the vector mode of MODE elements whose size corresponds to LMUL
+   vector registers, if such a mode exists.  */
+
+opt_machine_mode
+get_lmul_mode (scalar_mode mode, int lmul)
+{
+ poly_uint64 lmul_nunits;
+ unsigned int bytes = GET_MODE_SIZE (mode);
+ if (multiple_p (BYTES_PER_RISCV_VECTOR * lmul, bytes, &lmul_nunits))
+ return get_vector_mode (mode, lmul_nunits);
+ return E_VOIDmode;
+}
+
/* Return the appropriate M1 mode for MODE. */
static opt_machine_mode
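
A hedged illustration of get_lmul_mode, assuming a minimum VLEN of 128 bits
so that BYTES_PER_RISCV_VECTOR covers 16 bytes per vector chunk (the mode
names are the standard RVV machine modes for those element/LMUL pairs):

    machine_mode vmode;
    if (get_lmul_mode (QImode, 8).exists (&vmode))
      /* bytes = 1, so lmul_nunits = 16 * 8 / 1 = 128 -> RVVM8QImode.  */
      gcc_assert (vmode == RVVM8QImode);
    if (get_lmul_mode (DImode, 8).exists (&vmode))
      /* bytes = 8, so lmul_nunits = 16 * 8 / 8 = 16 -> RVVM8DImode.  */
      gcc_assert (vmode == RVVM8DImode);
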
@@ -2745,12 +2745,6 @@
FAIL;
})
-;; Inlining general memmove is a pessimisation: we can't avoid having to decide
-;; which direction to go at runtime, which is costly in instruction count
-;; however for situations where the entire move fits in one vector operation
-;; we can do all reads before doing any writes so we don't have to worry
-;; so generate the inline vector code in such situations
-;; nb. prefer scalar path for tiny memmoves.
(define_expand "movmem<mode>"
[(parallel [(set (match_operand:BLK 0 "general_operand")
(match_operand:BLK 1 "general_operand"))
@@ -2758,10 +2752,8 @@
(use (match_operand:SI 3 "const_int_operand"))])]
"TARGET_VECTOR"
{
- if ((INTVAL (operands[2]) >= TARGET_MIN_VLEN / 8)
- && (INTVAL (operands[2]) <= TARGET_MIN_VLEN)
- && riscv_vector::expand_block_move (operands[0], operands[1],
- operands[2]))
+ if (riscv_vector::expand_block_move (operands[0], operands[1], operands[2],
+ true))
DONE;
else
FAIL;
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m8" } */
signed char e;
short f = 8;
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -frename-registers" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -frename-registers -mrvv-max-lmul=m8" } */
signed char e;
short f = 8;
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
#include "def.h"
@@ -143,10 +143,6 @@ DEF_RET1_ARG9 (v1024qi)
DEF_RET1_ARG9 (v2048qi)
DEF_RET1_ARG9 (v4096qi)
-// RET1_ARG0 tests
-/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 9 } } */
-/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */
-
// v1qi tests: return value (lbu) and function prologue (sb)
// 1 lbu per test, argnum sb's when args > 1
/* { dg-final { scan-assembler-times {lbu\s+a0,\s*[0-9]+\(sp\)} 8 } } */
@@ -169,7 +165,4 @@ DEF_RET1_ARG9 (v4096qi)
/* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */
// v32-4096qi tests: return value (vse8.v)
-/* { dg-final { scan-assembler-times {vse8.v\s+v[0-9],\s*[0-9]+\(a0\)} 74 } } */
-// v1024-4096qi_ARG1 tests: return value (vse64.v)
-// for some reason ARG1 returns using vse64 instead of vse8
-/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)\s+ret} 3 } } */
+/* { dg-final { scan-assembler-times {vse8.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
#include "def.h"
@@ -133,10 +133,6 @@ DEF_RET1_ARG9 (v512hi)
DEF_RET1_ARG9 (v1024hi)
DEF_RET1_ARG9 (v2048hi)
-// RET1_ARG0 tests
-/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 8 } } */
-/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */
-
// v1hi tests: return value (lhu) and function prologue (sh)
// 1 lhu per test, argnum sh's when args > 1
/* { dg-final { scan-assembler-times {lhu\s+a0,\s*[0-9]+\(sp\)} 8 } } */
@@ -155,7 +151,4 @@ DEF_RET1_ARG9 (v2048hi)
/* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */
// v16-2048hi tests: return value (vse16.v)
-/* { dg-final { scan-assembler-times {vse16.v\s+v[0-9],\s*[0-9]+\(a0\)} 74 } } */
-// v512-2048qi_ARG1 tests: return value (vse64.v)
-// for some reason ARG1 returns using vse64 instead of vse16
-/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)\s+ret} 3 } } */
+/* { dg-final { scan-assembler-times {vse16.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
#include "def.h"
@@ -123,10 +123,6 @@ DEF_RET1_ARG9 (v256si)
DEF_RET1_ARG9 (v512si)
DEF_RET1_ARG9 (v1024si)
-// RET1_ARG0 tests
-/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 7 } } */
-/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */
-
// v1si tests: return value (lw) and function prologue (sw)
// 1 lw per test, argnum sw's when args > 1
/* { dg-final { scan-assembler-times {lw\s+a0,\s*[0-9]+\(sp\)} 8 } } */
@@ -140,7 +136,4 @@ DEF_RET1_ARG9 (v1024si)
/* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */
// v8-1024si tests: return value (vse32.v)
-/* { dg-final { scan-assembler-times {vse32.v\s+v[0-9],\s*[0-9]+\(a0\)} 74 } } */
-// 256-1024si tests: return value (vse64.v)
-// for some reason ARG1 returns using vse64 instead of vse32
-/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)\s+ret} 3 } } */
+/* { dg-final { scan-assembler-times {vse32.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
#include "def.h"
@@ -113,10 +113,6 @@ DEF_RET1_ARG9 (v128di)
DEF_RET1_ARG9 (v256di)
DEF_RET1_ARG9 (v512di)
-// RET1_ARG0 tests
-/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 6 } } */
-/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */
-
// v1di and v2di tests: return value (ld) and function prologue (sd)
// - 1 ld per v1di and 2 ld per v2di with args > 1
// - argnum sd's per v1di when argnum > 1
@@ -125,4 +121,4 @@ DEF_RET1_ARG9 (v512di)
/* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */
// v4-512di tests: return value (vse64.v)
-/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)} 77 } } */
+/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
#include "def.h"
@@ -133,10 +133,6 @@ DEF_RET1_ARG9 (v512hf)
DEF_RET1_ARG9 (v1024hf)
DEF_RET1_ARG9 (v2048hf)
-// RET1_ARG0 tests
-/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 8 } } */
-/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */
-
// v1hf tests: return value (lhu) and function prologue (sh)
// 1 lhu per test, argnum sh's when args > 1
/* { dg-final { scan-assembler-times {lhu\s+a[0-1],\s*[0-9]+\(sp\)} 8 } } */
@@ -155,7 +151,4 @@ DEF_RET1_ARG9 (v2048hf)
/* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */
// v16-2048hf tests: return value (vse16.v)
-/* { dg-final { scan-assembler-times {vse16.v\s+v[0-9],\s*[0-9]+\(a0\)} 74 } } */
-// v512-2048qf_ARG1 tests: return value (vse64.v)
-// for some reason ARG1 returns using vse64 instead of vse16
-/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)\s+ret} 3 } } */
+/* { dg-final { scan-assembler-times {vse16.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
#include "def.h"
@@ -123,10 +123,6 @@ DEF_RET1_ARG9 (v256sf)
DEF_RET1_ARG9 (v512sf)
DEF_RET1_ARG9 (v1024sf)
-// RET1_ARG0 tests
-/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 7 } } */
-/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */
-
// v1sf tests: return value (lw) and function prologue (sw)
// 1 lw per test, argnum sw's when args > 1
/* { dg-final { scan-assembler-times {lw\s+a[0-1],\s*[0-9]+\(sp\)} 8 } } */
@@ -140,7 +136,4 @@ DEF_RET1_ARG9 (v1024sf)
/* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */
// v8-1024sf tests: return value (vse32.v)
-/* { dg-final { scan-assembler-times {vse32.v\s+v[0-9],\s*[0-9]+\(a0\)} 74 } } */
-// 256-1024sf tests: return value (vse64.v)
-// for some reason ARG1 returns using vse64 instead of vse32
-/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)\s+ret} 3 } } */
+/* { dg-final { scan-assembler-times {vse32.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mrvv-vector-bits=scalable -mabi=lp64d -O3 -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
#include "def.h"
@@ -113,10 +113,6 @@ DEF_RET1_ARG9 (v128df)
DEF_RET1_ARG9 (v256df)
DEF_RET1_ARG9 (v512df)
-// RET1_ARG0 tests
-/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 6 } } */
-/* { dg-final { scan-assembler-times {call\s+memset} 3 } } */
-
// v1df and v2df tests: return value (ld) and function prologue (sd)
// - 1 ld per v1df and 2 ld per v2df with args > 1
// - argnum sd's per v1df when argnum > 1
@@ -125,4 +121,4 @@ DEF_RET1_ARG9 (v512df)
/* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */
// v4-512df tests: return value (vse64.v)
-/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)} 77 } } */
+/* { dg-final { scan-assembler-times {vse64.v\s+v[0-9],\s*[0-9]+\(a0\)} 80 } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
#include "def.h"
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
#include "def.h"
@@ -12,7 +12,7 @@ extern void *memcpy(void *__restrict dest, const void *__restrict src, __SIZE_TY
/* memcpy should be implemented using the cpymem pattern.
** f1:
XX \.L\d+: # local label is ignored
-** vsetvli\s+[ta][0-7],a2,e8,m8,ta,ma
+** vsetvli\s+[ta][0-7],a2,e8,m1,ta,ma
** vle8\.v\s+v\d+,0\(a1\)
** vse8\.v\s+v\d+,0\(a0\)
** add\s+a1,a1,[ta][0-7]
@@ -31,7 +31,7 @@ void f1 (void *a, void *b, __SIZE_TYPE__ l)
overflow is undefined.
** f2:
XX \.L\d+: # local label is ignored
-** vsetvli\s+[ta][0-7],a2,e8,m8,ta,ma
+** vsetvli\s+[ta][0-7],a2,e8,m1,ta,ma
** vle8\.v\s+v\d+,0\(a1\)
** vse8\.v\s+v\d+,0\(a0\)
** add\s+a1,a1,[ta][0-7]
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-additional-options "-O1 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-additional-options "-O1 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m8" } */
/* { dg-add-options riscv_v } */
/* { dg-final { check-function-bodies "**" "" } } */
@@ -7,13 +7,14 @@
/* Tiny memmoves should not be vectorised.
** f1:
-** li\s+a2,\d+
-** tail\s+memmove
+** lbu\s+[ta][0-7],0\(a1\)
+** sb\s+[ta][0-7],0\(a0\)
+** ret
*/
char *
f1 (char *a, char const *b)
{
- return __builtin_memmove (a, b, MIN_VECTOR_BYTES - 1);
+ return __builtin_memmove (a, b, 1);
}
/* Vectorise+inline minimum vector register width with LMUL=1
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
#include "riscv_vector.h"
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
#include "riscv_vector.h"
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
#include "riscv_vector.h"
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
#include "riscv_vector.h"
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
#include "riscv_vector.h"
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
#include "riscv_vector.h"
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
#include "riscv_vector.h"
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
#include "riscv_vector.h"
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
#include "riscv_vector.h"
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl" } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize -mrvv-vector-bits=zvl -mrvv-max-lmul=m8" } */
#include "riscv_vector.h"
@@ -52,7 +52,7 @@ int main() {
printf("%d\n", m);
}
-/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
/* { dg-final { scan-assembler-not {vsetivli} } } */
-/* { dg-final { scan-assembler-times {vsetvli\tzero,\s*[a-x0-9]+,\s*e8,\s*m2,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,\s*32} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\t[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*m1,\s*t[au],\s*m[au]} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,\s*32} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
@@ -62,7 +62,7 @@ int main() {
return 0;
}
-/* { dg-final { scan-assembler-times {vsetvli} 4 } } */
+/* { dg-final { scan-assembler-times {vsetvli} 5 } } */
/* { dg-final { scan-assembler-not {vsetivli} } } */
-/* { dg-final { scan-assembler-times {vsetvli\tzero,\s*[a-x0-9]+,\s*e8,\s*m2,\s*t[au],\s*m[au]} 1 } } */
-/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,\s*32} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\t[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*m1,\s*t[au],\s*m[au]} 2 } } */
+/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,\s*32} 2 } } */