@@ -12583,6 +12583,22 @@ riscv_stack_clash_protection_alloca_probe_range (void)
return STACK_CLASH_CALLER_GUARD;
}
+static bool
+riscv_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
+ unsigned alignment,
+ enum by_pieces_operation op, bool speed_p)
+{
+ /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  For set/clear with
+ size > UNITS_PER_WORD, by pieces uses vector broadcasts with
+ UNITS_PER_WORD size pieces.  Use setmem<mode> instead (bigger chunks).  */
+ if (TARGET_VECTOR && stringop_strategy & STRATEGY_VECTOR
+ && (op == CLEAR_BY_PIECES || op == SET_BY_PIECES)
+ && speed_p && size > UNITS_PER_WORD)
+ return false;
+
+ return default_use_by_pieces_infrastructure_p (size, alignment, op, speed_p);
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -12948,6 +12964,9 @@ riscv_stack_clash_protection_alloca_probe_range (void)
#undef TARGET_C_MODE_FOR_FLOATING_TYPE
#define TARGET_C_MODE_FOR_FLOATING_TYPE riscv_c_mode_for_floating_type
+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P riscv_use_by_pieces_infrastructure_p
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-riscv.h"
@@ -51,4 +51,5 @@ void p(int buf, __builtin_va_list ab, int q) {
} while (k);
}
-/* { dg-final { scan-assembler-times {vsetivli\tzero,\s*4,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 2 } } */
+/* { dg-final { scan-assembler-times {vsetivli\tzero,\s*4,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli\tzero,\s*8,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 } } */
@@ -5,15 +5,17 @@
#define MIN_VECTOR_BYTES (__riscv_v_min_vlen / 8)
-/* Small memsets shouldn't be vectorised.
+/* Vectorise with no loop.
** f1:
** (
-** sb\s+a1,0\(a0\)
-** ...
+** vsetivli\s+zero,\d+,e8,m1,ta,ma
** |
-** li\s+a2,\d+
-** tail\s+memset
+** li\s+a\d+,\d+
+** vsetvli\s+zero,a\d+,e8,m1,ta,ma
** )
+** vmv\.v\.x\s+v\d+,a1
+** vse8\.v\s+v\d+,0\(a0\)
+** ret
*/
void *
f1 (void *a, int const b)
@@ -5,15 +5,17 @@
#define MIN_VECTOR_BYTES (__riscv_v_min_vlen / 8)
-/* Small memsets shouldn't be vectorised.
+/* Vectorise with no loop.
** f1:
** (
-** sb\s+a1,0\(a0\)
-** ...
+** vsetivli\s+zero,\d+,e8,m1,ta,ma
** |
-** li\s+a2,\d+
-** tail\s+memset
+** li\s+a\d+,\d+
+** vsetvli\s+zero,a\d+,e8,m1,ta,ma
** )
+** vmv\.v\.x\s+v\d+,a1
+** vse8\.v\s+v\d+,0\(a0\)
+** ret
*/
void *
f1 (void *a, int const b)