From fd42d5ac491308b2ffa955ea11b4843813387510 Mon Sep 17 00:00:00 2001
From: Hau Hsu <hau.hsu@sifive.com>
Date: Wed, 30 Oct 2024 11:10:25 -0600
Subject: [PATCH] risc-v: Enable vectorized memset via ifunc
This patch adds the ability for glibc to select a vectorized memset
implementation for RISC-V using the ifunc/hwprobe mechanism.
The ifunc/hwprobe side of the implementation is quite simple. We call hwprobe
with the right key and we check the returned value to see if vector is
enabled. If so, then we use the vector memset implementation. Else we
fall back to the generic memset implementation.
The guts of the memset itself are quite simple as well and represent a
generic vector implementation for riscv. In particular we use a VLA
style loop where each iteration of the loop tries to handle as much data
as the cpu core indicates it can reasonably handle. On something like
the spacemit design we can handle up to 256 bytes of data per loop
iteration (256bit vector length * LMUL8).
Naturally this has been tested with the glibc testsuite. I've tested on
kernels with and without hwprobe support. For the latter we naturally
only use memset_generic.
The memset implementation is originally from Hau Hsu (SiFive), posted to
libc-alpha back in May 2023. Sergei from Rivos posted an alternate,
more complex implementation back in Feb 2023. I took the simpler
implementation largely because it included performance data. Sergei's
could well be better, but it likely depends on uarch details like
quality of branch predictors as Sergei's has multiple conditional
branches to select between a few variants.
I've got several more of these routines queued up that I'll submit once
we're acked on memset. Obviously any feedback on memset will be
incorporated into the other routines.
Co-authored-by: Jerry Shih <jerry.shih@sifive.com>
Co-authored-by: Jeff Law <jlaw@ventanamicro.com>
---
sysdeps/riscv/multiarch/memset-generic.c | 26 +++++++++
sysdeps/riscv/multiarch/memset_vector.S | 51 ++++++++++++++++
.../unix/sysv/linux/riscv/multiarch/Makefile | 3 +
.../linux/riscv/multiarch/ifunc-impl-list.c | 12 ++++
.../unix/sysv/linux/riscv/multiarch/memset.c | 58 +++++++++++++++++++
5 files changed, 150 insertions(+)
create mode 100644 sysdeps/riscv/multiarch/memset-generic.c
create mode 100644 sysdeps/riscv/multiarch/memset_vector.S
create mode 100644 sysdeps/unix/sysv/linux/riscv/multiarch/memset.c
new file mode 100644
@@ -0,0 +1,26 @@
+/* Re-include the default memset implementation.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <string.h>
+/* In libc: set MEMSET and make libc_hidden_builtin_def expand to nothing.  */
+#if IS_IN(libc)
+# define MEMSET __memset_generic
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(x)
+#endif
+#include <string/memset.c>
new file mode 100644
@@ -0,0 +1,51 @@
+/* memset for RISC-V, using vectors
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+/* Registers read on entry: destination, fill value and byte count.  */
+#define dst a0
+#define value a1
+#define num a2
+/* Scratch: element count returned by vsetvli, and the running store pointer.  */
+#define ivl a3
+#define dst_ptr a5
+/* Vector setup: register grouping (LMUL) and the register holding the fill.  */
+#define ELEM_LMUL_SETTING m8
+#define vdata v0
+
+ .attribute unaligned_access, 1
+ .option arch, +v
+/* memset (dst = a0, value = a1, num = a2): store value into num bytes at dst.  */
+ENTRY(__memset_vector)
+/* Advance a copy of dst; a0 itself is never written below.  */
+ mv dst_ptr, dst
+/* Set ivl for up to num 8-bit elements, then copy value into vdata's elements.  */
+ vsetvli ivl, num, e8, ELEM_LMUL_SETTING, ta, ma
+ vmv.v.x vdata, value
+/* Each pass stores ivl bytes, then recomputes ivl for the bytes that remain.  */
+L(loop):
+ vse8.v vdata, (dst_ptr)
+ sub num, num, ivl
+ add dst_ptr, dst_ptr, ivl
+ vsetvli ivl, num, e8, ELEM_LMUL_SETTING, ta, ma
+ bnez num, L(loop)
+/* num has reached zero, so nothing is left to store.  */
+ ret
+
+END(__memset_vector)
@@ -3,6 +3,9 @@ sysdep_routines += \
memcpy \
memcpy-generic \
memcpy_noalignment \
+ memset \
+ memset-generic \
+ memset_vector \
# sysdep_routines
CFLAGS-memcpy_noalignment.c += -mno-strict-align
@@ -27,6 +27,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
size_t i = max;
bool fast_unaligned = false;
+ bool v_ext = false;
struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
@@ -34,10 +35,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
== RISCV_HWPROBE_MISALIGNED_FAST)
fast_unaligned = true;
+ pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
+ pair.value = 0;
+ if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
+ && (pair.value & RISCV_HWPROBE_IMA_V) == RISCV_HWPROBE_IMA_V)
+ v_ext = true;
+
IFUNC_IMPL (i, name, memcpy,
IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
__memcpy_noalignment)
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
+ IFUNC_IMPL (i, name, memset,
+ IFUNC_IMPL_ADD (array, i, memset, v_ext,
+ __memset_vector)
+ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_generic))
+
return 0;
}
new file mode 100644
@@ -0,0 +1,58 @@
+/* Multiple versions of memset.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+/* Redefine memset so that the compiler won't complain about the type
+ mismatch with the IFUNC selector in strong_alias, below. */
+# undef memset
+# define memset __redirect_memset
+# include <stdint.h>
+# include <string.h>
+# include <ifunc-init.h>
+# include <riscv-ifunc.h>
+# include <sys/hwprobe.h>
+
+extern __typeof (__redirect_memset) __libc_memset;
+/* The two implementations that select_memset_ifunc chooses between.  */
+extern __typeof (__redirect_memset) __memset_generic attribute_hidden;
+extern __typeof (__redirect_memset) __memset_vector attribute_hidden;
+
+static inline __typeof (__redirect_memset) *
+select_memset_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
+{
+  /* Use the vector memset only if the probe succeeds and reports V.  */
+  unsigned long long int ext_mask;
+  if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_IMA_EXT_0,
+                           &ext_mask) != 0)
+    return __memset_generic;
+  return ((ext_mask & RISCV_HWPROBE_IMA_V) == RISCV_HWPROBE_IMA_V
+          ? __memset_vector : __memset_generic);
+}
+/* NOTE(review): presumably defines the __libc_memset ifunc -- confirm.  */
+riscv_libc_ifunc (__libc_memset, select_memset_ifunc);
+/* Undo the __redirect_memset rename so the alias below names memset.  */
+# undef memset
+strong_alias (__libc_memset, memset);
+# ifdef SHARED
+__hidden_ver1 (memset, __GI_memset, __redirect_memset)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memset);
+# endif
+#else
+# include <string/memset.c>
+#endif
--
2.45.2