@@ -63,6 +63,16 @@
# endif
#endif
+#if VEC_SIZE == 16 /* LOG_VEC_SIZE is log2 (VEC_SIZE).  */
+# define LOG_VEC_SIZE 4
+#elif VEC_SIZE == 32
+# define LOG_VEC_SIZE 5
+#elif VEC_SIZE == 64
+# define LOG_VEC_SIZE 6
+#else
+# error Unsupported VEC_SIZE
+#endif
+
#define PAGE_SIZE 4096
#ifndef SECTION
@@ -196,14 +206,17 @@ L(return):
ret
#endif
+ .p2align 4
L(loop_start):
VMOVU %VEC(0), (VEC_SIZE * 2)(%rdi)
VMOVU %VEC(0), (VEC_SIZE * 3)(%rdi)
cmpq $(VEC_SIZE * 8), %rdx
jbe L(loop_end)
+ leaq -(VEC_SIZE * 4 + 1)(%rdi, %rdx), %rcx /* rcx = rdi + rdx - (4 * VEC_SIZE + 1).  */
andq $-(VEC_SIZE * 2), %rdi
+ subq %rdi, %rcx /* Make rcx relative to the aligned-down rdi.  */
subq $-(VEC_SIZE * 4), %rdi
- leaq -(VEC_SIZE * 4)(%rax, %rdx), %rcx
+ sarq $(LOG_VEC_SIZE + 2), %rcx /* Divide by 4 * VEC_SIZE: rcx = L(loop) trip count.  */
.p2align 4
L(loop):
VMOVA %VEC(0), (%rdi)
@@ -211,8 +224,8 @@ L(loop):
VMOVA %VEC(0), (VEC_SIZE * 2)(%rdi)
VMOVA %VEC(0), (VEC_SIZE * 3)(%rdi)
subq $-(VEC_SIZE * 4), %rdi
- cmpq %rcx, %rdi
- jb L(loop)
+ decq %rcx
+ jne L(loop)
L(loop_end):
/* NB: rax is set as ptr in MEMSET_VDUP_TO_VEC0_AND_SET_RETURN.
rdx as length is also unchanged. */
The following commit:

author	Noah Goldstein <goldstein.w.n@gmail.com>
	Thu, 20 May 2021 17:13:51 +0000 (13:13 -0400)
commit	6abf27980a947f9b6e514d6b33b83059d39566ae

introduced a bug in memset: if destination + length overflowed, memset would return early rather than raising a segmentation fault, which is the expected behavior.

This commit fixes that bug.

Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
---
 .../multiarch/memset-vec-unaligned-erms.S | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)