@@ -46,13 +46,11 @@ sysdep_routines += \
stpcpy-evex \
stpcpy-sse2 \
stpcpy-sse2-unaligned \
- stpcpy-ssse3 \
stpncpy-avx2 \
stpncpy-avx2-rtm \
stpncpy-c \
stpncpy-evex \
stpncpy-sse2-unaligned \
- stpncpy-ssse3 \
strcasecmp_l-avx2 \
strcasecmp_l-avx2-rtm \
strcasecmp_l-evex \
@@ -83,7 +81,6 @@ sysdep_routines += \
strcpy-evex \
strcpy-sse2 \
strcpy-sse2-unaligned \
- strcpy-ssse3 \
strcspn-c \
strcspn-sse2 \
strlen-avx2 \
@@ -110,7 +107,6 @@ sysdep_routines += \
strncpy-c \
strncpy-evex \
strncpy-sse2-unaligned \
- strncpy-ssse3 \
strnlen-avx2 \
strnlen-avx2-rtm \
strnlen-evex \
@@ -399,8 +399,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/stpncpy.c. */
IFUNC_IMPL (i, name, stpncpy,
- IFUNC_IMPL_ADD (array, i, stpncpy, CPU_FEATURE_USABLE (SSSE3),
- __stpncpy_ssse3)
IFUNC_IMPL_ADD (array, i, stpncpy, CPU_FEATURE_USABLE (AVX2),
__stpncpy_avx2)
IFUNC_IMPL_ADD (array, i, stpncpy,
@@ -417,8 +415,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/stpcpy.c. */
IFUNC_IMPL (i, name, stpcpy,
- IFUNC_IMPL_ADD (array, i, stpcpy, CPU_FEATURE_USABLE (SSSE3),
- __stpcpy_ssse3)
IFUNC_IMPL_ADD (array, i, stpcpy, CPU_FEATURE_USABLE (AVX2),
__stpcpy_avx2)
IFUNC_IMPL_ADD (array, i, stpcpy,
@@ -567,8 +563,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)),
__strcpy_evex)
- IFUNC_IMPL_ADD (array, i, strcpy, CPU_FEATURE_USABLE (SSSE3),
- __strcpy_ssse3)
IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_sse2))
@@ -644,8 +638,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)),
__strncpy_evex)
- IFUNC_IMPL_ADD (array, i, strncpy, CPU_FEATURE_USABLE (SSSE3),
- __strncpy_ssse3)
IFUNC_IMPL_ADD (array, i, strncpy, 1,
__strncpy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
deleted file mode 100644
@@ -1,3 +0,0 @@
-#define USE_AS_STPCPY
-#define STRCPY __stpcpy_ssse3
-#include "strcpy-ssse3.S"
deleted file mode 100644
@@ -1,4 +0,0 @@
-#define USE_AS_STPCPY
-#define USE_AS_STRNCPY
-#define STRCPY __stpncpy_ssse3
-#include "strcpy-ssse3.S"
deleted file mode 100644
@@ -1,3550 +0,0 @@
-/* strcpy with SSSE3
- Copyright (C) 2011-2022 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# ifndef USE_AS_STRCAT
-# include <sysdep.h>
-
-# ifndef STRCPY
-# define STRCPY __strcpy_ssse3
-# endif
-
- .section .text.ssse3,"ax",@progbits
-ENTRY (STRCPY)
-
- mov %rsi, %rcx
-# ifdef USE_AS_STRNCPY
- mov %RDX_LP, %R8_LP
-# endif
- mov %rdi, %rdx
-# ifdef USE_AS_STRNCPY
- test %R8_LP, %R8_LP
- jz L(Exit0)
- cmp $8, %R8_LP
- jbe L(StrncpyExit8Bytes)
-# endif
- cmpb $0, (%rcx)
- jz L(Exit1)
- cmpb $0, 1(%rcx)
- jz L(Exit2)
- cmpb $0, 2(%rcx)
- jz L(Exit3)
- cmpb $0, 3(%rcx)
- jz L(Exit4)
- cmpb $0, 4(%rcx)
- jz L(Exit5)
- cmpb $0, 5(%rcx)
- jz L(Exit6)
- cmpb $0, 6(%rcx)
- jz L(Exit7)
- cmpb $0, 7(%rcx)
- jz L(Exit8)
-# ifdef USE_AS_STRNCPY
- cmp $16, %r8
- jb L(StrncpyExit15Bytes)
-# endif
- cmpb $0, 8(%rcx)
- jz L(Exit9)
- cmpb $0, 9(%rcx)
- jz L(Exit10)
- cmpb $0, 10(%rcx)
- jz L(Exit11)
- cmpb $0, 11(%rcx)
- jz L(Exit12)
- cmpb $0, 12(%rcx)
- jz L(Exit13)
- cmpb $0, 13(%rcx)
- jz L(Exit14)
- cmpb $0, 14(%rcx)
- jz L(Exit15)
-# ifdef USE_AS_STRNCPY
- cmp $16, %r8
- je L(Exit16)
-# endif
- cmpb $0, 15(%rcx)
- jz L(Exit16)
-# endif
-
-# ifdef USE_AS_STRNCPY
- mov %rcx, %rsi
- sub $16, %r8
- and $0xf, %rsi
-
-/* add 16 bytes rcx_offset to r8 */
-
- add %rsi, %r8
-# endif
- lea 16(%rcx), %rsi
- and $-16, %rsi
- pxor %xmm0, %xmm0
- mov (%rcx), %r9
- mov %r9, (%rdx)
- pcmpeqb (%rsi), %xmm0
- mov 8(%rcx), %r9
- mov %r9, 8(%rdx)
-
-/* convert byte mask in xmm0 to bit mask */
-
- pmovmskb %xmm0, %rax
- sub %rcx, %rsi
-
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %rax, %rax
- jnz L(CopyFrom1To16Bytes)
-
- mov %rdx, %rax
- lea 16(%rdx), %rdx
- and $-16, %rdx
- sub %rdx, %rax
-
-# ifdef USE_AS_STRNCPY
- add %rax, %rsi
- lea -1(%rsi), %rsi
- and $1<<31, %esi
- test %rsi, %rsi
- jnz L(ContinueCopy)
- lea 16(%r8), %r8
-
-L(ContinueCopy):
-# endif
- sub %rax, %rcx
- mov %rcx, %rax
- and $0xf, %rax
- mov $0, %rsi
-
-/* case: rcx_offset == rdx_offset */
-
- jz L(Align16Both)
-
- cmp $8, %rax
- jae L(ShlHigh8)
- cmp $1, %rax
- je L(Shl1)
- cmp $2, %rax
- je L(Shl2)
- cmp $3, %rax
- je L(Shl3)
- cmp $4, %rax
- je L(Shl4)
- cmp $5, %rax
- je L(Shl5)
- cmp $6, %rax
- je L(Shl6)
- jmp L(Shl7)
-
-L(ShlHigh8):
- je L(Shl8)
- cmp $9, %rax
- je L(Shl9)
- cmp $10, %rax
- je L(Shl10)
- cmp $11, %rax
- je L(Shl11)
- cmp $12, %rax
- je L(Shl12)
- cmp $13, %rax
- je L(Shl13)
- cmp $14, %rax
- je L(Shl14)
- jmp L(Shl15)
-
-L(Align16Both):
- movaps (%rcx), %xmm1
- movaps 16(%rcx), %xmm2
- movaps %xmm1, (%rdx)
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %rax
- lea 16(%rsi), %rsi
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %rax, %rax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%rcx, %rsi), %xmm3
- movaps %xmm2, (%rdx, %rsi)
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %rax
- lea 16(%rsi), %rsi
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %rax, %rax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%rcx, %rsi), %xmm4
- movaps %xmm3, (%rdx, %rsi)
- pcmpeqb %xmm4, %xmm0
- pmovmskb %xmm0, %rax
- lea 16(%rsi), %rsi
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %rax, %rax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%rcx, %rsi), %xmm1
- movaps %xmm4, (%rdx, %rsi)
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %rax
- lea 16(%rsi), %rsi
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %rax, %rax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%rcx, %rsi), %xmm2
- movaps %xmm1, (%rdx, %rsi)
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %rax
- lea 16(%rsi), %rsi
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %rax, %rax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%rcx, %rsi), %xmm3
- movaps %xmm2, (%rdx, %rsi)
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %rax
- lea 16(%rsi), %rsi
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %rax, %rax
- jnz L(CopyFrom1To16Bytes)
-
- movaps %xmm3, (%rdx, %rsi)
- mov %rcx, %rax
- lea 16(%rcx, %rsi), %rcx
- and $-0x40, %rcx
- sub %rcx, %rax
- sub %rax, %rdx
-# ifdef USE_AS_STRNCPY
- lea 112(%r8, %rax), %r8
-# endif
- mov $-0x40, %rsi
-
- .p2align 4
-L(Aligned64Loop):
- movaps (%rcx), %xmm2
- movaps %xmm2, %xmm4
- movaps 16(%rcx), %xmm5
- movaps 32(%rcx), %xmm3
- movaps %xmm3, %xmm6
- movaps 48(%rcx), %xmm7
- pminub %xmm5, %xmm2
- pminub %xmm7, %xmm3
- pminub %xmm2, %xmm3
- pcmpeqb %xmm0, %xmm3
- pmovmskb %xmm3, %rax
- lea 64(%rdx), %rdx
- lea 64(%rcx), %rcx
-# ifdef USE_AS_STRNCPY
- sub $64, %r8
- jbe L(StrncpyLeaveCase2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Aligned64Leave)
- movaps %xmm4, -64(%rdx)
- movaps %xmm5, -48(%rdx)
- movaps %xmm6, -32(%rdx)
- movaps %xmm7, -16(%rdx)
- jmp L(Aligned64Loop)
-
-L(Aligned64Leave):
-# ifdef USE_AS_STRNCPY
- lea 48(%r8), %r8
-# endif
- pcmpeqb %xmm4, %xmm0
- pmovmskb %xmm0, %rax
- test %rax, %rax
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm5, %xmm0
-# ifdef USE_AS_STRNCPY
- lea -16(%r8), %r8
-# endif
- pmovmskb %xmm0, %rax
- movaps %xmm4, -64(%rdx)
- test %rax, %rax
- lea 16(%rsi), %rsi
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm6, %xmm0
-# ifdef USE_AS_STRNCPY
- lea -16(%r8), %r8
-# endif
- pmovmskb %xmm0, %rax
- movaps %xmm5, -48(%rdx)
- test %rax, %rax
- lea 16(%rsi), %rsi
- jnz L(CopyFrom1To16Bytes)
-
- movaps %xmm6, -32(%rdx)
- pcmpeqb %xmm7, %xmm0
-# ifdef USE_AS_STRNCPY
- lea -16(%r8), %r8
-# endif
- pmovmskb %xmm0, %rax
- lea 16(%rsi), %rsi
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl1):
- movaps -1(%rcx), %xmm1
- movaps 15(%rcx), %xmm2
-L(Shl1Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %rax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit1Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl1LoopExit)
-
- palignr $1, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 31(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm1
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit1Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl1LoopExit)
-
- palignr $1, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- movaps 31(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit1Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl1LoopExit)
-
- palignr $1, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 31(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit1Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl1LoopExit)
-
- palignr $1, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- lea 31(%rcx), %rcx
- lea 16(%rdx), %rdx
-
- mov %rcx, %rax
- and $-0x40, %rcx
- sub %rcx, %rax
- lea -15(%rcx), %rcx
- sub %rax, %rdx
-# ifdef USE_AS_STRNCPY
- add %rax, %r8
-# endif
- movaps -1(%rcx), %xmm1
-
-/* 64 bytes loop */
- .p2align 4
-L(Shl1LoopStart):
- movaps 15(%rcx), %xmm2
- movaps 31(%rcx), %xmm3
- movaps %xmm3, %xmm6
- movaps 47(%rcx), %xmm4
- movaps %xmm4, %xmm7
- movaps 63(%rcx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %rax
- movaps %xmm5, %xmm7
- palignr $1, %xmm4, %xmm5
- test %rax, %rax
- palignr $1, %xmm3, %xmm4
- jnz L(Shl1Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %r8
- jbe L(StrncpyLeave1)
-# endif
- palignr $1, %xmm2, %xmm3
- lea 64(%rcx), %rcx
- palignr $1, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%rdx)
- movaps %xmm4, 32(%rdx)
- movaps %xmm3, 16(%rdx)
- movaps %xmm2, (%rdx)
- lea 64(%rdx), %rdx
- jmp L(Shl1LoopStart)
-
-L(Shl1LoopExit):
- movdqu -1(%rcx), %xmm1
- mov $15, %rsi
- movdqu %xmm1, -1(%rdx)
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl2):
- movaps -2(%rcx), %xmm1
- movaps 14(%rcx), %xmm2
-L(Shl2Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %rax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit2Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl2LoopExit)
-
- palignr $2, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 30(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm1
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit2Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl2LoopExit)
-
- palignr $2, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- movaps 30(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit2Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl2LoopExit)
-
- palignr $2, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 30(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit2Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl2LoopExit)
-
- palignr $2, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- lea 30(%rcx), %rcx
- lea 16(%rdx), %rdx
-
- mov %rcx, %rax
- and $-0x40, %rcx
- sub %rcx, %rax
- lea -14(%rcx), %rcx
- sub %rax, %rdx
-# ifdef USE_AS_STRNCPY
- add %rax, %r8
-# endif
- movaps -2(%rcx), %xmm1
-
-/* 64 bytes loop */
- .p2align 4
-L(Shl2LoopStart):
- movaps 14(%rcx), %xmm2
- movaps 30(%rcx), %xmm3
- movaps %xmm3, %xmm6
- movaps 46(%rcx), %xmm4
- movaps %xmm4, %xmm7
- movaps 62(%rcx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %rax
- movaps %xmm5, %xmm7
- palignr $2, %xmm4, %xmm5
- test %rax, %rax
- palignr $2, %xmm3, %xmm4
- jnz L(Shl2Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %r8
- jbe L(StrncpyLeave2)
-# endif
- palignr $2, %xmm2, %xmm3
- lea 64(%rcx), %rcx
- palignr $2, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%rdx)
- movaps %xmm4, 32(%rdx)
- movaps %xmm3, 16(%rdx)
- movaps %xmm2, (%rdx)
- lea 64(%rdx), %rdx
- jmp L(Shl2LoopStart)
-
-L(Shl2LoopExit):
- movdqu -2(%rcx), %xmm1
- mov $14, %rsi
- movdqu %xmm1, -2(%rdx)
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl3):
- movaps -3(%rcx), %xmm1
- movaps 13(%rcx), %xmm2
-L(Shl3Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %rax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit3Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl3LoopExit)
-
- palignr $3, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 29(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm1
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit3Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl3LoopExit)
-
- palignr $3, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- movaps 29(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit3Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl3LoopExit)
-
- palignr $3, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 29(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit3Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl3LoopExit)
-
- palignr $3, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- lea 29(%rcx), %rcx
- lea 16(%rdx), %rdx
-
- mov %rcx, %rax
- and $-0x40, %rcx
- sub %rcx, %rax
- lea -13(%rcx), %rcx
- sub %rax, %rdx
-# ifdef USE_AS_STRNCPY
- add %rax, %r8
-# endif
- movaps -3(%rcx), %xmm1
-
-/* 64 bytes loop */
- .p2align 4
-L(Shl3LoopStart):
- movaps 13(%rcx), %xmm2
- movaps 29(%rcx), %xmm3
- movaps %xmm3, %xmm6
- movaps 45(%rcx), %xmm4
- movaps %xmm4, %xmm7
- movaps 61(%rcx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %rax
- movaps %xmm5, %xmm7
- palignr $3, %xmm4, %xmm5
- test %rax, %rax
- palignr $3, %xmm3, %xmm4
- jnz L(Shl3Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %r8
- jbe L(StrncpyLeave3)
-# endif
- palignr $3, %xmm2, %xmm3
- lea 64(%rcx), %rcx
- palignr $3, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%rdx)
- movaps %xmm4, 32(%rdx)
- movaps %xmm3, 16(%rdx)
- movaps %xmm2, (%rdx)
- lea 64(%rdx), %rdx
- jmp L(Shl3LoopStart)
-
-L(Shl3LoopExit):
- movdqu -3(%rcx), %xmm1
- mov $13, %rsi
- movdqu %xmm1, -3(%rdx)
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl4):
- movaps -4(%rcx), %xmm1
- movaps 12(%rcx), %xmm2
-L(Shl4Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %rax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit4Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl4LoopExit)
-
- palignr $4, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 28(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm1
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit4Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl4LoopExit)
-
- palignr $4, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- movaps 28(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit4Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl4LoopExit)
-
- palignr $4, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 28(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit4Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl4LoopExit)
-
- palignr $4, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- lea 28(%rcx), %rcx
- lea 16(%rdx), %rdx
-
- mov %rcx, %rax
- and $-0x40, %rcx
- sub %rcx, %rax
- lea -12(%rcx), %rcx
- sub %rax, %rdx
-# ifdef USE_AS_STRNCPY
- add %rax, %r8
-# endif
- movaps -4(%rcx), %xmm1
-
-/* 64 bytes loop */
- .p2align 4
-L(Shl4LoopStart):
- movaps 12(%rcx), %xmm2
- movaps 28(%rcx), %xmm3
- movaps %xmm3, %xmm6
- movaps 44(%rcx), %xmm4
- movaps %xmm4, %xmm7
- movaps 60(%rcx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %rax
- movaps %xmm5, %xmm7
- palignr $4, %xmm4, %xmm5
- test %rax, %rax
- palignr $4, %xmm3, %xmm4
- jnz L(Shl4Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %r8
- jbe L(StrncpyLeave4)
-# endif
- palignr $4, %xmm2, %xmm3
- lea 64(%rcx), %rcx
- palignr $4, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%rdx)
- movaps %xmm4, 32(%rdx)
- movaps %xmm3, 16(%rdx)
- movaps %xmm2, (%rdx)
- lea 64(%rdx), %rdx
- jmp L(Shl4LoopStart)
-
-L(Shl4LoopExit):
- movdqu -4(%rcx), %xmm1
- mov $12, %rsi
- movdqu %xmm1, -4(%rdx)
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl5):
- movaps -5(%rcx), %xmm1
- movaps 11(%rcx), %xmm2
-L(Shl5Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %rax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit5Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl5LoopExit)
-
- palignr $5, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 27(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm1
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit5Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl5LoopExit)
-
- palignr $5, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- movaps 27(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit5Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl5LoopExit)
-
- palignr $5, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 27(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit5Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl5LoopExit)
-
- palignr $5, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- lea 27(%rcx), %rcx
- lea 16(%rdx), %rdx
-
- mov %rcx, %rax
- and $-0x40, %rcx
- sub %rcx, %rax
- lea -11(%rcx), %rcx
- sub %rax, %rdx
-# ifdef USE_AS_STRNCPY
- add %rax, %r8
-# endif
- movaps -5(%rcx), %xmm1
-
-/* 64 bytes loop */
- .p2align 4
-L(Shl5LoopStart):
- movaps 11(%rcx), %xmm2
- movaps 27(%rcx), %xmm3
- movaps %xmm3, %xmm6
- movaps 43(%rcx), %xmm4
- movaps %xmm4, %xmm7
- movaps 59(%rcx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %rax
- movaps %xmm5, %xmm7
- palignr $5, %xmm4, %xmm5
- test %rax, %rax
- palignr $5, %xmm3, %xmm4
- jnz L(Shl5Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %r8
- jbe L(StrncpyLeave5)
-# endif
- palignr $5, %xmm2, %xmm3
- lea 64(%rcx), %rcx
- palignr $5, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%rdx)
- movaps %xmm4, 32(%rdx)
- movaps %xmm3, 16(%rdx)
- movaps %xmm2, (%rdx)
- lea 64(%rdx), %rdx
- jmp L(Shl5LoopStart)
-
-L(Shl5LoopExit):
- movdqu -5(%rcx), %xmm1
- mov $11, %rsi
- movdqu %xmm1, -5(%rdx)
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl6):
- movaps -6(%rcx), %xmm1
- movaps 10(%rcx), %xmm2
-L(Shl6Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %rax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit6Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl6LoopExit)
-
- palignr $6, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 26(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm1
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit6Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl6LoopExit)
-
- palignr $6, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- movaps 26(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit6Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl6LoopExit)
-
- palignr $6, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 26(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit6Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl6LoopExit)
-
- palignr $6, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- lea 26(%rcx), %rcx
- lea 16(%rdx), %rdx
-
- mov %rcx, %rax
- and $-0x40, %rcx
- sub %rcx, %rax
- lea -10(%rcx), %rcx
- sub %rax, %rdx
-# ifdef USE_AS_STRNCPY
- add %rax, %r8
-# endif
- movaps -6(%rcx), %xmm1
-
-/* 64 bytes loop */
- .p2align 4
-L(Shl6LoopStart):
- movaps 10(%rcx), %xmm2
- movaps 26(%rcx), %xmm3
- movaps %xmm3, %xmm6
- movaps 42(%rcx), %xmm4
- movaps %xmm4, %xmm7
- movaps 58(%rcx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %rax
- movaps %xmm5, %xmm7
- palignr $6, %xmm4, %xmm5
- test %rax, %rax
- palignr $6, %xmm3, %xmm4
- jnz L(Shl6Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %r8
- jbe L(StrncpyLeave6)
-# endif
- palignr $6, %xmm2, %xmm3
- lea 64(%rcx), %rcx
- palignr $6, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%rdx)
- movaps %xmm4, 32(%rdx)
- movaps %xmm3, 16(%rdx)
- movaps %xmm2, (%rdx)
- lea 64(%rdx), %rdx
- jmp L(Shl6LoopStart)
-
-L(Shl6LoopExit):
- mov (%rcx), %r9
- mov 6(%rcx), %esi
- mov %r9, (%rdx)
- mov %esi, 6(%rdx)
- mov $10, %rsi
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl7):
- movaps -7(%rcx), %xmm1
- movaps 9(%rcx), %xmm2
-L(Shl7Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %rax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit7Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl7LoopExit)
-
- palignr $7, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 25(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm1
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit7Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl7LoopExit)
-
- palignr $7, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- movaps 25(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit7Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl7LoopExit)
-
- palignr $7, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 25(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit7Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl7LoopExit)
-
- palignr $7, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- lea 25(%rcx), %rcx
- lea 16(%rdx), %rdx
-
- mov %rcx, %rax
- and $-0x40, %rcx
- sub %rcx, %rax
- lea -9(%rcx), %rcx
- sub %rax, %rdx
-# ifdef USE_AS_STRNCPY
- add %rax, %r8
-# endif
- movaps -7(%rcx), %xmm1
-
-/* 64 bytes loop */
- .p2align 4
-L(Shl7LoopStart):
- movaps 9(%rcx), %xmm2
- movaps 25(%rcx), %xmm3
- movaps %xmm3, %xmm6
- movaps 41(%rcx), %xmm4
- movaps %xmm4, %xmm7
- movaps 57(%rcx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %rax
- movaps %xmm5, %xmm7
- palignr $7, %xmm4, %xmm5
- test %rax, %rax
- palignr $7, %xmm3, %xmm4
- jnz L(Shl7Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %r8
- jbe L(StrncpyLeave7)
-# endif
- palignr $7, %xmm2, %xmm3
- lea 64(%rcx), %rcx
- palignr $7, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%rdx)
- movaps %xmm4, 32(%rdx)
- movaps %xmm3, 16(%rdx)
- movaps %xmm2, (%rdx)
- lea 64(%rdx), %rdx
- jmp L(Shl7LoopStart)
-
-L(Shl7LoopExit):
- mov (%rcx), %r9
- mov 5(%rcx), %esi
- mov %r9, (%rdx)
- mov %esi, 5(%rdx)
- mov $9, %rsi
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl8):
- movaps -8(%rcx), %xmm1
- movaps 8(%rcx), %xmm2
-L(Shl8Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %rax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit8Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl8LoopExit)
-
- palignr $8, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 24(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm1
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit8Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl8LoopExit)
-
- palignr $8, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- movaps 24(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit8Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl8LoopExit)
-
- palignr $8, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 24(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit8Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl8LoopExit)
-
- palignr $8, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- lea 24(%rcx), %rcx
- lea 16(%rdx), %rdx
-
- mov %rcx, %rax
- and $-0x40, %rcx
- sub %rcx, %rax
- lea -8(%rcx), %rcx
- sub %rax, %rdx
-# ifdef USE_AS_STRNCPY
- add %rax, %r8
-# endif
- movaps -8(%rcx), %xmm1
-
-/* 64 bytes loop */
- .p2align 4
-L(Shl8LoopStart):
- movaps 8(%rcx), %xmm2
- movaps 24(%rcx), %xmm3
- movaps %xmm3, %xmm6
- movaps 40(%rcx), %xmm4
- movaps %xmm4, %xmm7
- movaps 56(%rcx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %rax
- movaps %xmm5, %xmm7
- palignr $8, %xmm4, %xmm5
- test %rax, %rax
- palignr $8, %xmm3, %xmm4
- jnz L(Shl8Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %r8
- jbe L(StrncpyLeave8)
-# endif
- palignr $8, %xmm2, %xmm3
- lea 64(%rcx), %rcx
- palignr $8, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%rdx)
- movaps %xmm4, 32(%rdx)
- movaps %xmm3, 16(%rdx)
- movaps %xmm2, (%rdx)
- lea 64(%rdx), %rdx
- jmp L(Shl8LoopStart)
-
-L(Shl8LoopExit):
- mov (%rcx), %r9
- mov $8, %rsi
- mov %r9, (%rdx)
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl9):
- movaps -9(%rcx), %xmm1
- movaps 7(%rcx), %xmm2
-L(Shl9Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %rax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit9Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl9LoopExit)
-
- palignr $9, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 23(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm1
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit9Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl9LoopExit)
-
- palignr $9, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- movaps 23(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit9Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl9LoopExit)
-
- palignr $9, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 23(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit9Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl9LoopExit)
-
- palignr $9, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- lea 23(%rcx), %rcx
- lea 16(%rdx), %rdx
-
- mov %rcx, %rax
- and $-0x40, %rcx
- sub %rcx, %rax
- lea -7(%rcx), %rcx
- sub %rax, %rdx
-# ifdef USE_AS_STRNCPY
- add %rax, %r8
-# endif
- movaps -9(%rcx), %xmm1
-
-/* 64 bytes loop */
- .p2align 4
-L(Shl9LoopStart):
- movaps 7(%rcx), %xmm2
- movaps 23(%rcx), %xmm3
- movaps %xmm3, %xmm6
- movaps 39(%rcx), %xmm4
- movaps %xmm4, %xmm7
- movaps 55(%rcx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %rax
- movaps %xmm5, %xmm7
- palignr $9, %xmm4, %xmm5
- test %rax, %rax
- palignr $9, %xmm3, %xmm4
- jnz L(Shl9Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %r8
- jbe L(StrncpyLeave9)
-# endif
- palignr $9, %xmm2, %xmm3
- lea 64(%rcx), %rcx
- palignr $9, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%rdx)
- movaps %xmm4, 32(%rdx)
- movaps %xmm3, 16(%rdx)
- movaps %xmm2, (%rdx)
- lea 64(%rdx), %rdx
- jmp L(Shl9LoopStart)
-
-L(Shl9LoopExit):
- mov -1(%rcx), %r9
- mov $7, %rsi
- mov %r9, -1(%rdx)
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl10):
- movaps -10(%rcx), %xmm1
- movaps 6(%rcx), %xmm2
-L(Shl10Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %rax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit10Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl10LoopExit)
-
- palignr $10, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 22(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm1
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit10Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl10LoopExit)
-
- palignr $10, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- movaps 22(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit10Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl10LoopExit)
-
- palignr $10, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 22(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit10Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl10LoopExit)
-
- palignr $10, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- lea 22(%rcx), %rcx
- lea 16(%rdx), %rdx
-
- mov %rcx, %rax
- and $-0x40, %rcx
- sub %rcx, %rax
- lea -6(%rcx), %rcx
- sub %rax, %rdx
-# ifdef USE_AS_STRNCPY
- add %rax, %r8
-# endif
- movaps -10(%rcx), %xmm1
-
-/* 64 bytes loop */
- .p2align 4
-L(Shl10LoopStart):
- movaps 6(%rcx), %xmm2
- movaps 22(%rcx), %xmm3
- movaps %xmm3, %xmm6
- movaps 38(%rcx), %xmm4
- movaps %xmm4, %xmm7
- movaps 54(%rcx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %rax
- movaps %xmm5, %xmm7
- palignr $10, %xmm4, %xmm5
- test %rax, %rax
- palignr $10, %xmm3, %xmm4
- jnz L(Shl10Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %r8
- jbe L(StrncpyLeave10)
-# endif
- palignr $10, %xmm2, %xmm3
- lea 64(%rcx), %rcx
- palignr $10, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%rdx)
- movaps %xmm4, 32(%rdx)
- movaps %xmm3, 16(%rdx)
- movaps %xmm2, (%rdx)
- lea 64(%rdx), %rdx
- jmp L(Shl10LoopStart)
-
-L(Shl10LoopExit):
- mov -2(%rcx), %r9
- mov $6, %rsi
- mov %r9, -2(%rdx)
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl11):
- movaps -11(%rcx), %xmm1
- movaps 5(%rcx), %xmm2
-L(Shl11Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %rax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit11Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl11LoopExit)
-
- palignr $11, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 21(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm1
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit11Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl11LoopExit)
-
- palignr $11, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- movaps 21(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit11Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl11LoopExit)
-
- palignr $11, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 21(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit11Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl11LoopExit)
-
- palignr $11, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- lea 21(%rcx), %rcx
- lea 16(%rdx), %rdx
-
- mov %rcx, %rax
- and $-0x40, %rcx
- sub %rcx, %rax
- lea -5(%rcx), %rcx
- sub %rax, %rdx
-# ifdef USE_AS_STRNCPY
- add %rax, %r8
-# endif
- movaps -11(%rcx), %xmm1
-
-/* 64 bytes loop */
- .p2align 4
-L(Shl11LoopStart):
- movaps 5(%rcx), %xmm2
- movaps 21(%rcx), %xmm3
- movaps %xmm3, %xmm6
- movaps 37(%rcx), %xmm4
- movaps %xmm4, %xmm7
- movaps 53(%rcx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %rax
- movaps %xmm5, %xmm7
- palignr $11, %xmm4, %xmm5
- test %rax, %rax
- palignr $11, %xmm3, %xmm4
- jnz L(Shl11Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %r8
- jbe L(StrncpyLeave11)
-# endif
- palignr $11, %xmm2, %xmm3
- lea 64(%rcx), %rcx
- palignr $11, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%rdx)
- movaps %xmm4, 32(%rdx)
- movaps %xmm3, 16(%rdx)
- movaps %xmm2, (%rdx)
- lea 64(%rdx), %rdx
- jmp L(Shl11LoopStart)
-
-L(Shl11LoopExit):
- mov -3(%rcx), %r9
- mov $5, %rsi
- mov %r9, -3(%rdx)
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl12):
- movaps -12(%rcx), %xmm1
- movaps 4(%rcx), %xmm2
-L(Shl12Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %rax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit12Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl12LoopExit)
-
- palignr $12, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 20(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm1
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit12Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl12LoopExit)
-
- palignr $12, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- movaps 20(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit12Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl12LoopExit)
-
- palignr $12, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 20(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit12Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl12LoopExit)
-
- palignr $12, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- lea 20(%rcx), %rcx
- lea 16(%rdx), %rdx
-
- mov %rcx, %rax
- and $-0x40, %rcx
- sub %rcx, %rax
- lea -4(%rcx), %rcx
- sub %rax, %rdx
-# ifdef USE_AS_STRNCPY
- add %rax, %r8
-# endif
- movaps -12(%rcx), %xmm1
-
-/* 64 bytes loop */
- .p2align 4
-L(Shl12LoopStart):
- movaps 4(%rcx), %xmm2
- movaps 20(%rcx), %xmm3
- movaps %xmm3, %xmm6
- movaps 36(%rcx), %xmm4
- movaps %xmm4, %xmm7
- movaps 52(%rcx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %rax
- movaps %xmm5, %xmm7
- palignr $12, %xmm4, %xmm5
- test %rax, %rax
- palignr $12, %xmm3, %xmm4
- jnz L(Shl12Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %r8
- jbe L(StrncpyLeave12)
-# endif
- palignr $12, %xmm2, %xmm3
- lea 64(%rcx), %rcx
- palignr $12, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%rdx)
- movaps %xmm4, 32(%rdx)
- movaps %xmm3, 16(%rdx)
- movaps %xmm2, (%rdx)
- lea 64(%rdx), %rdx
- jmp L(Shl12LoopStart)
-
-L(Shl12LoopExit):
- mov (%rcx), %r9d
- mov $4, %rsi
- mov %r9d, (%rdx)
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl13):
- movaps -13(%rcx), %xmm1
- movaps 3(%rcx), %xmm2
-L(Shl13Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %rax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit13Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl13LoopExit)
-
- palignr $13, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 19(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm1
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit13Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl13LoopExit)
-
- palignr $13, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- movaps 19(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit13Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl13LoopExit)
-
- palignr $13, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 19(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit13Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl13LoopExit)
-
- palignr $13, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- lea 19(%rcx), %rcx
- lea 16(%rdx), %rdx
-
- mov %rcx, %rax
- and $-0x40, %rcx
- sub %rcx, %rax
- lea -3(%rcx), %rcx
- sub %rax, %rdx
-# ifdef USE_AS_STRNCPY
- add %rax, %r8
-# endif
- movaps -13(%rcx), %xmm1
-
-/* 64 bytes loop */
- .p2align 4
-L(Shl13LoopStart):
- movaps 3(%rcx), %xmm2
- movaps 19(%rcx), %xmm3
- movaps %xmm3, %xmm6
- movaps 35(%rcx), %xmm4
- movaps %xmm4, %xmm7
- movaps 51(%rcx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %rax
- movaps %xmm5, %xmm7
- palignr $13, %xmm4, %xmm5
- test %rax, %rax
- palignr $13, %xmm3, %xmm4
- jnz L(Shl13Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %r8
- jbe L(StrncpyLeave13)
-# endif
- palignr $13, %xmm2, %xmm3
- lea 64(%rcx), %rcx
- palignr $13, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%rdx)
- movaps %xmm4, 32(%rdx)
- movaps %xmm3, 16(%rdx)
- movaps %xmm2, (%rdx)
- lea 64(%rdx), %rdx
- jmp L(Shl13LoopStart)
-
-L(Shl13LoopExit):
- mov -1(%rcx), %r9d
- mov $3, %rsi
- mov %r9d, -1(%rdx)
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl14):
- movaps -14(%rcx), %xmm1
- movaps 2(%rcx), %xmm2
-L(Shl14Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %rax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit14Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl14LoopExit)
-
- palignr $14, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 18(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm1
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit14Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl14LoopExit)
-
- palignr $14, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- movaps 18(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit14Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl14LoopExit)
-
- palignr $14, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 18(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit14Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl14LoopExit)
-
- palignr $14, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- lea 18(%rcx), %rcx
- lea 16(%rdx), %rdx
-
- mov %rcx, %rax
- and $-0x40, %rcx
- sub %rcx, %rax
- lea -2(%rcx), %rcx
- sub %rax, %rdx
-# ifdef USE_AS_STRNCPY
- add %rax, %r8
-# endif
- movaps -14(%rcx), %xmm1
-
-/* 64 bytes loop */
- .p2align 4
-L(Shl14LoopStart):
- movaps 2(%rcx), %xmm2
- movaps 18(%rcx), %xmm3
- movaps %xmm3, %xmm6
- movaps 34(%rcx), %xmm4
- movaps %xmm4, %xmm7
- movaps 50(%rcx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %rax
- movaps %xmm5, %xmm7
- palignr $14, %xmm4, %xmm5
- test %rax, %rax
- palignr $14, %xmm3, %xmm4
- jnz L(Shl14Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %r8
- jbe L(StrncpyLeave14)
-# endif
- palignr $14, %xmm2, %xmm3
- lea 64(%rcx), %rcx
- palignr $14, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%rdx)
- movaps %xmm4, 32(%rdx)
- movaps %xmm3, 16(%rdx)
- movaps %xmm2, (%rdx)
- lea 64(%rdx), %rdx
- jmp L(Shl14LoopStart)
-
-L(Shl14LoopExit):
- mov -2(%rcx), %r9d
- mov $2, %rsi
- mov %r9d, -2(%rdx)
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl15):
- movaps -15(%rcx), %xmm1
- movaps 1(%rcx), %xmm2
-L(Shl15Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %rax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit15Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl15LoopExit)
-
- palignr $15, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 17(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm1
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit15Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl15LoopExit)
-
- palignr $15, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- movaps 17(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit15Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl15LoopExit)
-
- palignr $15, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 17(%rcx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%rdx), %rdx
- pmovmskb %xmm0, %rax
- lea 16(%rcx), %rcx
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- jbe L(StrncpyExit15Case2OrCase3)
-# endif
- test %rax, %rax
- jnz L(Shl15LoopExit)
-
- palignr $15, %xmm3, %xmm2
- movaps %xmm2, (%rdx)
- lea 17(%rcx), %rcx
- lea 16(%rdx), %rdx
-
- mov %rcx, %rax
- and $-0x40, %rcx
- sub %rcx, %rax
- lea -1(%rcx), %rcx
- sub %rax, %rdx
-# ifdef USE_AS_STRNCPY
- add %rax, %r8
-# endif
- movaps -15(%rcx), %xmm1
-
-/* 64 bytes loop */
- .p2align 4
-L(Shl15LoopStart):
- movaps 1(%rcx), %xmm2
- movaps 17(%rcx), %xmm3
- movaps %xmm3, %xmm6
- movaps 33(%rcx), %xmm4
- movaps %xmm4, %xmm7
- movaps 49(%rcx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %rax
- movaps %xmm5, %xmm7
- palignr $15, %xmm4, %xmm5
- test %rax, %rax
- palignr $15, %xmm3, %xmm4
- jnz L(Shl15Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %r8
- jbe L(StrncpyLeave15)
-# endif
- palignr $15, %xmm2, %xmm3
- lea 64(%rcx), %rcx
- palignr $15, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%rdx)
- movaps %xmm4, 32(%rdx)
- movaps %xmm3, 16(%rdx)
- movaps %xmm2, (%rdx)
- lea 64(%rdx), %rdx
- jmp L(Shl15LoopStart)
-
-L(Shl15LoopExit):
- mov -3(%rcx), %r9d
- mov $1, %rsi
- mov %r9d, -3(%rdx)
-# ifdef USE_AS_STRCAT
- jmp L(CopyFrom1To16Bytes)
-# endif
-
-# ifndef USE_AS_STRCAT
-
- .p2align 4
-L(CopyFrom1To16Bytes):
-# ifdef USE_AS_STRNCPY
- add $16, %r8
-# endif
- add %rsi, %rdx
- add %rsi, %rcx
-
- test %al, %al
- jz L(ExitHigh)
- test $0x01, %al
- jnz L(Exit1)
- test $0x02, %al
- jnz L(Exit2)
- test $0x04, %al
- jnz L(Exit3)
- test $0x08, %al
- jnz L(Exit4)
- test $0x10, %al
- jnz L(Exit5)
- test $0x20, %al
- jnz L(Exit6)
- test $0x40, %al
- jnz L(Exit7)
-
- .p2align 4
-L(Exit8):
- mov (%rcx), %rax
- mov %rax, (%rdx)
-# ifdef USE_AS_STPCPY
- lea 7(%rdx), %rax
-# else
- mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $8, %r8
- lea 8(%rdx), %rcx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%rax)
- sbb $-1, %rax
-# endif
-# endif
- ret
-
- .p2align 4
-L(ExitHigh):
- test $0x01, %ah
- jnz L(Exit9)
- test $0x02, %ah
- jnz L(Exit10)
- test $0x04, %ah
- jnz L(Exit11)
- test $0x08, %ah
- jnz L(Exit12)
- test $0x10, %ah
- jnz L(Exit13)
- test $0x20, %ah
- jnz L(Exit14)
- test $0x40, %ah
- jnz L(Exit15)
-
- .p2align 4
-L(Exit16):
- mov (%rcx), %rax
- mov %rax, (%rdx)
- mov 8(%rcx), %rax
- mov %rax, 8(%rdx)
-# ifdef USE_AS_STPCPY
- lea 15(%rdx), %rax
-# else
- mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $16, %r8
- lea 16(%rdx), %rcx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%rax)
- sbb $-1, %rax
-# endif
-# endif
- ret
-
-# ifdef USE_AS_STRNCPY
-
- .p2align 4
-L(CopyFrom1To16BytesCase2):
- add $16, %r8
- add %rsi, %rcx
- lea (%rsi, %rdx), %rsi
- lea -9(%r8), %rdx
- and $1<<7, %dh
- or %al, %dh
- test %dh, %dh
- lea (%rsi), %rdx
- jz L(ExitHighCase2)
-
- cmp $1, %r8
- je L(Exit1)
- test $0x01, %al
- jnz L(Exit1)
- cmp $2, %r8
- je L(Exit2)
- test $0x02, %al
- jnz L(Exit2)
- cmp $3, %r8
- je L(Exit3)
- test $0x04, %al
- jnz L(Exit3)
- cmp $4, %r8
- je L(Exit4)
- test $0x08, %al
- jnz L(Exit4)
- cmp $5, %r8
- je L(Exit5)
- test $0x10, %al
- jnz L(Exit5)
- cmp $6, %r8
- je L(Exit6)
- test $0x20, %al
- jnz L(Exit6)
- cmp $7, %r8
- je L(Exit7)
- test $0x40, %al
- jnz L(Exit7)
- jmp L(Exit8)
-
- .p2align 4
-L(ExitHighCase2):
- cmp $9, %r8
- je L(Exit9)
- test $0x01, %ah
- jnz L(Exit9)
- cmp $10, %r8
- je L(Exit10)
- test $0x02, %ah
- jnz L(Exit10)
- cmp $11, %r8
- je L(Exit11)
- test $0x04, %ah
- jnz L(Exit11)
- cmp $12, %r8
- je L(Exit12)
- test $0x8, %ah
- jnz L(Exit12)
- cmp $13, %r8
- je L(Exit13)
- test $0x10, %ah
- jnz L(Exit13)
- cmp $14, %r8
- je L(Exit14)
- test $0x20, %ah
- jnz L(Exit14)
- cmp $15, %r8
- je L(Exit15)
- test $0x40, %ah
- jnz L(Exit15)
- jmp L(Exit16)
-
-L(CopyFrom1To16BytesCase2OrCase3):
- test %rax, %rax
- jnz L(CopyFrom1To16BytesCase2)
-
- .p2align 4
-L(CopyFrom1To16BytesCase3):
- add $16, %r8
- add %rsi, %rdx
- add %rsi, %rcx
-
- cmp $16, %r8
- je L(Exit16)
- cmp $8, %r8
- je L(Exit8)
- jg L(More8Case3)
- cmp $4, %r8
- je L(Exit4)
- jg L(More4Case3)
- cmp $2, %r8
- jl L(Exit1)
- je L(Exit2)
- jg L(Exit3)
-L(More8Case3): /* but less than 16 */
- cmp $12, %r8
- je L(Exit12)
- jl L(Less12Case3)
- cmp $14, %r8
- jl L(Exit13)
- je L(Exit14)
- jg L(Exit15)
-L(More4Case3): /* but less than 8 */
- cmp $6, %r8
- jl L(Exit5)
- je L(Exit6)
- jg L(Exit7)
-L(Less12Case3): /* but more than 8 */
- cmp $10, %r8
- jl L(Exit9)
- je L(Exit10)
- jg L(Exit11)
-# endif
-
- .p2align 4
-L(Exit1):
- movb (%rcx), %al
- movb %al, (%rdx)
-# ifdef USE_AS_STPCPY
- lea (%rdx), %rax
-# else
- mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $1, %r8
- lea 1(%rdx), %rcx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%rax)
- sbb $-1, %rax
-# endif
-# endif
- ret
-
- .p2align 4
-L(Exit2):
- movw (%rcx), %ax
- movw %ax, (%rdx)
-# ifdef USE_AS_STPCPY
- lea 1(%rdx), %rax
-# else
- mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $2, %r8
- lea 2(%rdx), %rcx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%rax)
- sbb $-1, %rax
-# endif
-# endif
- ret
-
- .p2align 4
-L(Exit3):
- movw (%rcx), %ax
- movw %ax, (%rdx)
- movb 2(%rcx), %al
- movb %al, 2(%rdx)
-# ifdef USE_AS_STPCPY
- lea 2(%rdx), %rax
-# else
- mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $3, %r8
- lea 3(%rdx), %rcx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%rax)
- sbb $-1, %rax
-# endif
-# endif
- ret
-
- .p2align 4
-L(Exit4):
- movl (%rcx), %eax
- movl %eax, (%rdx)
-# ifdef USE_AS_STPCPY
- lea 3(%rdx), %rax
-# else
- mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $4, %r8
- lea 4(%rdx), %rcx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%rax)
- sbb $-1, %rax
-# endif
-# endif
- ret
-
- .p2align 4
-L(Exit5):
- movl (%rcx), %eax
- movl %eax, (%rdx)
- movb 4(%rcx), %al
- movb %al, 4(%rdx)
-# ifdef USE_AS_STPCPY
- lea 4(%rdx), %rax
-# else
- mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $5, %r8
- lea 5(%rdx), %rcx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%rax)
- sbb $-1, %rax
-# endif
-# endif
- ret
-
- .p2align 4
-L(Exit6):
- movl (%rcx), %eax
- movl %eax, (%rdx)
- movw 4(%rcx), %ax
- movw %ax, 4(%rdx)
-# ifdef USE_AS_STPCPY
- lea 5(%rdx), %rax
-# else
- mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $6, %r8
- lea 6(%rdx), %rcx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%rax)
- sbb $-1, %rax
-# endif
-# endif
- ret
-
- .p2align 4
-L(Exit7):
- movl (%rcx), %eax
- movl %eax, (%rdx)
- movl 3(%rcx), %eax
- movl %eax, 3(%rdx)
-# ifdef USE_AS_STPCPY
- lea 6(%rdx), %rax
-# else
- mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $7, %r8
- lea 7(%rdx), %rcx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%rax)
- sbb $-1, %rax
-# endif
-# endif
- ret
-
- .p2align 4
-L(Exit9):
- mov (%rcx), %rax
- mov %rax, (%rdx)
- mov 5(%rcx), %eax
- mov %eax, 5(%rdx)
-# ifdef USE_AS_STPCPY
- lea 8(%rdx), %rax
-# else
- mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $9, %r8
- lea 9(%rdx), %rcx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%rax)
- sbb $-1, %rax
-# endif
-# endif
- ret
-
- .p2align 4
-L(Exit10):
- mov (%rcx), %rax
- mov %rax, (%rdx)
- mov 6(%rcx), %eax
- mov %eax, 6(%rdx)
-# ifdef USE_AS_STPCPY
- lea 9(%rdx), %rax
-# else
- mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $10, %r8
- lea 10(%rdx), %rcx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%rax)
- sbb $-1, %rax
-# endif
-# endif
- ret
-
- .p2align 4
-L(Exit11):
- mov (%rcx), %rax
- mov %rax, (%rdx)
- mov 7(%rcx), %eax
- mov %eax, 7(%rdx)
-# ifdef USE_AS_STPCPY
- lea 10(%rdx), %rax
-# else
- mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $11, %r8
- lea 11(%rdx), %rcx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%rax)
- sbb $-1, %rax
-# endif
-# endif
- ret
-
- .p2align 4
-L(Exit12):
- mov (%rcx), %rax
- mov %rax, (%rdx)
- mov 8(%rcx), %eax
- mov %eax, 8(%rdx)
-# ifdef USE_AS_STPCPY
- lea 11(%rdx), %rax
-# else
- mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $12, %r8
- lea 12(%rdx), %rcx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%rax)
- sbb $-1, %rax
-# endif
-# endif
- ret
-
- .p2align 4
-L(Exit13):
- mov (%rcx), %rax
- mov %rax, (%rdx)
- mov 5(%rcx), %rax
- mov %rax, 5(%rdx)
-# ifdef USE_AS_STPCPY
- lea 12(%rdx), %rax
-# else
- mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $13, %r8
- lea 13(%rdx), %rcx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%rax)
- sbb $-1, %rax
-# endif
-# endif
- ret
-
- .p2align 4
-L(Exit14):
- mov (%rcx), %rax
- mov %rax, (%rdx)
- mov 6(%rcx), %rax
- mov %rax, 6(%rdx)
-# ifdef USE_AS_STPCPY
- lea 13(%rdx), %rax
-# else
- mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $14, %r8
- lea 14(%rdx), %rcx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%rax)
- sbb $-1, %rax
-# endif
-# endif
- ret
-
- .p2align 4
-L(Exit15):
- mov (%rcx), %rax
- mov %rax, (%rdx)
- mov 7(%rcx), %rax
- mov %rax, 7(%rdx)
-# ifdef USE_AS_STPCPY
- lea 14(%rdx), %rax
-# else
- mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $15, %r8
- lea 15(%rdx), %rcx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%rax)
- sbb $-1, %rax
-# endif
-# endif
- ret
-
-# ifdef USE_AS_STRNCPY
- .p2align 4
-L(Fill0):
- ret
-
- .p2align 4
-L(Fill1):
- movb %dl, (%rcx)
- ret
-
- .p2align 4
-L(Fill2):
- movw %dx, (%rcx)
- ret
-
- .p2align 4
-L(Fill3):
- movw %dx, (%rcx)
- movb %dl, 2(%rcx)
- ret
-
- .p2align 4
-L(Fill4):
- movl %edx, (%rcx)
- ret
-
- .p2align 4
-L(Fill5):
- movl %edx, (%rcx)
- movb %dl, 4(%rcx)
- ret
-
- .p2align 4
-L(Fill6):
- movl %edx, (%rcx)
- movw %dx, 4(%rcx)
- ret
-
- .p2align 4
-L(Fill7):
- movl %edx, (%rcx)
- movl %edx, 3(%rcx)
- ret
-
- .p2align 4
-L(Fill8):
- mov %rdx, (%rcx)
- ret
-
- .p2align 4
-L(Fill9):
- mov %rdx, (%rcx)
- movb %dl, 8(%rcx)
- ret
-
- .p2align 4
-L(Fill10):
- mov %rdx, (%rcx)
- movw %dx, 8(%rcx)
- ret
-
- .p2align 4
-L(Fill11):
- mov %rdx, (%rcx)
- movl %edx, 7(%rcx)
- ret
-
- .p2align 4
-L(Fill12):
- mov %rdx, (%rcx)
- movl %edx, 8(%rcx)
- ret
-
- .p2align 4
-L(Fill13):
- mov %rdx, (%rcx)
- mov %rdx, 5(%rcx)
- ret
-
- .p2align 4
-L(Fill14):
- mov %rdx, (%rcx)
- mov %rdx, 6(%rcx)
- ret
-
- .p2align 4
-L(Fill15):
- mov %rdx, (%rcx)
- mov %rdx, 7(%rcx)
- ret
-
- .p2align 4
-L(Fill16):
- mov %rdx, (%rcx)
- mov %rdx, 8(%rcx)
- ret
-
- .p2align 4
-L(StrncpyFillExit1):
- lea 16(%r8), %r8
-L(FillFrom1To16Bytes):
- test %r8, %r8
- jz L(Fill0)
- cmp $16, %r8
- je L(Fill16)
- cmp $8, %r8
- je L(Fill8)
- jg L(FillMore8)
- cmp $4, %r8
- je L(Fill4)
- jg L(FillMore4)
- cmp $2, %r8
- jl L(Fill1)
- je L(Fill2)
- jg L(Fill3)
-L(FillMore8): /* but less than 16 */
- cmp $12, %r8
- je L(Fill12)
- jl L(FillLess12)
- cmp $14, %r8
- jl L(Fill13)
- je L(Fill14)
- jg L(Fill15)
-L(FillMore4): /* but less than 8 */
- cmp $6, %r8
- jl L(Fill5)
- je L(Fill6)
- jg L(Fill7)
-L(FillLess12): /* but more than 8 */
- cmp $10, %r8
- jl L(Fill9)
- je L(Fill10)
- jmp L(Fill11)
-
- .p2align 4
-L(StrncpyFillTailWithZero1):
- xor %rdx, %rdx
- sub $16, %r8
- jbe L(StrncpyFillExit1)
-
- pxor %xmm0, %xmm0
- mov %rdx, (%rcx)
- mov %rdx, 8(%rcx)
-
- lea 16(%rcx), %rcx
-
- mov %rcx, %rdx
- and $0xf, %rdx
- sub %rdx, %rcx
- add %rdx, %r8
- xor %rdx, %rdx
- sub $64, %r8
- jb L(StrncpyFillLess64)
-
-L(StrncpyFillLoopMovdqa):
- movdqa %xmm0, (%rcx)
- movdqa %xmm0, 16(%rcx)
- movdqa %xmm0, 32(%rcx)
- movdqa %xmm0, 48(%rcx)
- lea 64(%rcx), %rcx
- sub $64, %r8
- jae L(StrncpyFillLoopMovdqa)
-
-L(StrncpyFillLess64):
- add $32, %r8
- jl L(StrncpyFillLess32)
- movdqa %xmm0, (%rcx)
- movdqa %xmm0, 16(%rcx)
- lea 32(%rcx), %rcx
- sub $16, %r8
- jl L(StrncpyFillExit1)
- movdqa %xmm0, (%rcx)
- lea 16(%rcx), %rcx
- jmp L(FillFrom1To16Bytes)
-
-L(StrncpyFillLess32):
- add $16, %r8
- jl L(StrncpyFillExit1)
- movdqa %xmm0, (%rcx)
- lea 16(%rcx), %rcx
- jmp L(FillFrom1To16Bytes)
-
- .p2align 4
-L(Exit0):
- mov %rdx, %rax
- ret
-
- .p2align 4
-L(StrncpyExit15Bytes):
- cmp $9, %r8
- je L(Exit9)
- cmpb $0, 8(%rcx)
- jz L(Exit9)
- cmp $10, %r8
- je L(Exit10)
- cmpb $0, 9(%rcx)
- jz L(Exit10)
- cmp $11, %r8
- je L(Exit11)
- cmpb $0, 10(%rcx)
- jz L(Exit11)
- cmp $12, %r8
- je L(Exit12)
- cmpb $0, 11(%rcx)
- jz L(Exit12)
- cmp $13, %r8
- je L(Exit13)
- cmpb $0, 12(%rcx)
- jz L(Exit13)
- cmp $14, %r8
- je L(Exit14)
- cmpb $0, 13(%rcx)
- jz L(Exit14)
- mov (%rcx), %rax
- mov %rax, (%rdx)
- mov 7(%rcx), %rax
- mov %rax, 7(%rdx)
-# ifdef USE_AS_STPCPY
- lea 14(%rdx), %rax
- cmpb $1, (%rax)
- sbb $-1, %rax
-# else
- mov %rdi, %rax
-# endif
- ret
-
- .p2align 4
-L(StrncpyExit8Bytes):
- cmp $1, %r8
- je L(Exit1)
- cmpb $0, (%rcx)
- jz L(Exit1)
- cmp $2, %r8
- je L(Exit2)
- cmpb $0, 1(%rcx)
- jz L(Exit2)
- cmp $3, %r8
- je L(Exit3)
- cmpb $0, 2(%rcx)
- jz L(Exit3)
- cmp $4, %r8
- je L(Exit4)
- cmpb $0, 3(%rcx)
- jz L(Exit4)
- cmp $5, %r8
- je L(Exit5)
- cmpb $0, 4(%rcx)
- jz L(Exit5)
- cmp $6, %r8
- je L(Exit6)
- cmpb $0, 5(%rcx)
- jz L(Exit6)
- cmp $7, %r8
- je L(Exit7)
- cmpb $0, 6(%rcx)
- jz L(Exit7)
- mov (%rcx), %rax
- mov %rax, (%rdx)
-# ifdef USE_AS_STPCPY
- lea 7(%rdx), %rax
- cmpb $1, (%rax)
- sbb $-1, %rax
-# else
- mov %rdi, %rax
-# endif
- ret
-
-# endif
-# endif
-
-# ifdef USE_AS_STRNCPY
- .p2align 4
-L(StrncpyLeaveCase2OrCase3):
- test %rax, %rax
- jnz L(Aligned64LeaveCase2)
-
-L(Aligned64LeaveCase3):
- lea 64(%r8), %r8
- sub $16, %r8
- jbe L(CopyFrom1To16BytesCase3)
- movaps %xmm4, -64(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(CopyFrom1To16BytesCase3)
- movaps %xmm5, -48(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(CopyFrom1To16BytesCase3)
- movaps %xmm6, -32(%rdx)
- lea 16(%rsi), %rsi
- lea -16(%r8), %r8
- jmp L(CopyFrom1To16BytesCase3)
-
-L(Aligned64LeaveCase2):
- pcmpeqb %xmm4, %xmm0
- pmovmskb %xmm0, %rax
- add $48, %r8
- jle L(CopyFrom1To16BytesCase2OrCase3)
- test %rax, %rax
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm5, %xmm0
- pmovmskb %xmm0, %rax
- movaps %xmm4, -64(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %rax, %rax
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm6, %xmm0
- pmovmskb %xmm0, %rax
- movaps %xmm5, -48(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %rax, %rax
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm7, %xmm0
- pmovmskb %xmm0, %rax
- movaps %xmm6, -32(%rdx)
- lea 16(%rsi), %rsi
- lea -16(%r8), %r8
- jmp L(CopyFrom1To16BytesCase2)
-/*--------------------------------------------------*/
- .p2align 4
-L(StrncpyExit1Case2OrCase3):
- movdqu -1(%rcx), %xmm0
- movdqu %xmm0, -1(%rdx)
- mov $15, %rsi
- test %rax, %rax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit2Case2OrCase3):
- movdqu -2(%rcx), %xmm0
- movdqu %xmm0, -2(%rdx)
- mov $14, %rsi
- test %rax, %rax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit3Case2OrCase3):
- movdqu -3(%rcx), %xmm0
- movdqu %xmm0, -3(%rdx)
- mov $13, %rsi
- test %rax, %rax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit4Case2OrCase3):
- movdqu -4(%rcx), %xmm0
- movdqu %xmm0, -4(%rdx)
- mov $12, %rsi
- test %rax, %rax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit5Case2OrCase3):
- movdqu -5(%rcx), %xmm0
- movdqu %xmm0, -5(%rdx)
- mov $11, %rsi
- test %rax, %rax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit6Case2OrCase3):
- mov (%rcx), %rsi
- mov 6(%rcx), %r9d
- mov %r9d, 6(%rdx)
- mov %rsi, (%rdx)
- test %rax, %rax
- mov $10, %rsi
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit7Case2OrCase3):
- mov (%rcx), %rsi
- mov 5(%rcx), %r9d
- mov %r9d, 5(%rdx)
- mov %rsi, (%rdx)
- test %rax, %rax
- mov $9, %rsi
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit8Case2OrCase3):
- mov (%rcx), %r9
- mov $8, %rsi
- mov %r9, (%rdx)
- test %rax, %rax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit9Case2OrCase3):
- mov -1(%rcx), %r9
- mov $7, %rsi
- mov %r9, -1(%rdx)
- test %rax, %rax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit10Case2OrCase3):
- mov -2(%rcx), %r9
- mov $6, %rsi
- mov %r9, -2(%rdx)
- test %rax, %rax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit11Case2OrCase3):
- mov -3(%rcx), %r9
- mov $5, %rsi
- mov %r9, -3(%rdx)
- test %rax, %rax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit12Case2OrCase3):
- mov (%rcx), %r9d
- mov $4, %rsi
- mov %r9d, (%rdx)
- test %rax, %rax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit13Case2OrCase3):
- mov -1(%rcx), %r9d
- mov $3, %rsi
- mov %r9d, -1(%rdx)
- test %rax, %rax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit14Case2OrCase3):
- mov -2(%rcx), %r9d
- mov $2, %rsi
- mov %r9d, -2(%rdx)
- test %rax, %rax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit15Case2OrCase3):
- mov -3(%rcx), %r9d
- mov $1, %rsi
- mov %r9d, -3(%rdx)
- test %rax, %rax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyLeave1):
- movaps %xmm2, %xmm3
- add $48, %r8
- jle L(StrncpyExit1)
- palignr $1, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 31(%rcx), %xmm2
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit1)
- palignr $1, %xmm3, %xmm2
- movaps %xmm2, 16(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit1)
- movaps %xmm4, 32(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit1)
- movaps %xmm5, 48(%rdx)
- lea 16(%rsi), %rsi
- lea -16(%r8), %r8
-
-L(StrncpyExit1):
- lea 15(%rdx, %rsi), %rdx
- lea 15(%rcx, %rsi), %rcx
- mov -15(%rcx), %rsi
- mov -8(%rcx), %rax
- mov %rsi, -15(%rdx)
- mov %rax, -8(%rdx)
- xor %rsi, %rsi
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyLeave2):
- movaps %xmm2, %xmm3
- add $48, %r8
- jle L(StrncpyExit2)
- palignr $2, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 30(%rcx), %xmm2
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit2)
- palignr $2, %xmm3, %xmm2
- movaps %xmm2, 16(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit2)
- movaps %xmm4, 32(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit2)
- movaps %xmm5, 48(%rdx)
- lea 16(%rsi), %rsi
- lea -16(%r8), %r8
-
-L(StrncpyExit2):
- lea 14(%rdx, %rsi), %rdx
- lea 14(%rcx, %rsi), %rcx
- mov -14(%rcx), %rsi
- mov -8(%rcx), %rax
- mov %rsi, -14(%rdx)
- mov %rax, -8(%rdx)
- xor %rsi, %rsi
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyLeave3):
- movaps %xmm2, %xmm3
- add $48, %r8
- jle L(StrncpyExit3)
- palignr $3, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 29(%rcx), %xmm2
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit3)
- palignr $3, %xmm3, %xmm2
- movaps %xmm2, 16(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit3)
- movaps %xmm4, 32(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit3)
- movaps %xmm5, 48(%rdx)
- lea 16(%rsi), %rsi
- lea -16(%r8), %r8
-
-L(StrncpyExit3):
- lea 13(%rdx, %rsi), %rdx
- lea 13(%rcx, %rsi), %rcx
- mov -13(%rcx), %rsi
- mov -8(%rcx), %rax
- mov %rsi, -13(%rdx)
- mov %rax, -8(%rdx)
- xor %rsi, %rsi
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyLeave4):
- movaps %xmm2, %xmm3
- add $48, %r8
- jle L(StrncpyExit4)
- palignr $4, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 28(%rcx), %xmm2
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit4)
- palignr $4, %xmm3, %xmm2
- movaps %xmm2, 16(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit4)
- movaps %xmm4, 32(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit4)
- movaps %xmm5, 48(%rdx)
- lea 16(%rsi), %rsi
- lea -16(%r8), %r8
-
-L(StrncpyExit4):
- lea 12(%rdx, %rsi), %rdx
- lea 12(%rcx, %rsi), %rcx
- mov -12(%rcx), %rsi
- mov -4(%rcx), %eax
- mov %rsi, -12(%rdx)
- mov %eax, -4(%rdx)
- xor %rsi, %rsi
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyLeave5):
- movaps %xmm2, %xmm3
- add $48, %r8
- jle L(StrncpyExit5)
- palignr $5, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 27(%rcx), %xmm2
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit5)
- palignr $5, %xmm3, %xmm2
- movaps %xmm2, 16(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit5)
- movaps %xmm4, 32(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit5)
- movaps %xmm5, 48(%rdx)
- lea 16(%rsi), %rsi
- lea -16(%r8), %r8
-
-L(StrncpyExit5):
- lea 11(%rdx, %rsi), %rdx
- lea 11(%rcx, %rsi), %rcx
- mov -11(%rcx), %rsi
- mov -4(%rcx), %eax
- mov %rsi, -11(%rdx)
- mov %eax, -4(%rdx)
- xor %rsi, %rsi
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyLeave6):
- movaps %xmm2, %xmm3
- add $48, %r8
- jle L(StrncpyExit6)
- palignr $6, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 26(%rcx), %xmm2
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit6)
- palignr $6, %xmm3, %xmm2
- movaps %xmm2, 16(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit6)
- movaps %xmm4, 32(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit6)
- movaps %xmm5, 48(%rdx)
- lea 16(%rsi), %rsi
- lea -16(%r8), %r8
-
-L(StrncpyExit6):
- lea 10(%rdx, %rsi), %rdx
- lea 10(%rcx, %rsi), %rcx
- mov -10(%rcx), %rsi
- movw -2(%rcx), %ax
- mov %rsi, -10(%rdx)
- movw %ax, -2(%rdx)
- xor %rsi, %rsi
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyLeave7):
- movaps %xmm2, %xmm3
- add $48, %r8
- jle L(StrncpyExit7)
- palignr $7, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 25(%rcx), %xmm2
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit7)
- palignr $7, %xmm3, %xmm2
- movaps %xmm2, 16(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit7)
- movaps %xmm4, 32(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit7)
- movaps %xmm5, 48(%rdx)
- lea 16(%rsi), %rsi
- lea -16(%r8), %r8
-
-L(StrncpyExit7):
- lea 9(%rdx, %rsi), %rdx
- lea 9(%rcx, %rsi), %rcx
- mov -9(%rcx), %rsi
- movb -1(%rcx), %ah
- mov %rsi, -9(%rdx)
- movb %ah, -1(%rdx)
- xor %rsi, %rsi
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyLeave8):
- movaps %xmm2, %xmm3
- add $48, %r8
- jle L(StrncpyExit8)
- palignr $8, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 24(%rcx), %xmm2
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit8)
- palignr $8, %xmm3, %xmm2
- movaps %xmm2, 16(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit8)
- movaps %xmm4, 32(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit8)
- movaps %xmm5, 48(%rdx)
- lea 16(%rsi), %rsi
- lea -16(%r8), %r8
-
-L(StrncpyExit8):
- lea 8(%rdx, %rsi), %rdx
- lea 8(%rcx, %rsi), %rcx
- mov -8(%rcx), %rax
- xor %rsi, %rsi
- mov %rax, -8(%rdx)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyLeave9):
- movaps %xmm2, %xmm3
- add $48, %r8
- jle L(StrncpyExit9)
- palignr $9, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 23(%rcx), %xmm2
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit9)
- palignr $9, %xmm3, %xmm2
- movaps %xmm2, 16(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit9)
- movaps %xmm4, 32(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit9)
- movaps %xmm5, 48(%rdx)
- lea 16(%rsi), %rsi
- lea -16(%r8), %r8
-
-L(StrncpyExit9):
- lea 7(%rdx, %rsi), %rdx
- lea 7(%rcx, %rsi), %rcx
- mov -8(%rcx), %rax
- xor %rsi, %rsi
- mov %rax, -8(%rdx)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyLeave10):
- movaps %xmm2, %xmm3
- add $48, %r8
- jle L(StrncpyExit10)
- palignr $10, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 22(%rcx), %xmm2
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit10)
- palignr $10, %xmm3, %xmm2
- movaps %xmm2, 16(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit10)
- movaps %xmm4, 32(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit10)
- movaps %xmm5, 48(%rdx)
- lea 16(%rsi), %rsi
- lea -16(%r8), %r8
-
-L(StrncpyExit10):
- lea 6(%rdx, %rsi), %rdx
- lea 6(%rcx, %rsi), %rcx
- mov -8(%rcx), %rax
- xor %rsi, %rsi
- mov %rax, -8(%rdx)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyLeave11):
- movaps %xmm2, %xmm3
- add $48, %r8
- jle L(StrncpyExit11)
- palignr $11, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 21(%rcx), %xmm2
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit11)
- palignr $11, %xmm3, %xmm2
- movaps %xmm2, 16(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit11)
- movaps %xmm4, 32(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit11)
- movaps %xmm5, 48(%rdx)
- lea 16(%rsi), %rsi
- lea -16(%r8), %r8
-
-L(StrncpyExit11):
- lea 5(%rdx, %rsi), %rdx
- lea 5(%rcx, %rsi), %rcx
- mov -8(%rcx), %rax
- xor %rsi, %rsi
- mov %rax, -8(%rdx)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyLeave12):
- movaps %xmm2, %xmm3
- add $48, %r8
- jle L(StrncpyExit12)
- palignr $12, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 20(%rcx), %xmm2
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit12)
- palignr $12, %xmm3, %xmm2
- movaps %xmm2, 16(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit12)
- movaps %xmm4, 32(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit12)
- movaps %xmm5, 48(%rdx)
- lea 16(%rsi), %rsi
- lea -16(%r8), %r8
-
-L(StrncpyExit12):
- lea 4(%rdx, %rsi), %rdx
- lea 4(%rcx, %rsi), %rcx
- mov -4(%rcx), %eax
- xor %rsi, %rsi
- mov %eax, -4(%rdx)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyLeave13):
- movaps %xmm2, %xmm3
- add $48, %r8
- jle L(StrncpyExit13)
- palignr $13, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 19(%rcx), %xmm2
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit13)
- palignr $13, %xmm3, %xmm2
- movaps %xmm2, 16(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit13)
- movaps %xmm4, 32(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit13)
- movaps %xmm5, 48(%rdx)
- lea 16(%rsi), %rsi
- lea -16(%r8), %r8
-
-L(StrncpyExit13):
- lea 3(%rdx, %rsi), %rdx
- lea 3(%rcx, %rsi), %rcx
- mov -4(%rcx), %eax
- xor %rsi, %rsi
- mov %eax, -4(%rdx)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyLeave14):
- movaps %xmm2, %xmm3
- add $48, %r8
- jle L(StrncpyExit14)
- palignr $14, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 18(%rcx), %xmm2
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit14)
- palignr $14, %xmm3, %xmm2
- movaps %xmm2, 16(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit14)
- movaps %xmm4, 32(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit14)
- movaps %xmm5, 48(%rdx)
- lea 16(%rsi), %rsi
- lea -16(%r8), %r8
-
-L(StrncpyExit14):
- lea 2(%rdx, %rsi), %rdx
- lea 2(%rcx, %rsi), %rcx
- movw -2(%rcx), %ax
- xor %rsi, %rsi
- movw %ax, -2(%rdx)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyLeave15):
- movaps %xmm2, %xmm3
- add $48, %r8
- jle L(StrncpyExit15)
- palignr $15, %xmm1, %xmm2
- movaps %xmm2, (%rdx)
- movaps 17(%rcx), %xmm2
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit15)
- palignr $15, %xmm3, %xmm2
- movaps %xmm2, 16(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit15)
- movaps %xmm4, 32(%rdx)
- lea 16(%rsi), %rsi
- sub $16, %r8
- jbe L(StrncpyExit15)
- movaps %xmm5, 48(%rdx)
- lea 16(%rsi), %rsi
- lea -16(%r8), %r8
-
-L(StrncpyExit15):
- lea 1(%rdx, %rsi), %rdx
- lea 1(%rcx, %rsi), %rcx
- movb -1(%rcx), %ah
- xor %rsi, %rsi
- movb %ah, -1(%rdx)
- jmp L(CopyFrom1To16BytesCase3)
-
-# endif
-# ifndef USE_AS_STRCAT
-END (STRCPY)
-# endif
-#endif
deleted file mode 100644
@@ -1,3 +0,0 @@
-#define USE_AS_STRNCPY
-#define STRCPY __strncpy_ssse3
-#include "strcpy-ssse3.S"