Message ID: 20221018024901.3381469-3-goldstein.w.n@gmail.com
State: New
Series: [v1,1/7] x86: Optimize memchr-evex.S and implement with VMM headers
On Mon, Oct 17, 2022 at 7:49 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > Optimizations are: > 1. Use the fact that bsf(0) leaves the destination unchanged to save a > branch in short string case. > 2. Restructure code so that small strings are given the hot path. > - This is a net-zero on the benchmark suite but in general makes > sense as smaller sizes are far more common. > 3. Use more code-size efficient instructions. > - tzcnt ... -> bsf ... > - vpcmpb $0 ... -> vpcmpeq ... > 4. Align labels less aggressively, especially if it doesn't save fetch > blocks / causes the basic-block to span extra cache-lines. > > The optimizations (especially for point 2) make the strnlen and > strlen code essentially incompatible so split strnlen-evex > to a new file. > > Code Size Changes: > strlen-evex.S : -23 bytes > strnlen-evex.S : -167 bytes > > Net perf changes: > > Reported as geometric mean of all improvements / regressions from N=10 > runs of the benchtests. Value as New Time / Old Time so < 1.0 is > improvement and 1.0 is regression. > > strlen-evex.S : 0.992 (No real change) > strnlen-evex.S : 0.947 > > Full results attached in email. > > Full check passes on x86-64. > --- > sysdeps/x86_64/multiarch/strlen-evex.S | 544 +++++++----------------- > sysdeps/x86_64/multiarch/strnlen-evex.S | 427 ++++++++++++++++++- > sysdeps/x86_64/multiarch/wcsnlen-evex.S | 5 +- > 3 files changed, 572 insertions(+), 404 deletions(-) > > diff --git a/sysdeps/x86_64/multiarch/strlen-evex.S b/sysdeps/x86_64/multiarch/strlen-evex.S > index 2109ec2f7a..487846f098 100644 > --- a/sysdeps/x86_64/multiarch/strlen-evex.S > +++ b/sysdeps/x86_64/multiarch/strlen-evex.S > @@ -26,466 +26,220 @@ > # define STRLEN __strlen_evex > # endif > > -# define VMOVA vmovdqa64 > +# ifndef VEC_SIZE > +# include "x86-evex256-vecs.h" > +# endif > > # ifdef USE_AS_WCSLEN > -# define VPCMP vpcmpd > +# define VPCMPEQ vpcmpeqd > +# define VPCMPNEQ vpcmpneqd > +# define VPTESTN vptestnmd > +# define VPTEST vptestmd > # define VPMINU vpminud > -# define SHIFT_REG ecx > # define CHAR_SIZE 4 > +# define CHAR_SIZE_SHIFT_REG(reg) sar $2, %reg > # else > -# define VPCMP vpcmpb > +# define VPCMPEQ vpcmpeqb > +# define VPCMPNEQ vpcmpneqb > +# define VPTESTN vptestnmb > +# define VPTEST vptestmb > # define VPMINU vpminub > -# define SHIFT_REG edx > # define CHAR_SIZE 1 > +# define CHAR_SIZE_SHIFT_REG(reg) > + > +# define REG_WIDTH VEC_SIZE > # endif > > -# define XMMZERO xmm16 > -# define YMMZERO ymm16 > -# define YMM1 ymm17 > -# define YMM2 ymm18 > -# define YMM3 ymm19 > -# define YMM4 ymm20 > -# define YMM5 ymm21 > -# define YMM6 ymm22 > - > -# define VEC_SIZE 32 > -# define PAGE_SIZE 4096 > -# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) > - > - .section .text.evex,"ax",@progbits > -ENTRY (STRLEN) > -# ifdef USE_AS_STRNLEN > - /* Check zero length. */ > - test %RSI_LP, %RSI_LP > - jz L(zero) > -# ifdef __ILP32__ > - /* Clear the upper 32 bits. 
*/ > - movl %esi, %esi > -# endif > - mov %RSI_LP, %R8_LP > +# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) > + > +# include "reg-macros.h" > + > +# if CHAR_PER_VEC == 64 > + > +# define TAIL_RETURN_LBL first_vec_x2 > +# define TAIL_RETURN_OFFSET (CHAR_PER_VEC * 2) > + > +# define FALLTHROUGH_RETURN_LBL first_vec_x3 > +# define FALLTHROUGH_RETURN_OFFSET (CHAR_PER_VEC * 3) > + > +# else > + > +# define TAIL_RETURN_LBL first_vec_x3 > +# define TAIL_RETURN_OFFSET (CHAR_PER_VEC * 3) > + > +# define FALLTHROUGH_RETURN_LBL first_vec_x2 > +# define FALLTHROUGH_RETURN_OFFSET (CHAR_PER_VEC * 2) > # endif > + > +# define XZERO VMM_128(0) > +# define VZERO VMM(0) > +# define PAGE_SIZE 4096 > + > + .section SECTION(.text), "ax", @progbits > +ENTRY_P2ALIGN (STRLEN, 6) > movl %edi, %eax > - vpxorq %XMMZERO, %XMMZERO, %XMMZERO > - /* Clear high bits from edi. Only keeping bits relevant to page > - cross check. */ > + vpxorq %XZERO, %XZERO, %XZERO > andl $(PAGE_SIZE - 1), %eax > - /* Check if we may cross page boundary with one vector load. */ > cmpl $(PAGE_SIZE - VEC_SIZE), %eax > ja L(cross_page_boundary) > > /* Check the first VEC_SIZE bytes. Each bit in K0 represents a > null byte. */ > - VPCMP $0, (%rdi), %YMMZERO, %k0 > - kmovd %k0, %eax > -# ifdef USE_AS_STRNLEN > - /* If length < CHAR_PER_VEC handle special. */ > - cmpq $CHAR_PER_VEC, %rsi > - jbe L(first_vec_x0) > -# endif > - testl %eax, %eax > + VPCMPEQ (%rdi), %VZERO, %k0 > + KMOV %k0, %VRAX > + test %VRAX, %VRAX > jz L(aligned_more) > - tzcntl %eax, %eax > - ret > -# ifdef USE_AS_STRNLEN > -L(zero): > - xorl %eax, %eax > - ret > - > - .p2align 4 > -L(first_vec_x0): > - /* Set bit for max len so that tzcnt will return min of max len > - and position of first match. */ > - btsq %rsi, %rax > - tzcntl %eax, %eax > - ret > -# endif > - > - .p2align 4 > -L(first_vec_x1): > - tzcntl %eax, %eax > - /* Safe to use 32 bit instructions as these are only called for > - size = [1, 159]. */ > -# ifdef USE_AS_STRNLEN > - /* Use ecx which was computed earlier to compute correct value. > - */ > - leal -(CHAR_PER_VEC * 4 + 1)(%rcx, %rax), %eax > -# else > - subl %edx, %edi > -# ifdef USE_AS_WCSLEN > - /* NB: Divide bytes by 4 to get the wchar_t count. */ > - sarl $2, %edi > -# endif > - leal CHAR_PER_VEC(%rdi, %rax), %eax > -# endif > - ret > - > - .p2align 4 > -L(first_vec_x2): > - tzcntl %eax, %eax > - /* Safe to use 32 bit instructions as these are only called for > - size = [1, 159]. */ > -# ifdef USE_AS_STRNLEN > - /* Use ecx which was computed earlier to compute correct value. > - */ > - leal -(CHAR_PER_VEC * 3 + 1)(%rcx, %rax), %eax > -# else > - subl %edx, %edi > -# ifdef USE_AS_WCSLEN > - /* NB: Divide bytes by 4 to get the wchar_t count. */ > - sarl $2, %edi > -# endif > - leal (CHAR_PER_VEC * 2)(%rdi, %rax), %eax > -# endif > + bsf %VRAX, %VRAX > ret > > - .p2align 4 > -L(first_vec_x3): > - tzcntl %eax, %eax > - /* Safe to use 32 bit instructions as these are only called for > - size = [1, 159]. */ > -# ifdef USE_AS_STRNLEN > - /* Use ecx which was computed earlier to compute correct value. > - */ > - leal -(CHAR_PER_VEC * 2 + 1)(%rcx, %rax), %eax > -# else > - subl %edx, %edi > -# ifdef USE_AS_WCSLEN > - /* NB: Divide bytes by 4 to get the wchar_t count. */ > - sarl $2, %edi > -# endif > - leal (CHAR_PER_VEC * 3)(%rdi, %rax), %eax > -# endif > - ret > - > - .p2align 4 > + .p2align 4,, 8 > L(first_vec_x4): > - tzcntl %eax, %eax > - /* Safe to use 32 bit instructions as these are only called for > - size = [1, 159]. 
*/ > -# ifdef USE_AS_STRNLEN > - /* Use ecx which was computed earlier to compute correct value. > - */ > - leal -(CHAR_PER_VEC + 1)(%rcx, %rax), %eax > -# else > - subl %edx, %edi > -# ifdef USE_AS_WCSLEN > - /* NB: Divide bytes by 4 to get the wchar_t count. */ > - sarl $2, %edi > -# endif > + bsf %VRAX, %VRAX > + subl %ecx, %edi > + CHAR_SIZE_SHIFT_REG (edi) > leal (CHAR_PER_VEC * 4)(%rdi, %rax), %eax > -# endif > ret > > - .p2align 5 > + > + > + /* Aligned more for strnlen compares remaining length vs 2 * > + CHAR_PER_VEC, 4 * CHAR_PER_VEC, and 8 * CHAR_PER_VEC before > + going to the loop. */ > + .p2align 4,, 10 > L(aligned_more): > - movq %rdi, %rdx > - /* Align data to VEC_SIZE. */ > - andq $-(VEC_SIZE), %rdi > + movq %rdi, %rcx > + andq $(VEC_SIZE * -1), %rdi > L(cross_page_continue): > - /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time > - since data is only aligned to VEC_SIZE. */ > -# ifdef USE_AS_STRNLEN > - /* + CHAR_SIZE because it simplies the logic in > - last_4x_vec_or_less. */ > - leaq (VEC_SIZE * 5 + CHAR_SIZE)(%rdi), %rcx > - subq %rdx, %rcx > -# ifdef USE_AS_WCSLEN > - /* NB: Divide bytes by 4 to get the wchar_t count. */ > - sarl $2, %ecx > -# endif > -# endif > - /* Load first VEC regardless. */ > - VPCMP $0, VEC_SIZE(%rdi), %YMMZERO, %k0 > -# ifdef USE_AS_STRNLEN > - /* Adjust length. If near end handle specially. */ > - subq %rcx, %rsi > - jb L(last_4x_vec_or_less) > -# endif > - kmovd %k0, %eax > - testl %eax, %eax > + /* Remaining length >= 2 * CHAR_PER_VEC so do VEC0/VEC1 without > + rechecking bounds. */ > + VPCMPEQ (VEC_SIZE * 1)(%rdi), %VZERO, %k0 > + KMOV %k0, %VRAX > + test %VRAX, %VRAX > jnz L(first_vec_x1) > > - VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0 > - kmovd %k0, %eax > - test %eax, %eax > + VPCMPEQ (VEC_SIZE * 2)(%rdi), %VZERO, %k0 > + KMOV %k0, %VRAX > + test %VRAX, %VRAX > jnz L(first_vec_x2) > > - VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMZERO, %k0 > - kmovd %k0, %eax > - testl %eax, %eax > + VPCMPEQ (VEC_SIZE * 3)(%rdi), %VZERO, %k0 > + KMOV %k0, %VRAX > + test %VRAX, %VRAX > jnz L(first_vec_x3) > > - VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMZERO, %k0 > - kmovd %k0, %eax > - testl %eax, %eax > + VPCMPEQ (VEC_SIZE * 4)(%rdi), %VZERO, %k0 > + KMOV %k0, %VRAX > + test %VRAX, %VRAX > jnz L(first_vec_x4) > > - addq $VEC_SIZE, %rdi > -# ifdef USE_AS_STRNLEN > - /* Check if at last VEC_SIZE * 4 length. */ > - cmpq $(CHAR_PER_VEC * 4 - 1), %rsi > - jbe L(last_4x_vec_or_less_load) > - movl %edi, %ecx > - andl $(VEC_SIZE * 4 - 1), %ecx > -# ifdef USE_AS_WCSLEN > - /* NB: Divide bytes by 4 to get the wchar_t count. */ > - sarl $2, %ecx > -# endif > - /* Readjust length. */ > - addq %rcx, %rsi > -# endif > - /* Align data to VEC_SIZE * 4. */ > + subq $(VEC_SIZE * -1), %rdi > + > +# if CHAR_PER_VEC == 64 > + /* No partial register stalls on processors that we use evex512 > + on and this saves code size. */ > + xorb %dil, %dil > +# else > andq $-(VEC_SIZE * 4), %rdi > +# endif > + > + > > /* Compare 4 * VEC at a time forward. */ > .p2align 4 > L(loop_4x_vec): > - /* Load first VEC regardless. */ > - VMOVA (VEC_SIZE * 4)(%rdi), %YMM1 > -# ifdef USE_AS_STRNLEN > - /* Break if at end of length. */ > - subq $(CHAR_PER_VEC * 4), %rsi > - jb L(last_4x_vec_or_less_cmpeq) > -# endif > - /* Save some code size by microfusing VPMINU with the load. Since > - the matches in ymm2/ymm4 can only be returned if there where no > - matches in ymm1/ymm3 respectively there is no issue with overlap. 
> - */ > - VPMINU (VEC_SIZE * 5)(%rdi), %YMM1, %YMM2 > - VMOVA (VEC_SIZE * 6)(%rdi), %YMM3 > - VPMINU (VEC_SIZE * 7)(%rdi), %YMM3, %YMM4 > + VMOVA (VEC_SIZE * 4)(%rdi), %VMM(1) > + VPMINU (VEC_SIZE * 5)(%rdi), %VMM(1), %VMM(2) > + VMOVA (VEC_SIZE * 6)(%rdi), %VMM(3) > + VPMINU (VEC_SIZE * 7)(%rdi), %VMM(3), %VMM(4) > + VPTESTN %VMM(2), %VMM(2), %k0 > + VPTESTN %VMM(4), %VMM(4), %k2 > > - VPCMP $0, %YMM2, %YMMZERO, %k0 > - VPCMP $0, %YMM4, %YMMZERO, %k1 > subq $-(VEC_SIZE * 4), %rdi > - kortestd %k0, %k1 > + KORTEST %k0, %k2 > jz L(loop_4x_vec) > > - /* Check if end was in first half. */ > - kmovd %k0, %eax > - subq %rdx, %rdi > -# ifdef USE_AS_WCSLEN > - shrq $2, %rdi > -# endif > - testl %eax, %eax > - jz L(second_vec_return) > + VPTESTN %VMM(1), %VMM(1), %k1 > + KMOV %k1, %VRAX > + test %VRAX, %VRAX > + jnz L(first_vec_x0) > > - VPCMP $0, %YMM1, %YMMZERO, %k2 > - kmovd %k2, %edx > - /* Combine VEC1 matches (edx) with VEC2 matches (eax). */ > -# ifdef USE_AS_WCSLEN > - sall $CHAR_PER_VEC, %eax > - orl %edx, %eax > - tzcntl %eax, %eax > -# else > - salq $CHAR_PER_VEC, %rax > - orq %rdx, %rax > - tzcntq %rax, %rax > -# endif > - addq %rdi, %rax > - ret > - > - > -# ifdef USE_AS_STRNLEN > - > -L(last_4x_vec_or_less_load): > - /* Depending on entry adjust rdi / prepare first VEC in YMM1. */ > - VMOVA (VEC_SIZE * 4)(%rdi), %YMM1 > -L(last_4x_vec_or_less_cmpeq): > - VPCMP $0, %YMM1, %YMMZERO, %k0 > - addq $(VEC_SIZE * 3), %rdi > -L(last_4x_vec_or_less): > - kmovd %k0, %eax > - /* If remaining length > VEC_SIZE * 2. This works if esi is off by > - VEC_SIZE * 4. */ > - testl $(CHAR_PER_VEC * 2), %esi > - jnz L(last_4x_vec) > - > - /* length may have been negative or positive by an offset of > - CHAR_PER_VEC * 4 depending on where this was called from. This > - fixes that. */ > - andl $(CHAR_PER_VEC * 4 - 1), %esi > - testl %eax, %eax > - jnz L(last_vec_x1_check) > + KMOV %k0, %VRAX > + test %VRAX, %VRAX > + jnz L(first_vec_x1) > > - /* Check the end of data. */ > - subl $CHAR_PER_VEC, %esi > - jb L(max) > + VPTESTN %VMM(3), %VMM(3), %k0 > > - VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0 > - kmovd %k0, %eax > - tzcntl %eax, %eax > - /* Check the end of data. */ > - cmpl %eax, %esi > - jb L(max) > - > - subq %rdx, %rdi > -# ifdef USE_AS_WCSLEN > - /* NB: Divide bytes by 4 to get the wchar_t count. */ > - sarq $2, %rdi > -# endif > - leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax > - ret > -L(max): > - movq %r8, %rax > - ret > -# endif > - > - /* Placed here in strnlen so that the jcc L(last_4x_vec_or_less) > - in the 4x VEC loop can use 2 byte encoding. */ > - .p2align 4 > -L(second_vec_return): > - VPCMP $0, %YMM3, %YMMZERO, %k0 > - /* Combine YMM3 matches (k0) with YMM4 matches (k1). */ > -# ifdef USE_AS_WCSLEN > - kunpckbw %k0, %k1, %k0 > - kmovd %k0, %eax > - tzcntl %eax, %eax > +# if CHAR_PER_VEC == 64 > + KMOV %k0, %VRAX > + test %VRAX, %VRAX > + jnz L(first_vec_x2) > + KMOV %k2, %VRAX > # else > - kunpckdq %k0, %k1, %k0 > - kmovq %k0, %rax > - tzcntq %rax, %rax > + /* We can only combine last 2x VEC masks if CHAR_PER_VEC <= 32. > + */ > + kmovd %k2, %edx > + kmovd %k0, %eax > + salq $CHAR_PER_VEC, %rdx > + orq %rdx, %rax > # endif > - leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax > - ret > > - > -# ifdef USE_AS_STRNLEN > -L(last_vec_x1_check): > - tzcntl %eax, %eax > - /* Check the end of data. */ > - cmpl %eax, %esi > - jb L(max) > - subq %rdx, %rdi > -# ifdef USE_AS_WCSLEN > - /* NB: Divide bytes by 4 to get the wchar_t count. 
*/ > - sarq $2, %rdi > -# endif > - leaq (CHAR_PER_VEC)(%rdi, %rax), %rax > + /* first_vec_x3 for strlen-ZMM and first_vec_x2 for strlen-YMM. > + */ > + .p2align 4,, 2 > +L(FALLTHROUGH_RETURN_LBL): > + bsfq %rax, %rax > + subq %rcx, %rdi > + CHAR_SIZE_SHIFT_REG (rdi) > + leaq (FALLTHROUGH_RETURN_OFFSET)(%rdi, %rax), %rax > ret > > - .p2align 4 > -L(last_4x_vec): > - /* Test first 2x VEC normally. */ > - testl %eax, %eax > - jnz L(last_vec_x1) > - > - VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0 > - kmovd %k0, %eax > - testl %eax, %eax > - jnz L(last_vec_x2) > - > - /* Normalize length. */ > - andl $(CHAR_PER_VEC * 4 - 1), %esi > - VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMZERO, %k0 > - kmovd %k0, %eax > - testl %eax, %eax > - jnz L(last_vec_x3) > - > - /* Check the end of data. */ > - subl $(CHAR_PER_VEC * 3), %esi > - jb L(max) > - > - VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMZERO, %k0 > - kmovd %k0, %eax > - tzcntl %eax, %eax > - /* Check the end of data. */ > - cmpl %eax, %esi > - jb L(max_end) > - > - subq %rdx, %rdi > -# ifdef USE_AS_WCSLEN > - /* NB: Divide bytes by 4 to get the wchar_t count. */ > - sarq $2, %rdi > -# endif > - leaq (CHAR_PER_VEC * 4)(%rdi, %rax), %rax > + .p2align 4,, 8 > +L(first_vec_x0): > + bsf %VRAX, %VRAX > + sub %rcx, %rdi > + CHAR_SIZE_SHIFT_REG (rdi) > + addq %rdi, %rax > ret > > - .p2align 4 > -L(last_vec_x1): > - tzcntl %eax, %eax > - subq %rdx, %rdi > -# ifdef USE_AS_WCSLEN > - /* NB: Divide bytes by 4 to get the wchar_t count. */ > - sarq $2, %rdi > -# endif > + .p2align 4,, 10 > +L(first_vec_x1): > + bsf %VRAX, %VRAX > + sub %rcx, %rdi > + CHAR_SIZE_SHIFT_REG (rdi) > leaq (CHAR_PER_VEC)(%rdi, %rax), %rax > ret > > - .p2align 4 > -L(last_vec_x2): > - tzcntl %eax, %eax > - subq %rdx, %rdi > -# ifdef USE_AS_WCSLEN > - /* NB: Divide bytes by 4 to get the wchar_t count. */ > - sarq $2, %rdi > -# endif > - leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax > - ret > - > - .p2align 4 > -L(last_vec_x3): > - tzcntl %eax, %eax > - subl $(CHAR_PER_VEC * 2), %esi > - /* Check the end of data. */ > - cmpl %eax, %esi > - jb L(max_end) > - subq %rdx, %rdi > -# ifdef USE_AS_WCSLEN > - /* NB: Divide bytes by 4 to get the wchar_t count. */ > - sarq $2, %rdi > -# endif > - leaq (CHAR_PER_VEC * 3)(%rdi, %rax), %rax > - ret > -L(max_end): > - movq %r8, %rax > + .p2align 4,, 10 > + /* first_vec_x2 for strlen-ZMM and first_vec_x3 for strlen-YMM. > + */ > +L(TAIL_RETURN_LBL): > + bsf %VRAX, %VRAX > + sub %VRCX, %VRDI > + CHAR_SIZE_SHIFT_REG (VRDI) > + lea (TAIL_RETURN_OFFSET)(%rdi, %rax), %VRAX > ret > -# endif > > - /* Cold case for crossing page with first load. */ > - .p2align 4 > + .p2align 4,, 8 > L(cross_page_boundary): > - movq %rdi, %rdx > + movq %rdi, %rcx > /* Align data to VEC_SIZE. */ > andq $-VEC_SIZE, %rdi > - VPCMP $0, (%rdi), %YMMZERO, %k0 > - kmovd %k0, %eax > - /* Remove the leading bytes. */ > + > + VPCMPEQ (%rdi), %VZERO, %k0 > + > + KMOV %k0, %VRAX > # ifdef USE_AS_WCSLEN > - /* NB: Divide shift count by 4 since each bit in K0 represent 4 > - bytes. */ > - movl %edx, %ecx > - shrl $2, %ecx > - andl $(CHAR_PER_VEC - 1), %ecx > -# endif > - /* SHIFT_REG is ecx for USE_AS_WCSLEN and edx otherwise. 
*/ > - sarxl %SHIFT_REG, %eax, %eax > + movl %ecx, %edx > + shrl $2, %edx > + andl $(CHAR_PER_VEC - 1), %edx > + shrx %edx, %eax, %eax > testl %eax, %eax > -# ifndef USE_AS_STRNLEN > - jz L(cross_page_continue) > - tzcntl %eax, %eax > - ret > # else > - jnz L(cross_page_less_vec) > -# ifndef USE_AS_WCSLEN > - movl %edx, %ecx > - andl $(CHAR_PER_VEC - 1), %ecx > -# endif > - movl $CHAR_PER_VEC, %eax > - subl %ecx, %eax > - /* Check the end of data. */ > - cmpq %rax, %rsi > - ja L(cross_page_continue) > - movl %esi, %eax > - ret > -L(cross_page_less_vec): > - tzcntl %eax, %eax > - /* Select min of length and position of first null. */ > - cmpq %rax, %rsi > - cmovb %esi, %eax > - ret > + shr %cl, %VRAX > # endif > + jz L(cross_page_continue) > + bsf %VRAX, %VRAX > + ret > > END (STRLEN) > #endif > diff --git a/sysdeps/x86_64/multiarch/strnlen-evex.S b/sysdeps/x86_64/multiarch/strnlen-evex.S > index 64a9fc2606..443a32a749 100644 > --- a/sysdeps/x86_64/multiarch/strnlen-evex.S > +++ b/sysdeps/x86_64/multiarch/strnlen-evex.S > @@ -1,8 +1,423 @@ > -#ifndef STRNLEN > -# define STRNLEN __strnlen_evex > -#endif > +/* strnlen/wcsnlen optimized with 256-bit EVEX instructions. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <isa-level.h> > +#include <sysdep.h> > + > +#if ISA_SHOULD_BUILD (4) > + > +# ifndef VEC_SIZE > +# include "x86-evex256-vecs.h" > +# endif > + > + > +# ifndef STRNLEN > +# define STRNLEN __strnlen_evex > +# endif > + > +# ifdef USE_AS_WCSLEN > +# define VPCMPEQ vpcmpeqd > +# define VPCMPNEQ vpcmpneqd > +# define VPTESTN vptestnmd > +# define VPTEST vptestmd > +# define VPMINU vpminud > +# define CHAR_SIZE 4 > + > +# else > +# define VPCMPEQ vpcmpeqb > +# define VPCMPNEQ vpcmpneqb > +# define VPTESTN vptestnmb > +# define VPTEST vptestmb > +# define VPMINU vpminub > +# define CHAR_SIZE 1 > + > +# define REG_WIDTH VEC_SIZE > +# endif > + > +# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) > + > +# include "reg-macros.h" > + > +# if CHAR_PER_VEC == 32 > +# define SUB_SHORT(imm, reg) subb $(imm), %VGPR_SZ(reg, 8) > +# else > +# define SUB_SHORT(imm, reg) subl $(imm), %VGPR_SZ(reg, 32) > +# endif > + > + > + > +# if CHAR_PER_VEC == 64 > +# define FALLTHROUGH_RETURN_OFFSET (CHAR_PER_VEC * 3) > +# else > +# define FALLTHROUGH_RETURN_OFFSET (CHAR_PER_VEC * 2) > +# endif > + > + > +# define XZERO VMM_128(0) > +# define VZERO VMM(0) > +# define PAGE_SIZE 4096 > + > + .section SECTION(.text), "ax", @progbits > +ENTRY_P2ALIGN (STRNLEN, 6) > + /* Check zero length. */ > + test %RSI_LP, %RSI_LP > + jz L(zero) > +# ifdef __ILP32__ > + /* Clear the upper 32 bits. 
*/ > + movl %esi, %esi > +# endif > + > + movl %edi, %eax > + vpxorq %XZERO, %XZERO, %XZERO > + andl $(PAGE_SIZE - 1), %eax > + cmpl $(PAGE_SIZE - VEC_SIZE), %eax > + ja L(cross_page_boundary) > + > + /* Check the first VEC_SIZE bytes. Each bit in K0 represents a > + null byte. */ > + VPCMPEQ (%rdi), %VZERO, %k0 > + > + KMOV %k0, %VRCX > + movq %rsi, %rax > + > + /* If src (rcx) is zero, bsf does not change the result. NB: > + Must use 64-bit bsf here so that upper bits of len are not > + cleared. */ > + bsfq %rcx, %rax > + /* If rax > CHAR_PER_VEC then rcx must have been zero (no null > + CHAR) and rsi must be > CHAR_PER_VEC. */ > + cmpq $CHAR_PER_VEC, %rax > + ja L(more_1x_vec) > + /* Check if first match in bounds. */ > + cmpq %rax, %rsi > + cmovb %esi, %eax > + ret > + > + > +# if CHAR_PER_VEC != 32 > + .p2align 4,, 2 > +L(zero): > +L(max_0): > + movl %esi, %eax > + ret > +# endif > + > + /* Aligned more for strnlen compares remaining length vs 2 * > + CHAR_PER_VEC, 4 * CHAR_PER_VEC, and 8 * CHAR_PER_VEC before > + going to the loop. */ > + .p2align 4,, 10 > +L(more_1x_vec): > +L(cross_page_continue): > + /* Compute number of words checked after aligning. */ > +# ifdef USE_AS_WCSLEN > + /* Need to compute directly for wcslen as CHAR_SIZE * rsi can > + overflow. */ > + movq %rdi, %rax > + andq $(VEC_SIZE * -1), %rdi > + subq %rdi, %rax > + sarq $2, %rax > + leaq -(CHAR_PER_VEC * 1)(%rax, %rsi), %rax > +# else > + leaq (VEC_SIZE * -1)(%rsi, %rdi), %rax > + andq $(VEC_SIZE * -1), %rdi > + subq %rdi, %rax > +# endif > + > + > + VPCMPEQ VEC_SIZE(%rdi), %VZERO, %k0 > + > + cmpq $(CHAR_PER_VEC * 2), %rax > + ja L(more_2x_vec) > + > +L(last_2x_vec_or_less): > + KMOV %k0, %VRDX > + test %VRDX, %VRDX > + jnz L(last_vec_check) > + > + /* Check the end of data. */ > + SUB_SHORT (CHAR_PER_VEC, rax) > + jbe L(max_0) > + VPCMPEQ (VEC_SIZE * 2)(%rdi), %VZERO, %k0 > + KMOV %k0, %VRDX > + test %VRDX, %VRDX > + jz L(max_0) > + /* Best place for LAST_VEC_CHECK if ZMM. */ > + .p2align 4,, 8 > +L(last_vec_check): > + bsf %VRDX, %VRDX > + sub %eax, %edx > + lea (%rsi, %rdx), %eax > + cmovae %esi, %eax > + ret > + > +# if CHAR_PER_VEC == 32 > + .p2align 4,, 2 > +L(zero): > +L(max_0): > + movl %esi, %eax > + ret > +# endif > + > + .p2align 4,, 8 > +L(last_4x_vec_or_less): > + addl $(CHAR_PER_VEC * -4), %eax > + VPCMPEQ (VEC_SIZE * 5)(%rdi), %VZERO, %k0 > + subq $(VEC_SIZE * -4), %rdi > + cmpl $(CHAR_PER_VEC * 2), %eax > + jbe L(last_2x_vec_or_less) > + > + .p2align 4,, 6 > +L(more_2x_vec): > + /* Remaining length >= 2 * CHAR_PER_VEC so do VEC0/VEC1 without > + rechecking bounds. */ > > -#define USE_AS_STRNLEN 1 > -#define STRLEN STRNLEN > + KMOV %k0, %VRDX > > -#include "strlen-evex.S" > + test %VRDX, %VRDX > + jnz L(first_vec_x1) > + > + VPCMPEQ (VEC_SIZE * 2)(%rdi), %VZERO, %k0 > + KMOV %k0, %VRDX > + test %VRDX, %VRDX > + jnz L(first_vec_x2) > + > + cmpq $(CHAR_PER_VEC * 4), %rax > + ja L(more_4x_vec) > + > + > + VPCMPEQ (VEC_SIZE * 3)(%rdi), %VZERO, %k0 > + KMOV %k0, %VRDX > + addl $(CHAR_PER_VEC * -2), %eax > + test %VRDX, %VRDX > + jnz L(last_vec_check) > + > + subl $(CHAR_PER_VEC), %eax > + jbe L(max_1) > + > + VPCMPEQ (VEC_SIZE * 4)(%rdi), %VZERO, %k0 > + KMOV %k0, %VRDX > + > + test %VRDX, %VRDX > + jnz L(last_vec_check) > +L(max_1): > + movl %esi, %eax > + ret > + > + .p2align 4,, 3 > +L(first_vec_x2): > +# if VEC_SIZE == 64 > + /* If VEC_SIZE == 64 we can fit logic for full return label in > + spare bytes before next cache line. 
*/ > + bsf %VRDX, %VRDX > + sub %eax, %esi > + leal (CHAR_PER_VEC * 1)(%rsi, %rdx), %eax > + ret > + .p2align 4,, 6 > +# else > + addl $CHAR_PER_VEC, %esi > +# endif > +L(first_vec_x1): > + bsf %VRDX, %VRDX > + sub %eax, %esi > + leal (CHAR_PER_VEC * 0)(%rsi, %rdx), %eax > + ret > + > + > + .p2align 4,, 6 > +L(first_vec_x4): > +# if VEC_SIZE == 64 > + /* If VEC_SIZE == 64 we can fit logic for full return label in > + spare bytes before next cache line. */ > + bsf %VRDX, %VRDX > + sub %eax, %esi > + leal (CHAR_PER_VEC * 3)(%rsi, %rdx), %eax > + ret > + .p2align 4,, 6 > +# else > + addl $CHAR_PER_VEC, %esi > +# endif > +L(first_vec_x3): > + bsf %VRDX, %VRDX > + sub %eax, %esi > + leal (CHAR_PER_VEC * 2)(%rsi, %rdx), %eax > + ret > + > + .p2align 4,, 5 > +L(more_4x_vec): > + VPCMPEQ (VEC_SIZE * 3)(%rdi), %VZERO, %k0 > + KMOV %k0, %VRDX > + test %VRDX, %VRDX > + jnz L(first_vec_x3) > + > + VPCMPEQ (VEC_SIZE * 4)(%rdi), %VZERO, %k0 > + KMOV %k0, %VRDX > + test %VRDX, %VRDX > + jnz L(first_vec_x4) > + > + /* Check if at last VEC_SIZE * 4 length before aligning for the > + loop. */ > + cmpq $(CHAR_PER_VEC * 8), %rax > + jbe L(last_4x_vec_or_less) > + > + > + /* Compute number of words checked after aligning. */ > +# ifdef USE_AS_WCSLEN > + /* Need to compute directly for wcslen as CHAR_SIZE * rsi can > + overflow. */ > + leaq (VEC_SIZE * -3)(%rdi), %rdx > +# else > + leaq (VEC_SIZE * -3)(%rdi, %rax), %rax > +# endif > + > + subq $(VEC_SIZE * -1), %rdi > + > + /* Align data to VEC_SIZE * 4. */ > +# if VEC_SIZE == 64 > + /* Saves code size. No evex512 processor has partial register > + stalls. If that change this can be replaced with `andq > + $-(VEC_SIZE * 4), %rdi`. */ > + xorb %dil, %dil > +# else > + andq $-(VEC_SIZE * 4), %rdi > +# endif > + > +# ifdef USE_AS_WCSLEN > + subq %rdi, %rdx > + sarq $2, %rdx > + addq %rdx, %rax > +# else > + subq %rdi, %rax > +# endif > + /* Compare 4 * VEC at a time forward. */ > + .p2align 4,, 11 > +L(loop_4x_vec): > + VMOVA (VEC_SIZE * 4)(%rdi), %VMM(1) > + VPMINU (VEC_SIZE * 5)(%rdi), %VMM(1), %VMM(2) > + VMOVA (VEC_SIZE * 6)(%rdi), %VMM(3) > + VPMINU (VEC_SIZE * 7)(%rdi), %VMM(3), %VMM(4) > + VPTESTN %VMM(2), %VMM(2), %k0 > + VPTESTN %VMM(4), %VMM(4), %k2 > + subq $-(VEC_SIZE * 4), %rdi > + /* Break if at end of length. */ > + subq $(CHAR_PER_VEC * 4), %rax > + jbe L(loop_len_end) > + > + > + KORTEST %k0, %k2 > + jz L(loop_4x_vec) > + > + > +L(loop_last_4x_vec): > + movq %rsi, %rcx > + subq %rax, %rsi > + VPTESTN %VMM(1), %VMM(1), %k1 > + KMOV %k1, %VRDX > + test %VRDX, %VRDX > + jnz L(last_vec_x0) > + > + KMOV %k0, %VRDX > + test %VRDX, %VRDX > + jnz L(last_vec_x1) > + > + VPTESTN %VMM(3), %VMM(3), %k0 > + > + /* Seperate logic for VEC_SIZE == 64 and VEC_SIZE == 32 for > + returning last 2x VEC. For VEC_SIZE == 64 we test each VEC > + individually, for VEC_SIZE == 32 we combine them in a single > + 64-bit GPR. */ > +# if CHAR_PER_VEC == 64 > + KMOV %k0, %VRDX > + test %VRDX, %VRDX > + jnz L(last_vec_x2) > + KMOV %k2, %VRDX > +# else > + /* We can only combine last 2x VEC masks if CHAR_PER_VEC <= 32. > + */ > + kmovd %k2, %edx > + kmovd %k0, %eax > + salq $CHAR_PER_VEC, %rdx > + orq %rax, %rdx > +# endif > + > + /* first_vec_x3 for strlen-ZMM and first_vec_x2 for strlen-YMM. > + */ > + bsfq %rdx, %rdx > + leaq (FALLTHROUGH_RETURN_OFFSET - CHAR_PER_VEC * 4)(%rsi, %rdx), %rax > + cmpq %rax, %rcx > + cmovb %rcx, %rax > + ret > + > + /* Handle last 4x VEC after loop. All VECs have been loaded. 
*/ > + .p2align 4,, 4 > +L(loop_len_end): > + KORTEST %k0, %k2 > + jnz L(loop_last_4x_vec) > + movq %rsi, %rax > + ret > + > + > +# if CHAR_PER_VEC == 64 > + /* Since we can't combine the last 2x VEC for VEC_SIZE == 64 > + need return label for it. */ > + .p2align 4,, 8 > +L(last_vec_x2): > + bsf %VRDX, %VRDX > + leaq (CHAR_PER_VEC * -2)(%rsi, %rdx), %rax > + cmpq %rax, %rcx > + cmovb %rcx, %rax > + ret > +# endif > + > + > + .p2align 4,, 10 > +L(last_vec_x1): > + addq $CHAR_PER_VEC, %rsi > +L(last_vec_x0): > + bsf %VRDX, %VRDX > + leaq (CHAR_PER_VEC * -4)(%rsi, %rdx), %rax > + cmpq %rax, %rcx > + cmovb %rcx, %rax > + ret > + > + > + .p2align 4,, 8 > +L(cross_page_boundary): > + /* Align data to VEC_SIZE. */ > + movq %rdi, %rcx > + andq $-VEC_SIZE, %rcx > + VPCMPEQ (%rcx), %VZERO, %k0 > + > + KMOV %k0, %VRCX > +# ifdef USE_AS_WCSLEN > + shrl $2, %eax > + andl $(CHAR_PER_VEC - 1), %eax > +# endif > + shrx %VRAX, %VRCX, %VRCX > + > + negl %eax > + andl $(CHAR_PER_VEC - 1), %eax > + movq %rsi, %rdx > + bsf %VRCX, %VRDX > + cmpq %rax, %rdx > + ja L(cross_page_continue) > + movl %edx, %eax > + cmpq %rdx, %rsi > + cmovb %esi, %eax > + ret > +END (STRNLEN) > +#endif > diff --git a/sysdeps/x86_64/multiarch/wcsnlen-evex.S b/sysdeps/x86_64/multiarch/wcsnlen-evex.S > index e2aad94c1e..57a7e93fbf 100644 > --- a/sysdeps/x86_64/multiarch/wcsnlen-evex.S > +++ b/sysdeps/x86_64/multiarch/wcsnlen-evex.S > @@ -2,8 +2,7 @@ > # define WCSNLEN __wcsnlen_evex > #endif > > -#define STRLEN WCSNLEN > +#define STRNLEN WCSNLEN > #define USE_AS_WCSLEN 1 > -#define USE_AS_STRNLEN 1 > > -#include "strlen-evex.S" > +#include "strnlen-evex.S" > -- > 2.34.1 > Results For: strlen alignment,length ,__strlen_evex ,__strlen_evex_orig 0 ,0 ,2.789 ,2.836 ,0.983 0 ,1 ,2.576 ,2.59 ,0.995 0 ,1024 ,18.366 ,18.987 ,0.967 0 ,1152 ,19.69 ,20.571 ,0.957 0 ,128 ,5.532 ,5.481 ,1.009 0 ,1280 ,21.278 ,22.211 ,0.958 0 ,1408 ,22.981 ,23.668 ,0.971 0 ,1536 ,25.244 ,24.822 ,1.017 0 ,16 ,2.832 ,2.832 ,1.0 0 ,160 ,8.36 ,8.71 ,0.96 0 ,1664 ,26.608 ,26.666 ,0.998 0 ,1792 ,28.21 ,28.953 ,0.974 0 ,192 ,9.399 ,8.475 ,1.109 0 ,1920 ,29.609 ,30.389 ,0.974 0 ,2 ,3.652 ,3.779 ,0.966 0 ,2048 ,31.087 ,32.884 ,0.945 0 ,224 ,9.305 ,8.356 ,1.114 0 ,2304 ,34.284 ,35.183 ,0.974 0 ,256 ,9.083 ,10.019 ,0.907 0 ,2560 ,36.909 ,40.442 ,0.913 0 ,2816 ,43.14 ,48.723 ,0.885 0 ,288 ,9.432 ,9.851 ,0.957 0 ,3 ,2.636 ,2.608 ,1.011 0 ,3072 ,58.749 ,66.729 ,0.88 0 ,32 ,4.239 ,4.272 ,0.992 0 ,320 ,10.685 ,9.969 ,1.072 0 ,3328 ,69.222 ,68.331 ,1.013 0 ,352 ,10.704 ,9.7 ,1.104 0 ,3584 ,72.488 ,72.329 ,1.002 0 ,384 ,10.635 ,11.528 ,0.923 0 ,3840 ,74.933 ,76.039 ,0.985 0 ,4 ,2.777 ,2.743 ,1.013 0 ,4096 ,79.241 ,77.521 ,1.022 0 ,416 ,11.036 ,11.535 ,0.957 0 ,448 ,12.466 ,11.544 ,1.08 0 ,4608 ,84.571 ,84.503 ,1.001 0 ,480 ,12.479 ,11.472 ,1.088 0 ,5 ,2.923 ,2.784 ,1.05 0 ,512 ,12.12 ,12.888 ,0.94 0 ,5120 ,91.334 ,91.435 ,0.999 0 ,5632 ,98.695 ,95.914 ,1.029 0 ,576 ,13.732 ,12.493 ,1.099 0 ,6 ,2.928 ,2.75 ,1.064 0 ,6144 ,104.673 ,102.746 ,1.019 0 ,64 ,4.372 ,4.281 ,1.021 0 ,640 ,13.884 ,14.217 ,0.977 0 ,6656 ,112.122 ,110.392 ,1.016 0 ,7 ,2.798 ,2.763 ,1.012 0 ,704 ,15.31 ,14.697 ,1.042 0 ,7168 ,117.652 ,114.757 ,1.025 0 ,768 ,15.406 ,16.286 ,0.946 0 ,7680 ,122.809 ,121.845 ,1.008 0 ,8 ,2.83 ,2.818 ,1.004 0 ,832 ,17.179 ,16.597 ,1.035 0 ,896 ,16.906 ,17.978 ,0.94 0 ,96 ,4.933 ,4.884 ,1.01 0 ,960 ,18.548 ,18.041 ,1.028 1 ,1 ,2.594 ,2.619 ,0.991 10 ,1024 ,18.161 ,19.003 ,0.956 10 ,682 ,14.286 ,14.158 ,1.009 11 ,1365 ,23.596 ,21.917 ,1.077 11 ,2048 ,31.044 ,32.299 ,0.961 12 ,2730 
,50.067 ,52.292 ,0.957 12 ,4096 ,79.161 ,78.804 ,1.005 2 ,2 ,3.055 ,3.22 ,0.949 2 ,4 ,2.818 ,2.836 ,0.994 3 ,3 ,2.699 ,2.896 ,0.932 3 ,5 ,2.843 ,2.852 ,0.997 3 ,8 ,2.837 ,2.839 ,0.999 4 ,10 ,2.84 ,2.825 ,1.005 4 ,16 ,2.811 ,2.826 ,0.994 4 ,4 ,2.715 ,2.714 ,1.0 5 ,21 ,2.782 ,2.824 ,0.985 5 ,32 ,4.189 ,4.222 ,0.992 5 ,5 ,2.721 ,2.701 ,1.007 6 ,42 ,4.295 ,4.211 ,1.02 6 ,6 ,2.775 ,2.81 ,0.988 6 ,64 ,4.224 ,4.27 ,0.989 7 ,1024 ,18.286 ,18.987 ,0.963 7 ,128 ,5.4 ,5.343 ,1.011 7 ,16 ,2.846 ,2.836 ,1.003 7 ,2048 ,31.003 ,32.319 ,0.959 7 ,256 ,9.04 ,9.946 ,0.909 7 ,32 ,4.219 ,4.218 ,1.0 7 ,4 ,2.909 ,2.906 ,1.001 7 ,4096 ,79.073 ,78.896 ,1.002 7 ,512 ,12.178 ,12.742 ,0.956 7 ,64 ,4.368 ,4.519 ,0.967 7 ,7 ,2.762 ,2.771 ,0.997 7 ,8 ,2.867 ,2.839 ,1.01 7 ,85 ,4.187 ,4.336 ,0.966 8 ,170 ,7.993 ,8.372 ,0.955 8 ,256 ,9.016 ,9.91 ,0.91 9 ,341 ,10.593 ,9.577 ,1.106 9 ,512 ,11.939 ,12.694 ,0.941 0.9925909850217739 Results For: strnlen align,len ,max_char ,maxlen ,__strnlen_evex ,__strnlen_evex_orig 0 ,1 ,127 ,0 ,8.826 ,10.545 ,0.837 0 ,1 ,127 ,1 ,8.36 ,9.794 ,0.854 0 ,1 ,127 ,128 ,8.707 ,8.733 ,0.997 0 ,1 ,127 ,2 ,8.43 ,9.042 ,0.932 0 ,1 ,127 ,5000 ,8.226 ,8.442 ,0.974 0 ,1024 ,127 ,1024 ,50.898 ,54.809 ,0.929 0 ,1024 ,127 ,1056 ,61.814 ,56.289 ,1.098 0 ,1024 ,127 ,1088 ,61.941 ,57.059 ,1.086 0 ,1024 ,127 ,1120 ,61.708 ,57.166 ,1.079 0 ,1024 ,127 ,1152 ,61.88 ,57.664 ,1.073 0 ,1024 ,127 ,1184 ,62.084 ,60.571 ,1.025 0 ,1024 ,127 ,1216 ,61.799 ,60.38 ,1.023 0 ,1024 ,127 ,1248 ,61.836 ,60.313 ,1.025 0 ,1024 ,127 ,1280 ,61.829 ,60.038 ,1.03 0 ,1024 ,127 ,1312 ,61.932 ,60.317 ,1.027 0 ,1024 ,127 ,1344 ,61.726 ,60.014 ,1.029 0 ,1024 ,127 ,1376 ,62.018 ,60.242 ,1.029 0 ,1024 ,127 ,1408 ,61.944 ,60.107 ,1.031 0 ,1024 ,127 ,1440 ,61.799 ,59.875 ,1.032 0 ,1024 ,127 ,1472 ,61.891 ,60.589 ,1.021 0 ,1024 ,127 ,1504 ,61.95 ,59.84 ,1.035 0 ,1024 ,127 ,1536 ,61.757 ,59.769 ,1.033 0 ,1024 ,127 ,1568 ,61.685 ,60.345 ,1.022 0 ,1024 ,127 ,1600 ,61.986 ,60.672 ,1.022 0 ,1024 ,127 ,1632 ,61.845 ,60.189 ,1.028 0 ,1024 ,127 ,1664 ,61.971 ,61.093 ,1.014 0 ,1024 ,127 ,1696 ,61.855 ,60.162 ,1.028 0 ,1024 ,127 ,1728 ,63.386 ,59.919 ,1.058 0 ,1024 ,127 ,320 ,26.779 ,30.15 ,0.888 0 ,1024 ,127 ,352 ,26.779 ,30.898 ,0.867 0 ,1024 ,127 ,384 ,26.768 ,32.851 ,0.815 0 ,1024 ,127 ,416 ,31.668 ,31.878 ,0.993 0 ,1024 ,127 ,448 ,31.654 ,33.63 ,0.941 0 ,1024 ,127 ,480 ,31.685 ,34.387 ,0.921 0 ,1024 ,127 ,5000 ,61.853 ,62.0 ,0.998 0 ,1024 ,127 ,512 ,31.67 ,37.012 ,0.856 0 ,1024 ,127 ,544 ,36.553 ,37.076 ,0.986 0 ,1024 ,127 ,576 ,36.533 ,38.968 ,0.938 0 ,1024 ,127 ,608 ,36.527 ,40.962 ,0.892 0 ,1024 ,127 ,640 ,36.512 ,41.935 ,0.871 0 ,1024 ,127 ,672 ,41.601 ,40.159 ,1.036 0 ,1024 ,127 ,704 ,43.111 ,43.128 ,1.0 0 ,1024 ,127 ,736 ,41.645 ,44.285 ,0.94 0 ,1024 ,127 ,768 ,41.631 ,46.597 ,0.893 0 ,1024 ,127 ,800 ,46.671 ,46.504 ,1.004 0 ,1024 ,127 ,832 ,46.815 ,47.772 ,0.98 0 ,1024 ,127 ,864 ,46.688 ,51.689 ,0.903 0 ,1024 ,127 ,896 ,46.743 ,52.56 ,0.889 0 ,1024 ,127 ,928 ,51.212 ,51.64 ,0.992 0 ,1024 ,127 ,960 ,51.243 ,53.334 ,0.961 0 ,1024 ,127 ,992 ,51.256 ,54.768 ,0.936 0 ,1056 ,127 ,1024 ,51.215 ,55.52 ,0.922 0 ,1056 ,127 ,512 ,31.646 ,36.902 ,0.858 0 ,1088 ,127 ,1024 ,51.259 ,56.534 ,0.907 0 ,1088 ,127 ,512 ,31.647 ,36.903 ,0.858 0 ,112 ,127 ,16 ,8.512 ,9.287 ,0.917 0 ,1120 ,127 ,1024 ,51.303 ,55.574 ,0.923 0 ,1120 ,127 ,512 ,31.644 ,37.558 ,0.843 0 ,1152 ,127 ,1024 ,51.252 ,56.372 ,0.909 0 ,1152 ,127 ,512 ,31.647 ,37.888 ,0.835 0 ,1184 ,127 ,1024 ,51.223 ,56.414 ,0.908 0 ,1184 ,127 ,512 ,31.635 ,36.859 ,0.858 0 ,1216 ,127 ,1024 ,51.243 ,55.82 ,0.918 
0 ,1216 ,127 ,512 ,31.66 ,36.881 ,0.858 0 ,1248 ,127 ,1024 ,51.211 ,55.607 ,0.921 0 ,128 ,127 ,1 ,8.815 ,8.894 ,0.991 0 ,128 ,127 ,128 ,15.165 ,17.562 ,0.863 0 ,128 ,127 ,160 ,18.865 ,20.212 ,0.933 0 ,128 ,127 ,192 ,17.618 ,16.757 ,1.051 0 ,128 ,127 ,224 ,17.609 ,16.766 ,1.05 0 ,128 ,127 ,256 ,17.597 ,16.589 ,1.061 0 ,128 ,127 ,288 ,17.592 ,17.272 ,1.019 0 ,128 ,127 ,32 ,8.262 ,8.96 ,0.922 0 ,128 ,127 ,320 ,17.6 ,16.518 ,1.065 0 ,128 ,127 ,352 ,17.601 ,16.965 ,1.037 0 ,128 ,127 ,384 ,17.595 ,16.917 ,1.04 0 ,128 ,127 ,416 ,17.608 ,16.805 ,1.048 0 ,128 ,127 ,448 ,17.599 ,17.616 ,0.999 0 ,128 ,127 ,480 ,17.604 ,16.925 ,1.04 0 ,128 ,127 ,5000 ,17.6 ,17.169 ,1.025 0 ,128 ,127 ,512 ,17.617 ,16.877 ,1.044 0 ,128 ,127 ,544 ,17.618 ,16.679 ,1.056 0 ,128 ,127 ,576 ,17.588 ,17.283 ,1.018 0 ,128 ,127 ,608 ,17.611 ,17.113 ,1.029 0 ,128 ,127 ,64 ,11.588 ,16.35 ,0.709 0 ,128 ,127 ,640 ,17.596 ,16.752 ,1.05 0 ,128 ,127 ,672 ,17.606 ,16.778 ,1.049 0 ,128 ,127 ,704 ,17.591 ,17.232 ,1.021 0 ,128 ,127 ,736 ,17.605 ,16.987 ,1.036 0 ,128 ,127 ,768 ,17.619 ,17.879 ,0.985 0 ,128 ,127 ,800 ,17.605 ,17.371 ,1.013 0 ,128 ,127 ,832 ,17.603 ,16.967 ,1.037 0 ,128 ,127 ,96 ,12.339 ,16.454 ,0.75 0 ,1280 ,127 ,1024 ,51.193 ,55.361 ,0.925 0 ,1312 ,127 ,1024 ,51.2 ,56.589 ,0.905 0 ,1344 ,127 ,1024 ,51.203 ,55.915 ,0.916 0 ,1344 ,127 ,2048 ,75.041 ,70.123 ,1.07 0 ,1376 ,127 ,1024 ,51.251 ,55.31 ,0.927 0 ,1376 ,127 ,2048 ,75.027 ,70.119 ,1.07 0 ,1408 ,127 ,1024 ,51.199 ,56.591 ,0.905 0 ,1408 ,127 ,2048 ,75.92 ,74.458 ,1.02 0 ,144 ,127 ,16 ,8.276 ,9.446 ,0.876 0 ,1440 ,127 ,1024 ,51.278 ,55.935 ,0.917 0 ,1440 ,127 ,2048 ,76.43 ,72.711 ,1.051 0 ,1472 ,127 ,1024 ,51.257 ,56.579 ,0.906 0 ,1472 ,127 ,2048 ,79.523 ,74.993 ,1.06 0 ,1504 ,127 ,1024 ,51.191 ,56.314 ,0.909 0 ,1504 ,127 ,2048 ,79.489 ,74.554 ,1.066 0 ,1536 ,127 ,1024 ,51.204 ,55.617 ,0.921 0 ,1536 ,127 ,2048 ,80.762 ,80.577 ,1.002 0 ,1568 ,127 ,1024 ,51.231 ,55.206 ,0.928 0 ,1568 ,127 ,2048 ,81.672 ,77.45 ,1.055 0 ,16 ,127 ,112 ,8.028 ,7.947 ,1.01 0 ,16 ,127 ,144 ,8.253 ,7.179 ,1.15 0 ,16 ,127 ,16 ,7.711 ,8.782 ,0.878 0 ,16 ,127 ,176 ,7.765 ,7.904 ,0.982 0 ,16 ,127 ,208 ,7.985 ,7.606 ,1.05 0 ,16 ,127 ,240 ,7.872 ,8.401 ,0.937 0 ,16 ,127 ,272 ,7.991 ,7.467 ,1.07 0 ,16 ,127 ,304 ,7.872 ,7.737 ,1.018 0 ,16 ,127 ,336 ,7.981 ,7.474 ,1.068 0 ,16 ,127 ,368 ,7.985 ,8.093 ,0.987 0 ,16 ,127 ,400 ,8.134 ,7.181 ,1.133 0 ,16 ,127 ,432 ,7.913 ,8.09 ,0.978 0 ,16 ,127 ,464 ,7.873 ,8.062 ,0.976 0 ,16 ,127 ,48 ,8.523 ,7.473 ,1.14 0 ,16 ,127 ,496 ,7.872 ,7.469 ,1.054 0 ,16 ,127 ,5000 ,8.014 ,7.552 ,1.061 0 ,16 ,127 ,528 ,8.103 ,7.766 ,1.043 0 ,16 ,127 ,560 ,7.77 ,7.495 ,1.037 0 ,16 ,127 ,592 ,7.872 ,7.779 ,1.012 0 ,16 ,127 ,624 ,7.877 ,7.929 ,0.993 0 ,16 ,127 ,656 ,8.207 ,8.078 ,1.016 0 ,16 ,127 ,688 ,8.081 ,8.243 ,0.98 0 ,16 ,127 ,720 ,7.895 ,7.96 ,0.992 0 ,16 ,127 ,80 ,7.766 ,8.232 ,0.943 0 ,160 ,127 ,128 ,15.154 ,18.801 ,0.806 0 ,160 ,127 ,256 ,20.798 ,22.397 ,0.929 0 ,160 ,127 ,32 ,8.391 ,9.465 ,0.887 0 ,160 ,127 ,512 ,28.453 ,27.335 ,1.041 0 ,160 ,127 ,64 ,11.772 ,16.048 ,0.734 0 ,1600 ,127 ,1024 ,51.248 ,56.536 ,0.906 0 ,1600 ,127 ,2048 ,83.783 ,79.095 ,1.059 0 ,1632 ,127 ,1024 ,51.209 ,55.354 ,0.925 0 ,1632 ,127 ,2048 ,83.795 ,80.783 ,1.037 0 ,1664 ,127 ,1024 ,51.231 ,55.463 ,0.924 0 ,1664 ,127 ,2048 ,84.843 ,81.011 ,1.047 0 ,1696 ,127 ,1024 ,51.224 ,55.806 ,0.918 0 ,1696 ,127 ,2048 ,85.355 ,81.067 ,1.053 0 ,1728 ,127 ,1024 ,51.24 ,55.575 ,0.922 0 ,1728 ,127 ,2048 ,88.35 ,85.182 ,1.037 0 ,176 ,127 ,16 ,7.848 ,9.112 ,0.861 0 ,1760 ,127 ,2048 ,88.324 ,86.607 ,1.02 0 ,1792 ,127 ,2048 
,89.051 ,89.539 ,0.995 0 ,1824 ,127 ,2048 ,89.869 ,89.569 ,1.003 0 ,1856 ,127 ,2048 ,92.812 ,92.592 ,1.002 0 ,1888 ,127 ,2048 ,92.888 ,89.784 ,1.035 0 ,192 ,127 ,128 ,16.134 ,19.141 ,0.843 0 ,192 ,127 ,256 ,22.552 ,23.728 ,0.95 0 ,192 ,127 ,32 ,7.771 ,8.878 ,0.875 0 ,192 ,127 ,512 ,30.556 ,27.211 ,1.123 0 ,192 ,127 ,64 ,11.901 ,15.859 ,0.75 0 ,1920 ,127 ,2048 ,93.42 ,87.672 ,1.066 0 ,1952 ,127 ,2048 ,94.412 ,89.887 ,1.05 0 ,1984 ,127 ,2048 ,97.4 ,95.328 ,1.022 0 ,2 ,127 ,1 ,8.372 ,8.943 ,0.936 0 ,2 ,127 ,2 ,8.219 ,9.107 ,0.902 0 ,2 ,127 ,3 ,8.136 ,9.115 ,0.893 0 ,2 ,127 ,5000 ,8.244 ,7.468 ,1.104 0 ,2016 ,127 ,2048 ,97.397 ,93.516 ,1.042 0 ,2048 ,127 ,1344 ,65.155 ,65.144 ,1.0 0 ,2048 ,127 ,1376 ,65.218 ,68.192 ,0.956 0 ,2048 ,127 ,1408 ,65.129 ,69.788 ,0.933 0 ,2048 ,127 ,1440 ,69.729 ,69.167 ,1.008 0 ,2048 ,127 ,1472 ,69.858 ,70.173 ,0.996 0 ,2048 ,127 ,1504 ,69.811 ,76.589 ,0.912 0 ,2048 ,127 ,1536 ,69.755 ,71.866 ,0.971 0 ,2048 ,127 ,1568 ,74.011 ,72.649 ,1.019 0 ,2048 ,127 ,1600 ,74.101 ,73.454 ,1.009 0 ,2048 ,127 ,1632 ,74.022 ,78.453 ,0.944 0 ,2048 ,127 ,1664 ,74.022 ,76.724 ,0.965 0 ,2048 ,127 ,1696 ,78.328 ,77.968 ,1.005 0 ,2048 ,127 ,1728 ,78.165 ,79.1 ,0.988 0 ,2048 ,127 ,1760 ,78.292 ,86.051 ,0.91 0 ,2048 ,127 ,1792 ,78.238 ,82.325 ,0.95 0 ,2048 ,127 ,1824 ,82.681 ,91.502 ,0.904 0 ,2048 ,127 ,1856 ,82.708 ,90.495 ,0.914 0 ,2048 ,127 ,1888 ,82.688 ,90.966 ,0.909 0 ,2048 ,127 ,1920 ,82.953 ,88.146 ,0.941 0 ,2048 ,127 ,1952 ,88.907 ,86.354 ,1.03 0 ,2048 ,127 ,1984 ,87.401 ,89.249 ,0.979 0 ,2048 ,127 ,2016 ,87.451 ,93.03 ,0.94 0 ,2048 ,127 ,2048 ,87.085 ,87.77 ,0.992 0 ,2048 ,127 ,2080 ,97.034 ,91.859 ,1.056 0 ,2048 ,127 ,2112 ,97.241 ,89.463 ,1.087 0 ,2048 ,127 ,2144 ,97.439 ,91.745 ,1.062 0 ,2048 ,127 ,2176 ,97.365 ,91.434 ,1.065 0 ,2048 ,127 ,2208 ,97.29 ,94.349 ,1.031 0 ,2048 ,127 ,2240 ,97.514 ,94.828 ,1.028 0 ,2048 ,127 ,2272 ,97.354 ,96.468 ,1.009 0 ,2048 ,127 ,2304 ,97.463 ,95.07 ,1.025 0 ,2048 ,127 ,2336 ,97.521 ,93.862 ,1.039 0 ,2048 ,127 ,2368 ,97.458 ,91.991 ,1.059 0 ,2048 ,127 ,2400 ,97.462 ,95.001 ,1.026 0 ,2048 ,127 ,2432 ,97.431 ,94.729 ,1.029 0 ,2048 ,127 ,2464 ,98.059 ,96.648 ,1.015 0 ,2048 ,127 ,2496 ,98.201 ,94.299 ,1.041 0 ,2048 ,127 ,2528 ,97.463 ,92.872 ,1.049 0 ,2048 ,127 ,2560 ,97.224 ,92.746 ,1.048 0 ,2048 ,127 ,2592 ,97.552 ,92.734 ,1.052 0 ,2048 ,127 ,2624 ,97.225 ,94.323 ,1.031 0 ,2048 ,127 ,2656 ,97.533 ,92.955 ,1.049 0 ,2048 ,127 ,2688 ,97.286 ,92.563 ,1.051 0 ,2048 ,127 ,2720 ,97.663 ,93.009 ,1.05 0 ,2048 ,127 ,2752 ,97.566 ,92.544 ,1.054 0 ,208 ,127 ,16 ,8.269 ,9.636 ,0.858 0 ,2080 ,127 ,2048 ,87.327 ,88.36 ,0.988 0 ,2112 ,127 ,2048 ,87.295 ,88.916 ,0.982 0 ,2144 ,127 ,2048 ,87.303 ,88.041 ,0.992 0 ,2176 ,127 ,2048 ,87.271 ,92.076 ,0.948 0 ,2208 ,127 ,2048 ,87.277 ,88.826 ,0.983 0 ,224 ,127 ,128 ,15.744 ,18.486 ,0.852 0 ,224 ,127 ,256 ,25.117 ,24.473 ,1.026 0 ,224 ,127 ,32 ,8.188 ,9.108 ,0.899 0 ,224 ,127 ,512 ,30.598 ,27.231 ,1.124 0 ,224 ,127 ,64 ,11.588 ,14.368 ,0.807 0 ,2240 ,127 ,2048 ,87.264 ,92.115 ,0.947 0 ,2272 ,127 ,2048 ,87.337 ,93.49 ,0.934 0 ,2304 ,127 ,2048 ,89.4 ,88.821 ,1.007 0 ,2336 ,127 ,2048 ,87.416 ,91.319 ,0.957 0 ,2368 ,127 ,2048 ,87.567 ,91.481 ,0.957 0 ,240 ,127 ,16 ,7.919 ,9.446 ,0.838 0 ,2400 ,127 ,2048 ,87.283 ,91.766 ,0.951 0 ,2432 ,127 ,2048 ,87.24 ,88.452 ,0.986 0 ,2464 ,127 ,2048 ,87.265 ,89.14 ,0.979 0 ,2496 ,127 ,2048 ,87.269 ,90.857 ,0.961 0 ,2528 ,127 ,2048 ,87.281 ,88.188 ,0.99 0 ,256 ,127 ,128 ,15.801 ,18.709 ,0.845 0 ,256 ,127 ,160 ,16.748 ,19.81 ,0.845 0 ,256 ,127 ,192 ,20.426 ,22.021 ,0.928 0 ,256 ,127 ,224 
,21.854 ,25.135 ,0.869 0 ,256 ,127 ,256 ,24.458 ,23.601 ,1.036 0 ,256 ,127 ,288 ,27.505 ,26.207 ,1.05 0 ,256 ,127 ,32 ,8.482 ,8.969 ,0.946 0 ,256 ,127 ,320 ,32.108 ,29.16 ,1.101 0 ,256 ,127 ,352 ,32.026 ,27.815 ,1.151 0 ,256 ,127 ,384 ,32.05 ,27.73 ,1.156 0 ,256 ,127 ,416 ,31.946 ,31.99 ,0.999 0 ,256 ,127 ,448 ,32.078 ,32.051 ,1.001 0 ,256 ,127 ,480 ,32.029 ,31.955 ,1.002 0 ,256 ,127 ,5000 ,32.099 ,32.119 ,0.999 0 ,256 ,127 ,512 ,32.106 ,31.981 ,1.004 0 ,256 ,127 ,544 ,32.112 ,32.085 ,1.001 0 ,256 ,127 ,576 ,32.102 ,32.016 ,1.003 0 ,256 ,127 ,608 ,32.129 ,32.028 ,1.003 0 ,256 ,127 ,64 ,11.543 ,16.009 ,0.721 0 ,256 ,127 ,640 ,32.065 ,32.097 ,0.999 0 ,256 ,127 ,672 ,32.034 ,31.884 ,1.005 0 ,256 ,127 ,704 ,33.044 ,32.017 ,1.032 0 ,256 ,127 ,736 ,32.079 ,31.959 ,1.004 0 ,256 ,127 ,768 ,32.121 ,32.047 ,1.002 0 ,256 ,127 ,800 ,32.118 ,31.976 ,1.004 0 ,256 ,127 ,832 ,32.062 ,31.96 ,1.003 0 ,256 ,127 ,864 ,32.031 ,31.882 ,1.005 0 ,256 ,127 ,896 ,32.091 ,31.986 ,1.003 0 ,256 ,127 ,928 ,32.001 ,31.985 ,1.001 0 ,256 ,127 ,96 ,12.448 ,16.698 ,0.745 0 ,256 ,127 ,960 ,32.025 ,32.087 ,0.998 0 ,2560 ,127 ,2048 ,87.253 ,88.383 ,0.987 0 ,2592 ,127 ,2048 ,87.302 ,88.626 ,0.985 0 ,2624 ,127 ,2048 ,87.315 ,93.108 ,0.938 0 ,2656 ,127 ,2048 ,88.187 ,88.823 ,0.993 0 ,2688 ,127 ,2048 ,87.345 ,88.174 ,0.991 0 ,272 ,127 ,16 ,7.93 ,9.626 ,0.824 0 ,2720 ,127 ,2048 ,87.285 ,88.878 ,0.982 0 ,2752 ,127 ,2048 ,87.233 ,88.579 ,0.985 0 ,288 ,127 ,128 ,15.364 ,18.403 ,0.835 0 ,288 ,127 ,256 ,24.552 ,24.252 ,1.012 0 ,288 ,127 ,32 ,8.017 ,9.577 ,0.837 0 ,288 ,127 ,512 ,33.191 ,32.165 ,1.032 0 ,288 ,127 ,64 ,11.494 ,15.185 ,0.757 0 ,3 ,127 ,2 ,8.285 ,8.966 ,0.924 0 ,3 ,127 ,3 ,8.167 ,8.983 ,0.909 0 ,3 ,127 ,4 ,8.01 ,9.069 ,0.883 0 ,3 ,127 ,5000 ,8.128 ,7.766 ,1.047 0 ,304 ,127 ,16 ,8.096 ,9.454 ,0.856 0 ,32 ,127 ,128 ,12.311 ,16.153 ,0.762 0 ,32 ,127 ,160 ,12.336 ,16.172 ,0.763 0 ,32 ,127 ,192 ,12.305 ,13.279 ,0.927 0 ,32 ,127 ,224 ,12.308 ,13.091 ,0.94 0 ,32 ,127 ,256 ,12.632 ,13.381 ,0.944 0 ,32 ,127 ,288 ,12.294 ,12.47 ,0.986 0 ,32 ,127 ,32 ,7.66 ,8.781 ,0.872 0 ,32 ,127 ,320 ,12.333 ,13.122 ,0.94 0 ,32 ,127 ,352 ,12.339 ,12.464 ,0.99 0 ,32 ,127 ,384 ,12.304 ,12.46 ,0.987 0 ,32 ,127 ,416 ,12.336 ,13.574 ,0.909 0 ,32 ,127 ,448 ,12.354 ,12.306 ,1.004 0 ,32 ,127 ,480 ,12.304 ,12.304 ,1.0 0 ,32 ,127 ,5000 ,12.306 ,13.123 ,0.938 0 ,32 ,127 ,512 ,12.32 ,13.246 ,0.93 0 ,32 ,127 ,544 ,12.34 ,13.222 ,0.933 0 ,32 ,127 ,576 ,12.339 ,12.918 ,0.955 0 ,32 ,127 ,608 ,12.343 ,12.805 ,0.964 0 ,32 ,127 ,64 ,12.98 ,14.809 ,0.877 0 ,32 ,127 ,640 ,12.304 ,12.471 ,0.987 0 ,32 ,127 ,672 ,12.303 ,12.464 ,0.987 0 ,32 ,127 ,704 ,12.3 ,12.804 ,0.961 0 ,32 ,127 ,736 ,12.298 ,12.464 ,0.987 0 ,32 ,127 ,96 ,12.424 ,14.9 ,0.834 0 ,320 ,127 ,1024 ,35.324 ,31.788 ,1.111 0 ,320 ,127 ,128 ,15.262 ,18.518 ,0.824 0 ,320 ,127 ,256 ,24.669 ,25.17 ,0.98 0 ,320 ,127 ,32 ,7.999 ,9.123 ,0.877 0 ,320 ,127 ,512 ,35.3 ,31.824 ,1.109 0 ,320 ,127 ,64 ,11.522 ,15.007 ,0.768 0 ,336 ,127 ,16 ,7.981 ,8.948 ,0.892 0 ,3392 ,127 ,4096 ,150.235 ,190.301 ,0.789 0 ,3424 ,127 ,4096 ,144.605 ,190.131 ,0.761 0 ,3456 ,127 ,4096 ,142.366 ,193.997 ,0.734 0 ,3488 ,127 ,4096 ,145.561 ,196.579 ,0.74 0 ,352 ,127 ,1024 ,35.334 ,31.77 ,1.112 0 ,352 ,127 ,128 ,16.03 ,18.485 ,0.867 0 ,352 ,127 ,256 ,24.505 ,24.607 ,0.996 0 ,352 ,127 ,32 ,8.016 ,9.285 ,0.863 0 ,352 ,127 ,512 ,35.297 ,31.777 ,1.111 0 ,352 ,127 ,64 ,11.594 ,16.022 ,0.724 0 ,3520 ,127 ,4096 ,149.189 ,187.86 ,0.794 0 ,3552 ,127 ,4096 ,148.896 ,189.592 ,0.785 0 ,3584 ,127 ,4096 ,146.434 ,195.891 ,0.748 0 ,3616 ,127 ,4096 ,149.628 
,194.825 ,0.768 0 ,3648 ,127 ,4096 ,153.47 ,190.168 ,0.807 0 ,368 ,127 ,16 ,8.17 ,9.113 ,0.897 0 ,3680 ,127 ,4096 ,155.436 ,191.619 ,0.811 0 ,3712 ,127 ,4096 ,149.822 ,203.939 ,0.735 0 ,3744 ,127 ,4096 ,153.881 ,196.519 ,0.783 0 ,3776 ,127 ,4096 ,158.302 ,200.946 ,0.788 0 ,3808 ,127 ,4096 ,158.081 ,209.14 ,0.756 0 ,384 ,127 ,1024 ,37.181 ,36.796 ,1.01 0 ,384 ,127 ,128 ,16.028 ,18.65 ,0.859 0 ,384 ,127 ,256 ,24.866 ,24.507 ,1.015 0 ,384 ,127 ,32 ,8.429 ,8.943 ,0.943 0 ,384 ,127 ,512 ,37.171 ,32.643 ,1.139 0 ,384 ,127 ,64 ,11.473 ,15.68 ,0.732 0 ,3840 ,127 ,4096 ,155.507 ,200.042 ,0.777 0 ,3872 ,127 ,4096 ,158.122 ,199.468 ,0.793 0 ,3904 ,127 ,4096 ,163.552 ,199.163 ,0.821 0 ,3936 ,127 ,4096 ,162.695 ,204.503 ,0.796 0 ,3968 ,127 ,4096 ,173.435 ,177.618 ,0.976 0 ,4 ,127 ,3 ,8.129 ,9.283 ,0.876 0 ,4 ,127 ,4 ,7.918 ,9.049 ,0.875 0 ,4 ,127 ,5 ,8.122 ,9.107 ,0.892 0 ,4 ,127 ,5000 ,7.665 ,7.321 ,1.047 0 ,400 ,127 ,16 ,8.183 ,8.943 ,0.915 0 ,4000 ,127 ,4096 ,182.372 ,176.806 ,1.031 0 ,4032 ,127 ,4096 ,173.531 ,176.896 ,0.981 0 ,4064 ,127 ,4096 ,170.429 ,188.202 ,0.906 0 ,4096 ,127 ,3392 ,134.112 ,159.888 ,0.839 0 ,4096 ,127 ,3424 ,134.255 ,171.495 ,0.783 0 ,4096 ,127 ,3456 ,134.558 ,165.724 ,0.812 0 ,4096 ,127 ,3488 ,138.429 ,166.295 ,0.832 0 ,4096 ,127 ,3520 ,138.508 ,163.608 ,0.847 0 ,4096 ,127 ,3552 ,138.455 ,167.833 ,0.825 0 ,4096 ,127 ,3584 ,139.393 ,165.671 ,0.841 0 ,4096 ,127 ,3616 ,142.563 ,170.198 ,0.838 0 ,4096 ,127 ,3648 ,142.746 ,169.878 ,0.84 0 ,4096 ,127 ,3680 ,142.798 ,171.673 ,0.832 0 ,4096 ,127 ,3712 ,142.619 ,173.275 ,0.823 0 ,4096 ,127 ,3744 ,147.268 ,170.217 ,0.865 0 ,4096 ,127 ,3776 ,147.036 ,169.047 ,0.87 0 ,4096 ,127 ,3808 ,146.977 ,172.515 ,0.852 0 ,4096 ,127 ,3840 ,147.399 ,175.952 ,0.838 0 ,4096 ,127 ,3872 ,151.254 ,178.702 ,0.846 0 ,4096 ,127 ,3904 ,151.309 ,177.89 ,0.851 0 ,4096 ,127 ,3936 ,151.626 ,181.201 ,0.837 0 ,4096 ,127 ,3968 ,151.281 ,177.809 ,0.851 0 ,4096 ,127 ,4000 ,155.566 ,176.872 ,0.88 0 ,4096 ,127 ,4032 ,156.314 ,178.469 ,0.876 0 ,4096 ,127 ,4064 ,156.323 ,191.263 ,0.817 0 ,4096 ,127 ,4096 ,155.278 ,175.579 ,0.884 0 ,4096 ,127 ,4128 ,163.473 ,187.974 ,0.87 0 ,4096 ,127 ,4160 ,166.296 ,182.482 ,0.911 0 ,4096 ,127 ,4192 ,162.559 ,178.45 ,0.911 0 ,4096 ,127 ,4224 ,164.064 ,179.153 ,0.916 0 ,4096 ,127 ,4256 ,181.209 ,212.238 ,0.854 0 ,4096 ,127 ,4288 ,167.509 ,206.898 ,0.81 0 ,4096 ,127 ,4320 ,162.726 ,210.745 ,0.772 0 ,4096 ,127 ,4352 ,163.294 ,215.134 ,0.759 0 ,4096 ,127 ,4384 ,163.785 ,208.764 ,0.785 0 ,4096 ,127 ,4416 ,164.439 ,207.951 ,0.791 0 ,4096 ,127 ,4448 ,163.662 ,206.41 ,0.793 0 ,4096 ,127 ,4480 ,164.414 ,205.231 ,0.801 0 ,4096 ,127 ,4512 ,163.637 ,214.655 ,0.762 0 ,4096 ,127 ,4544 ,162.945 ,207.81 ,0.784 0 ,4096 ,127 ,4576 ,162.81 ,212.317 ,0.767 0 ,4096 ,127 ,4608 ,167.929 ,207.966 ,0.807 0 ,4096 ,127 ,4640 ,162.01 ,207.893 ,0.779 0 ,4096 ,127 ,4672 ,172.59 ,209.725 ,0.823 0 ,4096 ,127 ,4704 ,168.842 ,209.017 ,0.808 0 ,4096 ,127 ,4736 ,172.708 ,221.116 ,0.781 0 ,4096 ,127 ,4768 ,163.522 ,209.261 ,0.781 0 ,4096 ,127 ,4800 ,162.52 ,213.294 ,0.762 0 ,4128 ,127 ,4096 ,155.478 ,182.694 ,0.851 0 ,416 ,127 ,1024 ,38.324 ,37.116 ,1.033 0 ,416 ,127 ,128 ,15.347 ,18.663 ,0.822 0 ,416 ,127 ,256 ,24.518 ,24.291 ,1.009 0 ,416 ,127 ,32 ,8.096 ,9.275 ,0.873 0 ,416 ,127 ,512 ,38.394 ,34.173 ,1.124 0 ,416 ,127 ,64 ,11.255 ,14.832 ,0.759 0 ,4160 ,127 ,4096 ,155.74 ,184.944 ,0.842 0 ,4192 ,127 ,4096 ,155.272 ,183.359 ,0.847 0 ,4224 ,127 ,4096 ,155.427 ,181.21 ,0.858 0 ,4256 ,127 ,4096 ,155.675 ,180.996 ,0.86 0 ,4288 ,127 ,4096 ,156.771 ,179.921 ,0.871 0 ,432 ,127 
,16 ,8.512 ,8.949 ,0.951 0 ,4320 ,127 ,4096 ,157.846 ,181.116 ,0.872 0 ,4352 ,127 ,4096 ,155.56 ,185.393 ,0.839 0 ,4384 ,127 ,4096 ,155.489 ,186.039 ,0.836 0 ,4416 ,127 ,4096 ,155.707 ,182.402 ,0.854 0 ,4448 ,127 ,4096 ,155.77 ,181.283 ,0.859 0 ,448 ,127 ,1024 ,40.651 ,36.497 ,1.114 0 ,448 ,127 ,128 ,15.182 ,19.331 ,0.785 0 ,448 ,127 ,256 ,24.505 ,24.898 ,0.984 0 ,448 ,127 ,32 ,7.933 ,8.788 ,0.903 0 ,448 ,127 ,512 ,40.662 ,37.111 ,1.096 0 ,448 ,127 ,64 ,11.556 ,16.163 ,0.715 0 ,4480 ,127 ,4096 ,156.429 ,184.441 ,0.848 0 ,4512 ,127 ,4096 ,155.53 ,180.857 ,0.86 0 ,4544 ,127 ,4096 ,156.2 ,183.916 ,0.849 0 ,4576 ,127 ,4096 ,155.654 ,180.911 ,0.86 0 ,4608 ,127 ,4096 ,155.66 ,185.312 ,0.84 0 ,464 ,127 ,16 ,8.127 ,9.619 ,0.845 0 ,4640 ,127 ,4096 ,155.667 ,179.762 ,0.866 0 ,4672 ,127 ,4096 ,155.61 ,186.585 ,0.834 0 ,4704 ,127 ,4096 ,155.664 ,189.499 ,0.821 0 ,4736 ,127 ,4096 ,155.896 ,187.151 ,0.833 0 ,4768 ,127 ,4096 ,155.663 ,185.39 ,0.84 0 ,48 ,127 ,16 ,8.181 ,8.943 ,0.915 0 ,480 ,127 ,1024 ,40.736 ,36.551 ,1.115 0 ,480 ,127 ,128 ,15.69 ,18.342 ,0.855 0 ,480 ,127 ,256 ,24.684 ,24.586 ,1.004 0 ,480 ,127 ,32 ,8.127 ,9.456 ,0.859 0 ,480 ,127 ,512 ,40.643 ,37.968 ,1.07 0 ,480 ,127 ,64 ,11.367 ,15.192 ,0.748 0 ,4800 ,127 ,4096 ,155.66 ,185.849 ,0.838 0 ,496 ,127 ,16 ,8.395 ,9.28 ,0.905 0 ,5 ,127 ,4 ,8.201 ,9.108 ,0.9 0 ,5 ,127 ,5 ,8.085 ,9.107 ,0.888 0 ,5 ,127 ,5000 ,8.128 ,7.622 ,1.066 0 ,5 ,127 ,6 ,8.156 ,9.28 ,0.879 0 ,5000 ,127 ,1 ,8.628 ,8.806 ,0.98 0 ,5000 ,127 ,1024 ,51.209 ,56.867 ,0.901 0 ,5000 ,127 ,128 ,17.026 ,18.619 ,0.914 0 ,5000 ,127 ,16 ,8.186 ,9.38 ,0.873 0 ,5000 ,127 ,2 ,8.136 ,9.123 ,0.892 0 ,5000 ,127 ,256 ,24.936 ,24.81 ,1.005 0 ,5000 ,127 ,3 ,8.277 ,9.624 ,0.86 0 ,5000 ,127 ,32 ,8.417 ,9.114 ,0.924 0 ,5000 ,127 ,4 ,7.665 ,8.788 ,0.872 0 ,5000 ,127 ,5 ,7.872 ,8.943 ,0.88 0 ,5000 ,127 ,512 ,31.663 ,37.085 ,0.854 0 ,5000 ,127 ,6 ,8.644 ,9.052 ,0.955 0 ,5000 ,127 ,64 ,11.542 ,15.94 ,0.724 0 ,5000 ,127 ,7 ,8.02 ,9.011 ,0.89 0 ,5000 ,127 ,8 ,8.026 ,8.952 ,0.897 0 ,512 ,127 ,1024 ,41.887 ,41.549 ,1.008 0 ,512 ,127 ,1056 ,41.851 ,41.465 ,1.009 0 ,512 ,127 ,1088 ,41.795 ,42.078 ,0.993 0 ,512 ,127 ,1120 ,41.903 ,41.43 ,1.011 0 ,512 ,127 ,1152 ,42.096 ,41.437 ,1.016 0 ,512 ,127 ,1184 ,41.949 ,41.367 ,1.014 0 ,512 ,127 ,1216 ,42.025 ,41.343 ,1.016 0 ,512 ,127 ,128 ,16.134 ,18.676 ,0.864 0 ,512 ,127 ,160 ,16.73 ,19.325 ,0.866 0 ,512 ,127 ,192 ,20.227 ,22.514 ,0.898 0 ,512 ,127 ,224 ,21.703 ,23.175 ,0.936 0 ,512 ,127 ,256 ,24.883 ,25.43 ,0.978 0 ,512 ,127 ,288 ,26.298 ,26.515 ,0.992 0 ,512 ,127 ,32 ,8.456 ,9.142 ,0.925 0 ,512 ,127 ,320 ,26.787 ,30.445 ,0.88 0 ,512 ,127 ,352 ,26.768 ,31.235 ,0.857 0 ,512 ,127 ,384 ,26.813 ,32.966 ,0.813 0 ,512 ,127 ,416 ,31.659 ,32.359 ,0.978 0 ,512 ,127 ,448 ,31.659 ,34.141 ,0.927 0 ,512 ,127 ,480 ,31.653 ,33.596 ,0.942 0 ,512 ,127 ,5000 ,41.891 ,41.417 ,1.011 0 ,512 ,127 ,512 ,31.538 ,36.786 ,0.857 0 ,512 ,127 ,544 ,41.989 ,37.363 ,1.124 0 ,512 ,127 ,576 ,42.276 ,37.994 ,1.113 0 ,512 ,127 ,608 ,42.033 ,37.045 ,1.135 0 ,512 ,127 ,64 ,11.594 ,15.701 ,0.738 0 ,512 ,127 ,640 ,41.864 ,37.692 ,1.111 0 ,512 ,127 ,672 ,41.934 ,41.474 ,1.011 0 ,512 ,127 ,704 ,41.944 ,41.419 ,1.013 0 ,512 ,127 ,736 ,41.991 ,41.586 ,1.01 0 ,512 ,127 ,768 ,41.921 ,41.356 ,1.014 0 ,512 ,127 ,800 ,41.983 ,41.394 ,1.014 0 ,512 ,127 ,832 ,42.518 ,41.454 ,1.026 0 ,512 ,127 ,864 ,41.914 ,41.342 ,1.014 0 ,512 ,127 ,896 ,41.8 ,41.642 ,1.004 0 ,512 ,127 ,928 ,42.012 ,41.354 ,1.016 0 ,512 ,127 ,96 ,12.48 ,16.392 ,0.761 0 ,512 ,127 ,960 ,41.87 ,43.373 ,0.965 0 ,512 ,127 ,992 ,41.867 ,41.742 
[Attached: full strnlen-evex benchmark results as comma-separated values.
Columns appear to be: align, len, char, maxlen, new time (ns), old time (ns),
new/old ratio.  Representative rows:

0, 528, 127, 16, 8.391, 9.293, 0.903
0, 544, 127, 1024, 43.101, 41.449, 1.04
64, 1024, 127, 1024, 64.971, 55.783, 1.165
64, 4096, 127, 4096, 170.591, 176.172, 0.968
7, 7, 127, 8, 7.992, 8.781, 0.91

Geometric mean of new/old across all rows: 0.9468079980272118]
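The geometric mean at the end of the attachment can be reproduced from the
ratio column with a short script (a minimal sketch; the filename
"results.csv" and the assumption that the ratio is the last of seven
comma-separated fields are mine, not part of the patch):

```python
# Minimal sketch: geometric mean of the new/old ratio column from the
# attached benchmark CSV.  Filename and column layout are assumptions.
import math

ratios = []
with open("results.csv") as f:
    for line in f:
        fields = line.strip().split(",")
        if len(fields) == 7:  # align, len, char, maxlen, new, old, new/old
            ratios.append(float(fields[-1]))

# Geometric mean = exp of the arithmetic mean of the logs.
geomean = math.exp(sum(math.log(r) for r in ratios) / len(ratios))
print(geomean)  # ~0.9468 for the strnlen-evex results above
```

Values below 1.0 are improvements (new time / old time), matching the 0.947
net result quoted for strnlen-evex.S in the commit message.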
diff --git a/sysdeps/x86_64/multiarch/strlen-evex.S b/sysdeps/x86_64/multiarch/strlen-evex.S
index 2109ec2f7a..487846f098 100644
--- a/sysdeps/x86_64/multiarch/strlen-evex.S
+++ b/sysdeps/x86_64/multiarch/strlen-evex.S
@@ -26,466 +26,220 @@
 # define STRLEN	__strlen_evex
 # endif
 
-# define VMOVA	vmovdqa64
+# ifndef VEC_SIZE
+# include "x86-evex256-vecs.h"
+# endif
 
 # ifdef USE_AS_WCSLEN
-# define VPCMP	vpcmpd
+# define VPCMPEQ	vpcmpeqd
+# define VPCMPNEQ	vpcmpneqd
+# define VPTESTN	vptestnmd
+# define VPTEST	vptestmd
 # define VPMINU	vpminud
-# define SHIFT_REG	ecx
 # define CHAR_SIZE	4
+# define CHAR_SIZE_SHIFT_REG(reg)	sar $2, %reg
 # else
-# define VPCMP	vpcmpb
+# define VPCMPEQ	vpcmpeqb
+# define VPCMPNEQ	vpcmpneqb
+# define VPTESTN	vptestnmb
+# define VPTEST	vptestmb
 # define VPMINU	vpminub
-# define SHIFT_REG	edx
 # define CHAR_SIZE	1
+# define CHAR_SIZE_SHIFT_REG(reg)
+
+# define REG_WIDTH	VEC_SIZE
 # endif
 
-# define XMMZERO	xmm16
-# define YMMZERO	ymm16
-# define YMM1	ymm17
-# define YMM2	ymm18
-# define YMM3	ymm19
-# define YMM4	ymm20
-# define YMM5	ymm21
-# define YMM6	ymm22
-
-# define VEC_SIZE	32
-# define PAGE_SIZE	4096
-# define CHAR_PER_VEC	(VEC_SIZE / CHAR_SIZE)
-
-	.section .text.evex,"ax",@progbits
-ENTRY (STRLEN)
-# ifdef USE_AS_STRNLEN
-	/* Check zero length.  */
-	test	%RSI_LP, %RSI_LP
-	jz	L(zero)
-# ifdef __ILP32__
-	/* Clear the upper 32 bits.  */
-	movl	%esi, %esi
-# endif
-	mov	%RSI_LP, %R8_LP
+# define CHAR_PER_VEC	(VEC_SIZE / CHAR_SIZE)
+
+# include "reg-macros.h"
+
+# if CHAR_PER_VEC == 64
+
+# define TAIL_RETURN_LBL	first_vec_x2
+# define TAIL_RETURN_OFFSET	(CHAR_PER_VEC * 2)
+
+# define FALLTHROUGH_RETURN_LBL	first_vec_x3
+# define FALLTHROUGH_RETURN_OFFSET	(CHAR_PER_VEC * 3)
+
+# else
+
+# define TAIL_RETURN_LBL	first_vec_x3
+# define TAIL_RETURN_OFFSET	(CHAR_PER_VEC * 3)
+
+# define FALLTHROUGH_RETURN_LBL	first_vec_x2
+# define FALLTHROUGH_RETURN_OFFSET	(CHAR_PER_VEC * 2)
 # endif
+
+# define XZERO	VMM_128(0)
+# define VZERO	VMM(0)
+# define PAGE_SIZE	4096
+
+	.section SECTION(.text), "ax", @progbits
+ENTRY_P2ALIGN (STRLEN, 6)
 	movl	%edi, %eax
-	vpxorq	%XMMZERO, %XMMZERO, %XMMZERO
-	/* Clear high bits from edi. Only keeping bits relevant to page
-	   cross check.  */
+	vpxorq	%XZERO, %XZERO, %XZERO
 	andl	$(PAGE_SIZE - 1), %eax
-	/* Check if we may cross page boundary with one vector load.  */
 	cmpl	$(PAGE_SIZE - VEC_SIZE), %eax
 	ja	L(cross_page_boundary)
 
 	/* Check the first VEC_SIZE bytes.  Each bit in K0 represents a
 	   null byte.  */
-	VPCMP	$0, (%rdi), %YMMZERO, %k0
-	kmovd	%k0, %eax
-# ifdef USE_AS_STRNLEN
-	/* If length < CHAR_PER_VEC handle special.  */
-	cmpq	$CHAR_PER_VEC, %rsi
-	jbe	L(first_vec_x0)
-# endif
-	testl	%eax, %eax
+	VPCMPEQ	(%rdi), %VZERO, %k0
+	KMOV	%k0, %VRAX
+	test	%VRAX, %VRAX
 	jz	L(aligned_more)
-	tzcntl	%eax, %eax
-	ret
-# ifdef USE_AS_STRNLEN
-L(zero):
-	xorl	%eax, %eax
-	ret
-
-	.p2align 4
-L(first_vec_x0):
-	/* Set bit for max len so that tzcnt will return min of max len
-	   and position of first match.  */
-	btsq	%rsi, %rax
-	tzcntl	%eax, %eax
-	ret
-# endif
-
-	.p2align 4
-L(first_vec_x1):
-	tzcntl	%eax, %eax
-	/* Safe to use 32 bit instructions as these are only called for
-	   size = [1, 159].  */
-# ifdef USE_AS_STRNLEN
-	/* Use ecx which was computed earlier to compute correct value.
-	 */
-	leal	-(CHAR_PER_VEC * 4 + 1)(%rcx, %rax), %eax
-# else
-	subl	%edx, %edi
-# ifdef USE_AS_WCSLEN
-	/* NB: Divide bytes by 4 to get the wchar_t count.  */
-	sarl	$2, %edi
-# endif
-	leal	CHAR_PER_VEC(%rdi, %rax), %eax
-# endif
-	ret
-
-	.p2align 4
-L(first_vec_x2):
-	tzcntl	%eax, %eax
-	/* Safe to use 32 bit instructions as these are only called for
-	   size = [1, 159].  */
-# ifdef USE_AS_STRNLEN
-	/* Use ecx which was computed earlier to compute correct value.
-	 */
-	leal	-(CHAR_PER_VEC * 3 + 1)(%rcx, %rax), %eax
-# else
-	subl	%edx, %edi
-# ifdef USE_AS_WCSLEN
-	/* NB: Divide bytes by 4 to get the wchar_t count.  */
-	sarl	$2, %edi
-# endif
-	leal	(CHAR_PER_VEC * 2)(%rdi, %rax), %eax
-# endif
+	bsf	%VRAX, %VRAX
 	ret
 
-	.p2align 4
-L(first_vec_x3):
-	tzcntl	%eax, %eax
-	/* Safe to use 32 bit instructions as these are only called for
-	   size = [1, 159].  */
-# ifdef USE_AS_STRNLEN
-	/* Use ecx which was computed earlier to compute correct value.
-	 */
-	leal	-(CHAR_PER_VEC * 2 + 1)(%rcx, %rax), %eax
-# else
-	subl	%edx, %edi
-# ifdef USE_AS_WCSLEN
-	/* NB: Divide bytes by 4 to get the wchar_t count.  */
-	sarl	$2, %edi
-# endif
-	leal	(CHAR_PER_VEC * 3)(%rdi, %rax), %eax
-# endif
-	ret
-
-	.p2align 4
+	.p2align 4,, 8
 L(first_vec_x4):
-	tzcntl	%eax, %eax
-	/* Safe to use 32 bit instructions as these are only called for
-	   size = [1, 159].  */
-# ifdef USE_AS_STRNLEN
-	/* Use ecx which was computed earlier to compute correct value.
-	 */
-	leal	-(CHAR_PER_VEC + 1)(%rcx, %rax), %eax
-# else
-	subl	%edx, %edi
-# ifdef USE_AS_WCSLEN
-	/* NB: Divide bytes by 4 to get the wchar_t count.  */
-	sarl	$2, %edi
-# endif
+	bsf	%VRAX, %VRAX
+	subl	%ecx, %edi
+	CHAR_SIZE_SHIFT_REG (edi)
 	leal	(CHAR_PER_VEC * 4)(%rdi, %rax), %eax
-# endif
 	ret
 
-	.p2align 5
+
+
+	/* Aligned more for strnlen compares remaining length vs 2 *
+	   CHAR_PER_VEC, 4 * CHAR_PER_VEC, and 8 * CHAR_PER_VEC before
+	   going to the loop.  */
+	.p2align 4,, 10
 L(aligned_more):
-	movq	%rdi, %rdx
-	/* Align data to VEC_SIZE.  */
-	andq	$-(VEC_SIZE), %rdi
+	movq	%rdi, %rcx
+	andq	$(VEC_SIZE * -1), %rdi
 L(cross_page_continue):
-	/* Check the first 4 * VEC_SIZE.  Only one VEC_SIZE at a time
-	   since data is only aligned to VEC_SIZE.  */
-# ifdef USE_AS_STRNLEN
-	/* + CHAR_SIZE because it simplies the logic in
-	   last_4x_vec_or_less.  */
-	leaq	(VEC_SIZE * 5 + CHAR_SIZE)(%rdi), %rcx
-	subq	%rdx, %rcx
-# ifdef USE_AS_WCSLEN
-	/* NB: Divide bytes by 4 to get the wchar_t count.  */
-	sarl	$2, %ecx
-# endif
-# endif
-	/* Load first VEC regardless.  */
-	VPCMP	$0, VEC_SIZE(%rdi), %YMMZERO, %k0
-# ifdef USE_AS_STRNLEN
-	/* Adjust length. If near end handle specially.  */
-	subq	%rcx, %rsi
-	jb	L(last_4x_vec_or_less)
-# endif
-	kmovd	%k0, %eax
-	testl	%eax, %eax
+	/* Remaining length >= 2 * CHAR_PER_VEC so do VEC0/VEC1 without
+	   rechecking bounds.  */
+	VPCMPEQ	(VEC_SIZE * 1)(%rdi), %VZERO, %k0
+	KMOV	%k0, %VRAX
+	test	%VRAX, %VRAX
 	jnz	L(first_vec_x1)
 
-	VPCMP	$0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0
-	kmovd	%k0, %eax
-	test	%eax, %eax
+	VPCMPEQ	(VEC_SIZE * 2)(%rdi), %VZERO, %k0
+	KMOV	%k0, %VRAX
+	test	%VRAX, %VRAX
 	jnz	L(first_vec_x2)
 
-	VPCMP	$0, (VEC_SIZE * 3)(%rdi), %YMMZERO, %k0
-	kmovd	%k0, %eax
-	testl	%eax, %eax
+	VPCMPEQ	(VEC_SIZE * 3)(%rdi), %VZERO, %k0
+	KMOV	%k0, %VRAX
+	test	%VRAX, %VRAX
 	jnz	L(first_vec_x3)
 
-	VPCMP	$0, (VEC_SIZE * 4)(%rdi), %YMMZERO, %k0
-	kmovd	%k0, %eax
-	testl	%eax, %eax
+	VPCMPEQ	(VEC_SIZE * 4)(%rdi), %VZERO, %k0
+	KMOV	%k0, %VRAX
+	test	%VRAX, %VRAX
 	jnz	L(first_vec_x4)
 
-	addq	$VEC_SIZE, %rdi
-# ifdef USE_AS_STRNLEN
-	/* Check if at last VEC_SIZE * 4 length.  */
-	cmpq	$(CHAR_PER_VEC * 4 - 1), %rsi
-	jbe	L(last_4x_vec_or_less_load)
-	movl	%edi, %ecx
-	andl	$(VEC_SIZE * 4 - 1), %ecx
-# ifdef USE_AS_WCSLEN
-	/* NB: Divide bytes by 4 to get the wchar_t count.  */
-	sarl	$2, %ecx
-# endif
-	/* Readjust length.  */
-	addq	%rcx, %rsi
-# endif
-	/* Align data to VEC_SIZE * 4.  */
+	subq	$(VEC_SIZE * -1), %rdi
+
+# if CHAR_PER_VEC == 64
+	/* No partial register stalls on processors that we use evex512
+	   on and this saves code size.  */
+	xorb	%dil, %dil
+# else
 	andq	$-(VEC_SIZE * 4), %rdi
+# endif
+
+	/* Compare 4 * VEC at a time forward.  */
 	.p2align 4
 L(loop_4x_vec):
-	/* Load first VEC regardless.  */
-	VMOVA	(VEC_SIZE * 4)(%rdi), %YMM1
-# ifdef USE_AS_STRNLEN
-	/* Break if at end of length.  */
-	subq	$(CHAR_PER_VEC * 4), %rsi
-	jb	L(last_4x_vec_or_less_cmpeq)
-# endif
-	/* Save some code size by microfusing VPMINU with the load. Since
-	   the matches in ymm2/ymm4 can only be returned if there where no
-	   matches in ymm1/ymm3 respectively there is no issue with overlap.
-	 */
-	VPMINU	(VEC_SIZE * 5)(%rdi), %YMM1, %YMM2
-	VMOVA	(VEC_SIZE * 6)(%rdi), %YMM3
-	VPMINU	(VEC_SIZE * 7)(%rdi), %YMM3, %YMM4
+	VMOVA	(VEC_SIZE * 4)(%rdi), %VMM(1)
+	VPMINU	(VEC_SIZE * 5)(%rdi), %VMM(1), %VMM(2)
+	VMOVA	(VEC_SIZE * 6)(%rdi), %VMM(3)
+	VPMINU	(VEC_SIZE * 7)(%rdi), %VMM(3), %VMM(4)
+	VPTESTN	%VMM(2), %VMM(2), %k0
+	VPTESTN	%VMM(4), %VMM(4), %k2
 
-	VPCMP	$0, %YMM2, %YMMZERO, %k0
-	VPCMP	$0, %YMM4, %YMMZERO, %k1
 	subq	$-(VEC_SIZE * 4), %rdi
-	kortestd	%k0, %k1
+	KORTEST	%k0, %k2
 	jz	L(loop_4x_vec)
 
-	/* Check if end was in first half.  */
-	kmovd	%k0, %eax
-	subq	%rdx, %rdi
-# ifdef USE_AS_WCSLEN
-	shrq	$2, %rdi
-# endif
-	testl	%eax, %eax
-	jz	L(second_vec_return)
+	VPTESTN	%VMM(1), %VMM(1), %k1
+	KMOV	%k1, %VRAX
+	test	%VRAX, %VRAX
+	jnz	L(first_vec_x0)
 
-	VPCMP	$0, %YMM1, %YMMZERO, %k2
-	kmovd	%k2, %edx
-	/* Combine VEC1 matches (edx) with VEC2 matches (eax).  */
-# ifdef USE_AS_WCSLEN
-	sall	$CHAR_PER_VEC, %eax
-	orl	%edx, %eax
-	tzcntl	%eax, %eax
-# else
-	salq	$CHAR_PER_VEC, %rax
-	orq	%rdx, %rax
-	tzcntq	%rax, %rax
-# endif
-	addq	%rdi, %rax
-	ret
-
-
-# ifdef USE_AS_STRNLEN
-
-L(last_4x_vec_or_less_load):
-	/* Depending on entry adjust rdi / prepare first VEC in YMM1.  */
-	VMOVA	(VEC_SIZE * 4)(%rdi), %YMM1
-L(last_4x_vec_or_less_cmpeq):
-	VPCMP	$0, %YMM1, %YMMZERO, %k0
-	addq	$(VEC_SIZE * 3), %rdi
-L(last_4x_vec_or_less):
-	kmovd	%k0, %eax
-	/* If remaining length > VEC_SIZE * 2. This works if esi is off by
-	   VEC_SIZE * 4.  */
-	testl	$(CHAR_PER_VEC * 2), %esi
-	jnz	L(last_4x_vec)
-
-	/* length may have been negative or positive by an offset of
-	   CHAR_PER_VEC * 4 depending on where this was called from. This
-	   fixes that.  */
-	andl	$(CHAR_PER_VEC * 4 - 1), %esi
-	testl	%eax, %eax
-	jnz	L(last_vec_x1_check)
+	KMOV	%k0, %VRAX
+	test	%VRAX, %VRAX
+	jnz	L(first_vec_x1)
 
-	/* Check the end of data.  */
-	subl	$CHAR_PER_VEC, %esi
-	jb	L(max)
+	VPTESTN	%VMM(3), %VMM(3), %k0
 
-	VPCMP	$0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0
-	kmovd	%k0, %eax
-	tzcntl	%eax, %eax
-	/* Check the end of data.  */
-	cmpl	%eax, %esi
-	jb	L(max)
-
-	subq	%rdx, %rdi
-# ifdef USE_AS_WCSLEN
-	/* NB: Divide bytes by 4 to get the wchar_t count.  */
-	sarq	$2, %rdi
-# endif
-	leaq	(CHAR_PER_VEC * 2)(%rdi, %rax), %rax
-	ret
-L(max):
-	movq	%r8, %rax
-	ret
-# endif
-
-	/* Placed here in strnlen so that the jcc L(last_4x_vec_or_less)
-	   in the 4x VEC loop can use 2 byte encoding.  */
-	.p2align 4
-L(second_vec_return):
-	VPCMP	$0, %YMM3, %YMMZERO, %k0
-	/* Combine YMM3 matches (k0) with YMM4 matches (k1).  */
-# ifdef USE_AS_WCSLEN
-	kunpckbw	%k0, %k1, %k0
-	kmovd	%k0, %eax
-	tzcntl	%eax, %eax
+# if CHAR_PER_VEC == 64
+	KMOV	%k0, %VRAX
+	test	%VRAX, %VRAX
+	jnz	L(first_vec_x2)
+	KMOV	%k2, %VRAX
 # else
-	kunpckdq	%k0, %k1, %k0
-	kmovq	%k0, %rax
-	tzcntq	%rax, %rax
+	/* We can only combine last 2x VEC masks if CHAR_PER_VEC <= 32.
+	 */
+	kmovd	%k2, %edx
+	kmovd	%k0, %eax
+	salq	$CHAR_PER_VEC, %rdx
+	orq	%rdx, %rax
 # endif
-	leaq	(CHAR_PER_VEC * 2)(%rdi, %rax), %rax
-	ret
-
-# ifdef USE_AS_STRNLEN
-L(last_vec_x1_check):
-	tzcntl	%eax, %eax
-	/* Check the end of data.  */
-	cmpl	%eax, %esi
-	jb	L(max)
-	subq	%rdx, %rdi
-# ifdef USE_AS_WCSLEN
-	/* NB: Divide bytes by 4 to get the wchar_t count.  */
-	sarq	$2, %rdi
-# endif
-	leaq	(CHAR_PER_VEC)(%rdi, %rax), %rax
+
+	/* first_vec_x3 for strlen-ZMM and first_vec_x2 for strlen-YMM.
+	 */
+	.p2align 4,, 2
+L(FALLTHROUGH_RETURN_LBL):
+	bsfq	%rax, %rax
+	subq	%rcx, %rdi
+	CHAR_SIZE_SHIFT_REG (rdi)
+	leaq	(FALLTHROUGH_RETURN_OFFSET)(%rdi, %rax), %rax
 	ret
 
-	.p2align 4
-L(last_4x_vec):
-	/* Test first 2x VEC normally.  */
-	testl	%eax, %eax
-	jnz	L(last_vec_x1)
-
-	VPCMP	$0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0
-	kmovd	%k0, %eax
-	testl	%eax, %eax
-	jnz	L(last_vec_x2)
-
-	/* Normalize length.  */
-	andl	$(CHAR_PER_VEC * 4 - 1), %esi
-	VPCMP	$0, (VEC_SIZE * 3)(%rdi), %YMMZERO, %k0
-	kmovd	%k0, %eax
-	testl	%eax, %eax
-	jnz	L(last_vec_x3)
-
-	/* Check the end of data.  */
-	subl	$(CHAR_PER_VEC * 3), %esi
-	jb	L(max)
-
-	VPCMP	$0, (VEC_SIZE * 4)(%rdi), %YMMZERO, %k0
-	kmovd	%k0, %eax
-	tzcntl	%eax, %eax
-	/* Check the end of data.  */
-	cmpl	%eax, %esi
-	jb	L(max_end)
-
-	subq	%rdx, %rdi
-# ifdef USE_AS_WCSLEN
-	/* NB: Divide bytes by 4 to get the wchar_t count.  */
-	sarq	$2, %rdi
-# endif
-	leaq	(CHAR_PER_VEC * 4)(%rdi, %rax), %rax
+	.p2align 4,, 8
+L(first_vec_x0):
+	bsf	%VRAX, %VRAX
+	sub	%rcx, %rdi
+	CHAR_SIZE_SHIFT_REG (rdi)
+	addq	%rdi, %rax
 	ret
 
-	.p2align 4
-L(last_vec_x1):
-	tzcntl	%eax, %eax
-	subq	%rdx, %rdi
-# ifdef USE_AS_WCSLEN
-	/* NB: Divide bytes by 4 to get the wchar_t count.  */
-	sarq	$2, %rdi
-# endif
+	.p2align 4,, 10
+L(first_vec_x1):
+	bsf	%VRAX, %VRAX
+	sub	%rcx, %rdi
+	CHAR_SIZE_SHIFT_REG (rdi)
 	leaq	(CHAR_PER_VEC)(%rdi, %rax), %rax
 	ret
 
-	.p2align 4
-L(last_vec_x2):
-	tzcntl	%eax, %eax
-	subq	%rdx, %rdi
-# ifdef USE_AS_WCSLEN
-	/* NB: Divide bytes by 4 to get the wchar_t count.  */
-	sarq	$2, %rdi
-# endif
-	leaq	(CHAR_PER_VEC * 2)(%rdi, %rax), %rax
-	ret
-
-	.p2align 4
-L(last_vec_x3):
-	tzcntl	%eax, %eax
-	subl	$(CHAR_PER_VEC * 2), %esi
-	/* Check the end of data.  */
-	cmpl	%eax, %esi
-	jb	L(max_end)
-	subq	%rdx, %rdi
-# ifdef USE_AS_WCSLEN
-	/* NB: Divide bytes by 4 to get the wchar_t count.  */
-	sarq	$2, %rdi
-# endif
-	leaq	(CHAR_PER_VEC * 3)(%rdi, %rax), %rax
-	ret
-L(max_end):
-	movq	%r8, %rax
+	.p2align 4,, 10
+	/* first_vec_x2 for strlen-ZMM and first_vec_x3 for strlen-YMM.
+	 */
+L(TAIL_RETURN_LBL):
+	bsf	%VRAX, %VRAX
+	sub	%VRCX, %VRDI
+	CHAR_SIZE_SHIFT_REG (VRDI)
+	lea	(TAIL_RETURN_OFFSET)(%rdi, %rax), %VRAX
 	ret
-# endif
 
-	/* Cold case for crossing page with first load.  */
-	.p2align 4
+	.p2align 4,, 8
 L(cross_page_boundary):
-	movq	%rdi, %rdx
+	movq	%rdi, %rcx
 	/* Align data to VEC_SIZE.  */
 	andq	$-VEC_SIZE, %rdi
-	VPCMP	$0, (%rdi), %YMMZERO, %k0
-	kmovd	%k0, %eax
-	/* Remove the leading bytes.  */
+
+	VPCMPEQ	(%rdi), %VZERO, %k0
+
+	KMOV	%k0, %VRAX
 # ifdef USE_AS_WCSLEN
-	/* NB: Divide shift count by 4 since each bit in K0 represent 4
-	   bytes.  */
-	movl	%edx, %ecx
-	shrl	$2, %ecx
-	andl	$(CHAR_PER_VEC - 1), %ecx
-# endif
-	/* SHIFT_REG is ecx for USE_AS_WCSLEN and edx otherwise.  */
-	sarxl	%SHIFT_REG, %eax, %eax
+	movl	%ecx, %edx
+	shrl	$2, %edx
+	andl	$(CHAR_PER_VEC - 1), %edx
+	shrx	%edx, %eax, %eax
 	testl	%eax, %eax
-# ifndef USE_AS_STRNLEN
-	jz	L(cross_page_continue)
-	tzcntl	%eax, %eax
-	ret
 # else
-	jnz	L(cross_page_less_vec)
-# ifndef USE_AS_WCSLEN
-	movl	%edx, %ecx
-	andl	$(CHAR_PER_VEC - 1), %ecx
-# endif
-	movl	$CHAR_PER_VEC, %eax
-	subl	%ecx, %eax
-	/* Check the end of data.  */
-	cmpq	%rax, %rsi
-	ja	L(cross_page_continue)
-	movl	%esi, %eax
-	ret
-L(cross_page_less_vec):
-	tzcntl	%eax, %eax
-	/* Select min of length and position of first null.  */
-	cmpq	%rax, %rsi
-	cmovb	%esi, %eax
-	ret
+	shr	%cl, %VRAX
 # endif
+	jz	L(cross_page_continue)
+	bsf	%VRAX, %VRAX
+	ret
 
 END (STRLEN)
 #endif
diff --git a/sysdeps/x86_64/multiarch/strnlen-evex.S b/sysdeps/x86_64/multiarch/strnlen-evex.S
index 64a9fc2606..443a32a749 100644
--- a/sysdeps/x86_64/multiarch/strnlen-evex.S
+++ b/sysdeps/x86_64/multiarch/strnlen-evex.S
@@ -1,8 +1,423 @@
-#ifndef STRNLEN
-# define STRNLEN __strnlen_evex
-#endif
+/* strnlen/wcsnlen optimized with 256-bit EVEX instructions.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <isa-level.h>
+#include <sysdep.h>
+
+#if ISA_SHOULD_BUILD (4)
+
+# ifndef VEC_SIZE
+# include "x86-evex256-vecs.h"
+# endif
+
+
+# ifndef STRNLEN
+# define STRNLEN	__strnlen_evex
+# endif
+
+# ifdef USE_AS_WCSLEN
+# define VPCMPEQ	vpcmpeqd
+# define VPCMPNEQ	vpcmpneqd
+# define VPTESTN	vptestnmd
+# define VPTEST	vptestmd
+# define VPMINU	vpminud
+# define CHAR_SIZE	4
+
+# else
+# define VPCMPEQ	vpcmpeqb
+# define VPCMPNEQ	vpcmpneqb
+# define VPTESTN	vptestnmb
+# define VPTEST	vptestmb
+# define VPMINU	vpminub
+# define CHAR_SIZE	1
+
+# define REG_WIDTH	VEC_SIZE
+# endif
+
+# define CHAR_PER_VEC	(VEC_SIZE / CHAR_SIZE)
+
+# include "reg-macros.h"
+
+# if CHAR_PER_VEC == 32
+# define SUB_SHORT(imm, reg)	subb $(imm), %VGPR_SZ(reg, 8)
+# else
+# define SUB_SHORT(imm, reg)	subl $(imm), %VGPR_SZ(reg, 32)
+# endif
+
+
+
+# if CHAR_PER_VEC == 64
+# define FALLTHROUGH_RETURN_OFFSET	(CHAR_PER_VEC * 3)
+# else
+# define FALLTHROUGH_RETURN_OFFSET	(CHAR_PER_VEC * 2)
+# endif
+
+
+# define XZERO	VMM_128(0)
+# define VZERO	VMM(0)
+# define PAGE_SIZE	4096
+
+	.section SECTION(.text), "ax", @progbits
+ENTRY_P2ALIGN (STRNLEN, 6)
+	/* Check zero length.  */
+	test	%RSI_LP, %RSI_LP
+	jz	L(zero)
+# ifdef __ILP32__
+	/* Clear the upper 32 bits.  */
+	movl	%esi, %esi
+# endif
+
+	movl	%edi, %eax
+	vpxorq	%XZERO, %XZERO, %XZERO
+	andl	$(PAGE_SIZE - 1), %eax
+	cmpl	$(PAGE_SIZE - VEC_SIZE), %eax
+	ja	L(cross_page_boundary)
+
+	/* Check the first VEC_SIZE bytes.  Each bit in K0 represents a
+	   null byte.  */
+	VPCMPEQ	(%rdi), %VZERO, %k0
+
+	KMOV	%k0, %VRCX
+	movq	%rsi, %rax
+
+	/* If src (rcx) is zero, bsf does not change the result.  NB:
+	   Must use 64-bit bsf here so that upper bits of len are not
+	   cleared.  */
+	bsfq	%rcx, %rax
+	/* If rax > CHAR_PER_VEC then rcx must have been zero (no null
+	   CHAR) and rsi must be > CHAR_PER_VEC.  */
+	cmpq	$CHAR_PER_VEC, %rax
+	ja	L(more_1x_vec)
+	/* Check if first match in bounds.  */
+	cmpq	%rax, %rsi
+	cmovb	%esi, %eax
+	ret
+
+
+# if CHAR_PER_VEC != 32
+	.p2align 4,, 2
+L(zero):
+L(max_0):
+	movl	%esi, %eax
+	ret
+# endif
+
+	/* Aligned more for strnlen compares remaining length vs 2 *
+	   CHAR_PER_VEC, 4 * CHAR_PER_VEC, and 8 * CHAR_PER_VEC before
+	   going to the loop.  */
+	.p2align 4,, 10
+L(more_1x_vec):
+L(cross_page_continue):
+	/* Compute number of words checked after aligning.  */
+# ifdef USE_AS_WCSLEN
+	/* Need to compute directly for wcslen as CHAR_SIZE * rsi can
+	   overflow.  */
+	movq	%rdi, %rax
+	andq	$(VEC_SIZE * -1), %rdi
+	subq	%rdi, %rax
+	sarq	$2, %rax
+	leaq	-(CHAR_PER_VEC * 1)(%rax, %rsi), %rax
+# else
+	leaq	(VEC_SIZE * -1)(%rsi, %rdi), %rax
+	andq	$(VEC_SIZE * -1), %rdi
+	subq	%rdi, %rax
+# endif
+
+
+	VPCMPEQ	VEC_SIZE(%rdi), %VZERO, %k0
+
+	cmpq	$(CHAR_PER_VEC * 2), %rax
+	ja	L(more_2x_vec)
+
+L(last_2x_vec_or_less):
+	KMOV	%k0, %VRDX
+	test	%VRDX, %VRDX
+	jnz	L(last_vec_check)
+
+	/* Check the end of data.  */
+	SUB_SHORT (CHAR_PER_VEC, rax)
+	jbe	L(max_0)
+	VPCMPEQ	(VEC_SIZE * 2)(%rdi), %VZERO, %k0
+	KMOV	%k0, %VRDX
+	test	%VRDX, %VRDX
+	jz	L(max_0)
+	/* Best place for LAST_VEC_CHECK if ZMM.  */
+	.p2align 4,, 8
+L(last_vec_check):
+	bsf	%VRDX, %VRDX
+	sub	%eax, %edx
+	lea	(%rsi, %rdx), %eax
+	cmovae	%esi, %eax
+	ret
+
+# if CHAR_PER_VEC == 32
+	.p2align 4,, 2
+L(zero):
+L(max_0):
+	movl	%esi, %eax
+	ret
+# endif
+
+	.p2align 4,, 8
+L(last_4x_vec_or_less):
+	addl	$(CHAR_PER_VEC * -4), %eax
+	VPCMPEQ	(VEC_SIZE * 5)(%rdi), %VZERO, %k0
+	subq	$(VEC_SIZE * -4), %rdi
+	cmpl	$(CHAR_PER_VEC * 2), %eax
+	jbe	L(last_2x_vec_or_less)
+
+	.p2align 4,, 6
+L(more_2x_vec):
+	/* Remaining length >= 2 * CHAR_PER_VEC so do VEC0/VEC1 without
+	   rechecking bounds.  */
 
-#define USE_AS_STRNLEN 1
-#define STRLEN STRNLEN
+	KMOV	%k0, %VRDX
 
-#include "strlen-evex.S"
+	test	%VRDX, %VRDX
+	jnz	L(first_vec_x1)
+
+	VPCMPEQ	(VEC_SIZE * 2)(%rdi), %VZERO, %k0
+	KMOV	%k0, %VRDX
+	test	%VRDX, %VRDX
+	jnz	L(first_vec_x2)
+
+	cmpq	$(CHAR_PER_VEC * 4), %rax
+	ja	L(more_4x_vec)
+
+
+	VPCMPEQ	(VEC_SIZE * 3)(%rdi), %VZERO, %k0
+	KMOV	%k0, %VRDX
+	addl	$(CHAR_PER_VEC * -2), %eax
+	test	%VRDX, %VRDX
+	jnz	L(last_vec_check)
+
+	subl	$(CHAR_PER_VEC), %eax
+	jbe	L(max_1)
+
+	VPCMPEQ	(VEC_SIZE * 4)(%rdi), %VZERO, %k0
+	KMOV	%k0, %VRDX
+
+	test	%VRDX, %VRDX
+	jnz	L(last_vec_check)
+L(max_1):
+	movl	%esi, %eax
+	ret
+
+	.p2align 4,, 3
+L(first_vec_x2):
+# if VEC_SIZE == 64
+	/* If VEC_SIZE == 64 we can fit logic for full return label in
+	   spare bytes before next cache line.  */
+	bsf	%VRDX, %VRDX
+	sub	%eax, %esi
+	leal	(CHAR_PER_VEC * 1)(%rsi, %rdx), %eax
+	ret
+	.p2align 4,, 6
+# else
+	addl	$CHAR_PER_VEC, %esi
+# endif
+L(first_vec_x1):
+	bsf	%VRDX, %VRDX
+	sub	%eax, %esi
+	leal	(CHAR_PER_VEC * 0)(%rsi, %rdx), %eax
+	ret
+
+
+	.p2align 4,, 6
+L(first_vec_x4):
+# if VEC_SIZE == 64
+	/* If VEC_SIZE == 64 we can fit logic for full return label in
+	   spare bytes before next cache line.  */
+	bsf	%VRDX, %VRDX
+	sub	%eax, %esi
+	leal	(CHAR_PER_VEC * 3)(%rsi, %rdx), %eax
+	ret
+	.p2align 4,, 6
+# else
+	addl	$CHAR_PER_VEC, %esi
+# endif
+L(first_vec_x3):
+	bsf	%VRDX, %VRDX
+	sub	%eax, %esi
+	leal	(CHAR_PER_VEC * 2)(%rsi, %rdx), %eax
+	ret
+
+	.p2align 4,, 5
+L(more_4x_vec):
+	VPCMPEQ	(VEC_SIZE * 3)(%rdi), %VZERO, %k0
+	KMOV	%k0, %VRDX
+	test	%VRDX, %VRDX
+	jnz	L(first_vec_x3)
+
+	VPCMPEQ	(VEC_SIZE * 4)(%rdi), %VZERO, %k0
+	KMOV	%k0, %VRDX
+	test	%VRDX, %VRDX
+	jnz	L(first_vec_x4)
+
+	/* Check if at last VEC_SIZE * 4 length before aligning for the
+	   loop.  */
+	cmpq	$(CHAR_PER_VEC * 8), %rax
+	jbe	L(last_4x_vec_or_less)
+
+
+	/* Compute number of words checked after aligning.  */
+# ifdef USE_AS_WCSLEN
+	/* Need to compute directly for wcslen as CHAR_SIZE * rsi can
+	   overflow.  */
+	leaq	(VEC_SIZE * -3)(%rdi), %rdx
+# else
+	leaq	(VEC_SIZE * -3)(%rdi, %rax), %rax
+# endif
+
+	subq	$(VEC_SIZE * -1), %rdi
+
+	/* Align data to VEC_SIZE * 4.  */
+# if VEC_SIZE == 64
+	/* Saves code size.  No evex512 processor has partial register
+	   stalls.  If that change this can be replaced with `andq
+	   $-(VEC_SIZE * 4), %rdi`.  */
+	xorb	%dil, %dil
+# else
+	andq	$-(VEC_SIZE * 4), %rdi
+# endif
+
+# ifdef USE_AS_WCSLEN
+	subq	%rdi, %rdx
+	sarq	$2, %rdx
+	addq	%rdx, %rax
+# else
+	subq	%rdi, %rax
+# endif
+	/* Compare 4 * VEC at a time forward.  */
+	.p2align 4,, 11
+L(loop_4x_vec):
+	VMOVA	(VEC_SIZE * 4)(%rdi), %VMM(1)
+	VPMINU	(VEC_SIZE * 5)(%rdi), %VMM(1), %VMM(2)
+	VMOVA	(VEC_SIZE * 6)(%rdi), %VMM(3)
+	VPMINU	(VEC_SIZE * 7)(%rdi), %VMM(3), %VMM(4)
+	VPTESTN	%VMM(2), %VMM(2), %k0
+	VPTESTN	%VMM(4), %VMM(4), %k2
+	subq	$-(VEC_SIZE * 4), %rdi
+	/* Break if at end of length.  */
+	subq	$(CHAR_PER_VEC * 4), %rax
+	jbe	L(loop_len_end)
+
+
+	KORTEST	%k0, %k2
+	jz	L(loop_4x_vec)
+
+
+L(loop_last_4x_vec):
+	movq	%rsi, %rcx
+	subq	%rax, %rsi
+	VPTESTN	%VMM(1), %VMM(1), %k1
+	KMOV	%k1, %VRDX
+	test	%VRDX, %VRDX
+	jnz	L(last_vec_x0)
+
+	KMOV	%k0, %VRDX
+	test	%VRDX, %VRDX
+	jnz	L(last_vec_x1)
+
+	VPTESTN	%VMM(3), %VMM(3), %k0
+
+	/* Seperate logic for VEC_SIZE == 64 and VEC_SIZE == 32 for
+	   returning last 2x VEC. For VEC_SIZE == 64 we test each VEC
+	   individually, for VEC_SIZE == 32 we combine them in a single
+	   64-bit GPR.  */
+# if CHAR_PER_VEC == 64
+	KMOV	%k0, %VRDX
+	test	%VRDX, %VRDX
+	jnz	L(last_vec_x2)
+	KMOV	%k2, %VRDX
+# else
+	/* We can only combine last 2x VEC masks if CHAR_PER_VEC <= 32.
+	 */
+	kmovd	%k2, %edx
+	kmovd	%k0, %eax
+	salq	$CHAR_PER_VEC, %rdx
+	orq	%rax, %rdx
+# endif
+
+	/* first_vec_x3 for strlen-ZMM and first_vec_x2 for strlen-YMM.
+	 */
+	bsfq	%rdx, %rdx
+	leaq	(FALLTHROUGH_RETURN_OFFSET - CHAR_PER_VEC * 4)(%rsi, %rdx), %rax
+	cmpq	%rax, %rcx
+	cmovb	%rcx, %rax
+	ret
+
+	/* Handle last 4x VEC after loop. All VECs have been loaded.  */
+	.p2align 4,, 4
+L(loop_len_end):
+	KORTEST	%k0, %k2
+	jnz	L(loop_last_4x_vec)
+	movq	%rsi, %rax
+	ret
+
+
+# if CHAR_PER_VEC == 64
+	/* Since we can't combine the last 2x VEC for VEC_SIZE == 64
+	   need return label for it.  */
+	.p2align 4,, 8
+L(last_vec_x2):
+	bsf	%VRDX, %VRDX
+	leaq	(CHAR_PER_VEC * -2)(%rsi, %rdx), %rax
+	cmpq	%rax, %rcx
+	cmovb	%rcx, %rax
+	ret
+# endif
+
+
+	.p2align 4,, 10
+L(last_vec_x1):
+	addq	$CHAR_PER_VEC, %rsi
+L(last_vec_x0):
+	bsf	%VRDX, %VRDX
+	leaq	(CHAR_PER_VEC * -4)(%rsi, %rdx), %rax
+	cmpq	%rax, %rcx
+	cmovb	%rcx, %rax
+	ret
+
+
+	.p2align 4,, 8
+L(cross_page_boundary):
+	/* Align data to VEC_SIZE.  */
+	movq	%rdi, %rcx
+	andq	$-VEC_SIZE, %rcx
+	VPCMPEQ	(%rcx), %VZERO, %k0
+
+	KMOV	%k0, %VRCX
+# ifdef USE_AS_WCSLEN
+	shrl	$2, %eax
+	andl	$(CHAR_PER_VEC - 1), %eax
+# endif
+	shrx	%VRAX, %VRCX, %VRCX
+
+	negl	%eax
+	andl	$(CHAR_PER_VEC - 1), %eax
+	movq	%rsi, %rdx
+	bsf	%VRCX, %VRDX
+	cmpq	%rax, %rdx
+	ja	L(cross_page_continue)
+	movl	%edx, %eax
+	cmpq	%rdx, %rsi
+	cmovb	%esi, %eax
+	ret
+END (STRNLEN)
+#endif
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-evex.S b/sysdeps/x86_64/multiarch/wcsnlen-evex.S
index e2aad94c1e..57a7e93fbf 100644
--- a/sysdeps/x86_64/multiarch/wcsnlen-evex.S
+++ b/sysdeps/x86_64/multiarch/wcsnlen-evex.S
@@ -2,8 +2,7 @@
 # define WCSNLEN	__wcsnlen_evex
 #endif
 
-#define STRLEN	WCSNLEN
+#define STRNLEN	WCSNLEN
 #define USE_AS_WCSLEN	1
-#define USE_AS_STRNLEN	1
 
-#include "strlen-evex.S"
+#include "strnlen-evex.S"