Message ID | 20220711220730.1968923-1-goldstein.w.n@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v1] x86: Use regular casting instead of _cvtmask64_u64 in strstr-avx512 | expand |
On Mon, Jul 11, 2022 at 3:07 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > _cvtmask64_u64 is not available before GCC7. > --- > sysdeps/x86_64/multiarch/strstr-avx512.c | 12 +++++++----- > 1 file changed, 7 insertions(+), 5 deletions(-) > > diff --git a/sysdeps/x86_64/multiarch/strstr-avx512.c b/sysdeps/x86_64/multiarch/strstr-avx512.c > index 2ab9e96db8..e41b44abe1 100644 > --- a/sysdeps/x86_64/multiarch/strstr-avx512.c > +++ b/sysdeps/x86_64/multiarch/strstr-avx512.c > @@ -26,6 +26,8 @@ > #define ZMM_SIZE_IN_BYTES 64 > #define PAGESIZE 4096 > > +#define cvtmask64_u64(...) (uint64_t) (__VA_ARGS__) > + > /* > Returns the index of the first edge within the needle, returns 0 if no edge > is found. Example: 'ab' is the first edge in 'aaaaaaaaaabaarddg' > @@ -133,15 +135,15 @@ __strstr_avx512 (const char *haystack, const char *ned) > __m512i hay0 = _mm512_maskz_loadu_epi8 (loadmask, haystack + hay_index); > /* Search for NULL and compare only till null char */ > uint64_t nullmask > - = _cvtmask64_u64 (_mm512_mask_testn_epi8_mask (loadmask, hay0, hay0)); > + = cvtmask64_u64 (_mm512_mask_testn_epi8_mask (loadmask, hay0, hay0)); > uint64_t cmpmask = nullmask ^ (nullmask - ONE_64BIT); > - cmpmask = cmpmask & _cvtmask64_u64 (loadmask); > + cmpmask = cmpmask & cvtmask64_u64 (loadmask); > /* Search for the 2 charaters of needle */ > __mmask64 k0 = _mm512_cmpeq_epi8_mask (hay0, ned0); > __mmask64 k1 = _mm512_cmpeq_epi8_mask (hay0, ned1); > k1 = _kshiftri_mask64 (k1, 1); > /* k2 masks tell us if both chars from needle match */ > - uint64_t k2 = _cvtmask64_u64 (_kand_mask64 (k0, k1)) & cmpmask; > + uint64_t k2 = cvtmask64_u64 (_kand_mask64 (k0, k1)) & cmpmask; > /* For every match, search for the entire needle for a full match */ > while (k2) > { > @@ -178,13 +180,13 @@ __strstr_avx512 (const char *haystack, const char *ned) > hay0 = _mm512_loadu_si512 (haystack + hay_index); > hay1 = _mm512_load_si512 (haystack + hay_index > + 1); // Always 64 byte aligned > - nullmask = _cvtmask64_u64 (_mm512_testn_epi8_mask (hay1, hay1)); > + nullmask = cvtmask64_u64 (_mm512_testn_epi8_mask (hay1, hay1)); > /* Compare only till null char */ > cmpmask = nullmask ^ (nullmask - ONE_64BIT); > k0 = _mm512_cmpeq_epi8_mask (hay0, ned0); > k1 = _mm512_cmpeq_epi8_mask (hay1, ned1); > /* k2 masks tell us if both chars from needle match */ > - k2 = _cvtmask64_u64 (_kand_mask64 (k0, k1)) & cmpmask; > + k2 = cvtmask64_u64 (_kand_mask64 (k0, k1)) & cmpmask; > /* For every match, compare full strings for potential match */ > while (k2) > { > -- > 2.34.1 > Sunil, can you see if this fixed the build issue with gcc6?
On Mon, Jul 11, 2022 at 3:08 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > On Mon, Jul 11, 2022 at 3:07 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > _cvtmask64_u64 is not available before GCC7. > > --- > > sysdeps/x86_64/multiarch/strstr-avx512.c | 12 +++++++----- > > 1 file changed, 7 insertions(+), 5 deletions(-) > > > > diff --git a/sysdeps/x86_64/multiarch/strstr-avx512.c b/sysdeps/x86_64/multiarch/strstr-avx512.c > > index 2ab9e96db8..e41b44abe1 100644 > > --- a/sysdeps/x86_64/multiarch/strstr-avx512.c > > +++ b/sysdeps/x86_64/multiarch/strstr-avx512.c > > @@ -26,6 +26,8 @@ > > #define ZMM_SIZE_IN_BYTES 64 > > #define PAGESIZE 4096 > > > > +#define cvtmask64_u64(...) (uint64_t) (__VA_ARGS__) > > + > > /* > > Returns the index of the first edge within the needle, returns 0 if no edge > > is found. Example: 'ab' is the first edge in 'aaaaaaaaaabaarddg' > > @@ -133,15 +135,15 @@ __strstr_avx512 (const char *haystack, const char *ned) > > __m512i hay0 = _mm512_maskz_loadu_epi8 (loadmask, haystack + hay_index); > > /* Search for NULL and compare only till null char */ > > uint64_t nullmask > > - = _cvtmask64_u64 (_mm512_mask_testn_epi8_mask (loadmask, hay0, hay0)); > > + = cvtmask64_u64 (_mm512_mask_testn_epi8_mask (loadmask, hay0, hay0)); > > uint64_t cmpmask = nullmask ^ (nullmask - ONE_64BIT); > > - cmpmask = cmpmask & _cvtmask64_u64 (loadmask); > > + cmpmask = cmpmask & cvtmask64_u64 (loadmask); > > /* Search for the 2 charaters of needle */ > > __mmask64 k0 = _mm512_cmpeq_epi8_mask (hay0, ned0); > > __mmask64 k1 = _mm512_cmpeq_epi8_mask (hay0, ned1); > > k1 = _kshiftri_mask64 (k1, 1); > > /* k2 masks tell us if both chars from needle match */ > > - uint64_t k2 = _cvtmask64_u64 (_kand_mask64 (k0, k1)) & cmpmask; > > + uint64_t k2 = cvtmask64_u64 (_kand_mask64 (k0, k1)) & cmpmask; > > /* For every match, search for the entire needle for a full match */ > > while (k2) > > { > > @@ -178,13 +180,13 @@ __strstr_avx512 (const char *haystack, const char *ned) > > hay0 = _mm512_loadu_si512 (haystack + hay_index); > > hay1 = _mm512_load_si512 (haystack + hay_index > > + 1); // Always 64 byte aligned > > - nullmask = _cvtmask64_u64 (_mm512_testn_epi8_mask (hay1, hay1)); > > + nullmask = cvtmask64_u64 (_mm512_testn_epi8_mask (hay1, hay1)); > > /* Compare only till null char */ > > cmpmask = nullmask ^ (nullmask - ONE_64BIT); > > k0 = _mm512_cmpeq_epi8_mask (hay0, ned0); > > k1 = _mm512_cmpeq_epi8_mask (hay1, ned1); > > /* k2 masks tell us if both chars from needle match */ > > - k2 = _cvtmask64_u64 (_kand_mask64 (k0, k1)) & cmpmask; > > + k2 = cvtmask64_u64 (_kand_mask64 (k0, k1)) & cmpmask; > > /* For every match, compare full strings for potential match */ > > while (k2) > > { > > -- > > 2.34.1 > > > > Sunil, can you see if this fixed the build issue with gcc6? Nope, there are more missing intrinsics ../sysdeps/x86_64/multiarch/strstr-avx512.c:144:8: error: implicit declaration of function ?_kshiftri_mask64? [-Wer ror=implicit-function-declaration] ../sysdeps/x86_64/multiarch/strstr-avx512.c:146:32: error: implicit declaration of function ?_kand_mask64? [-Werror =implicit-function-declaration] ../sysdeps/x86_64/multiarch/strstr-avx512.c:144:8: error: implicit declaration of function ?_kshiftri_mask64? [-Wer ror=implicit-function-declaration] ../sysdeps/x86_64/multiarch/strstr-avx512.c:146:32: error: implicit declaration of function ?_kand_mask64? [-Werror =implicit-function-declaration]
On Mon, Jul 11, 2022 at 3:18 PM Sunil Pandey <skpgkp2@gmail.com> wrote: > > On Mon, Jul 11, 2022 at 3:08 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > On Mon, Jul 11, 2022 at 3:07 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > > > _cvtmask64_u64 is not available before GCC7. > > > --- > > > sysdeps/x86_64/multiarch/strstr-avx512.c | 12 +++++++----- > > > 1 file changed, 7 insertions(+), 5 deletions(-) > > > > > > diff --git a/sysdeps/x86_64/multiarch/strstr-avx512.c b/sysdeps/x86_64/multiarch/strstr-avx512.c > > > index 2ab9e96db8..e41b44abe1 100644 > > > --- a/sysdeps/x86_64/multiarch/strstr-avx512.c > > > +++ b/sysdeps/x86_64/multiarch/strstr-avx512.c > > > @@ -26,6 +26,8 @@ > > > #define ZMM_SIZE_IN_BYTES 64 > > > #define PAGESIZE 4096 > > > > > > +#define cvtmask64_u64(...) (uint64_t) (__VA_ARGS__) > > > + > > > /* > > > Returns the index of the first edge within the needle, returns 0 if no edge > > > is found. Example: 'ab' is the first edge in 'aaaaaaaaaabaarddg' > > > @@ -133,15 +135,15 @@ __strstr_avx512 (const char *haystack, const char *ned) > > > __m512i hay0 = _mm512_maskz_loadu_epi8 (loadmask, haystack + hay_index); > > > /* Search for NULL and compare only till null char */ > > > uint64_t nullmask > > > - = _cvtmask64_u64 (_mm512_mask_testn_epi8_mask (loadmask, hay0, hay0)); > > > + = cvtmask64_u64 (_mm512_mask_testn_epi8_mask (loadmask, hay0, hay0)); > > > uint64_t cmpmask = nullmask ^ (nullmask - ONE_64BIT); > > > - cmpmask = cmpmask & _cvtmask64_u64 (loadmask); > > > + cmpmask = cmpmask & cvtmask64_u64 (loadmask); > > > /* Search for the 2 charaters of needle */ > > > __mmask64 k0 = _mm512_cmpeq_epi8_mask (hay0, ned0); > > > __mmask64 k1 = _mm512_cmpeq_epi8_mask (hay0, ned1); > > > k1 = _kshiftri_mask64 (k1, 1); > > > /* k2 masks tell us if both chars from needle match */ > > > - uint64_t k2 = _cvtmask64_u64 (_kand_mask64 (k0, k1)) & cmpmask; > > > + uint64_t k2 = cvtmask64_u64 (_kand_mask64 (k0, k1)) & cmpmask; > > > /* For every match, search for the entire needle for a full match */ > > > while (k2) > > > { > > > @@ -178,13 +180,13 @@ __strstr_avx512 (const char *haystack, const char *ned) > > > hay0 = _mm512_loadu_si512 (haystack + hay_index); > > > hay1 = _mm512_load_si512 (haystack + hay_index > > > + 1); // Always 64 byte aligned > > > - nullmask = _cvtmask64_u64 (_mm512_testn_epi8_mask (hay1, hay1)); > > > + nullmask = cvtmask64_u64 (_mm512_testn_epi8_mask (hay1, hay1)); > > > /* Compare only till null char */ > > > cmpmask = nullmask ^ (nullmask - ONE_64BIT); > > > k0 = _mm512_cmpeq_epi8_mask (hay0, ned0); > > > k1 = _mm512_cmpeq_epi8_mask (hay1, ned1); > > > /* k2 masks tell us if both chars from needle match */ > > > - k2 = _cvtmask64_u64 (_kand_mask64 (k0, k1)) & cmpmask; > > > + k2 = cvtmask64_u64 (_kand_mask64 (k0, k1)) & cmpmask; > > > /* For every match, compare full strings for potential match */ > > > while (k2) > > > { > > > -- > > > 2.34.1 > > > > > > > Sunil, can you see if this fixed the build issue with gcc6? > > Nope, there are more missing intrinsics > > ../sysdeps/x86_64/multiarch/strstr-avx512.c:144:8: error: implicit > declaration of function ?_kshiftri_mask64? [-Wer > ror=implicit-function-declaration] > ../sysdeps/x86_64/multiarch/strstr-avx512.c:146:32: error: implicit > declaration of function ?_kand_mask64? [-Werror > =implicit-function-declaration] > ../sysdeps/x86_64/multiarch/strstr-avx512.c:144:8: error: implicit > declaration of function ?_kshiftri_mask64? [-Wer > ror=implicit-function-declaration] > ../sysdeps/x86_64/multiarch/strstr-avx512.c:146:32: error: implicit > declaration of function ?_kand_mask64? [-Werror > =implicit-function-declaration] Oh sorry, didn't see those ones. Will have patch up in a second.
diff --git a/sysdeps/x86_64/multiarch/strstr-avx512.c b/sysdeps/x86_64/multiarch/strstr-avx512.c index 2ab9e96db8..e41b44abe1 100644 --- a/sysdeps/x86_64/multiarch/strstr-avx512.c +++ b/sysdeps/x86_64/multiarch/strstr-avx512.c @@ -26,6 +26,8 @@ #define ZMM_SIZE_IN_BYTES 64 #define PAGESIZE 4096 +#define cvtmask64_u64(...) (uint64_t) (__VA_ARGS__) + /* Returns the index of the first edge within the needle, returns 0 if no edge is found. Example: 'ab' is the first edge in 'aaaaaaaaaabaarddg' @@ -133,15 +135,15 @@ __strstr_avx512 (const char *haystack, const char *ned) __m512i hay0 = _mm512_maskz_loadu_epi8 (loadmask, haystack + hay_index); /* Search for NULL and compare only till null char */ uint64_t nullmask - = _cvtmask64_u64 (_mm512_mask_testn_epi8_mask (loadmask, hay0, hay0)); + = cvtmask64_u64 (_mm512_mask_testn_epi8_mask (loadmask, hay0, hay0)); uint64_t cmpmask = nullmask ^ (nullmask - ONE_64BIT); - cmpmask = cmpmask & _cvtmask64_u64 (loadmask); + cmpmask = cmpmask & cvtmask64_u64 (loadmask); /* Search for the 2 charaters of needle */ __mmask64 k0 = _mm512_cmpeq_epi8_mask (hay0, ned0); __mmask64 k1 = _mm512_cmpeq_epi8_mask (hay0, ned1); k1 = _kshiftri_mask64 (k1, 1); /* k2 masks tell us if both chars from needle match */ - uint64_t k2 = _cvtmask64_u64 (_kand_mask64 (k0, k1)) & cmpmask; + uint64_t k2 = cvtmask64_u64 (_kand_mask64 (k0, k1)) & cmpmask; /* For every match, search for the entire needle for a full match */ while (k2) { @@ -178,13 +180,13 @@ __strstr_avx512 (const char *haystack, const char *ned) hay0 = _mm512_loadu_si512 (haystack + hay_index); hay1 = _mm512_load_si512 (haystack + hay_index + 1); // Always 64 byte aligned - nullmask = _cvtmask64_u64 (_mm512_testn_epi8_mask (hay1, hay1)); + nullmask = cvtmask64_u64 (_mm512_testn_epi8_mask (hay1, hay1)); /* Compare only till null char */ cmpmask = nullmask ^ (nullmask - ONE_64BIT); k0 = _mm512_cmpeq_epi8_mask (hay0, ned0); k1 = _mm512_cmpeq_epi8_mask (hay1, ned1); /* k2 masks tell us if both chars from needle match */ - k2 = _cvtmask64_u64 (_kand_mask64 (k0, k1)) & cmpmask; + k2 = cvtmask64_u64 (_kand_mask64 (k0, k1)) & cmpmask; /* For every match, compare full strings for potential match */ while (k2) {