Message ID | 20240806161850.18839-1-amonakov@ispras.ru |
---|---|
State | New |
Headers | show |
Series | libcpp: improve x86 vectorized helpers | expand |
On Tue, Aug 6, 2024 at 6:19 PM Alexander Monakov <amonakov@ispras.ru> wrote:
>
> Upcoming patches first drop Binutils ISA support from SSE4.2 to SSSE3,
> then bump it to AVX2. Instead of fiddling with detection, just bump
> our configure check to AVX2 immediately: if by some accident somebody
> builds GCC without AVX2 support in the assembler, they will get SSE2
> vectorized lexer, which is not too slow.

OK.

> libcpp/ChangeLog:
>
> * config.in: Regenerate.
> * configure: Regenerate.
> * configure.ac: Check for AVX2 instead of SSE4.2.
> * lex.cc: Adjust for changed config macro.
> ---
> libcpp/config.in | 6 +++---
> libcpp/configure | 4 ++--
> libcpp/configure.ac | 6 +++---
> libcpp/lex.cc | 2 +-
> 4 files changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/libcpp/config.in b/libcpp/config.in
> index 253ef03a3d..a0ca9e4df4 100644
> --- a/libcpp/config.in
> +++ b/libcpp/config.in
> @@ -35,6 +35,9 @@
> */
> #undef HAVE_ALLOCA_H
>
> +/* Define to 1 if you can assemble AVX2 insns. */
> +#undef HAVE_AVX2
> +
> /* Define to 1 if you have the Mac OS X function
> CFLocaleCopyPreferredLanguages in the CoreFoundation framework. */
> #undef HAVE_CFLOCALECOPYPREFERREDLANGUAGES
> @@ -210,9 +213,6 @@
> /* Define to 1 if you have the `putc_unlocked' function. */
> #undef HAVE_PUTC_UNLOCKED
>
> -/* Define to 1 if you can assemble SSE4 insns. */
> -#undef HAVE_SSE4
> -
> /* Define to 1 if you have the <stddef.h> header file. */
> #undef HAVE_STDDEF_H
>
> diff --git a/libcpp/configure b/libcpp/configure
> index 32d6aaa306..74af097620 100755
> --- a/libcpp/configure
> +++ b/libcpp/configure
> @@ -9140,14 +9140,14 @@ case $target in
> int
> main ()
> {
> -asm ("pcmpestri %0, %%xmm0, %%xmm1" : : "i"(0))
> +asm ("vpshufb %ymm0, %ymm1, %ymm2")
> ;
> return 0;
> }
> _ACEOF
> if ac_fn_c_try_compile "$LINENO"; then :
>
> -$as_echo "#define HAVE_SSE4 1" >>confdefs.h
> +$as_echo "#define HAVE_AVX2 1" >>confdefs.h
>
> fi
> rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
> diff --git a/libcpp/configure.ac b/libcpp/configure.ac
> index b883fec776..cfefb63552 100644
> --- a/libcpp/configure.ac
> +++ b/libcpp/configure.ac
> @@ -197,9 +197,9 @@ fi
>
> case $target in
> i?86-* | x86_64-*)
> - AC_TRY_COMPILE([], [asm ("pcmpestri %0, %%xmm0, %%xmm1" : : "i"(0))],
> - [AC_DEFINE([HAVE_SSE4], [1],
> - [Define to 1 if you can assemble SSE4 insns.])])
> + AC_TRY_COMPILE([], [asm ("vpshufb %ymm0, %ymm1, %ymm2")],
> + [AC_DEFINE([HAVE_AVX2], [1],
> + [Define to 1 if you can assemble AVX2 insns.])])
> esac
>
> # Enable --enable-host-shared.
> diff --git a/libcpp/lex.cc b/libcpp/lex.cc
> index 1591dcdf15..fa9c03614c 100644
> --- a/libcpp/lex.cc
> +++ b/libcpp/lex.cc
> @@ -344,7 +344,7 @@ search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
> return (const uchar *)p + found;
> }
>
> -#ifdef HAVE_SSE4
> +#ifdef HAVE_AVX2
> /* A version of the fast scanner using SSE 4.2 vectorized string insns. */
>
> static const uchar *
> --
> 2.44.0
>
diff --git a/libcpp/config.in b/libcpp/config.in
index 253ef03a3d..a0ca9e4df4 100644
--- a/libcpp/config.in
+++ b/libcpp/config.in
@@ -35,6 +35,9 @@
 */
 #undef HAVE_ALLOCA_H

+/* Define to 1 if you can assemble AVX2 insns. */
+#undef HAVE_AVX2
+
 /* Define to 1 if you have the Mac OS X function
 CFLocaleCopyPreferredLanguages in the CoreFoundation framework. */
 #undef HAVE_CFLOCALECOPYPREFERREDLANGUAGES
@@ -210,9 +213,6 @@
 /* Define to 1 if you have the `putc_unlocked' function. */
 #undef HAVE_PUTC_UNLOCKED

-/* Define to 1 if you can assemble SSE4 insns. */
-#undef HAVE_SSE4
-
 /* Define to 1 if you have the <stddef.h> header file. */
 #undef HAVE_STDDEF_H

diff --git a/libcpp/configure b/libcpp/configure
index 32d6aaa306..74af097620 100755
--- a/libcpp/configure
+++ b/libcpp/configure
@@ -9140,14 +9140,14 @@ case $target in
 int
 main ()
 {
-asm ("pcmpestri %0, %%xmm0, %%xmm1" : : "i"(0))
+asm ("vpshufb %ymm0, %ymm1, %ymm2")
 ;
 return 0;
 }
 _ACEOF
 if ac_fn_c_try_compile "$LINENO"; then :

-$as_echo "#define HAVE_SSE4 1" >>confdefs.h
+$as_echo "#define HAVE_AVX2 1" >>confdefs.h

 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
diff --git a/libcpp/configure.ac b/libcpp/configure.ac
index b883fec776..cfefb63552 100644
--- a/libcpp/configure.ac
+++ b/libcpp/configure.ac
@@ -197,9 +197,9 @@ fi

 case $target in
 i?86-* | x86_64-*)
- AC_TRY_COMPILE([], [asm ("pcmpestri %0, %%xmm0, %%xmm1" : : "i"(0))],
- [AC_DEFINE([HAVE_SSE4], [1],
- [Define to 1 if you can assemble SSE4 insns.])])
+ AC_TRY_COMPILE([], [asm ("vpshufb %ymm0, %ymm1, %ymm2")],
+ [AC_DEFINE([HAVE_AVX2], [1],
+ [Define to 1 if you can assemble AVX2 insns.])])
 esac

 # Enable --enable-host-shared.
diff --git a/libcpp/lex.cc b/libcpp/lex.cc
index 1591dcdf15..fa9c03614c 100644
--- a/libcpp/lex.cc
+++ b/libcpp/lex.cc
@@ -344,7 +344,7 @@ search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 return (const uchar *)p + found;
 }

-#ifdef HAVE_SSE4
+#ifdef HAVE_AVX2
 /* A version of the fast scanner using SSE 4.2 vectorized string insns. */

 static const uchar *