diff mbox series

[v2,2/6] x86-64: Require BMI2 for AVX2 str*cmp and wcs(n)cmp implementations

Message ID 20221002123424.3079805-3-aurelien@aurel32.net
State New
Headers show
Series x86: Fix AVX2 string functions requiring BMI1, BMI2 or LZCNT (BZ #29611) | expand

Commit Message

Aurelien Jarno Oct. 2, 2022, 12:34 p.m. UTC
The AVX2 str*cmp and wcs(n)cmp implementations use the 'bzhi'
instruction, which belongs to the BMI2 CPU feature.

NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
as BSF if the CPU doesn't support TZCNT, and produces the same result
for non-zero input.

Fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c  | 47 +++++++++++++++------
 sysdeps/x86_64/multiarch/ifunc-strcasecmp.h |  1 +
 sysdeps/x86_64/multiarch/strcmp.c           |  4 +-
 sysdeps/x86_64/multiarch/strncmp.c          |  4 +-
 4 files changed, 39 insertions(+), 17 deletions(-)

Comments

Noah Goldstein Oct. 2, 2022, 9:08 p.m. UTC | #1
On Sun, Oct 2, 2022 at 8:34 AM Aurelien Jarno <aurelien@aurel32.net> wrote:
>
> The AVX2 str*cmp and wcs(n)cmp implementations use the 'bzhi'
> instruction, which belongs to the BMI2 CPU feature.
>
> NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
> as BSF if the CPU doesn't support TZCNT, and produces the same result
> for non-zero input.
>
> Fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
> Partially resolves: BZ #29611
> ---
>  sysdeps/x86_64/multiarch/ifunc-impl-list.c  | 47 +++++++++++++++------
>  sysdeps/x86_64/multiarch/ifunc-strcasecmp.h |  1 +
>  sysdeps/x86_64/multiarch/strcmp.c           |  4 +-
>  sysdeps/x86_64/multiarch/strncmp.c          |  4 +-
>  4 files changed, 39 insertions(+), 17 deletions(-)
>
> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> index a71444eccb..fec8790c11 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> @@ -448,13 +448,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>    IFUNC_IMPL (i, name, strcasecmp,
>               X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp,
>                                      (CPU_FEATURE_USABLE (AVX512VL)
> -                                     && CPU_FEATURE_USABLE (AVX512BW)),
> +                                     && CPU_FEATURE_USABLE (AVX512BW)
> +                                     && CPU_FEATURE_USABLE (BMI2)),
>                                      __strcasecmp_evex)
>               X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
> -                                    CPU_FEATURE_USABLE (AVX2),
> +                                    (CPU_FEATURE_USABLE (AVX2)
> +                                     && CPU_FEATURE_USABLE (BMI2)),
>                                      __strcasecmp_avx2)
>               X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
>                                      (CPU_FEATURE_USABLE (AVX2)
> +                                     && CPU_FEATURE_USABLE (BMI2)
>                                       && CPU_FEATURE_USABLE (RTM)),
>                                      __strcasecmp_avx2_rtm)
>               X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp,
> @@ -470,13 +473,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>    IFUNC_IMPL (i, name, strcasecmp_l,
>               X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp,
>                                      (CPU_FEATURE_USABLE (AVX512VL)
> -                                     && CPU_FEATURE_USABLE (AVX512BW)),
> +                                     && CPU_FEATURE_USABLE (AVX512BW)
> +                                     && CPU_FEATURE_USABLE (BMI2)),
>                                      __strcasecmp_l_evex)
>               X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
> -                                    CPU_FEATURE_USABLE (AVX2),
> +                                    (CPU_FEATURE_USABLE (AVX2)
> +                                     && CPU_FEATURE_USABLE (BMI2)),
>                                      __strcasecmp_l_avx2)
>               X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
>                                      (CPU_FEATURE_USABLE (AVX2)
> +                                     && CPU_FEATURE_USABLE (BMI2)
>                                       && CPU_FEATURE_USABLE (RTM)),
>                                      __strcasecmp_l_avx2_rtm)
>               X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp_l,
> @@ -585,10 +591,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>                                       && CPU_FEATURE_USABLE (BMI2)),
>                                      __strcmp_evex)
>               X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp,
> -                                    CPU_FEATURE_USABLE (AVX2),
> +                                    (CPU_FEATURE_USABLE (AVX2)
> +                                     && CPU_FEATURE_USABLE (BMI2)),
>                                      __strcmp_avx2)
>               X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp,
>                                      (CPU_FEATURE_USABLE (AVX2)
> +                                     && CPU_FEATURE_USABLE (BMI2)
>                                       && CPU_FEATURE_USABLE (RTM)),
>                                      __strcmp_avx2_rtm)
>               X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp,
> @@ -638,13 +646,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>    IFUNC_IMPL (i, name, strncasecmp,
>               X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
>                                      (CPU_FEATURE_USABLE (AVX512VL)
> -                                     && CPU_FEATURE_USABLE (AVX512BW)),
> +                                     && CPU_FEATURE_USABLE (AVX512BW)
> +                                     && CPU_FEATURE_USABLE (BMI2)),
>                                      __strncasecmp_evex)
>               X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
> -                                    CPU_FEATURE_USABLE (AVX2),
> +                                    (CPU_FEATURE_USABLE (AVX2)
> +                                     && CPU_FEATURE_USABLE (BMI2)),
>                                      __strncasecmp_avx2)
>               X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
>                                      (CPU_FEATURE_USABLE (AVX2)
> +                                     && CPU_FEATURE_USABLE (BMI2)
>                                       && CPU_FEATURE_USABLE (RTM)),
>                                      __strncasecmp_avx2_rtm)
>               X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp,
> @@ -660,13 +671,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>    IFUNC_IMPL (i, name, strncasecmp_l,
>               X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
>                                      (CPU_FEATURE_USABLE (AVX512VL)
> -                                     && CPU_FEATURE_USABLE (AVX512BW)),
> +                                     & CPU_FEATURE_USABLE (AVX512BW)
> +                                     && CPU_FEATURE_USABLE (BMI2)),
>                                      __strncasecmp_l_evex)
>               X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
> -                                    CPU_FEATURE_USABLE (AVX2),
> +                                    (CPU_FEATURE_USABLE (AVX2)
> +                                     && CPU_FEATURE_USABLE (BMI2)),
>                                      __strncasecmp_l_avx2)
>               X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
>                                      (CPU_FEATURE_USABLE (AVX2)
> +                                     && CPU_FEATURE_USABLE (BMI2)
>                                       && CPU_FEATURE_USABLE (RTM)),
>                                      __strncasecmp_l_avx2_rtm)
>               X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l,
> @@ -796,10 +810,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>                                       && CPU_FEATURE_USABLE (BMI2)),
>                                      __wcscmp_evex)
>               X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
> -                                    CPU_FEATURE_USABLE (AVX2),
> +                                    (CPU_FEATURE_USABLE (AVX2)
> +                                     && CPU_FEATURE_USABLE (BMI2)),
>                                      __wcscmp_avx2)
>               X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
>                                      (CPU_FEATURE_USABLE (AVX2)
> +                                     && CPU_FEATURE_USABLE (BMI2)
>                                       && CPU_FEATURE_USABLE (RTM)),
>                                      __wcscmp_avx2_rtm)
>               /* ISA V2 wrapper for SSE2 implementation because the SSE2
> @@ -816,10 +832,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>                                       && CPU_FEATURE_USABLE (BMI2)),
>                                      __wcsncmp_evex)
>               X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
> -                                    CPU_FEATURE_USABLE (AVX2),
> +                                    (CPU_FEATURE_USABLE (AVX2)
> +                                     && CPU_FEATURE_USABLE (BMI2)),
>                                      __wcsncmp_avx2)
>               X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
>                                      (CPU_FEATURE_USABLE (AVX2)
> +                                     && CPU_FEATURE_USABLE (BMI2)
>                                       && CPU_FEATURE_USABLE (RTM)),
>                                      __wcsncmp_avx2_rtm)
>               /* ISA V2 wrapper for GENERIC implementation because the
> @@ -1162,13 +1180,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>    IFUNC_IMPL (i, name, strncmp,
>               X86_IFUNC_IMPL_ADD_V4 (array, i, strncmp,
>                                      (CPU_FEATURE_USABLE (AVX512VL)
> -                                     && CPU_FEATURE_USABLE (AVX512BW)),
> +                                     && CPU_FEATURE_USABLE (AVX512BW)
> +                                     && CPU_FEATURE_USABLE (BMI2)),
>                                      __strncmp_evex)
>               X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
> -                                    CPU_FEATURE_USABLE (AVX2),
> +                                    (CPU_FEATURE_USABLE (AVX2)
> +                                     && CPU_FEATURE_USABLE (BMI2)),
>                                      __strncmp_avx2)
>               X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
>                                      (CPU_FEATURE_USABLE (AVX2)
> +                                     && CPU_FEATURE_USABLE (BMI2)
>                                       && CPU_FEATURE_USABLE (RTM)),
>                                      __strncmp_avx2_rtm)
>               X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp,
> diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> index 68646ef199..7622af259c 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> @@ -34,6 +34,7 @@ IFUNC_SELECTOR (void)
>    const struct cpu_features *cpu_features = __get_cpu_features ();
>
>    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
>        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
>                                       AVX_Fast_Unaligned_Load, ))
>      {
> diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
> index fdd5afe3af..9d6c9f66ba 100644
> --- a/sysdeps/x86_64/multiarch/strcmp.c
> +++ b/sysdeps/x86_64/multiarch/strcmp.c
> @@ -45,12 +45,12 @@ IFUNC_SELECTOR (void)
>    const struct cpu_features *cpu_features = __get_cpu_features ();
>
>    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
>        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
>                                       AVX_Fast_Unaligned_Load, ))
>      {
>        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> -         && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> -         && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> +         && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
>         return OPTIMIZE (evex);
>
>        if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
> diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c
> index 4ebe4bde30..c4f8b6bbb5 100644
> --- a/sysdeps/x86_64/multiarch/strncmp.c
> +++ b/sysdeps/x86_64/multiarch/strncmp.c
> @@ -41,12 +41,12 @@ IFUNC_SELECTOR (void)
>    const struct cpu_features *cpu_features = __get_cpu_features ();
>
>    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
>        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
>                                       AVX_Fast_Unaligned_Load, ))
>      {
>        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> -         && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> -         && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> +         && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
>         return OPTIMIZE (evex);
>
>        if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
> --
> 2.35.1
>

LGTM.
Sunil Pandey Oct. 3, 2022, 4:19 p.m. UTC | #2
Please separate this patch into 4 separate patches.

Patch1: sysdeps/x86_64/multiarch/strncmp.c
Patch2: sysdeps/x86_64/multiarch/strcmp.c
Patch3: sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
Patch4:  sysdeps/x86_64/multiarch/ifunc-impl-list.c

Rest of them looks OK to me.

On Sun, Oct 2, 2022 at 2:08 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Sun, Oct 2, 2022 at 8:34 AM Aurelien Jarno <aurelien@aurel32.net> wrote:
> >
> > The AVX2 str*cmp and wcs(n)cmp implementations use the 'bzhi'
> > instruction, which belongs to the BMI2 CPU feature.
> >
> > NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
> > as BSF if the CPU doesn't support TZCNT, and produces the same result
> > for non-zero input.
> >
> > Fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
> > Partially resolves: BZ #29611
> > ---
> >  sysdeps/x86_64/multiarch/ifunc-impl-list.c  | 47 +++++++++++++++------
> >  sysdeps/x86_64/multiarch/ifunc-strcasecmp.h |  1 +
> >  sysdeps/x86_64/multiarch/strcmp.c           |  4 +-
> >  sysdeps/x86_64/multiarch/strncmp.c          |  4 +-
> >  4 files changed, 39 insertions(+), 17 deletions(-)
> >
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > index a71444eccb..fec8790c11 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > @@ -448,13 +448,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >    IFUNC_IMPL (i, name, strcasecmp,
> >               X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp,
> >                                      (CPU_FEATURE_USABLE (AVX512VL)
> > -                                     && CPU_FEATURE_USABLE (AVX512BW)),
> > +                                     && CPU_FEATURE_USABLE (AVX512BW)
> > +                                     && CPU_FEATURE_USABLE (BMI2)),
> >                                      __strcasecmp_evex)
> >               X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
> > -                                    CPU_FEATURE_USABLE (AVX2),
> > +                                    (CPU_FEATURE_USABLE (AVX2)
> > +                                     && CPU_FEATURE_USABLE (BMI2)),
> >                                      __strcasecmp_avx2)
> >               X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
> >                                      (CPU_FEATURE_USABLE (AVX2)
> > +                                     && CPU_FEATURE_USABLE (BMI2)
> >                                       && CPU_FEATURE_USABLE (RTM)),
> >                                      __strcasecmp_avx2_rtm)
> >               X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp,
> > @@ -470,13 +473,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >    IFUNC_IMPL (i, name, strcasecmp_l,
> >               X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp,
> >                                      (CPU_FEATURE_USABLE (AVX512VL)
> > -                                     && CPU_FEATURE_USABLE (AVX512BW)),
> > +                                     && CPU_FEATURE_USABLE (AVX512BW)
> > +                                     && CPU_FEATURE_USABLE (BMI2)),
> >                                      __strcasecmp_l_evex)
> >               X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
> > -                                    CPU_FEATURE_USABLE (AVX2),
> > +                                    (CPU_FEATURE_USABLE (AVX2)
> > +                                     && CPU_FEATURE_USABLE (BMI2)),
> >                                      __strcasecmp_l_avx2)
> >               X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
> >                                      (CPU_FEATURE_USABLE (AVX2)
> > +                                     && CPU_FEATURE_USABLE (BMI2)
> >                                       && CPU_FEATURE_USABLE (RTM)),
> >                                      __strcasecmp_l_avx2_rtm)
> >               X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp_l,
> > @@ -585,10 +591,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >                                       && CPU_FEATURE_USABLE (BMI2)),
> >                                      __strcmp_evex)
> >               X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp,
> > -                                    CPU_FEATURE_USABLE (AVX2),
> > +                                    (CPU_FEATURE_USABLE (AVX2)
> > +                                     && CPU_FEATURE_USABLE (BMI2)),
> >                                      __strcmp_avx2)
> >               X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp,
> >                                      (CPU_FEATURE_USABLE (AVX2)
> > +                                     && CPU_FEATURE_USABLE (BMI2)
> >                                       && CPU_FEATURE_USABLE (RTM)),
> >                                      __strcmp_avx2_rtm)
> >               X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp,
> > @@ -638,13 +646,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >    IFUNC_IMPL (i, name, strncasecmp,
> >               X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
> >                                      (CPU_FEATURE_USABLE (AVX512VL)
> > -                                     && CPU_FEATURE_USABLE (AVX512BW)),
> > +                                     && CPU_FEATURE_USABLE (AVX512BW)
> > +                                     && CPU_FEATURE_USABLE (BMI2)),
> >                                      __strncasecmp_evex)
> >               X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
> > -                                    CPU_FEATURE_USABLE (AVX2),
> > +                                    (CPU_FEATURE_USABLE (AVX2)
> > +                                     && CPU_FEATURE_USABLE (BMI2)),
> >                                      __strncasecmp_avx2)
> >               X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
> >                                      (CPU_FEATURE_USABLE (AVX2)
> > +                                     && CPU_FEATURE_USABLE (BMI2)
> >                                       && CPU_FEATURE_USABLE (RTM)),
> >                                      __strncasecmp_avx2_rtm)
> >               X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp,
> > @@ -660,13 +671,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >    IFUNC_IMPL (i, name, strncasecmp_l,
> >               X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
> >                                      (CPU_FEATURE_USABLE (AVX512VL)
> > -                                     && CPU_FEATURE_USABLE (AVX512BW)),
> > +                                     & CPU_FEATURE_USABLE (AVX512BW)
> > +                                     && CPU_FEATURE_USABLE (BMI2)),
> >                                      __strncasecmp_l_evex)
> >               X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
> > -                                    CPU_FEATURE_USABLE (AVX2),
> > +                                    (CPU_FEATURE_USABLE (AVX2)
> > +                                     && CPU_FEATURE_USABLE (BMI2)),
> >                                      __strncasecmp_l_avx2)
> >               X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
> >                                      (CPU_FEATURE_USABLE (AVX2)
> > +                                     && CPU_FEATURE_USABLE (BMI2)
> >                                       && CPU_FEATURE_USABLE (RTM)),
> >                                      __strncasecmp_l_avx2_rtm)
> >               X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l,
> > @@ -796,10 +810,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >                                       && CPU_FEATURE_USABLE (BMI2)),
> >                                      __wcscmp_evex)
> >               X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
> > -                                    CPU_FEATURE_USABLE (AVX2),
> > +                                    (CPU_FEATURE_USABLE (AVX2)
> > +                                     && CPU_FEATURE_USABLE (BMI2)),
> >                                      __wcscmp_avx2)
> >               X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
> >                                      (CPU_FEATURE_USABLE (AVX2)
> > +                                     && CPU_FEATURE_USABLE (BMI2)
> >                                       && CPU_FEATURE_USABLE (RTM)),
> >                                      __wcscmp_avx2_rtm)
> >               /* ISA V2 wrapper for SSE2 implementation because the SSE2
> > @@ -816,10 +832,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >                                       && CPU_FEATURE_USABLE (BMI2)),
> >                                      __wcsncmp_evex)
> >               X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
> > -                                    CPU_FEATURE_USABLE (AVX2),
> > +                                    (CPU_FEATURE_USABLE (AVX2)
> > +                                     && CPU_FEATURE_USABLE (BMI2)),
> >                                      __wcsncmp_avx2)
> >               X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
> >                                      (CPU_FEATURE_USABLE (AVX2)
> > +                                     && CPU_FEATURE_USABLE (BMI2)
> >                                       && CPU_FEATURE_USABLE (RTM)),
> >                                      __wcsncmp_avx2_rtm)
> >               /* ISA V2 wrapper for GENERIC implementation because the
> > @@ -1162,13 +1180,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >    IFUNC_IMPL (i, name, strncmp,
> >               X86_IFUNC_IMPL_ADD_V4 (array, i, strncmp,
> >                                      (CPU_FEATURE_USABLE (AVX512VL)
> > -                                     && CPU_FEATURE_USABLE (AVX512BW)),
> > +                                     && CPU_FEATURE_USABLE (AVX512BW)
> > +                                     && CPU_FEATURE_USABLE (BMI2)),
> >                                      __strncmp_evex)
> >               X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
> > -                                    CPU_FEATURE_USABLE (AVX2),
> > +                                    (CPU_FEATURE_USABLE (AVX2)
> > +                                     && CPU_FEATURE_USABLE (BMI2)),
> >                                      __strncmp_avx2)
> >               X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
> >                                      (CPU_FEATURE_USABLE (AVX2)
> > +                                     && CPU_FEATURE_USABLE (BMI2)
> >                                       && CPU_FEATURE_USABLE (RTM)),
> >                                      __strncmp_avx2_rtm)
> >               X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp,
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> > index 68646ef199..7622af259c 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> > +++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> > @@ -34,6 +34,7 @@ IFUNC_SELECTOR (void)
> >    const struct cpu_features *cpu_features = __get_cpu_features ();
> >
> >    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> >        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> >                                       AVX_Fast_Unaligned_Load, ))
> >      {
> > diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
> > index fdd5afe3af..9d6c9f66ba 100644
> > --- a/sysdeps/x86_64/multiarch/strcmp.c
> > +++ b/sysdeps/x86_64/multiarch/strcmp.c
> > @@ -45,12 +45,12 @@ IFUNC_SELECTOR (void)
> >    const struct cpu_features *cpu_features = __get_cpu_features ();
> >
> >    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> >        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> >                                       AVX_Fast_Unaligned_Load, ))
> >      {
> >        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > -         && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > -         && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > +         && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
> >         return OPTIMIZE (evex);
> >
> >        if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
> > diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c
> > index 4ebe4bde30..c4f8b6bbb5 100644
> > --- a/sysdeps/x86_64/multiarch/strncmp.c
> > +++ b/sysdeps/x86_64/multiarch/strncmp.c
> > @@ -41,12 +41,12 @@ IFUNC_SELECTOR (void)
> >    const struct cpu_features *cpu_features = __get_cpu_features ();
> >
> >    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> >        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> >                                       AVX_Fast_Unaligned_Load, ))
> >      {
> >        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > -         && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > -         && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > +         && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
> >         return OPTIMIZE (evex);
> >
> >        if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
> > --
> > 2.35.1
> >
>
> LGTM.
Aurelien Jarno Oct. 3, 2022, 5:35 p.m. UTC | #3
On 2022-10-03 09:19, Sunil Pandey via Libc-alpha wrote:
> Please separate this patch into 4 separate patches.
> 
> Patch1: sysdeps/x86_64/multiarch/strncmp.c
> Patch2: sysdeps/x86_64/multiarch/strcmp.c
> Patch3: sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> Patch4:  sysdeps/x86_64/multiarch/ifunc-impl-list.c
> 
> Rest of them looks OK to me.

I don't fully see the point of doing that, but i'll do it in the next
version.
Noah Goldstein Oct. 3, 2022, 5:50 p.m. UTC | #4
On Mon, Oct 3, 2022 at 10:35 AM Aurelien Jarno <aurelien@aurel32.net> wrote:
>
> On 2022-10-03 09:19, Sunil Pandey via Libc-alpha wrote:
> > Please separate this patch into 4 separate patches.
> >
> > Patch1: sysdeps/x86_64/multiarch/strncmp.c
> > Patch2: sysdeps/x86_64/multiarch/strcmp.c
> > Patch3: sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> > Patch4:  sysdeps/x86_64/multiarch/ifunc-impl-list.c
> >
> > Rest of them looks OK to me.
>
> I don't fully see the point of doing that, but i'll do it in the next
> version.

I think to make backporting easier.
>
> --
> Aurelien Jarno                          GPG: 4096R/1DDD8C9B
> aurelien@aurel32.net                 http://www.aurel32.net
Sunil Pandey Oct. 3, 2022, 6:43 p.m. UTC | #5
On Mon, Oct 3, 2022 at 10:50 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Mon, Oct 3, 2022 at 10:35 AM Aurelien Jarno <aurelien@aurel32.net> wrote:
> >
> > On 2022-10-03 09:19, Sunil Pandey via Libc-alpha wrote:
> > > Please separate this patch into 4 separate patches.
> > >
> > > Patch1: sysdeps/x86_64/multiarch/strncmp.c
> > > Patch2: sysdeps/x86_64/multiarch/strcmp.c
> > > Patch3: sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> > > Patch4:  sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > >
> > > Rest of them looks OK to me.
> >
> > I don't fully see the point of doing that, but i'll do it in the next
> > version.
>
> I think to make backporting easier.

Exactly.

If you look closely, this patch combines 4 independent ifunc
functionality in one single patch.

As per latest backporting guideline, backport patches must apply
cleanly without any change. Having small independent patches
makes backporting a little easier.

> >
> > --
> > Aurelien Jarno                          GPG: 4096R/1DDD8C9B
> > aurelien@aurel32.net                 http://www.aurel32.net
Aurelien Jarno Oct. 3, 2022, 7:21 p.m. UTC | #6
On 2022-10-03 11:43, Sunil Pandey wrote:
> On Mon, Oct 3, 2022 at 10:50 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Mon, Oct 3, 2022 at 10:35 AM Aurelien Jarno <aurelien@aurel32.net> wrote:
> > >
> > > On 2022-10-03 09:19, Sunil Pandey via Libc-alpha wrote:
> > > > Please separate this patch into 4 separate patches.
> > > >
> > > > Patch1: sysdeps/x86_64/multiarch/strncmp.c
> > > > Patch2: sysdeps/x86_64/multiarch/strcmp.c
> > > > Patch3: sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> > > > Patch4:  sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > >
> > > > Rest of them looks OK to me.
> > >
> > > I don't fully see the point of doing that, but i'll do it in the next
> > > version.
> >
> > I think to make backporting easier.
> 
> Exactly.
> 
> If you look closely, this patch combines 4 independent ifunc
> functionality in one single patch.

It's partially true. I actually count 3 different functionalities. The
changes in ifunc-impl-list are all related to the other 3.

> As per latest backporting guideline, backport patches must apply
> cleanly without any change. Having small independent patches
> makes backporting a little easier.
> 

I agree with that when we are talking about backporting improvements. In
that case we have to backport the changes to fix the bug, even if they
don't apply cleanly.
Aurelien Jarno Oct. 3, 2022, 7:59 p.m. UTC | #7
On 2022-10-03 21:21, Aurelien Jarno wrote:
> On 2022-10-03 11:43, Sunil Pandey wrote:
> > On Mon, Oct 3, 2022 at 10:50 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > On Mon, Oct 3, 2022 at 10:35 AM Aurelien Jarno <aurelien@aurel32.net> wrote:
> > > >
> > > > On 2022-10-03 09:19, Sunil Pandey via Libc-alpha wrote:
> > > > > Please separate this patch into 4 separate patches.
> > > > >
> > > > > Patch1: sysdeps/x86_64/multiarch/strncmp.c
> > > > > Patch2: sysdeps/x86_64/multiarch/strcmp.c
> > > > > Patch3: sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> > > > > Patch4:  sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > > >
> > > > > Rest of them looks OK to me.
> > > >
> > > > I don't fully see the point of doing that, but i'll do it in the next
> > > > version.
> > >
> > > I think to make backporting easier.
> > 
> > Exactly.
> > 
> > If you look closely, this patch combines 4 independent ifunc
> > functionality in one single patch.
> 
> It's partially true. I actually count 3 different functionalities. The
> changes in ifunc-impl-list are all related to the other 3.

I have just sent a v3 that split that patch in smaller pieces:
- str(n)casecmp
- strcmp
- strncmp
- wcs(n)cmp

I do not see the point in separating the changes from ifunc-impl-list.c,
as we should actually ensure that they are in sync with the ifunc
selector so that the testing is done properly.
Sunil Pandey Oct. 3, 2022, 8:51 p.m. UTC | #8
On Mon, Oct 3, 2022 at 12:59 PM Aurelien Jarno <aurelien@aurel32.net> wrote:
>
> On 2022-10-03 21:21, Aurelien Jarno wrote:
> > On 2022-10-03 11:43, Sunil Pandey wrote:
> > > On Mon, Oct 3, 2022 at 10:50 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > >
> > > > On Mon, Oct 3, 2022 at 10:35 AM Aurelien Jarno <aurelien@aurel32.net> wrote:
> > > > >
> > > > > On 2022-10-03 09:19, Sunil Pandey via Libc-alpha wrote:
> > > > > > Please separate this patch into 4 separate patches.
> > > > > >
> > > > > > Patch1: sysdeps/x86_64/multiarch/strncmp.c
> > > > > > Patch2: sysdeps/x86_64/multiarch/strcmp.c
> > > > > > Patch3: sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> > > > > > Patch4:  sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > > > >
> > > > > > Rest of them looks OK to me.
> > > > >
> > > > > I don't fully see the point of doing that, but i'll do it in the next
> > > > > version.
> > > >
> > > > I think to make backporting easier.
> > >
> > > Exactly.
> > >
> > > If you look closely, this patch combines 4 independent ifunc
> > > functionality in one single patch.
> >
> > It's partially true. I actually count 3 different functionalities. The
> > changes in ifunc-impl-list are all related to the other 3.
>
> I have just sent a v3 that split that patch in smaller pieces:
> - str(n)casecmp
> - strcmp
> - strncmp
> - wcs(n)cmp
>
> I do not see the point in separating the changes from ifunc-impl-list.c,
> as we should actually ensure that they are in sync with the ifunc
> selector so that the testing is done properly.

My feedback was for the specific patch for your previous arrangement.
After you rearrange in v3, it looks ok.

>
> --
> Aurelien Jarno                          GPG: 4096R/1DDD8C9B
> aurelien@aurel32.net                 http://www.aurel32.net
diff mbox series

Patch

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index a71444eccb..fec8790c11 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -448,13 +448,16 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strcasecmp,
 	      X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp,
 				     (CPU_FEATURE_USABLE (AVX512VL)
-				      && CPU_FEATURE_USABLE (AVX512BW)),
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strcasecmp_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strcasecmp_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __strcasecmp_avx2_rtm)
 	      X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp,
@@ -470,13 +473,16 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strcasecmp_l,
 	      X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp,
 				     (CPU_FEATURE_USABLE (AVX512VL)
-				      && CPU_FEATURE_USABLE (AVX512BW)),
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strcasecmp_l_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strcasecmp_l_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __strcasecmp_l_avx2_rtm)
 	      X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp_l,
@@ -585,10 +591,12 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strcmp_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strcmp_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __strcmp_avx2_rtm)
 	      X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp,
@@ -638,13 +646,16 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strncasecmp,
 	      X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
 				     (CPU_FEATURE_USABLE (AVX512VL)
-				      && CPU_FEATURE_USABLE (AVX512BW)),
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strncasecmp_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strncasecmp_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __strncasecmp_avx2_rtm)
 	      X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp,
@@ -660,13 +671,16 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strncasecmp_l,
 	      X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
 				     (CPU_FEATURE_USABLE (AVX512VL)
-				      && CPU_FEATURE_USABLE (AVX512BW)),
+				      & CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strncasecmp_l_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strncasecmp_l_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __strncasecmp_l_avx2_rtm)
 	      X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l,
@@ -796,10 +810,12 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 				      && CPU_FEATURE_USABLE (BMI2)),
 				     __wcscmp_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __wcscmp_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __wcscmp_avx2_rtm)
 	      /* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -816,10 +832,12 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 				      && CPU_FEATURE_USABLE (BMI2)),
 				     __wcsncmp_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __wcsncmp_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __wcsncmp_avx2_rtm)
 	      /* ISA V2 wrapper for GENERIC implementation because the
@@ -1162,13 +1180,16 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strncmp,
 	      X86_IFUNC_IMPL_ADD_V4 (array, i, strncmp,
 				     (CPU_FEATURE_USABLE (AVX512VL)
-				      && CPU_FEATURE_USABLE (AVX512BW)),
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strncmp_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strncmp_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __strncmp_avx2_rtm)
 	      X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp,
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
index 68646ef199..7622af259c 100644
--- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
+++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
@@ -34,6 +34,7 @@  IFUNC_SELECTOR (void)
   const struct cpu_features *cpu_features = __get_cpu_features ();
 
   if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
 				      AVX_Fast_Unaligned_Load, ))
     {
diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
index fdd5afe3af..9d6c9f66ba 100644
--- a/sysdeps/x86_64/multiarch/strcmp.c
+++ b/sysdeps/x86_64/multiarch/strcmp.c
@@ -45,12 +45,12 @@  IFUNC_SELECTOR (void)
   const struct cpu_features *cpu_features = __get_cpu_features ();
 
   if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
 				      AVX_Fast_Unaligned_Load, ))
     {
       if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
-	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
-	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
 	return OPTIMIZE (evex);
 
       if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c
index 4ebe4bde30..c4f8b6bbb5 100644
--- a/sysdeps/x86_64/multiarch/strncmp.c
+++ b/sysdeps/x86_64/multiarch/strncmp.c
@@ -41,12 +41,12 @@  IFUNC_SELECTOR (void)
   const struct cpu_features *cpu_features = __get_cpu_features ();
 
   if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
 				      AVX_Fast_Unaligned_Load, ))
     {
       if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
-	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
-	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
 	return OPTIMIZE (evex);
 
       if (CPU_FEATURE_USABLE_P (cpu_features, RTM))