diff mbox series

Align ix86_{move_max,store_max} with vectorizer.

Message ID 20240821053953.1727019-1-hongtao.liu@intel.com
State New
Headers show
Series Align ix86_{move_max,store_max} with vectorizer. | expand

Commit Message

liuhongt Aug. 21, 2024, 5:39 a.m. UTC
When none of -mprefer-vector-width, avx256_optimal/avx128_optimal,
avx256_store_by_pieces/avx512_store_by_pieces is specified, GCC
sets ix86_{move_max,store_max} to the maximum available vector length,
except in the AVX case.

	      if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
		  && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
		opts->x_ix86_move_max = PVW_AVX512;
	      else
		opts->x_ix86_move_max = PVW_AVX128;

So for -mavx2, the vectorizer will choose 256-bit vectors, but 128-bit
moves are used for struct copies, so there could be a potential STLF
(store-to-load forwarding) issue due to this size mismatch.

The patch fixes that and improves 538.imagick_r by ~30% for -march=x86-64-v3 -O2.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Any comments?

gcc/ChangeLog:

	* config/i386/i386-options.cc (ix86_option_override_internal):
	Set ix86_{move_max,store_max} to PVW_AVX256 when TARGET_AVX
	instead of PVW_AVX128.

gcc/testsuite/ChangeLog:
	* gcc.target/i386/pieces-memcpy-10.c: Add -mprefer-vector-width=128.
	* gcc.target/i386/pieces-memcpy-6.c: Ditto.
	* gcc.target/i386/pieces-memset-38.c: Ditto.
	* gcc.target/i386/pieces-memset-40.c: Ditto.
	* gcc.target/i386/pieces-memset-41.c: Ditto.
	* gcc.target/i386/pieces-memset-42.c: Ditto.
	* gcc.target/i386/pieces-memset-43.c: Ditto.
	* gcc.target/i386/pieces-strcpy-2.c: Ditto.
	* gcc.target/i386/pieces-memcpy-22.c: New test.
	* gcc.target/i386/pieces-memset-51.c: New test.
	* gcc.target/i386/pieces-strcpy-3.c: New test.
---
 gcc/config/i386/i386-options.cc                  |  6 ++++++
 gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c | 12 ++++++++++++
 gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c  |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-38.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-40.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-41.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-42.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-43.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-51.c | 12 ++++++++++++
 gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c  |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c  | 15 +++++++++++++++
 12 files changed, 53 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-51.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c

Comments

Richard Biener Aug. 21, 2024, 8:49 a.m. UTC | #1
On Wed, Aug 21, 2024 at 7:40 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> When none of mprefer-vector-width, avx256_optimal/avx128_optimal,
> avx256_store_by_pieces/avx512_store_by_pieces is specified, GCC will
> set ix86_{move_max,store_max} as max available vector length except
> for AVX part.
>
>               if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
>                   && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
>                 opts->x_ix86_move_max = PVW_AVX512;
>               else
>                 opts->x_ix86_move_max = PVW_AVX128;
>
> So for -mavx2, vectorizer will choose 256-bit for vectorization, but
> 128-bit is used for struct copy, there could be a potential STLF issue
> due to this "misalign".
>
> The patch fixes that and improved 538.imagick_r by ~30% for -march=x86-64-v3 -O2.
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Any comments?

Should we look at the avx128_optimal tune and/or avx256_split_regs and
avx256_optimal also for 512?  Because IIRC the vectorizer's default looks
at that as well (OTOH larger stores should be fine for STLF).

> gcc/ChangeLog:
>
>         * config/i386/i386-options.cc (ix86_option_override_internal):
>         set ix86_{move_max,store_max} to PVW_AVX256 when TARGET_AVX
>         instead of PVW_AVX128.
>
> gcc/testsuite/ChangeLog:
>         * gcc.target/i386/pieces-memcpy-10.c: Add -mprefer-vector-width=128.
>         * gcc.target/i386/pieces-memcpy-6.c: Ditto.
>         * gcc.target/i386/pieces-memset-38.c: Ditto.
>         * gcc.target/i386/pieces-memset-40.c: Ditto.
>         * gcc.target/i386/pieces-memset-41.c: Ditto.
>         * gcc.target/i386/pieces-memset-42.c: Ditto.
>         * gcc.target/i386/pieces-memset-43.c: Ditto.
>         * gcc.target/i386/pieces-strcpy-2.c: Ditto.
>         * gcc.target/i386/pieces-memcpy-22.c: New test.
>         * gcc.target/i386/pieces-memset-51.c: New test.
>         * gcc.target/i386/pieces-strcpy-3.c: New test.
> ---
>  gcc/config/i386/i386-options.cc                  |  6 ++++++
>  gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c | 12 ++++++++++++
>  gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c  |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memset-38.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memset-40.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memset-41.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memset-42.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memset-43.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-memset-51.c | 12 ++++++++++++
>  gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c  |  2 +-
>  gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c  | 15 +++++++++++++++
>  12 files changed, 53 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-51.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c
>
> diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
> index f423455b363..f79257cc764 100644
> --- a/gcc/config/i386/i386-options.cc
> +++ b/gcc/config/i386/i386-options.cc
> @@ -3023,6 +3023,9 @@ ix86_option_override_internal (bool main_args_p,
>               if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
>                   && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
>                 opts->x_ix86_move_max = PVW_AVX512;
> +             /* Align with vectorizer to avoid potential STLF issue.  */
> +             else if (TARGET_AVX_P (opts->x_ix86_isa_flags))
> +               opts->x_ix86_move_max = PVW_AVX256;
>               else
>                 opts->x_ix86_move_max = PVW_AVX128;
>             }
> @@ -3047,6 +3050,9 @@ ix86_option_override_internal (bool main_args_p,
>               if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
>                   && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
>                 opts->x_ix86_store_max = PVW_AVX512;
> +             /* Align with vectorizer to avoid potential STLF issue.  */
> +             else if (TARGET_AVX_P (opts->x_ix86_isa_flags))
> +               opts->x_ix86_store_max = PVW_AVX256;
>               else
>                 opts->x_ix86_store_max = PVW_AVX128;
>             }
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
> index 5faee21f9b9..53ad0b3be44 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
>
>  extern char *dst, *src;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c
> new file mode 100644
> index 00000000000..605b3623ffc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
> +
> +extern char *dst, *src;
> +
> +void
> +foo (void)
> +{
> +  __builtin_memcpy (dst, src, 33);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
> index 5f99cc98c47..cfd2a86cf33 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { ! ia32 } } } */
> -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
>
>  extern char *dst, *src;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
> index ed4a24a54fd..ddd194debd5 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
> +/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */
>
>  extern char *dst;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
> index 86358c99a83..5878876550c 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
> +/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */
>  /* Cope with --enable-frame-pointer, Solaris/x86 -mstackrealign default.  */
>  /* { dg-additional-options "-fomit-frame-pointer -mno-stackrealign" } */
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
> index d7a27f52983..27a6c8ad139 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge -mno-stackrealign" } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge -mno-stackrealign" } */
>  /* Cope with --enable-frame-pointer.  */
>  /* { dg-additional-options "-fomit-frame-pointer" } */
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
> index df0c122aae7..103da699ae5 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
>
>  extern char *dst;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
> index 2f2179c2df9..f1494e17610 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
>
>  extern char *dst;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-51.c b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c
> new file mode 100644
> index 00000000000..192ec0d1647
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
> +
> +extern char *dst;
> +
> +void
> +foo (int x)
> +{
> +  __builtin_memset (dst, x, 64);
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
> index 90446edb4f3..9bb94b7419b 100644
> --- a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
> +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { ! ia32 } } } */
> -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
>
>  extern char *strcpy (char *, const char *);
>
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c
> new file mode 100644
> index 00000000000..df7571b547f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
> +
> +extern char *strcpy (char *, const char *);
> +
> +void
> +foo (char *s)
> +{
> +  strcpy (s,
> +         "1234567890abcdef123456abcdef5678123456abcdef567abcdef678"
> +         "1234567");
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%ymm" 2 } } */
> +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
> --
> 2.31.1
>
Hongtao Liu Aug. 21, 2024, 9:24 a.m. UTC | #2
On Wed, Aug 21, 2024 at 4:49 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Wed, Aug 21, 2024 at 7:40 AM liuhongt <hongtao.liu@intel.com> wrote:
> >
> > When none of mprefer-vector-width, avx256_optimal/avx128_optimal,
> > avx256_store_by_pieces/avx512_store_by_pieces is specified, GCC will
> > set ix86_{move_max,store_max} as max available vector length except
> > for AVX part.
> >
> >               if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
> >                   && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
> >                 opts->x_ix86_move_max = PVW_AVX512;
> >               else
> >                 opts->x_ix86_move_max = PVW_AVX128;
> >
> > So for -mavx2, vectorizer will choose 256-bit for vectorization, but
> > 128-bit is used for struct copy, there could be a potential STLF issue
> > due to this "misalign".
> >
> > The patch fixes that and improved 538.imagick_r by ~30% for -march=x86-64-v3 -O2.
> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > Any comments?
>
> Should we look at the avx128_optimal tune and/or avx256_split_regs and
> avx256_optimal
> also for 512?  Because IIRC the vectorizers default looks at that as
> well (OTOH larger
> stores should be fine for STLF).
For double-pumped processors, i.e. SRF, there's no STLF issue for a
128-bit store followed by a 256-bit load, since the 256-bit load is
split into two 128-bit loads.
I guess it should be similar for Znver1/Znver4, so the mismatch between
the struct copy size and the vectorizer size should be fine there.
One exception is SPR, where we use 256-bit for vectorization and 512-bit
for struct copy; it could be an issue when the struct copy comes after
the vectorized code.
But I haven't observed any such cases yet, and for the non-STLF-stall
case a 512-bit copy should be better than a 256-bit copy on SPR, so I'll
leave it as is.  (There's a plan to enable 512-bit vectorization for SPR
by default; it's ongoing.)
>
> > gcc/ChangeLog:
> >
> >         * config/i386/i386-options.cc (ix86_option_override_internal):
> >         set ix86_{move_max,store_max} to PVW_AVX256 when TARGET_AVX
> >         instead of PVW_AVX128.
> >
> > gcc/testsuite/ChangeLog:
> >         * gcc.target/i386/pieces-memcpy-10.c: Add -mprefer-vector-width=128.
> >         * gcc.target/i386/pieces-memcpy-6.c: Ditto.
> >         * gcc.target/i386/pieces-memset-38.c: Ditto.
> >         * gcc.target/i386/pieces-memset-40.c: Ditto.
> >         * gcc.target/i386/pieces-memset-41.c: Ditto.
> >         * gcc.target/i386/pieces-memset-42.c: Ditto.
> >         * gcc.target/i386/pieces-memset-43.c: Ditto.
> >         * gcc.target/i386/pieces-strcpy-2.c: Ditto.
> >         * gcc.target/i386/pieces-memcpy-22.c: New test.
> >         * gcc.target/i386/pieces-memset-51.c: New test.
> >         * gcc.target/i386/pieces-strcpy-3.c: New test.
> > ---
> >  gcc/config/i386/i386-options.cc                  |  6 ++++++
> >  gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c |  2 +-
> >  gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c | 12 ++++++++++++
> >  gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c  |  2 +-
> >  gcc/testsuite/gcc.target/i386/pieces-memset-38.c |  2 +-
> >  gcc/testsuite/gcc.target/i386/pieces-memset-40.c |  2 +-
> >  gcc/testsuite/gcc.target/i386/pieces-memset-41.c |  2 +-
> >  gcc/testsuite/gcc.target/i386/pieces-memset-42.c |  2 +-
> >  gcc/testsuite/gcc.target/i386/pieces-memset-43.c |  2 +-
> >  gcc/testsuite/gcc.target/i386/pieces-memset-51.c | 12 ++++++++++++
> >  gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c  |  2 +-
> >  gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c  | 15 +++++++++++++++
> >  12 files changed, 53 insertions(+), 8 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-51.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c
> >
> > diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
> > index f423455b363..f79257cc764 100644
> > --- a/gcc/config/i386/i386-options.cc
> > +++ b/gcc/config/i386/i386-options.cc
> > @@ -3023,6 +3023,9 @@ ix86_option_override_internal (bool main_args_p,
> >               if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
> >                   && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
> >                 opts->x_ix86_move_max = PVW_AVX512;
> > +             /* Align with vectorizer to avoid potential STLF issue.  */
> > +             else if (TARGET_AVX_P (opts->x_ix86_isa_flags))
> > +               opts->x_ix86_move_max = PVW_AVX256;
> >               else
> >                 opts->x_ix86_move_max = PVW_AVX128;
> >             }
> > @@ -3047,6 +3050,9 @@ ix86_option_override_internal (bool main_args_p,
> >               if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
> >                   && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
> >                 opts->x_ix86_store_max = PVW_AVX512;
> > +             /* Align with vectorizer to avoid potential STLF issue.  */
> > +             else if (TARGET_AVX_P (opts->x_ix86_isa_flags))
> > +               opts->x_ix86_store_max = PVW_AVX256;
> >               else
> >                 opts->x_ix86_store_max = PVW_AVX128;
> >             }
> > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
> > index 5faee21f9b9..53ad0b3be44 100644
> > --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
> > +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
> > @@ -1,5 +1,5 @@
> >  /* { dg-do compile } */
> > -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
> > +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
> >
> >  extern char *dst, *src;
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c
> > new file mode 100644
> > index 00000000000..605b3623ffc
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c
> > @@ -0,0 +1,12 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
> > +
> > +extern char *dst, *src;
> > +
> > +void
> > +foo (void)
> > +{
> > +  __builtin_memcpy (dst, src, 33);
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
> > index 5f99cc98c47..cfd2a86cf33 100644
> > --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
> > +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
> > @@ -1,5 +1,5 @@
> >  /* { dg-do compile { target { ! ia32 } } } */
> > -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
> > +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
> >
> >  extern char *dst, *src;
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
> > index ed4a24a54fd..ddd194debd5 100644
> > --- a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
> > +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
> > @@ -1,5 +1,5 @@
> >  /* { dg-do compile } */
> > -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
> > +/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */
> >
> >  extern char *dst;
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
> > index 86358c99a83..5878876550c 100644
> > --- a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
> > +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
> > @@ -1,5 +1,5 @@
> >  /* { dg-do compile } */
> > -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
> > +/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */
> >  /* Cope with --enable-frame-pointer, Solaris/x86 -mstackrealign default.  */
> >  /* { dg-additional-options "-fomit-frame-pointer -mno-stackrealign" } */
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
> > index d7a27f52983..27a6c8ad139 100644
> > --- a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
> > +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
> > @@ -1,5 +1,5 @@
> >  /* { dg-do compile } */
> > -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge -mno-stackrealign" } */
> > +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge -mno-stackrealign" } */
> >  /* Cope with --enable-frame-pointer.  */
> >  /* { dg-additional-options "-fomit-frame-pointer" } */
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
> > index df0c122aae7..103da699ae5 100644
> > --- a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
> > +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
> > @@ -1,5 +1,5 @@
> >  /* { dg-do compile } */
> > -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
> > +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
> >
> >  extern char *dst;
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
> > index 2f2179c2df9..f1494e17610 100644
> > --- a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
> > +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
> > @@ -1,5 +1,5 @@
> >  /* { dg-do compile } */
> > -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
> > +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
> >
> >  extern char *dst;
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-51.c b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c
> > new file mode 100644
> > index 00000000000..192ec0d1647
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c
> > @@ -0,0 +1,12 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
> > +
> > +extern char *dst;
> > +
> > +void
> > +foo (int x)
> > +{
> > +  __builtin_memset (dst, x, 64);
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
> > index 90446edb4f3..9bb94b7419b 100644
> > --- a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
> > +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
> > @@ -1,5 +1,5 @@
> >  /* { dg-do compile { target { ! ia32 } } } */
> > -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
> > +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
> >
> >  extern char *strcpy (char *, const char *);
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c
> > new file mode 100644
> > index 00000000000..df7571b547f
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c
> > @@ -0,0 +1,15 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
> > +
> > +extern char *strcpy (char *, const char *);
> > +
> > +void
> > +foo (char *s)
> > +{
> > +  strcpy (s,
> > +         "1234567890abcdef123456abcdef5678123456abcdef567abcdef678"
> > +         "1234567");
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%ymm" 2 } } */
> > +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
> > --
> > 2.31.1
> >
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index f423455b363..f79257cc764 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -3023,6 +3023,9 @@  ix86_option_override_internal (bool main_args_p,
 	      if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
 		  && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
 		opts->x_ix86_move_max = PVW_AVX512;
+	      /* Align with vectorizer to avoid potential STLF issue.  */
+	      else if (TARGET_AVX_P (opts->x_ix86_isa_flags))
+		opts->x_ix86_move_max = PVW_AVX256;
 	      else
 		opts->x_ix86_move_max = PVW_AVX128;
 	    }
@@ -3047,6 +3050,9 @@  ix86_option_override_internal (bool main_args_p,
 	      if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
 		  && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
 		opts->x_ix86_store_max = PVW_AVX512;
+	      /* Align with vectorizer to avoid potential STLF issue.  */
+	      else if (TARGET_AVX_P (opts->x_ix86_isa_flags))
+		opts->x_ix86_store_max = PVW_AVX256;
 	      else
 		opts->x_ix86_store_max = PVW_AVX128;
 	    }
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
index 5faee21f9b9..53ad0b3be44 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *dst, *src;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c
new file mode 100644
index 00000000000..605b3623ffc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
index 5f99cc98c47..cfd2a86cf33 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *dst, *src;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
index ed4a24a54fd..ddd194debd5 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
index 86358c99a83..5878876550c 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */
 /* Cope with --enable-frame-pointer, Solaris/x86 -mstackrealign default.  */
 /* { dg-additional-options "-fomit-frame-pointer -mno-stackrealign" } */
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
index d7a27f52983..27a6c8ad139 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge -mno-stackrealign" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge -mno-stackrealign" } */
 /* Cope with --enable-frame-pointer.  */
 /* { dg-additional-options "-fomit-frame-pointer" } */
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
index df0c122aae7..103da699ae5 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
index 2f2179c2df9..f1494e17610 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-51.c b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c
new file mode 100644
index 00000000000..192ec0d1647
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 64);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
index 90446edb4f3..9bb94b7419b 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *strcpy (char *, const char *);
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c
new file mode 100644
index 00000000000..df7571b547f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *strcpy (char *, const char *);
+
+void
+foo (char *s)
+{
+  strcpy (s,
+	  "1234567890abcdef123456abcdef5678123456abcdef567abcdef678"
+	  "1234567");
+}
+
+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */