Message ID | 20240821053953.1727019-1-hongtao.liu@intel.com |
---|---|
State | New |
Headers | show |
Series | Align ix86_{move_max,store_max} with vectorizer. | expand |
On Wed, Aug 21, 2024 at 7:40 AM liuhongt <hongtao.liu@intel.com> wrote: > > When none of mprefer-vector-width, avx256_optimal/avx128_optimal, > avx256_store_by_pieces/avx512_store_by_pieces is specified, GCC will > set ix86_{move_max,store_max} as max available vector length except > for AVX part. > > if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) > && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) > opts->x_ix86_move_max = PVW_AVX512; > else > opts->x_ix86_move_max = PVW_AVX128; > > So for -mavx2, vectorizer will choose 256-bit for vectorization, but > 128-bit is used for struct copy, there could be a potential STLF issue > due to this "misalign". > > The patch fixes that and improved 538.imagick_r by ~30% for -march=x86-64-v3 -O2. > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > Any comments? Should we look at the avx128_optimal tune and/or avx256_split_regs and avx256_optimal also for 512? Because IIRC the vectorizers default looks at that as well (OTOH larger stores should be fine for STLF). > gcc/ChangeLog: > > * config/i386/i386-options.cc (ix86_option_override_internal): > set ix86_{move_max,store_max} to PVW_AVX256 when TARGET_AVX > instead of PVW_AVX128. > > gcc/testsuite/ChangeLog: > * gcc.target/i386/pieces-memcpy-10.c: Add -mprefer-vector-width=128. > * gcc.target/i386/pieces-memcpy-6.c: Ditto. > * gcc.target/i386/pieces-memset-38.c: Ditto. > * gcc.target/i386/pieces-memset-40.c: Ditto. > * gcc.target/i386/pieces-memset-41.c: Ditto. > * gcc.target/i386/pieces-memset-42.c: Ditto. > * gcc.target/i386/pieces-memset-43.c: Ditto. > * gcc.target/i386/pieces-strcpy-2.c: Ditto. > * gcc.target/i386/pieces-memcpy-22.c: New test. > * gcc.target/i386/pieces-memset-51.c: New test. > * gcc.target/i386/pieces-strcpy-3.c: New test. 
> --- > gcc/config/i386/i386-options.cc | 6 ++++++ > gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c | 2 +- > gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c | 12 ++++++++++++ > gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c | 2 +- > gcc/testsuite/gcc.target/i386/pieces-memset-38.c | 2 +- > gcc/testsuite/gcc.target/i386/pieces-memset-40.c | 2 +- > gcc/testsuite/gcc.target/i386/pieces-memset-41.c | 2 +- > gcc/testsuite/gcc.target/i386/pieces-memset-42.c | 2 +- > gcc/testsuite/gcc.target/i386/pieces-memset-43.c | 2 +- > gcc/testsuite/gcc.target/i386/pieces-memset-51.c | 12 ++++++++++++ > gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c | 2 +- > gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c | 15 +++++++++++++++ > 12 files changed, 53 insertions(+), 8 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c > create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-51.c > create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c > > diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc > index f423455b363..f79257cc764 100644 > --- a/gcc/config/i386/i386-options.cc > +++ b/gcc/config/i386/i386-options.cc > @@ -3023,6 +3023,9 @@ ix86_option_override_internal (bool main_args_p, > if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) > && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) > opts->x_ix86_move_max = PVW_AVX512; > + /* Align with vectorizer to avoid potential STLF issue. */ > + else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) > + opts->x_ix86_move_max = PVW_AVX256; > else > opts->x_ix86_move_max = PVW_AVX128; > } > @@ -3047,6 +3050,9 @@ ix86_option_override_internal (bool main_args_p, > if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) > && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) > opts->x_ix86_store_max = PVW_AVX512; > + /* Align with vectorizer to avoid potential STLF issue. 
*/ > + else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) > + opts->x_ix86_store_max = PVW_AVX256; > else > opts->x_ix86_store_max = PVW_AVX128; > } > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c > index 5faee21f9b9..53ad0b3be44 100644 > --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c > +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ > +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ > > extern char *dst, *src; > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c > new file mode 100644 > index 00000000000..605b3623ffc > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ > + > +extern char *dst, *src; > + > +void > +foo (void) > +{ > + __builtin_memcpy (dst, src, 33); > +} > + > +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c > index 5f99cc98c47..cfd2a86cf33 100644 > --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c > +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c > @@ -1,5 +1,5 @@ > /* { dg-do compile { target { ! 
ia32 } } } */ > -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ > +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ > > extern char *dst, *src; > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c > index ed4a24a54fd..ddd194debd5 100644 > --- a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c > +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */ > +/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */ > > extern char *dst; > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c > index 86358c99a83..5878876550c 100644 > --- a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c > +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */ > +/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */ > /* Cope with --enable-frame-pointer, Solaris/x86 -mstackrealign default. */ > /* { dg-additional-options "-fomit-frame-pointer -mno-stackrealign" } */ > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c > index d7a27f52983..27a6c8ad139 100644 > --- a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c > +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge -mno-stackrealign" } */ > +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge -mno-stackrealign" } */ > /* Cope with --enable-frame-pointer. 
*/ > /* { dg-additional-options "-fomit-frame-pointer" } */ > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c > index df0c122aae7..103da699ae5 100644 > --- a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c > +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ > +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ > > extern char *dst; > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c > index 2f2179c2df9..f1494e17610 100644 > --- a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c > +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ > +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ > > extern char *dst; > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-51.c b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c > new file mode 100644 > index 00000000000..192ec0d1647 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ > + > +extern char *dst; > + > +void > +foo (int x) > +{ > + __builtin_memset (dst, x, 64); > +} > + > +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c > index 90446edb4f3..9bb94b7419b 100644 > --- a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c > +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c > @@ -1,5 +1,5 @@ > /* { dg-do compile { target { ! 
ia32 } } } */ > -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ > +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ > > extern char *strcpy (char *, const char *); > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c > new file mode 100644 > index 00000000000..df7571b547f > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ > + > +extern char *strcpy (char *, const char *); > + > +void > +foo (char *s) > +{ > + strcpy (s, > + "1234567890abcdef123456abcdef5678123456abcdef567abcdef678" > + "1234567"); > +} > + > +/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%ymm" 2 } } */ > +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ > -- > 2.31.1 >
On Wed, Aug 21, 2024 at 4:49 PM Richard Biener <richard.guenther@gmail.com> wrote: > > On Wed, Aug 21, 2024 at 7:40 AM liuhongt <hongtao.liu@intel.com> wrote: > > > > When none of mprefer-vector-width, avx256_optimal/avx128_optimal, > > avx256_store_by_pieces/avx512_store_by_pieces is specified, GCC will > > set ix86_{move_max,store_max} as max available vector length except > > for AVX part. > > > > if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) > > && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) > > opts->x_ix86_move_max = PVW_AVX512; > > else > > opts->x_ix86_move_max = PVW_AVX128; > > > > So for -mavx2, vectorizer will choose 256-bit for vectorization, but > > 128-bit is used for struct copy, there could be a potential STLF issue > > due to this "misalign". > > > > The patch fixes that and improved 538.imagick_r by ~30% for -march=x86-64-v3 -O2. > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > > Any comments? > > Should we look at the avx128_optimal tune and/or avx256_split_regs and > avx256_optimal > also for 512? Because IIRC the vectorizers default looks at that as > well (OTOH larger > stores should be fine for STLF). For double-pumped processors, e.g. SRF, there's no STLF (store-to-load forwarding) issue for a 128-bit store followed by a 256-bit load, since the 256-bit load is split into two 128-bit loads. I guess it should be similar for Znver1/Znver4, so the mismatch between the struct copy size and the vectorizer size should be fine there. One exception is that we use 256-bit for vectorization and 512-bit for struct copy on SPR; it could be an issue when the struct copy comes after the vectorized code. But I haven't observed any such cases yet, and for the non-STLF-stall case a 512-bit copy should be better than a 256-bit copy on SPR, so I'll leave it as is. (There's a plan to enable 512-bit vectorization for SPR by default; it's ongoing.) 
> > > gcc/ChangeLog: > > > > * config/i386/i386-options.cc (ix86_option_override_internal): > > set ix86_{move_max,store_max} to PVW_AVX256 when TARGET_AVX > > instead of PVW_AVX128. > > > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pieces-memcpy-10.c: Add -mprefer-vector-width=128. > > * gcc.target/i386/pieces-memcpy-6.c: Ditto. > > * gcc.target/i386/pieces-memset-38.c: Ditto. > > * gcc.target/i386/pieces-memset-40.c: Ditto. > > * gcc.target/i386/pieces-memset-41.c: Ditto. > > * gcc.target/i386/pieces-memset-42.c: Ditto. > > * gcc.target/i386/pieces-memset-43.c: Ditto. > > * gcc.target/i386/pieces-strcpy-2.c: Ditto. > > * gcc.target/i386/pieces-memcpy-22.c: New test. > > * gcc.target/i386/pieces-memset-51.c: New test. > > * gcc.target/i386/pieces-strcpy-3.c: New test. > > --- > > gcc/config/i386/i386-options.cc | 6 ++++++ > > gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c | 2 +- > > gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c | 12 ++++++++++++ > > gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c | 2 +- > > gcc/testsuite/gcc.target/i386/pieces-memset-38.c | 2 +- > > gcc/testsuite/gcc.target/i386/pieces-memset-40.c | 2 +- > > gcc/testsuite/gcc.target/i386/pieces-memset-41.c | 2 +- > > gcc/testsuite/gcc.target/i386/pieces-memset-42.c | 2 +- > > gcc/testsuite/gcc.target/i386/pieces-memset-43.c | 2 +- > > gcc/testsuite/gcc.target/i386/pieces-memset-51.c | 12 ++++++++++++ > > gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c | 2 +- > > gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c | 15 +++++++++++++++ > > 12 files changed, 53 insertions(+), 8 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-51.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c > > > > diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc > > index f423455b363..f79257cc764 100644 > > --- a/gcc/config/i386/i386-options.cc > > +++ 
b/gcc/config/i386/i386-options.cc > > @@ -3023,6 +3023,9 @@ ix86_option_override_internal (bool main_args_p, > > if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) > > && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) > > opts->x_ix86_move_max = PVW_AVX512; > > + /* Align with vectorizer to avoid potential STLF issue. */ > > + else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) > > + opts->x_ix86_move_max = PVW_AVX256; > > else > > opts->x_ix86_move_max = PVW_AVX128; > > } > > @@ -3047,6 +3050,9 @@ ix86_option_override_internal (bool main_args_p, > > if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) > > && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) > > opts->x_ix86_store_max = PVW_AVX512; > > + /* Align with vectorizer to avoid potential STLF issue. */ > > + else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) > > + opts->x_ix86_store_max = PVW_AVX256; > > else > > opts->x_ix86_store_max = PVW_AVX128; > > } > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c > > index 5faee21f9b9..53ad0b3be44 100644 > > --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c > > +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c > > @@ -1,5 +1,5 @@ > > /* { dg-do compile } */ > > -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ > > +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ > > > > extern char *dst, *src; > > > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c > > new file mode 100644 > > index 00000000000..605b3623ffc > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c > > @@ -0,0 +1,12 @@ > > +/* { dg-do compile { target { ! 
ia32 } } } */ > > +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ > > + > > +extern char *dst, *src; > > + > > +void > > +foo (void) > > +{ > > + __builtin_memcpy (dst, src, 33); > > +} > > + > > +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c > > index 5f99cc98c47..cfd2a86cf33 100644 > > --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c > > +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c > > @@ -1,5 +1,5 @@ > > /* { dg-do compile { target { ! ia32 } } } */ > > -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ > > +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ > > > > extern char *dst, *src; > > > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c > > index ed4a24a54fd..ddd194debd5 100644 > > --- a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c > > +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c > > @@ -1,5 +1,5 @@ > > /* { dg-do compile } */ > > -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */ > > +/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */ > > > > extern char *dst; > > > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c > > index 86358c99a83..5878876550c 100644 > > --- a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c > > +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c > > @@ -1,5 +1,5 @@ > > /* { dg-do compile } */ > > -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */ > > +/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */ > > /* Cope with --enable-frame-pointer, Solaris/x86 -mstackrealign default. 
*/ > > /* { dg-additional-options "-fomit-frame-pointer -mno-stackrealign" } */ > > > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c > > index d7a27f52983..27a6c8ad139 100644 > > --- a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c > > +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c > > @@ -1,5 +1,5 @@ > > /* { dg-do compile } */ > > -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge -mno-stackrealign" } */ > > +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge -mno-stackrealign" } */ > > /* Cope with --enable-frame-pointer. */ > > /* { dg-additional-options "-fomit-frame-pointer" } */ > > > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c > > index df0c122aae7..103da699ae5 100644 > > --- a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c > > +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c > > @@ -1,5 +1,5 @@ > > /* { dg-do compile } */ > > -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ > > +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ > > > > extern char *dst; > > > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c > > index 2f2179c2df9..f1494e17610 100644 > > --- a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c > > +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c > > @@ -1,5 +1,5 @@ > > /* { dg-do compile } */ > > -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ > > +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ > > > > extern char *dst; > > > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-51.c b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c > > new file mode 100644 > > index 00000000000..192ec0d1647 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c > > 
@@ -0,0 +1,12 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ > > + > > +extern char *dst; > > + > > +void > > +foo (int x) > > +{ > > + __builtin_memset (dst, x, 64); > > +} > > + > > +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c > > index 90446edb4f3..9bb94b7419b 100644 > > --- a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c > > +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c > > @@ -1,5 +1,5 @@ > > /* { dg-do compile { target { ! ia32 } } } */ > > -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ > > +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ > > > > extern char *strcpy (char *, const char *); > > > > diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c > > new file mode 100644 > > index 00000000000..df7571b547f > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c > > @@ -0,0 +1,15 @@ > > +/* { dg-do compile { target { ! ia32 } } } */ > > +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ > > + > > +extern char *strcpy (char *, const char *); > > + > > +void > > +foo (char *s) > > +{ > > + strcpy (s, > > + "1234567890abcdef123456abcdef5678123456abcdef567abcdef678" > > + "1234567"); > > +} > > + > > +/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%ymm" 2 } } */ > > +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ > > -- > > 2.31.1 > >
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index f423455b363..f79257cc764 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -3023,6 +3023,9 @@ ix86_option_override_internal (bool main_args_p, if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) opts->x_ix86_move_max = PVW_AVX512; + /* Align with vectorizer to avoid potential STLF issue. */ + else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) + opts->x_ix86_move_max = PVW_AVX256; else opts->x_ix86_move_max = PVW_AVX128; } @@ -3047,6 +3050,9 @@ ix86_option_override_internal (bool main_args_p, if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) opts->x_ix86_store_max = PVW_AVX512; + /* Align with vectorizer to avoid potential STLF issue. */ + else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) + opts->x_ix86_store_max = PVW_AVX256; else opts->x_ix86_store_max = PVW_AVX128; } diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c index 5faee21f9b9..53ad0b3be44 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ extern char *dst, *src; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c new file mode 100644 index 00000000000..605b3623ffc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c @@ -0,0 +1,12 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ + +extern char *dst, *src; + +void +foo (void) +{ + __builtin_memcpy (dst, src, 33); +} + +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c index 5f99cc98c47..cfd2a86cf33 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { ! ia32 } } } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ extern char *dst, *src; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c index ed4a24a54fd..ddd194debd5 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c index 86358c99a83..5878876550c 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */ /* Cope with --enable-frame-pointer, Solaris/x86 -mstackrealign default. 
*/ /* { dg-additional-options "-fomit-frame-pointer -mno-stackrealign" } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c index d7a27f52983..27a6c8ad139 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge -mno-stackrealign" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge -mno-stackrealign" } */ /* Cope with --enable-frame-pointer. */ /* { dg-additional-options "-fomit-frame-pointer" } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c index df0c122aae7..103da699ae5 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c index 2f2179c2df9..f1494e17610 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-51.c b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c new file mode 100644 index 00000000000..192ec0d1647 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ + +extern char *dst; + +void +foo (int x) +{ + 
__builtin_memset (dst, x, 64); +} + +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c index 90446edb4f3..9bb94b7419b 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { ! ia32 } } } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ extern char *strcpy (char *, const char *); diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c new file mode 100644 index 00000000000..df7571b547f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ + +extern char *strcpy (char *, const char *); + +void +foo (char *s) +{ + strcpy (s, + "1234567890abcdef123456abcdef5678123456abcdef567abcdef678" + "1234567"); +} + +/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%ymm" 2 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */