Message ID | 20240523063742.2333446-3-lin1.hu@intel.com |
---|---|
State | New |
Headers | show |
Series | Optimize __builtin_convertvector for x86-64-v4 and | expand |
On Thu, May 23, 2024 at 2:38 PM Hu, Lin1 <lin1.hu@intel.com> wrote:
>
> gcc/ChangeLog:
>
> PR target/107432
> * config/i386/mmx.md (truncv4hiv4qi2): New define_insn.
>
> gcc/testsuite/ChangeLog:
>
> PR target/107432
> * gcc.target/i386/pr107432-6.c: Add test.
> ---
> gcc/config/i386/mmx.md | 10 ++++++++++
> gcc/testsuite/gcc.target/i386/pr107432-1.c | 12 +++++++++++-
> gcc/testsuite/gcc.target/i386/pr107432-6.c | 19 ++++++++++++++++---
> 3 files changed, 37 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 5f342497885..30f0d88af9f 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -4883,6 +4883,16 @@ (define_insn "truncv2hiv2qi2"
> (set_attr "prefix" "evex")
> (set_attr "mode" "TI")])
>
> +(define_insn "truncv4hiv4qi2"
> + [(set (match_operand:V4QI 0 "register_operand" "=v")
> + (truncate:V4QI
> + (match_operand:V4HI 1 "register_operand" "v")))]
> + "TARGET_AVX512VL && TARGET_AVX512BW"

Please also add TARGET_MMX_WITH_SSE since v4hi is 64-bit vector.
Others LGTM.

> + "vpmovwb\t{%1, %0|%0, %1}"
> + [(set_attr "type" "ssemov")
> + (set_attr "prefix" "evex")
> + (set_attr "mode" "TI")])
> +
> (define_mode_iterator V2QI_V2HI [V2QI V2HI])
> (define_insn "truncv2si<mode>2"
> [(set (match_operand:V2QI_V2HI 0 "register_operand" "=v")
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c b/gcc/testsuite/gcc.target/i386/pr107432-1.c
> index a4f37447eb4..e0c7ffc8e5b 100644
> --- a/gcc/testsuite/gcc.target/i386/pr107432-1.c
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
> @@ -7,7 +7,7 @@
> /* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */
> /* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */
> /* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */
> -/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
> +/* { dg-final { scan-assembler-times "vpmovwb" 10 } } */
>
> #include <x86intrin.h>
>
> @@ -113,6 +113,11 @@ __v2qi mm32_cvtepi16_epi8_builtin_convertvector(__v2hi a)
> return __builtin_convertvector((__v2hi)a, __v2qi);
> }
>
> +__v4qi mm64_cvtepi16_epi8_builtin_convertvector(__v4hi a)
> +{
> + return __builtin_convertvector((__v4hi)a, __v4qi);
> +}
> +
> __v8qi mm_cvtepi16_epi8_builtin_convertvector(__m128i a)
> {
> return __builtin_convertvector((__v8hi)a, __v8qi);
> @@ -218,6 +223,11 @@ __v2qu mm32_cvtepu16_epu8_builtin_convertvector(__v2hu a)
> return __builtin_convertvector((__v2hu)a, __v2qu);
> }
>
> +__v4qu mm64_cvtepu16_epu8_builtin_convertvector(__v4hu a)
> +{
> + return __builtin_convertvector((__v4hu)a, __v4qu);
> +}
> +
> __v8qu mm_cvtepu16_epu8_builtin_convertvector(__m128i a)
> {
> return __builtin_convertvector((__v8hu)a, __v8qu);
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-6.c b/gcc/testsuite/gcc.target/i386/pr107432-6.c
> index 4a68a10b089..7d3717d45bc 100644
> --- a/gcc/testsuite/gcc.target/i386/pr107432-6.c
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-6.c
> @@ -8,11 +8,14 @@
> /* { dg-final { scan-assembler-times "vcvttps2dq" 4 { target { ! ia32 } } } } */
> /* { dg-final { scan-assembler-times "vcvttps2udq" 3 { target { ia32 } } } } */
> /* { dg-final { scan-assembler-times "vcvttps2udq" 4 { target { ! ia32 } } } } */
> -/* { dg-final { scan-assembler-times "vcvttph2w" 4 } } */
> -/* { dg-final { scan-assembler-times "vcvttph2uw" 4 } } */
> +/* { dg-final { scan-assembler-times "vcvttph2w" 4 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttph2w" 5 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttph2uw" 4 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttph2uw" 5 { target { ! ia32 } } } } */
> /* { dg-final { scan-assembler-times "vpmovdb" 10 { target { ia32 } } } } */
> /* { dg-final { scan-assembler-times "vpmovdb" 14 { target { ! ia32 } } } } */
> -/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
> +/* { dg-final { scan-assembler-times "vpmovwb" 8 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovwb" 10 { target { ! ia32 } } } } */
>
> #include <x86intrin.h>
>
> @@ -103,6 +106,11 @@ __v2qi mm32_cvtph_epi8_builtin_convertvector(__v2hf a)
> return __builtin_convertvector((__v2hf)a, __v2qi);
> }
>
> +__v4qi mm64_cvtph_epi8_builtin_convertvector(__v4hf a)
> +{
> + return __builtin_convertvector((__v4hf)a, __v4qi);
> +}
> +
> __v8qi mm128_cvtph_epi8_builtin_convertvector(__v8hf a)
> {
> return __builtin_convertvector((__v8hf)a, __v8qi);
> @@ -123,6 +131,11 @@ __v2qu mm32_cvtph_epu8_builtin_convertvector(__v2hf a)
> return __builtin_convertvector((__v2hf)a, __v2qu);
> }
>
> +__v4qu mm64_cvtph_epu8_builtin_convertvector(__v4hf a)
> +{
> + return __builtin_convertvector((__v4hf)a, __v4qu);
> +}
> +
> __v8qu mm128_cvtph_epu8_builtin_convertvector(__v8hf a)
> {
> return __builtin_convertvector((__v8hf)a, __v8qu);
> --
> 2.31.1
>
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 5f342497885..30f0d88af9f 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -4883,6 +4883,16 @@ (define_insn "truncv2hiv2qi2"
 (set_attr "prefix" "evex")
 (set_attr "mode" "TI")])
 
+(define_insn "truncv4hiv4qi2"
+ [(set (match_operand:V4QI 0 "register_operand" "=v")
+ (truncate:V4QI
+ (match_operand:V4HI 1 "register_operand" "v")))]
+ "TARGET_AVX512VL && TARGET_AVX512BW"
+ "vpmovwb\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
 (define_mode_iterator V2QI_V2HI [V2QI V2HI])
 (define_insn "truncv2si<mode>2"
 [(set (match_operand:V2QI_V2HI 0 "register_operand" "=v")
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c b/gcc/testsuite/gcc.target/i386/pr107432-1.c
index a4f37447eb4..e0c7ffc8e5b 100644
--- a/gcc/testsuite/gcc.target/i386/pr107432-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
@@ -7,7 +7,7 @@
 /* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 10 } } */
 
 #include <x86intrin.h>
 
@@ -113,6 +113,11 @@ __v2qi mm32_cvtepi16_epi8_builtin_convertvector(__v2hi a)
 return __builtin_convertvector((__v2hi)a, __v2qi);
 }
 
+__v4qi mm64_cvtepi16_epi8_builtin_convertvector(__v4hi a)
+{
+ return __builtin_convertvector((__v4hi)a, __v4qi);
+}
+
 __v8qi mm_cvtepi16_epi8_builtin_convertvector(__m128i a)
 {
 return __builtin_convertvector((__v8hi)a, __v8qi);
@@ -218,6 +223,11 @@ __v2qu mm32_cvtepu16_epu8_builtin_convertvector(__v2hu a)
 return __builtin_convertvector((__v2hu)a, __v2qu);
 }
 
+__v4qu mm64_cvtepu16_epu8_builtin_convertvector(__v4hu a)
+{
+ return __builtin_convertvector((__v4hu)a, __v4qu);
+}
+
 __v8qu mm_cvtepu16_epu8_builtin_convertvector(__m128i a)
 {
 return __builtin_convertvector((__v8hu)a, __v8qu);
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-6.c b/gcc/testsuite/gcc.target/i386/pr107432-6.c
index 4a68a10b089..7d3717d45bc 100644
--- a/gcc/testsuite/gcc.target/i386/pr107432-6.c
+++ b/gcc/testsuite/gcc.target/i386/pr107432-6.c
@@ -8,11 +8,14 @@
 /* { dg-final { scan-assembler-times "vcvttps2dq" 4 { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vcvttps2udq" 3 { target { ia32 } } } } */
 /* { dg-final { scan-assembler-times "vcvttps2udq" 4 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vcvttph2w" 4 } } */
-/* { dg-final { scan-assembler-times "vcvttph2uw" 4 } } */
+/* { dg-final { scan-assembler-times "vcvttph2w" 4 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttph2w" 5 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw" 4 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw" 5 { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpmovdb" 10 { target { ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpmovdb" 14 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 8 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 10 { target { ! ia32 } } } } */
 
 #include <x86intrin.h>
 
@@ -103,6 +106,11 @@ __v2qi mm32_cvtph_epi8_builtin_convertvector(__v2hf a)
 return __builtin_convertvector((__v2hf)a, __v2qi);
 }
 
+__v4qi mm64_cvtph_epi8_builtin_convertvector(__v4hf a)
+{
+ return __builtin_convertvector((__v4hf)a, __v4qi);
+}
+
 __v8qi mm128_cvtph_epi8_builtin_convertvector(__v8hf a)
 {
 return __builtin_convertvector((__v8hf)a, __v8qi);
@@ -123,6 +131,11 @@ __v2qu mm32_cvtph_epu8_builtin_convertvector(__v2hf a)
 return __builtin_convertvector((__v2hf)a, __v2qu);
 }
 
+__v4qu mm64_cvtph_epu8_builtin_convertvector(__v4hf a)
+{
+ return __builtin_convertvector((__v4hf)a, __v4qu);
+}
+
 __v8qu mm128_cvtph_epu8_builtin_convertvector(__v8hf a)
 {
 return __builtin_convertvector((__v8hf)a, __v8qu);