Message ID | 20211203061855.32518-1-hongtao.liu@intel.com |
---|---|
State | New |
Headers | show |
Series | [i386] Prefer INT_SSE_REGS for SSE_FLOAT_MODE_P in preferred_reload_class. | expand |
On Fri, Dec 3, 2021 at 7:19 AM liuhongt <hongtao.liu@intel.com> wrote: > > Hi: > > Please also consider TARGET_INTER_UNIT_MOVES_TO_VEC and > > TARGET_INTER_UNIT_MOVES_FROM_VEC. > Here's the updated patch. > > Also honor TARGET_INTER_UNIT_MOVES_TO/FROM_VEC in > preferred_{,out_}reload_class. > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32\ -march=k8,\ -march=k8}. > Ok? > > gcc/ChangeLog: > > PR target/95740 > * config/i386/i386.c (ix86_preferred_output_reload_class): > don't reload integer register to/from sse register when tune > "inter_unit_moves_to/from_vec" is off. > (ix86_preferred_reload_class): Ditto, also prefer > INT_SSE_REGS for SSE_FLOAT_MODE_P. > * config/i386/i386.h (INT_SSE_CLASS_P): New. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr95740.c: New test. I was thinking about: --cut here-- @@ -19194,9 +19194,17 @@ ix86_preferred_reload_class (rtx x, reg_class_t regclass) return NO_REGS; } - /* Prefer SSE regs only, if we can use them for math. */ + /* Prefer SSE if we can use them for math. Also allow integer regs + when moves between register units are cheap. */ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) - return SSE_CLASS_P (regclass) ? regclass : NO_REGS; + { + if (TARGET_INTER_UNIT_MOVES_FROM_VEC + && TARGET_INTER_UNIT_MOVES_TO_VEC + && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode)) + return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS; + else + return SSE_CLASS_P (regclass) ? regclass : NO_REGS; + } /* Generally when we see PLUS here, it's the function invariant (plus soft-fp const_int). Which can only be computed into general --cut here-- So, INT_SSE class is allowed when interunit moves are enabled. The patch also takes care of 64-bit moves which are expensive on 32-bit targets. Uros. 
> --- > gcc/config/i386/i386.c | 32 +++++++++++++++++++++++-- > gcc/config/i386/i386.h | 2 ++ > gcc/testsuite/gcc.target/i386/pr95740.c | 26 ++++++++++++++++++++ > 3 files changed, 58 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr95740.c > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 80fee627358..5b90c09a0ba 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -19194,9 +19194,24 @@ ix86_preferred_reload_class (rtx x, reg_class_t regclass) > return NO_REGS; > } > > - /* Prefer SSE regs only, if we can use them for math. */ > + /* Unless hard register REGNO is known, it is hard to tell whether a movd > + instruction will be generated based on MODE and REGCLASS, because for > + pseudo-registers, even SFmode could be assigned to INTEGER_CLASS_P. */ > + if (GENERAL_REG_P (x) > + && !TARGET_INTER_UNIT_MOVES_TO_VEC > + && MAYBE_SSE_CLASS_P (regclass)) > + return NO_REGS; > + > + if (SSE_REG_P (x) > + && !TARGET_INTER_UNIT_MOVES_FROM_VEC > + && MAYBE_INTEGER_CLASS_P (regclass)) > + return NO_REGS; > + > + /* Prefer INT_SSE_REGS, enable reload from SSE register to GENERAL_REGS, > + MAYBE_SSE_CLASS_P is too broad, for sse math, FLOAT_SSE_REGS, > + FLOAT_INT_SSE_REGS should be disliked. */ > if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) > - return SSE_CLASS_P (regclass) ? regclass : NO_REGS; > + return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS; > > /* Generally when we see PLUS here, it's the function invariant > (plus soft-fp const_int). Which can only be computed into general > @@ -19226,6 +19241,19 @@ ix86_preferred_reload_class (rtx x, reg_class_t regclass) > static reg_class_t > ix86_preferred_output_reload_class (rtx x, reg_class_t regclass) > { > + > + /* Handle movement between integer and sse register like > + ix86_preferred_reload_class. 
*/ > + if (GENERAL_REG_P (x) > + && !TARGET_INTER_UNIT_MOVES_TO_VEC > + && MAYBE_SSE_CLASS_P (regclass)) > + return NO_REGS; > + > + if (SSE_REG_P (x) > + && !TARGET_INTER_UNIT_MOVES_FROM_VEC > + && MAYBE_INTEGER_CLASS_P (regclass)) > + return NO_REGS; > + > /* Restrict the output reload class to the register bank that we are doing > math on. If we would like not to return a subset of CLASS, reject this > alternative: if reload cannot do this, it will still use its choice. */ > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index 2fda1e0686e..ec90e47904b 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -1283,6 +1283,8 @@ enum reg_class > reg_class_subset_p ((CLASS), FLOAT_REGS) > #define SSE_CLASS_P(CLASS) \ > reg_class_subset_p ((CLASS), ALL_SSE_REGS) > +#define INT_SSE_CLASS_P(CLASS) \ > + reg_class_subset_p ((CLASS), INT_SSE_REGS) > #define MMX_CLASS_P(CLASS) \ > ((CLASS) == MMX_REGS) > #define MASK_CLASS_P(CLASS) \ > diff --git a/gcc/testsuite/gcc.target/i386/pr95740.c b/gcc/testsuite/gcc.target/i386/pr95740.c > new file mode 100644 > index 00000000000..9bc7b862787 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr95740.c > @@ -0,0 +1,26 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-msse2 -O2 -mtune-ctrl=use_incdec -masm=att -mfpmath=sse" } */ > +/* { dg-final { scan-assembler-times {(?n)movd[\t ]*%xmm0.*%eax} 1 } } */ > +/* { dg-final { scan-assembler-times {(?n)incl[\t ]*%eax} 1 } } */ > +/* { dg-final { scan-assembler-times {(?n)movq[\t ]*%xmm0.*%rax} 1 } } */ > +/* { dg-final { scan-assembler-times {(?n)incq[\t ]*%rax} 1 } } */ > + > +int > +foo (float a) > +{ > + union{ > + int b; > + float a;}u; > + u.a = a; > + return u.b + 1; > +} > + > +long long > +foo1 (double a) > +{ > + union{ > + long long b; > + double a;}u; > + u.a = a; > + return u.b + 1; > +} > -- > 2.18.1 >
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 80fee627358..5b90c09a0ba 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -19194,9 +19194,24 @@ ix86_preferred_reload_class (rtx x, reg_class_t regclass) return NO_REGS; } - /* Prefer SSE regs only, if we can use them for math. */ + /* Unless hard register REGNO is known, it is hard to tell whether a movd + instruction will be generated based on MODE and REGCLASS, because for + pseudo-registers, even SFmode could be assigned to INTEGER_CLASS_P. */ + if (GENERAL_REG_P (x) + && !TARGET_INTER_UNIT_MOVES_TO_VEC + && MAYBE_SSE_CLASS_P (regclass)) + return NO_REGS; + + if (SSE_REG_P (x) + && !TARGET_INTER_UNIT_MOVES_FROM_VEC + && MAYBE_INTEGER_CLASS_P (regclass)) + return NO_REGS; + + /* Prefer INT_SSE_REGS, enable reload from SSE register to GENERAL_REGS, + MAYBE_SSE_CLASS_P is too broad, for sse math, FLOAT_SSE_REGS, + FLOAT_INT_SSE_REGS should be disliked. */ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) - return SSE_CLASS_P (regclass) ? regclass : NO_REGS; + return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS; /* Generally when we see PLUS here, it's the function invariant (plus soft-fp const_int). Which can only be computed into general @@ -19226,6 +19241,19 @@ ix86_preferred_reload_class (rtx x, reg_class_t regclass) static reg_class_t ix86_preferred_output_reload_class (rtx x, reg_class_t regclass) { + + /* Handle movement between integer and sse register like + ix86_preferred_reload_class. */ + if (GENERAL_REG_P (x) + && !TARGET_INTER_UNIT_MOVES_TO_VEC + && MAYBE_SSE_CLASS_P (regclass)) + return NO_REGS; + + if (SSE_REG_P (x) + && !TARGET_INTER_UNIT_MOVES_FROM_VEC + && MAYBE_INTEGER_CLASS_P (regclass)) + return NO_REGS; + /* Restrict the output reload class to the register bank that we are doing math on. If we would like not to return a subset of CLASS, reject this alternative: if reload cannot do this, it will still use its choice. 
*/ diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 2fda1e0686e..ec90e47904b 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1283,6 +1283,8 @@ enum reg_class reg_class_subset_p ((CLASS), FLOAT_REGS) #define SSE_CLASS_P(CLASS) \ reg_class_subset_p ((CLASS), ALL_SSE_REGS) +#define INT_SSE_CLASS_P(CLASS) \ + reg_class_subset_p ((CLASS), INT_SSE_REGS) #define MMX_CLASS_P(CLASS) \ ((CLASS) == MMX_REGS) #define MASK_CLASS_P(CLASS) \ diff --git a/gcc/testsuite/gcc.target/i386/pr95740.c b/gcc/testsuite/gcc.target/i386/pr95740.c new file mode 100644 index 00000000000..9bc7b862787 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95740.c @@ -0,0 +1,26 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-msse2 -O2 -mtune-ctrl=use_incdec -masm=att -mfpmath=sse" } */ +/* { dg-final { scan-assembler-times {(?n)movd[\t ]*%xmm0.*%eax} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)incl[\t ]*%eax} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)movq[\t ]*%xmm0.*%rax} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)incq[\t ]*%rax} 1 } } */ + +int +foo (float a) +{ + union{ + int b; + float a;}u; + u.a = a; + return u.b + 1; +} + +long long +foo1 (double a) +{ + union{ + long long b; + double a;}u; + u.a = a; + return u.b + 1; +}