Message ID | YYSuqkJe2ZwADOA9@toto.the-meissners.org |
---|---|
State | New |
Headers | show |
Series | Add Power10 XXSPLTI* and LXVKQ instructions | expand |
On Fri, 2021-11-05 at 00:10 -0400, Michael Meissner wrote: > Generate XXSPLTIDP for vectors on power10. > > This patch implements XXSPLTIDP support for all vector constants. The > XXSPLTIDP instruction is given a 32-bit immediate that is converted to a vector > of two DFmode constants. The immediate is in SFmode format, so only constants > that fit as SFmode values can be loaded with XXSPLTIDP. > > The constraint (eP) added in the previous patch for XXSPLTIW is also used > for XXSPLTIDP. > ok > DImode scalar constants are not handled. This is due to the majority of DImode > constants will be in the GPR registers. With vector registers, you have the > problem that XXSPLTIDP splats the double word into both elements of the > vector. However, if TImode is loaded with an integer constant, it wants a full > 128-bit constant. This may be worth as adding to a todo somewhere in the code. > > SFmode and DFmode scalar constants are not handled in this patch. The > support for for those constants will be in the next patch. ok > > I have added a temporary switch (-msplat-float-constant) to control whether or > not the XXSPLTIDP instruction is generated. > > I added 2 new tests to test loading up V2DI and V2DF vector constants. > > 2021-11-05 Michael Meissner <meissner@the-meissners.org> > > gcc/ > > * config/rs6000/predicates.md (easy_fp_constant): Add support for > generating XXSPLTIDP. > (vsx_prefixed_constant): Likewise. > (easy_vector_constant): Likewise. > * config/rs6000/rs6000-protos.h (constant_generates_xxspltidp): > New declaration. > * config/rs6000/rs6000.c (output_vec_const_move): Add support for > generating XXSPLTIDP. > (prefixed_xxsplti_p): Likewise. > (constant_generates_xxspltidp): New function. > * config/rs6000/rs6000.opt (-msplat-float-constant): New debug option. > > gcc/testsuite/ > > * gcc.target/powerpc/pr86731-fwrapv-longlong.c: Update insn > regex for power10. > * gcc.target/powerpc/vec-splat-constant-v2df.c: New test. 
> * gcc.target/powerpc/vec-splat-constant-v2di.c: New test. > --- ok > gcc/config/rs6000/predicates.md | 9 ++ > gcc/config/rs6000/rs6000-protos.h | 1 + > gcc/config/rs6000/rs6000.c | 108 ++++++++++++++++++ > gcc/config/rs6000/rs6000.opt | 4 + > .../powerpc/pr86731-fwrapv-longlong.c | 9 +- > .../powerpc/vec-splat-constant-v2df.c | 64 +++++++++++ > .../powerpc/vec-splat-constant-v2di.c | 50 ++++++++ > 7 files changed, 241 insertions(+), 4 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c > > diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md > index ed6252bd0c4..d748b11857c 100644 > --- a/gcc/config/rs6000/predicates.md > +++ b/gcc/config/rs6000/predicates.md > @@ -610,6 +610,9 @@ (define_predicate "easy_fp_constant" > > if (constant_generates_xxspltiw (&vsx_const)) > return true; > + > + if (constant_generates_xxspltidp (&vsx_const)) > + return true; > } > > /* Otherwise consider floating point constants hard, so that the > @@ -653,6 +656,9 @@ (define_predicate "vsx_prefixed_constant" > if (constant_generates_xxspltiw (&vsx_const)) > return true; > > + if (constant_generates_xxspltidp (&vsx_const)) > + return true; > + > return false; > }) > > @@ -727,6 +733,9 @@ (define_predicate "easy_vector_constant" > > if (constant_generates_xxspltiw (&vsx_const)) > return true; > + > + if (constant_generates_xxspltidp (&vsx_const)) > + return true; > } ok > > if (TARGET_P9_VECTOR > diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h > index 99c6a671289..2d28df7442d 100644 > --- a/gcc/config/rs6000/rs6000-protos.h > +++ b/gcc/config/rs6000/rs6000-protos.h > @@ -253,6 +253,7 @@ extern bool vec_const_128bit_to_bytes (rtx, machine_mode, > vec_const_128bit_type *); > extern unsigned constant_generates_lxvkq (vec_const_128bit_type *); > extern unsigned constant_generates_xxspltiw (vec_const_128bit_type 
*); > +extern unsigned constant_generates_xxspltidp (vec_const_128bit_type *); > #endif /* RTX_CODE */ > > #ifdef TREE_CODE > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > index be24f56eb31..8fde48cf2b3 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -7012,6 +7012,13 @@ output_vec_const_move (rtx *operands) > operands[2] = GEN_INT (imm); > return "xxspltiw %x0,%2"; > } > + > + imm = constant_generates_xxspltidp (&vsx_const); > + if (imm) Just a nit that the two lines could be combined into a similar form as used elsewhere as ... if (constant_generates_xxspltidp(&vsx_const)) > + { > + operands[2] = GEN_INT (imm); > + return "xxspltidp %x0,%2"; > + } > } > > if (TARGET_P9_VECTOR > @@ -26809,6 +26816,9 @@ prefixed_xxsplti_p (rtx_insn *insn) > { > if (constant_generates_xxspltiw (&vsx_const)) > return true; > + > + if (constant_generates_xxspltidp (&vsx_const)) > + return true; > } > > return false; > @@ -29014,6 +29024,104 @@ constant_generates_xxspltiw (vec_const_128bit_type *vsx_const) > return vsx_const->words[0]; > } > > +/* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if > + the XXSPLTIDP instruction cannot be used. Otherwise return the immediate > + value to be used with the XXSPLTIDP instruction. */ > + > +unsigned > +constant_generates_xxspltidp (vec_const_128bit_type *vsx_const) > +{ > + if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX) > + return 0; > + > + /* Make sure that the two 64-bit segments are the same. */ > + if (!vsx_const->all_double_words_same) > + return 0; Perhaps more like "Reject if the two 64-bit segments are (not?) the same." > + > + /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP. > + Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). 
*/ > + if (vsx_const->all_bytes_same > + || vsx_const->all_half_words_same > + || vsx_const->all_words_same) > + return 0; > + > + unsigned HOST_WIDE_INT value = vsx_const->double_words[0]; > + > + /* Avoid values that look like DFmode NaN's, except for the normal NaN bit > + pattern and the signalling NaN bit pattern. Recognize infinity and > + negative infinity. */ > + > + /* Bit representation of DFmode normal quiet NaN. */ > +#define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000) > + > + /* Bit representation of DFmode normal signaling NaN. */ > +#define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000) > + > + /* Bit representation of DFmode positive infinity. */ > +#define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000) > + > + /* Bit representation of DFmode negative infinity. */ > +#define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000) Defines may be more useful in a header file? > + > + if (value != RS6000_CONST_DF_NAN > + && value != RS6000_CONST_DF_NANS > + && value != RS6000_CONST_DF_INF > + && value != RS6000_CONST_DF_NEG_INF) > + { > + /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for > + the exponent, and 52 bits for the mantissa (not counting the hidden > + bit used for normal numbers). NaN values have the exponent set to all > + 1 bits, and the mantissa non-zero (mantissa == 0 is infinity). */ > + > + int df_exponent = (value >> 52) & 0x7ff; > + unsigned HOST_WIDE_INT df_mantissa > + = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U); Should the "=" be on the end of the previous line? > + > + if (df_exponent == 0x7ff && df_mantissa != 0) /* other NaNs. */ > + return 0; > + > + /* Avoid values that are DFmode subnormal values. Subnormal numbers have > + the exponent all 0 bits, and the mantissa non-zero. If the value is > + subnormal, then the hidden bit in the mantissa is not set. */ > + if (df_exponent == 0 && df_mantissa != 0) /* subnormal. 
*/ > + return 0; > + } > + > + /* Change the representation to DFmode constant. */ > + long df_words[2] = { vsx_const->words[0], vsx_const->words[1] }; > + > + /* real_from_target takes the target words in target order. */ Extra space before target order. > + if (!BYTES_BIG_ENDIAN) > + std::swap (df_words[0], df_words[1]); > + > + REAL_VALUE_TYPE rv_type; > + real_from_target (&rv_type, df_words, DFmode); > + > + const REAL_VALUE_TYPE *rv = &rv_type; > + > + /* Validate that the number can be stored as a SFmode value. */ > + if (!exact_real_truncate (SFmode, rv)) > + return 0; > + > + /* Validate that the number is not a SFmode subnormal value (exponent is 0, > + mantissa field is non-zero) which is undefined for the XXSPLTIDP > + instruction. */ > + long sf_value; > + real_to_target (&sf_value, rv, SFmode); > + > + /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent, > + and 23 bits for the mantissa. Subnormal numbers have the exponent all > + 0 bits, and the mantissa non-zero. */ > + long sf_exponent = (sf_value >> 23) & 0xFF; > + long sf_mantissa = sf_value & 0x7FFFFF; > + > + if (sf_exponent == 0 && sf_mantissa != 0) > + return 0; > + > + /* Return the immediate to be used. */ > + return sf_value; > +} ok > + > > struct gcc_target targetm = TARGET_INITIALIZER; > > diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt > index ec7b106fddb..c1d661d7e6b 100644 > --- a/gcc/config/rs6000/rs6000.opt > +++ b/gcc/config/rs6000/rs6000.opt > @@ -644,6 +644,10 @@ msplat-word-constant > Target Var(TARGET_SPLAT_WORD_CONSTANT) Init(1) Save > Generate (do not generate) code that uses the XXSPLTIW instruction. > > +msplat-float-constant > +Target Var(TARGET_SPLAT_FLOAT_CONSTANT) Init(1) Save > +Generate (do not generate) code that uses the XXSPLTIDP instruction. > + > mieee128-constant > Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save > Generate (do not generate) code that uses the LXVKQ instruction. 
ok > diff --git a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c > index bd1502bb30a..dcb30e1d886 100644 > --- a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c > +++ b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c > @@ -24,11 +24,12 @@ vector signed long long splats4(void) > return (vector signed long long) vec_sl(mzero, mzero); > } > > -/* Codegen will consist of splat and shift instructions for most types. > - If folding is enabled, the vec_sl tests using vector long long type will > - generate a lvx instead of a vspltisw+vsld pair. */ > +/* Codegen will consist of splat and shift instructions for most types. If > + folding is enabled, the vec_sl tests using vector long long type will > + generate a lvx instead of a vspltisw+vsld pair. On power10, it will > + generate a xxspltidp instruction instead of the lvx. */ > > /* { dg-final { scan-assembler-times {\mvspltis[bhw]\M} 0 } } */ > /* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 0 } } */ > -/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M|\mxxspltidp\M} 2 } } */ ok No further comments, Thanks -Will > > diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c > new file mode 100644 > index 00000000000..82ffc86f8aa > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c > @@ -0,0 +1,64 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target power10_ok } */ > +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ > + > +#include <math.h> > + > +/* Test generating V2DFmode constants with the ISA 3.1 (power10) XXSPLTIDP > + instruction. */ > + > +vector double > +v2df_double_0 (void) > +{ > + return (vector double) { 0.0, 0.0 }; /* XXSPLTIB or XXLXOR. 
*/ > +} > + > +vector double > +v2df_double_1 (void) > +{ > + return (vector double) { 1.0, 1.0 }; /* XXSPLTIDP. */ > +} > + > +#ifndef __FAST_MATH__ > +vector double > +v2df_double_m0 (void) > +{ > + return (vector double) { -0.0, -0.0 }; /* XXSPLTIDP. */ > +} > + > +vector double > +v2df_double_nan (void) > +{ > + return (vector double) { __builtin_nan (""), > + __builtin_nan ("") }; /* XXSPLTIDP. */ > +} > + > +vector double > +v2df_double_inf (void) > +{ > + return (vector double) { __builtin_inf (), > + __builtin_inf () }; /* XXSPLTIDP. */ > +} > + > +vector double > +v2df_double_m_inf (void) > +{ > + return (vector double) { - __builtin_inf (), > + - __builtin_inf () }; /* XXSPLTIDP. */ > +} > +#endif > + > +vector double > +v2df_double_pi (void) > +{ > + return (vector double) { M_PI, M_PI }; /* PLXV. */ > +} > + > +vector double > +v2df_double_denorm (void) > +{ > + return (vector double) { (double)0x1p-149f, > + (double)0x1p-149f }; /* PLXV. */ > +} > + > +/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c > new file mode 100644 > index 00000000000..4d44f943d26 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c > @@ -0,0 +1,50 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target power10_ok } */ > +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ > + > +/* Test generating V2DImode constants that have the same bit pattern as > + V2DFmode constants that can be loaded with the XXSPLTIDP instruction with > + the ISA 3.1 (power10). */ > + > +vector long long > +vector_0 (void) > +{ > + /* XXSPLTIB or XXLXOR. */ > + return (vector long long) { 0LL, 0LL }; > +} > + > +vector long long > +vector_1 (void) > +{ > + /* XXSPLTIB and VEXTSB2D. 
*/ > + return (vector long long) { 1LL, 1LL }; > +} > + > +/* 0x8000000000000000LL is the bit pattern for -0.0, which can be generated > + with XXSPLTIDP. */ > +vector long long > +vector_float_neg_0 (void) > +{ > + /* XXSPLTIDP. */ > + return (vector long long) { 0x8000000000000000LL, 0x8000000000000000LL }; > +} > + > +/* 0x3ff0000000000000LL is the bit pattern for 1.0 which can be generated with > + XXSPLTIDP. */ > +vector long long > +vector_float_1_0 (void) > +{ > + /* XXSPLTIDP. */ > + return (vector long long) { 0x3ff0000000000000LL, 0x3ff0000000000000LL }; > +} > + > +/* 0x400921fb54442d18LL is the bit pattern for PI, which cannot be generated > + with XXSPLTIDP. */ > +vector long long > +scalar_pi (void) > +{ > + /* PLXV. */ > + return (vector long long) { 0x400921fb54442d18LL, 0x400921fb54442d18LL }; > +} > + > +/* { dg-final { scan-assembler-times {\mxxspltidp\M} 2 } } */ > -- > 2.31.1 > >
Ping patch. | Date: Fri, 5 Nov 2021 00:10:18 -0400 | Subject: [PATCH 4/5] Add Power10 XXSPLTIDP for vector constants | Message-ID: <YYSuqkJe2ZwADOA9@toto.the-meissners.org>
Ping patch #2. | Date: Fri, 5 Nov 2021 00:10:18 -0400 | From: Michael Meissner <meissner@linux.ibm.com> | Subject: [PATCH 4/5] Add Power10 XXSPLTIDP for vector constants | Message-ID: <YYSuqkJe2ZwADOA9@toto.the-meissners.org> https://gcc.gnu.org/pipermail/gcc-patches/2021-November/583393.html Note, I will be on-line through December 20th. I will be off-line from December 21st through January 1st.
On Fri, Nov 5, 2021 at 3:24 PM will schmidt <will_schmidt@vnet.ibm.com> wrote: > > On Fri, 2021-11-05 at 00:10 -0400, Michael Meissner wrote: > > Generate XXSPLTIDP for vectors on power10. > > > > This patch implements XXSPLTIDP support for all vector constants. The > > XXSPLTIDP instruction is given a 32-bit immediate that is converted to a vector > > of two DFmode constants. The immediate is in SFmode format, so only constants > > that fit as SFmode values can be loaded with XXSPLTIDP. > > > > The constraint (eP) added in the previous patch for XXSPLTIW is also used > > for XXSPLTIDP. > > > > ok > > > > DImode scalar constants are not handled. This is due to the majority of DImode > > constants will be in the GPR registers. With vector registers, you have the > > problem that XXSPLTIDP splats the double word into both elements of the > > vector. However, if TImode is loaded with an integer constant, it wants a full > > 128-bit constant. > > This may be worth as adding to a todo somewhere in the code. > > > > > SFmode and DFmode scalar constants are not handled in this patch. The > > support for for those constants will be in the next patch. > > ok > > > > > I have added a temporary switch (-msplat-float-constant) to control whether or > > not the XXSPLTIDP instruction is generated. > > > > I added 2 new tests to test loading up V2DI and V2DF vector constants. > > > > > > > > 2021-11-05 Michael Meissner <meissner@the-meissners.org> > > > > gcc/ > > > > * config/rs6000/predicates.md (easy_fp_constant): Add support for > > generating XXSPLTIDP. > > (vsx_prefixed_constant): Likewise. > > (easy_vector_constant): Likewise. > > * config/rs6000/rs6000-protos.h (constant_generates_xxspltidp): > > New declaration. > > * config/rs6000/rs6000.c (output_vec_const_move): Add support for > > generating XXSPLTIDP. > > (prefixed_xxsplti_p): Likewise. > > (constant_generates_xxspltidp): New function. > > * config/rs6000/rs6000.opt (-msplat-float-constant): New debug option. 
> > > > gcc/testsuite/ > > > > * gcc.target/powerpc/pr86731-fwrapv-longlong.c: Update insn > > regex for power10. > > * gcc.target/powerpc/vec-splat-constant-v2df.c: New test. > > * gcc.target/powerpc/vec-splat-constant-v2di.c: New test. > > --- > > > ok > > > gcc/config/rs6000/predicates.md | 9 ++ > > gcc/config/rs6000/rs6000-protos.h | 1 + > > gcc/config/rs6000/rs6000.c | 108 ++++++++++++++++++ > > gcc/config/rs6000/rs6000.opt | 4 + > > .../powerpc/pr86731-fwrapv-longlong.c | 9 +- > > .../powerpc/vec-splat-constant-v2df.c | 64 +++++++++++ > > .../powerpc/vec-splat-constant-v2di.c | 50 ++++++++ > > 7 files changed, 241 insertions(+), 4 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c > > create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c > > > > diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md > > index ed6252bd0c4..d748b11857c 100644 > > --- a/gcc/config/rs6000/predicates.md > > +++ b/gcc/config/rs6000/predicates.md > > @@ -610,6 +610,9 @@ (define_predicate "easy_fp_constant" > > > > if (constant_generates_xxspltiw (&vsx_const)) > > return true; > > + > > + if (constant_generates_xxspltidp (&vsx_const)) > > + return true; > > } > > > > /* Otherwise consider floating point constants hard, so that the > > @@ -653,6 +656,9 @@ (define_predicate "vsx_prefixed_constant" > > if (constant_generates_xxspltiw (&vsx_const)) > > return true; > > > > + if (constant_generates_xxspltidp (&vsx_const)) > > + return true; > > + > > return false; > > }) > > > > @@ -727,6 +733,9 @@ (define_predicate "easy_vector_constant" > > > > if (constant_generates_xxspltiw (&vsx_const)) > > return true; > > + > > + if (constant_generates_xxspltidp (&vsx_const)) > > + return true; > > } > > > ok > > > > > if (TARGET_P9_VECTOR > > diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h > > index 99c6a671289..2d28df7442d 100644 > > --- 
a/gcc/config/rs6000/rs6000-protos.h > > +++ b/gcc/config/rs6000/rs6000-protos.h > > @@ -253,6 +253,7 @@ extern bool vec_const_128bit_to_bytes (rtx, machine_mode, > > vec_const_128bit_type *); > > extern unsigned constant_generates_lxvkq (vec_const_128bit_type *); > > extern unsigned constant_generates_xxspltiw (vec_const_128bit_type *); > > +extern unsigned constant_generates_xxspltidp (vec_const_128bit_type *); > > #endif /* RTX_CODE */ > > > > #ifdef TREE_CODE > > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > > index be24f56eb31..8fde48cf2b3 100644 > > --- a/gcc/config/rs6000/rs6000.c > > +++ b/gcc/config/rs6000/rs6000.c > > @@ -7012,6 +7012,13 @@ output_vec_const_move (rtx *operands) > > operands[2] = GEN_INT (imm); > > return "xxspltiw %x0,%2"; > > } > > + > > + imm = constant_generates_xxspltidp (&vsx_const); > > + if (imm) > > > Just a nit that the two lines could be combined into a similar form > as used elsewhere as ... > if (constant_generates_xxspltidp(&vsx_const)) > > > > + { > > + operands[2] = GEN_INT (imm); > > + return "xxspltidp %x0,%2"; > > + } > > > } > > > > if (TARGET_P9_VECTOR > > @@ -26809,6 +26816,9 @@ prefixed_xxsplti_p (rtx_insn *insn) > > { > > if (constant_generates_xxspltiw (&vsx_const)) > > return true; > > + > > + if (constant_generates_xxspltidp (&vsx_const)) > > + return true; > > } > > > > return false; > > @@ -29014,6 +29024,104 @@ constant_generates_xxspltiw (vec_const_128bit_type *vsx_const) > > return vsx_const->words[0]; > > } > > > > +/* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if > > + the XXSPLTIDP instruction cannot be used. Otherwise return the immediate > > + value to be used with the XXSPLTIDP instruction. */ > > + > > +unsigned > > +constant_generates_xxspltidp (vec_const_128bit_type *vsx_const) > > +{ > > + if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX) > > + return 0; > > + > > + /* Make sure that the two 64-bit segments are the same. 
*/ > > + if (!vsx_const->all_double_words_same) > > + return 0; > > Perhaps more like "Reject if the two 64-bit segments are (not?) the > same." > > > > + > > + /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP. > > + Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). */ > > + if (vsx_const->all_bytes_same > > + || vsx_const->all_half_words_same > > + || vsx_const->all_words_same) > > + return 0; > > + > > + unsigned HOST_WIDE_INT value = vsx_const->double_words[0]; > > + > > + /* Avoid values that look like DFmode NaN's, except for the normal NaN bit > > + pattern and the signalling NaN bit pattern. Recognize infinity and > > + negative infinity. */ > > + > > + /* Bit representation of DFmode normal quiet NaN. */ > > +#define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000) > > + > > + /* Bit representation of DFmode normal signaling NaN. */ > > +#define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000) > > + > > + /* Bit representation of DFmode positive infinity. */ > > +#define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000) > > + > > + /* Bit representation of DFmode negative infinity. */ > > +#define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000) > > Defines may be more useful in a header file? > > > + > > + if (value != RS6000_CONST_DF_NAN > > + && value != RS6000_CONST_DF_NANS > > + && value != RS6000_CONST_DF_INF > > + && value != RS6000_CONST_DF_NEG_INF) > > + { > > + /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for > > + the exponent, and 52 bits for the mantissa (not counting the hidden > > + bit used for normal numbers). NaN values have the exponent set to all > > + 1 bits, and the mantissa non-zero (mantissa == 0 is infinity). */ > > + > > + int df_exponent = (value >> 52) & 0x7ff; > > + unsigned HOST_WIDE_INT df_mantissa > > + = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U); > > > Should the "=" be on the end of the previous line? 
> > > > + > > + if (df_exponent == 0x7ff && df_mantissa != 0) /* other NaNs. */ > > + return 0; > > + > > + /* Avoid values that are DFmode subnormal values. Subnormal numbers have > > + the exponent all 0 bits, and the mantissa non-zero. If the value is > > + subnormal, then the hidden bit in the mantissa is not set. */ > > + if (df_exponent == 0 && df_mantissa != 0) /* subnormal. */ > > + return 0; > > + } > > + > > + /* Change the representation to DFmode constant. */ > > + long df_words[2] = { vsx_const->words[0], vsx_const->words[1] }; > > + > > + /* real_from_target takes the target words in target order. */ > > Extra space before target order. > > > + if (!BYTES_BIG_ENDIAN) > > + std::swap (df_words[0], df_words[1]); > > + > > + REAL_VALUE_TYPE rv_type; > > + real_from_target (&rv_type, df_words, DFmode); > > + > > + const REAL_VALUE_TYPE *rv = &rv_type; > > + > > + /* Validate that the number can be stored as a SFmode value. */ > > + if (!exact_real_truncate (SFmode, rv)) > > + return 0; > > + > > + /* Validate that the number is not a SFmode subnormal value (exponent is 0, > > + mantissa field is non-zero) which is undefined for the XXSPLTIDP > > + instruction. */ > > + long sf_value; > > + real_to_target (&sf_value, rv, SFmode); > > + > > + /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent, > > + and 23 bits for the mantissa. Subnormal numbers have the exponent all > > + 0 bits, and the mantissa non-zero. */ > > + long sf_exponent = (sf_value >> 23) & 0xFF; > > + long sf_mantissa = sf_value & 0x7FFFFF; > > + > > + if (sf_exponent == 0 && sf_mantissa != 0) > > + return 0; > > + > > + /* Return the immediate to be used. 
*/ > > + return sf_value; > > +} > > ok > > > + > > > > struct gcc_target targetm = TARGET_INITIALIZER; > > > > diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt > > index ec7b106fddb..c1d661d7e6b 100644 > > --- a/gcc/config/rs6000/rs6000.opt > > +++ b/gcc/config/rs6000/rs6000.opt > > @@ -644,6 +644,10 @@ msplat-word-constant > > Target Var(TARGET_SPLAT_WORD_CONSTANT) Init(1) Save > > Generate (do not generate) code that uses the XXSPLTIW instruction. > > > > +msplat-float-constant > > +Target Var(TARGET_SPLAT_FLOAT_CONSTANT) Init(1) Save > > +Generate (do not generate) code that uses the XXSPLTIDP instruction. > > + > > mieee128-constant > > Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save > > Generate (do not generate) code that uses the LXVKQ instruction. > > ok > > > > diff --git a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c > > index bd1502bb30a..dcb30e1d886 100644 > > --- a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c > > +++ b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c > > @@ -24,11 +24,12 @@ vector signed long long splats4(void) > > return (vector signed long long) vec_sl(mzero, mzero); > > } > > > > -/* Codegen will consist of splat and shift instructions for most types. > > - If folding is enabled, the vec_sl tests using vector long long type will > > - generate a lvx instead of a vspltisw+vsld pair. */ > > +/* Codegen will consist of splat and shift instructions for most types. If > > + folding is enabled, the vec_sl tests using vector long long type will > > + generate a lvx instead of a vspltisw+vsld pair. On power10, it will > > + generate a xxspltidp instruction instead of the lvx. 
*/ > > > > /* { dg-final { scan-assembler-times {\mvspltis[bhw]\M} 0 } } */ > > /* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 0 } } */ > > -/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M} 2 } } */ > > +/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M|\mxxspltidp\M} 2 } } */ > > > ok > > No further comments, > Thanks > -Will Okay. Thanks, David
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index ed6252bd0c4..d748b11857c 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -610,6 +610,9 @@ (define_predicate "easy_fp_constant" if (constant_generates_xxspltiw (&vsx_const)) return true; + + if (constant_generates_xxspltidp (&vsx_const)) + return true; } /* Otherwise consider floating point constants hard, so that the @@ -653,6 +656,9 @@ (define_predicate "vsx_prefixed_constant" if (constant_generates_xxspltiw (&vsx_const)) return true; + if (constant_generates_xxspltidp (&vsx_const)) + return true; + return false; }) @@ -727,6 +733,9 @@ (define_predicate "easy_vector_constant" if (constant_generates_xxspltiw (&vsx_const)) return true; + + if (constant_generates_xxspltidp (&vsx_const)) + return true; } if (TARGET_P9_VECTOR diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 99c6a671289..2d28df7442d 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -253,6 +253,7 @@ extern bool vec_const_128bit_to_bytes (rtx, machine_mode, vec_const_128bit_type *); extern unsigned constant_generates_lxvkq (vec_const_128bit_type *); extern unsigned constant_generates_xxspltiw (vec_const_128bit_type *); +extern unsigned constant_generates_xxspltidp (vec_const_128bit_type *); #endif /* RTX_CODE */ #ifdef TREE_CODE diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index be24f56eb31..8fde48cf2b3 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -7012,6 +7012,13 @@ output_vec_const_move (rtx *operands) operands[2] = GEN_INT (imm); return "xxspltiw %x0,%2"; } + + imm = constant_generates_xxspltidp (&vsx_const); + if (imm) + { + operands[2] = GEN_INT (imm); + return "xxspltidp %x0,%2"; + } } if (TARGET_P9_VECTOR @@ -26809,6 +26816,9 @@ prefixed_xxsplti_p (rtx_insn *insn) { if (constant_generates_xxspltiw (&vsx_const)) return true; + + if 
(constant_generates_xxspltidp (&vsx_const)) + return true; } return false; @@ -29014,6 +29024,104 @@ constant_generates_xxspltiw (vec_const_128bit_type *vsx_const) return vsx_const->words[0]; } +/* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if + the XXSPLTIDP instruction cannot be used. Otherwise return the immediate + value to be used with the XXSPLTIDP instruction. */ + +unsigned +constant_generates_xxspltidp (vec_const_128bit_type *vsx_const) +{ + if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX) + return 0; + + /* Make sure that the two 64-bit segments are the same. */ + if (!vsx_const->all_double_words_same) + return 0; + + /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP. + Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). */ + if (vsx_const->all_bytes_same + || vsx_const->all_half_words_same + || vsx_const->all_words_same) + return 0; + + unsigned HOST_WIDE_INT value = vsx_const->double_words[0]; + + /* Avoid values that look like DFmode NaN's, except for the normal NaN bit + pattern and the signalling NaN bit pattern. Recognize infinity and + negative infinity. */ + + /* Bit representation of DFmode normal quiet NaN. */ +#define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000) + + /* Bit representation of DFmode normal signaling NaN. */ +#define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000) + + /* Bit representation of DFmode positive infinity. */ +#define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000) + + /* Bit representation of DFmode negative infinity. 
*/ +#define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000) + + if (value != RS6000_CONST_DF_NAN + && value != RS6000_CONST_DF_NANS + && value != RS6000_CONST_DF_INF + && value != RS6000_CONST_DF_NEG_INF) + { + /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for + the exponent, and 52 bits for the mantissa (not counting the hidden + bit used for normal numbers). NaN values have the exponent set to all + 1 bits, and the mantissa non-zero (mantissa == 0 is infinity). */ + + int df_exponent = (value >> 52) & 0x7ff; + unsigned HOST_WIDE_INT df_mantissa + = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U); + + if (df_exponent == 0x7ff && df_mantissa != 0) /* other NaNs. */ + return 0; + + /* Avoid values that are DFmode subnormal values. Subnormal numbers have + the exponent all 0 bits, and the mantissa non-zero. If the value is + subnormal, then the hidden bit in the mantissa is not set. */ + if (df_exponent == 0 && df_mantissa != 0) /* subnormal. */ + return 0; + } + + /* Change the representation to DFmode constant. */ + long df_words[2] = { vsx_const->words[0], vsx_const->words[1] }; + + /* real_from_target takes the target words in target order. */ + if (!BYTES_BIG_ENDIAN) + std::swap (df_words[0], df_words[1]); + + REAL_VALUE_TYPE rv_type; + real_from_target (&rv_type, df_words, DFmode); + + const REAL_VALUE_TYPE *rv = &rv_type; + + /* Validate that the number can be stored as a SFmode value. */ + if (!exact_real_truncate (SFmode, rv)) + return 0; + + /* Validate that the number is not a SFmode subnormal value (exponent is 0, + mantissa field is non-zero) which is undefined for the XXSPLTIDP + instruction. */ + long sf_value; + real_to_target (&sf_value, rv, SFmode); + + /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent, + and 23 bits for the mantissa. Subnormal numbers have the exponent all + 0 bits, and the mantissa non-zero. 
*/ + long sf_exponent = (sf_value >> 23) & 0xFF; + long sf_mantissa = sf_value & 0x7FFFFF; + + if (sf_exponent == 0 && sf_mantissa != 0) + return 0; + + /* Return the immediate to be used. */ + return sf_value; +} + struct gcc_target targetm = TARGET_INITIALIZER; diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index ec7b106fddb..c1d661d7e6b 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -644,6 +644,10 @@ msplat-word-constant Target Var(TARGET_SPLAT_WORD_CONSTANT) Init(1) Save Generate (do not generate) code that uses the XXSPLTIW instruction. +msplat-float-constant +Target Var(TARGET_SPLAT_FLOAT_CONSTANT) Init(1) Save +Generate (do not generate) code that uses the XXSPLTIDP instruction. + mieee128-constant Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save Generate (do not generate) code that uses the LXVKQ instruction. diff --git a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c index bd1502bb30a..dcb30e1d886 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c +++ b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c @@ -24,11 +24,12 @@ vector signed long long splats4(void) return (vector signed long long) vec_sl(mzero, mzero); } -/* Codegen will consist of splat and shift instructions for most types. - If folding is enabled, the vec_sl tests using vector long long type will - generate a lvx instead of a vspltisw+vsld pair. */ +/* Codegen will consist of splat and shift instructions for most types. If + folding is enabled, the vec_sl tests using vector long long type will + generate a lvx instead of a vspltisw+vsld pair. On power10, it will + generate a xxspltidp instruction instead of the lvx. 
*/ /* { dg-final { scan-assembler-times {\mvspltis[bhw]\M} 0 } } */ /* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 0 } } */ -/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M|\mxxspltidp\M} 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c new file mode 100644 index 00000000000..82ffc86f8aa --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c @@ -0,0 +1,64 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +#include <math.h> + +/* Test generating V2DFmode constants with the ISA 3.1 (power10) XXSPLTIDP + instruction. */ + +vector double +v2df_double_0 (void) +{ + return (vector double) { 0.0, 0.0 }; /* XXSPLTIB or XXLXOR. */ +} + +vector double +v2df_double_1 (void) +{ + return (vector double) { 1.0, 1.0 }; /* XXSPLTIDP. */ +} + +#ifndef __FAST_MATH__ +vector double +v2df_double_m0 (void) +{ + return (vector double) { -0.0, -0.0 }; /* XXSPLTIDP. */ +} + +vector double +v2df_double_nan (void) +{ + return (vector double) { __builtin_nan (""), + __builtin_nan ("") }; /* XXSPLTIDP. */ +} + +vector double +v2df_double_inf (void) +{ + return (vector double) { __builtin_inf (), + __builtin_inf () }; /* XXSPLTIDP. */ +} + +vector double +v2df_double_m_inf (void) +{ + return (vector double) { - __builtin_inf (), + - __builtin_inf () }; /* XXSPLTIDP. */ +} +#endif + +vector double +v2df_double_pi (void) +{ + return (vector double) { M_PI, M_PI }; /* PLXV. */ +} + +vector double +v2df_double_denorm (void) +{ + return (vector double) { (double)0x1p-149f, + (double)0x1p-149f }; /* PLXV. 
*/ +} + +/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c new file mode 100644 index 00000000000..4d44f943d26 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c @@ -0,0 +1,50 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test generating V2DImode constants that have the same bit pattern as + V2DFmode constants that can be loaded with the XXSPLTIDP instruction with + the ISA 3.1 (power10). */ + +vector long long +vector_0 (void) +{ + /* XXSPLTIB or XXLXOR. */ + return (vector long long) { 0LL, 0LL }; +} + +vector long long +vector_1 (void) +{ + /* XXSPLTIB and VEXTSB2D. */ + return (vector long long) { 1LL, 1LL }; +} + +/* 0x8000000000000000LL is the bit pattern for -0.0, which can be generated + with XXSPLTIDP. */ +vector long long +vector_float_neg_0 (void) +{ + /* XXSPLTIDP. */ + return (vector long long) { 0x8000000000000000LL, 0x8000000000000000LL }; +} + +/* 0x3ff0000000000000LL is the bit pattern for 1.0 which can be generated with + XXSPLTIDP. */ +vector long long +vector_float_1_0 (void) +{ + /* XXSPLTIDP. */ + return (vector long long) { 0x3ff0000000000000LL, 0x3ff0000000000000LL }; +} + +/* 0x400921fb54442d18LL is the bit pattern for PI, which cannot be generated + with XXSPLTIDP. */ +vector long long +scalar_pi (void) +{ + /* PLXV. */ + return (vector long long) { 0x400921fb54442d18LL, 0x400921fb54442d18LL }; +} + +/* { dg-final { scan-assembler-times {\mxxspltidp\M} 2 } } */