diff mbox series

[V2] rs6000: Enhance lowpart/highpart DI->SF by mtvsrws/mtvsrd

Message ID 20230213051843.2615021-1-guojiufu@linux.ibm.com
State New
Headers show
Series [V2] rs6000: Enhance lowpart/highpart DI->SF by mtvsrws/mtvsrd | expand

Commit Message

Jiufu Guo Feb. 13, 2023, 5:18 a.m. UTC
Hi,

Compared with the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609654.html
This patch does not use UNSPEC for insn mtvsrws anymore.  And to handle
the subreg better on BE and LE, the predicate "lowpart_subreg_operator"
is introduced. To help the combine pass match the pattern on the high
32 bits of DI, a right shift (ashiftrt/lshiftrt) is still used.

As mentioned in PR108338, on p9, we could use mtvsrws to implement
the conversion from SI#0 to SF (or lowpart DI to SF).

For example:
  *(long long*)buff = di;
  float f = *(float*)(buff);
We currently generate "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" instead of
the shorter "mtvsrws 1,3 ; xscvspdpn 1,1".

This patch updates this, and also enhances the bitcast from the highpart
of DI to SF.

Bootstrap and regtests pass on ppc64{,le}.
Is this ok for trunk?

BR,
Jeff (Jiufu)

	PR target/108338

gcc/ChangeLog:

	* config/rs6000/predicates.md (lowpart_subreg_operator): New
	define_predicate.
	* config/rs6000/rs6000.md (any_rshift): New code_iterator.
	(movsf_from_si2): Rename to...
	(movsf_from_si2_<code>): ... this.
	(si2sf_mtvsrws): New define_insn.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/pr108338.c: New test.

---
 gcc/config/rs6000/predicates.md             |  5 +++
 gcc/config/rs6000/rs6000.md                 | 35 ++++++++++++-----
 gcc/testsuite/gcc.target/powerpc/pr108338.c | 42 +++++++++++++++++++++
 3 files changed, 73 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108338.c

Comments

Jiufu Guo April 26, 2023, 2:59 a.m. UTC | #1
Hi

I would like to ping this patch for stage1:
https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612168.html

BR,
Jeff (Jiufu)

Jiufu Guo <guojiufu@linux.ibm.com> writes:

> Hi,
>
> Compare with previous version:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609654.html
> This patch does not use UNSPEC for insn mtvsrws anymore.  And to handle
> the subreg better on BE and LE, predicate "lowpart_subreg_operator"
> is introducted. To help combine pass to match the pattern on high32
> bit of DI, shiftrt is still used.
>
> As mentioned in PR108338, on p9, we could use mtvsrws to implement
> the conversion from SI#0 to SF (or lowpart DI to SF).
>
> For examples:
>   *(long long*)buff = di;
>   float f = *(float*)(buff);
> We generate "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" instead of
> "mtvsrws 1,3 ; xscvspdpn 1,1".
>
> This patch update this, and also enhance the bitcast from highpart
> DI to SF.
>
> Bootstrap and regtests pass on ppc64{,le}.
> Is this ok for trunk?
>
> BR,
> Jeff (Jiufu)
>
> 	PR target/108338
>
> gcc/ChangeLog:
>
> 	* config/rs6000/predicates.md (lowpart_subreg_operator): New
> 	define_predicate.
> 	* config/rs6000/rs6000.md (any_rshift): New code_iterator.
> 	(movsf_from_si2): Rename to...
> 	(movsf_from_si2_<code>): ... this.
> 	(si2sf_mtvsrws): New define_insn.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/powerpc/pr108338.c: New test.
>
> ---
>  gcc/config/rs6000/predicates.md             |  5 +++
>  gcc/config/rs6000/rs6000.md                 | 35 ++++++++++++-----
>  gcc/testsuite/gcc.target/powerpc/pr108338.c | 42 +++++++++++++++++++++
>  3 files changed, 73 insertions(+), 9 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108338.c
>
> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> index 52c65534e51..e57c9d99c6b 100644
> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -2064,3 +2064,8 @@ (define_predicate "macho_pic_address"
>    else
>      return false;
>  })
> +
> +(define_predicate "lowpart_subreg_operator"
> +  (and (match_code "subreg")
> +       (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op)))
> +		    == SUBREG_BYTE (op)")))
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 4a7812fa592..5b4a7f8d801 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -7539,6 +7539,14 @@ (define_split
>  				 UNSPEC_MOVSI_GOT))]
>    "")
>  
> +(define_insn "si2sf_mtvsrws"
> +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
> +       (subreg:SF (match_operand:SI 1 "gpc_reg_operand" "r") 0))]
> +  "TARGET_P9_VECTOR && TARGET_XSCVSPDPN"
> +  "mtvsrws %x0,%1\n\txscvspdpn %x0,%x0"
> +  [(set_attr "type" "mfvsr")
> +   (set_attr "length" "8")])
> +
>  ;;	   MR          LA
>  ;;	   LWZ         LFIWZX      LXSIWZX
>  ;;	   STW         STFIWX      STXSIWX
> @@ -8203,10 +8211,18 @@ (define_insn_and_split "movsf_from_si"
>    rtx op2 = operands[2];
>    rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
>  
> -  /* Move SF value to upper 32-bits for xscvspdpn.  */
> -  emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
> -  emit_insn (gen_p8_mtvsrd_sf (op0, op2));
> -  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
> +  if (TARGET_P9_VECTOR)
> +    {
> +      emit_insn (gen_si2sf_mtvsrws (op0, gen_lowpart (SImode, op1_di)));
> +    }
> +  else
> +    {
> +      /* Move SF value to upper 32-bits for xscvspdpn.  */
> +      emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
> +      emit_insn (gen_p8_mtvsrd_sf (op0, op2));
> +      emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
> +    }
> +
>    DONE;
>  }
>    [(set_attr "length"
> @@ -8219,18 +8235,19 @@ (define_insn_and_split "movsf_from_si"
>  	    "*,          *,         p9v,       p8v,       *,         *,
>  	     p8v,        p8v,       p8v,       *")])
>  
> +(define_code_iterator any_rshift [ashiftrt lshiftrt])
> +
>  ;; For extracting high part element from DImode register like:
>  ;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>  ;; split it before reload with "and mask" to avoid generating shift right
>  ;; 32 bit then shift left 32 bit.
> -(define_insn_and_split "movsf_from_si2"
> +(define_insn_and_split "movsf_from_si2_<code>"
>    [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
>  	    (unspec:SF
> -	     [(subreg:SI
> -	       (ashiftrt:DI
> +	     [(match_operator:SI 3 "lowpart_subreg_operator"
> +	       [(any_rshift:DI
>  		(match_operand:DI 1 "input_operand" "r")
> -		(const_int 32))
> -	       0)]
> +		(const_int 32))])]
>  	     UNSPEC_SF_FROM_SI))
>    (clobber (match_scratch:DI 2 "=r"))]
>    "TARGET_NO_SF_SUBREG"
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c
> new file mode 100644
> index 00000000000..2438dc13f41
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c
> @@ -0,0 +1,42 @@
> +// { dg-do run }
> +// { dg-options "-O2 -save-temps" }
> +
> +float __attribute__ ((noipa)) sf_from_di_off0 (long long l)
> +{
> +  char buff[16];
> +  *(long long*)buff = l;
> +  float f = *(float*)(buff);
> +  return f;    
> +}
> +
> +float  __attribute__ ((noipa)) sf_from_di_off4 (long long l)
> +{
> +  char buff[16];
> +  *(long long*)buff = l;
> +  float f = *(float*)(buff + 4);
> +  return f; 
> +}
> +
> +/* Under lp64, 'l' is in one DI reg, then check sub DI to SF. */
> +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */
> +/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */
> +
> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */
> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
> +/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
> +
> +union di_sf_sf
> +{
> +  struct {float f1; float f2;};
> +  long long l;
> +};
> +
> +int main()
> +{
> +  union di_sf_sf v;
> +  v.f1 = 1.0f;
> +  v.f2 = 2.0f;
> +  if (sf_from_di_off0 (v.l) != 1.0f || sf_from_di_off4 (v.l) != 2.0f )
> +    __builtin_abort ();
> +  return 0;
> +}
Jiufu Guo May 17, 2023, 6:43 a.m. UTC | #2
Gentle ping...

Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:

> Hi
>
> I would like to ping this patch for stage1:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612168.html
>
> BR,
> Jeff (Jiufu)
>
> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>
>> Hi,
>>
>> Compare with previous version:
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609654.html
>> This patch does not use UNSPEC for insn mtvsrws anymore.  And to handle
>> the subreg better on BE and LE, predicate "lowpart_subreg_operator"
>> is introducted. To help combine pass to match the pattern on high32
>> bit of DI, shiftrt is still used.
>>
>> As mentioned in PR108338, on p9, we could use mtvsrws to implement
>> the conversion from SI#0 to SF (or lowpart DI to SF).
>>
>> For examples:
>>   *(long long*)buff = di;
>>   float f = *(float*)(buff);
>> We generate "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" instead of
>> "mtvsrws 1,3 ; xscvspdpn 1,1".
>>
>> This patch update this, and also enhance the bitcast from highpart
>> DI to SF.
>>
>> Bootstrap and regtests pass on ppc64{,le}.
>> Is this ok for trunk?
>>
>> BR,
>> Jeff (Jiufu)
>>
>> 	PR target/108338
>>
>> gcc/ChangeLog:
>>
>> 	* config/rs6000/predicates.md (lowpart_subreg_operator): New
>> 	define_predicate.
>> 	* config/rs6000/rs6000.md (any_rshift): New code_iterator.
>> 	(movsf_from_si2): Rename to...
>> 	(movsf_from_si2_<code>): ... this.
>> 	(si2sf_mtvsrws): New define_insn.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 	* gcc.target/powerpc/pr108338.c: New test.
>>
>> ---
>>  gcc/config/rs6000/predicates.md             |  5 +++
>>  gcc/config/rs6000/rs6000.md                 | 35 ++++++++++++-----
>>  gcc/testsuite/gcc.target/powerpc/pr108338.c | 42 +++++++++++++++++++++
>>  3 files changed, 73 insertions(+), 9 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108338.c
>>
>> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
>> index 52c65534e51..e57c9d99c6b 100644
>> --- a/gcc/config/rs6000/predicates.md
>> +++ b/gcc/config/rs6000/predicates.md
>> @@ -2064,3 +2064,8 @@ (define_predicate "macho_pic_address"
>>    else
>>      return false;
>>  })
>> +
>> +(define_predicate "lowpart_subreg_operator"
>> +  (and (match_code "subreg")
>> +       (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op)))
>> +		    == SUBREG_BYTE (op)")))
>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>> index 4a7812fa592..5b4a7f8d801 100644
>> --- a/gcc/config/rs6000/rs6000.md
>> +++ b/gcc/config/rs6000/rs6000.md
>> @@ -7539,6 +7539,14 @@ (define_split
>>  				 UNSPEC_MOVSI_GOT))]
>>    "")
>>  
>> +(define_insn "si2sf_mtvsrws"
>> +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
>> +       (subreg:SF (match_operand:SI 1 "gpc_reg_operand" "r") 0))]
>> +  "TARGET_P9_VECTOR && TARGET_XSCVSPDPN"
>> +  "mtvsrws %x0,%1\n\txscvspdpn %x0,%x0"
>> +  [(set_attr "type" "mfvsr")
>> +   (set_attr "length" "8")])
>> +
>>  ;;	   MR          LA
>>  ;;	   LWZ         LFIWZX      LXSIWZX
>>  ;;	   STW         STFIWX      STXSIWX
>> @@ -8203,10 +8211,18 @@ (define_insn_and_split "movsf_from_si"
>>    rtx op2 = operands[2];
>>    rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
>>  
>> -  /* Move SF value to upper 32-bits for xscvspdpn.  */
>> -  emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
>> -  emit_insn (gen_p8_mtvsrd_sf (op0, op2));
>> -  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
>> +  if (TARGET_P9_VECTOR)
>> +    {
>> +      emit_insn (gen_si2sf_mtvsrws (op0, gen_lowpart (SImode, op1_di)));
>> +    }
>> +  else
>> +    {
>> +      /* Move SF value to upper 32-bits for xscvspdpn.  */
>> +      emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
>> +      emit_insn (gen_p8_mtvsrd_sf (op0, op2));
>> +      emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
>> +    }
>> +
>>    DONE;
>>  }
>>    [(set_attr "length"
>> @@ -8219,18 +8235,19 @@ (define_insn_and_split "movsf_from_si"
>>  	    "*,          *,         p9v,       p8v,       *,         *,
>>  	     p8v,        p8v,       p8v,       *")])
>>  
>> +(define_code_iterator any_rshift [ashiftrt lshiftrt])
>> +
>>  ;; For extracting high part element from DImode register like:
>>  ;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>>  ;; split it before reload with "and mask" to avoid generating shift right
>>  ;; 32 bit then shift left 32 bit.
>> -(define_insn_and_split "movsf_from_si2"
>> +(define_insn_and_split "movsf_from_si2_<code>"
>>    [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
>>  	    (unspec:SF
>> -	     [(subreg:SI
>> -	       (ashiftrt:DI
>> +	     [(match_operator:SI 3 "lowpart_subreg_operator"
>> +	       [(any_rshift:DI
>>  		(match_operand:DI 1 "input_operand" "r")
>> -		(const_int 32))
>> -	       0)]
>> +		(const_int 32))])]
>>  	     UNSPEC_SF_FROM_SI))
>>    (clobber (match_scratch:DI 2 "=r"))]
>>    "TARGET_NO_SF_SUBREG"
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c
>> new file mode 100644
>> index 00000000000..2438dc13f41
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c
>> @@ -0,0 +1,42 @@
>> +// { dg-do run }
>> +// { dg-options "-O2 -save-temps" }
>> +
>> +float __attribute__ ((noipa)) sf_from_di_off0 (long long l)
>> +{
>> +  char buff[16];
>> +  *(long long*)buff = l;
>> +  float f = *(float*)(buff);
>> +  return f;    
>> +}
>> +
>> +float  __attribute__ ((noipa)) sf_from_di_off4 (long long l)
>> +{
>> +  char buff[16];
>> +  *(long long*)buff = l;
>> +  float f = *(float*)(buff + 4);
>> +  return f; 
>> +}
>> +
>> +/* Under lp64, 'l' is in one DI reg, then check sub DI to SF. */
>> +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */
>> +/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */
>> +
>> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */
>> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
>> +/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
>> +
>> +union di_sf_sf
>> +{
>> +  struct {float f1; float f2;};
>> +  long long l;
>> +};
>> +
>> +int main()
>> +{
>> +  union di_sf_sf v;
>> +  v.f1 = 1.0f;
>> +  v.f2 = 2.0f;
>> +  if (sf_from_di_off0 (v.l) != 1.0f || sf_from_di_off4 (v.l) != 2.0f )
>> +    __builtin_abort ();
>> +  return 0;
>> +}
Jiufu Guo May 31, 2023, 2:53 a.m. UTC | #3
Gentle ping...

Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:

> Gentle ping...
>
> Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>
>> Hi
>>
>> I would like to ping this patch for stage1:
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612168.html
>>
>> BR,
>> Jeff (Jiufu)
>>
>> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>>
>>> Hi,
>>>
>>> Compare with previous version:
>>> https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609654.html
>>> This patch does not use UNSPEC for insn mtvsrws anymore.  And to handle
>>> the subreg better on BE and LE, predicate "lowpart_subreg_operator"
>>> is introducted. To help combine pass to match the pattern on high32
>>> bit of DI, shiftrt is still used.
>>>
>>> As mentioned in PR108338, on p9, we could use mtvsrws to implement
>>> the conversion from SI#0 to SF (or lowpart DI to SF).
>>>
>>> For examples:
>>>   *(long long*)buff = di;
>>>   float f = *(float*)(buff);
>>> We generate "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" instead of
>>> "mtvsrws 1,3 ; xscvspdpn 1,1".
>>>
>>> This patch update this, and also enhance the bitcast from highpart
>>> DI to SF.
>>>
>>> Bootstrap and regtests pass on ppc64{,le}.
>>> Is this ok for trunk?
>>>
>>> BR,
>>> Jeff (Jiufu)
>>>
>>> 	PR target/108338
>>>
>>> gcc/ChangeLog:
>>>
>>> 	* config/rs6000/predicates.md (lowpart_subreg_operator): New
>>> 	define_predicate.
>>> 	* config/rs6000/rs6000.md (any_rshift): New code_iterator.
>>> 	(movsf_from_si2): Rename to...
>>> 	(movsf_from_si2_<code>): ... this.
>>> 	(si2sf_mtvsrws): New define_insn.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> 	* gcc.target/powerpc/pr108338.c: New test.
>>>
>>> ---
>>>  gcc/config/rs6000/predicates.md             |  5 +++
>>>  gcc/config/rs6000/rs6000.md                 | 35 ++++++++++++-----
>>>  gcc/testsuite/gcc.target/powerpc/pr108338.c | 42 +++++++++++++++++++++
>>>  3 files changed, 73 insertions(+), 9 deletions(-)
>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108338.c
>>>
>>> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
>>> index 52c65534e51..e57c9d99c6b 100644
>>> --- a/gcc/config/rs6000/predicates.md
>>> +++ b/gcc/config/rs6000/predicates.md
>>> @@ -2064,3 +2064,8 @@ (define_predicate "macho_pic_address"
>>>    else
>>>      return false;
>>>  })
>>> +
>>> +(define_predicate "lowpart_subreg_operator"
>>> +  (and (match_code "subreg")
>>> +       (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op)))
>>> +		    == SUBREG_BYTE (op)")))
>>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>>> index 4a7812fa592..5b4a7f8d801 100644
>>> --- a/gcc/config/rs6000/rs6000.md
>>> +++ b/gcc/config/rs6000/rs6000.md
>>> @@ -7539,6 +7539,14 @@ (define_split
>>>  				 UNSPEC_MOVSI_GOT))]
>>>    "")
>>>  
>>> +(define_insn "si2sf_mtvsrws"
>>> +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
>>> +       (subreg:SF (match_operand:SI 1 "gpc_reg_operand" "r") 0))]
>>> +  "TARGET_P9_VECTOR && TARGET_XSCVSPDPN"
>>> +  "mtvsrws %x0,%1\n\txscvspdpn %x0,%x0"
>>> +  [(set_attr "type" "mfvsr")
>>> +   (set_attr "length" "8")])
>>> +
>>>  ;;	   MR          LA
>>>  ;;	   LWZ         LFIWZX      LXSIWZX
>>>  ;;	   STW         STFIWX      STXSIWX
>>> @@ -8203,10 +8211,18 @@ (define_insn_and_split "movsf_from_si"
>>>    rtx op2 = operands[2];
>>>    rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
>>>  
>>> -  /* Move SF value to upper 32-bits for xscvspdpn.  */
>>> -  emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
>>> -  emit_insn (gen_p8_mtvsrd_sf (op0, op2));
>>> -  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
>>> +  if (TARGET_P9_VECTOR)
>>> +    {
>>> +      emit_insn (gen_si2sf_mtvsrws (op0, gen_lowpart (SImode, op1_di)));
>>> +    }
>>> +  else
>>> +    {
>>> +      /* Move SF value to upper 32-bits for xscvspdpn.  */
>>> +      emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
>>> +      emit_insn (gen_p8_mtvsrd_sf (op0, op2));
>>> +      emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
>>> +    }
>>> +
>>>    DONE;
>>>  }
>>>    [(set_attr "length"
>>> @@ -8219,18 +8235,19 @@ (define_insn_and_split "movsf_from_si"
>>>  	    "*,          *,         p9v,       p8v,       *,         *,
>>>  	     p8v,        p8v,       p8v,       *")])
>>>  
>>> +(define_code_iterator any_rshift [ashiftrt lshiftrt])
>>> +
>>>  ;; For extracting high part element from DImode register like:
>>>  ;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>>>  ;; split it before reload with "and mask" to avoid generating shift right
>>>  ;; 32 bit then shift left 32 bit.
>>> -(define_insn_and_split "movsf_from_si2"
>>> +(define_insn_and_split "movsf_from_si2_<code>"
>>>    [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
>>>  	    (unspec:SF
>>> -	     [(subreg:SI
>>> -	       (ashiftrt:DI
>>> +	     [(match_operator:SI 3 "lowpart_subreg_operator"
>>> +	       [(any_rshift:DI
>>>  		(match_operand:DI 1 "input_operand" "r")
>>> -		(const_int 32))
>>> -	       0)]
>>> +		(const_int 32))])]
>>>  	     UNSPEC_SF_FROM_SI))
>>>    (clobber (match_scratch:DI 2 "=r"))]
>>>    "TARGET_NO_SF_SUBREG"
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c
>>> new file mode 100644
>>> index 00000000000..2438dc13f41
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c
>>> @@ -0,0 +1,42 @@
>>> +// { dg-do run }
>>> +// { dg-options "-O2 -save-temps" }
>>> +
>>> +float __attribute__ ((noipa)) sf_from_di_off0 (long long l)
>>> +{
>>> +  char buff[16];
>>> +  *(long long*)buff = l;
>>> +  float f = *(float*)(buff);
>>> +  return f;    
>>> +}
>>> +
>>> +float  __attribute__ ((noipa)) sf_from_di_off4 (long long l)
>>> +{
>>> +  char buff[16];
>>> +  *(long long*)buff = l;
>>> +  float f = *(float*)(buff + 4);
>>> +  return f; 
>>> +}
>>> +
>>> +/* Under lp64, 'l' is in one DI reg, then check sub DI to SF. */
>>> +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */
>>> +/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */
>>> +
>>> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */
>>> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
>>> +/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
>>> +
>>> +union di_sf_sf
>>> +{
>>> +  struct {float f1; float f2;};
>>> +  long long l;
>>> +};
>>> +
>>> +int main()
>>> +{
>>> +  union di_sf_sf v;
>>> +  v.f1 = 1.0f;
>>> +  v.f2 = 2.0f;
>>> +  if (sf_from_di_off0 (v.l) != 1.0f || sf_from_di_off4 (v.l) != 2.0f )
>>> +    __builtin_abort ();
>>> +  return 0;
>>> +}
Jiufu Guo July 4, 2023, 2:56 a.m. UTC | #4
Hi,

Gentle ping ...

Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:

> Gentle ping...
>
> Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>
>> Gentle ping...
>>
>> Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>>
>>> Hi
>>>
>>> I would like to ping this patch for stage1:
>>> https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612168.html
>>>
>>> BR,
>>> Jeff (Jiufu)
>>>
>>> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>>>
>>>> Hi,
>>>>
>>>> Compare with previous version:
>>>> https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609654.html
>>>> This patch does not use UNSPEC for insn mtvsrws anymore.  And to handle
>>>> the subreg better on BE and LE, predicate "lowpart_subreg_operator"
>>>> is introducted. To help combine pass to match the pattern on high32
>>>> bit of DI, shiftrt is still used.
>>>>
>>>> As mentioned in PR108338, on p9, we could use mtvsrws to implement
>>>> the conversion from SI#0 to SF (or lowpart DI to SF).
>>>>
>>>> For examples:
>>>>   *(long long*)buff = di;
>>>>   float f = *(float*)(buff);
>>>> We generate "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" instead of
>>>> "mtvsrws 1,3 ; xscvspdpn 1,1".
>>>>
>>>> This patch update this, and also enhance the bitcast from highpart
>>>> DI to SF.
>>>>
>>>> Bootstrap and regtests pass on ppc64{,le}.
>>>> Is this ok for trunk?
>>>>
>>>> BR,
>>>> Jeff (Jiufu)
>>>>
>>>> 	PR target/108338
>>>>
>>>> gcc/ChangeLog:
>>>>
>>>> 	* config/rs6000/predicates.md (lowpart_subreg_operator): New
>>>> 	define_predicate.
>>>> 	* config/rs6000/rs6000.md (any_rshift): New code_iterator.
>>>> 	(movsf_from_si2): Rename to...
>>>> 	(movsf_from_si2_<code>): ... this.
>>>> 	(si2sf_mtvsrws): New define_insn.
>>>>
>>>> gcc/testsuite/ChangeLog:
>>>>
>>>> 	* gcc.target/powerpc/pr108338.c: New test.
>>>>
>>>> ---
>>>>  gcc/config/rs6000/predicates.md             |  5 +++
>>>>  gcc/config/rs6000/rs6000.md                 | 35 ++++++++++++-----
>>>>  gcc/testsuite/gcc.target/powerpc/pr108338.c | 42 +++++++++++++++++++++
>>>>  3 files changed, 73 insertions(+), 9 deletions(-)
>>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108338.c
>>>>
>>>> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
>>>> index 52c65534e51..e57c9d99c6b 100644
>>>> --- a/gcc/config/rs6000/predicates.md
>>>> +++ b/gcc/config/rs6000/predicates.md
>>>> @@ -2064,3 +2064,8 @@ (define_predicate "macho_pic_address"
>>>>    else
>>>>      return false;
>>>>  })
>>>> +
>>>> +(define_predicate "lowpart_subreg_operator"
>>>> +  (and (match_code "subreg")
>>>> +       (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op)))
>>>> +		    == SUBREG_BYTE (op)")))
>>>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>>>> index 4a7812fa592..5b4a7f8d801 100644
>>>> --- a/gcc/config/rs6000/rs6000.md
>>>> +++ b/gcc/config/rs6000/rs6000.md
>>>> @@ -7539,6 +7539,14 @@ (define_split
>>>>  				 UNSPEC_MOVSI_GOT))]
>>>>    "")
>>>>  
>>>> +(define_insn "si2sf_mtvsrws"
>>>> +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
>>>> +       (subreg:SF (match_operand:SI 1 "gpc_reg_operand" "r") 0))]
>>>> +  "TARGET_P9_VECTOR && TARGET_XSCVSPDPN"
>>>> +  "mtvsrws %x0,%1\n\txscvspdpn %x0,%x0"
>>>> +  [(set_attr "type" "mfvsr")
>>>> +   (set_attr "length" "8")])
>>>> +
>>>>  ;;	   MR          LA
>>>>  ;;	   LWZ         LFIWZX      LXSIWZX
>>>>  ;;	   STW         STFIWX      STXSIWX
>>>> @@ -8203,10 +8211,18 @@ (define_insn_and_split "movsf_from_si"
>>>>    rtx op2 = operands[2];
>>>>    rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
>>>>  
>>>> -  /* Move SF value to upper 32-bits for xscvspdpn.  */
>>>> -  emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
>>>> -  emit_insn (gen_p8_mtvsrd_sf (op0, op2));
>>>> -  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
>>>> +  if (TARGET_P9_VECTOR)
>>>> +    {
>>>> +      emit_insn (gen_si2sf_mtvsrws (op0, gen_lowpart (SImode, op1_di)));
>>>> +    }
>>>> +  else
>>>> +    {
>>>> +      /* Move SF value to upper 32-bits for xscvspdpn.  */
>>>> +      emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
>>>> +      emit_insn (gen_p8_mtvsrd_sf (op0, op2));
>>>> +      emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
>>>> +    }
>>>> +
>>>>    DONE;
>>>>  }
>>>>    [(set_attr "length"
>>>> @@ -8219,18 +8235,19 @@ (define_insn_and_split "movsf_from_si"
>>>>  	    "*,          *,         p9v,       p8v,       *,         *,
>>>>  	     p8v,        p8v,       p8v,       *")])
>>>>  
>>>> +(define_code_iterator any_rshift [ashiftrt lshiftrt])
>>>> +
>>>>  ;; For extracting high part element from DImode register like:
>>>>  ;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>>>>  ;; split it before reload with "and mask" to avoid generating shift right
>>>>  ;; 32 bit then shift left 32 bit.
>>>> -(define_insn_and_split "movsf_from_si2"
>>>> +(define_insn_and_split "movsf_from_si2_<code>"
>>>>    [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
>>>>  	    (unspec:SF
>>>> -	     [(subreg:SI
>>>> -	       (ashiftrt:DI
>>>> +	     [(match_operator:SI 3 "lowpart_subreg_operator"
>>>> +	       [(any_rshift:DI
>>>>  		(match_operand:DI 1 "input_operand" "r")
>>>> -		(const_int 32))
>>>> -	       0)]
>>>> +		(const_int 32))])]
>>>>  	     UNSPEC_SF_FROM_SI))
>>>>    (clobber (match_scratch:DI 2 "=r"))]
>>>>    "TARGET_NO_SF_SUBREG"
>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c
>>>> new file mode 100644
>>>> index 00000000000..2438dc13f41
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c
>>>> @@ -0,0 +1,42 @@
>>>> +// { dg-do run }
>>>> +// { dg-options "-O2 -save-temps" }
>>>> +
>>>> +float __attribute__ ((noipa)) sf_from_di_off0 (long long l)
>>>> +{
>>>> +  char buff[16];
>>>> +  *(long long*)buff = l;
>>>> +  float f = *(float*)(buff);
>>>> +  return f;    
>>>> +}
>>>> +
>>>> +float  __attribute__ ((noipa)) sf_from_di_off4 (long long l)
>>>> +{
>>>> +  char buff[16];
>>>> +  *(long long*)buff = l;
>>>> +  float f = *(float*)(buff + 4);
>>>> +  return f; 
>>>> +}
>>>> +
>>>> +/* Under lp64, 'l' is in one DI reg, then check sub DI to SF. */
>>>> +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */
>>>> +/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */
>>>> +
>>>> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */
>>>> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
>>>> +/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
>>>> +
>>>> +union di_sf_sf
>>>> +{
>>>> +  struct {float f1; float f2;};
>>>> +  long long l;
>>>> +};
>>>> +
>>>> +int main()
>>>> +{
>>>> +  union di_sf_sf v;
>>>> +  v.f1 = 1.0f;
>>>> +  v.f2 = 2.0f;
>>>> +  if (sf_from_di_off0 (v.l) != 1.0f || sf_from_di_off4 (v.l) != 2.0f )
>>>> +    __builtin_abort ();
>>>> +  return 0;
>>>> +}
Jiufu Guo July 4, 2023, 6:47 a.m. UTC | #5
Hi,

I just submitted a new version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/623533.html
So this ping can be ignored; please check the new version instead.

BR,
Jeff (Jiufu Guo)

Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:

> Hi,
>
> Gentle ping ...
>
> Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>
>> Gentle ping...
>>
>> Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>>
>>> Gentle ping...
>>>
>>> Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>>>
>>>> Hi
>>>>
>>>> I would like to ping this patch for stage1:
>>>> https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612168.html
>>>>
>>>> BR,
>>>> Jeff (Jiufu)
>>>>
>>>> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>>>>
>>>>> Hi,
>>>>>
>>>>> Compare with previous version:
>>>>> https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609654.html
>>>>> This patch does not use UNSPEC for insn mtvsrws anymore.  And to handle
>>>>> the subreg better on BE and LE, predicate "lowpart_subreg_operator"
>>>>> is introducted. To help combine pass to match the pattern on high32
>>>>> bit of DI, shiftrt is still used.
>>>>>
>>>>> As mentioned in PR108338, on p9, we could use mtvsrws to implement
>>>>> the conversion from SI#0 to SF (or lowpart DI to SF).
>>>>>
>>>>> For examples:
>>>>>   *(long long*)buff = di;
>>>>>   float f = *(float*)(buff);
>>>>> We generate "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" instead of
>>>>> "mtvsrws 1,3 ; xscvspdpn 1,1".
>>>>>
>>>>> This patch update this, and also enhance the bitcast from highpart
>>>>> DI to SF.
>>>>>
>>>>> Bootstrap and regtests pass on ppc64{,le}.
>>>>> Is this ok for trunk?
>>>>>
>>>>> BR,
>>>>> Jeff (Jiufu)
>>>>>
>>>>> 	PR target/108338
>>>>>
>>>>> gcc/ChangeLog:
>>>>>
>>>>> 	* config/rs6000/predicates.md (lowpart_subreg_operator): New
>>>>> 	define_predicate.
>>>>> 	* config/rs6000/rs6000.md (any_rshift): New code_iterator.
>>>>> 	(movsf_from_si2): Rename to...
>>>>> 	(movsf_from_si2_<code>): ... this.
>>>>> 	(si2sf_mtvsrws): New define_insn.
>>>>>
>>>>> gcc/testsuite/ChangeLog:
>>>>>
>>>>> 	* gcc.target/powerpc/pr108338.c: New test.
>>>>>
>>>>> ---
>>>>>  gcc/config/rs6000/predicates.md             |  5 +++
>>>>>  gcc/config/rs6000/rs6000.md                 | 35 ++++++++++++-----
>>>>>  gcc/testsuite/gcc.target/powerpc/pr108338.c | 42 +++++++++++++++++++++
>>>>>  3 files changed, 73 insertions(+), 9 deletions(-)
>>>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108338.c
>>>>>
>>>>> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
>>>>> index 52c65534e51..e57c9d99c6b 100644
>>>>> --- a/gcc/config/rs6000/predicates.md
>>>>> +++ b/gcc/config/rs6000/predicates.md
>>>>> @@ -2064,3 +2064,8 @@ (define_predicate "macho_pic_address"
>>>>>    else
>>>>>      return false;
>>>>>  })
>>>>> +
>>>>> +(define_predicate "lowpart_subreg_operator"
>>>>> +  (and (match_code "subreg")
>>>>> +       (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op)))
>>>>> +		    == SUBREG_BYTE (op)")))
>>>>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>>>>> index 4a7812fa592..5b4a7f8d801 100644
>>>>> --- a/gcc/config/rs6000/rs6000.md
>>>>> +++ b/gcc/config/rs6000/rs6000.md
>>>>> @@ -7539,6 +7539,14 @@ (define_split
>>>>>  				 UNSPEC_MOVSI_GOT))]
>>>>>    "")
>>>>>  
>>>>> +(define_insn "si2sf_mtvsrws"
>>>>> +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
>>>>> +       (subreg:SF (match_operand:SI 1 "gpc_reg_operand" "r") 0))]
>>>>> +  "TARGET_P9_VECTOR && TARGET_XSCVSPDPN"
>>>>> +  "mtvsrws %x0,%1\n\txscvspdpn %x0,%x0"
>>>>> +  [(set_attr "type" "mfvsr")
>>>>> +   (set_attr "length" "8")])
>>>>> +
>>>>>  ;;	   MR          LA
>>>>>  ;;	   LWZ         LFIWZX      LXSIWZX
>>>>>  ;;	   STW         STFIWX      STXSIWX
>>>>> @@ -8203,10 +8211,18 @@ (define_insn_and_split "movsf_from_si"
>>>>>    rtx op2 = operands[2];
>>>>>    rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
>>>>>  
>>>>> -  /* Move SF value to upper 32-bits for xscvspdpn.  */
>>>>> -  emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
>>>>> -  emit_insn (gen_p8_mtvsrd_sf (op0, op2));
>>>>> -  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
>>>>> +  if (TARGET_P9_VECTOR)
>>>>> +    {
>>>>> +      emit_insn (gen_si2sf_mtvsrws (op0, gen_lowpart (SImode, op1_di)));
>>>>> +    }
>>>>> +  else
>>>>> +    {
>>>>> +      /* Move SF value to upper 32-bits for xscvspdpn.  */
>>>>> +      emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
>>>>> +      emit_insn (gen_p8_mtvsrd_sf (op0, op2));
>>>>> +      emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
>>>>> +    }
>>>>> +
>>>>>    DONE;
>>>>>  }
>>>>>    [(set_attr "length"
>>>>> @@ -8219,18 +8235,19 @@ (define_insn_and_split "movsf_from_si"
>>>>>  	    "*,          *,         p9v,       p8v,       *,         *,
>>>>>  	     p8v,        p8v,       p8v,       *")])
>>>>>  
>>>>> +(define_code_iterator any_rshift [ashiftrt lshiftrt])
>>>>> +
>>>>>  ;; For extracting high part element from DImode register like:
>>>>>  ;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>>>>>  ;; split it before reload with "and mask" to avoid generating shift right
>>>>>  ;; 32 bit then shift left 32 bit.
>>>>> -(define_insn_and_split "movsf_from_si2"
>>>>> +(define_insn_and_split "movsf_from_si2_<code>"
>>>>>    [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
>>>>>  	    (unspec:SF
>>>>> -	     [(subreg:SI
>>>>> -	       (ashiftrt:DI
>>>>> +	     [(match_operator:SI 3 "lowpart_subreg_operator"
>>>>> +	       [(any_rshift:DI
>>>>>  		(match_operand:DI 1 "input_operand" "r")
>>>>> -		(const_int 32))
>>>>> -	       0)]
>>>>> +		(const_int 32))])]
>>>>>  	     UNSPEC_SF_FROM_SI))
>>>>>    (clobber (match_scratch:DI 2 "=r"))]
>>>>>    "TARGET_NO_SF_SUBREG"
>>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c
>>>>> new file mode 100644
>>>>> index 00000000000..2438dc13f41
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c
>>>>> @@ -0,0 +1,42 @@
>>>>> +// { dg-do run }
>>>>> +// { dg-options "-O2 -save-temps" }
>>>>> +
>>>>> +float __attribute__ ((noipa)) sf_from_di_off0 (long long l)
>>>>> +{
>>>>> +  char buff[16];
>>>>> +  *(long long*)buff = l;
>>>>> +  float f = *(float*)(buff);
>>>>> +  return f;    
>>>>> +}
>>>>> +
>>>>> +float  __attribute__ ((noipa)) sf_from_di_off4 (long long l)
>>>>> +{
>>>>> +  char buff[16];
>>>>> +  *(long long*)buff = l;
>>>>> +  float f = *(float*)(buff + 4);
>>>>> +  return f; 
>>>>> +}
>>>>> +
>>>>> +/* Under lp64, 'l' is in one DI reg, then check sub DI to SF. */
>>>>> +/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */
>>>>> +/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */
>>>>> +
>>>>> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */
>>>>> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
>>>>> +/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
>>>>> +
>>>>> +union di_sf_sf
>>>>> +{
>>>>> +  struct {float f1; float f2;};
>>>>> +  long long l;
>>>>> +};
>>>>> +
>>>>> +int main()
>>>>> +{
>>>>> +  union di_sf_sf v;
>>>>> +  v.f1 = 1.0f;
>>>>> +  v.f2 = 2.0f;
>>>>> +  if (sf_from_di_off0 (v.l) != 1.0f || sf_from_di_off4 (v.l) != 2.0f )
>>>>> +    __builtin_abort ();
>>>>> +  return 0;
>>>>> +}
diff mbox series

Patch

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 52c65534e51..e57c9d99c6b 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -2064,3 +2064,8 @@  (define_predicate "macho_pic_address"
   else
     return false;
 })
+
+(define_predicate "lowpart_subreg_operator"
+  (and (match_code "subreg")
+       (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op)))
+		    == SUBREG_BYTE (op)")))
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4a7812fa592..5b4a7f8d801 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7539,6 +7539,14 @@  (define_split
 				 UNSPEC_MOVSI_GOT))]
   "")
 
+(define_insn "si2sf_mtvsrws"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
+       (subreg:SF (match_operand:SI 1 "gpc_reg_operand" "r") 0))]
+  "TARGET_P9_VECTOR && TARGET_XSCVSPDPN"
+  "mtvsrws %x0,%1\n\txscvspdpn %x0,%x0"
+  [(set_attr "type" "mfvsr")
+   (set_attr "length" "8")])
+
 ;;	   MR          LA
 ;;	   LWZ         LFIWZX      LXSIWZX
 ;;	   STW         STFIWX      STXSIWX
@@ -8203,10 +8211,18 @@  (define_insn_and_split "movsf_from_si"
   rtx op2 = operands[2];
   rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
 
-  /* Move SF value to upper 32-bits for xscvspdpn.  */
-  emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
-  emit_insn (gen_p8_mtvsrd_sf (op0, op2));
-  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+  if (TARGET_P9_VECTOR)
+    {
+      emit_insn (gen_si2sf_mtvsrws (op0, gen_lowpart (SImode, op1_di)));
+    }
+  else
+    {
+      /* Move SF value to upper 32-bits for xscvspdpn.  */
+      emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
+      emit_insn (gen_p8_mtvsrd_sf (op0, op2));
+      emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+    }
+
   DONE;
 }
   [(set_attr "length"
@@ -8219,18 +8235,19 @@  (define_insn_and_split "movsf_from_si"
 	    "*,          *,         p9v,       p8v,       *,         *,
 	     p8v,        p8v,       p8v,       *")])
 
+(define_code_iterator any_rshift [ashiftrt lshiftrt])
+
 ;; For extracting high part element from DImode register like:
 ;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
 ;; split it before reload with "and mask" to avoid generating shift right
 ;; 32 bit then shift left 32 bit.
-(define_insn_and_split "movsf_from_si2"
+(define_insn_and_split "movsf_from_si2_<code>"
   [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
 	    (unspec:SF
-	     [(subreg:SI
-	       (ashiftrt:DI
+	     [(match_operator:SI 3 "lowpart_subreg_operator"
+	       [(any_rshift:DI
 		(match_operand:DI 1 "input_operand" "r")
-		(const_int 32))
-	       0)]
+		(const_int 32))])]
 	     UNSPEC_SF_FROM_SI))
   (clobber (match_scratch:DI 2 "=r"))]
   "TARGET_NO_SF_SUBREG"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c
new file mode 100644
index 00000000000..2438dc13f41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c
@@ -0,0 +1,42 @@ 
+// { dg-do run }
+// { dg-options "-O2 -save-temps" }
+
+float __attribute__ ((noipa)) sf_from_di_off0 (long long l)
+{
+  char buff[16];
+  *(long long*)buff = l;
+  float f = *(float*)(buff);
+  return f;    
+}
+
+float  __attribute__ ((noipa)) sf_from_di_off4 (long long l)
+{
+  char buff[16];
+  *(long long*)buff = l;
+  float f = *(float*)(buff + 4);
+  return f; 
+}
+
+/* Under lp64, 'l' is in one DI reg; check converting each half of it to SF.  */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */
+
+/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */
+/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
+/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
+
+union di_sf_sf
+{
+  struct {float f1; float f2;};
+  long long l;
+};
+
+int main()
+{
+  union di_sf_sf v;
+  v.f1 = 1.0f;
+  v.f2 = 2.0f;
+  if (sf_from_di_off0 (v.l) != 1.0f || sf_from_di_off4 (v.l) != 2.0f )
+    __builtin_abort ();
+  return 0;
+}