diff mbox series

[1/3] expr: Allow same precision modes conversion between {ibm_extended, ieee_quad}_format [PR112993]

Message ID cecc33a7-740e-0612-a950-8699490b8e0d@linux.ibm.com
State New
Headers show
Series [1/3] expr: Allow same precision modes conversion between {ibm_extended, ieee_quad}_format [PR112993] | expand

Commit Message

Kewen.Lin July 4, 2024, 7:57 a.m. UTC
Hi,

With some historical reasons, rs6000 defines KFmode, TFmode
and IFmode to have different mode precision, but it causes
some issues and needs some workarounds such as r14-6478 for
PR112788.  So we are going to make all rs6000 128 bit scalar
FP modes have 128 bit precision.  Be prepared for that, this
patch is to make function convert_mode_scalar allow same
precision FP modes conversion if their underlying formats are
ibm_extended_format and ieee_quad_format respectively, just
like the existing special treatment on arm_bfloat_half_format
<-> ieee_half_format.  It also factors out all the relevant
checks into a lambda function.

Bootstrapped and regtested on x86_64-redhat-linux and
powerpc64{,le}-linux-gnu.

Is it ok for trunk?

BR,
Kewen
-----
	PR target/112993

gcc/ChangeLog:

	* expr.cc (convert_mode_scalar): Allow same precision conversion
	between scalar floating point modes if whose underlying format is
	ibm_extended_format or ieee_quad_format, and refactor assertion
	with new lambda function acceptable_same_precision_modes.
---
 gcc/expr.cc | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

--
2.39.1

Comments

Richard Sandiford July 4, 2024, 3:58 p.m. UTC | #1
"Kewen.Lin" <linkw@linux.ibm.com> writes:
> Hi,
>
> With some historical reasons, rs6000 defines KFmode, TFmode
> and IFmode to have different mode precision, but it causes
> some issues and needs some workarounds such as r14-6478 for
> PR112788.  So we are going to make all rs6000 128 bit scalar
> FP modes have 128 bit precision.  Be prepared for that, this
> patch is to make function convert_mode_scalar allow same
> precision FP modes conversion if their underlying formats are
> ibm_extended_format and ieee_quad_format respectively, just
> like the existing special treatment on arm_bfloat_half_format
> <-> ieee_half_format.  It also factors out all the relevant
> checks into a lambda function.
>
> Bootstrapped and regtested on x86_64-redhat-linux and
> powerpc64{,le}-linux-gnu.
>
> Is it ok for trunk?
>
> BR,
> Kewen
> -----
> 	PR target/112993
>
> gcc/ChangeLog:
>
> 	* expr.cc (convert_mode_scalar): Allow same precision conversion
> 	between scalar floating point modes if whose underlying format is
> 	ibm_extended_format or ieee_quad_format, and refactor assertion
> 	with new lambda function acceptable_same_precision_modes.
> ---
>  gcc/expr.cc | 30 ++++++++++++++++++++++++------
>  1 file changed, 24 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index ffbac513692..eac4dcc982e 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -338,6 +338,29 @@ convert_mode_scalar (rtx to, rtx from, int unsignedp)
>    enum rtx_code equiv_code = (unsignedp < 0 ? UNKNOWN
>  			      : (unsignedp ? ZERO_EXTEND : SIGN_EXTEND));
>
> +  auto acceptable_same_precision_modes
> +    = [] (scalar_mode from_mode, scalar_mode to_mode) -> bool
> +    {
> +      if (DECIMAL_FLOAT_MODE_P (from_mode) != DECIMAL_FLOAT_MODE_P (to_mode))
> +	return true;
> +
> +      /* arm_bfloat_half_format <-> ieee_half_format */
> +      if ((REAL_MODE_FORMAT (from_mode) == &arm_bfloat_half_format
> +	   && REAL_MODE_FORMAT (to_mode) == &ieee_half_format)
> +	  || (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
> +	      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format))
> +	return true;
> +
> +      /* ibm_extended_format <-> ieee_quad_format */
> +      if ((REAL_MODE_FORMAT (from_mode) == &ibm_extended_format
> +	   && REAL_MODE_FORMAT (to_mode) == &ieee_quad_format)
> +	  || (REAL_MODE_FORMAT (from_mode) == &ieee_quad_format
> +	      && REAL_MODE_FORMAT (to_mode) == &ibm_extended_format))
> +	return true;
> +
> +      return false;
> +    };
> +
>    if (to_real)
>      {
>        rtx value;
> @@ -346,12 +369,7 @@ convert_mode_scalar (rtx to, rtx from, int unsignedp)
>
>        gcc_assert ((GET_MODE_PRECISION (from_mode)
>  		   != GET_MODE_PRECISION (to_mode))
> -		  || (DECIMAL_FLOAT_MODE_P (from_mode)
> -		      != DECIMAL_FLOAT_MODE_P (to_mode))
> -		  || (REAL_MODE_FORMAT (from_mode) == &arm_bfloat_half_format
> -		      && REAL_MODE_FORMAT (to_mode) == &ieee_half_format)
> -		  || (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
> -		      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format));
> +		  || acceptable_same_precision_modes (from_mode, to_mode));
>
>        if (GET_MODE_PRECISION (from_mode) == GET_MODE_PRECISION (to_mode))
>  	{
> --
> 2.39.1

This part looks good to me FWIW, but what's the correct behaviour of:

      if (GET_MODE_PRECISION (from_mode) == GET_MODE_PRECISION (to_mode))
	{
	  if (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
	      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format)
	    /* libgcc implements just __trunchfbf2, not __extendhfbf2.  */
	    tab = trunc_optab;
	  else
	    /* Conversion between decimal float and binary float, same
	       size.  */
	    tab = DECIMAL_FLOAT_MODE_P (from_mode) ? trunc_optab : sext_optab;

for the new pairing?  The intent for bfloat/half seems to be that bfloat
is treated as arbitrarily “lesser than” half, so half->bfloat is a
truncation and bfloat->half is an extension.  It seems like it would be
good to do something similar for the new pair, so that the float modes
still form a total order in terms of extension & truncation.

Thanks,
Richard
Kewen.Lin July 5, 2024, 2:19 a.m. UTC | #2
Hi Richard,

Thanks for the review comments!

on 2024/7/4 23:58, Richard Sandiford wrote:
> "Kewen.Lin" <linkw@linux.ibm.com> writes:
>> Hi,
>>
>> With some historical reasons, rs6000 defines KFmode, TFmode
>> and IFmode to have different mode precision, but it causes
>> some issues and needs some workarounds such as r14-6478 for
>> PR112788.  So we are going to make all rs6000 128 bit scalar
>> FP modes have 128 bit precision.  Be prepared for that, this
>> patch is to make function convert_mode_scalar allow same
>> precision FP modes conversion if their underlying formats are
>> ibm_extended_format and ieee_quad_format respectively, just
>> like the existing special treatment on arm_bfloat_half_format
>> <-> ieee_half_format.  It also factors out all the relevant
>> checks into a lambda function.
>>
>> Bootstrapped and regtested on x86_64-redhat-linux and
>> powerpc64{,le}-linux-gnu.
>>
>> Is it ok for trunk?
>>
>> BR,
>> Kewen
>> -----
>> 	PR target/112993
>>
>> gcc/ChangeLog:
>>
>> 	* expr.cc (convert_mode_scalar): Allow same precision conversion
>> 	between scalar floating point modes if whose underlying format is
>> 	ibm_extended_format or ieee_quad_format, and refactor assertion
>> 	with new lambda function acceptable_same_precision_modes.
>> ---
>>  gcc/expr.cc | 30 ++++++++++++++++++++++++------
>>  1 file changed, 24 insertions(+), 6 deletions(-)
>>
>> diff --git a/gcc/expr.cc b/gcc/expr.cc
>> index ffbac513692..eac4dcc982e 100644
>> --- a/gcc/expr.cc
>> +++ b/gcc/expr.cc
>> @@ -338,6 +338,29 @@ convert_mode_scalar (rtx to, rtx from, int unsignedp)
>>    enum rtx_code equiv_code = (unsignedp < 0 ? UNKNOWN
>>  			      : (unsignedp ? ZERO_EXTEND : SIGN_EXTEND));
>>
>> +  auto acceptable_same_precision_modes
>> +    = [] (scalar_mode from_mode, scalar_mode to_mode) -> bool
>> +    {
>> +      if (DECIMAL_FLOAT_MODE_P (from_mode) != DECIMAL_FLOAT_MODE_P (to_mode))
>> +	return true;
>> +
>> +      /* arm_bfloat_half_format <-> ieee_half_format */
>> +      if ((REAL_MODE_FORMAT (from_mode) == &arm_bfloat_half_format
>> +	   && REAL_MODE_FORMAT (to_mode) == &ieee_half_format)
>> +	  || (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
>> +	      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format))
>> +	return true;
>> +
>> +      /* ibm_extended_format <-> ieee_quad_format */
>> +      if ((REAL_MODE_FORMAT (from_mode) == &ibm_extended_format
>> +	   && REAL_MODE_FORMAT (to_mode) == &ieee_quad_format)
>> +	  || (REAL_MODE_FORMAT (from_mode) == &ieee_quad_format
>> +	      && REAL_MODE_FORMAT (to_mode) == &ibm_extended_format))
>> +	return true;
>> +
>> +      return false;
>> +    };
>> +
>>    if (to_real)
>>      {
>>        rtx value;
>> @@ -346,12 +369,7 @@ convert_mode_scalar (rtx to, rtx from, int unsignedp)
>>
>>        gcc_assert ((GET_MODE_PRECISION (from_mode)
>>  		   != GET_MODE_PRECISION (to_mode))
>> -		  || (DECIMAL_FLOAT_MODE_P (from_mode)
>> -		      != DECIMAL_FLOAT_MODE_P (to_mode))
>> -		  || (REAL_MODE_FORMAT (from_mode) == &arm_bfloat_half_format
>> -		      && REAL_MODE_FORMAT (to_mode) == &ieee_half_format)
>> -		  || (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
>> -		      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format));
>> +		  || acceptable_same_precision_modes (from_mode, to_mode));
>>
>>        if (GET_MODE_PRECISION (from_mode) == GET_MODE_PRECISION (to_mode))
>>  	{
>> --
>> 2.39.1
> 
> This part looks good to me FWIW, but what's the correct behaviour of:
> 
>       if (GET_MODE_PRECISION (from_mode) == GET_MODE_PRECISION (to_mode))
> 	{
> 	  if (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
> 	      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format)
> 	    /* libgcc implements just __trunchfbf2, not __extendhfbf2.  */
> 	    tab = trunc_optab;
> 	  else
> 	    /* Conversion between decimal float and binary float, same
> 	       size.  */
> 	    tab = DECIMAL_FLOAT_MODE_P (from_mode) ? trunc_optab : sext_optab;
> 
> for the new pairing?  The intent for bfloat/half seems to be that bfloat
> is treated as arbitrarily “lesser than” half, so half->bfloat is a
> truncation and bfloat->half is an extension.  It seems like it would be
> good to do something similar for the new pair, so that the float modes
> still form a total order in terms of extension & truncation.

Good question!  If I read it right, this special handling for half->bfloat is
to align with the previous implementation in libgcc and easy for backporting
, but isn't to keep the modes to form a total order, as Jakub's comments
PR114907 #c6 and #c8.  Similar to half vs. bfloat, neither of ibm128 nor ieee128
is a subset or superset of the other, the current behavior for this new paring is
that:
  1) define sext_optab for any two of TF/KF/IF (also bi-direction), since generic
     code above adopts sext_optab for same size conversion.
  2) for each define_expand in 1), it actually calls rs6000_expand_float128_convert
     which is one existing helper to handle conversions to/from __float128 and
     __ibm128, it will take care of the remaining (generate __extend* or __trunc*).
     Similar to half vs. bfloat which has extend and trunc libgcc functions, we
     have some extend and trunc libgcc functions for __float128 vs. __ibm128.

We can add some special treatment for the new pairing like what's for half vs. bfloat,
but since extend and truncate can be arbitrary for them, I thought we can just define
sext_optab to align with what generic code does and wants, then do the actual handling
in the corresponding expander (someone may argue that sext_optab is expected to be end
with libcall __extend*, it's surprising to end up with __trunc*, but considering
the extend/truncate here is arbitrary, guessing it's acceptable?).

Does it make sense to you?

BR,
Kewen
Richard Sandiford July 8, 2024, 11:14 a.m. UTC | #3
"Kewen.Lin" <linkw@linux.ibm.com> writes:
> Hi Richard,
>
> Thanks for the review comments!
>
> on 2024/7/4 23:58, Richard Sandiford wrote:
>> "Kewen.Lin" <linkw@linux.ibm.com> writes:
>>> Hi,
>>>
>>> With some historical reasons, rs6000 defines KFmode, TFmode
>>> and IFmode to have different mode precision, but it causes
>>> some issues and needs some workarounds such as r14-6478 for
>>> PR112788.  So we are going to make all rs6000 128 bit scalar
>>> FP modes have 128 bit precision.  Be prepared for that, this
>>> patch is to make function convert_mode_scalar allow same
>>> precision FP modes conversion if their underlying formats are
>>> ibm_extended_format and ieee_quad_format respectively, just
>>> like the existing special treatment on arm_bfloat_half_format
>>> <-> ieee_half_format.  It also factors out all the relevant
>>> checks into a lambda function.
>>>
>>> Bootstrapped and regtested on x86_64-redhat-linux and
>>> powerpc64{,le}-linux-gnu.
>>>
>>> Is it ok for trunk?
>>>
>>> BR,
>>> Kewen
>>> -----
>>> 	PR target/112993
>>>
>>> gcc/ChangeLog:
>>>
>>> 	* expr.cc (convert_mode_scalar): Allow same precision conversion
>>> 	between scalar floating point modes if whose underlying format is
>>> 	ibm_extended_format or ieee_quad_format, and refactor assertion
>>> 	with new lambda function acceptable_same_precision_modes.
>>> ---
>>>  gcc/expr.cc | 30 ++++++++++++++++++++++++------
>>>  1 file changed, 24 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/gcc/expr.cc b/gcc/expr.cc
>>> index ffbac513692..eac4dcc982e 100644
>>> --- a/gcc/expr.cc
>>> +++ b/gcc/expr.cc
>>> @@ -338,6 +338,29 @@ convert_mode_scalar (rtx to, rtx from, int unsignedp)
>>>    enum rtx_code equiv_code = (unsignedp < 0 ? UNKNOWN
>>>  			      : (unsignedp ? ZERO_EXTEND : SIGN_EXTEND));
>>>
>>> +  auto acceptable_same_precision_modes
>>> +    = [] (scalar_mode from_mode, scalar_mode to_mode) -> bool
>>> +    {
>>> +      if (DECIMAL_FLOAT_MODE_P (from_mode) != DECIMAL_FLOAT_MODE_P (to_mode))
>>> +	return true;
>>> +
>>> +      /* arm_bfloat_half_format <-> ieee_half_format */
>>> +      if ((REAL_MODE_FORMAT (from_mode) == &arm_bfloat_half_format
>>> +	   && REAL_MODE_FORMAT (to_mode) == &ieee_half_format)
>>> +	  || (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
>>> +	      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format))
>>> +	return true;
>>> +
>>> +      /* ibm_extended_format <-> ieee_quad_format */
>>> +      if ((REAL_MODE_FORMAT (from_mode) == &ibm_extended_format
>>> +	   && REAL_MODE_FORMAT (to_mode) == &ieee_quad_format)
>>> +	  || (REAL_MODE_FORMAT (from_mode) == &ieee_quad_format
>>> +	      && REAL_MODE_FORMAT (to_mode) == &ibm_extended_format))
>>> +	return true;
>>> +
>>> +      return false;
>>> +    };
>>> +
>>>    if (to_real)
>>>      {
>>>        rtx value;
>>> @@ -346,12 +369,7 @@ convert_mode_scalar (rtx to, rtx from, int unsignedp)
>>>
>>>        gcc_assert ((GET_MODE_PRECISION (from_mode)
>>>  		   != GET_MODE_PRECISION (to_mode))
>>> -		  || (DECIMAL_FLOAT_MODE_P (from_mode)
>>> -		      != DECIMAL_FLOAT_MODE_P (to_mode))
>>> -		  || (REAL_MODE_FORMAT (from_mode) == &arm_bfloat_half_format
>>> -		      && REAL_MODE_FORMAT (to_mode) == &ieee_half_format)
>>> -		  || (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
>>> -		      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format));
>>> +		  || acceptable_same_precision_modes (from_mode, to_mode));
>>>
>>>        if (GET_MODE_PRECISION (from_mode) == GET_MODE_PRECISION (to_mode))
>>>  	{
>>> --
>>> 2.39.1
>> 
>> This part looks good to me FWIW, but what's the correct behaviour of:
>> 
>>       if (GET_MODE_PRECISION (from_mode) == GET_MODE_PRECISION (to_mode))
>> 	{
>> 	  if (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
>> 	      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format)
>> 	    /* libgcc implements just __trunchfbf2, not __extendhfbf2.  */
>> 	    tab = trunc_optab;
>> 	  else
>> 	    /* Conversion between decimal float and binary float, same
>> 	       size.  */
>> 	    tab = DECIMAL_FLOAT_MODE_P (from_mode) ? trunc_optab : sext_optab;
>> 
>> for the new pairing?  The intent for bfloat/half seems to be that bfloat
>> is treated as arbitrarily “lesser than” half, so half->bfloat is a
>> truncation and bfloat->half is an extension.  It seems like it would be
>> good to do something similar for the new pair, so that the float modes
>> still form a total order in terms of extension & truncation.
>
> Good question!  If I read it right, this special handling for half->bfloat is
> to align with the previous implementation in libgcc and easy for backporting
> , but isn't to keep the modes to form a total order, as Jakub's comments
> PR114907 #c6 and #c8.  Similar to half vs. bfloat, neither of ibm128 nor ieee128
> is a subset or superset of the other, the current behavior for this new paring is
> that:
>   1) define sext_optab for any two of TF/KF/IF (also bi-direction), since generic
>      code above adopts sext_optab for same size conversion.

But before your patch, that code only expected equal precisions if:

		  || (DECIMAL_FLOAT_MODE_P (from_mode)
		      != DECIMAL_FLOAT_MODE_P (to_mode))
		  || (REAL_MODE_FORMAT (from_mode) == &arm_bfloat_half_format
		      && REAL_MODE_FORMAT (to_mode) == &ieee_half_format)
		  || (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
		      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format)

So the effect was:

binary->decimal: extend
decimal->binary: truncate
bfloat->IEEE: extend
IEEE->bfloat: truncate

AFAICT there was no X and Y for which X->Y and Y->X are both extensions.

>   2) for each define_expand in 1), it actually calls rs6000_expand_float128_convert
>      which is one existing helper to handle conversions to/from __float128 and
>      __ibm128, it will take care of the remaining (generate __extend* or __trunc*).
>      Similar to half vs. bfloat which has extend and trunc libgcc functions, we
>      have some extend and trunc libgcc functions for __float128 vs. __ibm128.
>
> We can add some special treatment for the new pairing like what's for half vs. bfloat,
> but since extend and truncate can be arbitrary for them, I thought we can just define
> sext_optab to align with what generic code does and wants, then do the actual handling
> in the corresponding expander (someone may argue that sext_optab is expected to be end
> with libcall __extend*, it's surprising to end up with __trunc*, but considering
> the extend/truncate here is arbitrary, guessing it's acceptable?).

Yeah, using a trunc libgcc function for sext_optab seems surprising to me.

Like Jakub said in #c8 above, both extension and truncation are conceptually
wrong.  But given that both are wrong, we can't use the literal meaning to
choose between them, and need to find something else instead.

Making the sext/trunc choice mirror the current libgcc functions seems
cleaner.  It also avoids the (IMO) odd situation that you can construct
a valid and infinitely long chain of extensions from a finite set of types.
IMO the types should be well ordered if possible.

Thanks,
Richard
Kewen.Lin July 9, 2024, 6:49 a.m. UTC | #4
Hi Richard,

on 2024/7/8 19:14, Richard Sandiford wrote:
> "Kewen.Lin" <linkw@linux.ibm.com> writes:[snip...]
>>>
>>> This part looks good to me FWIW, but what's the correct behaviour of:
>>>
>>>       if (GET_MODE_PRECISION (from_mode) == GET_MODE_PRECISION (to_mode))
>>> 	{
>>> 	  if (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
>>> 	      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format)
>>> 	    /* libgcc implements just __trunchfbf2, not __extendhfbf2.  */
>>> 	    tab = trunc_optab;
>>> 	  else
>>> 	    /* Conversion between decimal float and binary float, same
>>> 	       size.  */
>>> 	    tab = DECIMAL_FLOAT_MODE_P (from_mode) ? trunc_optab : sext_optab;
>>>
>>> for the new pairing?  The intent for bfloat/half seems to be that bfloat
>>> is treated as arbitrarily “lesser than” half, so half->bfloat is a
>>> truncation and bfloat->half is an extension.  It seems like it would be
>>> good to do something similar for the new pair, so that the float modes
>>> still form a total order in terms of extension & truncation.
>>
>> Good question!  If I read it right, this special handling for half->bfloat is
>> to align with the previous implementation in libgcc and easy for backporting
>> , but isn't to keep the modes to form a total order, as Jakub's comments
>> PR114907 #c6 and #c8.  Similar to half vs. bfloat, neither of ibm128 nor ieee128
>> is a subset or superset of the other, the current behavior for this new paring is
>> that:
>>   1) define sext_optab for any two of TF/KF/IF (also bi-direction), since generic
>>      code above adopts sext_optab for same size conversion.
> 
> But before your patch, that code only expected equal precisions if:
> 
> 		  || (DECIMAL_FLOAT_MODE_P (from_mode)
> 		      != DECIMAL_FLOAT_MODE_P (to_mode))
> 		  || (REAL_MODE_FORMAT (from_mode) == &arm_bfloat_half_format
> 		      && REAL_MODE_FORMAT (to_mode) == &ieee_half_format)
> 		  || (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
> 		      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format)
> 
> So the effect was:
> 
> binary->decimal: extend
> decimal->binary: truncate
> bfloat->IEEE: extend
> IEEE->bfloat: truncate
> 
> AFAICT there was no X and Y for which X->Y and Y->X are both extensions.

Yeah, this is exactly the status quo, though Jakub's comments implied that
IEEE->bfloat should have been supported with extend.

> 
>>   2) for each define_expand in 1), it actually calls rs6000_expand_float128_convert
>>      which is one existing helper to handle conversions to/from __float128 and
>>      __ibm128, it will take care of the remaining (generate __extend* or __trunc*).
>>      Similar to half vs. bfloat which has extend and trunc libgcc functions, we
>>      have some extend and trunc libgcc functions for __float128 vs. __ibm128.
>>
>> We can add some special treatment for the new pairing like what's for half vs. bfloat,
>> but since extend and truncate can be arbitrary for them, I thought we can just define
>> sext_optab to align with what generic code does and wants, then do the actual handling
>> in the corresponding expander (someone may argue that sext_optab is expected to be end
>> with libcall __extend*, it's surprising to end up with __trunc*, but considering
>> the extend/truncate here is arbitrary, guessing it's acceptable?).
> 
> Yeah, using a trunc libgcc function for sext_optab seems surprising to me.

OK, thanks for the input!  Unfortunately it's even worse in rs6000:

      set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
	set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");

      set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
	set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");

, the optabs and their corresponding libgcc functions look reversed.
I'll follow up to adjust them as well.

> 
> Like Jakub said in #c8 above, both extension and truncation are conceptually
> wrong.  But given that both are wrong, we can't use the literal meaning to
> choose between them, and need to find something else instead.
> 
> Making the sext/trunc choice mirror the current libgcc functions seems
> cleaner.  It also avoids the (IMO) odd situation that you can construct
> a valid and infinitely long chain of extensions from a finite set of types.
> IMO the types should be well ordered if possible.

Good point, from this perspective, bidirectional extend seems not recommended.

One updated version has been attached, which checks trunc optab for ibm128 ->
ieee128 similar to bfloat -> ieee16 as we have libgcc function __trunctfkf2
but not __extendtfkf2.  Does it look good to you?

I'll post an updated version for rs6000 part once the testing goes well.

BR,
Kewen

-----
From 81f9bb0c40817cbecbff63a9925039de8e6333ff Mon Sep 17 00:00:00 2001
From: Kewen Lin <linkw@linux.ibm.com>
Date: Tue, 2 Jul 2024 02:03:55 -0500
Subject: [PATCH 3/4] expr: Allow same precision modes conversion between
 {ibm_extended, ieee_quad}_format

With some historical reasons, rs6000 defines KFmode, TFmode
and IFmode to have different mode precisions, but it causes
some issues and needs some workarounds such as PR112993.
So we are going to make all rs6000 128 bit scalar FP modes
have 128 bit precision.  Be prepared for that, this patch
is to make function convert_mode_scalar allow same precision
FP modes conversion if their underlying formats are
ibm_extended_format and ieee_quad_format respectively, just
like the existing special treatment on arm_bfloat_half_format
<-> ieee_half_format.  It also factors out all the relevant
checks into a lambda function.  Besides, similar to ieee fp16
-> bfloat conversion, it adopts trunc_optab rather than
sext_optab for ibm128 to ieee128 conversion.

	PR target/112993

gcc/ChangeLog:

	* expr.cc (convert_mode_scalar): Allow same precision conversion
	between scalar floating point modes if whose underlying format is
	ibm_extended_format or ieee_quad_format, and refactor assertion
	with new lambda function acceptable_same_precision_modes.  Use
	trunc_optab rather than sext_optab for ibm128 to ieee128 conversion.
	* optabs-libfuncs.cc (gen_trunc_conv_libfunc): Use trunc_optab rather
	than sext_optab for ibm128 to ieee128 conversion.
---
 gcc/expr.cc            | 39 ++++++++++++++++++++++++++++++---------
 gcc/optabs-libfuncs.cc |  4 +++-
 2 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index ffbac513692..34679d464e8 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -338,6 +338,29 @@ convert_mode_scalar (rtx to, rtx from, int unsignedp)
   enum rtx_code equiv_code = (unsignedp < 0 ? UNKNOWN
 			      : (unsignedp ? ZERO_EXTEND : SIGN_EXTEND));
 
+  auto acceptable_same_precision_modes
+    = [] (scalar_mode from_mode, scalar_mode to_mode) -> bool
+    {
+      if (DECIMAL_FLOAT_MODE_P (from_mode) != DECIMAL_FLOAT_MODE_P (to_mode))
+	return true;
+
+      /* arm_bfloat_half_format <-> ieee_half_format */
+      if ((REAL_MODE_FORMAT (from_mode) == &arm_bfloat_half_format
+	   && REAL_MODE_FORMAT (to_mode) == &ieee_half_format)
+	  || (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
+	      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format))
+	return true;
+
+      /* ibm_extended_format <-> ieee_quad_format */
+      if ((REAL_MODE_FORMAT (from_mode) == &ibm_extended_format
+	   && REAL_MODE_FORMAT (to_mode) == &ieee_quad_format)
+	  || (REAL_MODE_FORMAT (from_mode) == &ieee_quad_format
+	      && REAL_MODE_FORMAT (to_mode) == &ibm_extended_format))
+	return true;
+
+      return false;
+    };
+
   if (to_real)
     {
       rtx value;
@@ -346,18 +369,16 @@ convert_mode_scalar (rtx to, rtx from, int unsignedp)
 
       gcc_assert ((GET_MODE_PRECISION (from_mode)
 		   != GET_MODE_PRECISION (to_mode))
-		  || (DECIMAL_FLOAT_MODE_P (from_mode)
-		      != DECIMAL_FLOAT_MODE_P (to_mode))
-		  || (REAL_MODE_FORMAT (from_mode) == &arm_bfloat_half_format
-		      && REAL_MODE_FORMAT (to_mode) == &ieee_half_format)
-		  || (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
-		      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format));
+		  || acceptable_same_precision_modes (from_mode, to_mode));
 
       if (GET_MODE_PRECISION (from_mode) == GET_MODE_PRECISION (to_mode))
 	{
-	  if (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
-	      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format)
-	    /* libgcc implements just __trunchfbf2, not __extendhfbf2.  */
+	  if ((REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
+	       && REAL_MODE_FORMAT (from_mode) == &ieee_half_format)
+	      || (REAL_MODE_FORMAT (to_mode) == &ieee_quad_format
+		  && REAL_MODE_FORMAT (from_mode) == &ibm_extended_format))
+	    /* libgcc implements just __trunchfbf2, not __extendhfbf2;
+	       and __trunctfkf2, not __trunctfkf2.  */
 	    tab = trunc_optab;
 	  else
 	    /* Conversion between decimal float and binary float, same
diff --git a/gcc/optabs-libfuncs.cc b/gcc/optabs-libfuncs.cc
index 26729910d92..ab97eace80e 100644
--- a/gcc/optabs-libfuncs.cc
+++ b/gcc/optabs-libfuncs.cc
@@ -591,7 +591,9 @@ gen_trunc_conv_libfunc (convert_optab tab,
 
   if (GET_MODE_PRECISION (float_fmode) <= GET_MODE_PRECISION (float_tmode)
       && (REAL_MODE_FORMAT (float_tmode) != &arm_bfloat_half_format
-	  || REAL_MODE_FORMAT (float_fmode) != &ieee_half_format))
+	  || REAL_MODE_FORMAT (float_fmode) != &ieee_half_format)
+      && (REAL_MODE_FORMAT (float_tmode) != &ieee_quad_format
+	  || REAL_MODE_FORMAT (float_fmode) != &ibm_extended_format))
     return;
 
   if (GET_MODE_CLASS (float_tmode) == GET_MODE_CLASS (float_fmode))
Richard Sandiford July 10, 2024, 6:25 p.m. UTC | #5
"Kewen.Lin" <linkw@linux.ibm.com> writes:
> Hi Richard,
>
> on 2024/7/8 19:14, Richard Sandiford wrote:
>> "Kewen.Lin" <linkw@linux.ibm.com> writes:[snip...]
>>>>
>>>> This part looks good to me FWIW, but what's the correct behaviour of:
>>>>
>>>>       if (GET_MODE_PRECISION (from_mode) == GET_MODE_PRECISION (to_mode))
>>>> 	{
>>>> 	  if (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
>>>> 	      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format)
>>>> 	    /* libgcc implements just __trunchfbf2, not __extendhfbf2.  */
>>>> 	    tab = trunc_optab;
>>>> 	  else
>>>> 	    /* Conversion between decimal float and binary float, same
>>>> 	       size.  */
>>>> 	    tab = DECIMAL_FLOAT_MODE_P (from_mode) ? trunc_optab : sext_optab;
>>>>
>>>> for the new pairing?  The intent for bfloat/half seems to be that bfloat
>>>> is treated as arbitrarily “lesser than” half, so half->bfloat is a
>>>> truncation and bfloat->half is an extension.  It seems like it would be
>>>> good to do something similar for the new pair, so that the float modes
>>>> still form a total order in terms of extension & truncation.
>>>
>>> Good question!  If I read it right, this special handling for half->bfloat is
>>> to align with the previous implementation in libgcc and easy for backporting
>>> , but isn't to keep the modes to form a total order, as Jakub's comments
>>> PR114907 #c6 and #c8.  Similar to half vs. bfloat, neither of ibm128 nor ieee128
>>> is a subset or superset of the other, the current behavior for this new paring is
>>> that:
>>>   1) define sext_optab for any two of TF/KF/IF (also bi-direction), since generic
>>>      code above adopts sext_optab for same size conversion.
>> 
>> But before your patch, that code only expected equal precisions if:
>> 
>> 		  || (DECIMAL_FLOAT_MODE_P (from_mode)
>> 		      != DECIMAL_FLOAT_MODE_P (to_mode))
>> 		  || (REAL_MODE_FORMAT (from_mode) == &arm_bfloat_half_format
>> 		      && REAL_MODE_FORMAT (to_mode) == &ieee_half_format)
>> 		  || (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
>> 		      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format)
>> 
>> So the effect was:
>> 
>> binary->decimal: extend
>> decimal->binary: truncate
>> bfloat->IEEE: extend
>> IEEE->bfloat: truncate
>> 
>> AFAICT there was no X and Y for which X->Y and Y->X are both extensions.
>
> Yeah, this is exactly the status quo, though Jakub's comments implied that
> IEEE->bfloat should have been supported with extend.
>
>> 
>>>   2) for each define_expand in 1), it actually calls rs6000_expand_float128_convert
>>>      which is one existing helper to handle conversions to/from __float128 and
>>>      __ibm128, it will take care of the remaining (generate __extend* or __trunc*).
>>>      Similar to half vs. bfloat which has extend and trunc libgcc functions, we
>>>      have some extend and trunc libgcc functions for __float128 vs. __ibm128.
>>>
>>> We can add some special treatment for the new pairing like what's for half vs. bfloat,
>>> but since extend and truncate can be arbitrary for them, I thought we can just define
>>> sext_optab to align with what generic code does and wants, then do the actual handling
>>> in the corresponding expander (someone may argue that sext_optab is expected to be end
>>> with libcall __extend*, it's surprising to end up with __trunc*, but considering
>>> the extend/truncate here is arbitrary, guessing it's acceptable?).
>> 
>> Yeah, using a trunc libgcc function for sext_optab seems surprising to me.
>
> OK, thanks for the input!  Unfortunately it's even worse in rs6000:
>
>       set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
>       if (mode != TFmode && FLOAT128_IBM_P (TFmode))
> 	set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
>
>       set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
>       if (mode != TFmode && FLOAT128_IBM_P (TFmode))
> 	set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
>
> , the optabs and their corresponding libgcc functions look reversed.
> I'll follow up to adjust them as well.
>
>> 
>> Like Jakub said in #c8 above, both extension and truncation are conceptually
>> wrong.  But given that both are wrong, we can't use the literal meaning to
>> choose between them, and need to find something else instead.
>> 
>> Making the sext/trunc choice mirror the current libgcc functions seems
>> cleaner.  It also avoids the (IMO) odd situation that you can construct
>> a valid and infinitely long chain of extensions from a finite set of types.
>> IMO the types should be well ordered if possible.
>
> Good point, from this perspective, bidirectional extend seems not recommended.
>
> One updated version has been attached, which checks trunc optab for ibm128 ->
> ieee128 similar to bfloat -> ieee16 as we have libgcc function __trunctfkf2
> but not __extendtfkf2.  Does it look good to you?

Yeah, looks good to me.  OK if no-one objects before the other parts
are approved.

Thanks,
Richard

> I'll post an updated version for rs6000 part once the testing goes well.
>
> BR,
> Kewen
>
> -----
>
> From 81f9bb0c40817cbecbff63a9925039de8e6333ff Mon Sep 17 00:00:00 2001
> From: Kewen Lin <linkw@linux.ibm.com>
> Date: Tue, 2 Jul 2024 02:03:55 -0500
> Subject: [PATCH 3/4] expr: Allow same precision modes conversion between
>  {ibm_extended, ieee_quad}_format
>
> With some historical reasons, rs6000 defines KFmode, TFmode
> and IFmode to have different mode precisions, but it causes
> some issues and needs some workarounds such as PR112993.
> So we are going to make all rs6000 128 bit scalar FP modes
> have 128 bit precision.  Be prepared for that, this patch
> is to make function convert_mode_scalar allow same precision
> FP modes conversion if their underlying formats are
> ibm_extended_format and ieee_quad_format respectively, just
> like the existing special treatment on arm_bfloat_half_format
> <-> ieee_half_format.  It also factors out all the relevant
> checks into a lambda function.  Besides, similar to ieee fp16
> -> bfloat conversion, it adopts trunc_optab rather than
> sext_optab for ibm128 to ieee128 conversion.
>
> 	PR target/112993
>
> gcc/ChangeLog:
>
> 	* expr.cc (convert_mode_scalar): Allow same precision conversion
> 	between scalar floating point modes if whose underlying format is
> 	ibm_extended_format or ieee_quad_format, and refactor assertion
> 	with new lambda function acceptable_same_precision_modes.  Use
> 	trunc_optab rather than sext_optab for ibm128 to ieee128 conversion.
> 	* optabs-libfuncs.cc (gen_trunc_conv_libfunc): Use trunc_optab rather
> 	than sext_optab for ibm128 to ieee128 conversion.
> ---
>  gcc/expr.cc            | 39 ++++++++++++++++++++++++++++++---------
>  gcc/optabs-libfuncs.cc |  4 +++-
>  2 files changed, 33 insertions(+), 10 deletions(-)
>
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index ffbac513692..34679d464e8 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -338,6 +338,29 @@ convert_mode_scalar (rtx to, rtx from, int unsignedp)
>    enum rtx_code equiv_code = (unsignedp < 0 ? UNKNOWN
>  			      : (unsignedp ? ZERO_EXTEND : SIGN_EXTEND));
>  
> +  auto acceptable_same_precision_modes
> +    = [] (scalar_mode from_mode, scalar_mode to_mode) -> bool
> +    {
> +      if (DECIMAL_FLOAT_MODE_P (from_mode) != DECIMAL_FLOAT_MODE_P (to_mode))
> +	return true;
> +
> +      /* arm_bfloat_half_format <-> ieee_half_format */
> +      if ((REAL_MODE_FORMAT (from_mode) == &arm_bfloat_half_format
> +	   && REAL_MODE_FORMAT (to_mode) == &ieee_half_format)
> +	  || (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
> +	      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format))
> +	return true;
> +
> +      /* ibm_extended_format <-> ieee_quad_format */
> +      if ((REAL_MODE_FORMAT (from_mode) == &ibm_extended_format
> +	   && REAL_MODE_FORMAT (to_mode) == &ieee_quad_format)
> +	  || (REAL_MODE_FORMAT (from_mode) == &ieee_quad_format
> +	      && REAL_MODE_FORMAT (to_mode) == &ibm_extended_format))
> +	return true;
> +
> +      return false;
> +    };
> +
>    if (to_real)
>      {
>        rtx value;
> @@ -346,18 +369,16 @@ convert_mode_scalar (rtx to, rtx from, int unsignedp)
>  
>        gcc_assert ((GET_MODE_PRECISION (from_mode)
>  		   != GET_MODE_PRECISION (to_mode))
> -		  || (DECIMAL_FLOAT_MODE_P (from_mode)
> -		      != DECIMAL_FLOAT_MODE_P (to_mode))
> -		  || (REAL_MODE_FORMAT (from_mode) == &arm_bfloat_half_format
> -		      && REAL_MODE_FORMAT (to_mode) == &ieee_half_format)
> -		  || (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
> -		      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format));
> +		  || acceptable_same_precision_modes (from_mode, to_mode));
>  
>        if (GET_MODE_PRECISION (from_mode) == GET_MODE_PRECISION (to_mode))
>  	{
> -	  if (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
> -	      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format)
> -	    /* libgcc implements just __trunchfbf2, not __extendhfbf2.  */
> +	  if ((REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
> +	       && REAL_MODE_FORMAT (from_mode) == &ieee_half_format)
> +	      || (REAL_MODE_FORMAT (to_mode) == &ieee_quad_format
> +		  && REAL_MODE_FORMAT (from_mode) == &ibm_extended_format))
> +	    /* libgcc implements just __trunchfbf2, not __extendhfbf2;
> +	       and __trunctfkf2, not __trunctfkf2.  */
>  	    tab = trunc_optab;
>  	  else
>  	    /* Conversion between decimal float and binary float, same
> diff --git a/gcc/optabs-libfuncs.cc b/gcc/optabs-libfuncs.cc
> index 26729910d92..ab97eace80e 100644
> --- a/gcc/optabs-libfuncs.cc
> +++ b/gcc/optabs-libfuncs.cc
> @@ -591,7 +591,9 @@ gen_trunc_conv_libfunc (convert_optab tab,
>  
>    if (GET_MODE_PRECISION (float_fmode) <= GET_MODE_PRECISION (float_tmode)
>        && (REAL_MODE_FORMAT (float_tmode) != &arm_bfloat_half_format
> -	  || REAL_MODE_FORMAT (float_fmode) != &ieee_half_format))
> +	  || REAL_MODE_FORMAT (float_fmode) != &ieee_half_format)
> +      && (REAL_MODE_FORMAT (float_tmode) != &ieee_quad_format
> +	  || REAL_MODE_FORMAT (float_fmode) != &ibm_extended_format))
>      return;
>  
>    if (GET_MODE_CLASS (float_tmode) == GET_MODE_CLASS (float_fmode))
diff mbox series

Patch

diff --git a/gcc/expr.cc b/gcc/expr.cc
index ffbac513692..eac4dcc982e 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -338,6 +338,29 @@  convert_mode_scalar (rtx to, rtx from, int unsignedp)
   enum rtx_code equiv_code = (unsignedp < 0 ? UNKNOWN
 			      : (unsignedp ? ZERO_EXTEND : SIGN_EXTEND));

+  auto acceptable_same_precision_modes
+    = [] (scalar_mode from_mode, scalar_mode to_mode) -> bool
+    {
+      if (DECIMAL_FLOAT_MODE_P (from_mode) != DECIMAL_FLOAT_MODE_P (to_mode))
+	return true;
+
+      /* arm_bfloat_half_format <-> ieee_half_format */
+      if ((REAL_MODE_FORMAT (from_mode) == &arm_bfloat_half_format
+	   && REAL_MODE_FORMAT (to_mode) == &ieee_half_format)
+	  || (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
+	      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format))
+	return true;
+
+      /* ibm_extended_format <-> ieee_quad_format */
+      if ((REAL_MODE_FORMAT (from_mode) == &ibm_extended_format
+	   && REAL_MODE_FORMAT (to_mode) == &ieee_quad_format)
+	  || (REAL_MODE_FORMAT (from_mode) == &ieee_quad_format
+	      && REAL_MODE_FORMAT (to_mode) == &ibm_extended_format))
+	return true;
+
+      return false;
+    };
+
   if (to_real)
     {
       rtx value;
@@ -346,12 +369,7 @@  convert_mode_scalar (rtx to, rtx from, int unsignedp)

       gcc_assert ((GET_MODE_PRECISION (from_mode)
 		   != GET_MODE_PRECISION (to_mode))
-		  || (DECIMAL_FLOAT_MODE_P (from_mode)
-		      != DECIMAL_FLOAT_MODE_P (to_mode))
-		  || (REAL_MODE_FORMAT (from_mode) == &arm_bfloat_half_format
-		      && REAL_MODE_FORMAT (to_mode) == &ieee_half_format)
-		  || (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
-		      && REAL_MODE_FORMAT (from_mode) == &ieee_half_format));
+		  || acceptable_same_precision_modes (from_mode, to_mode));

       if (GET_MODE_PRECISION (from_mode) == GET_MODE_PRECISION (to_mode))
 	{