diff mbox series

s390: Fix strict_low_part generation

Message ID 20240816071401.3293749-2-stefansf@gcc.gnu.org
State New
Headers show
Series s390: Fix strict_low_part generation | expand

Commit Message

Stefan Schulze Frielinghaus Aug. 16, 2024, 7:14 a.m. UTC
In s390_expand_insv(), if generating code for ICM et al. src is a MEM
and gen_lowpart might force src into a register such that we end up with
patterns which do not match anymore.  Use adjust_address() instead in
order to preserve a MEM.

Furthermore, it is not straight forward to enforce a subreg.  For
example, in case of a paradoxical subreg, gen_lowpart() may return a
register.  In order to compensate this, s390_gen_lowpart_subreg() emits
a reference to a pseudo which does not coincide with its definition
which is wrong.  Additionally, if dest is a paradoxical subreg, then do
not try to emit a strict_low_part since it could mean that dest was not
initialized even though this might be fixed up later by init-regs.

Splitter for insn *get_tp_64, *zero_extendhisi2_31,
*zero_extendqisi2_31, *zero_extendqihi2_31 are applied after reload.
Thus, operands[0] is a hard register and gen_lowpart (m, operands[0])
just returns the hard register for mode m which is fine to use as an
argument for strict_low_part, i.e., we do not need to enforce subregs
here since after reload subregs are supposed to be eliminated anyway.

This fixes gcc.dg/torture/pr111821.c.

gcc/ChangeLog:

	* config/s390/s390-protos.h (s390_gen_lowpart_subreg): Remove.
	* config/s390/s390.cc (s390_gen_lowpart_subreg): Remove.
	(s390_expand_insv): Use adjust_address() and emit a
	strict_low_part only in case of a natural subreg.
	* config/s390/s390.md: Use gen_lowpart() instead of
	s390_gen_lowpart_subreg().
---
 Bootstrapped and regtested on s390.  Ok for mainline,gcc12,gcc13,gcc14?

 gcc/config/s390/s390-protos.h |  1 -
 gcc/config/s390/s390.cc       | 47 +++++++++++------------------------
 gcc/config/s390/s390.md       | 13 +++++-----
 3 files changed, 20 insertions(+), 41 deletions(-)

Comments

Stefan Schulze Frielinghaus Sept. 9, 2024, 6:11 a.m. UTC | #1
Ping

On Fri, Aug 16, 2024 at 09:14:02AM +0200, Stefan Schulze Frielinghaus wrote:
> In s390_expand_insv(), if generating code for ICM et al. src is a MEM
> and gen_lowpart might force src into a register such that we end up with
> patterns which do not match anymore.  Use adjust_address() instead in
> order to preserve a MEM.
> 
> Furthermore, it is not straight forward to enforce a subreg.  For
> example, in case of a paradoxical subreg, gen_lowpart() may return a
> register.  In order to compensate this, s390_gen_lowpart_subreg() emits
> a reference to a pseudo which does not coincide with its definition
> which is wrong.  Additionally, if dest is a paradoxical subreg, then do
> not try to emit a strict_low_part since it could mean that dest was not
> initialized even though this might be fixed up later by init-regs.
> 
> Splitter for insn *get_tp_64, *zero_extendhisi2_31,
> *zero_extendqisi2_31, *zero_extendqihi2_31 are applied after reload.
> Thus, operands[0] is a hard register and gen_lowpart (m, operands[0])
> just returns the hard register for mode m which is fine to use as an
> argument for strict_low_part, i.e., we do not need to enforce subregs
> here since after reload subregs are supposed to be eliminated anyway.
> 
> This fixes gcc.dg/torture/pr111821.c.
> 
> gcc/ChangeLog:
> 
> 	* config/s390/s390-protos.h (s390_gen_lowpart_subreg): Remove.
> 	* config/s390/s390.cc (s390_gen_lowpart_subreg): Remove.
> 	(s390_expand_insv): Use adjust_address() and emit a
> 	strict_low_part only in case of a natural subreg.
> 	* config/s390/s390.md: Use gen_lowpart() instead of
> 	s390_gen_lowpart_subreg().
> ---
>  Bootstrapped and regtested on s390.  Ok for mainline,gcc12,gcc13,gcc14?
> 
>  gcc/config/s390/s390-protos.h |  1 -
>  gcc/config/s390/s390.cc       | 47 +++++++++++------------------------
>  gcc/config/s390/s390.md       | 13 +++++-----
>  3 files changed, 20 insertions(+), 41 deletions(-)
> 
> diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
> index b4646ccb606..e7ac59d17da 100644
> --- a/gcc/config/s390/s390-protos.h
> +++ b/gcc/config/s390/s390-protos.h
> @@ -50,7 +50,6 @@ extern void s390_set_has_landing_pad_p (bool);
>  extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int);
>  extern int s390_class_max_nregs (enum reg_class, machine_mode);
>  extern bool s390_return_addr_from_memory(void);
> -extern rtx s390_gen_lowpart_subreg (machine_mode, rtx);
>  extern bool s390_fma_allowed_p (machine_mode);
>  #if S390_USE_TARGET_ATTRIBUTE
>  extern tree s390_valid_target_attribute_tree (tree args,
> diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
> index 7aea776da2f..7cdcebfc08b 100644
> --- a/gcc/config/s390/s390.cc
> +++ b/gcc/config/s390/s390.cc
> @@ -516,31 +516,6 @@ s390_return_addr_from_memory ()
>    return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
>  }
>  
> -/* Generate a SUBREG for the MODE lowpart of EXPR.
> -
> -   In contrast to gen_lowpart it will always return a SUBREG
> -   expression.  This is useful to generate STRICT_LOW_PART
> -   expressions.  */
> -rtx
> -s390_gen_lowpart_subreg (machine_mode mode, rtx expr)
> -{
> -  rtx lowpart = gen_lowpart (mode, expr);
> -
> -  /* There might be no SUBREG in case it could be applied to the hard
> -     REG rtx or it could be folded with a paradoxical subreg.  Bring
> -     it back.  */
> -  if (!SUBREG_P (lowpart))
> -    {
> -      machine_mode reg_mode = TARGET_ZARCH ? DImode : SImode;
> -      gcc_assert (REG_P (lowpart));
> -      lowpart = gen_lowpart_SUBREG (mode,
> -				    gen_rtx_REG (reg_mode,
> -						 REGNO (lowpart)));
> -    }
> -
> -  return lowpart;
> -}
> -
>  /* Return nonzero if it's OK to use fused multiply-add for MODE.  */
>  bool
>  s390_fma_allowed_p (machine_mode mode)
> @@ -7112,15 +7087,21 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
>        /* Emit a strict_low_part pattern if possible.  */
>        if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
>  	{
> -	  rtx low_dest = s390_gen_lowpart_subreg (smode, dest);
> -	  rtx low_src = gen_lowpart (smode, src);
> -
> -	  switch (smode)
> +	  rtx low_dest = gen_lowpart (smode, dest);
> +	  if (SUBREG_P (low_dest) && !paradoxical_subreg_p (low_dest))
>  	    {
> -	    case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src)); return true;
> -	    case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src)); return true;
> -	    case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src)); return true;
> -	    default: break;
> +	      poly_int64 offset = GET_MODE_SIZE (mode) - GET_MODE_SIZE (smode);
> +	      rtx low_src = adjust_address (src, smode, offset);
> +	      switch (smode)
> +		{
> +		case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src));
> +			       return true;
> +		case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src));
> +			       return true;
> +		case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src));
> +			       return true;
> +		default: break;
> +		}
>  	    }
>  	}
>  
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index 3d5759d6252..592cf62d962 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -1974,12 +1974,11 @@
>    "TARGET_ZARCH"
>    "#"
>    "&& reload_completed"
> -  [(set (match_dup 2) (match_dup 4))
> +  [(set (match_dup 2) (match_dup 3))
>     (set (match_dup 0) (ashift:DI (match_dup 0) (const_int 32)))
> -   (set (strict_low_part (match_dup 3)) (match_dup 5))]
> +   (set (strict_low_part (match_dup 2)) (match_dup 4))]
>    "operands[2] = gen_lowpart (SImode, operands[0]);
> -   operands[3] = s390_gen_lowpart_subreg (SImode, operands[0]);
> -   s390_split_access_reg (operands[1], &operands[5], &operands[4]);")
> +   s390_split_access_reg (operands[1], &operands[4], &operands[3]);")
>  
>  ; Splitters for storing TLS pointer to %a0:DI.
>  
> @@ -5068,7 +5067,7 @@
>     (parallel
>      [(set (strict_low_part (match_dup 2)) (match_dup 1))
>       (clobber (reg:CC CC_REGNUM))])]
> -  "operands[2] = s390_gen_lowpart_subreg (HImode, operands[0]);")
> +  "operands[2] = gen_lowpart (HImode, operands[0]);")
>  
>  (define_insn_and_split "*zero_extendqisi2_31"
>    [(set (match_operand:SI 0 "register_operand" "=&d")
> @@ -5078,7 +5077,7 @@
>    "&& reload_completed"
>    [(set (match_dup 0) (const_int 0))
>     (set (strict_low_part (match_dup 2)) (match_dup 1))]
> -  "operands[2] = s390_gen_lowpart_subreg (QImode, operands[0]);")
> +  "operands[2] = gen_lowpart (QImode, operands[0]);")
>  
>  ;
>  ; zero_extendqihi2 instruction pattern(s).
> @@ -5110,7 +5109,7 @@
>    "&& reload_completed"
>    [(set (match_dup 0) (const_int 0))
>     (set (strict_low_part (match_dup 2)) (match_dup 1))]
> -  "operands[2] = s390_gen_lowpart_subreg (QImode, operands[0]);")
> +  "operands[2] = gen_lowpart (QImode, operands[0]);")
>  
>  ;
>  ; fixuns_trunc(dd|td|sf|df|tf)(si|di)2 expander
> -- 
> 2.45.2
>
Andreas Krebbel Sept. 12, 2024, 6:44 a.m. UTC | #2
On 8/16/24 09:14, Stefan Schulze Frielinghaus wrote:
> In s390_expand_insv(), if generating code for ICM et al. src is a MEM
> and gen_lowpart might force src into a register such that we end up with
> patterns which do not match anymore.  Use adjust_address() instead in
> order to preserve a MEM.
>
> Furthermore, it is not straight forward to enforce a subreg.  For
> example, in case of a paradoxical subreg, gen_lowpart() may return a
> register.  In order to compensate this, s390_gen_lowpart_subreg() emits
> a reference to a pseudo which does not coincide with its definition
> which is wrong.  Additionally, if dest is a paradoxical subreg, then do
> not try to emit a strict_low_part since it could mean that dest was not
> initialized even though this might be fixed up later by init-regs.
>
> Splitter for insn *get_tp_64, *zero_extendhisi2_31,
> *zero_extendqisi2_31, *zero_extendqihi2_31 are applied after reload.
> Thus, operands[0] is a hard register and gen_lowpart (m, operands[0])
> just returns the hard register for mode m which is fine to use as an
> argument for strict_low_part, i.e., we do not need to enforce subregs
> here since after reload subregs are supposed to be eliminated anyway.
>
> This fixes gcc.dg/torture/pr111821.c.
>
> gcc/ChangeLog:
>
> 	* config/s390/s390-protos.h (s390_gen_lowpart_subreg): Remove.
> 	* config/s390/s390.cc (s390_gen_lowpart_subreg): Remove.
> 	(s390_expand_insv): Use adjust_address() and emit a
> 	strict_low_part only in case of a natural subreg.
> 	* config/s390/s390.md: Use gen_lowpart() instead of
> 	s390_gen_lowpart_subreg().

Ok. Thanks!


Andreas
diff mbox series

Patch

diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index b4646ccb606..e7ac59d17da 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -50,7 +50,6 @@  extern void s390_set_has_landing_pad_p (bool);
 extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int);
 extern int s390_class_max_nregs (enum reg_class, machine_mode);
 extern bool s390_return_addr_from_memory(void);
-extern rtx s390_gen_lowpart_subreg (machine_mode, rtx);
 extern bool s390_fma_allowed_p (machine_mode);
 #if S390_USE_TARGET_ATTRIBUTE
 extern tree s390_valid_target_attribute_tree (tree args,
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 7aea776da2f..7cdcebfc08b 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -516,31 +516,6 @@  s390_return_addr_from_memory ()
   return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
 }
 
-/* Generate a SUBREG for the MODE lowpart of EXPR.
-
-   In contrast to gen_lowpart it will always return a SUBREG
-   expression.  This is useful to generate STRICT_LOW_PART
-   expressions.  */
-rtx
-s390_gen_lowpart_subreg (machine_mode mode, rtx expr)
-{
-  rtx lowpart = gen_lowpart (mode, expr);
-
-  /* There might be no SUBREG in case it could be applied to the hard
-     REG rtx or it could be folded with a paradoxical subreg.  Bring
-     it back.  */
-  if (!SUBREG_P (lowpart))
-    {
-      machine_mode reg_mode = TARGET_ZARCH ? DImode : SImode;
-      gcc_assert (REG_P (lowpart));
-      lowpart = gen_lowpart_SUBREG (mode,
-				    gen_rtx_REG (reg_mode,
-						 REGNO (lowpart)));
-    }
-
-  return lowpart;
-}
-
 /* Return nonzero if it's OK to use fused multiply-add for MODE.  */
 bool
 s390_fma_allowed_p (machine_mode mode)
@@ -7112,15 +7087,21 @@  s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
       /* Emit a strict_low_part pattern if possible.  */
       if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
 	{
-	  rtx low_dest = s390_gen_lowpart_subreg (smode, dest);
-	  rtx low_src = gen_lowpart (smode, src);
-
-	  switch (smode)
+	  rtx low_dest = gen_lowpart (smode, dest);
+	  if (SUBREG_P (low_dest) && !paradoxical_subreg_p (low_dest))
 	    {
-	    case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src)); return true;
-	    case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src)); return true;
-	    case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src)); return true;
-	    default: break;
+	      poly_int64 offset = GET_MODE_SIZE (mode) - GET_MODE_SIZE (smode);
+	      rtx low_src = adjust_address (src, smode, offset);
+	      switch (smode)
+		{
+		case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src));
+			       return true;
+		case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src));
+			       return true;
+		case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src));
+			       return true;
+		default: break;
+		}
 	    }
 	}
 
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 3d5759d6252..592cf62d962 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -1974,12 +1974,11 @@ 
   "TARGET_ZARCH"
   "#"
   "&& reload_completed"
-  [(set (match_dup 2) (match_dup 4))
+  [(set (match_dup 2) (match_dup 3))
    (set (match_dup 0) (ashift:DI (match_dup 0) (const_int 32)))
-   (set (strict_low_part (match_dup 3)) (match_dup 5))]
+   (set (strict_low_part (match_dup 2)) (match_dup 4))]
   "operands[2] = gen_lowpart (SImode, operands[0]);
-   operands[3] = s390_gen_lowpart_subreg (SImode, operands[0]);
-   s390_split_access_reg (operands[1], &operands[5], &operands[4]);")
+   s390_split_access_reg (operands[1], &operands[4], &operands[3]);")
 
 ; Splitters for storing TLS pointer to %a0:DI.
 
@@ -5068,7 +5067,7 @@ 
    (parallel
     [(set (strict_low_part (match_dup 2)) (match_dup 1))
      (clobber (reg:CC CC_REGNUM))])]
-  "operands[2] = s390_gen_lowpart_subreg (HImode, operands[0]);")
+  "operands[2] = gen_lowpart (HImode, operands[0]);")
 
 (define_insn_and_split "*zero_extendqisi2_31"
   [(set (match_operand:SI 0 "register_operand" "=&d")
@@ -5078,7 +5077,7 @@ 
   "&& reload_completed"
   [(set (match_dup 0) (const_int 0))
    (set (strict_low_part (match_dup 2)) (match_dup 1))]
-  "operands[2] = s390_gen_lowpart_subreg (QImode, operands[0]);")
+  "operands[2] = gen_lowpart (QImode, operands[0]);")
 
 ;
 ; zero_extendqihi2 instruction pattern(s).
@@ -5110,7 +5109,7 @@ 
   "&& reload_completed"
   [(set (match_dup 0) (const_int 0))
    (set (strict_low_part (match_dup 2)) (match_dup 1))]
-  "operands[2] = s390_gen_lowpart_subreg (QImode, operands[0]);")
+  "operands[2] = gen_lowpart (QImode, operands[0]);")
 
 ;
 ; fixuns_trunc(dd|td|sf|df|tf)(si|di)2 expander