Message ID | 20240816071401.3293749-2-stefansf@gcc.gnu.org |
---|---|
State | New |
Headers | show |
Series | s390: Fix strict_low_part generation | expand |
Ping On Fri, Aug 16, 2024 at 09:14:02AM +0200, Stefan Schulze Frielinghaus wrote: > In s390_expand_insv(), if generating code for ICM et al. src is a MEM > and gen_lowpart might force src into a register such that we end up with > patterns which do not match anymore. Use adjust_address() instead in > order to preserve a MEM. > > Furthermore, it is not straight forward to enforce a subreg. For > example, in case of a paradoxical subreg, gen_lowpart() may return a > register. In order to compensate this, s390_gen_lowpart_subreg() emits > a reference to a pseudo which does not coincide with its definition > which is wrong. Additionally, if dest is a paradoxical subreg, then do > not try to emit a strict_low_part since it could mean that dest was not > initialized even though this might be fixed up later by init-regs. > > Splitter for insn *get_tp_64, *zero_extendhisi2_31, > *zero_extendqisi2_31, *zero_extendqihi2_31 are applied after reload. > Thus, operands[0] is a hard register and gen_lowpart (m, operands[0]) > just returns the hard register for mode m which is fine to use as an > argument for strict_low_part, i.e., we do not need to enforce subregs > here since after reload subregs are supposed to be eliminated anyway. > > This fixes gcc.dg/torture/pr111821.c. > > gcc/ChangeLog: > > * config/s390/s390-protos.h (s390_gen_lowpart_subreg): Remove. > * config/s390/s390.cc (s390_gen_lowpart_subreg): Remove. > (s390_expand_insv): Use adjust_address() and emit a > strict_low_part only in case of a natural subreg. > * config/s390/s390.md: Use gen_lowpart() instead of > s390_gen_lowpart_subreg(). > --- > Bootstrapped and regtested on s390. Ok for mainline,gcc12,gcc13,gcc14? > > gcc/config/s390/s390-protos.h | 1 - > gcc/config/s390/s390.cc | 47 +++++++++++------------------------ > gcc/config/s390/s390.md | 13 +++++----- > 3 files changed, 20 insertions(+), 41 deletions(-) > > diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h > index b4646ccb606..e7ac59d17da 100644 > --- a/gcc/config/s390/s390-protos.h > +++ b/gcc/config/s390/s390-protos.h > @@ -50,7 +50,6 @@ extern void s390_set_has_landing_pad_p (bool); > extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int); > extern int s390_class_max_nregs (enum reg_class, machine_mode); > extern bool s390_return_addr_from_memory(void); > -extern rtx s390_gen_lowpart_subreg (machine_mode, rtx); > extern bool s390_fma_allowed_p (machine_mode); > #if S390_USE_TARGET_ATTRIBUTE > extern tree s390_valid_target_attribute_tree (tree args, > diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc > index 7aea776da2f..7cdcebfc08b 100644 > --- a/gcc/config/s390/s390.cc > +++ b/gcc/config/s390/s390.cc > @@ -516,31 +516,6 @@ s390_return_addr_from_memory () > return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK; > } > > -/* Generate a SUBREG for the MODE lowpart of EXPR. > - > - In contrast to gen_lowpart it will always return a SUBREG > - expression. This is useful to generate STRICT_LOW_PART > - expressions. */ > -rtx > -s390_gen_lowpart_subreg (machine_mode mode, rtx expr) > -{ > - rtx lowpart = gen_lowpart (mode, expr); > - > - /* There might be no SUBREG in case it could be applied to the hard > - REG rtx or it could be folded with a paradoxical subreg. Bring > - it back. */ > - if (!SUBREG_P (lowpart)) > - { > - machine_mode reg_mode = TARGET_ZARCH ? DImode : SImode; > - gcc_assert (REG_P (lowpart)); > - lowpart = gen_lowpart_SUBREG (mode, > - gen_rtx_REG (reg_mode, > - REGNO (lowpart))); > - } > - > - return lowpart; > -} > - > /* Return nonzero if it's OK to use fused multiply-add for MODE. */ > bool > s390_fma_allowed_p (machine_mode mode) > @@ -7112,15 +7087,21 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src) > /* Emit a strict_low_part pattern if possible. */ > if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize) > { > - rtx low_dest = s390_gen_lowpart_subreg (smode, dest); > - rtx low_src = gen_lowpart (smode, src); > - > - switch (smode) > + rtx low_dest = gen_lowpart (smode, dest); > + if (SUBREG_P (low_dest) && !paradoxical_subreg_p (low_dest)) > { > - case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src)); return true; > - case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src)); return true; > - case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src)); return true; > - default: break; > + poly_int64 offset = GET_MODE_SIZE (mode) - GET_MODE_SIZE (smode); > + rtx low_src = adjust_address (src, smode, offset); > + switch (smode) > + { > + case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src)); > + return true; > + case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src)); > + return true; > + case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src)); > + return true; > + default: break; > + } > } > } > > diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md > index 3d5759d6252..592cf62d962 100644 > --- a/gcc/config/s390/s390.md > +++ b/gcc/config/s390/s390.md > @@ -1974,12 +1974,11 @@ > "TARGET_ZARCH" > "#" > "&& reload_completed" > - [(set (match_dup 2) (match_dup 4)) > + [(set (match_dup 2) (match_dup 3)) > (set (match_dup 0) (ashift:DI (match_dup 0) (const_int 32))) > - (set (strict_low_part (match_dup 3)) (match_dup 5))] > + (set (strict_low_part (match_dup 2)) (match_dup 4))] > "operands[2] = gen_lowpart (SImode, operands[0]); > - operands[3] = s390_gen_lowpart_subreg (SImode, operands[0]); > - s390_split_access_reg (operands[1], &operands[5], &operands[4]);") > + s390_split_access_reg (operands[1], &operands[4], &operands[3]);") > > ; Splitters for storing TLS pointer to %a0:DI. > > @@ -5068,7 +5067,7 @@ > (parallel > [(set (strict_low_part (match_dup 2)) (match_dup 1)) > (clobber (reg:CC CC_REGNUM))])] > - "operands[2] = s390_gen_lowpart_subreg (HImode, operands[0]);") > + "operands[2] = gen_lowpart (HImode, operands[0]);") > > (define_insn_and_split "*zero_extendqisi2_31" > [(set (match_operand:SI 0 "register_operand" "=&d") > @@ -5078,7 +5077,7 @@ > "&& reload_completed" > [(set (match_dup 0) (const_int 0)) > (set (strict_low_part (match_dup 2)) (match_dup 1))] > - "operands[2] = s390_gen_lowpart_subreg (QImode, operands[0]);") > + "operands[2] = gen_lowpart (QImode, operands[0]);") > > ; > ; zero_extendqihi2 instruction pattern(s). > @@ -5110,7 +5109,7 @@ > "&& reload_completed" > [(set (match_dup 0) (const_int 0)) > (set (strict_low_part (match_dup 2)) (match_dup 1))] > - "operands[2] = s390_gen_lowpart_subreg (QImode, operands[0]);") > + "operands[2] = gen_lowpart (QImode, operands[0]);") > > ; > ; fixuns_trunc(dd|td|sf|df|tf)(si|di)2 expander > -- > 2.45.2 >
On 8/16/24 09:14, Stefan Schulze Frielinghaus wrote: > In s390_expand_insv(), if generating code for ICM et al. src is a MEM > and gen_lowpart might force src into a register such that we end up with > patterns which do not match anymore. Use adjust_address() instead in > order to preserve a MEM. > > Furthermore, it is not straight forward to enforce a subreg. For > example, in case of a paradoxical subreg, gen_lowpart() may return a > register. In order to compensate this, s390_gen_lowpart_subreg() emits > a reference to a pseudo which does not coincide with its definition > which is wrong. Additionally, if dest is a paradoxical subreg, then do > not try to emit a strict_low_part since it could mean that dest was not > initialized even though this might be fixed up later by init-regs. > > Splitter for insn *get_tp_64, *zero_extendhisi2_31, > *zero_extendqisi2_31, *zero_extendqihi2_31 are applied after reload. > Thus, operands[0] is a hard register and gen_lowpart (m, operands[0]) > just returns the hard register for mode m which is fine to use as an > argument for strict_low_part, i.e., we do not need to enforce subregs > here since after reload subregs are supposed to be eliminated anyway. > > This fixes gcc.dg/torture/pr111821.c. > > gcc/ChangeLog: > > * config/s390/s390-protos.h (s390_gen_lowpart_subreg): Remove. > * config/s390/s390.cc (s390_gen_lowpart_subreg): Remove. > (s390_expand_insv): Use adjust_address() and emit a > strict_low_part only in case of a natural subreg. > * config/s390/s390.md: Use gen_lowpart() instead of > s390_gen_lowpart_subreg(). Ok. Thanks! Andreas
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h index b4646ccb606..e7ac59d17da 100644 --- a/gcc/config/s390/s390-protos.h +++ b/gcc/config/s390/s390-protos.h @@ -50,7 +50,6 @@ extern void s390_set_has_landing_pad_p (bool); extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int); extern int s390_class_max_nregs (enum reg_class, machine_mode); extern bool s390_return_addr_from_memory(void); -extern rtx s390_gen_lowpart_subreg (machine_mode, rtx); extern bool s390_fma_allowed_p (machine_mode); #if S390_USE_TARGET_ATTRIBUTE extern tree s390_valid_target_attribute_tree (tree args, diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index 7aea776da2f..7cdcebfc08b 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -516,31 +516,6 @@ s390_return_addr_from_memory () return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK; } -/* Generate a SUBREG for the MODE lowpart of EXPR. - - In contrast to gen_lowpart it will always return a SUBREG - expression. This is useful to generate STRICT_LOW_PART - expressions. */ -rtx -s390_gen_lowpart_subreg (machine_mode mode, rtx expr) -{ - rtx lowpart = gen_lowpart (mode, expr); - - /* There might be no SUBREG in case it could be applied to the hard - REG rtx or it could be folded with a paradoxical subreg. Bring - it back. */ - if (!SUBREG_P (lowpart)) - { - machine_mode reg_mode = TARGET_ZARCH ? DImode : SImode; - gcc_assert (REG_P (lowpart)); - lowpart = gen_lowpart_SUBREG (mode, - gen_rtx_REG (reg_mode, - REGNO (lowpart))); - } - - return lowpart; -} - /* Return nonzero if it's OK to use fused multiply-add for MODE. */ bool s390_fma_allowed_p (machine_mode mode) @@ -7112,15 +7087,21 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src) /* Emit a strict_low_part pattern if possible. */ if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize) { - rtx low_dest = s390_gen_lowpart_subreg (smode, dest); - rtx low_src = gen_lowpart (smode, src); - - switch (smode) + rtx low_dest = gen_lowpart (smode, dest); + if (SUBREG_P (low_dest) && !paradoxical_subreg_p (low_dest)) { - case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src)); return true; - case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src)); return true; - case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src)); return true; - default: break; + poly_int64 offset = GET_MODE_SIZE (mode) - GET_MODE_SIZE (smode); + rtx low_src = adjust_address (src, smode, offset); + switch (smode) + { + case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src)); + return true; + case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src)); + return true; + case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src)); + return true; + default: break; + } } } diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 3d5759d6252..592cf62d962 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -1974,12 +1974,11 @@ "TARGET_ZARCH" "#" "&& reload_completed" - [(set (match_dup 2) (match_dup 4)) + [(set (match_dup 2) (match_dup 3)) (set (match_dup 0) (ashift:DI (match_dup 0) (const_int 32))) - (set (strict_low_part (match_dup 3)) (match_dup 5))] + (set (strict_low_part (match_dup 2)) (match_dup 4))] "operands[2] = gen_lowpart (SImode, operands[0]); - operands[3] = s390_gen_lowpart_subreg (SImode, operands[0]); - s390_split_access_reg (operands[1], &operands[5], &operands[4]);") + s390_split_access_reg (operands[1], &operands[4], &operands[3]);") ; Splitters for storing TLS pointer to %a0:DI. @@ -5068,7 +5067,7 @@ (parallel [(set (strict_low_part (match_dup 2)) (match_dup 1)) (clobber (reg:CC CC_REGNUM))])] - "operands[2] = s390_gen_lowpart_subreg (HImode, operands[0]);") + "operands[2] = gen_lowpart (HImode, operands[0]);") (define_insn_and_split "*zero_extendqisi2_31" [(set (match_operand:SI 0 "register_operand" "=&d") @@ -5078,7 +5077,7 @@ "&& reload_completed" [(set (match_dup 0) (const_int 0)) (set (strict_low_part (match_dup 2)) (match_dup 1))] - "operands[2] = s390_gen_lowpart_subreg (QImode, operands[0]);") + "operands[2] = gen_lowpart (QImode, operands[0]);") ; ; zero_extendqihi2 instruction pattern(s). @@ -5110,7 +5109,7 @@ "&& reload_completed" [(set (match_dup 0) (const_int 0)) (set (strict_low_part (match_dup 2)) (match_dup 1))] - "operands[2] = s390_gen_lowpart_subreg (QImode, operands[0]);") + "operands[2] = gen_lowpart (QImode, operands[0]);") ; ; fixuns_trunc(dd|td|sf|df|tf)(si|di)2 expander