diff mbox series

[RFC] ARM: thumb1: fix bad code emitted when HI_REGS involved

Message ID 20240620090918.3074580-1-lis8215@gmail.com
State New
Headers show
Series [RFC] ARM: thumb1: fix bad code emitted when HI_REGS involved | expand

Commit Message

Siarhei Volkau June 20, 2024, 9:09 a.m. UTC
This patch deals with consequences but not the root cause though.

There are 5 cases which are subjects to rewrite:
case #1:
  mov ip, r1
  add r2, ip
  # ip is dead here
can be rewritten as:
  adds r2, r1

case #2:
  add ip, r1
  mov r1, ip
  # ip is dead here
can be rewritten as:
  add r1, ip

case #3:
  mov ip, r1
  add r2, ip
  add r3, ip
  # ip is dead here
can be rewritten as:
  adds r2, r1
  adds r3, r1

case #4:
  mov ip, r1
  add ip, r2
  mov r1, ip
can be rewritten as:
  adds r1, r2
  mov  ip, r1 <- might be eliminated too, if ip is dead

case #5 (arbitrary):
  mov  r1, ip
  subs r2, r1, r2
  mov  ip, r2
  # r1 is dead here
can be rewritten as:
  rsbs r1, r2, #0
  add  ip, r1
  movs r2, ip <- might be eliminated, if r2 is dead

Speed profit wasn't checked but size changes are the following:
   libgcc:  -132 bytes / -0.25%
     libc: -1262 bytes / -0.55%
     libm:  -384 bytes / -0.42%
libstdc++: -2258 bytes / -0.30%

No tests provided because its hard to force GCC to emit HI_REGS
in a small and straightforward function.

Signed-off-by: Siarhei Volkau <lis8215@gmail.com>
---
 gcc/config/arm/thumb1.md | 93 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 92 insertions(+), 1 deletion(-)

Comments

Siarhei Volkau July 8, 2024, 8:56 a.m. UTC | #1
ping

чт, 20 июн. 2024 г. в 12:09, Siarhei Volkau <lis8215@gmail.com>:
>
> This patch deals with consequences but not the root cause though.
>
> There are 5 cases which are subjects to rewrite:
> case #1:
>   mov ip, r1
>   add r2, ip
>   # ip is dead here
> can be rewritten as:
>   adds r2, r1
>
> case #2:
>   add ip, r1
>   mov r1, ip
>   # ip is dead here
> can be rewritten as:
>   add r1, ip
>
> case #3:
>   mov ip, r1
>   add r2, ip
>   add r3, ip
>   # ip is dead here
> can be rewritten as:
>   adds r2, r1
>   adds r3, r1
>
> case #4:
>   mov ip, r1
>   add ip, r2
>   mov r1, ip
> can be rewritten as:
>   adds r1, r2
>   mov  ip, r1 <- might be eliminated too, if ip is dead
>
> case #5 (arbitrary):
>   mov  r1, ip
>   subs r2, r1, r2
>   mov  ip, r2
>   # r1 is dead here
> can be rewritten as:
>   rsbs r1, r2, #0
>   add  ip, r1
>   movs r2, ip <- might be eliminated, if r2 is dead
>
> Speed profit wasn't checked but size changes are the following:
>    libgcc:  -132 bytes / -0.25%
>      libc: -1262 bytes / -0.55%
>      libm:  -384 bytes / -0.42%
> libstdc++: -2258 bytes / -0.30%
>
> No tests provided because its hard to force GCC to emit HI_REGS
> in a small and straightforward function.
>
> Signed-off-by: Siarhei Volkau <lis8215@gmail.com>
> ---
>  gcc/config/arm/thumb1.md | 93 +++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 92 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
> index d7074b43f60..9da4af9eccd 100644
> --- a/gcc/config/arm/thumb1.md
> +++ b/gcc/config/arm/thumb1.md
> @@ -2055,4 +2055,95 @@ (define_insn "thumb1_stack_protect_test_insn"
>     (set_attr "conds" "clob")
>     (set_attr "type" "multiple")]
>  )
> -
> +
> +;; bad code emitted when HI_REGS involved in addition
> +;; subtract also might happen rarely
> +
> +;; case #1:
> +;; mov ip, r1
> +;; add r2, ip # ip is dead after that
> +(define_peephole2
> +  [(set (match_operand:SI 0 "register_operand" "")
> +       (match_operand:SI 1 "register_operand" ""))
> +   (set (match_operand:SI 2 "register_operand" "")
> +       (plus:SI (match_dup 2) (match_dup 0)))]
> +  "TARGET_THUMB1
> +    && peep2_reg_dead_p (2, operands[0])
> +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> +  [(set (match_dup 2)
> +       (plus:SI (match_dup 2) (match_dup 1)))]
> +  "")
> +
> +;; case #2:
> +;; add ip, r1
> +;; mov r1, ip # ip is dead after that
> +(define_peephole2
> +  [(set (match_operand:SI 0 "register_operand" "")
> +       (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "")))
> +   (set (match_dup 1) (match_dup 0))]
> +  "TARGET_THUMB1
> +    && peep2_reg_dead_p (2, operands[0])
> +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> +  [(set (match_dup 1)
> +       (plus:SI (match_dup 1) (match_dup 0)))]
> +  "")
> +
> +;; case #3:
> +;; mov ip, r1
> +;; add r2, ip
> +;; add r3, ip # ip is dead after that
> +(define_peephole2
> +  [(set (match_operand:SI 0 "register_operand" "")
> +       (match_operand:SI 1 "register_operand" ""))
> +   (set (match_operand:SI 2 "register_operand" "")
> +       (plus:SI (match_dup 2) (match_dup 0)))
> +   (set (match_operand:SI 3 "register_operand" "")
> +       (plus:SI (match_dup 3) (match_dup 0)))]
> +  "TARGET_THUMB1
> +    && peep2_reg_dead_p (3, operands[0])
> +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> +  [(set (match_dup 2)
> +       (plus:SI (match_dup 2) (match_dup 1)))
> +   (set (match_dup 3)
> +       (plus:SI (match_dup 3) (match_dup 1)))]
> +  "")
> +
> +;; case #4:
> +;; mov ip, r1
> +;; add ip, r2
> +;; mov r1, ip
> +(define_peephole2
> +  [(set (match_operand:SI 0 "register_operand" "")
> +       (match_operand:SI 1 "register_operand" ""))
> +   (set (match_dup 0)
> +       (plus:SI (match_dup 0) (match_operand:SI 2 "register_operand" "")))
> +   (set (match_dup 1)
> +       (match_dup 0))]
> +  "TARGET_THUMB1
> +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> +  [(set (match_dup 1)
> +       (plus:SI (match_dup 1) (match_dup 2)))
> +   (set (match_dup 0) (match_dup 1))]  ;; likely will be eliminated
> +  "")
> +
> +;; case #5:
> +;; mov  r1, ip
> +;; subs r2, r1, r2
> +;; mov  ip, r2      # r1 is dead after
> +(define_peephole2
> +  [(set (match_operand:SI 1 "register_operand" "")
> +       (match_operand:SI 0 "register_operand" ""))
> +   (set (match_operand:SI 2 "register_operand" "")
> +        (minus:SI (match_dup 1) (match_dup 2)))
> +   (set (match_dup 0)
> +       (match_dup 2))]
> +  "TARGET_THUMB1
> +    && peep2_reg_dead_p (3, operands[1])
> +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> +  [(set (match_dup 1)
> +       (neg:SI (match_dup 2)))
> +   (set (match_dup 0)
> +        (plus:SI (match_dup 0) (match_dup 1)))
> +   (set (match_dup 2)                  ;; likely will be eliminated
> +        (match_dup 0))]
> +  "")
> --
> 2.45.2
>
Christophe Lyon Oct. 4, 2024, 1:48 p.m. UTC | #2
Hi!


On Mon, 8 Jul 2024 at 10:57, Siarhei Volkau <lis8215@gmail.com> wrote:
>
> ping
>
> чт, 20 июн. 2024 г. в 12:09, Siarhei Volkau <lis8215@gmail.com>:
> >
> > This patch deals with consequences but not the root cause though.
> >
> > There are 5 cases which are subjects to rewrite:
> > case #1:
> >   mov ip, r1
> >   add r2, ip
> >   # ip is dead here
> > can be rewritten as:
> >   adds r2, r1

Why replace 'add' with 'adds' ?

Thanks,

Christophe

> >
> > case #2:
> >   add ip, r1
> >   mov r1, ip
> >   # ip is dead here
> > can be rewritten as:
> >   add r1, ip
> >
> > case #3:
> >   mov ip, r1
> >   add r2, ip
> >   add r3, ip
> >   # ip is dead here
> > can be rewritten as:
> >   adds r2, r1
> >   adds r3, r1
> >
> > case #4:
> >   mov ip, r1
> >   add ip, r2
> >   mov r1, ip
> > can be rewritten as:
> >   adds r1, r2
> >   mov  ip, r1 <- might be eliminated too, if ip is dead
> >
> > case #5 (arbitrary):
> >   mov  r1, ip
> >   subs r2, r1, r2
> >   mov  ip, r2
> >   # r1 is dead here
> > can be rewritten as:
> >   rsbs r1, r2, #0
> >   add  ip, r1
> >   movs r2, ip <- might be eliminated, if r2 is dead
> >
> > Speed profit wasn't checked but size changes are the following:
> >    libgcc:  -132 bytes / -0.25%
> >      libc: -1262 bytes / -0.55%
> >      libm:  -384 bytes / -0.42%
> > libstdc++: -2258 bytes / -0.30%
> >
> > No tests provided because its hard to force GCC to emit HI_REGS
> > in a small and straightforward function.
> >
> > Signed-off-by: Siarhei Volkau <lis8215@gmail.com>
> > ---
> >  gcc/config/arm/thumb1.md | 93 +++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 92 insertions(+), 1 deletion(-)
> >
> > diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
> > index d7074b43f60..9da4af9eccd 100644
> > --- a/gcc/config/arm/thumb1.md
> > +++ b/gcc/config/arm/thumb1.md
> > @@ -2055,4 +2055,95 @@ (define_insn "thumb1_stack_protect_test_insn"
> >     (set_attr "conds" "clob")
> >     (set_attr "type" "multiple")]
> >  )
> > -
> > +
> > +;; bad code emitted when HI_REGS involved in addition
> > +;; subtract also might happen rarely
> > +
> > +;; case #1:
> > +;; mov ip, r1
> > +;; add r2, ip # ip is dead after that
> > +(define_peephole2
> > +  [(set (match_operand:SI 0 "register_operand" "")
> > +       (match_operand:SI 1 "register_operand" ""))
> > +   (set (match_operand:SI 2 "register_operand" "")
> > +       (plus:SI (match_dup 2) (match_dup 0)))]
> > +  "TARGET_THUMB1
> > +    && peep2_reg_dead_p (2, operands[0])
> > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > +  [(set (match_dup 2)
> > +       (plus:SI (match_dup 2) (match_dup 1)))]
> > +  "")
> > +
> > +;; case #2:
> > +;; add ip, r1
> > +;; mov r1, ip # ip is dead after that
> > +(define_peephole2
> > +  [(set (match_operand:SI 0 "register_operand" "")
> > +       (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "")))
> > +   (set (match_dup 1) (match_dup 0))]
> > +  "TARGET_THUMB1
> > +    && peep2_reg_dead_p (2, operands[0])
> > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > +  [(set (match_dup 1)
> > +       (plus:SI (match_dup 1) (match_dup 0)))]
> > +  "")
> > +
> > +;; case #3:
> > +;; mov ip, r1
> > +;; add r2, ip
> > +;; add r3, ip # ip is dead after that
> > +(define_peephole2
> > +  [(set (match_operand:SI 0 "register_operand" "")
> > +       (match_operand:SI 1 "register_operand" ""))
> > +   (set (match_operand:SI 2 "register_operand" "")
> > +       (plus:SI (match_dup 2) (match_dup 0)))
> > +   (set (match_operand:SI 3 "register_operand" "")
> > +       (plus:SI (match_dup 3) (match_dup 0)))]
> > +  "TARGET_THUMB1
> > +    && peep2_reg_dead_p (3, operands[0])
> > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > +  [(set (match_dup 2)
> > +       (plus:SI (match_dup 2) (match_dup 1)))
> > +   (set (match_dup 3)
> > +       (plus:SI (match_dup 3) (match_dup 1)))]
> > +  "")
> > +
> > +;; case #4:
> > +;; mov ip, r1
> > +;; add ip, r2
> > +;; mov r1, ip
> > +(define_peephole2
> > +  [(set (match_operand:SI 0 "register_operand" "")
> > +       (match_operand:SI 1 "register_operand" ""))
> > +   (set (match_dup 0)
> > +       (plus:SI (match_dup 0) (match_operand:SI 2 "register_operand" "")))
> > +   (set (match_dup 1)
> > +       (match_dup 0))]
> > +  "TARGET_THUMB1
> > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > +  [(set (match_dup 1)
> > +       (plus:SI (match_dup 1) (match_dup 2)))
> > +   (set (match_dup 0) (match_dup 1))]  ;; likely will be eliminated
> > +  "")
> > +
> > +;; case #5:
> > +;; mov  r1, ip
> > +;; subs r2, r1, r2
> > +;; mov  ip, r2      # r1 is dead after
> > +(define_peephole2
> > +  [(set (match_operand:SI 1 "register_operand" "")
> > +       (match_operand:SI 0 "register_operand" ""))
> > +   (set (match_operand:SI 2 "register_operand" "")
> > +        (minus:SI (match_dup 1) (match_dup 2)))
> > +   (set (match_dup 0)
> > +       (match_dup 2))]
> > +  "TARGET_THUMB1
> > +    && peep2_reg_dead_p (3, operands[1])
> > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > +  [(set (match_dup 1)
> > +       (neg:SI (match_dup 2)))
> > +   (set (match_dup 0)
> > +        (plus:SI (match_dup 0) (match_dup 1)))
> > +   (set (match_dup 2)                  ;; likely will be eliminated
> > +        (match_dup 0))]
> > +  "")
> > --
> > 2.45.2
> >
Siarhei Volkau Oct. 4, 2024, 2:58 p.m. UTC | #3
Hello,

пт, 4 окт. 2024 г. в 16:48, Christophe Lyon <christophe.lyon@linaro.org>:
>
> Hi!
>
>
> On Mon, 8 Jul 2024 at 10:57, Siarhei Volkau <lis8215@gmail.com> wrote:
> >
> > ping
> >
> > чт, 20 июн. 2024 г. в 12:09, Siarhei Volkau <lis8215@gmail.com>:
> > >
> > > This patch deals with consequences but not the root cause though.
> > >
> > > There are 5 cases which are subjects to rewrite:
> > > case #1:
> > >   mov ip, r1
> > >   add r2, ip
> > >   # ip is dead here
> > > can be rewritten as:
> > >   adds r2, r1
>
> Why replace 'add' with 'adds' ?
>
> Thanks,
>
> Christophe
>

Good catch, actually. Silly answer is:
because there's no alternative without {S} for Lo registers in thumb1.

Correct me if I'm wrong, I don't think that we have to do something
special with CC reg there because conditional execution instructions
(thumb1_cbz, cbranchsi4_insn) take care of that.
See thumb1_final_prescan_insn.

Thanks

Siarhei

> > >
> > > case #2:
> > >   add ip, r1
> > >   mov r1, ip
> > >   # ip is dead here
> > > can be rewritten as:
> > >   add r1, ip
> > >
> > > case #3:
> > >   mov ip, r1
> > >   add r2, ip
> > >   add r3, ip
> > >   # ip is dead here
> > > can be rewritten as:
> > >   adds r2, r1
> > >   adds r3, r1
> > >
> > > case #4:
> > >   mov ip, r1
> > >   add ip, r2
> > >   mov r1, ip
> > > can be rewritten as:
> > >   adds r1, r2
> > >   mov  ip, r1 <- might be eliminated too, if ip is dead
> > >
> > > case #5 (arbitrary):
> > >   mov  r1, ip
> > >   subs r2, r1, r2
> > >   mov  ip, r2
> > >   # r1 is dead here
> > > can be rewritten as:
> > >   rsbs r1, r2, #0
> > >   add  ip, r1
> > >   movs r2, ip <- might be eliminated, if r2 is dead
> > >
> > > Speed profit wasn't checked but size changes are the following:
> > >    libgcc:  -132 bytes / -0.25%
> > >      libc: -1262 bytes / -0.55%
> > >      libm:  -384 bytes / -0.42%
> > > libstdc++: -2258 bytes / -0.30%
> > >
> > > No tests provided because its hard to force GCC to emit HI_REGS
> > > in a small and straightforward function.
> > >
> > > Signed-off-by: Siarhei Volkau <lis8215@gmail.com>
> > > ---
> > >  gcc/config/arm/thumb1.md | 93 +++++++++++++++++++++++++++++++++++++++-
> > >  1 file changed, 92 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
> > > index d7074b43f60..9da4af9eccd 100644
> > > --- a/gcc/config/arm/thumb1.md
> > > +++ b/gcc/config/arm/thumb1.md
> > > @@ -2055,4 +2055,95 @@ (define_insn "thumb1_stack_protect_test_insn"
> > >     (set_attr "conds" "clob")
> > >     (set_attr "type" "multiple")]
> > >  )
> > > -
> > > +
> > > +;; bad code emitted when HI_REGS involved in addition
> > > +;; subtract also might happen rarely
> > > +
> > > +;; case #1:
> > > +;; mov ip, r1
> > > +;; add r2, ip # ip is dead after that
> > > +(define_peephole2
> > > +  [(set (match_operand:SI 0 "register_operand" "")
> > > +       (match_operand:SI 1 "register_operand" ""))
> > > +   (set (match_operand:SI 2 "register_operand" "")
> > > +       (plus:SI (match_dup 2) (match_dup 0)))]
> > > +  "TARGET_THUMB1
> > > +    && peep2_reg_dead_p (2, operands[0])
> > > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > > +  [(set (match_dup 2)
> > > +       (plus:SI (match_dup 2) (match_dup 1)))]
> > > +  "")
> > > +
> > > +;; case #2:
> > > +;; add ip, r1
> > > +;; mov r1, ip # ip is dead after that
> > > +(define_peephole2
> > > +  [(set (match_operand:SI 0 "register_operand" "")
> > > +       (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "")))
> > > +   (set (match_dup 1) (match_dup 0))]
> > > +  "TARGET_THUMB1
> > > +    && peep2_reg_dead_p (2, operands[0])
> > > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > > +  [(set (match_dup 1)
> > > +       (plus:SI (match_dup 1) (match_dup 0)))]
> > > +  "")
> > > +
> > > +;; case #3:
> > > +;; mov ip, r1
> > > +;; add r2, ip
> > > +;; add r3, ip # ip is dead after that
> > > +(define_peephole2
> > > +  [(set (match_operand:SI 0 "register_operand" "")
> > > +       (match_operand:SI 1 "register_operand" ""))
> > > +   (set (match_operand:SI 2 "register_operand" "")
> > > +       (plus:SI (match_dup 2) (match_dup 0)))
> > > +   (set (match_operand:SI 3 "register_operand" "")
> > > +       (plus:SI (match_dup 3) (match_dup 0)))]
> > > +  "TARGET_THUMB1
> > > +    && peep2_reg_dead_p (3, operands[0])
> > > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > > +  [(set (match_dup 2)
> > > +       (plus:SI (match_dup 2) (match_dup 1)))
> > > +   (set (match_dup 3)
> > > +       (plus:SI (match_dup 3) (match_dup 1)))]
> > > +  "")
> > > +
> > > +;; case #4:
> > > +;; mov ip, r1
> > > +;; add ip, r2
> > > +;; mov r1, ip
> > > +(define_peephole2
> > > +  [(set (match_operand:SI 0 "register_operand" "")
> > > +       (match_operand:SI 1 "register_operand" ""))
> > > +   (set (match_dup 0)
> > > +       (plus:SI (match_dup 0) (match_operand:SI 2 "register_operand" "")))
> > > +   (set (match_dup 1)
> > > +       (match_dup 0))]
> > > +  "TARGET_THUMB1
> > > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > > +  [(set (match_dup 1)
> > > +       (plus:SI (match_dup 1) (match_dup 2)))
> > > +   (set (match_dup 0) (match_dup 1))]  ;; likely will be eliminated
> > > +  "")
> > > +
> > > +;; case #5:
> > > +;; mov  r1, ip
> > > +;; subs r2, r1, r2
> > > +;; mov  ip, r2      # r1 is dead after
> > > +(define_peephole2
> > > +  [(set (match_operand:SI 1 "register_operand" "")
> > > +       (match_operand:SI 0 "register_operand" ""))
> > > +   (set (match_operand:SI 2 "register_operand" "")
> > > +        (minus:SI (match_dup 1) (match_dup 2)))
> > > +   (set (match_dup 0)
> > > +       (match_dup 2))]
> > > +  "TARGET_THUMB1
> > > +    && peep2_reg_dead_p (3, operands[1])
> > > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > > +  [(set (match_dup 1)
> > > +       (neg:SI (match_dup 2)))
> > > +   (set (match_dup 0)
> > > +        (plus:SI (match_dup 0) (match_dup 1)))
> > > +   (set (match_dup 2)                  ;; likely will be eliminated
> > > +        (match_dup 0))]
> > > +  "")
> > > --
> > > 2.45.2
> > >
Christophe Lyon Oct. 4, 2024, 4:07 p.m. UTC | #4
On Fri, 4 Oct 2024 at 16:59, Siarhei Volkau <lis8215@gmail.com> wrote:
>
> Hello,
>
> пт, 4 окт. 2024 г. в 16:48, Christophe Lyon <christophe.lyon@linaro.org>:
> >
> > Hi!
> >
> >
> > On Mon, 8 Jul 2024 at 10:57, Siarhei Volkau <lis8215@gmail.com> wrote:
> > >
> > > ping
> > >
> > > чт, 20 июн. 2024 г. в 12:09, Siarhei Volkau <lis8215@gmail.com>:
> > > >
> > > > This patch deals with consequences but not the root cause though.
> > > >
> > > > There are 5 cases which are subjects to rewrite:
> > > > case #1:
> > > >   mov ip, r1
> > > >   add r2, ip
> > > >   # ip is dead here
> > > > can be rewritten as:
> > > >   adds r2, r1
> >
> > Why replace 'add' with 'adds' ?
> >
> > Thanks,
> >
> > Christophe
> >
>
> Good catch, actually. Silly answer is:
> because there's no alternative without {S} for Lo registers in thumb1.
>
> Correct me if I'm wrong, I don't think that we have to do something
> special with CC reg there because conditional execution instructions
> (thumb1_cbz, cbranchsi4_insn) take care of that.
> See thumb1_final_prescan_insn.
>

Not familiar with how this is handled, but my question is more like:
if the original code is
case #1:
 adds r3,r0  ;; or any instruction which sets CC
  mov ip, r1
  add r2, ip
  # ip is dead here
cbz ...

If you rewrite as
  adds r3,r0
  adds r2, r1
  cbz
then you change CC and it does not get the value expected by cbz.

Am I missing something?

Thanks,

Christophe


> Thanks
>
> Siarhei
>
> > > >
> > > > case #2:
> > > >   add ip, r1
> > > >   mov r1, ip
> > > >   # ip is dead here
> > > > can be rewritten as:
> > > >   add r1, ip
> > > >
> > > > case #3:
> > > >   mov ip, r1
> > > >   add r2, ip
> > > >   add r3, ip
> > > >   # ip is dead here
> > > > can be rewritten as:
> > > >   adds r2, r1
> > > >   adds r3, r1
> > > >
> > > > case #4:
> > > >   mov ip, r1
> > > >   add ip, r2
> > > >   mov r1, ip
> > > > can be rewritten as:
> > > >   adds r1, r2
> > > >   mov  ip, r1 <- might be eliminated too, if ip is dead
> > > >
> > > > case #5 (arbitrary):
> > > >   mov  r1, ip
> > > >   subs r2, r1, r2
> > > >   mov  ip, r2
> > > >   # r1 is dead here
> > > > can be rewritten as:
> > > >   rsbs r1, r2, #0
> > > >   add  ip, r1
> > > >   movs r2, ip <- might be eliminated, if r2 is dead
> > > >
> > > > Speed profit wasn't checked but size changes are the following:
> > > >    libgcc:  -132 bytes / -0.25%
> > > >      libc: -1262 bytes / -0.55%
> > > >      libm:  -384 bytes / -0.42%
> > > > libstdc++: -2258 bytes / -0.30%
> > > >
> > > > No tests provided because its hard to force GCC to emit HI_REGS
> > > > in a small and straightforward function.
> > > >
> > > > Signed-off-by: Siarhei Volkau <lis8215@gmail.com>
> > > > ---
> > > >  gcc/config/arm/thumb1.md | 93 +++++++++++++++++++++++++++++++++++++++-
> > > >  1 file changed, 92 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
> > > > index d7074b43f60..9da4af9eccd 100644
> > > > --- a/gcc/config/arm/thumb1.md
> > > > +++ b/gcc/config/arm/thumb1.md
> > > > @@ -2055,4 +2055,95 @@ (define_insn "thumb1_stack_protect_test_insn"
> > > >     (set_attr "conds" "clob")
> > > >     (set_attr "type" "multiple")]
> > > >  )
> > > > -
> > > > +
> > > > +;; bad code emitted when HI_REGS involved in addition
> > > > +;; subtract also might happen rarely
> > > > +
> > > > +;; case #1:
> > > > +;; mov ip, r1
> > > > +;; add r2, ip # ip is dead after that
> > > > +(define_peephole2
> > > > +  [(set (match_operand:SI 0 "register_operand" "")
> > > > +       (match_operand:SI 1 "register_operand" ""))
> > > > +   (set (match_operand:SI 2 "register_operand" "")
> > > > +       (plus:SI (match_dup 2) (match_dup 0)))]
> > > > +  "TARGET_THUMB1
> > > > +    && peep2_reg_dead_p (2, operands[0])
> > > > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > > > +  [(set (match_dup 2)
> > > > +       (plus:SI (match_dup 2) (match_dup 1)))]
> > > > +  "")
> > > > +
> > > > +;; case #2:
> > > > +;; add ip, r1
> > > > +;; mov r1, ip # ip is dead after that
> > > > +(define_peephole2
> > > > +  [(set (match_operand:SI 0 "register_operand" "")
> > > > +       (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "")))
> > > > +   (set (match_dup 1) (match_dup 0))]
> > > > +  "TARGET_THUMB1
> > > > +    && peep2_reg_dead_p (2, operands[0])
> > > > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > > > +  [(set (match_dup 1)
> > > > +       (plus:SI (match_dup 1) (match_dup 0)))]
> > > > +  "")
> > > > +
> > > > +;; case #3:
> > > > +;; mov ip, r1
> > > > +;; add r2, ip
> > > > +;; add r3, ip # ip is dead after that
> > > > +(define_peephole2
> > > > +  [(set (match_operand:SI 0 "register_operand" "")
> > > > +       (match_operand:SI 1 "register_operand" ""))
> > > > +   (set (match_operand:SI 2 "register_operand" "")
> > > > +       (plus:SI (match_dup 2) (match_dup 0)))
> > > > +   (set (match_operand:SI 3 "register_operand" "")
> > > > +       (plus:SI (match_dup 3) (match_dup 0)))]
> > > > +  "TARGET_THUMB1
> > > > +    && peep2_reg_dead_p (3, operands[0])
> > > > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > > > +  [(set (match_dup 2)
> > > > +       (plus:SI (match_dup 2) (match_dup 1)))
> > > > +   (set (match_dup 3)
> > > > +       (plus:SI (match_dup 3) (match_dup 1)))]
> > > > +  "")
> > > > +
> > > > +;; case #4:
> > > > +;; mov ip, r1
> > > > +;; add ip, r2
> > > > +;; mov r1, ip
> > > > +(define_peephole2
> > > > +  [(set (match_operand:SI 0 "register_operand" "")
> > > > +       (match_operand:SI 1 "register_operand" ""))
> > > > +   (set (match_dup 0)
> > > > +       (plus:SI (match_dup 0) (match_operand:SI 2 "register_operand" "")))
> > > > +   (set (match_dup 1)
> > > > +       (match_dup 0))]
> > > > +  "TARGET_THUMB1
> > > > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > > > +  [(set (match_dup 1)
> > > > +       (plus:SI (match_dup 1) (match_dup 2)))
> > > > +   (set (match_dup 0) (match_dup 1))]  ;; likely will be eliminated
> > > > +  "")
> > > > +
> > > > +;; case #5:
> > > > +;; mov  r1, ip
> > > > +;; subs r2, r1, r2
> > > > +;; mov  ip, r2      # r1 is dead after
> > > > +(define_peephole2
> > > > +  [(set (match_operand:SI 1 "register_operand" "")
> > > > +       (match_operand:SI 0 "register_operand" ""))
> > > > +   (set (match_operand:SI 2 "register_operand" "")
> > > > +        (minus:SI (match_dup 1) (match_dup 2)))
> > > > +   (set (match_dup 0)
> > > > +       (match_dup 2))]
> > > > +  "TARGET_THUMB1
> > > > +    && peep2_reg_dead_p (3, operands[1])
> > > > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > > > +  [(set (match_dup 1)
> > > > +       (neg:SI (match_dup 2)))
> > > > +   (set (match_dup 0)
> > > > +        (plus:SI (match_dup 0) (match_dup 1)))
> > > > +   (set (match_dup 2)                  ;; likely will be eliminated
> > > > +        (match_dup 0))]
> > > > +  "")
> > > > --
> > > > 2.45.2
> > > >
Siarhei Volkau Oct. 4, 2024, 5:04 p.m. UTC | #5
пт, 4 окт. 2024 г. в 19:07, Christophe Lyon <christophe.lyon@linaro.org>:

>
> On Fri, 4 Oct 2024 at 16:59, Siarhei Volkau <lis8215@gmail.com> wrote:
> >
> > Hello,
> >
> > пт, 4 окт. 2024 г. в 16:48, Christophe Lyon <christophe.lyon@linaro.org>:
> > >
> > > Hi!
> > >
> > >
> > > On Mon, 8 Jul 2024 at 10:57, Siarhei Volkau <lis8215@gmail.com> wrote:
> > > >
> > > > ping
> > > >
> > > > чт, 20 июн. 2024 г. в 12:09, Siarhei Volkau <lis8215@gmail.com>:
> > > > >
> > > > > This patch deals with consequences but not the root cause though.
> > > > >
> > > > > There are 5 cases which are subjects to rewrite:
> > > > > case #1:
> > > > >   mov ip, r1
> > > > >   add r2, ip
> > > > >   # ip is dead here
> > > > > can be rewritten as:
> > > > >   adds r2, r1
> > >
> > > Why replace 'add' with 'adds' ?
> > >
> > > Thanks,
> > >
> > > Christophe
> > >
> >
> > Good catch, actually. Silly answer is:
> > because there's no alternative without {S} for Lo registers in thumb1.
> >
> > Correct me if I'm wrong, I don't think that we have to do something
> > special with CC reg there because conditional execution instructions
> > (thumb1_cbz, cbranchsi4_insn) take care of that.
> > See thumb1_final_prescan_insn.
> >
>
> Not familiar with how this is handled, but my question is more like:
> if the original code is
> case #1:
>  adds r3,r0  ;; or any instruction which sets CC
>   mov ip, r1
>   add r2, ip
>   # ip is dead here
> cbz ...
>
> If you rewrite as
>   adds r3,r0
>   adds r2, r1
>   cbz
> then you change CC and it does not get the value expected by cbz.
>
> Am I missing something?
>
> Thanks,
>
> Christophe
>

Your point is correct in general but look at the thumb1.md.

You will not find a separate "compare" pattern (except one)
and "if_then_else", which relies on the previous compare result.
Because they are combined in one insn pattern, they will be emitted
together as a pair of cmp/cbranch, later than peephole2.

So there's no chance to put an instruction in between by this patch.
But even if it happens somehow, (as I said there's one "compare" insn)
there's a mechanism which tracks condition codes for branch insn.
And if the CC is not matched for the branch insn then extra cmp will be emitted.

>
> > Thanks
> >
> > Siarhei
> >
> > > > >
> > > > > case #2:
> > > > >   add ip, r1
> > > > >   mov r1, ip
> > > > >   # ip is dead here
> > > > > can be rewritten as:
> > > > >   add r1, ip
> > > > >
> > > > > case #3:
> > > > >   mov ip, r1
> > > > >   add r2, ip
> > > > >   add r3, ip
> > > > >   # ip is dead here
> > > > > can be rewritten as:
> > > > >   adds r2, r1
> > > > >   adds r3, r1
> > > > >
> > > > > case #4:
> > > > >   mov ip, r1
> > > > >   add ip, r2
> > > > >   mov r1, ip
> > > > > can be rewritten as:
> > > > >   adds r1, r2
> > > > >   mov  ip, r1 <- might be eliminated too, if ip is dead
> > > > >
> > > > > case #5 (arbitrary):
> > > > >   mov  r1, ip
> > > > >   subs r2, r1, r2
> > > > >   mov  ip, r2
> > > > >   # r1 is dead here
> > > > > can be rewritten as:
> > > > >   rsbs r1, r2, #0
> > > > >   add  ip, r1
> > > > >   movs r2, ip <- might be eliminated, if r2 is dead
> > > > >
> > > > > Speed profit wasn't checked but size changes are the following:
> > > > >    libgcc:  -132 bytes / -0.25%
> > > > >      libc: -1262 bytes / -0.55%
> > > > >      libm:  -384 bytes / -0.42%
> > > > > libstdc++: -2258 bytes / -0.30%
> > > > >
> > > > > No tests provided because its hard to force GCC to emit HI_REGS
> > > > > in a small and straightforward function.
> > > > >
> > > > > Signed-off-by: Siarhei Volkau <lis8215@gmail.com>
> > > > > ---
> > > > >  gcc/config/arm/thumb1.md | 93 +++++++++++++++++++++++++++++++++++++++-
> > > > >  1 file changed, 92 insertions(+), 1 deletion(-)
> > > > >
> > > > > diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
> > > > > index d7074b43f60..9da4af9eccd 100644
> > > > > --- a/gcc/config/arm/thumb1.md
> > > > > +++ b/gcc/config/arm/thumb1.md
> > > > > @@ -2055,4 +2055,95 @@ (define_insn "thumb1_stack_protect_test_insn"
> > > > >     (set_attr "conds" "clob")
> > > > >     (set_attr "type" "multiple")]
> > > > >  )
> > > > > -
> > > > > +
> > > > > +;; bad code emitted when HI_REGS involved in addition
> > > > > +;; subtract also might happen rarely
> > > > > +
> > > > > +;; case #1:
> > > > > +;; mov ip, r1
> > > > > +;; add r2, ip # ip is dead after that
> > > > > +(define_peephole2
> > > > > +  [(set (match_operand:SI 0 "register_operand" "")
> > > > > +       (match_operand:SI 1 "register_operand" ""))
> > > > > +   (set (match_operand:SI 2 "register_operand" "")
> > > > > +       (plus:SI (match_dup 2) (match_dup 0)))]
> > > > > +  "TARGET_THUMB1
> > > > > +    && peep2_reg_dead_p (2, operands[0])
> > > > > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > > > > +  [(set (match_dup 2)
> > > > > +       (plus:SI (match_dup 2) (match_dup 1)))]
> > > > > +  "")
> > > > > +
> > > > > +;; case #2:
> > > > > +;; add ip, r1
> > > > > +;; mov r1, ip # ip is dead after that
> > > > > +(define_peephole2
> > > > > +  [(set (match_operand:SI 0 "register_operand" "")
> > > > > +       (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "")))
> > > > > +   (set (match_dup 1) (match_dup 0))]
> > > > > +  "TARGET_THUMB1
> > > > > +    && peep2_reg_dead_p (2, operands[0])
> > > > > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > > > > +  [(set (match_dup 1)
> > > > > +       (plus:SI (match_dup 1) (match_dup 0)))]
> > > > > +  "")
> > > > > +
> > > > > +;; case #3:
> > > > > +;; mov ip, r1
> > > > > +;; add r2, ip
> > > > > +;; add r3, ip # ip is dead after that
> > > > > +(define_peephole2
> > > > > +  [(set (match_operand:SI 0 "register_operand" "")
> > > > > +       (match_operand:SI 1 "register_operand" ""))
> > > > > +   (set (match_operand:SI 2 "register_operand" "")
> > > > > +       (plus:SI (match_dup 2) (match_dup 0)))
> > > > > +   (set (match_operand:SI 3 "register_operand" "")
> > > > > +       (plus:SI (match_dup 3) (match_dup 0)))]
> > > > > +  "TARGET_THUMB1
> > > > > +    && peep2_reg_dead_p (3, operands[0])
> > > > > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > > > > +  [(set (match_dup 2)
> > > > > +       (plus:SI (match_dup 2) (match_dup 1)))
> > > > > +   (set (match_dup 3)
> > > > > +       (plus:SI (match_dup 3) (match_dup 1)))]
> > > > > +  "")
> > > > > +
> > > > > +;; case #4:
> > > > > +;; mov ip, r1
> > > > > +;; add ip, r2
> > > > > +;; mov r1, ip
> > > > > +(define_peephole2
> > > > > +  [(set (match_operand:SI 0 "register_operand" "")
> > > > > +       (match_operand:SI 1 "register_operand" ""))
> > > > > +   (set (match_dup 0)
> > > > > +       (plus:SI (match_dup 0) (match_operand:SI 2 "register_operand" "")))
> > > > > +   (set (match_dup 1)
> > > > > +       (match_dup 0))]
> > > > > +  "TARGET_THUMB1
> > > > > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > > > > +  [(set (match_dup 1)
> > > > > +       (plus:SI (match_dup 1) (match_dup 2)))
> > > > > +   (set (match_dup 0) (match_dup 1))]  ;; likely will be eliminated
> > > > > +  "")
> > > > > +
> > > > > +;; case #5:
> > > > > +;; mov  r1, ip
> > > > > +;; subs r2, r1, r2
> > > > > +;; mov  ip, r2      # r1 is dead after
> > > > > +(define_peephole2
> > > > > +  [(set (match_operand:SI 1 "register_operand" "")
> > > > > +       (match_operand:SI 0 "register_operand" ""))
> > > > > +   (set (match_operand:SI 2 "register_operand" "")
> > > > > +        (minus:SI (match_dup 1) (match_dup 2)))
> > > > > +   (set (match_dup 0)
> > > > > +       (match_dup 2))]
> > > > > +  "TARGET_THUMB1
> > > > > +    && peep2_reg_dead_p (3, operands[1])
> > > > > +    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
> > > > > +  [(set (match_dup 1)
> > > > > +       (neg:SI (match_dup 2)))
> > > > > +   (set (match_dup 0)
> > > > > +        (plus:SI (match_dup 0) (match_dup 1)))
> > > > > +   (set (match_dup 2)                  ;; likely will be eliminated
> > > > > +        (match_dup 0))]
> > > > > +  "")
> > > > > --
> > > > > 2.45.2
> > > > >
diff mbox series

Patch

diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
index d7074b43f60..9da4af9eccd 100644
--- a/gcc/config/arm/thumb1.md
+++ b/gcc/config/arm/thumb1.md
@@ -2055,4 +2055,95 @@  (define_insn "thumb1_stack_protect_test_insn"
    (set_attr "conds" "clob")
    (set_attr "type" "multiple")]
 )
-
+
+;; bad code emitted when HI_REGS involved in addition
+;; subtract also might happen rarely
+
+;; case #1:
+;; mov ip, r1
+;; add r2, ip # ip is dead after that
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operand:SI 1 "register_operand" ""))
+   (set (match_operand:SI 2 "register_operand" "")
+	(plus:SI (match_dup 2) (match_dup 0)))]
+  "TARGET_THUMB1
+    && peep2_reg_dead_p (2, operands[0])
+    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
+  [(set (match_dup 2)
+	(plus:SI (match_dup 2) (match_dup 1)))]
+  "")
+
+;; case #2:
+;; add ip, r1
+;; mov r1, ip # ip is dead after that
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+	(plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "")))
+   (set (match_dup 1) (match_dup 0))]
+  "TARGET_THUMB1
+    && peep2_reg_dead_p (2, operands[0])
+    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
+  [(set (match_dup 1)
+	(plus:SI (match_dup 1) (match_dup 0)))]
+  "")
+
+;; case #3:
+;; mov ip, r1
+;; add r2, ip
+;; add r3, ip # ip is dead after that
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operand:SI 1 "register_operand" ""))
+   (set (match_operand:SI 2 "register_operand" "")
+	(plus:SI (match_dup 2) (match_dup 0)))
+   (set (match_operand:SI 3 "register_operand" "")
+	(plus:SI (match_dup 3) (match_dup 0)))]
+  "TARGET_THUMB1
+    && peep2_reg_dead_p (3, operands[0])
+    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
+  [(set (match_dup 2)
+	(plus:SI (match_dup 2) (match_dup 1)))
+   (set (match_dup 3)
+	(plus:SI (match_dup 3) (match_dup 1)))]
+  "")
+
+;; case #4:
+;; mov ip, r1
+;; add ip, r2
+;; mov r1, ip
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operand:SI 1 "register_operand" ""))
+   (set (match_dup 0)
+	(plus:SI (match_dup 0) (match_operand:SI 2 "register_operand" "")))
+   (set (match_dup 1)
+	(match_dup 0))]
+  "TARGET_THUMB1
+    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
+  [(set (match_dup 1)
+	(plus:SI (match_dup 1) (match_dup 2)))
+   (set (match_dup 0) (match_dup 1))]  ;; likely will be eliminated
+  "")
+
+;; case #5:
+;; mov  r1, ip
+;; subs r2, r1, r2
+;; mov  ip, r2      # r1 is dead after
+(define_peephole2
+  [(set (match_operand:SI 1 "register_operand" "")
+	(match_operand:SI 0 "register_operand" ""))
+   (set (match_operand:SI 2 "register_operand" "")
+        (minus:SI (match_dup 1) (match_dup 2)))
+   (set (match_dup 0)
+	(match_dup 2))]
+  "TARGET_THUMB1
+    && peep2_reg_dead_p (3, operands[1])
+    && REGNO_REG_CLASS (REGNO (operands[0])) == HI_REGS"
+  [(set (match_dup 1)
+	(neg:SI (match_dup 2)))
+   (set (match_dup 0)
+        (plus:SI (match_dup 0) (match_dup 1)))
+   (set (match_dup 2)                  ;; likely will be eliminated
+        (match_dup 0))]
+  "")