diff mbox series

[avr] Rework avr_out_compare

Message ID da646407-4909-4cec-ba5b-ab5ab211c77b@gjlay.de
State New
Headers show
Series [avr] Rework avr_out_compare | expand

Commit Message

Georg-Johann Lay Sept. 12, 2024, 1:32 p.m. UTC
This patch reworks avr_out_compare:

Use new convenient helper functions that may be useful in
other output functions, too.

Generalized some special cases that only work for EQ and NE
comparisons.  For example, with the patch

;; R24:SI == -1 (unused after)
	adiw r26,1
	sbci r25,hi8(-1)
	sbci r24,lo8(-1)

;; R18:SI == -1
	cpi r18,-1
	cpc r19,r18
	cpc r20,r18
	cpc r21,r18

Without the patch, we had:

;; R24:SI == -1 (unused after)
	cpi r24,-1
	sbci r25,-1
	sbci r26,-1
	sbci r27,-1

;; R18:SI == -1
	cpi r18,-1
	ldi r24,-1
	cpc r19,r24
	cpc r20,r24
	cpc r21,r24

Ok for trunk?

This patch requires "Tweak 32-bit comparisons".

https://gcc.gnu.org/pipermail/gcc-patches/2024-September/662738.html

Johann

--

AVR: Rework avr_out_compare.

16-bit comparisons like R25:24 == -1 are currently performed like
     cpi R24, -1
     cpc R25, R24
Similar is possible for wider modes.  ADIW can be used like SBIW when
the compare code is EQ or NE because such comparisons are just about
(propagating) the Z flag.  The patch adds helper functions like avr_byte()
that may be useful in functions other than avr_out_compare().

gcc/
	* config/avr/avr.cc (avr_chunk, avr_byte, avr_word)
	(avr_int8, avr_uint8, avr_int16): New helper functions.
	(avr_out_compare): Overhaul.

Comments

Denis Chertykov Sept. 13, 2024, 7:52 a.m. UTC | #1
чт, 12 сент. 2024 г. в 17:32, Georg-Johann Lay <avr@gjlay.de>:
>
> This patch reworks avr_out_compare:
>
> Use new convenient helper functions that may be useful in
> other output functions, too.
>
> Generalized some special cases that only work for EQ and NE
> comparisons.  For example, with the patch
>
> ;; R24:SI == -1 (unused after)
>         adiw r26,1
>         sbci r25,hi8(-1)
>         sbci r24,lo8(-1)
>
> ;; R18:SI == -1
>         cpi r18,-1
>         cpc r19,r18
>         cpc r20,r18
>         cpc r21,r18
>
> Without the patch, we had:
>
> ;; R24:SI == -1 (unused after)
>         cpi r24,-1
>         sbci r25,-1
>         sbci r26,-1
>         sbci r27,-1
>
> ;; R18:SI == -1
>         cpi r18,-1
>         ldi r24,-1
>         cpc r19,r24
>         cpc r20,r24
>         cpc r21,r24
>
> Ok for trunk?

Please, apply.

Denis

>
> This patch requires "Tweak 32-bit comparisons".
>
> https://gcc.gnu.org/pipermail/gcc-patches/2024-September/662738.html
>
> Johann
>
> --
>
> AVR: Rework avr_out_compare.
>
> 16-bit comparisons like R25:24 == -1 are currently performed like
>      cpi R24, -1
>      cpc R25, R24
> Similar is possible for wider modes.  ADIW can be used like SBIW when
> the compare code is EQ or NE because such comparisons are just about
> (propagating) the Z flag.  The patch adds helper functions like avr_byte()
> that may be useful in other functions than avr_out_compare().
>
> gcc/
>         * config/avr/avr.cc (avr_chunk, avr_byte, avr_word)
>         (avr_int8, avr_uint8, avr_int16): New helper functions.
>         (avr_out_compare): Overhaul.
diff mbox series

Patch

    AVR: Rework avr_out_compare.
    
    16-bit comparisons like R25:24 == -1 are currently performed like
        cpi R24, -1
        cpc R25, R24
    Similar is possible for wider modes.  ADIW can be used like SBIW when
    the compare code is EQ or NE because such comparisons are just about
    (propagating) the Z flag.  The patch adds helper functions like avr_byte()
    that may be useful in other functions than avr_out_compare().
    
    gcc/
            * config/avr/avr.cc (avr_chunk, avr_byte, avr_word)
            (avr_int8, avr_uint8, avr_int16): New helper functions.
            (avr_out_compare): Overhaul.

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 99657911171..1cfbfe6ec3b 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -322,6 +322,68 @@  avr_to_int_mode (rtx x)
 }
 
 
+/* Return chunk of mode MODE of X as an rtx.  N specifies the subreg
+   byte at which the chunk starts.  N must be an integral multiple
+   of the mode size.  */
+
+static rtx
+avr_chunk (machine_mode mode, rtx x, int n)
+{
+  gcc_assert (n % GET_MODE_SIZE (mode) == 0);
+  machine_mode xmode = GET_MODE (x) == VOIDmode ? DImode : GET_MODE (x);
+  return simplify_gen_subreg (mode, x, xmode, n);
+}
+
+
+/* Return the N-th byte of X as an rtx.  */
+
+static rtx
+avr_byte (rtx x, int n)
+{
+  return avr_chunk (QImode, x, n);
+}
+
+
+/* Return the sub-word of X starting at byte number N.  */
+
+static rtx
+avr_word (rtx x, int n)
+{
+  return avr_chunk (HImode, x, n);
+}
+
+
+/* Return the N-th byte of compile-time constant X as an int8_t.  */
+
+static int8_t
+avr_int8 (rtx x, int n)
+{
+  gcc_assert (CONST_INT_P (x) || CONST_FIXED_P (x) || CONST_DOUBLE_P (x));
+
+  return (int8_t) trunc_int_for_mode (INTVAL (avr_byte (x, n)), QImode);
+}
+
+/* Return the N-th byte of compile-time constant X as an uint8_t.  */
+
+static uint8_t
+avr_uint8 (rtx x, int n)
+{
+  return (uint8_t) avr_int8 (x, n);
+}
+
+
+/* Return the sub-word of compile-time constant X that starts
+   at byte N as an int16_t.  */
+
+static int16_t
+avr_int16 (rtx x, int n)
+{
+  gcc_assert (CONST_INT_P (x) || CONST_FIXED_P (x) || CONST_DOUBLE_P (x));
+
+  return (int16_t) trunc_int_for_mode (INTVAL (avr_word (x, n)), HImode);
+}
+
+
 /* Return true if hard register REG supports the ADIW and SBIW instructions.  */
 
 bool
@@ -5574,9 +5636,6 @@  avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
       xval = avr_to_int_mode (xop[1]);
     }
 
-  /* MODE of the comparison.  */
-  machine_mode mode = GET_MODE (xreg);
-
   gcc_assert (REG_P (xreg));
   gcc_assert ((CONST_INT_P (xval) && n_bytes <= 4)
 	      || (const_double_operand (xval, VOIDmode) && n_bytes == 8));
@@ -5584,13 +5643,15 @@  avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
   if (plen)
     *plen = 0;
 
+  const bool eqne_p = compare_eq_p (insn);
+
   /* Comparisons == +/-1 and != +/-1 can be done similar to camparing
      against 0 by ORing the bytes.  This is one instruction shorter.
      Notice that 64-bit comparisons are always against reg:ALL8 18 (ACC_A)
      and therefore don't use this.  */
 
-  if (!test_hard_reg_class (LD_REGS, xreg)
-      && compare_eq_p (insn)
+  if (eqne_p
+      && ! test_hard_reg_class (LD_REGS, xreg)
       && reg_unused_after (insn, xreg))
     {
       if (xval == const1_rtx)
@@ -5619,39 +5680,11 @@  avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
 	}
     }
 
-  /* Comparisons == -1 and != -1 of a d-register that's used after the
-     comparison.  (If it's unused after we use CPI / SBCI or ADIW sequence
-     from below.)  Instead of  CPI Rlo,-1 / LDI Rx,-1 / CPC Rhi,Rx  we can
-     use  CPI Rlo,-1 / CPC Rhi,Rlo  which is 1 instruction shorter:
-     If CPI is true then Rlo contains -1 and we can use Rlo instead of Rx
-     when CPC'ing the high part.  If CPI is false then CPC cannot render
-     the result to true.  This also works for the more generic case where
-     the constant is of the form 0xabab.  */
-
-  if (n_bytes == 2
-      && xval != const0_rtx
-      && test_hard_reg_class (LD_REGS, xreg)
-      && compare_eq_p (insn)
-      && !reg_unused_after (insn, xreg))
-    {
-      rtx xlo8 = simplify_gen_subreg (QImode, xval, mode, 0);
-      rtx xhi8 = simplify_gen_subreg (QImode, xval, mode, 1);
-
-      if (INTVAL (xlo8) == INTVAL (xhi8))
-	{
-	  xop[0] = xreg;
-	  xop[1] = xlo8;
-
-	  return avr_asm_len ("cpi %A0,%1"  CR_TAB
-			      "cpc %B0,%A0", xop, plen, 2);
-	}
-    }
-
   /* Comparisons == and != may change the order in which the sub-bytes are
      being compared.  Start with the high 16 bits so we can use SBIW.  */
 
   if (n_bytes == 4
-      && compare_eq_p (insn)
+      && eqne_p
       && AVR_HAVE_ADIW
       && REGNO (xreg) >= REG_22)
     {
@@ -5660,56 +5693,57 @@  avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
 			    "cpc %B0,__zero_reg__" CR_TAB
 			    "cpc %A0,__zero_reg__", xop, plen, 3);
 
-      rtx xhi16 = simplify_gen_subreg (HImode, xval, mode, 2);
-      if (IN_RANGE (UINTVAL (xhi16) & GET_MODE_MASK (HImode), 0, 63)
-	  && reg_unused_after (insn, xreg))
+      int16_t hi16 = avr_int16 (xval, 2);
+      if (reg_unused_after (insn, xreg)
+	  && (IN_RANGE (hi16, 0, 63)
+	      || (eqne_p
+		  && IN_RANGE (hi16, -63, -1))))
 	{
-	  xop[1] = xhi16;
-	  avr_asm_len ("sbiw %C0,%1", xop, plen, 1);
-	  xop[1] = xval;
+	  rtx op[] = { xop[0], avr_word (xval, 2) };
+	  avr_asm_len (hi16 < 0 ? "adiw %C0,%n1" : "sbiw %C0,%1",
+		       op, plen, 1);
 	  return avr_asm_len ("sbci %B0,hi8(%1)" CR_TAB
 			      "sbci %A0,lo8(%1)", xop, plen, 2);
 	}
     }
 
+  bool changed[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
   for (int i = 0; i < n_bytes; i++)
     {
       /* We compare byte-wise.  */
-      rtx reg8 = simplify_gen_subreg (QImode, xreg, mode, i);
-      rtx xval8 = simplify_gen_subreg (QImode, xval, mode, i);
+      xop[0] = avr_byte (xreg, i);
+      xop[1] = avr_byte (xval, i);
 
       /* 8-bit value to compare with this byte.  */
-      unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode);
-
-      /* Registers R16..R31 can operate with immediate.  */
-      bool ld_reg_p = test_hard_reg_class (LD_REGS, reg8);
-
-      xop[0] = reg8;
-      xop[1] = gen_int_mode (val8, QImode);
+      unsigned int val8 = avr_uint8 (xval, i);
 
       /* Word registers >= R24 can use SBIW/ADIW with 0..63.  */
 
       if (i == 0
-	  && avr_adiw_reg_p (reg8))
+	  && n_bytes >= 2
+	  && avr_adiw_reg_p (xop[0]))
 	{
-	  int val16 = trunc_int_for_mode (INTVAL (xval), HImode);
+	  int val16 = avr_int16 (xval, 0);
 
 	  if (IN_RANGE (val16, 0, 63)
 	      && (val8 == 0
 		  || reg_unused_after (insn, xreg)))
 	    {
 	      avr_asm_len ("sbiw %0,%1", xop, plen, 1);
-
+	      changed[0] = changed[1] = val8 != 0;
 	      i++;
 	      continue;
 	    }
 
-	  if (n_bytes == 2
-	      && IN_RANGE (val16, -63, -1)
-	      && compare_eq_p (insn)
+	  if (IN_RANGE (val16, -63, -1)
+	      && eqne_p
 	      && reg_unused_after (insn, xreg))
 	    {
-	      return avr_asm_len ("adiw %0,%n1", xop, plen, 1);
+	      avr_asm_len ("adiw %0,%n1", xop, plen, 1);
+	      changed[0] = changed[1] = true;
+	      i++;
+	      continue;
 	    }
 	}
 
@@ -5728,7 +5762,7 @@  avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
 	 instruction; the only difference is that comparisons don't write
 	 the result back to the target register.  */
 
-      if (ld_reg_p)
+      if (test_hard_reg_class (LD_REGS, xop[0]))
 	{
 	  if (i == 0)
 	    {
@@ -5738,10 +5772,37 @@  avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
 	  else if (reg_unused_after (insn, xreg))
 	    {
 	      avr_asm_len ("sbci %0,%1", xop, plen, 1);
+	      changed[i] = true;
 	      continue;
 	    }
 	}
 
+      /* When byte comparisons for an EQ or NE comparison look like
+	     compare (x[i], C)
+	     compare (x[j], C)
+	 then we can instead use
+	     compare (x[i], C)
+	     compare (x[j], x[i])
+	 which is shorter, and the outcome of the comparison is the same.  */
+
+      if (eqne_p)
+	{
+	  bool done = false;
+
+	  for (int j = 0; j < i && ! done; ++j)
+	    if (val8 == avr_uint8 (xval, j)
+		// Make sure that we didn't clobber x[j] above.
+		&& ! changed[j])
+	      {
+		rtx op[] = { xop[0], avr_byte (xreg, j) };
+		avr_asm_len ("cpc %0,%1", op, plen, 1);
+		done = true;
+	      }
+
+	  if (done)
+	    continue;
+	}
+
       /* Must load the value into the scratch register.  */
 
       gcc_assert (REG_P (xop[2]));