[avr] Tweak unsigned compares with consts that are 0 mod 256.

Message ID 876e1122-3b3b-44b6-97d1-437ae883d110@gjlay.de
State New
Series [avr] Tweak unsigned compares with consts that are 0 mod 256.

Commit Message

Georg-Johann Lay Sept. 15, 2024, 3:31 p.m. UTC
Unsigned comparisons may skip comparing the lower bytes when
the corresponding bytes of the constant are all zero.  For example,

     uint16 >= 0x1200

is true iff

     hi8 (uint16) >= hi8 (0x1200)

and similarly for uint16 < 0x1200.  Some comparisons against constants
that are an integral power of 256 were already handled in the split
preparation.  That code has been factored out into the new avr_maybe_cmp_lsr(),
which may change the operands such that the resulting insns become
a comparison of the high bytes against 0 plus an EQ / NE branch.
For example,

     uint32 >= 0x10000

can be rewritten as

     (uint32 >> 16) != 0.

The corresponding asm output is performed by the new avr_out_cmp_lsr().
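
For illustration, here is a minimal sketch of the 16-bit case (the
function name is made up, and the asm assumes the usual avr convention
of passing x in r25:r24; the exact code depends on register allocation
and options).  A function like

     unsigned char cc;

     void geu_1200 (unsigned int x)	/* unsigned int is 16 bits */
     {
       if (x >= 0x1200)
         cc = 0;
     }

may now be compiled to a compare of the high byte only, along the
lines of

     cpi r25,0x12		;  hi8 (x) against hi8 (0x1200)
     brlo .L1			;  taken iff x < 0x1200
     sts cc,__zero_reg__
     .L1: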

Ok for trunk?

Johann

--

gcc/
	* config/avr/avr-protos.h (avr_out_cmp_lsr, avr_maybe_cmp_lsr): New.
	* config/avr/avr.cc (avr_maybe_cmp_lsr, avr_out_cmp_lsr): New functions.
	(avr_out_compare) [GEU, LTU]: Start output at byte CTZ(xval) / 8.
	(avr_adjust_insn_length) [ADJUST_LEN_CMP_LSR]: Handle case.
	* config/avr/avr.md (adjust_len) <cmp_lsr>: New attr value.
	(*cmp<mode>_lsr): New define_insn_and_split.
	(cbranch<mode>4_insn): When splitting, run avr_maybe_cmp_lsr()
	which may map the operands to *cmp<mode>_lsr.
gcc/testsuite/
	* gcc.target/avr/torture/cmp-lsr-u16.c: New test.
	* gcc.target/avr/torture/cmp-lsr-u24.c: New test.
	* gcc.target/avr/torture/cmp-lsr-u32.c: New test.
	* gcc.target/avr/torture/cmp-lsr-u64.c: New test.

Comments

Denis Chertykov Sept. 16, 2024, 11:04 a.m. UTC | #1
On Sun, Sep 15, 2024 at 7:32 PM Georg-Johann Lay <avr@gjlay.de> wrote:
> [...]
>
> Ok for trunk?

Please apply.

Denis.

Georg-Johann Lay Sept. 16, 2024, 11:49 a.m. UTC | #2
On 16.09.24 13:04, Denis Chertykov wrote:
> On Sun, Sep 15, 2024 at 7:32 PM Georg-Johann Lay <avr@gjlay.de> wrote:
>> [...]
>>
>> Ok for trunk?
> 
> Please apply.
> 
> Denis.

Applied with the following addendum because the same also works
for signed comparisons.
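
For example, by the same reasoning as in the unsigned case,

     int16 >= 0x1200

is true iff

     (signed char) hi8 (int16) >= 0x12

because with int16 = 256 * hi8 + lo8 and 0 <= lo8 <= 255, the sign
of int16 - 0x1200 depends on hi8 alone.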

Johann


diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 5e5b5779d8a..92013c3845d 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -6152,11 +6152,12 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)

   bool changed[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };

-  /* Unsigned comparisons may skip the lower bytes when the according bytes
+  /* The >= and < comparisons may skip the lower bytes when the according bytes
      of the constant are all zeros.  In that case, the comparison may start
      at a byte other than the LSB.  */

-  const int start = (cond == GEU || cond == LTU) && INTVAL (xval) != 0
+  const int start = ((cond == GEU || cond == LTU || cond == GE || cond == LT)
+                     && INTVAL (xval) != 0)
     ? ctz_hwi (INTVAL (xval)) / 8
     : 0;


>> [...]

Patch

    AVR: Tweak unsigned compares with consts that are 0 mod 256.
    
    Unsigned comparisons may skip comparing the lower bytes when
    the corresponding bytes of the constant are all zero.  For example,
    
        uint16 >= 0x1200
    
    is true iff
    
        hi8 (uint16) >= hi8 (0x1200)
    
    and similarly for uint16 < 0x1200.  Some comparisons against constants
    that are an integral power of 256 were already handled in the split
    preparation.  That code has been factored out into the new avr_maybe_cmp_lsr(),
    which may change the operands such that the resulting insns become
    a comparison of the high bytes against 0 plus an EQ / NE branch.
    For example,
    
        uint32 >= 0x10000
    
    can be rewritten as
    
        (uint32 >> 16) != 0.
    
    The corresponding asm output is performed by the new avr_out_cmp_lsr().
    
    gcc/
            * config/avr/avr-protos.h (avr_out_cmp_lsr, avr_maybe_cmp_lsr): New.
            * config/avr/avr.cc (avr_maybe_cmp_lsr, avr_out_cmp_lsr): New functions.
            (avr_out_compare) [GEU, LTU]: Start output at byte CTZ(xval) / 8.
            (avr_adjust_insn_length) [ADJUST_LEN_CMP_LSR]: Handle case.
            * config/avr/avr.md (adjust_len) <cmp_lsr>: New attr value.
            (*cmp<mode>_lsr): New define_insn_and_split.
            (cbranch<mode>4_insn): When splitting, run avr_maybe_cmp_lsr()
            which may map the operands to *cmp<mode>_lsr.
    gcc/testsuite/
            * gcc.target/avr/torture/cmp-lsr-u16.c: New test.
            * gcc.target/avr/torture/cmp-lsr-u24.c: New test.
            * gcc.target/avr/torture/cmp-lsr-u32.c: New test.
            * gcc.target/avr/torture/cmp-lsr-u64.c: New test.

diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index 71c110c791f..2eb92c6518d 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -115,7 +115,8 @@  extern const char* output_reload_inhi (rtx*, rtx, int*);
 extern const char* output_reload_insisf (rtx*, rtx, int*);
 extern const char* avr_out_reload_inpsi (rtx*, rtx, int*);
 extern const char* avr_out_lpm (rtx_insn *, rtx*, int*);
-extern void avr_notice_update_cc (rtx body, rtx_insn *insn);
+extern const char* avr_out_cmp_lsr (rtx_insn *, rtx*, int*);
+extern void avr_maybe_cmp_lsr (rtx *);
 extern int reg_unused_after (rtx_insn *insn, rtx reg);
 extern int avr_jump_mode (rtx x, rtx_insn *insn, int = 0);
 extern int test_hard_reg_class (enum reg_class rclass, rtx x);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 4cb51ea68dc..62f46c1933b 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -5943,6 +5943,118 @@  avr_canonicalize_comparison (int *icode, rtx *op0, rtx *op1, bool op0_fixed)
 }
 
 
+/* Try to turn a GEU or LTU comparison of register XOP[1] into an
+   NE / EQ comparison of the higher bytes of XOP[1] against 0.
+   XOP[1] has scalar int or scalar fixed-point mode of 2, 3 or 4 bytes.
+   XOP[2] is a compile-time constant, and XOP[0] = XOP[1] <comp> XOP[2]
+   is the comparison operator.  XOP[3] is the branch label, and XOP[4]
+   is a QImode scratch operand.
+      When XOP[2] (viewed as a CONST_INT) is an integral power of 256,
+   then a GEU or LTU comparison can be turned into an NE or EQ comparison
+   of the high bytes against zero.  For example, the C code
+
+	if (x >= 1)
+	  ccc = 0;
+
+   where x is an unsigned _Accum may be compiled as:
+
+	or r24,r25		 ;  *cmpsi_lsr
+	breq .L1		 ;  branch
+	sts ccc,__zero_reg__	 ;  movqi_insn
+     .L1:
+
+   In the case of success, the operands will be such that they comprise
+   a *cmp<mode>_lsr insn, where mode is HI, PSI or SI, and XOP[0] will be
+   a NE or EQ branch condition.  Otherwise, XOP[] is unchanged.  */
+
+void
+avr_maybe_cmp_lsr (rtx *xop)
+{
+  rtx_code comp = GET_CODE (xop[0]);
+
+  if ((comp == GEU || comp == LTU)
+      && (CONST_INT_P (xop[2]) || CONST_FIXED_P (xop[2])))
+    {
+      rtx xreg = avr_to_int_mode (xop[1]);
+      rtx xval = avr_to_int_mode (xop[2]);
+      machine_mode imode = GET_MODE (xreg);
+      auto uval = UINTVAL (xval) & GET_MODE_MASK (imode);
+      int shift = exact_log2 (uval);
+
+      if (shift == 8 || shift == 16 || shift == 24)
+	{
+	  // Operands such that the compare becomes *cmp<mode>_lsr.
+	  xop[1] = gen_rtx_LSHIFTRT (imode, xreg, GEN_INT (shift));
+	  xop[2] = const0_rtx;
+	  xop[4] = gen_rtx_SCRATCH (QImode);
+	  // Branch condition.
+	  xop[0] = gen_rtx_fmt_ee (comp == GEU ? NE : EQ,
+				   VOIDmode, xop[1], xop[2]);
+	}
+    }
+}
+
+
+/* Output an EQ / NE compare of HI, PSI or SI register XOP[0] against 0,
+   where only the bits starting at XOP[1] are relevant.  XOP[1] is a
+   const_int that is 8, 16 or 24.  Return "".
+   PLEN == 0:  Output instructions.
+   PLEN != 0:  Set *PLEN to the length of the sequence in words.  */
+
+const char *
+avr_out_cmp_lsr (rtx_insn *insn, rtx *xop, int *plen)
+{
+  rtx xreg = xop[0];
+  const int n_bytes = GET_MODE_SIZE (GET_MODE (xreg));
+  const int shift = INTVAL (xop[1]);
+  const rtx_code cond = compare_condition (insn);
+
+  gcc_assert (shift == 8 || shift == 16 || shift == 24);
+  gcc_assert (shift < 8 * n_bytes);
+  gcc_assert (cond == UNKNOWN || cond == NE || cond == EQ);
+
+  const bool used_p = ! reg_unused_after (insn, xreg);
+
+  if (plen)
+    *plen = 0;
+
+  if (shift / 8 == n_bytes - 1)
+    {
+      rtx xmsb = avr_byte (xreg, n_bytes - 1);
+      avr_asm_len ("tst %0", &xmsb, plen, 1);
+    }
+  else if (n_bytes == 4
+	   && shift <= 16
+	   && AVR_HAVE_ADIW
+	   && REGNO (xreg) >= REG_22
+	   // The sequence also works when xreg is unused after,
+	   // but SBIW is slower than OR.
+	   && used_p)
+    {
+      avr_asm_len ("sbiw %C0,0", &xreg, plen, 1);
+      if (shift == 8)
+	avr_asm_len ("cpc %B0,__zero_reg__", &xreg, plen, 1);
+    }
+  else
+    {
+      rtx op[2] = { avr_byte (xreg, shift / 8), tmp_reg_rtx };
+      if (used_p)
+	{
+	  avr_asm_len ("mov %1,%0", op, plen, 1);
+	  op[0] = tmp_reg_rtx;
+	}
+
+      for (int i = 1 + shift / 8; i < n_bytes; ++i)
+	{
+	  op[1] = avr_byte (xreg, i);
+	  avr_asm_len ("or %0,%1", op, plen, 1);
+	}
+    }
+
+  return "";
+}
+
+
 /* Output compare instruction
 
       compare (XOP[0], XOP[1])
@@ -5983,7 +6095,8 @@  avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
   if (plen)
     *plen = 0;
 
-  const bool eqne_p = compare_eq_p (insn);
+  const rtx_code cond = compare_condition (insn);
+  const bool eqne_p = cond == EQ || cond == NE;
 
   /* Comparisons == +/-1 and != +/-1 can be done similar to camparing
      against 0 by ORing the bytes.  This is one instruction shorter.
@@ -6049,7 +6162,15 @@  avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
 
   bool changed[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
 
-  for (int i = 0; i < n_bytes; i++)
+  /* Unsigned comparisons may skip the lower bytes when the according bytes
+     of the constant are all zeros.  In that case, the comparison may start
+     at a byte other than the LSB.  */
+
+  const int start = (cond == GEU || cond == LTU) && INTVAL (xval) != 0
+    ? ctz_hwi (INTVAL (xval)) / 8
+    : 0;
+
+  for (int i = start; i < n_bytes; i++)
     {
       /* We compare byte-wise.  */
       xop[0] = avr_byte (xreg, i);
@@ -6060,18 +6181,19 @@  avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
 
       /* Word registers >= R24 can use SBIW/ADIW with 0..63.  */
 
-      if (i == 0
-	  && n_bytes >= 2
+      if (i == start
+	  && i % 2 == 0
+	  && n_bytes - start >= 2
 	  && avr_adiw_reg_p (xop[0]))
 	{
-	  int val16 = avr_int16 (xval, 0);
+	  int val16 = avr_int16 (xval, i);
 
 	  if (IN_RANGE (val16, 0, 63)
 	      && (val8 == 0
 		  || reg_unused_after (insn, xreg)))
 	    {
 	      avr_asm_len ("sbiw %0,%1", xop, plen, 1);
-	      changed[0] = changed[1] = val8 != 0;
+	      changed[i] = changed[i + 1] = val8 != 0;
 	      i++;
 	      continue;
 	    }
@@ -6081,7 +6203,7 @@  avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
 	      && reg_unused_after (insn, xreg))
 	    {
 	      avr_asm_len ("adiw %0,%n1", xop, plen, 1);
-	      changed[0] = changed[1] = true;
+	      changed[i] = changed[i + 1] = true;
 	      i++;
 	      continue;
 	    }
@@ -6091,7 +6213,7 @@  avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
 
       if (val8 == 0)
 	{
-	  avr_asm_len (i == 0
+	  avr_asm_len (i == start
 		       ? "cp %0,__zero_reg__"
 		       : "cpc %0,__zero_reg__", xop, plen, 1);
 	  continue;
@@ -6104,7 +6226,7 @@  avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
 
       if (test_hard_reg_class (LD_REGS, xop[0]))
 	{
-	  if (i == 0)
+	  if (i == start)
 	    {
 	      avr_asm_len ("cpi %0,%1", xop, plen, 1);
 	      continue;
@@ -6129,7 +6251,7 @@  avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
 	{
 	  bool found = false;
 
-	  for (int j = 0; j < i && ! found; ++j)
+	  for (int j = start; j < i && ! found; ++j)
 	    if (val8 == avr_uint8 (xval, j)
 		// Make sure that we didn't clobber x[j] above.
 		&& ! changed[j])
@@ -6151,7 +6273,7 @@  avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
 	avr_asm_len ("ldi %2,%1", xop, plen, 1);
       clobber_val = (int) val8;
 
-      avr_asm_len (i == 0
+      avr_asm_len (i == start
 		   ? "cp %0,%2"
 		   : "cpc %0,%2", xop, plen, 1);
     }
@@ -10338,6 +10460,7 @@  avr_adjust_insn_length (rtx_insn *insn, int len)
     case ADJUST_LEN_COMPARE64: avr_out_compare64 (insn, op, &len); break;
     case ADJUST_LEN_CMP_UEXT: avr_out_cmp_ext (op, ZERO_EXTEND, &len); break;
     case ADJUST_LEN_CMP_SEXT: avr_out_cmp_ext (op, SIGN_EXTEND, &len); break;
+    case ADJUST_LEN_CMP_LSR: avr_out_cmp_lsr (insn, op, &len); break;
 
     case ADJUST_LEN_LSHRQI: lshrqi3_out (insn, op, &len); break;
     case ADJUST_LEN_LSHRHI: lshrhi3_out (insn, op, &len); break;
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 2abf3c38d83..e9a52ceb842 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -170,7 +170,7 @@  (define_attr "adjust_len"
    ashlsi, ashrsi, lshrsi,
    ashlpsi, ashrpsi, lshrpsi,
    insert_bits, insv_notbit, insv,
-   add_set_ZN, add_set_N, cmp_uext, cmp_sext,
+   add_set_ZN, add_set_N, cmp_uext, cmp_sext, cmp_lsr,
    no"
   (const_string "no"))
 
@@ -6631,6 +6631,34 @@  (define_insn "*cmp<mode>"
    (set_attr "adjust_len" "tstsi,*,compare,compare")])
 
 
+;; "*cmphi_lsr"
+;; "*cmpsi_lsr"
+;; "*cmppsi_lsr"
+(define_insn_and_split "*cmp<mode>_lsr"
+  [(set (reg:CC REG_CC)
+        (compare:CC (lshiftrt:HISI (match_operand:HISI 0 "register_operand"    "r")
+                                   (match_operand:QI 1 "const_8_16_24_operand" "n"))
+                    (const_int 0)))
+   (clobber (scratch:QI))]
+  "reload_completed"
+  {
+    return avr_out_cmp_lsr (insn, operands, NULL);
+  }
+  "&& 1"
+  [;; "cmpqi3"
+   (set (reg:CC REG_CC)
+        (compare:CC (match_dup 0)
+                    (const_int 0)))]
+  {
+    // When the comparison is just one byte, then cmpqi3.
+    if (INTVAL (operands[1]) / 8 == <SIZE> - 1)
+      operands[0] = simplify_gen_subreg (QImode, operands[0], <MODE>mode, <SIZE> - 1);
+    else
+      FAIL;
+  }
+  [(set_attr "adjust_len" "cmp_lsr")])
+
+
 ;; A helper for avr_pass_ifelse::avr_rest_of_handle_ifelse().
 (define_expand "gen_compare<mode>"
   [(parallel [(set (reg:CC REG_CC)
@@ -6724,20 +6752,9 @@  (define_insn_and_split "cbranch<mode>4_insn"
                        (label_ref (match_dup 3))
                        (pc)))]
    {
-     // Unsigned >= 65536 and < 65536 can be performed by testing the
-     // high word against 0.
-     if ((GET_CODE (operands[0]) == LTU
-          || GET_CODE (operands[0]) == GEU)
-         && const_operand (operands[2], <MODE>mode)
-         && INTVAL (avr_to_int_mode (operands[2])) == 65536)
-       {
-         // "cmphi3" of the high word against 0.
-         operands[0] = copy_rtx (operands[0]);
-         PUT_CODE (operands[0], GET_CODE (operands[0]) == GEU ? NE : EQ);
-         operands[1] = simplify_gen_subreg (HImode, operands[1], <MODE>mode, 2);
-         operands[2] = const0_rtx;
-         operands[4] = gen_rtx_SCRATCH (QImode);
-       }
+     // Unsigned >= 256^n and < 256^n can be performed by testing the
+     // higher bytes against 0 (*cmpsi_lsr).
+     avr_maybe_cmp_lsr (operands);
    })
 
 ;; "cbranchpsi4_insn"
@@ -6760,7 +6777,12 @@  (define_insn_and_split "cbranchpsi4_insn"
          (if_then_else (match_op_dup 0
                          [(reg:CC REG_CC) (const_int 0)])
                        (label_ref (match_dup 3))
-                       (pc)))])
+                       (pc)))]
+   {
+     // Unsigned >= 256^n and < 256^n can be performed by testing the
+     // higher bytes against 0 (*cmppsi_lsr).
+     avr_maybe_cmp_lsr (operands);
+   })
 
 ;; "cbranchhi4_insn"
 ;; "cbranchhq4_insn"  "cbranchuhq4_insn"  "cbranchha4_insn"  "cbranchuha4_insn"
@@ -6786,21 +6808,11 @@  (define_insn_and_split "cbranch<mode>4_insn"
                        (pc)))]
    {
      // Unsigned >= 256 and < 256 can be performed by testing the
-     // high byte against 0.
-     if ((GET_CODE (operands[0]) == LTU
-          || GET_CODE (operands[0]) == GEU)
-         && const_operand (operands[2], <MODE>mode)
-         && INTVAL (avr_to_int_mode (operands[2])) == 256)
-       {
-         rtx_code code = GET_CODE (operands[0]) == GEU ? NE : EQ;
-         rtx hi8 = simplify_gen_subreg (QImode, operands[1], <MODE>mode, 1);
-         rtx cmp = gen_rtx_fmt_ee (code, VOIDmode, cc_reg_rtx, const0_rtx);
-         emit (gen_cmpqi3 (hi8, const0_rtx));
-         emit (gen_branch (operands[3], cmp));
-         DONE;
-       }
+     // high byte against 0 (*cmphi_lsr).
+     avr_maybe_cmp_lsr (operands);
    })
 
+
 ;; Combiner pattern to compare sign- or zero-extended register against
 ;; a wider register, like comparing uint8_t against uint16_t.
 (define_insn_and_split "*cbranch<HISI:mode>.<code><QIPSI:mode>.0"
diff --git a/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u16.c b/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u16.c
new file mode 100644
index 00000000000..268164e22b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u16.c
@@ -0,0 +1,73 @@ 
+/* Test comparisons against constants that are a multiple of 256.  */
+/* { dg-do run } */
+/* { dg-additional-options { -std=c99 } } */
+
+#define T u16
+
+#ifdef __OPTIMIZE__
+
+typedef __UINT64_TYPE__ u64;
+typedef __UINT32_TYPE__ u32;
+typedef __uint24 u24;
+typedef __UINT16_TYPE__ u16;
+typedef __UINT8_TYPE__ u8;
+
+typedef __INT8_TYPE__ i8;
+
+u8 volatile cc;
+
+#define NI __attribute__((noipa))
+#define AI static __inline__ __attribute__((always_inline))
+
+#define MK_FUN(id, val)					\
+NI void fun_geu_##id (T x)				\
+{							\
+  if (x >= val)						\
+    cc = 0;						\
+}							\
+							\
+NI T fun_ltu_##id (T x)					\
+{							\
+  if (x < val)						\
+    cc = 0;						\
+  return x;						\
+}							\
+							\
+NI void test_##id (void)				\
+{							\
+  for (i8 v = -2; v <= 2; ++v)				\
+    {							\
+      const u8 lt0 = !! (v & 0x80);			\
+      const T x = val + (T) v;				\
+							\
+      cc = 1;						\
+      fun_geu_##id (x);					\
+      if (cc != lt0)					\
+	__builtin_exit (__LINE__);			\
+							\
+      cc = 1;						\
+      T y = fun_ltu_##id (x);				\
+      if (y != x)					\
+	__builtin_exit (__LINE__);			\
+      if (cc != ! lt0)					\
+	__builtin_exit (__LINE__);			\
+    }							\
+}
+
+MK_FUN (01, 0x100)
+MK_FUN (02, 0x1200)
+MK_FUN (03, 0x8000)
+MK_FUN (04, 0xff00)
+
+#endif /* OPTIMIZE */
+
+int main (void)
+{
+#ifdef __OPTIMIZE__
+  test_01 ();
+  test_02 ();
+  test_03 ();
+#endif /* OPTIMIZE */
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u24.c b/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u24.c
new file mode 100644
index 00000000000..d68433b08e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u24.c
@@ -0,0 +1,76 @@ 
+/* Test comparisons against constants that are a multiple of 256.  */
+/* { dg-do run } */
+/* { dg-additional-options { -std=c99 } } */
+
+#define T u24
+
+#ifdef __OPTIMIZE__
+
+typedef __UINT64_TYPE__ u64;
+typedef __UINT32_TYPE__ u32;
+typedef __uint24 u24;
+typedef __UINT16_TYPE__ u16;
+typedef __UINT8_TYPE__ u8;
+
+typedef __INT8_TYPE__ i8;
+
+u8 volatile cc;
+
+#define NI __attribute__((noipa))
+#define AI static __inline__ __attribute__((always_inline))
+
+#define MK_FUN(id, val)					\
+NI void fun_geu_##id (T x)				\
+{							\
+  if (x >= val)						\
+    cc = 0;						\
+}							\
+							\
+NI T fun_ltu_##id (T x)					\
+{							\
+  if (x < val)						\
+    cc = 0;						\
+  return x;						\
+}							\
+							\
+NI void test_##id (void)				\
+{							\
+  for (i8 v = -2; v <= 2; ++v)				\
+    {							\
+      const u8 lt0 = !! (v & 0x80);			\
+      const T x = val + (T) v;				\
+							\
+      cc = 1;						\
+      fun_geu_##id (x);					\
+      if (cc != lt0)					\
+	__builtin_exit (__LINE__);			\
+							\
+      cc = 1;						\
+      T y = fun_ltu_##id (x);				\
+      if (y != x)					\
+	__builtin_exit (__LINE__);			\
+      if (cc != ! lt0)					\
+	__builtin_exit (__LINE__);			\
+    }							\
+}
+
+MK_FUN (01, 0x100)
+MK_FUN (02, 0x1200)
+MK_FUN (03, 0x8000)
+MK_FUN (04, 0x10000)
+MK_FUN (05, 0x110000)
+
+#endif /* OPTIMIZE */
+
+int main (void)
+{
+#ifdef __OPTIMIZE__
+  test_01 ();
+  test_02 ();
+  test_03 ();
+  test_04 ();
+  test_05 ();
+#endif /* OPTIMIZE */
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u32.c b/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u32.c
new file mode 100644
index 00000000000..886866224af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u32.c
@@ -0,0 +1,78 @@ 
+/* Test comparisons against constants that are a multiple of 256.  */
+/* { dg-do run } */
+/* { dg-additional-options { -std=c99 } } */
+
+#define T u32
+
+#ifdef __OPTIMIZE__
+
+typedef __UINT64_TYPE__ u64;
+typedef __UINT32_TYPE__ u32;
+typedef __uint24 u24;
+typedef __UINT16_TYPE__ u16;
+typedef __UINT8_TYPE__ u8;
+
+typedef __INT8_TYPE__ i8;
+
+u8 volatile cc;
+
+#define NI __attribute__((noipa))
+#define AI static __inline__ __attribute__((always_inline))
+
+#define MK_FUN(id, val)					\
+NI void fun_geu_##id (T x)				\
+{							\
+  if (x >= val)						\
+    cc = 0;						\
+}							\
+							\
+NI T fun_ltu_##id (T x)					\
+{							\
+  if (x < val)						\
+    cc = 0;						\
+  return x;						\
+}							\
+							\
+NI void test_##id (void)				\
+{							\
+  for (i8 v = -2; v <= 2; ++v)				\
+    {							\
+      const u8 lt0 = !! (v & 0x80);			\
+      const T x = val + (T) v;				\
+							\
+      cc = 1;						\
+      fun_geu_##id (x);					\
+      if (cc != lt0)					\
+	__builtin_exit (__LINE__);			\
+							\
+      cc = 1;						\
+      T y = fun_ltu_##id (x);				\
+      if (y != x)					\
+	__builtin_exit (__LINE__);			\
+      if (cc != ! lt0)					\
+	__builtin_exit (__LINE__);			\
+    }							\
+}
+
+MK_FUN (01, 0x100)
+MK_FUN (02, 0x1200)
+MK_FUN (03, 0x8000)
+MK_FUN (04, 0x10000)
+MK_FUN (05, 0x110000)
+MK_FUN (06, 0x1000000)
+
+#endif /* OPTIMIZE */
+
+int main (void)
+{
+#ifdef __OPTIMIZE__
+  test_01 ();
+  test_02 ();
+  test_03 ();
+  test_04 ();
+  test_05 ();
+  test_06 ();
+#endif /* OPTIMIZE */
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u64.c b/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u64.c
new file mode 100644
index 00000000000..928c442ebc6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u64.c
@@ -0,0 +1,84 @@ 
+/* Test comparisons against constants that are a multiple of 256.  */
+/* { dg-do run } */
+/* { dg-additional-options { -std=c99 } } */
+
+#define T u64
+
+#ifdef __OPTIMIZE__
+
+typedef __UINT64_TYPE__ u64;
+typedef __UINT32_TYPE__ u32;
+typedef __uint24 u24;
+typedef __UINT16_TYPE__ u16;
+typedef __UINT8_TYPE__ u8;
+
+typedef __INT8_TYPE__ i8;
+
+u8 volatile cc;
+
+#define NI __attribute__((noipa))
+#define AI static __inline__ __attribute__((always_inline))
+
+#define MK_FUN(id, val)					\
+NI void fun_geu_##id (T x)				\
+{							\
+  if (x >= val)						\
+    cc = 0;						\
+}							\
+							\
+NI T fun_ltu_##id (T x)					\
+{							\
+  if (x < val)						\
+    cc = 0;						\
+  return x;						\
+}							\
+							\
+NI void test_##id (void)				\
+{							\
+  for (i8 v = -2; v <= 2; ++v)				\
+    {							\
+      const u8 lt0 = !! (v & 0x80);			\
+      const T x = val + (T) v;				\
+							\
+      cc = 1;						\
+      fun_geu_##id (x);					\
+      if (cc != lt0)					\
+	__builtin_exit (__LINE__);			\
+							\
+      cc = 1;						\
+      T y = fun_ltu_##id (x);				\
+      if (y != x)					\
+	__builtin_exit (__LINE__);			\
+      if (cc != ! lt0)					\
+	__builtin_exit (__LINE__);			\
+    }							\
+}
+
+MK_FUN (01, 0x100)
+MK_FUN (02, 0x1200)
+MK_FUN (03, 0x8000)
+MK_FUN (04, 0x10000)
+MK_FUN (05, 0x110000)
+MK_FUN (06, 0x1000000)
+MK_FUN (07, 0x8080000000000000)
+MK_FUN (08, 0x0100000000000000)
+MK_FUN (09, 0x0001000000000000)
+
+#endif /* OPTIMIZE */
+
+int main (void)
+{
+#ifdef __OPTIMIZE__
+  test_01 ();
+  test_02 ();
+  test_03 ();
+  test_04 ();
+  test_05 ();
+  test_06 ();
+  test_07 ();
+  test_08 ();
+  test_09 ();
+#endif /* OPTIMIZE */
+
+  return 0;
+}