diff mbox series

[avr] ad PR117726: Improve logic 8-bit shifts with an offset of 6

Message ID 2fc5d665-d99c-44aa-a20a-b0141fbc1e2e@gjlay.de
State New
Headers show
Series [avr] ad PR117726: Improve logic 8-bit shifts with an offset of 6 | expand

Commit Message

Georg-Johann Lay Dec. 2, 2024, 11:29 a.m. UTC
Logical 8-bit shifts with an offset of 6 can be improved by
supporting them as 3-operand operations.

Ok for trunk?

Johann

--

AVR: Tweak uint8_t << 6 and uint8_t >> 6 shifts.

Logical 8-bit shifts with an offset of 6 can be improved by
supporting them as 3-operand operations.

	PR target/117726
gcc/
	* config/avr/avr-passes.cc (avr_emit_shift): All 8-bit shifts with
	an offset of 6 have 3-operand alternatives.
	* config/avr/avr.cc (ashlqi3_out, lshrqi3_out) [case 6]:
	Implement as 3-operand insn.
	(avr_rtx_costs_1) [QImode, ASHIFT + LSHIFTRT]: Adjust
	costs for offset of 6.
	* config/avr/avr.md (*ashlqi3_split, *ashlqi3)
	(*lshrqi3_split, *lshrqi3): Add "r,r,C06" alternative.

Comments

Denis Chertykov Dec. 2, 2024, 9:53 p.m. UTC | #1
пн, 2 дек. 2024 г. в 15:29, Georg-Johann Lay <avr@gjlay.de>:
>
> Logic 8-bit shifts with an offset of 6 can be improved by
> supporting them as 3-operand operations.
>
> Ok for trunk?
>

Ok. Please apply.

Denis.
diff mbox series

Patch

diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 7be5ec25fbc..dc98780ef27 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -4899,6 +4899,8 @@  avr_emit_shift (rtx_code code, rtx dest, rtx src, int off, rtx scratch)
   // Work out which alternatives can handle 3 operands independent
   // of options.
 
+  const bool b8_is_3op = off == 6;
+
   const bool b16_is_3op = select<bool>()
     : code == ASHIFT ? satisfies_constraint_C7c (xoff) // 7...12
     : code == LSHIFTRT ? satisfies_constraint_C7c (xoff)
@@ -4914,6 +4916,7 @@  avr_emit_shift (rtx_code code, rtx dest, rtx src, int off, rtx scratch)
   const bool is_3op = (off % 8 == 0
 		       || off == n_bits - 1
 		       || (code == ASHIFTRT && off == n_bits - 2)
+		       || (n_bits == 8 && b8_is_3op)
 		       || (n_bits == 16 && b16_is_3op)
 		       || (n_bits == 24 && b24_is_3op));
   rtx shift;
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 32028df30a5..ccf9b05bb3e 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -6780,6 +6780,8 @@  ashlqi3_out (rtx_insn *insn, rtx operands[], int *plen)
 {
   if (CONST_INT_P (operands[2]))
     {
+      int reg0 = REGNO (operands[0]);
+      int reg1 = REGNO (operands[1]);
       bool ldreg_p = test_hard_reg_class (LD_REGS, operands[0]);
       int offs = INTVAL (operands[2]);
 
@@ -6787,7 +6789,7 @@  ashlqi3_out (rtx_insn *insn, rtx operands[], int *plen)
 	*plen = 0;
 
       if (offs <= 3
-	  || (offs <= 6 && ! ldreg_p))
+	  || (offs <= 5 && ! ldreg_p))
 	{
 	  for (int i = 0; i < offs; ++i)
 	    avr_asm_len ("lsl %0", operands, plen, 1);
@@ -6814,10 +6816,28 @@  ashlqi3_out (rtx_insn *insn, rtx operands[], int *plen)
 			      "lsl %0"  CR_TAB
 			      "andi %0,0xe0", operands, plen, 3);
 	case 6:
-	  return avr_asm_len ("swap %0" CR_TAB
-			      "lsl %0"  CR_TAB
-			      "lsl %0"  CR_TAB
-			      "andi %0,0xc0", operands, plen, 4);
+	  if (ldreg_p && reg0 == reg1)
+	    return avr_asm_len ("swap %0" CR_TAB
+				"lsl %0"  CR_TAB
+				"lsl %0"  CR_TAB
+				"andi %0,0xc0", operands, plen, 4);
+	  if (ldreg_p && reg0 != reg1 && AVR_HAVE_MUL)
+	    return avr_asm_len ("ldi %0,1<<6" CR_TAB
+				"mul %0,%1"   CR_TAB
+				"mov %0,r0"   CR_TAB
+				"clr __zero_reg__", operands, plen, 4);
+	  return reg0 != reg1
+	    ? avr_asm_len ("clr %0"    CR_TAB
+			   "bst %1,0"  CR_TAB
+			   "bld %0,6"  CR_TAB
+			   "bst %1,1"  CR_TAB
+			   "bld %0,7", operands, plen, 5)
+	    : avr_asm_len ("lsl %0"  CR_TAB
+			   "lsl %0"  CR_TAB
+			   "lsl %0"  CR_TAB
+			   "lsl %0"  CR_TAB
+			   "lsl %0"  CR_TAB
+			   "lsl %0", operands, plen, 6);
 	case 7:
 	  return avr_asm_len ("bst %1,0" CR_TAB
 			      "clr %0"   CR_TAB
@@ -7663,6 +7683,8 @@  lshrqi3_out (rtx_insn *insn, rtx operands[], int *plen)
 {
   if (CONST_INT_P (operands[2]))
     {
+      int reg0 = REGNO (operands[0]);
+      int reg1 = REGNO (operands[1]);
       bool ldreg_p = test_hard_reg_class (LD_REGS, operands[0]);
       int offs = INTVAL (operands[2]);
 
@@ -7670,7 +7692,7 @@  lshrqi3_out (rtx_insn *insn, rtx operands[], int *plen)
 	*plen = 0;
 
       if (offs <= 3
-	  || (offs <= 6 && ! ldreg_p))
+	  || (offs <= 5 && ! ldreg_p))
 	{
 	  for (int i = 0; i < offs; ++i)
 	    avr_asm_len ("lsr %0", operands, plen, 1);
@@ -7697,10 +7719,28 @@  lshrqi3_out (rtx_insn *insn, rtx operands[], int *plen)
 			      "lsr %0"  CR_TAB
 			      "andi %0,0x7", operands, plen, 3);
 	case 6:
-	  return avr_asm_len ("swap %0" CR_TAB
-			      "lsr %0"  CR_TAB
-			      "lsr %0"  CR_TAB
-			      "andi %0,0x3", operands, plen, 4);
+	  if (ldreg_p && reg0 == reg1)
+	    return avr_asm_len ("swap %0" CR_TAB
+				"lsr %0"  CR_TAB
+				"lsr %0"  CR_TAB
+				"andi %0,0x3", operands, plen, 4);
+	  if (ldreg_p && reg0 != reg1 && AVR_HAVE_MUL)
+	    return avr_asm_len ("ldi %0,1<<2" CR_TAB
+				"mul %0,%1"   CR_TAB
+				"mov %0,r1"   CR_TAB
+				"clr __zero_reg__", operands, plen, 4);
+	  return reg0 != reg1
+	    ? avr_asm_len ("clr %0"    CR_TAB
+			   "bst %1,6"  CR_TAB
+			   "bld %0,0"  CR_TAB
+			   "bst %1,7"  CR_TAB
+			   "bld %0,1", operands, plen, 5)
+	    : avr_asm_len ("lsr %0"  CR_TAB
+			   "lsr %0"  CR_TAB
+			   "lsr %0"  CR_TAB
+			   "lsr %0"  CR_TAB
+			   "lsr %0"  CR_TAB
+			   "lsr %0", operands, plen, 6);
 	case 7:
 	  return avr_asm_len ("bst %1,7" CR_TAB
 			      "clr %0"   CR_TAB
@@ -12528,7 +12568,9 @@  avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
 	    {
 	      if (val1 == 7)
 		*total = COSTS_N_INSNS (3);
-	      else if (val1 >= 0 && val1 <= 7)
+	      else if (val1 == 6)
+		*total = COSTS_N_INSNS (5 - AVR_HAVE_MUL);
+	      else if (val1 >= 0 && val1 <= 5)
 		*total = COSTS_N_INSNS (val1);
 	      else
 		*total = COSTS_N_INSNS (1);
@@ -12688,7 +12730,7 @@  avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
 		*total = COSTS_N_INSNS (4);
 	      else if (val1 == 7)
 		*total = COSTS_N_INSNS (2);
-	      else if (val1 >= 0 && val1 <= 7)
+	      else if (val1 >= 0 && val1 <= 5)
 		*total = COSTS_N_INSNS (val1);
 	      else
 		*total = COSTS_N_INSNS (1);
@@ -12851,7 +12893,9 @@  avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
 	    {
 	      if (val1 == 7)
 		*total = COSTS_N_INSNS (3);
-	      else if (val1 >= 0 && val1 <= 7)
+	      else if (val1 == 6)
+		*total = COSTS_N_INSNS (5 - AVR_HAVE_MUL);
+	      else if (val1 >= 0 && val1 <= 5)
 		*total = COSTS_N_INSNS (val1);
 	      else
 		*total = COSTS_N_INSNS (1);
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index e343fb23d07..0c98318c03d 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -5122,9 +5122,9 @@  (define_split ; ashlqi3_const6
 ;; "*ashlqi3"
 ;; "*ashlqq3"  "*ashluqq3"
 (define_insn_and_split "*ashl<mode>3_split"
-  [(set (match_operand:ALL1 0 "register_operand"              "=r,r,r,r,r  ,!d,r,r")
-        (ashift:ALL1 (match_operand:ALL1 1 "register_operand"  "0,0,0,0,r  ,0 ,0,0")
-                     (match_operand:QI 2 "nop_general_operand" "r,L,P,K,C07,n ,n,Qm")))]
+  [(set (match_operand:ALL1 0 "register_operand"              "=r,r  ,r      ,r,r")
+        (ashift:ALL1 (match_operand:ALL1 1 "register_operand"  "0,0  ,r      ,0,0")
+                     (match_operand:QI 2 "nop_general_operand" "r,LPK,C07 C06,n,Qm")))]
   ""
   "#"
   "&& reload_completed"
@@ -5134,15 +5134,15 @@  (define_insn_and_split "*ashl<mode>3_split"
               (clobber (reg:CC REG_CC))])])
 
 (define_insn "*ashl<mode>3"
-  [(set (match_operand:ALL1 0 "register_operand"              "=r,r,r,r,r  ,!d,r,r")
-        (ashift:ALL1 (match_operand:ALL1 1 "register_operand"  "0,0,0,0,r  ,0 ,0,0")
-                     (match_operand:QI 2 "nop_general_operand" "r,L,P,K,C07,n ,n,Qm")))
+  [(set (match_operand:ALL1 0 "register_operand"              "=r,r  ,r      ,r,r")
+        (ashift:ALL1 (match_operand:ALL1 1 "register_operand"  "0,0  ,r      ,0,0")
+                     (match_operand:QI 2 "nop_general_operand" "r,LPK,C07 C06,n,Qm")))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
     return ashlqi3_out (insn, operands, NULL);
   }
-  [(set_attr "length" "5,0,1,2,3,4,6,9")
+  [(set_attr "length" "9")
    (set_attr "adjust_len" "ashlqi")])
 
 ;; "ashlhi3"
@@ -5747,9 +5747,9 @@  (define_split ; lshrqi3_const6
 ;; "*lshrqq3"
 ;; "*lshruqq3"
 (define_insn_and_split "*lshr<mode>3_split"
-  [(set (match_operand:ALL1 0 "register_operand"                  "=r,r,r,r,r  ,!d,r,r")
-        (lshiftrt:ALL1 (match_operand:ALL1 1 "register_operand"    "0,0,0,0,r  ,0 ,0,0")
-                       (match_operand:QI 2 "nop_general_operand"   "r,L,P,K,C07,n ,n,Qm")))]
+  [(set (match_operand:ALL1 0 "register_operand"                  "=r,r  ,r      ,r,r")
+        (lshiftrt:ALL1 (match_operand:ALL1 1 "register_operand"    "0,0  ,r      ,0,0")
+                       (match_operand:QI 2 "nop_general_operand"   "r,LPK,C07 C06,n,Qm")))]
   ""
   "#"
   "&& reload_completed"
@@ -5759,9 +5759,9 @@  (define_insn_and_split "*lshr<mode>3_split"
               (clobber (reg:CC REG_CC))])])
 
 (define_insn "*lshr<mode>3"
-  [(set (match_operand:ALL1 0 "register_operand"                  "=r,r,r,r,r  ,!d,r,r")
-        (lshiftrt:ALL1 (match_operand:ALL1 1 "register_operand"    "0,0,0,0,r  ,0 ,0,0")
-                       (match_operand:QI 2 "nop_general_operand"   "r,L,P,K,C07,n ,n,Qm")))
+  [(set (match_operand:ALL1 0 "register_operand"                  "=r,r  ,r      ,r,r")
+        (lshiftrt:ALL1 (match_operand:ALL1 1 "register_operand"    "0,0  ,r      ,0,0")
+                       (match_operand:QI 2 "nop_general_operand"   "r,LPK,C07 C06,n,Qm")))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {