diff mbox series

[committed] Improve many SImode shifts on the H8/300H.

Message ID 66685fb7-ea8d-e2db-ff11-e44f22168d3b@gmail.com
State New
Headers show
Series [committed] Improve many SImode shifts on the H8/300H. | expand

Commit Message

Jeff Law Aug. 15, 2021, 4:18 a.m. UTC
As I've mentioned before, the H8/300H can only shift a single bit 
position at a time.  Naturally this means many shifts are implemented as 
loops.  There's a variety of special cases that we can do without loops 
by using rotates, sub-word moves, etc.  The general guidance for the 
port has been to only use inline or special sequences if they're shorter 
or just one instruction longer than the loop.

This was pretty reasonable guidance for QI/HI mode.  It was relaxed a 
bit about 10 years ago for HImode in particular where the kpit team 
realized they could save 50-100 cycles for some shifts by allowing 2 
instructions of code growth over the loop implementation.

But they only re-tuned HImode shifts.  There are even bigger benefits for 
re-tuning SImode shifts.  There are cases where we can save close to 200 
cycles by allowing 2 additional instructions.

This patch re-tunes SImode shifts on the H8/300H primarily by inlining 
more often or using a special sequence + inlining for residuals.  Both 
cases were already supported and this just uses those existing 
capabilities more often, so it was trivial to implement.  I think 
there are some cases where entirely new special sequences could be used, 
but I haven't tried those yet.

There'll be a similar follow-up for the H8/S.  The gains there aren't as 
spectacular because the H8/S gained shift-by-2 instructions, but they 
should still be significant.



Committed to the trunk after the usual testing and no regressions.

Jeff
commit 882f1d58bfa56737ff2de84c3cd1e0acfc318b86
Author: Jeff Law <jlaw@localhost.localdomain>
Date:   Sun Aug 15 00:13:23 2021 -0400

    Improve many SImode shifts on the H8/300H
    
    As I've mentioned before, the H8/300H can only shift a single bit position at a time.  Naturally this means many shifts are implemented as loops.  There's a variety of special cases that we can do without loops by using rotates, sub-word moves, etc.  The general guidance for the port has been to only use inline or special sequences if they're shorter or just one instruction longer than the loop.
    
    This was pretty reasonable guidance for QI/HI mode.  It was relaxed a bit about 10 years ago for HImode in particular where the kpit team realized they could save 50-100 cycles for some shifts by allowing 2 instructions of code growth over the loop implementation.
    
    But they only re-tuned HImode shifts.  There are even bigger benefits for re-tuning SImode shifts.  There are cases where we can save close to 200 cycles by allowing 2 additional instructions.
    
    This patch re-tunes SImode shifts on the H8/300H primarily by inlining more often or using a special sequence + inlining for residuals.  Both cases were already supported and this just uses those existing capabilities more often, so it was trivial to implement.  I think there are some cases where entirely new special sequences could be used, but I haven't tried those yet.
    
    gcc/
    
            * config/h8300/h8300.c (shift_alg_si): Retune H8/300H shifts
            to allow a bit more code growth, saving many dozens of cycles.
            (h8300_option_override): Adjust shift_alg_si if optimizing for
            code size.
            (get_shift_alg): Use special + inline shifts for residuals
            in more cases.
diff mbox series

Patch

diff --git a/gcc/config/h8300/h8300.c b/gcc/config/h8300/h8300.c
index d2f6548a265..7959ad1e276 100644
--- a/gcc/config/h8300/h8300.c
+++ b/gcc/config/h8300/h8300.c
@@ -228,18 +228,18 @@  static enum shift_alg shift_alg_si[2][3][32] = {
     /*  8    9   10   11   12   13   14   15  */
     /* 16   17   18   19   20   21   22   23  */
     /* 24   25   26   27   28   29   30   31  */
-    { INL, INL, INL, INL, INL, LOP, LOP, LOP,
+    { INL, INL, INL, INL, INL, INL, INL, LOP,
       SPC, LOP, LOP, LOP, LOP, LOP, LOP, SPC,
-      SPC, SPC, SPC, SPC, LOP, LOP, LOP, LOP,
-      SPC, LOP, LOP, LOP, SPC, SPC, SPC, SPC }, /* SHIFT_ASHIFT   */
-    { INL, INL, INL, INL, INL, LOP, LOP, LOP,
+      SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC,
+      SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC }, /* SHIFT_ASHIFT   */
+    { INL, INL, INL, INL, INL, INL, INL, LOP,
       SPC, LOP, LOP, LOP, LOP, LOP, LOP, SPC,
-      SPC, SPC, SPC, SPC, LOP, LOP, LOP, LOP,
-      SPC, LOP, LOP, LOP, SPC, SPC, SPC, SPC }, /* SHIFT_LSHIFTRT */
-    { INL, INL, INL, INL, INL, LOP, LOP, LOP,
+      SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC,
+      SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC }, /* SHIFT_LSHIFTRT */
+    { INL, INL, INL, INL, INL, INL, INL, LOP,
       SPC, LOP, LOP, LOP, LOP, LOP, LOP, LOP,
-      SPC, SPC, SPC, SPC, LOP, LOP, LOP, LOP,
-      SPC, LOP, LOP, LOP, LOP, LOP, LOP, SPC }, /* SHIFT_ASHIFTRT */
+      SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC,
+      SPC, SPC, SPC, SPC, LOP, LOP, LOP, SPC }, /* SHIFT_ASHIFTRT */
   },
   {
     /* TARGET_H8300S  */
@@ -343,6 +343,36 @@  h8300_option_override (void)
       shift_alg_hi[H8_300H][SHIFT_ASHIFTRT][13] = SHIFT_LOOP;
       shift_alg_hi[H8_300H][SHIFT_ASHIFTRT][14] = SHIFT_LOOP;
 
+      shift_alg_si[H8_300H][SHIFT_ASHIFT][5] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_ASHIFT][6] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_ASHIFT][20] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_ASHIFT][21] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_ASHIFT][22] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_ASHIFT][23] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_ASHIFT][25] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_ASHIFT][26] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_ASHIFT][27] = SHIFT_LOOP;
+
+      shift_alg_si[H8_300H][SHIFT_LSHIFTRT][5] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_LSHIFTRT][6] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_LSHIFTRT][20] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_LSHIFTRT][21] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_LSHIFTRT][22] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_LSHIFTRT][23] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_LSHIFTRT][25] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_LSHIFTRT][26] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_LSHIFTRT][27] = SHIFT_LOOP;
+
+      shift_alg_si[H8_300H][SHIFT_ASHIFTRT][5] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_ASHIFTRT][6] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_ASHIFTRT][20] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_ASHIFTRT][21] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_ASHIFTRT][22] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_ASHIFTRT][23] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_ASHIFTRT][25] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_ASHIFTRT][26] = SHIFT_LOOP;
+      shift_alg_si[H8_300H][SHIFT_ASHIFTRT][27] = SHIFT_LOOP;
+
       /* H8S */
       shift_alg_hi[H8_S][SHIFT_ASHIFTRT][14] = SHIFT_LOOP;
     }
@@ -3784,7 +3814,7 @@  get_shift_alg (enum shift_type shift_type, enum shift_mode shift_mode,
 	      gcc_unreachable ();
 	    }
 	}
-      else if ((TARGET_H8300H && count >= 16 && count <= 19)
+      else if ((TARGET_H8300H && count >= 16 && count <= 23)
 	       || (TARGET_H8300S && count >= 16 && count <= 21))
 	{
 	  info->remainder = count - 16;
@@ -3804,7 +3834,7 @@  get_shift_alg (enum shift_type shift_type, enum shift_mode shift_mode,
 	      goto end;
 	    }
 	}
-      else if ((TARGET_H8300H && count == 24)
+      else if ((TARGET_H8300H && count >= 24 && count <= 27)
 	       || (TARGET_H8300S && count >= 24 && count <= 25))
 	{
 	  info->remainder = count - 24;