diff mbox

[mid-end] Support combining of LSHIFTRT + LSHIFTRT operations

Message ID VI1PR0801MB2031F2B12BBAFCD03F946489FF170@VI1PR0801MB2031.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Tamar Christina May 2, 2017, 2:34 p.m. UTC
Hi all,

r217118 added an optimization to combine ashiftrt and lshiftrt.
This same optimization can at the very least also apply to lshiftrt + lshiftrt
with the same constraints, i.e. that both operations are done for scalar modes,
that the second operation operates on a lowpart subreg of the first one, and
that the shift amount of the first operation equals the difference between the
bitsize of the wider mode and that of the subreg mode (64 - 32 = 32 below).

This reduces

	umull	x1, w0, w1
	lsr	x1, x1, 32
	lsr	w1, w1, 5

to

	umull	x1, w0, w1
	lsr	x1, x1, 37


Bootstrapped on aarch64-none-linux-gnu and x86_64-linux
and reg-tested on aarch64-none-linux-gnu with no regressions.

OK for trunk?

Thanks,
Tamar


gcc/
2017-04-27  Tamar Christina  <tamar.christina@arm.com>

	* simplify-rtx.c (simplify_binary_operation_1): Add LSHIFTRT case.

gcc/testsuite/
2017-04-27  Tamar Christina  <tamar.christina@arm.com>

	* gcc.dg/lsr-div1.c: New testcase.

Comments

Jeff Law May 2, 2017, 3:07 p.m. UTC | #1
On 05/02/2017 08:34 AM, Tamar Christina wrote:
> Hi all,
> 
> r217118 added an optimization to combine ashiftrt and lshiftrt.
> This same optimization can at the very least also apply to lshiftrt + lshiftrt
> with the same constraints, i.e. that both operations are done for scalar modes,
> that the second operation operates on a lowpart subreg of the first one, and
> that the shift amount of the first operation equals the difference between the
> bitsize of the wider mode and that of the subreg mode (64 - 32 = 32 below).
> 
> This reduces
> 
> 	umull	x1, w0, w1
> 	lsr	x1, x1, 32
> 	lsr	w1, w1, 5
> 
> to
> 
> 	umull	x1, w0, w1
> 	lsr	x1, x1, 37
> 
> 
> Bootstrapped on aarch64-none-linux-gnu and x86_64-linux
> and reg-tested on aarch64-none-linux-gnu with no regressions.
> 
> OK for trunk?
> 
> Thanks,
> Tamar
> 
> 
> gcc/
> 2017-04-27  Tamar Christina  <tamar.christina@arm.com>
> 
> 	* simplify-rtx.c (simplify_binary_operation_1): Add LSHIFTRT case.
> 
> gcc/testsuite/
> 2017-04-27  Tamar Christina  <tamar.christina@arm.com>
> 
> 	* gcc.dg/lsr-div1.c: New testcase.
OK for the trunk.

Thanks,
Jeff
diff mbox

Patch

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 640ccb7cb95933a6991bf1599099f7aed455daec..feaceff06d6267b372f40fcd263e2ae67bbd4c74 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -3343,19 +3343,21 @@  simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
 	  && UINTVAL (trueop0) == GET_MODE_MASK (mode)
 	  && ! side_effects_p (op1))
 	return op0;
+
+    canonicalize_shift:
       /* Given:
 	 scalar modes M1, M2
 	 scalar constants c1, c2
 	 size (M2) > size (M1)
 	 c1 == size (M2) - size (M1)
 	 optimize:
-	 (ashiftrt:M1 (subreg:M1 (lshiftrt:M2 (reg:M2) (const_int <c1>))
+	 ([a|l]shiftrt:M1 (subreg:M1 (lshiftrt:M2 (reg:M2) (const_int <c1>))
 				 <low_part>)
 		      (const_int <c2>))
 	 to:
-	 (subreg:M1 (ashiftrt:M2 (reg:M2) (const_int <c1 + c2>))
+	 (subreg:M1 ([a|l]shiftrt:M2 (reg:M2) (const_int <c1 + c2>))
 		    <low_part>).  */
-      if (code == ASHIFTRT
+      if ((code == ASHIFTRT || code == LSHIFTRT)
 	  && !VECTOR_MODE_P (mode)
 	  && SUBREG_P (op0)
 	  && CONST_INT_P (op1)
@@ -3372,13 +3374,13 @@  simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
 	  rtx tmp = GEN_INT (INTVAL (XEXP (SUBREG_REG (op0), 1))
 			     + INTVAL (op1));
 	  machine_mode inner_mode = GET_MODE (SUBREG_REG (op0));
-	  tmp = simplify_gen_binary (ASHIFTRT,
+	  tmp = simplify_gen_binary (code,
 				     GET_MODE (SUBREG_REG (op0)),
 				     XEXP (SUBREG_REG (op0), 0),
 				     tmp);
 	  return lowpart_subreg (mode, tmp, inner_mode);
 	}
-    canonicalize_shift:
+
       if (SHIFT_COUNT_TRUNCATED && CONST_INT_P (op1))
 	{
 	  val = INTVAL (op1) & (GET_MODE_PRECISION (mode) - 1);
diff --git a/gcc/testsuite/gcc.dg/lsr-div1.c b/gcc/testsuite/gcc.dg/lsr-div1.c
new file mode 100644
index 0000000000000000000000000000000000000000..962054d34d953b63c9736134b9ad147791a491d3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lsr-div1.c
@@ -0,0 +1,57 @@ 
+/* Test division by const int generates only one shift.  */
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-rtl-combine-all" } */
+/* { dg-options "-O2 -fdump-rtl-combine-all -mtune=cortex-a53" { target aarch64*-*-* } } */
+/* { dg-require-effective-target int32plus } */
+
+extern void abort (void);
+
+#define NOINLINE __attribute__((noinline))
+
+static NOINLINE int
+f1 (unsigned int n)
+{
+  return n % 0x33;
+}
+
+static NOINLINE int
+f2 (unsigned int n)
+{
+  return n % 0x12;
+}
+
+int
+main ()
+{
+  int a = 0xaaaaaaaa;
+  int b = 0x55555555;
+  int c;
+  c = f1 (a);
+  if (c != 0x11)
+    abort ();
+  c = f1 (b);
+  if (c != 0x22)
+    abort ();
+  c = f2 (a);
+  if (c != 0xE)
+    abort ();
+  c = f2 (b);
+  if (c != 0x7)
+    abort ();
+  return 0;
+}
+
+/* Following the replacement pattern for integer division by a constant, GCC
+   is expected to generate UMULL and (x)SHIFTRT.  This test checks that, for
+   division by const 0x33, GCC generates a single LSHIFTRT by 37 instead of
+   two - LSHIFTRT by 32 and LSHIFTRT by 5.  */
+
+/* { dg-final { scan-rtl-dump "\\(set \\(subreg:DI \\(reg:SI" "combine" { target aarch64*-*-* } } } */
+/* { dg-final { scan-rtl-dump "\\(lshiftrt:DI \\(reg:DI" "combine" { target aarch64*-*-* } } } */
+/* { dg-final { scan-rtl-dump "\\(const_int 37 " "combine" { target aarch64*-*-* } } } */
+
+/* Similarly, considering division by const 0x12, gcc generates a
+   single LSHIFTRT by 34, instead of two - LSHIFTRT by 32 and LSHIFTRT by 2.  */
+
+/* { dg-final { scan-rtl-dump "\\(const_int 34 " "combine" { target aarch64*-*-* } } } */
+