diff mbox

S/390: Add splitter for "and" with complement.

Message ID 20160427075844.GD5082@linux.vnet.ibm.com
State New
Headers show

Commit Message

Dominik Vogt April 27, 2016, 7:58 a.m. UTC
The attached patch provides some improved patterns for "and with
complement" to the s390 machine description.  Bootstrapped and
regression tested on s390 and s390x.

Ciao

Dominik ^_^  ^_^

Comments

Dominik Vogt April 27, 2016, 8:56 a.m. UTC | #1
On Wed, Apr 27, 2016 at 08:58:44AM +0100, Dominik Vogt wrote:
> The attached patch provides some improved patterns for "and with
> complement" to the s390 machine description.  Bootstrapped and
> regression tested on s390 and s390x.

(This patch needs some careful proofreading.  I've made so many
versions of the patch that I may be overlooking something
obvious.)

Ciao

Dominik ^_^  ^_^
Andreas Krebbel July 19, 2016, 11:05 a.m. UTC | #2
On 07/19/2016 11:37 AM, Dominik Vogt wrote:
>  ;
> +; And with complement
> +;
> +; c = ~b & a = (b & a) ^ a
> +
> +(define_insn_and_split "*andc_split"

Please append <mode> here to make the insn name unique.

> +  [(set (match_operand:GPR 0 "nonimmediate_operand" "")
> +	(and:GPR (not:GPR (match_operand:GPR 1 "nonimmediate_operand" ""))
> +		 (match_operand:GPR 2 "general_operand" "")))
> +   (clobber (reg:CC CC_REGNUM))]
> +  "TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
> +  "#"
> +  "&& 1"
> +  [
> +  (parallel
> +   [(set (match_dup 3) (and:GPR (match_dup 1) (match_dup 2)))
> +   (clobber (reg:CC CC_REGNUM))])
> +  (parallel
> +   [(set (match_dup 0) (xor:GPR (match_dup 3) (match_dup 2)))
> +   (clobber (reg:CC CC_REGNUM))])]
> +{
> +  if (reg_overlap_mentioned_p (operands[0], operands[2]))
> +    {
> +      gcc_assert (can_create_pseudo_p ());

Is it really safe to assume we will never get here after reload? I don't see where this is
prevented. Btw. the very same assertion is in gen_reg_rtx anyway so no need to duplicate it.

> +      operands[3] = gen_reg_rtx (<MODE>mode);
> +    }
> +  else
> +    operands[3] = operands[0];
> +})
> +
> +; Convert "(xor (operand) (-1))" to "(not (operand))" for low optimization
> +; levels so that "*andc_split" matches.
> +(define_insn_and_split "*andc_split2"

<mode> missing

> +  [(set (match_operand:GPR 0 "nonimmediate_operand" "")
> +        (and:GPR (xor:GPR (match_operand:GPR 1 "nonimmediate_operand" "")
> +			  (const_int -1))
> +		 (match_operand:GPR 2 "general_operand" "")))
> +    (clobber (reg:CC CC_REGNUM))]
> +  "TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
> +  "#"
> +  "&& 1"
> +  [(parallel
> +    [(set (match_dup 0) (and:GPR (not:GPR (match_dup 1)) (match_dup 2)))
> +    (clobber (reg:CC CC_REGNUM))])]
> +)
> +
> +;
>  ; Block and (NC) patterns.
>  ;
>

Looks like these testcases could be merged by putting the lp64 conditions on the scan-assembler
directives.

> diff --git a/gcc/testsuite/gcc.target/s390/md/andc-splitter-1.c
b/gcc/testsuite/gcc.target/s390/md/andc-splitter-1.c
> new file mode 100644
> index 0000000..ed78921
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/md/andc-splitter-1.c
> @@ -0,0 +1,61 @@
> +/* Machine description pattern tests.  */
> +
> +/* { dg-do run { target { lp64 } } } */
> +/* { dg-options "-mzarch -save-temps -dP" } */
> +/* Skip test if -O0 is present on the command line:
> +
> +    { dg-skip-if "" { *-*-* } { "-O0" } { "" } }
> +
> +   Skip test if the -O option is missing from the command line
> +    { dg-skip-if "" { *-*-* } { "*" } { "-O*" } }
> +*/
> +
> +__attribute__ ((noinline))
> +unsigned long andc_vv(unsigned long a, unsigned long b)
> +{ return ~b & a; }
> +/* { dg-final { scan-assembler ":15 .\* \{\\*anddi3\}" } } */
> +/* { dg-final { scan-assembler ":15 .\* \{\\*xordi3\}" } } */
> +
> +__attribute__ ((noinline))
> +unsigned long andc_pv(unsigned long *a, unsigned long b)
> +{ return ~b & *a; }
> +/* { dg-final { scan-assembler ":21 .\* \{\\*anddi3\}" } } */
> +/* { dg-final { scan-assembler ":21 .\* \{\\*xordi3\}" } } */
> +
> +__attribute__ ((noinline))
> +unsigned long andc_vp(unsigned long a, unsigned long *b)
> +{ return ~*b & a; }
> +/* { dg-final { scan-assembler ":27 .\* \{\\*anddi3\}" } } */
> +/* { dg-final { scan-assembler ":27 .\* \{\\*xordi3\}" } } */
> +
> +__attribute__ ((noinline))
> +unsigned long andc_pp(unsigned long *a, unsigned long *b)
> +{ return ~*b & *a; }
> +/* { dg-final { scan-assembler ":33 .\* \{\\*anddi3\}" } } */
> +/* { dg-final { scan-assembler ":33 .\* \{\\*xordi3\}" } } */
> +
> +/* { dg-final { scan-assembler-times "\tngr\?k\?\t" 4 } } */
> +/* { dg-final { scan-assembler-times "\txgr\?\t" 4 } } */
> +
> +int
> +main (void)
> +{
> +  unsigned long a = 0xc00000000000000cllu;
> +  unsigned long b = 0x500000000000000allu;
> +  unsigned long e = 0x8000000000000004llu;
> +  unsigned long c;
> +
> +  c = andc_vv (a, b);
> +  if (c != e)
> +    __builtin_abort ();
> +  c = andc_pv (&a, b);
> +  if (c != e)
> +    __builtin_abort ();
> +  c = andc_vp (a, &b);
> +  if (c != e)
> +    __builtin_abort ();
> +  c = andc_pp (&a, &b);
> +  if (c != e)
> +    __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/s390/md/andc-splitter-2.c
b/gcc/testsuite/gcc.target/s390/md/andc-splitter-2.c
> new file mode 100644
> index 0000000..9e78335
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/md/andc-splitter-2.c
> @@ -0,0 +1,38 @@
> +/* Machine description pattern tests.  */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-mzarch -save-temps -dP" } */
> +/* Skip test if -O0 is present on the command line:
> +
> +    { dg-skip-if "" { *-*-* } { "-O0" } { "" } }
> +
> +   Skip test if the -O option is missing from the command line
> +    { dg-skip-if "" { *-*-* } { "*" } { "-O*" } }
> +*/
> +
> +__attribute__ ((noinline))
> +unsigned int andc_vv(unsigned int a, unsigned int b)
> +{ return ~b & a; }
> +/* { dg-final { scan-assembler ":15 .\* \{\\*andsi3_zarch\}" } } */
> +/* { dg-final { scan-assembler ":15 .\* \{\\*xorsi3\}" } } */
> +
> +__attribute__ ((noinline))
> +unsigned int andc_pv(unsigned int *a, unsigned int b)
> +{ return ~b & *a; }
> +/* { dg-final { scan-assembler ":21 .\* \{\\*andsi3_zarch\}" } } */
> +/* { dg-final { scan-assembler ":21 .\* \{\\*xorsi3\}" } } */
> +
> +__attribute__ ((noinline))
> +unsigned int andc_vp(unsigned int a, unsigned int *b)
> +{ return ~*b & a; }
> +/* { dg-final { scan-assembler ":27 .\* \{\\*andsi3_zarch\}" } } */
> +/* { dg-final { scan-assembler ":27 .\* \{\\*xorsi3\}" } } */
> +
> +__attribute__ ((noinline))
> +unsigned int andc_pp(unsigned int *a, unsigned int *b)
> +{ return ~*b & *a; }
> +/* { dg-final { scan-assembler ":33 .\* \{\\*andsi3_zarch\}" } } */
> +/* { dg-final { scan-assembler ":33 .\* \{\\*xorsi3\}" } } */
> +
> +/* { dg-final { scan-assembler-times "\tnr\?k\?\t" 4 } } */
> +/* { dg-final { scan-assembler-times "\txr\?k\?\t" 4 } } */
diff mbox

Patch

From de225e02fe79661642f123fd0505a0bd60f20066 Mon Sep 17 00:00:00 2001
From: Dominik Vogt <vogt@linux.vnet.ibm.com>
Date: Mon, 14 Mar 2016 17:48:17 +0100
Subject: [PATCH] S/390: Add splitter for "and" with complement.

Force splitting of logical operator expressions with three operands, a
register destination and a memory operand, because there are no instructions
for that and combine produces inefficient code.
---
 gcc/config/s390/s390-protos.h                      |  2 +
 gcc/config/s390/s390.c                             | 65 ++++++++++++++++++++++
 gcc/config/s390/s390.h                             |  3 +
 gcc/config/s390/s390.md                            | 52 ++++++++++++++++-
 gcc/testsuite/gcc.target/s390/md/andc-splitter-1.c | 61 ++++++++++++++++++++
 gcc/testsuite/gcc.target/s390/md/andc-splitter-2.c | 38 +++++++++++++
 6 files changed, 218 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/md/andc-splitter-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/md/andc-splitter-2.c

diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 2ccf0bb..8ba4d5d 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -127,6 +127,8 @@  extern rtx_insn *s390_emit_call (rtx, rtx, rtx, rtx);
 extern void s390_expand_logical_operator (enum rtx_code,
 					  machine_mode, rtx *);
 extern bool s390_logical_operator_ok_p (rtx *);
+extern bool s390_logical_operator_si3_ok_p (rtx *);
+extern bool s390_andc_split_ok_p (rtx *);
 extern void s390_narrow_logical_operator (enum rtx_code, rtx *, rtx *);
 extern void s390_split_access_reg (rtx, rtx *, rtx *);
 
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index cb5dd5f..1a303d8 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -2558,6 +2558,27 @@  s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
 	src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
     }
 
+  /* There are no useful three-operand instructions with a source in memory
+     and the destination in a register.  Load memory operands into registers
+     if necessary.  */
+  if (!s390_logical_operator_si3_ok_p (operands))
+    {
+      if (MEM_P (src1))
+	{
+	  rtx temp = gen_reg_rtx (mode);
+
+	  emit_move_insn (temp, src1);
+	  src1 = temp;
+	}
+      if (MEM_P (src2))
+	{
+	  rtx temp = gen_reg_rtx (mode);
+
+	  emit_move_insn (temp, src2);
+	  src2 = temp;
+	}
+    }
+
   /* Emit the instruction.  */
   op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
   clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
@@ -2583,6 +2604,50 @@  s390_logical_operator_ok_p (rtx *operands)
   return true;
 }
 
+/* Like s390_logical_operator_ok_p, but additionally reject OPERANDS (0:
+   destination, 1 and 2: sources) that result in less efficient code later.  */
+
+bool
+s390_logical_operator_si3_ok_p (rtx *operands)
+{
+  if (!s390_logical_operator_ok_p (operands))
+    return false;
+  if (reload_completed)
+    return true;
+  /* Before reload completes, reject a register (or subreg of register)
+     destination when one source is a MEM and neither is a const_int.  */
+  if (REG_OR_SUBREG_P (operands[0])
+      && (MEM_P (operands[1]) || MEM_P (operands[2]))
+      && !(CONST_INT_P (operands[1]) || CONST_INT_P (operands[2])))
+    return false;
+
+  return true;
+}
+
+/* Return true if the "and with complement" splitters may be used for OPERANDS
+   (0: destination, 1 and 2: sources).  Always false once reload completed.  */
+
+bool
+s390_andc_split_ok_p (rtx *operands)
+{
+  if (reload_completed)
+    return false;
+  if (!s390_logical_operator_si3_ok_p (operands))
+    return false;
+  /* Reject a memory destination that is identical to one of the source
+     operands when both sources are registers (or subregs of registers) or
+     memory, because the splitter would replace the destination with a
+     register, yielding an undefined pattern.  */
+  if (MEM_P (operands[0])
+      && (MEM_P (operands[1]) || REG_OR_SUBREG_P (operands[1]))
+      && (MEM_P (operands[2]) || REG_OR_SUBREG_P (operands[2]))
+      && (rtx_equal_p (operands[0], operands[1])
+	  || rtx_equal_p (operands[0], operands[2])))
+    return false;
+
+  return true;
+}
+
 /* Narrow logical operation CODE of memory operand MEMOP with immediate
    operand IMMOP to switch from SS to SI type instructions.  */
 
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index 3a7be1a..19185ed 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -389,6 +389,9 @@  extern const char *s390_host_detect_local_cpu (int argc, const char **argv);
 #define ACCESS_REG_P(X)		(REG_P (X) && ACCESS_REGNO_P (REGNO (X)))
 #define VECTOR_NOFP_REG_P(X)    (REG_P (X) && VECTOR_NOFP_REGNO_P (REGNO (X)))
 #define VECTOR_REG_P(X)         (REG_P (X) && VECTOR_REGNO_P (REGNO (X)))
+/* True if X is a REG or a SUBREG of a REG.  X may be evaluated repeatedly.  */
+#define REG_OR_SUBREG_P(x) \
+  (REG_P ((x)) || (SUBREG_P ((x)) && REG_P (SUBREG_REG ((x)))))
 
 /* Set up fixed registers and calling convention:
 
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 5f3b0f7..b120197 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -7100,7 +7100,7 @@ 
                 (match_operand:SI 2 "general_operand"
 			    " M,M,N0HSF,N1HSF,Os,d,d,R,T,NxxSq,NxQSF,Q")))
    (clobber (reg:CC CC_REGNUM))]
-  "TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
+  "TARGET_ZARCH && s390_logical_operator_si3_ok_p (operands)"
   "@
    #
    #
@@ -7234,6 +7234,52 @@ 
    (set_attr "z10prop" "z10_super_E1,z10_super,*")])
 
 ;
+; And with complement: c = ~b & a is split into an "and" followed by an
+; "xor", using the identity ~b & a = (b & a) ^ a.  The intermediate result
+; goes to a new pseudo if the destination overlaps operand 2 (a).
+
+(define_insn_and_split "*andc_split"
+  [(set (match_operand:GPR 0 "nonimmediate_operand" "")
+	(and:GPR (not:GPR (match_operand:GPR 1 "nonimmediate_operand" ""))
+		 (match_operand:GPR 2 "general_operand" "")))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_ZARCH && s390_andc_split_ok_p (operands)"
+  "#"
+  "&& 1"
+  [
+  (parallel
+   [(set (match_dup 3) (and:GPR (match_dup 1) (match_dup 2)))
+   (clobber (reg:CC CC_REGNUM))])
+  (parallel
+   [(set (match_dup 0) (xor:GPR (match_dup 3) (match_dup 2)))
+   (clobber (reg:CC CC_REGNUM))])]
+{
+  if (reg_overlap_mentioned_p (operands[0], operands[2]))
+    {
+      gcc_assert (can_create_pseudo_p ());
+      operands[3] = gen_reg_rtx (<MODE>mode);
+    }
+  else
+    operands[3] = operands[0];
+})
+
+; Rewrite "(and (xor (operand) (-1)) ...)" to "(and (not (operand)) ...)" so
+; that "*andc_split" matches; the xor form occurs at low optimization levels.
+(define_insn_and_split "*andc_split2"
+  [(set (match_operand:GPR 0 "nonimmediate_operand" "")
+        (and:GPR (xor:GPR (match_operand:GPR 1 "nonimmediate_operand" "")
+			  (const_int -1))
+		 (match_operand:GPR 2 "general_operand" "")))
+    (clobber (reg:CC CC_REGNUM))]
+  "TARGET_ZARCH && s390_andc_split_ok_p (operands)"
+  "#"
+  "&& 1"
+  [(parallel
+    [(set (match_dup 0) (and:GPR (not:GPR (match_dup 1)) (match_dup 2)))
+    (clobber (reg:CC CC_REGNUM))])]
+)
+
+;
 ; Block and (NC) patterns.
 ;
 
@@ -7429,7 +7475,7 @@ 
         (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,    0, 0,0,d,0,0,    0,0")
                 (match_operand:SI 2 "general_operand"   "N0HS0,N1HS0,Os,d,d,R,T,NxQS0,Q")))
    (clobber (reg:CC CC_REGNUM))]
-  "TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
+  "TARGET_ZARCH && s390_logical_operator_si3_ok_p (operands)"
   "@
    oilh\t%0,%i2
    oill\t%0,%i2
@@ -7751,7 +7797,7 @@ 
         (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,d,0,0,    0,0")
                 (match_operand:SI 2 "general_operand"      "Os,d,d,R,T,NxQS0,Q")))
    (clobber (reg:CC CC_REGNUM))]
-  "s390_logical_operator_ok_p (operands)"
+  "s390_logical_operator_si3_ok_p (operands)"
   "@
    xilf\t%0,%o2
    xr\t%0,%2
diff --git a/gcc/testsuite/gcc.target/s390/md/andc-splitter-1.c b/gcc/testsuite/gcc.target/s390/md/andc-splitter-1.c
new file mode 100644
index 0000000..ed78921
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/md/andc-splitter-1.c
@@ -0,0 +1,61 @@ 
+/* Machine description pattern tests for "and with complement" (64 bit).  */
+
+/* { dg-do run { target { lp64 } } } */
+/* { dg-options "-mzarch -save-temps -dP" } */
+/* Skip test if -O0 is present on the command line:
+
+    { dg-skip-if "" { *-*-* } { "-O0" } { "" } }
+
+   Skip test if the -O option is missing from the command line
+    { dg-skip-if "" { *-*-* } { "*" } { "-O*" } }
+*/
+
+__attribute__ ((noinline))
+unsigned long andc_vv(unsigned long a, unsigned long b)
+{ return ~b & a; }
+/* { dg-final { scan-assembler ":15 .\* \{\\*anddi3\}" } } */
+/* { dg-final { scan-assembler ":15 .\* \{\\*xordi3\}" } } */
+
+__attribute__ ((noinline))
+unsigned long andc_pv(unsigned long *a, unsigned long b)
+{ return ~b & *a; }
+/* { dg-final { scan-assembler ":21 .\* \{\\*anddi3\}" } } */
+/* { dg-final { scan-assembler ":21 .\* \{\\*xordi3\}" } } */
+
+__attribute__ ((noinline))
+unsigned long andc_vp(unsigned long a, unsigned long *b)
+{ return ~*b & a; }
+/* { dg-final { scan-assembler ":27 .\* \{\\*anddi3\}" } } */
+/* { dg-final { scan-assembler ":27 .\* \{\\*xordi3\}" } } */
+
+__attribute__ ((noinline))
+unsigned long andc_pp(unsigned long *a, unsigned long *b)
+{ return ~*b & *a; }
+/* { dg-final { scan-assembler ":33 .\* \{\\*anddi3\}" } } */
+/* { dg-final { scan-assembler ":33 .\* \{\\*xordi3\}" } } */
+
+/* { dg-final { scan-assembler-times "\tngr\?k\?\t" 4 } } */
+/* { dg-final { scan-assembler-times "\txgr\?\t" 4 } } */
+
+int
+main (void)
+{
+  unsigned long a = 0xc00000000000000cllu;
+  unsigned long b = 0x500000000000000allu;
+  unsigned long e = 0x8000000000000004llu;
+  unsigned long c;
+
+  c = andc_vv (a, b);
+  if (c != e)
+    __builtin_abort ();
+  c = andc_pv (&a, b);
+  if (c != e)
+    __builtin_abort ();
+  c = andc_vp (a, &b);
+  if (c != e)
+    __builtin_abort ();
+  c = andc_pp (&a, &b);
+  if (c != e)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/s390/md/andc-splitter-2.c b/gcc/testsuite/gcc.target/s390/md/andc-splitter-2.c
new file mode 100644
index 0000000..9e78335
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/md/andc-splitter-2.c
@@ -0,0 +1,38 @@ 
+/* Machine description pattern tests for "and with complement" (32 bit).  */
+
+/* { dg-do compile } */
+/* { dg-options "-mzarch -save-temps -dP" } */
+/* Skip test if -O0 is present on the command line:
+
+    { dg-skip-if "" { *-*-* } { "-O0" } { "" } }
+
+   Skip test if the -O option is missing from the command line
+    { dg-skip-if "" { *-*-* } { "*" } { "-O*" } }
+*/
+
+__attribute__ ((noinline))
+unsigned int andc_vv(unsigned int a, unsigned int b)
+{ return ~b & a; }
+/* { dg-final { scan-assembler ":15 .\* \{\\*andsi3_zarch\}" } } */
+/* { dg-final { scan-assembler ":15 .\* \{\\*xorsi3\}" } } */
+
+__attribute__ ((noinline))
+unsigned int andc_pv(unsigned int *a, unsigned int b)
+{ return ~b & *a; }
+/* { dg-final { scan-assembler ":21 .\* \{\\*andsi3_zarch\}" } } */
+/* { dg-final { scan-assembler ":21 .\* \{\\*xorsi3\}" } } */
+
+__attribute__ ((noinline))
+unsigned int andc_vp(unsigned int a, unsigned int *b)
+{ return ~*b & a; }
+/* { dg-final { scan-assembler ":27 .\* \{\\*andsi3_zarch\}" } } */
+/* { dg-final { scan-assembler ":27 .\* \{\\*xorsi3\}" } } */
+
+__attribute__ ((noinline))
+unsigned int andc_pp(unsigned int *a, unsigned int *b)
+{ return ~*b & *a; }
+/* { dg-final { scan-assembler ":33 .\* \{\\*andsi3_zarch\}" } } */
+/* { dg-final { scan-assembler ":33 .\* \{\\*xorsi3\}" } } */
+
+/* { dg-final { scan-assembler-times "\tnr\?k\?\t" 4 } } */
+/* { dg-final { scan-assembler-times "\txr\?k\?\t" 4 } } */
-- 
2.3.0