diff mbox series

Canonicalize (vec_duplicate (not A)) to (not (vec_duplicate A)).

Message ID 20210602054154.86889-1-hongtao.liu@intel.com
State New
Headers show
Series Canonicalize (vec_duplicate (not A)) to (not (vec_duplicate A)). | expand

Commit Message

liuhongt June 2, 2021, 5:41 a.m. UTC
For i386, this enables the following optimization:

from
	notl    %edi
      	vpbroadcastd    %edi, %xmm0
      	vpand   %xmm1, %xmm0, %xmm0
to
      	vpbroadcastd    %edi, %xmm0
      	vpandn   %xmm1, %xmm0, %xmm0

gcc/ChangeLog:

	PR target/100711
	* simplify-rtx.c (simplify_unary_operation_1):
	Canonicalize (vec_duplicate (not A)) to
	(not (vec_duplicate A)).
	* doc/md.texi (Insn Canonicalizations): Document
	canonicalization of vec_duplicate.

gcc/testsuite/ChangeLog:

	PR target/100711
	* gcc.target/i386/avx2-pr100711.c: New test.
	* gcc.target/i386/avx512bw-pr100711.c: New test.
---
 gcc/doc/md.texi                               |  5 ++
 gcc/simplify-rtx.c                            |  6 ++
 gcc/testsuite/gcc.target/i386/avx2-pr100711.c | 73 +++++++++++++++++++
 .../gcc.target/i386/avx512bw-pr100711.c       | 48 ++++++++++++
 4 files changed, 132 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx2-pr100711.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c
diff mbox series

Patch

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 0e65b3ae663..06b42901413 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -8297,6 +8297,11 @@  operand of @code{mult} is also a shift, then that is extended also.
 This transformation is only applied when it can be proven that the
 original operation had sufficient precision to prevent overflow.
 
+@cindex @code{vec_duplicate}, canonicalization of
+@item
+@code{(vec_duplicate (not @var{a}))} is converted to
+@code{(not (vec_duplicate @var{a}))}.
+
 @end itemize
 
 Further canonicalization rules are defined in the function
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 04423bbd195..171fc447d50 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -1708,6 +1708,12 @@  simplify_context::simplify_unary_operation_1 (rtx_code code, machine_mode mode,
 #endif
       break;
 
+      /* Canonicalize (vec_duplicate (not A)) to (not (vec_duplicate A)).  */
+    case VEC_DUPLICATE:
+      if (GET_CODE (op) == NOT)
+	return gen_rtx_NOT (mode, gen_rtx_VEC_DUPLICATE (mode, XEXP (op, 0)));
+      break;
+
     default:
       break;
     }
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr100711.c b/gcc/testsuite/gcc.target/i386/avx2-pr100711.c
new file mode 100644
index 00000000000..5b144623873
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-pr100711.c
@@ -0,0 +1,73 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "pandn" 8 } } */
+/* { dg-final { scan-assembler-not "not\[bwlq\]" } } */
+typedef char v16qi __attribute__((vector_size(16)));
+typedef char v32qi __attribute__((vector_size(32)));
+typedef short v8hi __attribute__((vector_size(16)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef long long v2di __attribute__((vector_size(16)));
+typedef long long v4di __attribute__((vector_size(32)));
+
+v16qi
+f1 (char a, v16qi c)
+{
+  char b = ~a;
+  return (__extension__(v16qi) {b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b}) & c;
+}
+
+v32qi
+f2 (char a, v32qi c)
+{
+  char b = ~a;
+  return (__extension__(v32qi) {b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b}) & c;
+}
+
+v8hi
+f3 (short a, v8hi c)
+{
+  short b = ~a;
+  return (__extension__(v8hi) {b, b, b, b, b, b, b, b}) & c;
+}
+
+v16hi
+f4 (short a, v16hi c)
+{
+  short b = ~a;
+  return (__extension__(v16hi) {b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b}) & c;
+}
+
+v4si
+f5 (int a, v4si c)
+{
+  int b = ~a;
+  return (__extension__(v4si) {b, b, b, b}) & c;
+}
+
+v8si
+f6 (int a, v8si c)
+{
+  int b = ~a;
+  return (__extension__(v8si) {b, b, b, b, b, b, b, b}) & c;
+}
+
+v2di
+f7 (long long a, v2di c)
+{
+  long long b = ~a;
+  return (__extension__(v2di) {b, b}) & c;
+}
+
+v4di
+f8 (long long a, v4di c)
+{
+  long long b = ~a;
+  return (__extension__(v4di) {b, b, b, b}) & c;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c
new file mode 100644
index 00000000000..f0a103d0bc2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c
@@ -0,0 +1,48 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "pandn" 4 } } */
+/* { dg-final { scan-assembler-not "not\[bwlq\]" } } */
+
+typedef char v64qi __attribute__((vector_size(64)));
+typedef short v32hi __attribute__((vector_size(64)));
+typedef int v16si __attribute__((vector_size(64)));
+typedef long long v8di __attribute__((vector_size(64)));
+
+v64qi
+f1 (char a, v64qi c)
+{
+  char b = ~a;
+  return (__extension__(v64qi) {b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b}) & c;
+}
+
+v32hi
+f2 (short a, v32hi c)
+{
+  short b = ~a;
+  return (__extension__(v32hi) {b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b}) & c;
+}
+
+v16si
+f3 (int a, v16si c)
+{
+  int b = ~a;
+  return (__extension__(v16si) {b, b, b, b, b, b, b, b,
+				 b, b, b, b, b, b, b, b}) & c;
+}
+
+v8di
+f4 (long long a, v8di c)
+{
+  long long b = ~a;
+  return (__extension__(v8di) {b, b, b, b, b, b, b, b}) & c;
+}