diff mbox series

[x86,SSE] Some additional ternlog refinements.

Message ID 000701dac86c$2953c570$7bfb5050$@nextmovesoftware.com
State New
Headers show
Series [x86,SSE] Some additional ternlog refinements. | expand

Commit Message

Roger Sayle June 27, 2024, 8:29 a.m. UTC
This patch is another round of refinements to fine tune the new ternlog
infrastructure in i386's sse.md.  This patch tweaks ix86_ternlog_idx
to allow multiple MEM/CONST_VECTOR/VEC_DUPLICATE operands prior to
splitting (before reload), when force_register is called on all but
one of these operands.  Conceptually during the dynamic programming,
registers fill the args slots in the order 0, 1, 2, and mem-like
operands fill the slots in the order 2, 0, 1 [preferring the memory
operand to come last].

This patch allows us to remove some of the legacy ternlog patterns
in sse.md without regressions [which is left to the next and final
patch in this series].  An indication that these patterns are no
longer required is shown by the necessary testsuite tweaks below,
where the output assembler for the legacy instructions used hexadecimal,
but with the new ternlog infrastructure now consistently use decimal.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline?


2024-06-27  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
        * config/i386/i386-expand.cc (ix86_ternlog_idx) <case
VEC_DUPLICATE>:
        Add a "goto do_mem_operand" as this need not match memory_operand.
        <case CONST_VECTOR>: Only args[2] may be volatile memory operand.
        Allow MEM/VEC_DUPLICATE/CONST_VECTOR as args[0] and args[1].

gcc/testsuite/ChangeLog
        * gcc.target/i386/avx512f-andn-di-zmm-2.c: Match decimal instead
        of hexadecimal immediate operand to ternlog.
        * gcc.target/i386/avx512f-andn-si-zmm-2.c: Likewise.
        * gcc.target/i386/avx512f-orn-si-zmm-1.c: Likewise.
        * gcc.target/i386/avx512f-orn-si-zmm-2.c: Likewise.
        * gcc.target/i386/pr100711-3.c: Likewise.
        * gcc.target/i386/pr100711-4.c: Likewise.
        * gcc.target/i386/pr100711-5.c: Likewise.


Thanks in advance,
Roger
--

Comments

Hongtao Liu June 28, 2024, 1:10 a.m. UTC | #1
On Thu, Jun 27, 2024 at 4:29 PM Roger Sayle <roger@nextmovesoftware.com> wrote:
>
>
> This patch is another round of refinements to fine tune the new ternlog
> infrastructure in i386's sse.md.  This patch tweaks ix86_ternlog_idx
> to allow multiple MEM/CONST_VECTOR/VEC_DUPLICATE operands prior to
> splitting (before reload), when force_register is called on all but
> one of these operands.  Conceptually during the dynamic programming,
> registers fill the args slots in the order 0, 1, 2, and mem-like
> operands fill the slots in the order 2, 0, 1 [preferring the memory
> operand to come last].
>
> This patch allows us to remove some of the legacy ternlog patterns
> in sse.md without regressions [which is left to the next and final
> patch in this series].  An indication that these patterns are no
> longer required is shown by the necessary testsuite tweaks below,
> where the output assembler for the legacy instructions used hexadecimal,
> but with the new ternlog infrastructure now consistently use decimal.
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32}
> with no new failures.  Ok for mainline?
LGTM.
>
>
> 2024-06-27  Roger Sayle  <roger@nextmovesoftware.com>
>
> gcc/ChangeLog
>         * config/i386/i386-expand.cc (ix86_ternlog_idx) <case
> VEC_DUPLICATE>:
>         Add a "goto do_mem_operand" as this need not match memory_operand.
>         <case CONST_VECTOR>: Only args[2] may be volatile memory operand.
>         Allow MEM/VEC_DUPLICATE/CONST_VECTOR as args[0] and args[1].
>
> gcc/testsuite/ChangeLog
>         * gcc.target/i386/avx512f-andn-di-zmm-2.c: Match decimal instead
>         of hexadecimal immediate operand to ternlog.
>         * gcc.target/i386/avx512f-andn-si-zmm-2.c: Likewise.
>         * gcc.target/i386/avx512f-orn-si-zmm-1.c: Likewise.
>         * gcc.target/i386/avx512f-orn-si-zmm-2.c: Likewise.
>         * gcc.target/i386/pr100711-3.c: Likewise.
>         * gcc.target/i386/pr100711-4.c: Likewise.
>         * gcc.target/i386/pr100711-5.c: Likewise.
>
>
> Thanks in advance,
> Roger
> --
>
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index ac42300..110de7f 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -25608,7 +25608,7 @@  ix86_ternlog_idx (rtx op, rtx *args)
     case VEC_DUPLICATE:
       if (!bcst_mem_operand (op, GET_MODE (op)))
 	return -1;
-      /* FALLTHRU */
+      goto do_mem_operand;
 
     case MEM:
       if (!memory_operand (op, GET_MODE (op)))
@@ -25620,23 +25620,52 @@  ix86_ternlog_idx (rtx op, rtx *args)
       /* FALLTHRU */
 
     case CONST_VECTOR:
+do_mem_operand:
       if (!args[2])
 	{
 	  args[2] = op;
 	  return 0xaa;
 	}
       /* Maximum of one volatile memory reference per expression.  */
-      if (side_effects_p (op) && side_effects_p (args[2]))
+      if (side_effects_p (op))
 	return -1;
       if (rtx_equal_p (op, args[2]))
 	return 0xaa;
-      /* Check if one CONST_VECTOR is the ones-complement of the other.  */
+      /* Check if CONST_VECTOR is the ones-complement of args[2].  */
       if (GET_CODE (op) == CONST_VECTOR
 	  && GET_CODE (args[2]) == CONST_VECTOR
 	  && rtx_equal_p (simplify_const_unary_operation (NOT, GET_MODE (op),
 							  op, GET_MODE (op)),
 			  args[2]))
 	return 0x55;
+      if (!args[0])
+	{
+	  args[0] = op;
+	  return 0xf0;
+	}
+      if (rtx_equal_p (op, args[0]))
+	return 0xf0;
+      /* Check if CONST_VECTOR is the ones-complement of args[0].  */
+      if (GET_CODE (op) == CONST_VECTOR
+	  && GET_CODE (args[0]) == CONST_VECTOR
+	  && rtx_equal_p (simplify_const_unary_operation (NOT, GET_MODE (op),
+							  op, GET_MODE (op)),
+			  args[0]))
+	return 0x0f;
+      if (!args[1])
+	{
+	  args[1] = op;
+	  return 0xcc;
+	}
+      if (rtx_equal_p (op, args[1]))
+	return 0xcc;
+      /* Check if CONST_VECTOR is the ones-complement of args[1].  */
+      if (GET_CODE (op) == CONST_VECTOR
+	  && GET_CODE (args[1]) == CONST_VECTOR
+	  && rtx_equal_p (simplify_const_unary_operation (NOT, GET_MODE (op),
+							  op, GET_MODE (op)),
+			  args[1]))
+	return 0x33;
       return -1;
 
     case NOT:
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c
index 4ebb30f..24f3d6c 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c
@@ -1,6 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
-/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\\\$80, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
 /* { dg-final { scan-assembler-not "vpbroadcast" } } */
 
 #define type __m512i
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
index 86e7ebe..1f5e72d 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
@@ -1,6 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$80, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
 /* { dg-final { scan-assembler-not "vpbroadcast" } } */
 
 #define type __m512i
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c
index 7d02f03..d21f48f 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c
@@ -1,6 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
-/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0xdd, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$245, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
 /* { dg-final { scan-assembler-not "vpbroadcast" } } */
 
 #define type __m512i
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c
index c793083..5359200 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c
@@ -1,6 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
-/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0xbb, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$175, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
 /* { dg-final { scan-assembler-not "vpbroadcast" } } */
 
 #define type __m512i
diff --git a/gcc/testsuite/gcc.target/i386/pr100711-3.c b/gcc/testsuite/gcc.target/i386/pr100711-3.c
index 98cc1c3..ea60190 100644
--- a/gcc/testsuite/gcc.target/i386/pr100711-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr100711-3.c
@@ -39,4 +39,4 @@  v8di foo_v8di (long long a, v8di b)
 
 /* { dg-final { scan-assembler-times "vpandn" 4 { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpandn" 2 { target { ia32 } } } } */
-/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x44" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$80" 2 { target { ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100711-4.c b/gcc/testsuite/gcc.target/i386/pr100711-4.c
index 26152d6..4ca1292 100644
--- a/gcc/testsuite/gcc.target/i386/pr100711-4.c
+++ b/gcc/testsuite/gcc.target/i386/pr100711-4.c
@@ -39,4 +39,4 @@  v8di foo_v8di (long long a, v8di b)
 
 /* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$207" 4 { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$207" 2 { target { ia32 } } } } */
-/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xdd" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$245" 2 { target { ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100711-5.c b/gcc/testsuite/gcc.target/i386/pr100711-5.c
index 820bed8..640787e 100644
--- a/gcc/testsuite/gcc.target/i386/pr100711-5.c
+++ b/gcc/testsuite/gcc.target/i386/pr100711-5.c
@@ -39,5 +39,5 @@  v8di foo_v8di (long long a, v8di b)
 
 /* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$195" 4 { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$195" 2 { target { ia32 } } } } */
-/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x99" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$165" 2 { target { ia32 } } } } */