diff mbox series

[14/16,APX,NDD] Support APX NDD for rotate insns

Message ID 20231115094705.3976553-15-hongyu.wang@intel.com
State New
Headers show
Series Support Intel APX NDD | expand

Commit Message

Hongyu Wang Nov. 15, 2023, 9:47 a.m. UTC
gcc/ChangeLog:

	* config/i386/i386-expand.cc (ix86_can_use_ndd_p): Add ROTATE
	and ROTATERT.
	* config/i386/i386.md (*<insn><mode>3_1): Extend with a new
	alternative to support NDD for SI/DI rotate, and adjust output
	template.
	(*<insn>si3_1_zext): Likewise.
	(*<insn><mode>3_1): Likewise for QI/HI modes.
	(rcrsi2): Likewise.
	(rcrdi2): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/apx-ndd.c: Add test for left/right rotate.
---
 gcc/config/i386/i386-expand.cc          |  2 +
 gcc/config/i386/i386.md                 | 91 ++++++++++++++++---------
 gcc/testsuite/gcc.target/i386/apx-ndd.c | 20 ++++++
 3 files changed, 80 insertions(+), 33 deletions(-)
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 8e040346fbb..ab6f14485d6 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -1279,6 +1279,8 @@  bool ix86_can_use_ndd_p (enum rtx_code code)
     case ASHIFT:
     case ASHIFTRT:
     case LSHIFTRT:
+    case ROTATE:
+    case ROTATERT:
       return true;
     default:
       return false;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3ff333d4a41..760c0d32f4d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -16362,13 +16362,15 @@  (define_insn "*bmi2_rorx<mode>3_1"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*<insn><mode>3_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
 	(any_rotate:SWI48
-	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
-	  (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>")))
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>,c<S>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands,
+			    ix86_can_use_ndd_p (<CODE>))"
 {
+  bool use_ndd = (which_alternative == 2);
   switch (get_attr_type (insn))
     {
     case TYPE_ROTATEX:
@@ -16376,14 +16378,18 @@  (define_insn "*<insn><mode>3_1"
 
     default:
       if (operands[2] == const1_rtx
-	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-	return "<rotate>{<imodesuffix>}\t%0";
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+	  && !(use_ndd && REG_P (operands[1])
+	       && REGNO (operands[1]) == CX_REG))
+	return use_ndd ? "<rotate>{<imodesuffix>}\t{%1, %0|%0, %1}"
+		       : "<rotate>{<imodesuffix>}\t%0";
       else
-	return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+	return use_ndd ? "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		       : "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "isa" "*,bmi2")
-   (set_attr "type" "rotate,rotatex")
+  [(set_attr "isa" "*,bmi2,apx_ndd")
+   (set_attr "type" "rotate,rotatex,rotate")
    (set (attr "preferred_for_size")
      (cond [(eq_attr "alternative" "0")
 	      (symbol_ref "true")]
@@ -16433,13 +16439,14 @@  (define_insn "*bmi2_rorxsi3_1_zext"
    (set_attr "mode" "SI")])
 
 (define_insn "*<insn>si3_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
 	(zero_extend:DI
-	  (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
-			 (match_operand:QI 2 "nonmemory_operand" "cI,I"))))
+	  (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm")
+			 (match_operand:QI 2 "nonmemory_operand" "cI,I,cI"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
 {
+  bool use_ndd = (which_alternative == 2);
   switch (get_attr_type (insn))
     {
     case TYPE_ROTATEX:
@@ -16447,14 +16454,18 @@  (define_insn "*<insn>si3_1_zext"
 
     default:
       if (operands[2] == const1_rtx
-	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-	return "<rotate>{l}\t%k0";
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+	  && !(use_ndd && REG_P (operands[1])
+	       && REGNO (operands[1]) == CX_REG))
+	return use_ndd ? "<rotate>{l}\t{%1, %k0|%k0, %1}"
+		       : "<rotate>{l}\t%k0";
       else
-	return "<rotate>{l}\t{%2, %k0|%k0, %2}";
+	return use_ndd ? "<rotate>{l}\t{%2, %1, %k0|%k0, %1, %2}"
+		       : "<rotate>{l}\t{%2, %k0|%k0, %2}";
     }
 }
-  [(set_attr "isa" "*,bmi2")
-   (set_attr "type" "rotate,rotatex")
+  [(set_attr "isa" "*,bmi2,apx_ndd")
+   (set_attr "type" "rotate,rotatex,rotate")
    (set (attr "preferred_for_size")
      (cond [(eq_attr "alternative" "0")
 	      (symbol_ref "true")]
@@ -16498,19 +16509,27 @@  (define_split
 	(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
 
 (define_insn "*<insn><mode>3_1"
-  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
-	(any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0")
-			  (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m,r")
+	(any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0,rm")
+			  (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands,
+			    ix86_can_use_ndd_p (<CODE>))"
 {
   if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<rotate>{<imodesuffix>}\t%0";
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+      && !(which_alternative && REG_P (operands[1])
+	   && REGNO (operands[1]) == CX_REG))
+    return which_alternative
+	   ? "<rotate>{<imodesuffix>}\t{%1, %0|%0, %1}"
+	   : "<rotate>{<imodesuffix>}\t%0";
   else
-    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    return which_alternative
+	   ? "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+	   : "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
-  [(set_attr "type" "rotate")
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "rotate")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand")
@@ -16567,31 +16586,37 @@  (define_split
 
 ;; Rotations through carry flag
 (define_insn "rcrsi2"
-  [(set (match_operand:SI 0 "register_operand" "=r")
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
 	(plus:SI
-	  (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+	  (lshiftrt:SI (match_operand:SI 1 "register_operand" "0,r")
 		       (const_int 1))
 	  (ashift:SI (ltu:SI (reg:CCC FLAGS_REG) (const_int 0))
 		     (const_int 31))))
    (clobber (reg:CC FLAGS_REG))]
   ""
-  "rcr{l}\t%0"
-  [(set_attr "type" "ishift1")
+  "@
+   rcr{l}\t%0
+   rcr{l}\t{%1, %0|%0, %1}"
+  [(set_attr "isa" "*,apx_ndd")  ;; alt 0: legacy (1 tied to 0), alt 1: NDD
+   (set_attr "type" "ishift1")
    (set_attr "memory" "none")
    (set_attr "length_immediate" "0")
    (set_attr "mode" "SI")])
 
 (define_insn "rcrdi2"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
 	(plus:DI
-	  (lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
+	  (lshiftrt:DI (match_operand:DI 1 "register_operand" "0,r")
 		       (const_int 1))
 	  (ashift:DI (ltu:DI (reg:CCC FLAGS_REG) (const_int 0))
 		     (const_int 63))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
-  "rcr{q}\t%0"
-  [(set_attr "type" "ishift1")
+  "@
+   rcr{q}\t%0
+   rcr{q}\t{%1, %0|%0, %1}"
+  [(set_attr "isa" "*,apx_ndd")  ;; alt 0: legacy (1 tied to 0), alt 1: NDD
+   (set_attr "type" "ishift1")
    (set_attr "length_immediate" "0")
    (set_attr "mode" "DI")])
 
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index 28c0df72988..b8b70511023 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -40,6 +40,14 @@  foo3_##OP_NAME##_##TYPE (TYPE a)      \
   return b;			      \
 }			
 
+#define FOO4(TYPE, OP_NAME, OP1, OP2, IMM1)		    \
+TYPE							    \
+__attribute__ ((noipa))					    \
+foo4_##OP_NAME##_##TYPE (TYPE a)			    \
+{							    \
+  TYPE b = (a OP1 IMM1 | a OP2 (8 * sizeof(TYPE) - IMM1));  \
+  return b;						    \
+}
 
 #define F(TYPE, OP_NAME, OP)   \
 TYPE				 \
@@ -152,6 +160,16 @@  FOO3 (uint32_t, shr, >>, 7)
 FOO (uint64_t, shr, >>)
 FOO3 (uint64_t, shr, >>, 7)
 
+FOO4 (uint8_t, ror, >>, <<, 1)
+FOO4 (uint16_t, ror, >>, <<, 1)
+FOO4 (uint32_t, ror, >>, <<, 1)
+FOO4 (uint64_t, ror, >>, <<, 1)
+
+FOO4 (uint8_t, rol, <<, >>, 1)
+FOO4 (uint16_t, rol, <<, >>, 1)
+FOO4 (uint32_t, rol, <<, >>, 1)
+FOO4 (uint64_t, rol, <<, >>, 1)
+
 /* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
 /* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
 /* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]%(?:|r|e)si, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
@@ -180,3 +198,5 @@  FOO3 (uint64_t, shr, >>, 7)
 /* { dg-final { scan-assembler-times "sar(?:b|l|w|q)\[^\n\r]*7, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
 /* { dg-final { scan-assembler-times "shr(?:b|l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
 /* { dg-final { scan-assembler-times "shr(?:b|l|w|q)\[^\n\r]*7, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "ror(?:b|l|w|q)\[^\n\r]%(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "rol(?:b|l|w|q)\[^\n\r]%(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */