diff mbox series

[committed] i386: Double-word sign-extension missed-optimization [PR110717]

Message ID CAFULd4ZMydG9qLjbcdCddg=Cd5B5MXoqcPsQ=xQpYj5dwTQKNQ@mail.gmail.com
State New
Headers show
Series [committed] i386: Double-word sign-extension missed-optimization [PR110717] | expand

Commit Message

Uros Bizjak July 20, 2023, 6:57 p.m. UTC
When sign-extending the value in a double-word register pair using an ASHIFT
and ASHIFTRT sequence with the same immediate count that is less than the
word width, there is no need to shift the lower word of the value.  The
sign-extension could be limited to the upper word, but we uselessly shift
the lower word with it as well:
    movq    %rdi, %rax
    movq    %rsi, %rdx
    shldq    $59, %rdi, %rdx
    salq    $59, %rax
    shrdq    $59, %rdx, %rax
    sarq    $59, %rdx
    ret
for -m64 and
    movl    4(%esp), %eax
    movl    8(%esp), %edx
    shldl    $27, %eax, %edx
    sall    $27, %eax
    shrdl    $27, %edx, %eax
    sarl    $27, %edx
    ret
for -m32.

The patch introduces a new post-reload splitter to provide the combined
ASHIFTRT/ASHIFT instruction pattern.  The instruction is split into a sequence
of SAL and SAR insns on the upper word, with the same immediate count operand:
    movq    %rsi, %rdx
    movq    %rdi, %rax
    salq    $59, %rdx
    sarq    $59, %rdx
    ret

Some additional complexity is required to properly handle the STV transform,
where we emit a sequence of DImode PSLLQ and PSRAQ insns for 32-bit AVX512VL
targets when profitable.

The patch also fixes a small oversight: it enables the STV transform of SImode
ASHIFTRT to PSRAD for SSE2 targets as well.

    PR target/110717

gcc/ChangeLog:

    * config/i386/i386-features.cc
    (general_scalar_chain::compute_convert_gain): Calculate gain
    for extend highpart case.
    (general_scalar_chain::convert_op): Handle
    ASHIFTRT/ASHIFT combined RTX.
    (general_scalar_to_vector_candidate_p): Enable ASHIFTRT for
    SImode for SSE2 targets.  Handle ASHIFTRT/ASHIFT combined RTX.
    * config/i386/i386.md (*extend<dwi>2_doubleword_highpart):
    New define_insn_and_split pattern.
    (*extendv2di2_highpart_stv): Ditto.

gcc/testsuite/ChangeLog:

    * gcc.target/i386/pr110717.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 4d69251d4f5..f801a8fc94a 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -572,6 +572,9 @@  general_scalar_chain::compute_convert_gain ()
 	      {
 		if (INTVAL (XEXP (src, 1)) >= 32)
 		  igain += ix86_cost->add;
+		/* Gain for extend highpart case.  */
+		else if (GET_CODE (XEXP (src, 0)) == ASHIFT)
+		  igain += ix86_cost->shift_const - ix86_cost->sse_op;
 		else
 		  igain += ix86_cost->shift_const;
 	      }
@@ -951,7 +954,8 @@  general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
 {
   *op = copy_rtx_if_shared (*op);
 
-  if (GET_CODE (*op) == NOT)
+  if (GET_CODE (*op) == NOT
+      || GET_CODE (*op) == ASHIFT)
     {
       convert_op (&XEXP (*op, 0), insn);
       PUT_MODE (*op, vmode);
@@ -2120,7 +2124,7 @@  general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
   switch (GET_CODE (src))
     {
     case ASHIFTRT:
-      if (!TARGET_AVX512VL)
+      if (mode == DImode && !TARGET_AVX512VL)
 	return false;
       /* FALLTHRU */
 
@@ -2131,6 +2135,14 @@  general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
       if (!CONST_INT_P (XEXP (src, 1))
 	  || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
 	return false;
+
+      /* Check for extend highpart case.  */
+      if (mode != DImode
+	  || GET_CODE (src) != ASHIFTRT
+	  || GET_CODE (XEXP (src, 0)) != ASHIFT)
+	break;
+
+      src = XEXP (src, 0);
       break;
 
     case SMAX:
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8c54aa5e981..4db210cc795 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -15292,6 +15292,41 @@  (define_insn "*<insn>qi_ext<mode>_2"
        (const_string "0")
        (const_string "*")))
    (set_attr "mode" "QI")])
+
+(define_insn_and_split "*extend<dwi>2_doubleword_highpart"
+  [(set (match_operand:<DWI> 0 "register_operand" "=r")
+	(ashiftrt:<DWI>
+	  (ashift:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0")
+		        (match_operand:QI 2 "const_int_operand"))
+	  (match_operand:QI 3 "const_int_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "INTVAL (operands[2]) == INTVAL (operands[3])
+   && UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT"
+  "#"
+  "&& reload_completed"
+  [(parallel [(set (match_dup 4)
+		   (ashift:DWIH (match_dup 4) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 4)
+		   (ashiftrt:DWIH (match_dup 4) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[4]);")
+
+(define_insn_and_split "*extendv2di2_highpart_stv"
+  [(set (match_operand:V2DI 0 "register_operand" "=v")
+	(ashiftrt:V2DI
+	  (ashift:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "vm")
+		       (match_operand:QI 2 "const_int_operand"))
+	  (match_operand:QI 3 "const_int_operand")))]
+  "!TARGET_64BIT && TARGET_STV && TARGET_AVX512VL
+   && INTVAL (operands[2]) == INTVAL (operands[3])
+   && UINTVAL (operands[2]) < 32"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(ashift:V2DI (match_dup 1) (match_dup 2)))
+   (set (match_dup 0)
+	(ashiftrt:V2DI (match_dup 0) (match_dup 2)))])
 
 ;; Rotate instructions
 
diff --git a/gcc/testsuite/gcc.target/i386/pr110717.c b/gcc/testsuite/gcc.target/i386/pr110717.c
new file mode 100644
index 00000000000..233f0eae5b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110717.c
@@ -0,0 +1,21 @@ 
+/* PR target/110717 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#ifdef __SIZEOF_INT128__
+unsigned __int128
+foo (unsigned __int128 x)
+{
+  x <<= 59;
+  return ((__int128) x) >> 59;
+}
+#else
+unsigned long long
+foo (unsigned long long x)
+{
+  x <<= 27;
+  return ((long long) x) >> 27;
+}
+#endif
+
+/* { dg-final { scan-assembler-not "sh\[lr\]d" } } */