diff mbox

PR target/70708: Suboptimal code generated when using _mm_set_sd (X64)

Message ID 20160418160750.GA31117@intel.com
State New
Headers show

Commit Message

H.J. Lu April 18, 2016, 4:07 p.m. UTC
"movq" should be used to load a double into an xmm register with zero_extend:

(set (reg:V2DF 90)
     (vec_concat:V2DF (reg/v:DF 88 [ d ])
                      (const_double:DF 0.0 [0x0.0p+0])))

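Unlike "movsd", which zeroes the upper half of the destination only when
loading from memory (the register-to-register form leaves the upper half
unchanged), "movq" zero-extends with both memory and xmm register sources.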

OK for trunk if there is no regression?

H.J.
--
gcc/

	PR target/70708
	* config/i386/sse.md (sse2_loadlpd): Accept load from "xm" and
	replace %vmovsd with "%vmovq".
	(vec_concatv2df): Likewise.

gcc/testsuite/

	PR target/70708
	* gcc.target/i386/pr70708.c: New test.
---
 gcc/config/i386/sse.md                  | 12 ++++++------
 gcc/testsuite/gcc.target/i386/pr70708.c | 14 ++++++++++++++
 2 files changed, 20 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr70708.c

Comments

Uros Bizjak April 18, 2016, 7:29 p.m. UTC | #1
On Mon, Apr 18, 2016 at 6:07 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
> "movq" should used to load double into xmm register with zero_extend:
>
> (set (reg:V2DF 90)
>      (vec_concat:V2DF (reg/v:DF 88 [ d ])
>                       (const_double:DF 0.0 [0x0.0p+0])))
>
> Unlike "movsd", which only works with load from memory, "movq" works
> with both memory and xmm register.
>
> OK for trunk if there is no regression?

OK.

Thanks,
Uros.

> H.J.
> --
> gcc/
>
>         PR target/70708
>         * config/i386/sse.md (sse2_loadlpd): Accept load from "xm" and
>         replace %vmovsd with "%vmovq".
>         (vec_concatv2df): Likewise.
>
> gcc/testsuite/
>
>         PR target/70708
>         * gcc.target/i386/pr70708.c: New test.
> ---
>  gcc/config/i386/sse.md                  | 12 ++++++------
>  gcc/testsuite/gcc.target/i386/pr70708.c | 14 ++++++++++++++
>  2 files changed, 20 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr70708.c
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 1ffb3b9..845ef56 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -8863,14 +8863,14 @@
>           "=x,x,x,x,x,x,x,x,m,m ,m")
>         (vec_concat:V2DF
>           (match_operand:DF 2 "nonimmediate_operand"
> -         " m,m,m,x,x,0,0,x,x,*f,r")
> +         "xm,m,m,x,x,0,0,x,x,*f,r")
>           (vec_select:DF
>             (match_operand:V2DF 1 "vector_move_operand"
>           " C,0,x,0,x,x,o,o,0,0 ,0")
>             (parallel [(const_int 1)]))))]
>    "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
>    "@
> -   %vmovsd\t{%2, %0|%0, %2}
> +   %vmovq\t{%2, %0|%0, %2}
>     movlpd\t{%2, %0|%0, %2}
>     vmovlpd\t{%2, %1, %0|%0, %1, %2}
>     movsd\t{%2, %0|%0, %2}
> @@ -8955,10 +8955,10 @@
>     (set_attr "mode" "V2DF,DF,DF")])
>
>  (define_insn "vec_concatv2df"
> -  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,v,x,v,x,x,v,x,x")
> +  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,v,x,v,x,x, v,x,x")
>         (vec_concat:V2DF
> -         (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,m,0,0")
> -         (match_operand:DF 2 "vector_move_operand"  " x,x,v,1,1,m,m,C,x,m")))]
> +         (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,xm,0,0")
> +         (match_operand:DF 2 "vector_move_operand"  " x,x,v,1,1,m,m, C,x,m")))]
>    "TARGET_SSE
>     && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
>         || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
> @@ -8970,7 +8970,7 @@
>     vmovddup\t{%1, %0|%0, %1}
>     movhpd\t{%2, %0|%0, %2}
>     vmovhpd\t{%2, %1, %0|%0, %1, %2}
> -   %vmovsd\t{%1, %0|%0, %1}
> +   %vmovq\t{%1, %0|%0, %1}
>     movlhps\t{%2, %0|%0, %2}
>     movhps\t{%2, %0|%0, %2}"
>    [(set_attr "isa" "sse2_noavx,avx,avx512vl,sse3,avx512vl,sse2_noavx,avx,sse2,noavx,noavx")
> diff --git a/gcc/testsuite/gcc.target/i386/pr70708.c b/gcc/testsuite/gcc.target/i386/pr70708.c
> new file mode 100644
> index 0000000..2219e61
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr70708.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse2" } */
> +
> +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
> +
> +__m128d
> +foo (double d)
> +{
> +  return __extension__ (__m128d){ d, 0.0 };
> +}
> +
> +/* { dg-final { scan-assembler-times "movq\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-not "movsd\[ \\t\]+\[^\n\]*%xmm" } } */
> +/* { dg-final { scan-assembler-not "\\(%\[er\]sp\\)" { target { ! ia32 } }} } */
> --
> 2.5.5
>
diff mbox

Patch

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1ffb3b9..845ef56 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -8863,14 +8863,14 @@ 
 	  "=x,x,x,x,x,x,x,x,m,m ,m")
 	(vec_concat:V2DF
 	  (match_operand:DF 2 "nonimmediate_operand"
-	  " m,m,m,x,x,0,0,x,x,*f,r")
+	  "xm,m,m,x,x,0,0,x,x,*f,r")
 	  (vec_select:DF
 	    (match_operand:V2DF 1 "vector_move_operand"
 	  " C,0,x,0,x,x,o,o,0,0 ,0")
 	    (parallel [(const_int 1)]))))]
   "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "@
-   %vmovsd\t{%2, %0|%0, %2}
+   %vmovq\t{%2, %0|%0, %2}
    movlpd\t{%2, %0|%0, %2}
    vmovlpd\t{%2, %1, %0|%0, %1, %2}
    movsd\t{%2, %0|%0, %2}
@@ -8955,10 +8955,10 @@ 
    (set_attr "mode" "V2DF,DF,DF")])
 
 (define_insn "vec_concatv2df"
-  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,v,x,v,x,x,v,x,x")
+  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,v,x,v,x,x, v,x,x")
 	(vec_concat:V2DF
-	  (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,m,0,0")
-	  (match_operand:DF 2 "vector_move_operand"  " x,x,v,1,1,m,m,C,x,m")))]
+	  (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,xm,0,0")
+	  (match_operand:DF 2 "vector_move_operand"  " x,x,v,1,1,m,m, C,x,m")))]
   "TARGET_SSE
    && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
        || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
@@ -8970,7 +8970,7 @@ 
    vmovddup\t{%1, %0|%0, %1}
    movhpd\t{%2, %0|%0, %2}
    vmovhpd\t{%2, %1, %0|%0, %1, %2}
-   %vmovsd\t{%1, %0|%0, %1}
+   %vmovq\t{%1, %0|%0, %1}
    movlhps\t{%2, %0|%0, %2}
    movhps\t{%2, %0|%0, %2}"
   [(set_attr "isa" "sse2_noavx,avx,avx512vl,sse3,avx512vl,sse2_noavx,avx,sse2,noavx,noavx")
diff --git a/gcc/testsuite/gcc.target/i386/pr70708.c b/gcc/testsuite/gcc.target/i386/pr70708.c
new file mode 100644
index 0000000..2219e61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70708.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+__m128d
+foo (double d)
+{
+  return __extension__ (__m128d){ d, 0.0 };
+}
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-not "movsd\[ \\t\]+\[^\n\]*%xmm" } } */
+/* { dg-final { scan-assembler-not "\\(%\[er\]sp\\)" { target { ! ia32 } }} } */