@@ -803,8 +803,8 @@
;; Used to control the "enabled" attribute on a per-instruction basis.
+;; The default value is "base".  The value sse3_noavx is mapped by the
+;; "enabled" attribute to TARGET_SSE3 && !TARGET_AVX.
(define_attr "isa" "base,x64,x64_sse2,x64_sse4,x64_sse4_noavx,x64_avx,nox64,
- sse_noavx,sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx,
- avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
+ sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
+ avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
 avx512bw,noavx512bw,avx512dq,noavx512dq,
 avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
(const_string "base"))
@@ -834,6 +834,8 @@
(eq_attr "isa" "sse2_noavx")
(symbol_ref "TARGET_SSE2 && !TARGET_AVX")
(eq_attr "isa" "sse3") (symbol_ref "TARGET_SSE3")
+ (eq_attr "isa" "sse3_noavx")
+ (symbol_ref "TARGET_SSE3 && !TARGET_AVX")
(eq_attr "isa" "sse4") (symbol_ref "TARGET_SSE4_1")
(eq_attr "isa" "sse4_noavx")
(symbol_ref "TARGET_SSE4_1 && !TARGET_AVX")
@@ -530,43 +530,109 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
-(define_insn "mmx_haddv2sf3"
- [(set (match_operand:V2SF 0 "register_operand" "=y")
+;; Named expander for the V2SF horizontal add.  It always builds the sums
+;; with lane order (0, 1) and carries no constraints or output template;
+;; the instruction itself is emitted by the *mmx_haddv2sf3 insn, which
+;; accepts the two lane selectors of each sum in either order.
+;; Available under TARGET_3DNOW.
+(define_expand "mmx_haddv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand")
 (vec_concat:V2SF
 (plus:SF
 (vec_select:SF
- (match_operand:V2SF 1 "register_operand" "0")
- (parallel [(const_int 0)]))
+ (match_operand:V2SF 1 "register_operand")
+ (parallel [(const_int 0)]))
 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
 (plus:SF
- (vec_select:SF
- (match_operand:V2SF 2 "nonimmediate_operand" "ym")
- (parallel [(const_int 0)]))
+ (vec_select:SF
+ (match_operand:V2SF 2 "nonimmediate_operand")
+ (parallel [(const_int 0)]))
 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
- "TARGET_3DNOW"
- "pfacc\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V2SF")])
+ "TARGET_3DNOW")
+
+;; Horizontal add of two V2SF vectors: element 0 of the result is the sum
+;; of both lanes of operand 1, element 1 is the sum of both lanes of
+;; operand 2.  Operands 3..6 are the lane selectors; the insn condition
+;; only requires the two selectors inside each sum to differ, so either
+;; lane order matches.
+;;
+;; Only the 3DNow! pfacc form is provided.  SSE3 (v)haddps cannot implement
+;; this pattern for V2SF values kept in the low half of XMM registers: it
+;; writes the sum of elements 2 and 3 of its first source to result
+;; element 1, and the sum of operand 2's low lanes lands in element 2.
+;; The SSE3 case where only the scalar sum is needed is handled by
+;; *mmx_haddv2sf3_low.
+(define_insn "*mmx_haddv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (vec_concat:V2SF
+          (plus:SF
+            (vec_select:SF
+              (match_operand:V2SF 1 "register_operand" "0")
+              (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
+            (vec_select:SF (match_dup 1)
+              (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
+          (plus:SF
+            (vec_select:SF
+              (match_operand:V2SF 2 "nonimmediate_operand" "ym")
+              (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
+            (vec_select:SF (match_dup 2)
+              (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
+  "TARGET_3DNOW
+   && INTVAL (operands[3]) != INTVAL (operands[4])
+   && INTVAL (operands[5]) != INTVAL (operands[6])"
+  "pfacc\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "prefix_extra" "1")
+   (set_attr "mode" "V2SF")])
+
+;; Scalar sum of the two lanes of a V2SF register, produced as an SF value.
+;; Operands 2 and 3 are the lane selectors; the condition only requires
+;; them to differ, so both lane orders match.  The same register is used
+;; for both haddps inputs; alternative 0 ties operand 1 to the destination
+;; (legacy two-operand encoding), alternative 1 is the three-operand VEX
+;; form.  Requires TARGET_MMX_WITH_SSE and TARGET_SSE3.
+(define_insn "*mmx_haddv2sf3_low"
+ [(set (match_operand:SF 0 "register_operand" "=x,x")
+ (plus:SF
+ (vec_select:SF
+ (match_operand:V2SF 1 "register_operand" "0,x")
+ (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
+ (vec_select:SF
+ (match_dup 1)
+ (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
+ "TARGET_MMX_WITH_SSE && TARGET_SSE3
+ && INTVAL (operands[2]) != INTVAL (operands[3])"
+ "@
+ haddps\t{%0, %0|%0, %0}
+ vhaddps\t{%1, %1, %0|%0, %1, %1}"
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "sseadd1")
+ (set_attr "prefix" "orig,vex")
+ (set_attr "mode" "V4SF")])
+;; Horizontal subtract of two V2SF vectors: element 0 of the result is
+;; lane 0 minus lane 1 of operand 1, element 1 is lane 0 minus lane 1 of
+;; operand 2.
+;;
+;; Only the pfnacc form is provided.  SSE3 (v)hsubps cannot implement this
+;; pattern for V2SF values kept in the low half of XMM registers: it writes
+;; the difference of elements 2 and 3 of its first source to result
+;; element 1, and the operand 2 difference lands in element 2.  The SSE3
+;; case where only the scalar difference is needed is handled by
+;; *mmx_hsubv2sf3_low.
 (define_insn "mmx_hsubv2sf3"
   [(set (match_operand:V2SF 0 "register_operand" "=y")
         (vec_concat:V2SF
           (minus:SF
             (vec_select:SF
               (match_operand:V2SF 1 "register_operand" "0")
               (parallel [(const_int 0)]))
             (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
           (minus:SF
             (vec_select:SF
               (match_operand:V2SF 2 "nonimmediate_operand" "ym")
               (parallel [(const_int 0)]))
             (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
   "TARGET_3DNOW_A"
   "pfnacc\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxadd")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "V2SF")])
+
+;; Scalar difference of the two lanes of a V2SF register (lane 0 minus
+;; lane 1), produced as an SF value.  Unlike *mmx_haddv2sf3_low the lane
+;; order is fixed with const_int selectors -- presumably because the
+;; subtraction is not commutative.  The same register is used for both
+;; hsubps inputs; alternative 0 ties operand 1 to the destination,
+;; alternative 1 is the three-operand VEX form.
+;; Requires TARGET_MMX_WITH_SSE and TARGET_SSE3.
+(define_insn "*mmx_hsubv2sf3_low"
+ [(set (match_operand:SF 0 "register_operand" "=x,x")
+ (minus:SF
+ (vec_select:SF
+ (match_operand:V2SF 1 "register_operand" "0,x")
+ (parallel [(const_int 0)]))
+ (vec_select:SF
+ (match_dup 1)
+ (parallel [(const_int 1)]))))]
+ "TARGET_MMX_WITH_SSE && TARGET_SSE3"
+ "@
+ hsubps\t{%0, %0|%0, %0}
+ vhsubps\t{%1, %1, %0|%0, %1, %1}"
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "sseadd1")
+ (set_attr "prefix" "orig,vex")
+ (set_attr "mode" "V4SF")])
(define_insn "mmx_addsubv2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
new file mode 100644
@@ -0,0 +1,22 @@
+/* PR target/95046 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse3" } */
+
+
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+
+/* Adds the two lanes of an 8-byte float vector.  The scan-assembler
+   directive after this function expects haddps or vhaddps in the
+   generated assembly.  */
+float
+foo (__v2sf a)
+{
+ return a[0] + a[1];
+}
+
+/* { dg-final { scan-assembler "\tv?haddps" } } */
+
+/* Subtracts lane 1 from lane 0 of an 8-byte float vector.  The
+   scan-assembler directive after this function expects hsubps or
+   vhsubps in the generated assembly.  */
+float
+bar (__v2sf a)
+{
+ return a[0] - a[1];
+}
+
+/* { dg-final { scan-assembler "\tv?hsubps" } } */