From 93499102a52d29974b47e1d32274f6a08a4d6580 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Fri, 6 Aug 2021 12:32:01 -0700
Subject: [PATCH v4] x86: Optimize load of const FP all bits set vectors
Check float_vector_all_ones_operand for vector floating-point modes to
optimize the load of constant floating-point vectors with all bits set.
gcc/
PR target/101804
* config/i386/constraints.md (BC): Document for integer SSE
constant all bits set operand.
(BF): New constraint for const floating-point all bits set
vectors.
* config/i386/i386.c (standard_sse_constant_p): Handle const
floating-point all bits set vectors.
(standard_sse_constant_opcode): Likewise.
* config/i386/sse.md (sseconstm1): New mode attribute.
(mov<mode>_internal): Replace BC with <sseconstm1>.
gcc/testsuite/
PR target/101804
* gcc.target/i386/avx2-gather-2.c: Pass -march=skylake instead
of "-mavx2 -mtune=skylake". Scan vpcmpeqd.
Fix
---
gcc/config/i386/constraints.md | 10 ++++++++--
gcc/config/i386/i386.c | 11 +++++++++--
gcc/config/i386/sse.md | 11 ++++++++++-
gcc/testsuite/gcc.target/i386/avx2-gather-2.c | 3 ++-
4 files changed, 29 insertions(+), 6 deletions(-)
@@ -166,7 +166,8 @@ (define_register_constraint "YW"
;; s Sibcall memory operand, not valid for TARGET_X32
;; w Call memory operand, not valid for TARGET_X32
;; z Constant call address operand.
-;; C SSE constant operand.
+;; C Integer SSE constant operand with all bits set.
+;; F Floating-point SSE constant operand with all bits set.
(define_constraint "Bf"
"@internal Flags register operand."
@@ -216,11 +217,16 @@ (define_constraint "Bz"
(match_operand 0 "constant_call_address_operand"))
(define_constraint "BC"
- "@internal SSE constant -1 operand."
+ "@internal integer SSE constant with all bits set operand."
(and (match_test "TARGET_SSE")
(ior (match_test "op == constm1_rtx")
(match_operand 0 "vector_all_ones_operand"))))
+(define_constraint "BF"
+ "@internal floating-point SSE constant with all bits set operand."
+ (and (match_test "TARGET_SSE")
+ (match_operand 0 "float_vector_all_ones_operand")))
+
;; Integer constant constraints.
(define_constraint "Wb"
"Integer constant in the range 0 @dots{} 7, for 8-bit shifts."
@@ -5073,7 +5073,11 @@ standard_sse_constant_p (rtx x, machine_mode pred_mode)
if (x == const0_rtx || const0_operand (x, mode))
return 1;
- if (x == constm1_rtx || vector_all_ones_operand (x, mode))
+ if (x == constm1_rtx
+ || vector_all_ones_operand (x, mode)
+ || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
+ || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
+ && float_vector_all_ones_operand (x, mode)))
{
/* VOIDmode integer constant, get mode from the predicate. */
if (mode == VOIDmode)
@@ -5171,7 +5175,10 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
gcc_unreachable ();
}
}
- else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
+ else if (x == constm1_rtx
+ || vector_all_ones_operand (x, mode)
+ || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
+ && float_vector_all_ones_operand (x, mode)))
{
enum attr_mode insn_mode = get_attr_mode (insn);
@@ -777,6 +777,15 @@ (define_mode_attr sseinsnmode
(V4SF "V4SF") (V2DF "V2DF")
(TI "TI")])
+;; Constraint for an SSE constant with all bits set (-1): BC for integer vector modes, BF for floating-point vector modes.
+(define_mode_attr sseconstm1
+ [(V64QI "BC") (V32HI "BC") (V16SI "BC") (V8DI "BC") (V4TI "BC")
+ (V32QI "BC") (V16HI "BC") (V8SI "BC") (V4DI "BC") (V2TI "BC")
+ (V16QI "BC") (V8HI "BC") (V4SI "BC") (V2DI "BC") (V1TI "BC")
+ (V16SF "BF") (V8DF "BF")
+ (V8SF "BF") (V4DF "BF")
+ (V4SF "BF") (V2DF "BF")])
+
;; Mapping of vector modes to corresponding mask size
(define_mode_attr avx512fmaskmode
[(V64QI "DI") (V32QI "SI") (V16QI "HI")
@@ -1056,7 +1065,7 @@ (define_insn "mov<mode>_internal"
[(set (match_operand:VMOVE 0 "nonimmediate_operand"
"=v,v ,v ,m")
(match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
- " C,BC,vm,v"))]
+ " C,<sseconstm1>,vm,v"))]
"TARGET_SSE
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
@@ -1,6 +1,7 @@
/* { dg-do compile } */
-/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details -mtune=skylake" } */
+/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake" } */
#include "avx2-gather-1.c"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 16 "vect" } } */
+/* { dg-final { scan-assembler "vpcmpeqd" } } */
--
2.31.1