diff mbox series

[6/7,x86] Optimize a < 0 ? -1 : 0 to (signed)a >> 31.

Message ID 20240627082307.1166985-7-hongtao.liu@intel.com
State New
Headers show
Series Remove vcond{,u,eq}<mode> expanders. | expand

Commit Message

liuhongt June 27, 2024, 8:23 a.m. UTC
Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
and x < 0 ? 1 : 0 into (unsigned) x >> 31.

Add define_insn_and_split patterns for the optimization done in
ix86_expand_int_vcond.

gcc/ChangeLog:

	PR target/115517
	* config/i386/sse.md ("*ashr<mode>3_1"): New
	define_insn_and_split.
	(*avx512_ashr<mode>3_1): Ditto.
	(*avx2_lshr<mode>3_1): Ditto.
	(*avx2_lshr<mode>3_2): Ditto and add 2 combine splitters after
	it.
	* config/i386/mmx.md (mmxscalarsize): New mode attribute.
	(*mmx_ashr<mode>3_1): New define_insn_and_split.
	(mmx_<insn><mode>3): Add a combine splitter after it.
	(*mmx_ashrv2hi3_1): New define_insn_and_split, also add a
	combine splitter after it.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx2-pr115517.c: New test.
	* gcc.target/i386/avx512-pr115517.c: New test.
	* g++.target/i386/avx2-pr115517.C: New test.
	* g++.target/i386/avx512-pr115517.C: New test.
	* gcc.target/i386/pr111023-2.c: Adjust testcase.
	* gcc.target/i386/vect-div-1.c: Ditto.
---
 gcc/config/i386/mmx.md                        | 52 ++++++++++++
 gcc/config/i386/sse.md                        | 83 +++++++++++++++++++
 gcc/testsuite/g++.target/i386/avx2-pr115517.C | 60 ++++++++++++++
 .../g++.target/i386/avx512-pr115517.C         | 70 ++++++++++++++++
 gcc/testsuite/gcc.target/i386/avx2-pr115517.c | 33 ++++++++
 .../gcc.target/i386/avx512-pr115517.c         | 70 ++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr111023-2.c    |  4 +-
 gcc/testsuite/gcc.target/i386/vect-div-1.c    |  3 +-
 8 files changed, 372 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/avx2-pr115517.C
 create mode 100644 gcc/testsuite/g++.target/i386/avx512-pr115517.C
 create mode 100644 gcc/testsuite/gcc.target/i386/avx2-pr115517.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512-pr115517.c
diff mbox series

Patch

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index ea53f516cbb..7262bf146c2 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -135,6 +135,14 @@  (define_mode_attr mmxscalarmodelower
    (V4HI "hi") (V2HI "hi")
    (V8QI "qi")])
 
+(define_mode_attr mmxscalarsize
+  [(V1DI "64")
+   (V2SI "32") (V2SF "32")
+   (V4HF "16") (V4BF "16")
+   (V2HF "16") (V2BF "16")
+   (V4HI "16") (V2HI "16")
+   (V8QI "8")])
+
 (define_mode_attr Yv_Yw
   [(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")])
 
@@ -3608,6 +3616,17 @@  (define_insn "mmx_ashr<mode>3"
        (const_string "0")))
    (set_attr "mode" "DI,TI,TI")])
 
+(define_insn_and_split "*mmx_ashr<mode>3_1"
+  [(set (match_operand:MMXMODE24 0 "register_operand")
+	(lt:MMXMODE24
+	  (match_operand:MMXMODE24 1 "register_operand")
+	  (match_operand:MMXMODE24 2 "const0_operand")))]
+  "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (ashiftrt:MMXMODE24 (match_dup 1) (match_dup 3)))]
+  "operands[3] = gen_int_mode (<mmxscalarsize> - 1, DImode);")
+
 (define_expand "ashr<mode>3"
   [(set (match_operand:MMXMODE24 0 "register_operand")
         (ashiftrt:MMXMODE24
@@ -3634,6 +3653,17 @@  (define_insn "mmx_<insn><mode>3"
        (const_string "0")))
    (set_attr "mode" "DI,TI,TI")])
 
+(define_split
+  [(set (match_operand:MMXMODE248 0 "register_operand")
+  	(and:MMXMODE248
+	  (lt:MMXMODE248
+	    (match_operand:MMXMODE248 1 "register_operand")
+	    (match_operand:MMXMODE248 2 "const0_operand"))
+	  (match_operand:MMXMODE248 3 "const1_operand")))]
+  "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()"
+  [(set (match_dup 0) (lshiftrt:MMXMODE248 (match_dup 1) (match_dup 4)))]
+  "operands[4] = gen_int_mode (<mmxscalarsize> - 1, DImode);")
+
 (define_expand "<insn><mode>3"
   [(set (match_operand:MMXMODE24 0 "register_operand")
         (any_lshift:MMXMODE24
@@ -3675,6 +3705,28 @@  (define_insn "<insn>v2hi3"
        (const_string "0")))
    (set_attr "mode" "TI")])
 
+(define_insn_and_split "*mmx_ashrv2hi3_1"
+  [(set (match_operand:V2HI 0 "register_operand")
+	(lt:V2HI
+	  (match_operand:V2HI 1 "register_operand")
+	  (match_operand:V2HI 2 "const0_operand")))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (ashiftrt:V2HI (match_dup 1) (match_dup 3)))]
+  "operands[3] = gen_int_mode (15, DImode);")
+
+(define_split
+  [(set (match_operand:V2HI 0 "register_operand")
+  	(and:V2HI
+	  (lt:V2HI
+	    (match_operand:V2HI 1 "register_operand")
+	    (match_operand:V2HI 2 "const0_operand"))
+	  (match_operand:V2HI 3 "const1_operand")))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  [(set (match_dup 0) (lshiftrt:V2HI (match_dup 1) (match_dup 4)))]
+  "operands[4] = gen_int_mode (15, DImode);")
+
 (define_expand "<insn>v8qi3"
   [(set (match_operand:V8QI 0 "register_operand")
 	(any_shift:V8QI (match_operand:V8QI 1 "register_operand")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5996ad99606..d86b6fa81c0 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -16860,6 +16860,17 @@  (define_insn "ashr<mode>3"
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split "*ashr<mode>3_1"
+  [(set (match_operand:VI24_AVX2 0 "register_operand")
+	(lt:VI24_AVX2
+	  (match_operand:VI24_AVX2 1 "register_operand")
+	  (match_operand:VI24_AVX2 2 "const0_operand")))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (ashiftrt:VI24_AVX2 (match_dup 1) (match_dup 3)))]
+  "operands[3] = gen_int_mode (<ssescalarsize> - 1, DImode);")
+
 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
   [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
 	(ashiftrt:VI248_AVX512BW_AVX512VL
@@ -16874,6 +16885,23 @@  (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
        (const_string "0")))
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split "*avx512_ashr<mode>3_1"
+ [(set (match_operand:VI248_AVX512VLBW  0 "register_operand")
+	(vec_merge:VI248_AVX512VLBW
+	  (match_operand:VI248_AVX512VLBW 1 "vector_all_ones_operand")
+	  (match_operand:VI248_AVX512VLBW 2 "const0_operand")
+	  (unspec:<avx512fmaskmode>
+	    [(match_operand:VI248_AVX512VLBW 3 "nonimmediate_operand")
+	     (match_operand:VI248_AVX512VLBW 4 "const0_operand")
+	     (const_int 1)]
+	     UNSPEC_PCMP)))]
+  "TARGET_AVX512F && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(ashiftrt:VI248_AVX512VLBW (match_dup 3) (match_dup 5)))]
+  "operands[5] = gen_int_mode (<ssescalarsize> - 1, DImode);")
+
 (define_expand "ashr<mode>3"
   [(set (match_operand:VI248_AVX512BW 0 "register_operand")
 	(ashiftrt:VI248_AVX512BW
@@ -17028,6 +17056,61 @@  (define_insn "<insn><mode>3"
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split "*avx2_lshr<mode>3_1"
+  [(set (match_operand:VI8_AVX2 0 "register_operand")
+  	(and:VI8_AVX2
+	  (gt:VI8_AVX2
+	    (match_operand:VI8_AVX2 1 "register_operand")
+	    (match_operand:VI8_AVX2 2 "register_operand"))
+	  (match_operand:VI8_AVX2 3 "const1_operand")))]
+  "TARGET_SSE4_2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 5) (gt:VI8_AVX2 (match_dup 1) (match_dup 2)))
+   (set (match_dup 0) (lshiftrt:VI8_AVX2 (match_dup 5) (match_dup 4)))]
+{
+  operands[4] = gen_int_mode (<ssescalarsize> - 1, DImode);
+  operands[5] = gen_reg_rtx (<MODE>mode);
+})
+
+(define_insn_and_split "*avx2_lshr<mode>3_2"
+  [(set (match_operand:VI8_AVX2 0 "register_operand")
+  	(and:VI8_AVX2
+	  (lt:VI8_AVX2
+	    (match_operand:VI8_AVX2 1 "register_operand")
+	    (match_operand:VI8_AVX2 2 "const0_operand"))
+	  (match_operand:VI8_AVX2 3 "const1_operand")))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (lshiftrt:VI8_AVX2 (match_dup 1) (const_int 63)))])
+
+(define_split
+  [(set (match_operand:VI248_AVX2 0 "register_operand")
+  	(and:VI248_AVX2
+	  (lt:VI248_AVX2
+	    (match_operand:VI248_AVX2 1 "register_operand")
+	    (match_operand:VI248_AVX2 2 "const0_operand"))
+	  (match_operand:VI248_AVX2 3 "const1_operand")))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  [(set (match_dup 0) (lshiftrt:VI248_AVX2 (match_dup 1) (match_dup 4)))]
+  "operands[4] = gen_int_mode (<ssescalarsize> - 1, DImode);")
+
+(define_split
+ [(set (match_operand:VI248_AVX512VLBW  0 "register_operand")
+	(vec_merge:VI248_AVX512VLBW
+	  (match_operand:VI248_AVX512VLBW 1 "const1_operand")
+	  (match_operand:VI248_AVX512VLBW 2 "const0_operand")
+	  (unspec:<avx512fmaskmode>
+	    [(match_operand:VI248_AVX512VLBW 3 "nonimmediate_operand")
+	     (match_operand:VI248_AVX512VLBW 4 "const0_operand")
+	     (const_int 1)]
+	     UNSPEC_PCMP)))]
+  "TARGET_AVX512F && ix86_pre_reload_split ()"
+  [(set (match_dup 0)
+	(lshiftrt:VI248_AVX512VLBW (match_dup 3) (match_dup 5)))]
+  "operands[5] = gen_int_mode (<ssescalarsize> - 1, DImode);")
+
 (define_insn "<insn><mode>3<mask_name>"
   [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
 	(any_lshift:VI248_AVX512BW
diff --git a/gcc/testsuite/g++.target/i386/avx2-pr115517.C b/gcc/testsuite/g++.target/i386/avx2-pr115517.C
new file mode 100644
index 00000000000..ec000c57542
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx2-pr115517.C
@@ -0,0 +1,60 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2" } */
+/* { dg-final { scan-assembler-times "vpsrlq" 2 } } */
+/* { dg-final { scan-assembler-times "vpsrld" 2 } } */
+/* { dg-final { scan-assembler-times "vpsrlw" 2 } } */
+
+typedef short v8hi __attribute__((vector_size(16)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef long long v2di __attribute__((vector_size(16)));
+typedef long long v4di __attribute__((vector_size(32)));
+
+v8hi
+foo (v8hi a)
+{
+  v8hi const1_op = __extension__(v8hi){1,1,1,1,1,1,1,1};
+  v8hi const0_op = __extension__(v8hi){0,0,0,0,0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v16hi
+foo2 (v16hi a)
+{
+  v16hi const1_op = __extension__(v16hi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
+  v16hi const0_op = __extension__(v16hi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v4si
+foo3 (v4si a)
+{
+  v4si const1_op = __extension__(v4si){1,1,1,1};
+  v4si const0_op = __extension__(v4si){0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v8si
+foo4 (v8si a)
+{
+  v8si const1_op = __extension__(v8si){1,1,1,1,1,1,1,1};
+  v8si const0_op = __extension__(v8si){0,0,0,0,0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v2di
+foo3 (v2di a)
+{
+  v2di const1_op = __extension__(v2di){1,1};
+  v2di const0_op = __extension__(v2di){0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
+
+v4di
+foo4 (v4di a)
+{
+  v4di const1_op = __extension__(v4di){1,1,1,1};
+  v4di const0_op = __extension__(v4di){0,0,0,0};
+  return a < const0_op ? const1_op : const0_op;
+}
diff --git a/gcc/testsuite/g++.target/i386/avx512-pr115517.C b/gcc/testsuite/g++.target/i386/avx512-pr115517.C
new file mode 100644
index 00000000000..22df41bbdc9
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx512-pr115517.C
@@ -0,0 +1,70 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpsrad" 3 } } */
+/* { dg-final { scan-assembler-times "vpsraw" 3 } } */
+/* { dg-final { scan-assembler-times "vpsraq" 3 } } */
+
+typedef short v8hi __attribute__((vector_size(16)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef short v32hi __attribute__((vector_size(64)));
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef int v16si __attribute__((vector_size(64)));
+typedef long long v2di __attribute__((vector_size(16)));
+typedef long long v4di __attribute__((vector_size(32)));
+typedef long long v8di __attribute__((vector_size(64)));
+
+v8hi
+foo (v8hi a)
+{
+  return a < __extension__(v8hi) { 0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v16hi
+foo2 (v16hi a)
+{
+  return a < __extension__(v16hi) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v32hi
+foo3 (v32hi a)
+{
+  return a < __extension__(v32hi) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v4si
+foo4 (v4si a)
+{
+  return a < __extension__(v4si) { 0, 0, 0, 0};
+}
+
+v8si
+foo5 (v8si a)
+{
+  return a < __extension__(v8si) { 0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v16si
+foo6 (v16si a)
+{
+  return a < __extension__(v16si) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v2di
+foo7 (v2di a)
+{
+  return a < __extension__(v2di) { 0, 0};
+}
+
+v4di
+foo8 (v4di a)
+{
+  return a < __extension__(v4di) { 0, 0, 0, 0};
+}
+
+v8di
+foo9 (v8di a)
+{
+  return a < __extension__(v8di) { 0, 0, 0, 0, 0, 0, 0, 0};
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr115517.c b/gcc/testsuite/gcc.target/i386/avx2-pr115517.c
new file mode 100644
index 00000000000..5b2620b0dc1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-pr115517.c
@@ -0,0 +1,33 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2" } */
+/* { dg-final { scan-assembler-times "vpsrad" 2 } } */
+/* { dg-final { scan-assembler-times "vpsraw" 2 } } */
+
+typedef short v8hi __attribute__((vector_size(16)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+
+v8hi
+foo (v8hi a)
+{
+  return a < __extension__(v8hi) { 0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v16hi
+foo2 (v16hi a)
+{
+  return a < __extension__(v16hi) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v4si
+foo3 (v4si a)
+{
+  return a < __extension__(v4si) { 0, 0, 0, 0};
+}
+
+v8si
+foo4 (v8si a)
+{
+  return a < __extension__(v8si) { 0, 0, 0, 0, 0, 0, 0, 0};
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512-pr115517.c b/gcc/testsuite/gcc.target/i386/avx512-pr115517.c
new file mode 100644
index 00000000000..22df41bbdc9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512-pr115517.c
@@ -0,0 +1,70 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpsrad" 3 } } */
+/* { dg-final { scan-assembler-times "vpsraw" 3 } } */
+/* { dg-final { scan-assembler-times "vpsraq" 3 } } */
+
+typedef short v8hi __attribute__((vector_size(16)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef short v32hi __attribute__((vector_size(64)));
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef int v16si __attribute__((vector_size(64)));
+typedef long long v2di __attribute__((vector_size(16)));
+typedef long long v4di __attribute__((vector_size(32)));
+typedef long long v8di __attribute__((vector_size(64)));
+
+v8hi
+foo (v8hi a)
+{
+  return a < __extension__(v8hi) { 0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v16hi
+foo2 (v16hi a)
+{
+  return a < __extension__(v16hi) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v32hi
+foo3 (v32hi a)
+{
+  return a < __extension__(v32hi) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v4si
+foo4 (v4si a)
+{
+  return a < __extension__(v4si) { 0, 0, 0, 0};
+}
+
+v8si
+foo5 (v8si a)
+{
+  return a < __extension__(v8si) { 0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v16si
+foo6 (v16si a)
+{
+  return a < __extension__(v16si) { 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v2di
+foo7 (v2di a)
+{
+  return a < __extension__(v2di) { 0, 0};
+}
+
+v4di
+foo8 (v4di a)
+{
+  return a < __extension__(v4di) { 0, 0, 0, 0};
+}
+
+v8di
+foo9 (v8di a)
+{
+  return a < __extension__(v8di) { 0, 0, 0, 0, 0, 0, 0, 0};
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr111023-2.c b/gcc/testsuite/gcc.target/i386/pr111023-2.c
index 6c69f947544..ba52959b357 100644
--- a/gcc/testsuite/gcc.target/i386/pr111023-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr111023-2.c
@@ -36,7 +36,7 @@  v4si_v4hi (v4si *dst, v8hi src)
   dst[0] = *(v4si *) tem;
 }
 
-/* { dg-final { scan-assembler "pcmpgtw" } } */
+/* { dg-final { scan-assembler "(?:pcmpgtw|psraw)" } } */
 /* { dg-final { scan-assembler "punpcklwd" } } */
 
 void
@@ -48,5 +48,5 @@  v2di_v2si (v2di *dst, v4si src)
   dst[0] = *(v2di *) tem;
 }
 
-/* { dg-final { scan-assembler "pcmpgtd" } } */
+/* { dg-final { scan-assembler "(?:pcmpgtd|psrad)" } } */
 /* { dg-final { scan-assembler "punpckldq" } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-div-1.c b/gcc/testsuite/gcc.target/i386/vect-div-1.c
index f611088d8df..6d911290e06 100644
--- a/gcc/testsuite/gcc.target/i386/vect-div-1.c
+++ b/gcc/testsuite/gcc.target/i386/vect-div-1.c
@@ -40,4 +40,5 @@  f4 (int x)
    is always non-negative, so there is no need to do >> 31 shift
    etc. to check if it is.  And in f3 and f4, VRP can prove it is always
    negative.  */
-/* { dg-final { scan-assembler-not "psrad\[^\n\r\]*\\\$31" } } */
+/* Now (lt:v4si op1 const0_operand) is optimized to psrad, there're 20 of them.  */
+/* { dg-final { scan-assembler-times "psrad\[^\n\r\]*\\\$31" 20 } } */