diff mbox series

[v1] RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763]

Message ID 20240703141713.1425590-1-pan2.li@intel.com
State New
Headers show
Series [v1] RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763] | expand

Commit Message

Li, Pan2 July 3, 2024, 2:17 p.m. UTC
From: Pan Li <pan2.li@intel.com>

According to the ISA, the zvfhmin sub-extension only contains
conversion insns.  Thus, a vfmv insn acting on FP16 should not be
emitted when only the zvfhmin option is given.

This patch fixes it by splitting the pred_broadcast define_insn
into a zvfhmin part and a zvfh part.  Given the example below:

void test (_Float16 *dest, _Float16 bias) {
  dest[0] = bias;
  dest[1] = bias;
}

when compiled with -march=rv64gcv_zfh_zvfhmin

Before this patch:
test:
  vsetivli        zero,2,e16,mf4,ta,ma
  vfmv.v.f        v1,fa0 // should not leverage vfmv for zvfhmin
  vse16.v v1,0(a0)
  ret

After this patch:
test:
  addi     sp,sp,-16
  fsh      fa0,14(sp)
  addi     a5,sp,14
  vsetivli zero,2,e16,mf4,ta,ma
  vlse16.v v1,0(a5),zero
  vse16.v  v1,0(a0)
  addi     sp,sp,16
  jr       ra

	PR target/115763

gcc/ChangeLog:

	* config/riscv/vector.md (*pred_broadcast<mode>): Split into
	zvfh and zvfhmin part.
	(*pred_broadcast<mode>_zvfh): New define_insn for zvfh part.
	(*pred_broadcast<mode>_zvfhmin): Ditto but for zvfhmin.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/base/scalar_move-5.c: Adjust asm check.
	* gcc.target/riscv/rvv/base/scalar_move-6.c: Ditto.
	* gcc.target/riscv/rvv/base/scalar_move-7.c: Ditto.
	* gcc.target/riscv/rvv/base/scalar_move-8.c: Ditto.
	* gcc.target/riscv/rvv/base/pr115763-1.c: New test.
	* gcc.target/riscv/rvv/base/pr115763-2.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/vector.md                    | 49 +++++++++++++------
 .../gcc.target/riscv/rvv/base/pr115763-1.c    |  9 ++++
 .../gcc.target/riscv/rvv/base/pr115763-2.c    | 10 ++++
 .../gcc.target/riscv/rvv/base/scalar_move-5.c |  4 +-
 .../gcc.target/riscv/rvv/base/scalar_move-6.c |  6 +--
 .../gcc.target/riscv/rvv/base/scalar_move-7.c |  6 +--
 .../gcc.target/riscv/rvv/base/scalar_move-8.c |  6 +--
 7 files changed, 64 insertions(+), 26 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-2.c

Comments

钟居哲 July 3, 2024, 2:18 p.m. UTC | #1
LGTM。



juzhe.zhong@rivai.ai
 
From: pan2.li
Date: 2024-07-03 22:17
To: gcc-patches
CC: juzhe.zhong; kito.cheng; jeffreyalaw; rdapp.gcc; Pan Li
Subject: [PATCH v1] RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763]
From: Pan Li <pan2.li@intel.com>
 
According to the ISA,  the zvfhmin sub extension should only contain
convertion insn.  Thus,  the vfmv insn acts on FP16 should not be
present when only the zvfhmin option is given.
 
This patch would like to fix it by split the pred_broadcast define_insn
into zvfhmin and zvfh part.  Given below example:
 
void test (_Float16 *dest, _Float16 bias) {
  dest[0] = bias;
  dest[1] = bias;
}
 
when compile with -march=rv64gcv_zfh_zvfhmin
 
Before this patch:
test:
  vsetivli        zero,2,e16,mf4,ta,ma
  vfmv.v.f        v1,fa0 // should not leverage vfmv for zvfhmin
  vse16.v v1,0(a0)
  ret
 
After this patch:
test:
  addi     sp,sp,-16
  fsh      fa0,14(sp)
  addi     a5,sp,14
  vsetivli zero,2,e16,mf4,ta,ma
  vlse16.v v1,0(a5),zero
  vse16.v  v1,0(a0)
  addi     sp,sp,16
  jr       ra
 
PR target/115763
 
gcc/ChangeLog:
 
* config/riscv/vector.md (*pred_broadcast<mode>): Split into
zvfh and zvfhmin part.
(*pred_broadcast<mode>_zvfh): New define_insn for zvfh part.
(*pred_broadcast<mode>_zvfhmin): Ditto but for zvfhmin.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/base/scalar_move-5.c: Adjust asm check.
* gcc.target/riscv/rvv/base/scalar_move-6.c: Ditto.
* gcc.target/riscv/rvv/base/scalar_move-7.c: Ditto.
* gcc.target/riscv/rvv/base/scalar_move-8.c: Ditto.
* gcc.target/riscv/rvv/base/pr115763-1.c: New test.
* gcc.target/riscv/rvv/base/pr115763-2.c: New test.
 
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/vector.md                    | 49 +++++++++++++------
.../gcc.target/riscv/rvv/base/pr115763-1.c    |  9 ++++
.../gcc.target/riscv/rvv/base/pr115763-2.c    | 10 ++++
.../gcc.target/riscv/rvv/base/scalar_move-5.c |  4 +-
.../gcc.target/riscv/rvv/base/scalar_move-6.c |  6 +--
.../gcc.target/riscv/rvv/base/scalar_move-7.c |  6 +--
.../gcc.target/riscv/rvv/base/scalar_move-8.c |  6 +--
7 files changed, 64 insertions(+), 26 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-2.c
 
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index fe18ee5b5f7..d9474262d54 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -2080,31 +2080,50 @@ (define_insn_and_split "*pred_broadcast<mode>"
   [(set_attr "type" "vimov,vimov,vlds,vlds,vlds,vlds,vimovxv,vimovxv")
    (set_attr "mode" "<MODE>")])
-(define_insn "*pred_broadcast<mode>"
-  [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand"         "=vr, vr, vr, vr, vr, vr, vr, vr")
- (if_then_else:V_VLSF_ZVFHMIN
+(define_insn "*pred_broadcast<mode>_zvfh"
+  [(set (match_operand:V_VLSF    0 "register_operand"              "=vr,  vr,  vr,  vr")
+ (if_then_else:V_VLSF
  (unspec:<VM>
-     [(match_operand:<VM> 1 "vector_broadcast_mask_operand" "Wc1,Wc1, vm, vm,Wc1,Wc1,Wb1,Wb1")
-      (match_operand 4 "vector_length_operand"              " rK, rK, rK, rK, rK, rK, rK, rK")
-      (match_operand 5 "const_int_operand"                  "  i,  i,  i,  i,  i,  i,  i,  i")
-      (match_operand 6 "const_int_operand"                  "  i,  i,  i,  i,  i,  i,  i,  i")
-      (match_operand 7 "const_int_operand"                  "  i,  i,  i,  i,  i,  i,  i,  i")
+     [(match_operand:<VM> 1 "vector_broadcast_mask_operand" "Wc1, Wc1, Wb1, Wb1")
+      (match_operand      4 "vector_length_operand"         " rK,  rK,  rK,  rK")
+      (match_operand      5 "const_int_operand"             "  i,   i,   i,   i")
+      (match_operand      6 "const_int_operand"             "  i,   i,   i,   i")
+      (match_operand      7 "const_int_operand"             "  i,   i,   i,   i")
     (reg:SI VL_REGNUM)
     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
-   (vec_duplicate:V_VLSF_ZVFHMIN
-     (match_operand:<VEL> 3 "direct_broadcast_operand"       " f,  f,Wdm,Wdm,Wdm,Wdm,  f,  f"))
-   (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand"    "vu,  0, vu,  0, vu,  0, vu,  0")))]
+   (vec_duplicate:V_VLSF
+     (match_operand:<VEL> 3 "direct_broadcast_operand"      "  f,   f,   f,   f"))
+   (match_operand:V_VLSF  2 "vector_merge_operand"          " vu,   0,  vu,   0")))]
   "TARGET_VECTOR"
   "@
    vfmv.v.f\t%0,%3
    vfmv.v.f\t%0,%3
+   vfmv.s.f\t%0,%3
+   vfmv.s.f\t%0,%3"
+  [(set_attr "type" "vfmov,vfmov,vfmovfv,vfmovfv")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*pred_broadcast<mode>_zvfhmin"
+  [(set (match_operand:V_VLSF_ZVFHMIN   0 "register_operand"              "=vr,  vr,  vr,  vr")
+ (if_then_else:V_VLSF_ZVFHMIN
+   (unspec:<VM>
+     [(match_operand:<VM>        1 "vector_broadcast_mask_operand" " vm,  vm, Wc1, Wc1")
+      (match_operand             4 "vector_length_operand"         " rK,  rK,  rK,  rK")
+      (match_operand             5 "const_int_operand"             "  i,   i,   i,   i")
+      (match_operand             6 "const_int_operand"             "  i,   i,   i,   i")
+      (match_operand             7 "const_int_operand"             "  i,   i,   i,   i")
+      (reg:SI VL_REGNUM)
+      (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+   (vec_duplicate:V_VLSF_ZVFHMIN
+     (match_operand:<VEL>        3 "direct_broadcast_operand"      "Wdm, Wdm, Wdm, Wdm"))
+   (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand"          " vu,   0,  vu,   0")))]
+  "TARGET_VECTOR"
+  "@
    vlse<sew>.v\t%0,%3,zero,%1.t
    vlse<sew>.v\t%0,%3,zero,%1.t
    vlse<sew>.v\t%0,%3,zero
-   vlse<sew>.v\t%0,%3,zero
-   vfmv.s.f\t%0,%3
-   vfmv.s.f\t%0,%3"
-  [(set_attr "type" "vfmov,vfmov,vlds,vlds,vlds,vlds,vfmovfv,vfmovfv")
+   vlse<sew>.v\t%0,%3,zero"
+  [(set_attr "type" "vlds,vlds,vlds,vlds")
    (set_attr "mode" "<MODE>")])
(define_insn "*pred_broadcast<mode>_extended_scalar"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-1.c
new file mode 100644
index 00000000000..3b0b0046041
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zfh_zvfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model" } */
+
+void test (_Float16 *dest, _Float16 bias) {
+  dest[0] = bias;
+  dest[1] = bias;
+}
+
+/* { dg-final { scan-assembler-times {vfmv\.v\.f\s+v[0-9]+,\s*fa[0-9]+} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-2.c
new file mode 100644
index 00000000000..f4d53e72022
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-2.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zfh_zvfhmin -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model" } */
+
+void test (_Float16 *dest, _Float16 bias) {
+  dest[0] = bias;
+  dest[1] = bias;
+}
+
+/* { dg-final { scan-assembler-times {fsh\s+fa[0-9]+,[0-9]+\(sp\)} 1 } } */
+/* { dg-final { scan-assembler-not {vfmv\.v\.x\s+v[0-9]+,\s*fa[0-9]+} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c
index 2e897a4896f..04dec7bc8dc 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c
@@ -21,9 +21,9 @@ void foo (void *base, void *out, size_t vl)
/*
** foo2:
-** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
+** fld\tfa[0-9]+,\s*100\(a0\)
** vsetvli\tzero,a2,e64,m2,t[au],m[au]
-** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
** ret
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c
index 326cfd8e2ff..0ebb92eda42 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c
@@ -21,9 +21,9 @@ void foo (void *base, void *out, size_t vl)
/*
** foo2:
-** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
+** fld\tfa[0-9]+,\s*100\(a0\)
** vsetvli\tzero,a2,e64,m2,t[au],m[au]
-** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
** ret
*/
@@ -52,7 +52,7 @@ void foo3 (void *base, void *out, size_t vl)
/*
** foo4:
** ...
-** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
** ...
** ret
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c
index b218f2d0ba4..512fa62858a 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c
@@ -21,9 +21,9 @@ void foo (void *base, void *out, size_t vl)
/*
** foo2:
-** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
+** fld\tfa[0-9]+,\s*100\(a0\)
** vsetvli\tzero,a2,e64,m2,t[au],m[au]
-** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
** ret
*/
@@ -52,7 +52,7 @@ void foo3 (void *base, void *out, size_t vl)
/*
** foo4:
** ...
-** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
** ...
** ret
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c
index 4438e793dbc..d9d10f3702a 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c
@@ -21,9 +21,9 @@ void foo (void *base, void *out, size_t vl)
/*
** foo2:
-** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
+** fld\tfa[0-9]+,\s*100\(a0\)
** vsetvli\tzero,a2,e64,m2,t[au],m[au]
-** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
** ret
*/
@@ -52,7 +52,7 @@ void foo3 (void *base, void *out, size_t vl)
/*
** foo4:
** ...
-** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
** ...
** ret
*/
Kito Cheng July 3, 2024, 2:31 p.m. UTC | #2
LGTM and OK for GCC 14 as well.
BTW, one idea is that the scalar could actually be passed via a GPR, i.e. fpr->gpr and then
vmv.v.x, but that is not a blocking comment for this patch.

钟居哲 <juzhe.zhong@rivai.ai> 於 2024年7月3日 週三 22:18 寫道:

> LGTM。
>
> ------------------------------
> juzhe.zhong@rivai.ai
>
>
> *From:* pan2.li <pan2.li@intel.com>
> *Date:* 2024-07-03 22:17
> *To:* gcc-patches <gcc-patches@gcc.gnu.org>
> *CC:* juzhe.zhong <juzhe.zhong@rivai.ai>; kito.cheng
> <kito.cheng@gmail.com>; jeffreyalaw <jeffreyalaw@gmail.com>; rdapp.gcc
> <rdapp.gcc@gmail.com>; Pan Li <pan2.li@intel.com>
> *Subject:* [PATCH v1] RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW
> [PR115763]
> From: Pan Li <pan2.li@intel.com>
>
> According to the ISA,  the zvfhmin sub extension should only contain
> convertion insn.  Thus,  the vfmv insn acts on FP16 should not be
> present when only the zvfhmin option is given.
>
> This patch would like to fix it by split the pred_broadcast define_insn
> into zvfhmin and zvfh part.  Given below example:
>
> void test (_Float16 *dest, _Float16 bias) {
>   dest[0] = bias;
>   dest[1] = bias;
> }
>
> when compile with -march=rv64gcv_zfh_zvfhmin
>
> Before this patch:
> test:
>   vsetivli        zero,2,e16,mf4,ta,ma
>   vfmv.v.f        v1,fa0 // should not leverage vfmv for zvfhmin
>   vse16.v v1,0(a0)
>   ret
>
> After this patch:
> test:
>   addi     sp,sp,-16
>   fsh      fa0,14(sp)
>   addi     a5,sp,14
>   vsetivli zero,2,e16,mf4,ta,ma
>   vlse16.v v1,0(a5),zero
>   vse16.v  v1,0(a0)
>   addi     sp,sp,16
>   jr       ra
>
> PR target/115763
>
> gcc/ChangeLog:
>
> * config/riscv/vector.md (*pred_broadcast<mode>): Split into
> zvfh and zvfhmin part.
> (*pred_broadcast<mode>_zvfh): New define_insn for zvfh part.
> (*pred_broadcast<mode>_zvfhmin): Ditto but for zvfhmin.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/scalar_move-5.c: Adjust asm check.
> * gcc.target/riscv/rvv/base/scalar_move-6.c: Ditto.
> * gcc.target/riscv/rvv/base/scalar_move-7.c: Ditto.
> * gcc.target/riscv/rvv/base/scalar_move-8.c: Ditto.
> * gcc.target/riscv/rvv/base/pr115763-1.c: New test.
> * gcc.target/riscv/rvv/base/pr115763-2.c: New test.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
> gcc/config/riscv/vector.md                    | 49 +++++++++++++------
> .../gcc.target/riscv/rvv/base/pr115763-1.c    |  9 ++++
> .../gcc.target/riscv/rvv/base/pr115763-2.c    | 10 ++++
> .../gcc.target/riscv/rvv/base/scalar_move-5.c |  4 +-
> .../gcc.target/riscv/rvv/base/scalar_move-6.c |  6 +--
> .../gcc.target/riscv/rvv/base/scalar_move-7.c |  6 +--
> .../gcc.target/riscv/rvv/base/scalar_move-8.c |  6 +--
> 7 files changed, 64 insertions(+), 26 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-1.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-2.c
>
> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index fe18ee5b5f7..d9474262d54 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -2080,31 +2080,50 @@ (define_insn_and_split "*pred_broadcast<mode>"
>    [(set_attr "type" "vimov,vimov,vlds,vlds,vlds,vlds,vimovxv,vimovxv")
>     (set_attr "mode" "<MODE>")])
> -(define_insn "*pred_broadcast<mode>"
> -  [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand"         "=vr,
> vr, vr, vr, vr, vr, vr, vr")
> - (if_then_else:V_VLSF_ZVFHMIN
> +(define_insn "*pred_broadcast<mode>_zvfh"
> +  [(set (match_operand:V_VLSF    0 "register_operand"              "=vr,
> vr,  vr,  vr")
> + (if_then_else:V_VLSF
>   (unspec:<VM>
> -     [(match_operand:<VM> 1 "vector_broadcast_mask_operand" "Wc1,Wc1, vm,
> vm,Wc1,Wc1,Wb1,Wb1")
> -      (match_operand 4 "vector_length_operand"              " rK, rK, rK,
> rK, rK, rK, rK, rK")
> -      (match_operand 5 "const_int_operand"                  "  i,  i,
> i,  i,  i,  i,  i,  i")
> -      (match_operand 6 "const_int_operand"                  "  i,  i,
> i,  i,  i,  i,  i,  i")
> -      (match_operand 7 "const_int_operand"                  "  i,  i,
> i,  i,  i,  i,  i,  i")
> +     [(match_operand:<VM> 1 "vector_broadcast_mask_operand" "Wc1, Wc1,
> Wb1, Wb1")
> +      (match_operand      4 "vector_length_operand"         " rK,  rK,
> rK,  rK")
> +      (match_operand      5 "const_int_operand"             "  i,   i,
> i,   i")
> +      (match_operand      6 "const_int_operand"             "  i,   i,
> i,   i")
> +      (match_operand      7 "const_int_operand"             "  i,   i,
> i,   i")
>      (reg:SI VL_REGNUM)
>      (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
> -   (vec_duplicate:V_VLSF_ZVFHMIN
> -     (match_operand:<VEL> 3 "direct_broadcast_operand"       " f,
> f,Wdm,Wdm,Wdm,Wdm,  f,  f"))
> -   (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand"    "vu,  0,
> vu,  0, vu,  0, vu,  0")))]
> +   (vec_duplicate:V_VLSF
> +     (match_operand:<VEL> 3 "direct_broadcast_operand"      "  f,   f,
> f,   f"))
> +   (match_operand:V_VLSF  2 "vector_merge_operand"          " vu,   0,
> vu,   0")))]
>    "TARGET_VECTOR"
>    "@
>     vfmv.v.f\t%0,%3
>     vfmv.v.f\t%0,%3
> +   vfmv.s.f\t%0,%3
> +   vfmv.s.f\t%0,%3"
> +  [(set_attr "type" "vfmov,vfmov,vfmovfv,vfmovfv")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "*pred_broadcast<mode>_zvfhmin"
> +  [(set (match_operand:V_VLSF_ZVFHMIN   0 "register_operand"
> "=vr,  vr,  vr,  vr")
> + (if_then_else:V_VLSF_ZVFHMIN
> +   (unspec:<VM>
> +     [(match_operand:<VM>        1 "vector_broadcast_mask_operand" " vm,
> vm, Wc1, Wc1")
> +      (match_operand             4 "vector_length_operand"         " rK,
> rK,  rK,  rK")
> +      (match_operand             5 "const_int_operand"             "
> i,   i,   i,   i")
> +      (match_operand             6 "const_int_operand"             "
> i,   i,   i,   i")
> +      (match_operand             7 "const_int_operand"             "
> i,   i,   i,   i")
> +      (reg:SI VL_REGNUM)
> +      (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
> +   (vec_duplicate:V_VLSF_ZVFHMIN
> +     (match_operand:<VEL>        3 "direct_broadcast_operand"      "Wdm,
> Wdm, Wdm, Wdm"))
> +   (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand"          "
> vu,   0,  vu,   0")))]
> +  "TARGET_VECTOR"
> +  "@
>     vlse<sew>.v\t%0,%3,zero,%1.t
>     vlse<sew>.v\t%0,%3,zero,%1.t
>     vlse<sew>.v\t%0,%3,zero
> -   vlse<sew>.v\t%0,%3,zero
> -   vfmv.s.f\t%0,%3
> -   vfmv.s.f\t%0,%3"
> -  [(set_attr "type" "vfmov,vfmov,vlds,vlds,vlds,vlds,vfmovfv,vfmovfv")
> +   vlse<sew>.v\t%0,%3,zero"
> +  [(set_attr "type" "vlds,vlds,vlds,vlds")
>     (set_attr "mode" "<MODE>")])
> (define_insn "*pred_broadcast<mode>_extended_scalar"
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-1.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-1.c
> new file mode 100644
> index 00000000000..3b0b0046041
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-1.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zfh_zvfh -mabi=lp64d -O3 -ftree-vectorize
> -fno-vect-cost-model" } */
> +
> +void test (_Float16 *dest, _Float16 bias) {
> +  dest[0] = bias;
> +  dest[1] = bias;
> +}
> +
> +/* { dg-final { scan-assembler-times {vfmv\.v\.f\s+v[0-9]+,\s*fa[0-9]+} 1
> } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-2.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-2.c
> new file mode 100644
> index 00000000000..f4d53e72022
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-2.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zfh_zvfhmin -mabi=lp64d -O3
> -ftree-vectorize -fno-vect-cost-model" } */
> +
> +void test (_Float16 *dest, _Float16 bias) {
> +  dest[0] = bias;
> +  dest[1] = bias;
> +}
> +
> +/* { dg-final { scan-assembler-times {fsh\s+fa[0-9]+,[0-9]+\(sp\)} 1 } }
> */
> +/* { dg-final { scan-assembler-not {vfmv\.v\.x\s+v[0-9]+,\s*fa[0-9]+} } }
> */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c
> index 2e897a4896f..04dec7bc8dc 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c
> @@ -21,9 +21,9 @@ void foo (void *base, void *out, size_t vl)
> /*
> ** foo2:
> -** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
> +** fld\tfa[0-9]+,\s*100\(a0\)
> ** vsetvli\tzero,a2,e64,m2,t[au],m[au]
> -** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
> +** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
> ** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
> ** ret
> */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c
> index 326cfd8e2ff..0ebb92eda42 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c
> @@ -21,9 +21,9 @@ void foo (void *base, void *out, size_t vl)
> /*
> ** foo2:
> -** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
> +** fld\tfa[0-9]+,\s*100\(a0\)
> ** vsetvli\tzero,a2,e64,m2,t[au],m[au]
> -** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
> +** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
> ** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
> ** ret
> */
> @@ -52,7 +52,7 @@ void foo3 (void *base, void *out, size_t vl)
> /*
> ** foo4:
> ** ...
> -** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
> +** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
> ** ...
> ** ret
> */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c
> index b218f2d0ba4..512fa62858a 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c
> @@ -21,9 +21,9 @@ void foo (void *base, void *out, size_t vl)
> /*
> ** foo2:
> -** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
> +** fld\tfa[0-9]+,\s*100\(a0\)
> ** vsetvli\tzero,a2,e64,m2,t[au],m[au]
> -** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
> +** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
> ** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
> ** ret
> */
> @@ -52,7 +52,7 @@ void foo3 (void *base, void *out, size_t vl)
> /*
> ** foo4:
> ** ...
> -** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
> +** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
> ** ...
> ** ret
> */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c
> index 4438e793dbc..d9d10f3702a 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c
> @@ -21,9 +21,9 @@ void foo (void *base, void *out, size_t vl)
> /*
> ** foo2:
> -** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
> +** fld\tfa[0-9]+,\s*100\(a0\)
> ** vsetvli\tzero,a2,e64,m2,t[au],m[au]
> -** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
> +** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
> ** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
> ** ret
> */
> @@ -52,7 +52,7 @@ void foo3 (void *base, void *out, size_t vl)
> /*
> ** foo4:
> ** ...
> -** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
> +** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
> ** ...
> ** ret
> */
> --
> 2.34.1
>
>
>
>
Li, Pan2 July 3, 2024, 2:41 p.m. UTC | #3
Committed, thanks Juzhe and Kito. Let’s wait for a while before backporting to GCC 14.

I suspect there may be similar cases for other insn(s); I will double-check and fix those first.

Pan

From: Kito Cheng <kito.cheng@gmail.com>
Sent: Wednesday, July 3, 2024 10:32 PM
To: juzhe.zhong@rivai.ai
Cc: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763]


LGTM and ok for gcc 14 as well,
btw an idea is that actually could passed via gpr, I mean fpr->gpr and then vmv.v.x, but it's not block commend for this patch.

钟居哲 <juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>> 於 2024年7月3日 週三 22:18 寫道:
LGTM。
Li, Pan2 July 9, 2024, 5:33 a.m. UTC | #4
Backported to gcc 14 already.

Pan

From: Li, Pan2
Sent: Wednesday, July 3, 2024 10:41 PM
To: Kito Cheng <kito.cheng@gmail.com>; juzhe.zhong@rivai.ai
Cc: gcc-patches@gcc.gnu.org; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: RE: [PATCH v1] RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763]

Committed, thanks Juzhe and Kito. Let’s wait for a while before backport to 14.

I suspect there may be similar cases for other insn(s), will double check and fix first.

Pan

From: Kito Cheng <kito.cheng@gmail.com<mailto:kito.cheng@gmail.com>>
Sent: Wednesday, July 3, 2024 10:32 PM
To: juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>
Cc: Li, Pan2 <pan2.li@intel.com<mailto:pan2.li@intel.com>>; gcc-patches@gcc.gnu.org<mailto:gcc-patches@gcc.gnu.org>; jeffreyalaw@gmail.com<mailto:jeffreyalaw@gmail.com>; rdapp.gcc@gmail.com<mailto:rdapp.gcc@gmail.com>
Subject: Re: [PATCH v1] RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763]


LGTM and ok for gcc 14 as well,
btw an idea is that actually could passed via gpr, I mean fpr->gpr and then vmv.v.x, but it's not block commend for this patch.

钟居哲 <juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>> 於 2024年7月3日 週三 22:18 寫道:
LGTM。
diff mbox series

Patch

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index fe18ee5b5f7..d9474262d54 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -2080,31 +2080,50 @@  (define_insn_and_split "*pred_broadcast<mode>"
   [(set_attr "type" "vimov,vimov,vlds,vlds,vlds,vlds,vimovxv,vimovxv")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*pred_broadcast<mode>"
-  [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand"         "=vr, vr, vr, vr, vr, vr, vr, vr")
-	(if_then_else:V_VLSF_ZVFHMIN
+(define_insn "*pred_broadcast<mode>_zvfh"
+  [(set (match_operand:V_VLSF    0 "register_operand"              "=vr,  vr,  vr,  vr")
+	(if_then_else:V_VLSF
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_broadcast_mask_operand" "Wc1,Wc1, vm, vm,Wc1,Wc1,Wb1,Wb1")
-	     (match_operand 4 "vector_length_operand"              " rK, rK, rK, rK, rK, rK, rK, rK")
-	     (match_operand 5 "const_int_operand"                  "  i,  i,  i,  i,  i,  i,  i,  i")
-	     (match_operand 6 "const_int_operand"                  "  i,  i,  i,  i,  i,  i,  i,  i")
-	     (match_operand 7 "const_int_operand"                  "  i,  i,  i,  i,  i,  i,  i,  i")
+	    [(match_operand:<VM> 1 "vector_broadcast_mask_operand" "Wc1, Wc1, Wb1, Wb1")
+	     (match_operand      4 "vector_length_operand"         " rK,  rK,  rK,  rK")
+	     (match_operand      5 "const_int_operand"             "  i,   i,   i,   i")
+	     (match_operand      6 "const_int_operand"             "  i,   i,   i,   i")
+	     (match_operand      7 "const_int_operand"             "  i,   i,   i,   i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
-	  (vec_duplicate:V_VLSF_ZVFHMIN
-	    (match_operand:<VEL> 3 "direct_broadcast_operand"       " f,  f,Wdm,Wdm,Wdm,Wdm,  f,  f"))
-	  (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand"    "vu,  0, vu,  0, vu,  0, vu,  0")))]
+	  (vec_duplicate:V_VLSF
+	    (match_operand:<VEL> 3 "direct_broadcast_operand"      "  f,   f,   f,   f"))
+	  (match_operand:V_VLSF  2 "vector_merge_operand"          " vu,   0,  vu,   0")))]
   "TARGET_VECTOR"
   "@
    vfmv.v.f\t%0,%3
    vfmv.v.f\t%0,%3
+   vfmv.s.f\t%0,%3
+   vfmv.s.f\t%0,%3"
+  [(set_attr "type" "vfmov,vfmov,vfmovfv,vfmovfv")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*pred_broadcast<mode>_zvfhmin"
+  [(set (match_operand:V_VLSF_ZVFHMIN   0 "register_operand"              "=vr,  vr,  vr,  vr")
+	(if_then_else:V_VLSF_ZVFHMIN
+	  (unspec:<VM>
+	    [(match_operand:<VM>        1 "vector_broadcast_mask_operand" " vm,  vm, Wc1, Wc1")
+	     (match_operand             4 "vector_length_operand"         " rK,  rK,  rK,  rK")
+	     (match_operand             5 "const_int_operand"             "  i,   i,   i,   i")
+	     (match_operand             6 "const_int_operand"             "  i,   i,   i,   i")
+	     (match_operand             7 "const_int_operand"             "  i,   i,   i,   i")
+	     (reg:SI VL_REGNUM)
+	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+	  (vec_duplicate:V_VLSF_ZVFHMIN
+	    (match_operand:<VEL>        3 "direct_broadcast_operand"      "Wdm, Wdm, Wdm, Wdm"))
+	  (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand"          " vu,   0,  vu,   0")))]
+  "TARGET_VECTOR"
+  "@
    vlse<sew>.v\t%0,%3,zero,%1.t
    vlse<sew>.v\t%0,%3,zero,%1.t
    vlse<sew>.v\t%0,%3,zero
-   vlse<sew>.v\t%0,%3,zero
-   vfmv.s.f\t%0,%3
-   vfmv.s.f\t%0,%3"
-  [(set_attr "type" "vfmov,vfmov,vlds,vlds,vlds,vlds,vfmovfv,vfmovfv")
+   vlse<sew>.v\t%0,%3,zero"
+  [(set_attr "type" "vlds,vlds,vlds,vlds")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*pred_broadcast<mode>_extended_scalar"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-1.c
new file mode 100644
index 00000000000..3b0b0046041
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-1.c
@@ -0,0 +1,9 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zfh_zvfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model" } */
+
+void test (_Float16 *dest, _Float16 bias) {
+  dest[0] = bias;
+  dest[1] = bias;
+}
+
+/* { dg-final { scan-assembler-times {vfmv\.v\.f\s+v[0-9]+,\s*fa[0-9]+} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-2.c
new file mode 100644
index 00000000000..f4d53e72022
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-2.c
@@ -0,0 +1,10 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zfh_zvfhmin -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model" } */
+
+void test (_Float16 *dest, _Float16 bias) {
+  dest[0] = bias;
+  dest[1] = bias;
+}
+/* With only zvfhmin, the FP16 scalar is spilled (fsh) and broadcast from memory; vfmv.v.f must not appear.  */
+/* { dg-final { scan-assembler-times {fsh\s+fa[0-9]+,[0-9]+\(sp\)} 1 } } */
+/* { dg-final { scan-assembler-not {vfmv\.v\.f\s+v[0-9]+,\s*fa[0-9]+} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c
index 2e897a4896f..04dec7bc8dc 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c
@@ -21,9 +21,9 @@  void foo (void *base, void *out, size_t vl)
 
 /*
 ** foo2:
-** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
+** fld\tfa[0-9]+,\s*100\(a0\)
 ** vsetvli\tzero,a2,e64,m2,t[au],m[au]
-** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
 ** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
 ** ret
 */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c
index 326cfd8e2ff..0ebb92eda42 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c
@@ -21,9 +21,9 @@  void foo (void *base, void *out, size_t vl)
 
 /*
 ** foo2:
-** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
+** fld\tfa[0-9]+,\s*100\(a0\)
 ** vsetvli\tzero,a2,e64,m2,t[au],m[au]
-** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
 ** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
 ** ret
 */
@@ -52,7 +52,7 @@  void foo3 (void *base, void *out, size_t vl)
 /*
 ** foo4:
 ** ...
-** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
 ** ...
 ** ret
 */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c
index b218f2d0ba4..512fa62858a 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c
@@ -21,9 +21,9 @@  void foo (void *base, void *out, size_t vl)
 
 /*
 ** foo2:
-** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
+** fld\tfa[0-9]+,\s*100\(a0\)
 ** vsetvli\tzero,a2,e64,m2,t[au],m[au]
-** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
 ** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
 ** ret
 */
@@ -52,7 +52,7 @@  void foo3 (void *base, void *out, size_t vl)
 /*
 ** foo4:
 ** ...
-** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
 ** ...
 ** ret
 */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c
index 4438e793dbc..d9d10f3702a 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c
@@ -21,9 +21,9 @@  void foo (void *base, void *out, size_t vl)
 
 /*
 ** foo2:
-** addi\t[a-x0-9]+,\s*[a-x0-9]+,100
+** fld\tfa[0-9]+,\s*100\(a0\)
 ** vsetvli\tzero,a2,e64,m2,t[au],m[au]
-** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
 ** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
 ** ret
 */
@@ -52,7 +52,7 @@  void foo3 (void *base, void *out, size_t vl)
 /*
 ** foo4:
 ** ...
-** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero
+** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+
 ** ...
 ** ret
 */