diff mbox series

[v2] RISC-V:Auto vect for vector-bfloat16

Message ID 20241018072458.22223-1-wangfeng@eswincomputing.com
State New
Headers show
Series [v2] RISC-V:Auto vect for vector-bfloat16 | expand

Commit Message

Feng Wang Oct. 18, 2024, 7:24 a.m. UTC
This patch adds auto-vectorization patterns for the vector-bfloat16
extensions (Zvfbfmin and Zvfbfwma).  As with the other vector extensions,
these patterns let the auto-vectorizer use vector BF16 instructions when vectorizing for loops.
gcc/ChangeLog:

	* config/riscv/autovec-opt.md (*widen_bf16_fma<mode>):
	Add vfwmacc auto-vect opt pattern for vector-bfloat16.
	* config/riscv/vector-bfloat16.md (extend<v_fpwidetobf16_trunc><mode>2):
	Add auto-vect pattern for Zvfbfmin extension.
	(trunc<mode><v_fpwidetobf16_trunc>2): Ditto.
	* config/riscv/vector-iterators.md:
	Move vector-bfloat16 iterator definitions from vector-bfloat16.md.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c: New test.
	* gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c: New test.
	* gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c: New test.

Signed-off-by: Feng Wang <wangfeng@eswincomputing.com>
---
 gcc/config/riscv/autovec-opt.md               |  23 ++++
 gcc/config/riscv/vector-bfloat16.md           | 116 +++++++++++++-----
 gcc/config/riscv/vector-iterators.md          |  32 +++++
 .../riscv/rvv/autovec/vfncvt-auto-vect.c      |  19 +++
 .../riscv/rvv/autovec/vfwcvt-auto-vect.c      |  19 +++
 .../riscv/rvv/autovec/vfwmacc-auto-vect.c     |  14 +++
 6 files changed, 195 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c

Comments

钟居哲 Oct. 18, 2024, 9:53 a.m. UTC | #1
Could you add a run test case (verified with QEMU or Spike)?



juzhe.zhong@rivai.ai
 
From: Feng Wang
Date: 2024-10-18 15:24
To: gcc-patches
CC: kito.cheng; juzhe.zhong; Feng Wang
Subject: [PATCH v2] RISC-V:Auto vect for vector-bfloat16
This patch add auto-vect patterns for vector-bfloat16 extension.
Similar to vector extensions, these patterns can use vector
BF16 instructions to optimize the automatic vectorization of for loops.
gcc/ChangeLog:
 
* config/riscv/autovec-opt.md (*widen_bf16_fma<mode>):
Add vfwmacc auto-vect opt pattern for vector-bfloat16.
* config/riscv/vector-bfloat16.md (extend<v_fpwidetobf16_trunc><mode>2):
Add auto-vect pattern for Zvfbfmin extension.
(trunc<mode><v_fpwidetobf16_trunc>2): Ditto.
* config/riscv/vector-iterators.md:
Move vector-bfloat16 iterator definitions from vector-bfloat16.md.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c: New test.
* gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c: New test.
* gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c: New test.
 
Signed-off-by: Feng Wang <wangfeng@eswincomputing.com>
---
gcc/config/riscv/autovec-opt.md               |  23 ++++
gcc/config/riscv/vector-bfloat16.md           | 116 +++++++++++++-----
gcc/config/riscv/vector-iterators.md          |  32 +++++
.../riscv/rvv/autovec/vfncvt-auto-vect.c      |  19 +++
.../riscv/rvv/autovec/vfwcvt-auto-vect.c      |  19 +++
.../riscv/rvv/autovec/vfwmacc-auto-vect.c     |  14 +++
6 files changed, 195 insertions(+), 28 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c
 
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 4b33a145c17..0c6722601ff 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1009,6 +1009,29 @@
   }
   [(set_attr "type" "vfwmuladd")])
+;; vfwmacc for vector_bfloat16
+(define_insn_and_split "*widen_bf16_fma<mode>"
+  [(set (match_operand:VWEXTF_ZVFBF 0 "register_operand")
+        (plus:VWEXTF_ZVFBF
+   (mult:VWEXTF_ZVFBF
+            (float_extend:VWEXTF_ZVFBF
+       (match_operand:<V_FPWIDETOBF16_TRUNC> 2 "register_operand"))
+            (float_extend:VWEXTF_ZVFBF
+       (match_operand:<V_FPWIDETOBF16_TRUNC> 3 "register_operand")))
+   (match_operand:VWEXTF_ZVFBF 1 "register_operand")))]
+  "TARGET_ZVFBFWMA && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    rtx ops[] = {operands[0], operands[1], operands[2], operands[3]};
+    riscv_vector::emit_vlmax_insn (code_for_pred_widen_bf16_mul (<MODE>mode),
+    riscv_vector::WIDEN_TERNARY_OP_FRM_DYN, ops);
+    DONE;
+  }
+  [(set_attr "type" "vfwmaccbf16")
+   (set_attr "mode" "<MODE>")])
+
;; This combine pattern does not correspond to an single instruction.
;; This is a temporary pattern produced by a combine pass and if there
;; is no further combine into widen pattern, then fall back to extend
diff --git a/gcc/config/riscv/vector-bfloat16.md b/gcc/config/riscv/vector-bfloat16.md
index 562aa8ee5ed..90b174be2e7 100644
--- a/gcc/config/riscv/vector-bfloat16.md
+++ b/gcc/config/riscv/vector-bfloat16.md
@@ -17,26 +17,11 @@
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
-(define_mode_iterator VWEXTF_ZVFBF [
-  (RVVM8SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
-  (RVVM4SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
-  (RVVM2SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
-  (RVVM1SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
-  (RVVMF2SF "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
-])
-
-(define_mode_attr V_FP32TOBF16_TRUNC [
-  (RVVM8SF "RVVM4BF") (RVVM4SF "RVVM2BF") (RVVM2SF "RVVM1BF") (RVVM1SF "RVVMF2BF") (RVVMF2SF "RVVMF4BF")
-])
-
-(define_mode_attr VF32_SUBEL [
-   (RVVM8SF "BF") (RVVM4SF "BF") (RVVM2SF "BF") (RVVM1SF "BF") (RVVMF2SF "BF")])
-
;; Zvfbfmin extension
(define_insn "@pred_trunc<mode>_to_bf16"
-  [(set (match_operand:<V_FP32TOBF16_TRUNC> 0 "register_operand"   "=vd, vd, vr, vr,  &vr,  &vr")
-     (if_then_else:<V_FP32TOBF16_TRUNC>
+  [(set (match_operand:<V_FPWIDETOBF16_TRUNC> 0 "register_operand"   "=vd, vd, vr, vr,  &vr,  &vr")
+     (if_then_else:<V_FPWIDETOBF16_TRUNC>
        (unspec:<VM>
          [(match_operand:<VM> 1 "vector_mask_operand"              " vm, vm,Wc1,Wc1,vmWc1,vmWc1")
           (match_operand 4 "vector_length_operand"                 " rK, rK, rK, rK,   rK,   rK")
@@ -47,13 +32,13 @@
           (reg:SI VL_REGNUM)
           (reg:SI VTYPE_REGNUM)
           (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
-       (float_truncate:<V_FP32TOBF16_TRUNC>
+       (float_truncate:<V_FPWIDETOBF16_TRUNC>
           (match_operand:VWEXTF_ZVFBF 3 "register_operand"          "  0,  0,  0,  0,   vr,   vr"))
-       (match_operand:<V_FP32TOBF16_TRUNC> 2 "vector_merge_operand" " vu,  0, vu,  0,   vu,    0")))]
+       (match_operand:<V_FPWIDETOBF16_TRUNC> 2 "vector_merge_operand" " vu,  0, vu,  0,   vu,    0")))]
   "TARGET_ZVFBFMIN"
   "vfncvtbf16.f.f.w\t%0,%3%p1"
   [(set_attr "type" "vfncvtbf16")
-   (set_attr "mode" "<V_FP32TOBF16_TRUNC>")
+   (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")
    (set (attr "frm_mode")
(symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
@@ -69,12 +54,12 @@
          (reg:SI VL_REGNUM)
          (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
       (float_extend:VWEXTF_ZVFBF
-         (match_operand:<V_FP32TOBF16_TRUNC> 3 "register_operand" "   vr,   vr"))
+         (match_operand:<V_FPWIDETOBF16_TRUNC> 3 "register_operand" "   vr,   vr"))
       (match_operand:VWEXTF_ZVFBF 2 "vector_merge_operand"        "   vu,    0")))]
   "TARGET_ZVFBFMIN"
   "vfwcvtbf16.f.f.v\t%0,%3%p1"
   [(set_attr "type" "vfwcvtbf16")
-   (set_attr "mode" "<V_FP32TOBF16_TRUNC>")])
+   (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")])
(define_insn "@pred_widen_bf16_mul_<mode>"
@@ -93,15 +78,15 @@
       (plus:VWEXTF_ZVFBF
         (mult:VWEXTF_ZVFBF
           (float_extend:VWEXTF_ZVFBF
-            (match_operand:<V_FP32TOBF16_TRUNC> 3 "register_operand" "   vr"))
+            (match_operand:<V_FPWIDETOBF16_TRUNC> 3 "register_operand" "   vr"))
           (float_extend:VWEXTF_ZVFBF
-            (match_operand:<V_FP32TOBF16_TRUNC> 4 "register_operand" "   vr")))
+            (match_operand:<V_FPWIDETOBF16_TRUNC> 4 "register_operand" "   vr")))
         (match_operand:VWEXTF_ZVFBF 2 "register_operand"             "    0"))
       (match_dup 2)))]
   "TARGET_ZVFBFWMA"
   "vfwmaccbf16.vv\t%0,%3,%4%p1"
   [(set_attr "type" "vfwmaccbf16")
-   (set_attr "mode" "<V_FP32TOBF16_TRUNC>")
+   (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")
    (set (attr "frm_mode")
(symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
@@ -121,15 +106,90 @@
       (plus:VWEXTF_ZVFBF
         (mult:VWEXTF_ZVFBF
           (float_extend:VWEXTF_ZVFBF
-            (vec_duplicate:<V_FP32TOBF16_TRUNC>
+            (vec_duplicate:<V_FPWIDETOBF16_TRUNC>
               (match_operand:<VF32_SUBEL> 3 "register_operand"       "    f")))
           (float_extend:VWEXTF_ZVFBF
-            (match_operand:<V_FP32TOBF16_TRUNC> 4 "register_operand" "   vr")))
+            (match_operand:<V_FPWIDETOBF16_TRUNC> 4 "register_operand" "   vr")))
         (match_operand:VWEXTF_ZVFBF 2 "register_operand"             "    0"))
       (match_dup 2)))]
   "TARGET_ZVFBFWMA"
   "vfwmaccbf16.vf\t%0,%3,%4%p1"
   [(set_attr "type" "vfwmaccbf16")
-   (set_attr "mode" "<V_FP32TOBF16_TRUNC>")
+   (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")
    (set (attr "frm_mode")
(symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
+
+;; Auto vect pattern
+
+;; -------------------------------------------------------------------------
+;; ---- [BF16] Widening.
+;; -------------------------------------------------------------------------
+;; - vfwcvtbf16.f.f.v
+;; -------------------------------------------------------------------------
+(define_insn_and_split "extend<v_fpwidetobf16_trunc><mode>2"
+  [(set (match_operand:VWEXTF_ZVFBF 0 "register_operand" "=&vr")
+    (float_extend:VWEXTF_ZVFBF
+     (match_operand:<V_FPWIDETOBF16_TRUNC>  1 "register_operand" "  vr")))]
+  "TARGET_ZVFBFMIN && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  insn_code icode = code_for_pred_extend_bf16_to (<MODE>mode);
+  riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP, operands);
+  DONE;
+}
+  [(set_attr "type" "vfwcvtbf16")
+   (set_attr "mode" "<MODE>")])
+
+(define_expand "extend<v_fpwidetobf16_trunc><mode>2"
+  [(set (match_operand:VDF 0 "register_operand")
+    (float_extend:VDF
+     (match_operand:<V_FPWIDETOBF16_TRUNC> 1 "register_operand")))]
+  "TARGET_ZVFBFMIN"
+{
+  rtx dblw = gen_reg_rtx (<V_DOUBLE_TRUNC>mode);
+  emit_insn (gen_extend<v_fpwidetobf16_trunc><v_double_trunc>2 (dblw, operands[1]));
+  emit_insn (gen_extend<v_double_trunc><mode>2 (operands[0], dblw));
+  DONE;
+})
+
+;; -------------------------------------------------------------------------
+;; ---- [BF16] Narrowing.
+;; -------------------------------------------------------------------------
+;; - vfncvtbf16.f.f.w
+;; -------------------------------------------------------------------------
+(define_insn_and_split "trunc<mode><v_fpwidetobf16_trunc>2"
+  [(set (match_operand:<V_FPWIDETOBF16_TRUNC> 0 "register_operand" "=vr")
+    (float_truncate:<V_FPWIDETOBF16_TRUNC>
+     (match_operand:VSF 1 "register_operand"      " vr")))]
+  "TARGET_ZVFBFMIN && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  insn_code icode = code_for_pred_trunc_to_bf16 (<MODE>mode);
+  riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_DYN, operands);
+  DONE;
+}
+  [(set_attr "type" "vfncvtbf16")
+   (set_attr "mode" "<MODE>")])
+
+(define_expand "trunc<mode><v_fpwidetobf16_trunc>2"
+  [(set (match_operand:<V_FPWIDETOBF16_TRUNC> 0 "register_operand")
+    (float_truncate:<V_FPWIDETOBF16_TRUNC>
+     (match_operand:VDF 1 "register_operand")))]
+  "TARGET_ZVFBFMIN"
+{
+  rtx half = gen_reg_rtx (<V_DOUBLE_TRUNC>mode);
+  rtx opshalf[] = {half, operands[1]};
+
+  /* According to the RISC-V V Spec 13.19. we need to use
+     vfncvt.rod.f.f.w for all steps but the last.  */
+  insn_code icode = code_for_pred_rod_trunc (<MODE>mode);
+  riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP, opshalf);
+
+  emit_insn (gen_trunc<v_double_trunc><v_fpwidetobf16_trunc>2 (operands[0], half));
+  DONE;
+})
+
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 43325d1ba87..a53c5233839 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -4512,3 +4512,35 @@
   (V256DF "v64df")
   (V512DF "v128df")
])
+
+;;vector bfloat16
+(define_mode_iterator VWEXTF_ZVFBF [
+  (RVVM8SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
+  (RVVM4SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
+  (RVVM2SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
+  (RVVM1SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
+  (RVVMF2SF "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
+])
+
+(define_mode_iterator VSF [
+  (RVVM8SF "TARGET_VECTOR_ELEN_FP_32") (RVVM4SF "TARGET_VECTOR_ELEN_FP_32") (RVVM2SF "TARGET_VECTOR_ELEN_FP_32")
+  (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
+])
+
+(define_mode_iterator VDF [
+  (RVVM8DF "TARGET_VECTOR_ELEN_FP_64") (RVVM4DF "TARGET_VECTOR_ELEN_FP_64")
+  (RVVM2DF "TARGET_VECTOR_ELEN_FP_64") (RVVM1DF "TARGET_VECTOR_ELEN_FP_64")
+])
+
+(define_mode_attr V_FPWIDETOBF16_TRUNC [
+  (RVVM8SF "RVVM4BF") (RVVM4SF "RVVM2BF") (RVVM2SF "RVVM1BF") (RVVM1SF "RVVMF2BF") (RVVMF2SF "RVVMF4BF")
+  (RVVM8DF "RVVM2BF") (RVVM4DF "RVVM1BF") (RVVM2DF "RVVMF2BF") (RVVM1DF "RVVMF4BF")
+])
+
+(define_mode_attr v_fpwidetobf16_trunc [
+  (RVVM8SF "rvvm4bf") (RVVM4SF "rvvm2bf") (RVVM2SF "rvvm1bf") (RVVM1SF "rvvmf2bf") (RVVMF2SF "rvvmf4bf")
+  (RVVM8DF "rvvm2bf") (RVVM4DF "rvvm1bf") (RVVM2DF "rvvmf2bf") (RVVM1DF "rvvmf4bf")
+])
+
+(define_mode_attr VF32_SUBEL [
+   (RVVM8SF "BF") (RVVM4SF "BF") (RVVM2SF "BF") (RVVM1SF "BF") (RVVMF2SF "BF")])
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c
new file mode 100644
index 00000000000..7ba3615ccf1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfbfmin -mabi=ilp32d" } */
+
+__attribute__((noipa))
+void vfncvt_float_BFloat16 (__bf16 *dst, float *a, int n)
+{ 
+  for (int i = 0; i < n; i++)
+    dst[i] = (__bf16)a[i];
+}
+
+__attribute__((noipa))
+void vfncvt_double_BFloat16 (__bf16 *dst, double *a, int n)
+{ 
+  for (int i = 0; i < n; i++)
+    dst[i] = (__bf16)a[i];
+}
+
+/* { dg-final { scan-assembler-times {\tvfncvtbf16\.f\.f\.w} 2 } } */
+/* { dg-final { scan-assembler-times {\tvfncvt\.rod\.f\.f\.w} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c
new file mode 100644
index 00000000000..6629dd909a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfbfmin -mabi=ilp32d" } */
+
+__attribute__((noipa))
+void vfwcvt__BFloat16float (float *dst, __bf16 *a, int n)
+{
+  for (int i = 0; i < n; i++)
+    dst[i] = (float)a[i];
+}
+
+__attribute__((noipa))
+void vfwcvt__BFloat16double (double *dst, __bf16 *a, int n)
+{
+  for (int i = 0; i < n; i++)
+    dst[i] = (double)a[i];
+}
+
+/* { dg-final { scan-assembler-times {\tvfwcvtbf16\.f\.f\.v} 2 } } */
+/* { dg-final { scan-assembler-times {\tvfwcvt\.f\.f\.v} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c
new file mode 100644
index 00000000000..a767f2c8ef8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfbfwma -mabi=ilp32d -ffast-math" } */
+
+__attribute__ ((noipa))
+void vwmacc_float_bf16 (float *__restrict dst,
+ __bf16 *__restrict a,
+ __bf16 *__restrict b,
+ int n)
+{
+  for (int i = 0; i < n; i++)
+    dst[i] += (float) (a[i] * b[i]);
+}
+
+/* { dg-final { scan-assembler-times {\tvfwmaccbf16\.vv} 1 } } */
diff mbox series

Patch

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 4b33a145c17..0c6722601ff 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1009,6 +1009,29 @@ 
   }
   [(set_attr "type" "vfwmuladd")])
 
+;; vfwmacc for vector_bfloat16
+(define_insn_and_split "*widen_bf16_fma<mode>"
+  [(set (match_operand:VWEXTF_ZVFBF 0 "register_operand")
+        (plus:VWEXTF_ZVFBF
+	  (mult:VWEXTF_ZVFBF
+            (float_extend:VWEXTF_ZVFBF
+	      (match_operand:<V_FPWIDETOBF16_TRUNC> 2 "register_operand"))
+            (float_extend:VWEXTF_ZVFBF
+	      (match_operand:<V_FPWIDETOBF16_TRUNC> 3 "register_operand")))
+	  (match_operand:VWEXTF_ZVFBF 1 "register_operand")))]
+  "TARGET_ZVFBFWMA && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    rtx ops[] = {operands[0], operands[1], operands[2], operands[3]};
+    riscv_vector::emit_vlmax_insn (code_for_pred_widen_bf16_mul (<MODE>mode),
+				   riscv_vector::WIDEN_TERNARY_OP_FRM_DYN, ops);
+    DONE;
+  }
+  [(set_attr "type" "vfwmaccbf16")
+   (set_attr "mode" "<MODE>")])
+
 ;; This combine pattern does not correspond to an single instruction.
 ;; This is a temporary pattern produced by a combine pass and if there
 ;; is no further combine into widen pattern, then fall back to extend
diff --git a/gcc/config/riscv/vector-bfloat16.md b/gcc/config/riscv/vector-bfloat16.md
index 562aa8ee5ed..90b174be2e7 100644
--- a/gcc/config/riscv/vector-bfloat16.md
+++ b/gcc/config/riscv/vector-bfloat16.md
@@ -17,26 +17,11 @@ 
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
-(define_mode_iterator VWEXTF_ZVFBF [
-  (RVVM8SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
-  (RVVM4SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
-  (RVVM2SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
-  (RVVM1SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
-  (RVVMF2SF "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
-])
-
-(define_mode_attr V_FP32TOBF16_TRUNC [
-  (RVVM8SF "RVVM4BF") (RVVM4SF "RVVM2BF") (RVVM2SF "RVVM1BF") (RVVM1SF "RVVMF2BF") (RVVMF2SF "RVVMF4BF")
-])
-
-(define_mode_attr VF32_SUBEL [
-   (RVVM8SF "BF") (RVVM4SF "BF") (RVVM2SF "BF") (RVVM1SF "BF") (RVVMF2SF "BF")])
-
 ;; Zvfbfmin extension
 
 (define_insn "@pred_trunc<mode>_to_bf16"
-  [(set (match_operand:<V_FP32TOBF16_TRUNC> 0 "register_operand"   "=vd, vd, vr, vr,  &vr,  &vr")
-     (if_then_else:<V_FP32TOBF16_TRUNC>
+  [(set (match_operand:<V_FPWIDETOBF16_TRUNC> 0 "register_operand"   "=vd, vd, vr, vr,  &vr,  &vr")
+     (if_then_else:<V_FPWIDETOBF16_TRUNC>
        (unspec:<VM>
          [(match_operand:<VM> 1 "vector_mask_operand"              " vm, vm,Wc1,Wc1,vmWc1,vmWc1")
           (match_operand 4 "vector_length_operand"                 " rK, rK, rK, rK,   rK,   rK")
@@ -47,13 +32,13 @@ 
           (reg:SI VL_REGNUM)
           (reg:SI VTYPE_REGNUM)
           (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
-       (float_truncate:<V_FP32TOBF16_TRUNC>
+       (float_truncate:<V_FPWIDETOBF16_TRUNC>
           (match_operand:VWEXTF_ZVFBF 3 "register_operand"          "  0,  0,  0,  0,   vr,   vr"))
-       (match_operand:<V_FP32TOBF16_TRUNC> 2 "vector_merge_operand" " vu,  0, vu,  0,   vu,    0")))]
+       (match_operand:<V_FPWIDETOBF16_TRUNC> 2 "vector_merge_operand" " vu,  0, vu,  0,   vu,    0")))]
   "TARGET_ZVFBFMIN"
   "vfncvtbf16.f.f.w\t%0,%3%p1"
   [(set_attr "type" "vfncvtbf16")
-   (set_attr "mode" "<V_FP32TOBF16_TRUNC>")
+   (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")
    (set (attr "frm_mode")
 	(symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
 
@@ -69,12 +54,12 @@ 
          (reg:SI VL_REGNUM)
          (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
       (float_extend:VWEXTF_ZVFBF
-         (match_operand:<V_FP32TOBF16_TRUNC> 3 "register_operand" "   vr,   vr"))
+         (match_operand:<V_FPWIDETOBF16_TRUNC> 3 "register_operand" "   vr,   vr"))
       (match_operand:VWEXTF_ZVFBF 2 "vector_merge_operand"        "   vu,    0")))]
   "TARGET_ZVFBFMIN"
   "vfwcvtbf16.f.f.v\t%0,%3%p1"
   [(set_attr "type" "vfwcvtbf16")
-   (set_attr "mode" "<V_FP32TOBF16_TRUNC>")])
+   (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")])
 
 
 (define_insn "@pred_widen_bf16_mul_<mode>"
@@ -93,15 +78,15 @@ 
       (plus:VWEXTF_ZVFBF
         (mult:VWEXTF_ZVFBF
           (float_extend:VWEXTF_ZVFBF
-            (match_operand:<V_FP32TOBF16_TRUNC> 3 "register_operand" "   vr"))
+            (match_operand:<V_FPWIDETOBF16_TRUNC> 3 "register_operand" "   vr"))
           (float_extend:VWEXTF_ZVFBF
-            (match_operand:<V_FP32TOBF16_TRUNC> 4 "register_operand" "   vr")))
+            (match_operand:<V_FPWIDETOBF16_TRUNC> 4 "register_operand" "   vr")))
         (match_operand:VWEXTF_ZVFBF 2 "register_operand"             "    0"))
       (match_dup 2)))]
   "TARGET_ZVFBFWMA"
   "vfwmaccbf16.vv\t%0,%3,%4%p1"
   [(set_attr "type" "vfwmaccbf16")
-   (set_attr "mode" "<V_FP32TOBF16_TRUNC>")
+   (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")
    (set (attr "frm_mode")
 	(symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
 
@@ -121,15 +106,90 @@ 
       (plus:VWEXTF_ZVFBF
         (mult:VWEXTF_ZVFBF
           (float_extend:VWEXTF_ZVFBF
-            (vec_duplicate:<V_FP32TOBF16_TRUNC>
+            (vec_duplicate:<V_FPWIDETOBF16_TRUNC>
               (match_operand:<VF32_SUBEL> 3 "register_operand"       "    f")))
           (float_extend:VWEXTF_ZVFBF
-            (match_operand:<V_FP32TOBF16_TRUNC> 4 "register_operand" "   vr")))
+            (match_operand:<V_FPWIDETOBF16_TRUNC> 4 "register_operand" "   vr")))
         (match_operand:VWEXTF_ZVFBF 2 "register_operand"             "    0"))
       (match_dup 2)))]
   "TARGET_ZVFBFWMA"
   "vfwmaccbf16.vf\t%0,%3,%4%p1"
   [(set_attr "type" "vfwmaccbf16")
-   (set_attr "mode" "<V_FP32TOBF16_TRUNC>")
+   (set_attr "mode" "<V_FPWIDETOBF16_TRUNC>")
    (set (attr "frm_mode")
 	(symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
+
+;; Auto vect pattern
+
+;; -------------------------------------------------------------------------
+;; ---- [BF16] Widening.
+;; -------------------------------------------------------------------------
+;; - vfwcvtbf16.f.f.v
+;; -------------------------------------------------------------------------
+(define_insn_and_split "extend<v_fpwidetobf16_trunc><mode>2"
+  [(set (match_operand:VWEXTF_ZVFBF 0 "register_operand" "=&vr")
+    (float_extend:VWEXTF_ZVFBF
+     (match_operand:<V_FPWIDETOBF16_TRUNC>  1 "register_operand" "  vr")))]
+  "TARGET_ZVFBFMIN && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  insn_code icode = code_for_pred_extend_bf16_to (<MODE>mode);
+  riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP, operands);
+  DONE;
+}
+  [(set_attr "type" "vfwcvtbf16")
+   (set_attr "mode" "<MODE>")])
+
+(define_expand "extend<v_fpwidetobf16_trunc><mode>2"
+  [(set (match_operand:VDF 0 "register_operand")
+    (float_extend:VDF
+     (match_operand:<V_FPWIDETOBF16_TRUNC> 1 "register_operand")))]
+  "TARGET_ZVFBFMIN"
+{
+  rtx dblw = gen_reg_rtx (<V_DOUBLE_TRUNC>mode);
+  emit_insn (gen_extend<v_fpwidetobf16_trunc><v_double_trunc>2 (dblw, operands[1]));
+  emit_insn (gen_extend<v_double_trunc><mode>2 (operands[0], dblw));
+  DONE;
+})
+
+;; -------------------------------------------------------------------------
+;; ---- [BF16] Narrowing.
+;; -------------------------------------------------------------------------
+;; - vfncvtbf16.f.f.w
+;; -------------------------------------------------------------------------
+(define_insn_and_split "trunc<mode><v_fpwidetobf16_trunc>2"
+  [(set (match_operand:<V_FPWIDETOBF16_TRUNC> 0 "register_operand" "=vr")
+    (float_truncate:<V_FPWIDETOBF16_TRUNC>
+     (match_operand:VSF 1 "register_operand"      " vr")))]
+  "TARGET_ZVFBFMIN && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  insn_code icode = code_for_pred_trunc_to_bf16 (<MODE>mode);
+  riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_DYN, operands);
+  DONE;
+}
+  [(set_attr "type" "vfncvtbf16")
+   (set_attr "mode" "<MODE>")])
+
+(define_expand "trunc<mode><v_fpwidetobf16_trunc>2"
+  [(set (match_operand:<V_FPWIDETOBF16_TRUNC> 0 "register_operand")
+    (float_truncate:<V_FPWIDETOBF16_TRUNC>
+     (match_operand:VDF 1 "register_operand")))]
+  "TARGET_ZVFBFMIN"
+{
+  rtx half = gen_reg_rtx (<V_DOUBLE_TRUNC>mode);
+  rtx opshalf[] = {half, operands[1]};
+
+  /* According to the RISC-V V Spec 13.19. we need to use
+     vfncvt.rod.f.f.w for all steps but the last.  */
+  insn_code icode = code_for_pred_rod_trunc (<MODE>mode);
+  riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP, opshalf);
+
+  emit_insn (gen_trunc<v_double_trunc><v_fpwidetobf16_trunc>2 (operands[0], half));
+  DONE;
+})
+
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 43325d1ba87..a53c5233839 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -4512,3 +4512,35 @@ 
   (V256DF "v64df")
   (V512DF "v128df")
 ])
+
+;;vector bfloat16
+(define_mode_iterator VWEXTF_ZVFBF [
+  (RVVM8SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
+  (RVVM4SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
+  (RVVM2SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
+  (RVVM1SF  "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32")
+  (RVVMF2SF "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
+])
+
+(define_mode_iterator VSF [
+  (RVVM8SF "TARGET_VECTOR_ELEN_FP_32") (RVVM4SF "TARGET_VECTOR_ELEN_FP_32") (RVVM2SF "TARGET_VECTOR_ELEN_FP_32")
+  (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
+])
+
+(define_mode_iterator VDF [
+  (RVVM8DF "TARGET_VECTOR_ELEN_FP_64") (RVVM4DF "TARGET_VECTOR_ELEN_FP_64")
+  (RVVM2DF "TARGET_VECTOR_ELEN_FP_64") (RVVM1DF "TARGET_VECTOR_ELEN_FP_64")
+])
+
+(define_mode_attr V_FPWIDETOBF16_TRUNC [
+  (RVVM8SF "RVVM4BF") (RVVM4SF "RVVM2BF") (RVVM2SF "RVVM1BF") (RVVM1SF "RVVMF2BF") (RVVMF2SF "RVVMF4BF")
+  (RVVM8DF "RVVM2BF") (RVVM4DF "RVVM1BF") (RVVM2DF "RVVMF2BF") (RVVM1DF "RVVMF4BF")
+])
+
+(define_mode_attr v_fpwidetobf16_trunc [
+  (RVVM8SF "rvvm4bf") (RVVM4SF "rvvm2bf") (RVVM2SF "rvvm1bf") (RVVM1SF "rvvmf2bf") (RVVMF2SF "rvvmf4bf")
+  (RVVM8DF "rvvm2bf") (RVVM4DF "rvvm1bf") (RVVM2DF "rvvmf2bf") (RVVM1DF "rvvmf4bf")
+])
+
+(define_mode_attr VF32_SUBEL [
+   (RVVM8SF "BF") (RVVM4SF "BF") (RVVM2SF "BF") (RVVM1SF "BF") (RVVMF2SF "BF")])
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c
new file mode 100644
index 00000000000..7ba3615ccf1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfncvt-auto-vect.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfbfmin -mabi=ilp32d" } */
+
+__attribute__((noipa))
+void vfncvt_float_BFloat16 (__bf16 *dst, float *a, int n)
+{ 
+  for (int i = 0; i < n; i++)
+    dst[i] = (__bf16)a[i];
+}
+
+__attribute__((noipa))
+void vfncvt_double_BFloat16 (__bf16 *dst, double *a, int n)
+{ 
+  for (int i = 0; i < n; i++)
+    dst[i] = (__bf16)a[i];
+}
+
+/* { dg-final { scan-assembler-times {\tvfncvtbf16\.f\.f\.w} 2 } } */
+/* { dg-final { scan-assembler-times {\tvfncvt\.rod\.f\.f\.w} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c
new file mode 100644
index 00000000000..6629dd909a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwcvt-auto-vect.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfbfmin -mabi=ilp32d" } */
+
+__attribute__((noipa))
+void vfwcvt__BFloat16float (float *dst, __bf16 *a, int n)
+{
+  for (int i = 0; i < n; i++)
+    dst[i] = (float)a[i];
+}
+
+__attribute__((noipa))
+void vfwcvt__BFloat16double (double *dst, __bf16 *a, int n)
+{
+  for (int i = 0; i < n; i++)
+    dst[i] = (double)a[i];
+}
+
+/* { dg-final { scan-assembler-times {\tvfwcvtbf16\.f\.f\.v} 2 } } */
+/* { dg-final { scan-assembler-times {\tvfwcvt\.f\.f\.v} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c
new file mode 100644
index 00000000000..a767f2c8ef8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vfwmacc-auto-vect.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfbfwma -mabi=ilp32d -ffast-math" } */
+
+__attribute__ ((noipa))
+void vwmacc_float_bf16 (float *__restrict dst,
+			__bf16 *__restrict a,
+			__bf16 *__restrict b,
+			int n)
+{
+  for (int i = 0; i < n; i++)
+    dst[i] += (float) (a[i] * b[i]);
+}
+
+/* { dg-final { scan-assembler-times {\tvfwmaccbf16\.vv} 1 } } */