@@ -2099,3 +2099,20 @@ DEF_HELPER_6(th_vfmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32)
DEF_HELPER_6(th_vfnmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32)
DEF_HELPER_6(th_vfnmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32)
DEF_HELPER_6(th_vfnmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32)
+
+DEF_HELPER_6(th_vfwmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32)
@@ -2037,20 +2037,22 @@ GEN_OPFVF_TRANS_TH(th_vfnmadd_vf, opfvf_check_th)
GEN_OPFVF_TRANS_TH(th_vfmsub_vf, opfvf_check_th)
GEN_OPFVF_TRANS_TH(th_vfnmsub_vf, opfvf_check_th)
+/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
+GEN_OPFVV_WIDEN_TRANS_TH(th_vfwmacc_vv, opfvv_widen_check_th)
+GEN_OPFVV_WIDEN_TRANS_TH(th_vfwnmacc_vv, opfvv_widen_check_th)
+GEN_OPFVV_WIDEN_TRANS_TH(th_vfwmsac_vv, opfvv_widen_check_th)
+GEN_OPFVV_WIDEN_TRANS_TH(th_vfwnmsac_vv, opfvv_widen_check_th)
+GEN_OPFVF_WIDEN_TRANS_TH(th_vfwmacc_vf)
+GEN_OPFVF_WIDEN_TRANS_TH(th_vfwnmacc_vf)
+GEN_OPFVF_WIDEN_TRANS_TH(th_vfwmsac_vf)
+GEN_OPFVF_WIDEN_TRANS_TH(th_vfwnmsac_vf)
+
#define TH_TRANS_STUB(NAME) \
static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
{ \
return require_xtheadvector(s); \
}
-TH_TRANS_STUB(th_vfwmacc_vv)
-TH_TRANS_STUB(th_vfwmacc_vf)
-TH_TRANS_STUB(th_vfwnmacc_vv)
-TH_TRANS_STUB(th_vfwnmacc_vf)
-TH_TRANS_STUB(th_vfwmsac_vv)
-TH_TRANS_STUB(th_vfwmsac_vf)
-TH_TRANS_STUB(th_vfwnmsac_vv)
-TH_TRANS_STUB(th_vfwnmsac_vf)
TH_TRANS_STUB(th_vfsqrt_v)
TH_TRANS_STUB(th_vfmin_vv)
TH_TRANS_STUB(th_vfmin_vf)
@@ -3332,13 +3332,13 @@ GEN_VEXT_VF(vfnmsub_vf_w, 4)
GEN_VEXT_VF(vfnmsub_vf_d, 8)
/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
-static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
+uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
return float32_muladd(float16_to_float32(a, true, s),
float16_to_float32(b, true, s), d, 0, s);
}
-static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
+uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
return float64_muladd(float32_to_float64(a, s),
float32_to_float64(b, s), d, 0, s);
@@ -3364,7 +3364,7 @@ GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
GEN_VEXT_VF(vfwmaccbf16_vf, 4)
-static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
+uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
return float32_muladd(float16_to_float32(a, true, s),
float16_to_float32(b, true, s), d,
@@ -3372,7 +3372,7 @@ static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
s);
}
-static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
+uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
d, float_muladd_negate_c |
@@ -3388,14 +3388,14 @@ RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
GEN_VEXT_VF(vfwnmacc_vf_h, 4)
GEN_VEXT_VF(vfwnmacc_vf_w, 8)
-static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
+uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
return float32_muladd(float16_to_float32(a, true, s),
float16_to_float32(b, true, s), d,
float_muladd_negate_c, s);
}
-static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
+uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
return float64_muladd(float32_to_float64(a, s),
float32_to_float64(b, s), d,
@@ -3411,14 +3411,14 @@ RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
GEN_VEXT_VF(vfwmsac_vf_h, 4)
GEN_VEXT_VF(vfwmsac_vf_w, 8)
-static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
+uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
return float32_muladd(float16_to_float32(a, true, s),
float16_to_float32(b, true, s), d,
float_muladd_negate_product, s);
}
-static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
+uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
return float64_muladd(float32_to_float64(a, s),
float32_to_float64(b, s), d,
@@ -385,4 +385,13 @@ uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s);
uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s);
uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s);
+uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s);
+uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s);
+uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s);
+uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s);
+uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s);
+uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s);
+uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s);
+uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s);
+
#endif /* TARGET_RISCV_VECTOR_INTERNALS_H */
@@ -2904,3 +2904,41 @@ THCALL(TH_OPFVF3, th_vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
GEN_TH_VF(th_vfnmsub_vf_h, 2, 2, clearh_th)
GEN_TH_VF(th_vfnmsub_vf_w, 4, 4, clearl_th)
GEN_TH_VF(th_vfnmsub_vf_d, 8, 8, clearq_th)
+
+/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
+
+THCALL(TH_OPFVV3, th_vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
+THCALL(TH_OPFVV3, th_vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
+GEN_TH_VV_ENV(th_vfwmacc_vv_h, 2, 4, clearl_th)
+GEN_TH_VV_ENV(th_vfwmacc_vv_w, 4, 8, clearq_th)
+THCALL(TH_OPFVF3, th_vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
+THCALL(TH_OPFVF3, th_vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
+GEN_TH_VF(th_vfwmacc_vf_h, 2, 4, clearl_th)
+GEN_TH_VF(th_vfwmacc_vf_w, 4, 8, clearq_th)
+
+THCALL(TH_OPFVV3, th_vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
+THCALL(TH_OPFVV3, th_vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
+GEN_TH_VV_ENV(th_vfwnmacc_vv_h, 2, 4, clearl_th)
+GEN_TH_VV_ENV(th_vfwnmacc_vv_w, 4, 8, clearq_th)
+THCALL(TH_OPFVF3, th_vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
+THCALL(TH_OPFVF3, th_vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
+GEN_TH_VF(th_vfwnmacc_vf_h, 2, 4, clearl_th)
+GEN_TH_VF(th_vfwnmacc_vf_w, 4, 8, clearq_th)
+
+THCALL(TH_OPFVV3, th_vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
+THCALL(TH_OPFVV3, th_vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
+GEN_TH_VV_ENV(th_vfwmsac_vv_h, 2, 4, clearl_th)
+GEN_TH_VV_ENV(th_vfwmsac_vv_w, 4, 8, clearq_th)
+THCALL(TH_OPFVF3, th_vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
+THCALL(TH_OPFVF3, th_vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
+GEN_TH_VF(th_vfwmsac_vf_h, 2, 4, clearl_th)
+GEN_TH_VF(th_vfwmsac_vf_w, 4, 8, clearq_th)
+
+THCALL(TH_OPFVV3, th_vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
+THCALL(TH_OPFVV3, th_vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
+GEN_TH_VV_ENV(th_vfwnmsac_vv_h, 2, 4, clearl_th)
+GEN_TH_VV_ENV(th_vfwnmsac_vv_w, 4, 8, clearq_th)
+THCALL(TH_OPFVF3, th_vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
+THCALL(TH_OPFVF3, th_vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
+GEN_TH_VF(th_vfwnmsac_vf_h, 2, 4, clearl_th)
+GEN_TH_VF(th_vfwnmsac_vf_w, 4, 8, clearq_th)
These instructions have the same function as their RVV1.0 counterparts; overall there are only general differences between XTheadVector and RVV1.0. Signed-off-by: Huang Tao <eric.huang@linux.alibaba.com> --- target/riscv/helper.h | 17 +++++++++ .../riscv/insn_trans/trans_xtheadvector.c.inc | 18 +++++---- target/riscv/vector_helper.c | 16 ++++---- target/riscv/vector_internals.h | 9 +++++ target/riscv/xtheadvector_helper.c | 38 +++++++++++++++++++ 5 files changed, 82 insertions(+), 16 deletions(-)