diff mbox series

[3/8] target/arm: Pass env pointer through to gvec_bfdot helper

Message ID 20240730160306.2959745-4-peter.maydell@linaro.org
State New
Headers show
Series target/arm: Implement FEAT_EBF16 | expand

Commit Message

Peter Maydell July 30, 2024, 4:03 p.m. UTC
Pass the env pointer through to the gvec_bfdot helper,
so we can use it to add support for FEAT_EBF16.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/helper.h             |  4 ++--
 target/arm/tcg/translate-a64.c  | 27 ++++++++++++++++++++++++-
 target/arm/tcg/translate-neon.c | 35 +++++++++++++++++++++++++++++++--
 target/arm/tcg/translate-sve.c  | 15 +++++++++++++-
 target/arm/tcg/vec_helper.c     |  3 ++-
 5 files changed, 77 insertions(+), 7 deletions(-)

Comments

Richard Henderson July 31, 2024, 1:36 a.m. UTC | #1
On 7/31/24 02:03, Peter Maydell wrote:
> Pass the env pointer through to the gvec_bfdot helper,
> so we can use it to add support for FEAT_EBF16.
> 
> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
> ---
>   target/arm/helper.h             |  4 ++--
>   target/arm/tcg/translate-a64.c  | 27 ++++++++++++++++++++++++-
>   target/arm/tcg/translate-neon.c | 35 +++++++++++++++++++++++++++++++--
>   target/arm/tcg/translate-sve.c  | 15 +++++++++++++-
>   target/arm/tcg/vec_helper.c     |  3 ++-
>   5 files changed, 77 insertions(+), 7 deletions(-)
> 
> diff --git a/target/arm/helper.h b/target/arm/helper.h
> index 970d059dec5..aece9fd4aa7 100644
> --- a/target/arm/helper.h
> +++ b/target/arm/helper.h
> @@ -1027,8 +1027,8 @@ DEF_HELPER_FLAGS_5(gvec_ummla_b, TCG_CALL_NO_RWG,
>   DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG,
>                      void, ptr, ptr, ptr, ptr, i32)
>   
> -DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG,
> -                   void, ptr, ptr, ptr, ptr, i32)
> +DEF_HELPER_FLAGS_6(gvec_bfdot, TCG_CALL_NO_RWG,
> +                   void, ptr, ptr, ptr, ptr, ptr, i32)

Because the "env" helper argument type expands to TCGv_ptr in the translation context,
I suspect that you can use "env" instead of "ptr" for the new argument here.  Worth a try, anyway, so that

> -void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
> +void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va,
> +                        void *envp, uint32_t desc)

this doesn't have to use void *.

Either way,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~
Peter Maydell July 31, 2024, 12:31 p.m. UTC | #2
On Wed, 31 Jul 2024 at 02:36, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 7/31/24 02:03, Peter Maydell wrote:
> > Pass the env pointer through to the gvec_bfdot helper,
> > so we can use it to add support for FEAT_EBF16.
> >
> > Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
> > ---
> >   target/arm/helper.h             |  4 ++--
> >   target/arm/tcg/translate-a64.c  | 27 ++++++++++++++++++++++++-
> >   target/arm/tcg/translate-neon.c | 35 +++++++++++++++++++++++++++++++--
> >   target/arm/tcg/translate-sve.c  | 15 +++++++++++++-
> >   target/arm/tcg/vec_helper.c     |  3 ++-
> >   5 files changed, 77 insertions(+), 7 deletions(-)
> >
> > diff --git a/target/arm/helper.h b/target/arm/helper.h
> > index 970d059dec5..aece9fd4aa7 100644
> > --- a/target/arm/helper.h
> > +++ b/target/arm/helper.h
> > @@ -1027,8 +1027,8 @@ DEF_HELPER_FLAGS_5(gvec_ummla_b, TCG_CALL_NO_RWG,
> >   DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG,
> >                      void, ptr, ptr, ptr, ptr, i32)
> >
> > -DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG,
> > -                   void, ptr, ptr, ptr, ptr, i32)
> > +DEF_HELPER_FLAGS_6(gvec_bfdot, TCG_CALL_NO_RWG,
> > +                   void, ptr, ptr, ptr, ptr, ptr, i32)
>
> Because env expands to TCGv_ptr in the translation context, I suspect that you can use
> that here.  Worth a try, anyway, so that
>
> > -void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
> > +void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va,
> > +                        void *envp, uint32_t desc)
>
> this doesn't have to use void *.

I thought I'd tried that, but obviously I didn't hit on the
right combination of types in the prototype/definition.
This does work, so I've changed the patchset to use it.

thanks
-- PMM
diff mbox series

Patch

diff --git a/target/arm/helper.h b/target/arm/helper.h
index 970d059dec5..aece9fd4aa7 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -1027,8 +1027,8 @@  DEF_HELPER_FLAGS_5(gvec_ummla_b, TCG_CALL_NO_RWG,
 DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG,
                    void, ptr, ptr, ptr, ptr, i32)
 
-DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG,
-                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(gvec_bfdot, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(gvec_bfdot_idx, TCG_CALL_NO_RWG,
                    void, ptr, ptr, ptr, ptr, i32)
 
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 148be2826ec..4aef8b9211a 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -735,6 +735,22 @@  static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 }
 
+/*
+ * Expand a 4-operand operation using an out-of-line helper that takes
+ * a pointer to the CPU env.
+ */
+static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
+                             int rm, int ra, int data,
+                             gen_helper_gvec_4_ptr *fn)
+{
+    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
+                       vec_full_reg_offset(s, rn),
+                       vec_full_reg_offset(s, rm),
+                       vec_full_reg_offset(s, ra),
+                       tcg_env,
+                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
+}
+
 /*
  * Expand a 4-operand + fpstatus pointer + simd data value operation using
  * an out-of-line helper.
@@ -5601,10 +5617,19 @@  static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
     return true;
 }
 
+static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
+                              gen_helper_gvec_4_ptr *fn)
+{
+    if (fp_access_check(s)) {
+        gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
+    }
+    return true;
+}
+
 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
-TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector, a, gen_helper_gvec_bfdot)
+TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector, a, gen_helper_gvec_bfmmla)
 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
diff --git a/target/arm/tcg/translate-neon.c b/target/arm/tcg/translate-neon.c
index 915c9e56db5..454380f01d7 100644
--- a/target/arm/tcg/translate-neon.c
+++ b/target/arm/tcg/translate-neon.c
@@ -148,6 +148,37 @@  static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm,
     return true;
 }
 
+static bool do_neon_ddda_env(DisasContext *s, int q, int vd, int vn, int vm,
+                             int data, gen_helper_gvec_4_ptr *fn_gvec)
+{
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
+        return false;
+    }
+
+    /*
+     * UNDEF accesses to odd registers for each bit of Q (bit 2 = vd,
+     * bit 1 = vn, bit 0 = vm). Q will be 0b111 for all Q-reg instructions,
+     * otherwise a subset of bits when we have mixed Q- and D-reg inputs.
+     */
+    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    int opr_sz = q ? 16 : 8;
+    tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
+                       vfp_reg_offset(1, vn),
+                       vfp_reg_offset(1, vm),
+                       vfp_reg_offset(1, vd),
+                       tcg_env,
+                       opr_sz, opr_sz, data, fn_gvec);
+    return true;
+}
+
 static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm,
                               int data, ARMFPStatusFlavour fp_flavour,
                               gen_helper_gvec_4_ptr *fn_gvec_ptr)
@@ -266,8 +297,8 @@  static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
     if (!dc_isar_feature(aa32_bf16, s)) {
         return false;
     }
-    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
-                        gen_helper_gvec_bfdot);
+    return do_neon_ddda_env(s, a->q * 7, a->vd, a->vn, a->vm, 0,
+                            gen_helper_gvec_bfdot);
 }
 
 static bool trans_VFML(DisasContext *s, arg_VFML *a)
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 798ab2bfb13..4fb0bd077b4 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -238,6 +238,19 @@  static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
     return ret;
 }
 
+static bool gen_gvec_env_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
+                              int rd, int rn, int rm, int ra,
+                              int data)
+{
+    return gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, tcg_env);
+}
+
+static bool gen_gvec_env_arg_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
+                                  arg_rrrr_esz *a, int data)
+{
+    return gen_gvec_env_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
+}
+
 /* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
 static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
                                 int rd, int rn, int rm, int ra, int pg,
@@ -7099,7 +7112,7 @@  TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
 TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                         gen_helper_gvec_ummla_b, a, 0)
 
-TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
+TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_env_arg_zzzz,
            gen_helper_gvec_bfdot, a, 0)
 TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
            gen_helper_gvec_bfdot_idx, a)
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 98604d170fd..37aad4be4b0 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -2814,7 +2814,8 @@  float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2)
     return t1;
 }
 
-void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
+void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va,
+                        void *envp, uint32_t desc)
 {
     intptr_t i, opr_sz = simd_oprsz(desc);
     float32 *d = vd, *a = va;