diff mbox series

[11/13] trans_rvv.c.inc: remove vlmax arg from vec_element_loadx()

Message ID 20240112213812.173521-12-dbarboza@ventanamicro.com
State New
Headers show
Series target/riscv: add 'cpu->cfg.vlenb', remove 'cpu->cfg.vlen' | expand

Commit Message

Daniel Henrique Barboza Jan. 12, 2024, 9:38 p.m. UTC
Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
---
 target/riscv/insn_trans/trans_rvv.c.inc | 26 +++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

Comments

Richard Henderson Jan. 12, 2024, 10:51 p.m. UTC | #1
On 1/13/24 08:38, Daniel Henrique Barboza wrote:
> Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
> ---
>   target/riscv/insn_trans/trans_rvv.c.inc | 26 +++++++++++++++++--------
>   1 file changed, 18 insertions(+), 8 deletions(-)
> 
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
> index 804cfd6c7f..3782d0fa2f 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -3265,21 +3265,28 @@ static void endian_adjust(TCGv_i32 ofs, int sew)
>   #endif
>   }
>   
> -/* Load idx >= VLMAX ? 0 : vreg[idx] */
> +/*
> + * Load idx >= VLMAX ? 0 : vreg[idx]
> + *
> + * This function assumes ctx->vl_eq_vlmax = true.
> + */
>   static void vec_element_loadx(DisasContext *s, TCGv_i64 dest,
> -                              int vreg, TCGv idx, int vlmax)
> +                              int vreg, TCGv idx)

I think removing the cpu configuration constant is a mistake.
Compile-time constants are always better than computation...

> +#ifdef TARGET_RISCV64
> +    tcg_gen_mov_i64(t_vlmax, cpu_vl);
> +#else
> +    tcg_gen_extu_tl_i64(t_vlmax, cpu_vl);
> +#endif

That said, no ifdef required -- the second statement should always work.



r~
Daniel Henrique Barboza Jan. 15, 2024, 5:57 p.m. UTC | #2
On 1/12/24 19:51, Richard Henderson wrote:
> On 1/13/24 08:38, Daniel Henrique Barboza wrote:
>> Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
>> ---
>>   target/riscv/insn_trans/trans_rvv.c.inc | 26 +++++++++++++++++--------
>>   1 file changed, 18 insertions(+), 8 deletions(-)
>>
>> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
>> index 804cfd6c7f..3782d0fa2f 100644
>> --- a/target/riscv/insn_trans/trans_rvv.c.inc
>> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
>> @@ -3265,21 +3265,28 @@ static void endian_adjust(TCGv_i32 ofs, int sew)
>>   #endif
>>   }
>> -/* Load idx >= VLMAX ? 0 : vreg[idx] */
>> +/*
>> + * Load idx >= VLMAX ? 0 : vreg[idx]
>> + *
>> + * This function assumes ctx->vl_eq_vlmax = true.
>> + */
>>   static void vec_element_loadx(DisasContext *s, TCGv_i64 dest,
>> -                              int vreg, TCGv idx, int vlmax)
>> +                              int vreg, TCGv idx)
> 
> I think removing the cpu configuration constant is a mistake.
> Compile-time constants are always better than computation...

Apparently my commit msg is AWOL ...

The 'vlmax' used in vec_element_loadx() is being calculated here, in trans_vrgather_vx():

-        int scale = s->lmul - (s->sew + 3);
-        int vlmax = s->cfg_ptr->vlen >> -scale;

My idea was to eliminate the use of 'vlen' since, in this block, 'vl_eq_vlmax'
is true and we have 'vl' in the TCG global 'cpu_vl'.

I didn't find a way of reading 'cpu_vl' into an int variable and passing it as
'vlmax' to vec_element_loadx(), but inside vec_element_loadx() we can operate
'cpu_vl' normally since we're using TCG ops there. This is the reasoning behind
this change.

I am now wondering if this is worth the trouble, and we should instead do:

+    int vlmax = cpu->cfg.vlenb >> (s->sew - s->lmul);

Like we're already doing in patch 9. Patch 12 would be a similar case.



Thanks,

Daniel



> 
>> +#ifdef TARGET_RISCV64
>> +    tcg_gen_mov_i64(t_vlmax, cpu_vl);
>> +#else
>> +    tcg_gen_extu_tl_i64(t_vlmax, cpu_vl);
>> +#endif
> 
> That said, no ifdef required -- the second statement should always work.
> 
> 
> 
> r~
Richard Henderson Jan. 15, 2024, 8:16 p.m. UTC | #3
On 1/16/24 04:57, Daniel Henrique Barboza wrote:
> I am now wondering if this is worth the trouble, and we should instead do:
> 
> +    int vlmax = cpu->cfg.vlenb >> (s->sew - s->lmul);
> 
> Like we're already doing in patch 9. Patch 12 would be a similar case.

This is more like what I expected to see.


r~
diff mbox series

Patch

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 804cfd6c7f..3782d0fa2f 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -3265,21 +3265,28 @@  static void endian_adjust(TCGv_i32 ofs, int sew)
 #endif
 }
 
-/* Load idx >= VLMAX ? 0 : vreg[idx] */
+/*
+ * Load idx >= VLMAX ? 0 : vreg[idx]
+ *
+ * This function assumes ctx->vl_eq_vlmax = true.
+ */
 static void vec_element_loadx(DisasContext *s, TCGv_i64 dest,
-                              int vreg, TCGv idx, int vlmax)
+                              int vreg, TCGv idx)
 {
     TCGv_i32 ofs = tcg_temp_new_i32();
+    TCGv_i32 vlmax = tcg_temp_new_i32();
     TCGv_ptr base = tcg_temp_new_ptr();
     TCGv_i64 t_idx = tcg_temp_new_i64();
-    TCGv_i64 t_vlmax, t_zero;
+    TCGv_i64 t_zero, t_vlmax = tcg_temp_new_i64();
 
     /*
      * Mask the index to the length so that we do
      * not produce an out-of-range load.
      */
+    tcg_gen_trunc_tl_i32(vlmax, cpu_vl);
+    tcg_gen_sub_i32(vlmax, vlmax, tcg_constant_i32(1));
     tcg_gen_trunc_tl_i32(ofs, idx);
-    tcg_gen_andi_i32(ofs, ofs, vlmax - 1);
+    tcg_gen_and_i32(ofs, ofs, vlmax);
 
     /* Convert the index to an offset. */
     endian_adjust(ofs, s->sew);
@@ -3294,10 +3301,15 @@  static void vec_element_loadx(DisasContext *s, TCGv_i64 dest,
                  vreg_ofs(s, vreg), s->sew, false);
 
     /* Flush out-of-range indexing to zero.  */
-    t_vlmax = tcg_constant_i64(vlmax);
     t_zero = tcg_constant_i64(0);
     tcg_gen_extu_tl_i64(t_idx, idx);
 
+#ifdef TARGET_RISCV64
+    tcg_gen_mov_i64(t_vlmax, cpu_vl);
+#else
+    tcg_gen_extu_tl_i64(t_vlmax, cpu_vl);
+#endif
+
     tcg_gen_movcond_i64(TCG_COND_LTU, dest, t_idx,
                         t_vlmax, dest, t_zero);
 }
@@ -3534,14 +3546,12 @@  static bool trans_vrgather_vx(DisasContext *s, arg_rmrr *a)
     }
 
     if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
-        int scale = s->lmul - (s->sew + 3);
-        int vlmax = s->cfg_ptr->vlen >> -scale;
         TCGv_i64 dest = tcg_temp_new_i64();
 
         if (a->rs1 == 0) {
             vec_element_loadi(s, dest, a->rs2, 0, false);
         } else {
-            vec_element_loadx(s, dest, a->rs2, cpu_gpr[a->rs1], vlmax);
+            vec_element_loadx(s, dest, a->rs2, cpu_gpr[a->rs1]);
         }
 
         tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd),