diff mbox series

[RFC,1/7] target/ppc: Implement xxm[tf]acc and xxsetaccz

Message ID 20220426125028.18844-2-lucas.araujo@eldorado.org.br
State New
Headers show
Series VSX MMA Implementation | expand

Commit Message

Lucas Mateus Martins Araujo e Castro April 26, 2022, 12:50 p.m. UTC
From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>

Implement the following PowerISA v3.1 instructions:
xxmfacc: VSX Move From Accumulator
xxmtacc: VSX Move To Accumulator
xxsetaccz: VSX Set Accumulator to Zero

The PowerISA 3.1 mentions that for the current version of the
architecture, "the hardware implementation provides the effect of ACC[i]
and VSRs 4*i to 4*i + 3 logically containing the same data" and "The
Accumulators introduce no new logical state at this time" (page 501).
For now it seems unnecessary to create new structures, so this patch
just treats ACC[i] as VSRs 4*i to 4*i+3, and therefore moves to and from
accumulators are no-ops.

Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
 target/ppc/insn32.decode            |  9 ++++++++
 target/ppc/translate/vsx-impl.c.inc | 36 +++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

Comments

Richard Henderson April 26, 2022, 10:59 p.m. UTC | #1
On 4/26/22 05:50, Lucas Mateus Castro(alqotel) wrote:
> From: "Lucas Mateus Castro (alqotel)"<lucas.araujo@eldorado.org.br>
> 
> Implement the following PowerISA v3.1 instructions:
> xxmfacc: VSX Move From Accumulator
> xxmtacc: VSX Move To Accumulator
> xxsetaccz: VSX Set Accumulator to Zero
> 
> The PowerISA 3.1 mentions that for the current version of the
> architecture, "the hardware implementation provides the effect of ACC[i]
> and VSRs 4*i to 4*i + 3 logically containing the same data" and "The
> Accumulators introduce no new logical state at this time" (page 501).
> For now it seems unnecessary to create new structures, so this patch
> just uses ACC[i] as VSRs 4*i to 4*i+3 and therefore move to and from
> accumulators are no-ops.
> 
> Signed-off-by: Lucas Mateus Castro (alqotel)<lucas.araujo@eldorado.org.br>
> ---
>   target/ppc/insn32.decode            |  9 ++++++++
>   target/ppc/translate/vsx-impl.c.inc | 36 +++++++++++++++++++++++++++++
>   2 files changed, 45 insertions(+)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


> +    TCGv_i64 zero = tcg_constant_i64(0);
> +    for (i = 0; i < 4; i++) {
> +        set_cpu_vsr(a->ra * 4 + i, zero, false);
> +        set_cpu_vsr(a->ra * 4 + i, zero, true);
> +    }

or

   tcg_gen_gvec_dup_imm(MO_64, acc_full_offset(a->ra), 64, 64, 0);


r~
diff mbox series

Patch

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 39372fe673..7a76bedfa6 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -151,6 +151,9 @@ 
 &X_vrt_frbp     vrt frbp
 @X_vrt_frbp     ...... vrt:5 ..... ....0 .......... .           &X_vrt_frbp frbp=%x_frbp
 
+&X_a            ra
+@X_a            ...... ra:3 .. ..... ..... .......... .         &X_a
+
 %xx_xt          0:1 21:5
 %xx_xb          1:1 11:5
 %xx_xa          2:1 16:5
@@ -710,3 +713,9 @@  XVTLSBB         111100 ... -- 00010 ..... 111011011 . - @XX2_bf_xb
 &XL_s           s:uint8_t
 @XL_s           ......-------------- s:1 .......... -   &XL_s
 RFEBB           010011-------------- .   0010010010 -   @XL_s
+
+## Accumulator Instructions
+
+XXMFACC         011111 ... -- 00000 ----- 0010110001 -   @X_a
+XXMTACC         011111 ... -- 00001 ----- 0010110001 -   @X_a
+XXSETACCZ       011111 ... -- 00011 ----- 0010110001 -   @X_a
diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc
index 3692740736..919b889c40 100644
--- a/target/ppc/translate/vsx-impl.c.inc
+++ b/target/ppc/translate/vsx-impl.c.inc
@@ -2787,6 +2787,42 @@  static bool trans_XVCVBF16SPN(DisasContext *ctx, arg_XX2 *a)
     return true;
 }
 
+/*
+ * PowerISA 3.1 states that, for the current version of the architecture,
+ * "the hardware implementation provides the effect of ACC[i] and VSRs 4*i
+ * to 4*i + 3 logically containing the same data" and that "The
+ * Accumulators introduce no new logical state at this time" (page 501).
+ * It therefore seems unnecessary to create new structures for now: ACC[i]
+ * is treated as VSRs 4*i to 4*i+3, so moves to and from accumulators are
+ * no-ops.
+ */
+static bool trans_XXMFACC(DisasContext *ctx, arg_X_a *a)
+{   /* Translator for xxmfacc (VSX Move From Accumulator). */
+    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+    REQUIRE_VSX(ctx);
+    return true; /* no data to move: ACC[ra] is VSRs 4*ra .. 4*ra+3 */
+}
+
+static bool trans_XXMTACC(DisasContext *ctx, arg_X_a *a)
+{   /* Translator for xxmtacc (VSX Move To Accumulator). */
+    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+    REQUIRE_VSX(ctx);
+    return true; /* no data to move: ACC[ra] is VSRs 4*ra .. 4*ra+3 */
+}
+
+static bool trans_XXSETACCZ(DisasContext *ctx, arg_X_a *a)
+{   /* Translator for xxsetaccz (VSX Set Accumulator to Zero). */
+    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+    REQUIRE_VSX(ctx);
+    int i; /* NOTE(review): declared after the REQUIRE_* lines; check style */
+    TCGv_i64 zero = tcg_constant_i64(0);
+    for (i = 0; i < 4; i++) { /* the four VSRs 4*ra .. 4*ra+3 backing ACC[ra] */
+        set_cpu_vsr(a->ra * 4 + i, zero, false); /* presumably one 64-bit half */
+        set_cpu_vsr(a->ra * 4 + i, zero, true);  /* presumably the other half */
+    }
+    return true;
+}
+
 #undef GEN_XX2FORM
 #undef GEN_XX3FORM
 #undef GEN_XX2IFORM