@@ -726,6 +726,7 @@ static inline void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg)
void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg);
void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg);
+void tcg_gen_ext_i64_i128(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg);
void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high);
void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg);
void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg);
@@ -39,6 +39,11 @@
%imm_addi16sp 12:s1 3:2 5:1 2:1 6:1 !function=ex_shift_4
%imm_lui 12:s1 2:5 !function=ex_shift_12
+# Added for 128 bit support
+%uimm_cl_q 5:2 10:3 !function=ex_shift_3
+%uimm_6bit_lq 2:3 12:1 5:2 !function=ex_shift_3
+%uimm_6bit_sq 7:3 10:3 !function=ex_shift_3
+
# Argument sets imported from insn32.decode:
&empty !extern
@@ -54,16 +59,20 @@
# Formats 16:
@cr .... ..... ..... .. &r rs2=%rs2_5 rs1=%rd %rd
@ci ... . ..... ..... .. &i imm=%imm_ci rs1=%rd %rd
+@cl_q ... . ..... ..... .. &i imm=%uimm_6bit_lq rs1=2 %rd
@cl_d ... ... ... .. ... .. &i imm=%uimm_cl_d rs1=%rs1_3 rd=%rs2_3
@cl_w ... ... ... .. ... .. &i imm=%uimm_cl_w rs1=%rs1_3 rd=%rs2_3
@cs_2 ... ... ... .. ... .. &r rs2=%rs2_3 rs1=%rs1_3 rd=%rs1_3
+@cs_q ... ... ... .. ... .. &s imm=%uimm_cl_q rs1=%rs1_3 rs2=%rs2_3
@cs_d ... ... ... .. ... .. &s imm=%uimm_cl_d rs1=%rs1_3 rs2=%rs2_3
@cs_w ... ... ... .. ... .. &s imm=%uimm_cl_w rs1=%rs1_3 rs2=%rs2_3
@cj ... ........... .. &j imm=%imm_cj
@cb_z ... ... ... .. ... .. &b imm=%imm_cb rs1=%rs1_3 rs2=0
+@c_lqsp ... . ..... ..... .. &i imm=%uimm_6bit_lq rs1=2 %rd
@c_ldsp ... . ..... ..... .. &i imm=%uimm_6bit_ld rs1=2 %rd
@c_lwsp ... . ..... ..... .. &i imm=%uimm_6bit_lw rs1=2 %rd
+@c_sqsp ... . ..... ..... .. &s imm=%uimm_6bit_sq rs1=2 rs2=%rs2_5
@c_sdsp ... . ..... ..... .. &s imm=%uimm_6bit_sd rs1=2 rs2=%rs2_5
@c_swsp ... . ..... ..... .. &s imm=%uimm_6bit_sw rs1=2 rs2=%rs2_5
@c_li ... . ..... ..... .. &i imm=%imm_ci rs1=0 %rd
@@ -87,9 +96,17 @@
illegal 000 000 000 00 --- 00
addi 000 ... ... .. ... 00 @c_addi4spn
}
-fld 001 ... ... .. ... 00 @cl_d
+{
+ fld 001 ... ... .. ... 00 @cl_d
+ # *** RV128C specific Standard Extension (Quadrant 0) ***
+ lq 001 ... ... .. ... 00 @cl_q
+}
lw 010 ... ... .. ... 00 @cl_w
-fsd 101 ... ... .. ... 00 @cs_d
+{
+ fsd 101 ... ... .. ... 00 @cs_d
+ # *** RV128C specific Standard Extension (Quadrant 0) ***
+ sq 101 ... ... .. ... 00 @cs_q
+}
sw 110 ... ... .. ... 00 @cs_w
# *** RV32C and RV64C specific Standard Extension (Quadrant 0) ***
@@ -132,7 +149,11 @@ addw 100 1 11 ... 01 ... 01 @cs_2
# *** RV32/64C Standard Extension (Quadrant 2) ***
slli 000 . ..... ..... 10 @c_shift2
-fld 001 . ..... ..... 10 @c_ldsp
+{
+ fld 001 . ..... ..... 10 @c_ldsp
+ # *** RV128C specific Standard Extension (Quadrant 2) ***
+ lq 001 ... ... .. ... 10 @c_lqsp
+}
{
illegal 010 - 00000 ----- 10 # c.lwsp, RES rd=0
lw 010 . ..... ..... 10 @c_lwsp
@@ -147,7 +168,11 @@ fld 001 . ..... ..... 10 @c_ldsp
jalr 100 1 ..... 00000 10 @c_jalr rd=1 # C.JALR
add 100 1 ..... ..... 10 @cr
}
-fsd 101 ...... ..... 10 @c_sdsp
+{
+ fsd 101 ...... ..... 10 @c_sdsp
+ # *** RV128C specific Standard Extension (Quadrant 2) ***
+ sq 101 ... ... .. ... 10 @c_sqsp
+}
sw 110 . ..... ..... 10 @c_swsp
# *** RV32C and RV64C specific Standard Extension (Quadrant 2) ***
@@ -162,6 +162,11 @@ sllw 0000000 ..... ..... 001 ..... 0111011 @r
srlw 0000000 ..... ..... 101 ..... 0111011 @r
sraw 0100000 ..... ..... 101 ..... 0111011 @r
+# *** RV128I Base Instruction Set (in addition to RV64I) ***
+ldu ............ ..... 111 ..... 0000011 @i
+lq ............ ..... 010 ..... 0001111 @i
+sq ............ ..... 100 ..... 0100011 @s
+
# *** RV32M Standard Extension ***
mul 0000001 ..... ..... 000 ..... 0110011 @r
mulh 0000001 ..... ..... 001 ..... 0110011 @r
@@ -141,7 +141,7 @@ static bool trans_bgeu(DisasContext *ctx, arg_bgeu *a)
return gen_branch(ctx, a, TCG_COND_GEU);
}
-static bool gen_load(DisasContext *ctx, arg_lb *a, MemOp memop)
+static bool gen_load_tl(DisasContext *ctx, arg_lb *a, MemOp memop)
{
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
@@ -155,32 +155,133 @@ static bool gen_load(DisasContext *ctx, arg_lb *a, MemOp memop)
return true;
}
+#if defined(TARGET_RISCV128)
+/*
+ * Accessing signed 64-bit or 128-bit values should be part of MemOp in
+ * include/exec/memop.h
+ * Unfortunately, this requires to change the defines there, as MO_SIGN is 4,
+ * and values 0 to 3 are usual types sizes.
+ * Note that an assert is triggered when MemOp is MO_SIGN|MO_TEQ, this value
+ * being some kind of sentinel.
+ * Changing MemOp values is too involved given our understanding, we
+ * therefore use our own way to deal locally with zero or sign extended
+ * 64-bit values, and 128-bit values.
+ * Doing this implies adding a preprocessor conditional in all memory access
+ * functions to avoid penalizing 32 and 64-bit accesses.
+ */
+typedef enum XMemOp {
+ XMO_NOP = 0, /* MemOp rules! */
+ XMO_TEUQ = 1, /* Zero-extended 64-bit access */
+ XMO_TET = 2, /* 128-bit (T integer format) access */
+ XMO_SIGN = 4, /* Sign-extended 64-bit access */
+ XMO_TESQ = XMO_TEUQ | XMO_SIGN,
+} XMemOp;
+
+static bool gen_load_i128(DisasContext *ctx, arg_lb *a, MemOp memop, XMemOp xm)
+{
+ if (is_128bit(ctx)) {
+ TCGv rs1l = tcg_temp_new();
+ TCGv rs1h = tcg_temp_new();
+ TCGv rdl = tcg_temp_new();
+ TCGv rdh = tcg_temp_new();
+ TCGv imml = tcg_temp_new();
+ TCGv immh = tcg_const_tl((a->imm & 0b100000000000)
+ ? 0xffffffffffffffff : 0);
+
+ gen_get_gpr(rs1l, a->rs1);
+ gen_get_gprh(rs1h, a->rs1);
+
+ /* Build a 128-bit address */
+ tcg_gen_movi_tl(imml, a->imm);
+ tcg_gen_add2_tl(rs1l, rs1h, rs1l, rs1h, imml, immh);
+ /* TODO: should assert that rs1h == 0 for now */
+
+ if (xm != XMO_TET) {
+ tcg_gen_qemu_ld_tl(rdl, rs1l, ctx->mem_idx, memop);
+ if ((memop & MO_SIGN) || (xm & XMO_SIGN)) {
+ tcg_gen_ext_i64_i128(rdl, rdh, rdl);
+ } else {
+ tcg_gen_movi_tl(rdh, 0);
+ }
+ } else {
+ tcg_gen_qemu_ld_tl(memop & MO_BSWAP ? rdh : rdl, rs1l,
+ ctx->mem_idx, MO_TEQ);
+ tcg_gen_movi_tl(imml, 8);
+ tcg_gen_movi_tl(immh, 0);
+ tcg_gen_add2_tl(rs1l, rs1h, rs1l, rs1h, imml, immh);
+ /* TODO: should assert that rs1h == 0 for now */
+ tcg_gen_qemu_ld_tl(memop & MO_BSWAP ? rdl : rdh, rs1l,
+ ctx->mem_idx, MO_TEQ);
+ }
+
+ gen_set_gpr(a->rd, rdl);
+ gen_set_gprh(a->rd, rdh);
+
+ tcg_temp_free(rs1l);
+ tcg_temp_free(rs1h);
+ tcg_temp_free(rdl);
+ tcg_temp_free(rdh);
+ tcg_temp_free(imml);
+ tcg_temp_free(immh);
+ return true;
+ }
+ return gen_load_tl(ctx, a, memop);
+}
+#define gen_load(ctx, a, memop, xmemop) gen_load_i128(ctx, a, memop, xmemop)
+#else
+#define gen_load(ctx, a, memop, xmemop) gen_load_tl(ctx, a, memop)
+#endif
+
static bool trans_lb(DisasContext *ctx, arg_lb *a)
{
- return gen_load(ctx, a, MO_SB);
+ return gen_load(ctx, a, MO_SB, XMO_NOP);
}
static bool trans_lh(DisasContext *ctx, arg_lh *a)
{
- return gen_load(ctx, a, MO_TESW);
+ return gen_load(ctx, a, MO_TESW, XMO_NOP);
}
static bool trans_lw(DisasContext *ctx, arg_lw *a)
{
- return gen_load(ctx, a, MO_TESL);
+ return gen_load(ctx, a, MO_TESL, XMO_NOP);
+}
+
+static bool trans_ld(DisasContext *ctx, arg_ld *a)
+{
+ REQUIRE_64_OR_128BIT(ctx);
+ return gen_load(ctx, a, MO_TEQ, XMO_TESQ);
+}
+
+static bool trans_lq(DisasContext *ctx, arg_lq *a)
+{
+ REQUIRE_128BIT(ctx);
+ return gen_load(ctx, a, MO_TEQ, XMO_TET);
}
static bool trans_lbu(DisasContext *ctx, arg_lbu *a)
{
- return gen_load(ctx, a, MO_UB);
+ return gen_load(ctx, a, MO_UB, XMO_NOP);
}
static bool trans_lhu(DisasContext *ctx, arg_lhu *a)
{
- return gen_load(ctx, a, MO_TEUW);
+ return gen_load(ctx, a, MO_TEUW, XMO_NOP);
+}
+
+static bool trans_lwu(DisasContext *ctx, arg_lwu *a)
+{
+ REQUIRE_64_OR_128BIT(ctx);
+ return gen_load(ctx, a, MO_TEUL, XMO_NOP);
}
-static bool gen_store(DisasContext *ctx, arg_sb *a, MemOp memop)
+static bool trans_ldu(DisasContext *ctx, arg_ldu* a)
+{
+ REQUIRE_128BIT(ctx);
+ return gen_load(ctx, a, MO_TEQ, XMO_TEUQ);
+}
+
+static bool gen_store_tl(DisasContext *ctx, arg_sb *a, MemOp memop)
{
TCGv t0 = tcg_temp_new();
TCGv dat = tcg_temp_new();
@@ -194,38 +295,81 @@ static bool gen_store(DisasContext *ctx, arg_sb *a, MemOp memop)
return true;
}
+#if defined(TARGET_RISCV128)
+static bool gen_store_i128(DisasContext *ctx, arg_sb *a, MemOp memop, XMemOp xm)
+{
+ if (is_128bit(ctx)) {
+ TCGv rs1l = tcg_temp_new();
+ TCGv rs1h = tcg_temp_new();
+ TCGv rs2l = tcg_temp_new();
+ TCGv rs2h = tcg_temp_new();
+ TCGv imml = tcg_temp_new();
+ TCGv immh = tcg_const_tl((a->imm & 0b100000000000)
+ ? 0xffffffffffffffff
+ : 0);
+
+ gen_get_gpr(rs1l, a->rs1);
+ gen_get_gprh(rs1h, a->rs1);
+ gen_get_gpr(rs2l, a->rs2);
+ gen_get_gprh(rs2h, a->rs2);
+ /* Build a 128-bit address */
+ tcg_gen_movi_tl(imml, a->imm);
+ tcg_gen_add2_tl(rs1l, rs1h, rs1l, rs1h, imml, immh);
+ /* TODO: should assert that rs1h == 0 for now */
+
+ if (xm != XMO_TET) {
+ tcg_gen_qemu_st_tl(rs2l, rs1l, ctx->mem_idx, memop);
+ } else {
+ tcg_gen_qemu_st_tl(memop & MO_BSWAP ? rs2h : rs2l, rs1l,
+ ctx->mem_idx, MO_TEQ);
+ tcg_gen_movi_tl(imml, 8);
+ tcg_gen_movi_tl(immh, 0);
+ tcg_gen_add2_tl(rs1l, rs1h, rs1l, rs1h, imml, immh);
+ /* TODO: should assert that rs1h == 0 for now */
+ tcg_gen_qemu_st_tl(memop & MO_BSWAP ? rs2l : rs2h, rs1l,
+ ctx->mem_idx, MO_TEQ);
+ }
+
+ tcg_temp_free(rs1l);
+ tcg_temp_free(rs1h);
+ tcg_temp_free(rs2l);
+ tcg_temp_free(rs2h);
+ tcg_temp_free(imml);
+ tcg_temp_free(immh);
+ return true;
+ }
+ return gen_store_tl(ctx, a, memop);
+}
+#define gen_store(ctx, a, memop, xmemop) gen_store_i128(ctx, a, memop, xmemop)
+#else
+#define gen_store(ctx, a, memop, xmemop) gen_store_tl(ctx, a, memop)
+#endif
static bool trans_sb(DisasContext *ctx, arg_sb *a)
{
- return gen_store(ctx, a, MO_SB);
+ return gen_store(ctx, a, MO_SB, XMO_NOP);
}
static bool trans_sh(DisasContext *ctx, arg_sh *a)
{
- return gen_store(ctx, a, MO_TESW);
+ return gen_store(ctx, a, MO_TESW, XMO_NOP);
}
static bool trans_sw(DisasContext *ctx, arg_sw *a)
{
- return gen_store(ctx, a, MO_TESL);
+ return gen_store(ctx, a, MO_TESL, XMO_NOP);
}
-static bool trans_lwu(DisasContext *ctx, arg_lwu *a)
-{
- REQUIRE_64BIT(ctx);
- return gen_load(ctx, a, MO_TEUL);
-}
-
-static bool trans_ld(DisasContext *ctx, arg_ld *a)
+static bool trans_sd(DisasContext *ctx, arg_sd *a)
{
- REQUIRE_64BIT(ctx);
- return gen_load(ctx, a, MO_TEQ);
+ REQUIRE_64_OR_128BIT(ctx);
+ return gen_store(ctx, a, MO_TEQ, XMO_NOP);
}
-static bool trans_sd(DisasContext *ctx, arg_sd *a)
+static bool trans_sq(DisasContext *ctx, arg_sq *a)
{
- REQUIRE_64BIT(ctx);
- return gen_store(ctx, a, MO_TEQ);
+ REQUIRE_128BIT(ctx);
+ return gen_store(ctx, a, MO_TEQ, XMO_TET);
}
static bool trans_addi(DisasContext *ctx, arg_addi *a)
@@ -2655,6 +2655,12 @@ void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
}
}
+void tcg_gen_ext_i64_i128(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg)
+{
+ tcg_gen_mov_i64(lo, arg);
+ tcg_gen_sari_i64(hi, arg, 63);
+}
+
void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
{
TCGv_i64 tmp;
Addition of the load(s) and store instructions of the 128-bit extension. These instructions have addresses on 128-bit but explicitly assume that the upper 64-bit of the address registers is null, and therefore can use the existing address translation mechanism. 128-bit memory access identification and 64-bit signedness is handled a bit off-the-record: MemOp reserves 2 bits for size and a contiguous 3rd bit for the sign, so we cannot simply take value 4 to indicate a size of 16 bytes. Additionally, MO_TEQ | MO_SIGN seems to be a sentinel value, leading to a QEMU assertion violation. Modifying the existing state in QEMU has a great impact that we are not capable of fully evaluating, so we choose to pass this information into another parameter and let memop as it is for now. Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr> Co-authored-by: Fabien Portas <fabien.portas@grenoble-inp.org> --- include/tcg/tcg-op.h | 1 + target/riscv/insn16.decode | 33 ++++- target/riscv/insn32.decode | 5 + target/riscv/insn_trans/trans_rvi.c.inc | 188 +++++++++++++++++++++--- tcg/tcg-op.c | 6 + 5 files changed, 207 insertions(+), 26 deletions(-)