@@ -1149,3 +1149,11 @@ DEF_HELPER_6(vcompress_vm_b, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vcompress_vm_h, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vcompress_vm_w, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vcompress_vm_d, void, ptr, ptr, ptr, ptr, env, i32)
+
+#ifdef TARGET_RISCV128
+/* 128-bit integer multiplication and division */
+DEF_HELPER_6(idivu128, void, env, i64, i64, i64, i64, i64)
+DEF_HELPER_6(idivs128, void, env, i64, i64, i64, i64, i64)
+DEF_HELPER_6(iremu128, void, env, i64, i64, i64, i64, i64)
+DEF_HELPER_6(irems128, void, env, i64, i64, i64, i64, i64)
+#endif
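
For reference, each of these DEF_HELPER_6() declarations expands to a helper
receiving cpu_env plus five 64-bit values; the prototype QEMU generates for
the first entry is roughly (the matching definitions are in the new
m128_helper.c further down):

    void helper_idivu128(CPURISCVState *env, uint64_t rd,
                         uint64_t ul, uint64_t uh,
                         uint64_t vl, uint64_t vh);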
@@ -197,6 +197,13 @@ divuw 0000001 ..... ..... 101 ..... 0111011 @r
remw 0000001 ..... ..... 110 ..... 0111011 @r
remuw 0000001 ..... ..... 111 ..... 0111011 @r
+# *** RV128M Standard Extension (in addition to RV64M) ***
+muld 0000001 ..... ..... 000 ..... 1111011 @r
+divd 0000001 ..... ..... 100 ..... 1111011 @r
+divud 0000001 ..... ..... 101 ..... 1111011 @r
+remd 0000001 ..... ..... 110 ..... 1111011 @r
+remud 0000001 ..... ..... 111 ..... 1111011 @r
+
# *** RV32A Standard Extension ***
lr_w 00010 . . 00000 ..... 010 ..... 0101111 @atom_ld
sc_w 00011 . . ..... ..... 010 ..... 0101111 @atom_st
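
As a quick sanity check on the new patterns: with the standard R-type field
order (funct7 | rs2 | rs1 | funct3 | rd | opcode), muld x3, x1, x2 encodes as

    0000001 00010 00001 000 00011 1111011  ->  0x022081fb

i.e. the same funct7/funct3 as the existing mul patterns, with the major
opcode switched from 0111011 to 1111011.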
@@ -18,16 +18,157 @@
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#if defined(TARGET_RISCV128)
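+/*
+ * 128x128 -> 256 bit unsigned multiply, schoolbook method on 64-bit
+ * limbs:
+ *   a * b = al*bl + ((al*bh + ah*bl) << 64) + ((ah*bh) << 128)
+ * The four partial products are accumulated into the result limbs
+ * (rll, rlh, rhl, rhh), least significant first.
+ */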
+static void gen_mulu2_128(TCGv rll, TCGv rlh, TCGv rhl, TCGv rhh,
+ TCGv al, TCGv ah, TCGv bl, TCGv bh)
+{
+ TCGv tmpl = tcg_temp_new(),
+ tmph = tcg_temp_new(),
+ cnst_zero = tcg_const_tl(0);
+
+ tcg_gen_mulu2_tl(rll, rlh, al, bl);
+
+ tcg_gen_mulu2_tl(tmpl, tmph, al, bh);
+ tcg_gen_add2_tl(rlh, rhl, rlh, cnst_zero, tmpl, tmph);
+ tcg_gen_mulu2_tl(tmpl, tmph, ah, bl);
+ tcg_gen_add2_tl(rlh, tmph, rlh, rhl, tmpl, tmph);
+    /*
+     * Carry out of the previous 128-bit addition: the high part of a
+     * 64x64 product is at most 2^64 - 2, so the sum carried out iff
+     * the new high word (tmph) is strictly below the old one (rhl).
+     */
+ tcg_gen_setcond_tl(TCG_COND_LTU, rhh, tmph, rhl);
+
+ tcg_gen_mov_tl(rhl, tmph);
+
+ tcg_gen_mulu2_tl(tmpl, tmph, ah, bh);
+ tcg_gen_add2_tl(rhl, rhh, rhl, rhh, tmpl, tmph);
+
+ tcg_temp_free(tmpl);
+ tcg_temp_free(tmph);
+ tcg_temp_free(cnst_zero);
+}
+#endif
static bool trans_mul(DisasContext *ctx, arg_mul *a)
{
REQUIRE_EXT(ctx, RVM);
+#if defined(TARGET_RISCV128)
+ if (is_128bit(ctx)) {
+ TCGv rs1h = tcg_temp_new(),
+ rs1l = tcg_temp_new(),
+ rs2h = tcg_temp_new(),
+ rs2l = tcg_temp_new(),
+ rll = tcg_temp_new(),
+ rlh = tcg_temp_new(),
+ rhl = tcg_temp_new(),
+ rhh = tcg_temp_new();
+
+ gen_get_gpr(rs1l, a->rs1);
+ gen_get_gprh(rs1h, a->rs1);
+ gen_get_gpr(rs2l, a->rs2);
+ gen_get_gprh(rs2h, a->rs2);
+
+ gen_mulu2_128(rll, rlh, rhl, rhh, rs1l, rs1h, rs2l, rs2h);
+
+ gen_set_gpr(a->rd, rll);
+ gen_set_gprh(a->rd, rlh);
+
+ tcg_temp_free(rs1h);
+ tcg_temp_free(rs1l);
+ tcg_temp_free(rs2l);
+ tcg_temp_free(rs2h);
+ tcg_temp_free(rll);
+ tcg_temp_free(rlh);
+ tcg_temp_free(rhl);
+ tcg_temp_free(rhh);
+
+ return true;
+ }
+#endif
return gen_arith(ctx, a, &tcg_gen_mul_tl);
}
static bool trans_mulh(DisasContext *ctx, arg_mulh *a)
{
REQUIRE_EXT(ctx, RVM);
+#if defined(TARGET_RISCV128)
+ if (is_128bit(ctx)) {
+ TCGv rs1h = tcg_temp_new(),
+ rs1l = tcg_temp_new(),
+ rs2h = tcg_temp_new(),
+ rs2l = tcg_temp_new(),
+ rll = tcg_temp_new(),
+ rlh = tcg_temp_new(),
+ rhl = tcg_temp_new(),
+ rhh = tcg_temp_new(),
+ rlln = tcg_temp_new(),
+ rlhn = tcg_temp_new(),
+ rhln = tcg_temp_new(),
+ rhhn = tcg_temp_new(),
+ sgnres = tcg_temp_new(),
+ tmp = tcg_temp_new(),
+ cnst_one = tcg_const_tl(1),
+ cnst_zero = tcg_const_tl(0);
+
+ gen_get_gpr(rs1l, a->rs1);
+ gen_get_gprh(rs1h, a->rs1);
+ gen_get_gpr(rs2l, a->rs2);
+ gen_get_gprh(rs2h, a->rs2);
+
+ /* Extract sign of result (=> sgn(a) xor sgn(b)) */
+ tcg_gen_setcondi_tl(TCG_COND_LT, sgnres, rs1h, 0);
+ tcg_gen_setcondi_tl(TCG_COND_LT, tmp, rs2h, 0);
+ tcg_gen_xor_tl(sgnres, sgnres, tmp);
+
+        /* Take absolute values: with m = x >> 63, |x| = (x + m) ^ m */
+ tcg_gen_sari_tl(rhl, rs1h, 63);
+ tcg_gen_add2_tl(rs1l, rs1h, rs1l, rs1h, rhl, rhl);
+ tcg_gen_xor_tl(rs1l, rs1l, rhl);
+ tcg_gen_xor_tl(rs1h, rs1h, rhl);
+
+ tcg_gen_sari_tl(rhl, rs2h, 63);
+ tcg_gen_add2_tl(rs2l, rs2h, rs2l, rs2h, rhl, rhl);
+ tcg_gen_xor_tl(rs2l, rs2l, rhl);
+ tcg_gen_xor_tl(rs2h, rs2h, rhl);
+
+ /* Unsigned multiplication */
+ gen_mulu2_128(rll, rlh, rhl, rhh, rs1l, rs1h, rs2l, rs2h);
+
+        /* Negation of result (two's complement: ~res + 1) */
+ tcg_gen_not_tl(rlln, rll);
+ tcg_gen_not_tl(rlhn, rlh);
+ tcg_gen_not_tl(rhln, rhl);
+ tcg_gen_not_tl(rhhn, rhh);
+
+ tcg_gen_add2_tl(rlln, tmp, rlln, cnst_zero, cnst_one, cnst_zero);
+ tcg_gen_add2_tl(rlhn, tmp, rlhn, cnst_zero, tmp, cnst_zero);
+ tcg_gen_add2_tl(rhln, tmp, rhln, cnst_zero, tmp, cnst_zero);
+ tcg_gen_add2_tl(rhhn, tmp, rhhn, cnst_zero, tmp, cnst_zero);
+
+        /* Conditionally pick res or -res according to the result sign */
+ tcg_gen_movcond_tl(TCG_COND_NE, rhl, sgnres, cnst_zero, rhln, rhl);
+ tcg_gen_movcond_tl(TCG_COND_NE, rhh, sgnres, cnst_zero, rhhn, rhh);
+
+ gen_set_gpr(a->rd, rhl);
+ gen_set_gprh(a->rd, rhh);
+
+ tcg_temp_free(rs1h);
+ tcg_temp_free(rs1l);
+ tcg_temp_free(rs2l);
+ tcg_temp_free(rs2h);
+ tcg_temp_free(rll);
+ tcg_temp_free(rlh);
+ tcg_temp_free(rhl);
+ tcg_temp_free(rhh);
+ tcg_temp_free(rlln);
+ tcg_temp_free(rlhn);
+ tcg_temp_free(rhln);
+ tcg_temp_free(rhhn);
+ tcg_temp_free(sgnres);
+ tcg_temp_free(tmp);
+ tcg_temp_free(cnst_one);
+ tcg_temp_free(cnst_zero);
+
+ return true;
+ }
+#endif
TCGv source1 = tcg_temp_new();
TCGv source2 = tcg_temp_new();
gen_get_gpr(source1, a->rs1);
@@ -44,12 +185,119 @@ static bool trans_mulh(DisasContext *ctx, arg_mulh *a)
static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a)
{
REQUIRE_EXT(ctx, RVM);
+#if defined(TARGET_RISCV128)
+ if (is_128bit(ctx)) {
+ TCGv rs1h = tcg_temp_new(),
+ rs1l = tcg_temp_new(),
+ rs2h = tcg_temp_new(),
+ rs2l = tcg_temp_new(),
+ rll = tcg_temp_new(),
+ rlh = tcg_temp_new(),
+ rhl = tcg_temp_new(),
+ rhh = tcg_temp_new(),
+ rlln = tcg_temp_new(),
+ rlhn = tcg_temp_new(),
+ rhln = tcg_temp_new(),
+ rhhn = tcg_temp_new(),
+ sgnres = tcg_temp_new(),
+ tmp = tcg_temp_new(),
+ cnst_one = tcg_const_tl(1),
+ cnst_zero = tcg_const_tl(0);
+
+ gen_get_gpr(rs1l, a->rs1);
+ gen_get_gprh(rs1h, a->rs1);
+ gen_get_gpr(rs2l, a->rs2);
+ gen_get_gprh(rs2h, a->rs2);
+
+ /* Extract sign of result (=> sgn(a)) */
+ tcg_gen_setcondi_tl(TCG_COND_LT, sgnres, rs1h, 0);
+
+ /* Take absolute value of rs1 */
+ tcg_gen_sari_tl(rhl, rs1h, 63);
+ tcg_gen_add2_tl(rs1l, rs1h, rs1l, rs1h, rhl, rhl);
+ tcg_gen_xor_tl(rs1l, rs1l, rhl);
+ tcg_gen_xor_tl(rs1h, rs1h, rhl);
+
+ /* Unsigned multiplication */
+ gen_mulu2_128(rll, rlh, rhl, rhh, rs1l, rs1h, rs2l, rs2h);
+
+        /* Negation of result (two's complement: ~res + 1) */
+ tcg_gen_not_tl(rlln, rll);
+ tcg_gen_not_tl(rlhn, rlh);
+ tcg_gen_not_tl(rhln, rhl);
+ tcg_gen_not_tl(rhhn, rhh);
+
+ tcg_gen_add2_tl(rlln, tmp, rlln, cnst_zero, cnst_one, cnst_zero);
+ tcg_gen_add2_tl(rlhn, tmp, rlhn, cnst_zero, tmp, cnst_zero);
+ tcg_gen_add2_tl(rhln, tmp, rhln, cnst_zero, tmp, cnst_zero);
+ tcg_gen_add2_tl(rhhn, tmp, rhhn, cnst_zero, tmp, cnst_zero);
+
+        /* Conditionally pick res or -res according to the result sign */
+ tcg_gen_movcond_tl(TCG_COND_NE, rhl, sgnres, cnst_zero, rhln, rhl);
+ tcg_gen_movcond_tl(TCG_COND_NE, rhh, sgnres, cnst_zero, rhhn, rhh);
+
+ gen_set_gpr(a->rd, rhl);
+ gen_set_gprh(a->rd, rhh);
+
+ tcg_temp_free(rs1h);
+ tcg_temp_free(rs1l);
+ tcg_temp_free(rs2l);
+ tcg_temp_free(rs2h);
+ tcg_temp_free(rll);
+ tcg_temp_free(rlh);
+ tcg_temp_free(rhl);
+ tcg_temp_free(rhh);
+ tcg_temp_free(rlln);
+ tcg_temp_free(rlhn);
+ tcg_temp_free(rhln);
+ tcg_temp_free(rhhn);
+ tcg_temp_free(sgnres);
+ tcg_temp_free(tmp);
+ tcg_temp_free(cnst_one);
+ tcg_temp_free(cnst_zero);
+
+ return true;
+ }
+#endif
return gen_arith(ctx, a, &gen_mulhsu);
}
static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a)
{
REQUIRE_EXT(ctx, RVM);
+#if defined(TARGET_RISCV128)
+ if (is_128bit(ctx)) {
+ TCGv rs1h = tcg_temp_new(),
+ rs1l = tcg_temp_new(),
+ rs2h = tcg_temp_new(),
+ rs2l = tcg_temp_new(),
+ rll = tcg_temp_new(),
+ rlh = tcg_temp_new(),
+ rhl = tcg_temp_new(),
+ rhh = tcg_temp_new();
+
+ gen_get_gpr(rs1l, a->rs1);
+ gen_get_gprh(rs1h, a->rs1);
+ gen_get_gpr(rs2l, a->rs2);
+ gen_get_gprh(rs2h, a->rs2);
+
+ gen_mulu2_128(rll, rlh, rhl, rhh, rs1l, rs1h, rs2l, rs2h);
+
+ gen_set_gpr(a->rd, rhl);
+ gen_set_gprh(a->rd, rhh);
+
+ tcg_temp_free(rs1h);
+ tcg_temp_free(rs1l);
+ tcg_temp_free(rs2l);
+ tcg_temp_free(rs2h);
+ tcg_temp_free(rll);
+ tcg_temp_free(rlh);
+ tcg_temp_free(rhl);
+ tcg_temp_free(rhh);
+
+ return true;
+ }
+#endif
TCGv source1 = tcg_temp_new();
TCGv source2 = tcg_temp_new();
gen_get_gpr(source1, a->rs1);
@@ -66,63 +314,243 @@ static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a)
static bool trans_div(DisasContext *ctx, arg_div *a)
{
REQUIRE_EXT(ctx, RVM);
- return gen_arith(ctx, a, &gen_div);
+ if (!is_128bit(ctx)) {
+ return gen_arith(ctx, a, &gen_div);
+ }
+
+#ifdef TARGET_RISCV128
+ TCGv ul = tcg_temp_new(),
+ uh = tcg_temp_new(),
+ vl = tcg_temp_new(),
+ vh = tcg_temp_new(),
+ rd = tcg_temp_new();
+
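+    /*
+     * A helper cannot return a 128-bit value, so the destination
+     * register number is passed in as a value and the helper writes
+     * env->gpr[rd]/env->gprh[rd] directly.
+     */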
+    tcg_gen_movi_tl(rd, a->rd);
+
+ gen_get_gpr(ul, a->rs1);
+ gen_get_gprh(uh, a->rs1);
+ gen_get_gpr(vl, a->rs2);
+ gen_get_gprh(vh, a->rs2);
+
+    gen_helper_idivs128(cpu_env, rd, ul, uh, vl, vh);
+
+    tcg_temp_free(ul);
+    tcg_temp_free(uh);
+    tcg_temp_free(vl);
+    tcg_temp_free(vh);
+    tcg_temp_free(rd);
+#endif
+ return true;
}
static bool trans_divu(DisasContext *ctx, arg_divu *a)
{
REQUIRE_EXT(ctx, RVM);
- return gen_arith(ctx, a, &gen_divu);
+ if (!is_128bit(ctx)) {
+ return gen_arith(ctx, a, &gen_divu);
+ }
+
+#ifdef TARGET_RISCV128
+ TCGv ul = tcg_temp_new(),
+ uh = tcg_temp_new(),
+ vl = tcg_temp_new(),
+ vh = tcg_temp_new(),
+ rd = tcg_temp_new();
+
+    tcg_gen_movi_tl(rd, a->rd);
+
+ gen_get_gpr(ul, a->rs1);
+ gen_get_gprh(uh, a->rs1);
+ gen_get_gpr(vl, a->rs2);
+ gen_get_gprh(vh, a->rs2);
+
+    gen_helper_idivu128(cpu_env, rd, ul, uh, vl, vh);
+
+    tcg_temp_free(ul);
+    tcg_temp_free(uh);
+    tcg_temp_free(vl);
+    tcg_temp_free(vh);
+    tcg_temp_free(rd);
+#endif
+ return true;
}
static bool trans_rem(DisasContext *ctx, arg_rem *a)
{
REQUIRE_EXT(ctx, RVM);
- return gen_arith(ctx, a, &gen_rem);
+ if (!is_128bit(ctx)) {
+ return gen_arith(ctx, a, &gen_rem);
+ }
+
+#ifdef TARGET_RISCV128
+ TCGv ul = tcg_temp_new(),
+ uh = tcg_temp_new(),
+ vl = tcg_temp_new(),
+ vh = tcg_temp_new(),
+ rd = tcg_temp_new();
+
+    tcg_gen_movi_tl(rd, a->rd);
+
+ gen_get_gpr(ul, a->rs1);
+ gen_get_gprh(uh, a->rs1);
+ gen_get_gpr(vl, a->rs2);
+ gen_get_gprh(vh, a->rs2);
+
+    gen_helper_irems128(cpu_env, rd, ul, uh, vl, vh);
+
+    tcg_temp_free(ul);
+    tcg_temp_free(uh);
+    tcg_temp_free(vl);
+    tcg_temp_free(vh);
+    tcg_temp_free(rd);
+#endif
+ return true;
}
static bool trans_remu(DisasContext *ctx, arg_remu *a)
{
REQUIRE_EXT(ctx, RVM);
- return gen_arith(ctx, a, &gen_remu);
+ if (!is_128bit(ctx)) {
+ return gen_arith(ctx, a, &gen_remu);
+ }
+
+#ifdef TARGET_RISCV128
+ TCGv ul = tcg_temp_new(),
+ uh = tcg_temp_new(),
+ vl = tcg_temp_new(),
+ vh = tcg_temp_new(),
+ rd = tcg_temp_new();
+
+ tcg_gen_movi_tl(rd, a->rd);
+
+ gen_get_gpr(ul, a->rs1);
+ gen_get_gprh(uh, a->rs1);
+ gen_get_gpr(vl, a->rs2);
+ gen_get_gprh(vh, a->rs2);
+
+    gen_helper_iremu128(cpu_env, rd, ul, uh, vl, vh);
+
+    tcg_temp_free(ul);
+    tcg_temp_free(uh);
+    tcg_temp_free(vl);
+    tcg_temp_free(vh);
+    tcg_temp_free(rd);
+#endif
+ return true;
}
static bool trans_mulw(DisasContext *ctx, arg_mulw *a)
{
- REQUIRE_64BIT(ctx);
+ REQUIRE_64_OR_128BIT(ctx);
REQUIRE_EXT(ctx, RVM);
- return gen_arith(ctx, a, &gen_mulw);
+ bool rv = gen_arith(ctx, a, &gen_mulw);
+#if defined(TARGET_RISCV128)
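+    /* Sign-extend the 64-bit result into the full 128-bit register */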
+ if (is_128bit(ctx) && a->rd != 0) {
+ tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd], cpu_gpr[a->rd]);
+ }
+#endif
+ return rv;
}
static bool trans_divw(DisasContext *ctx, arg_divw *a)
{
- REQUIRE_64BIT(ctx);
+ REQUIRE_64_OR_128BIT(ctx);
REQUIRE_EXT(ctx, RVM);
- return gen_arith_div_w(ctx, a, &gen_div);
+ bool rv = gen_arith_div_w(ctx, a, &gen_div);
+#if defined(TARGET_RISCV128)
+ if (is_128bit(ctx) && a->rd != 0) {
+ tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd], cpu_gpr[a->rd]);
+ }
+#endif
+ return rv;
}
static bool trans_divuw(DisasContext *ctx, arg_divuw *a)
{
- REQUIRE_64BIT(ctx);
+ REQUIRE_64_OR_128BIT(ctx);
REQUIRE_EXT(ctx, RVM);
- return gen_arith_div_uw(ctx, a, &gen_divu);
+ bool rv = gen_arith_div_uw(ctx, a, &gen_divu);
+#if defined(TARGET_RISCV128)
+ if (is_128bit(ctx) && a->rd != 0) {
+ tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd], cpu_gpr[a->rd]);
+ }
+#endif
+ return rv;
}
static bool trans_remw(DisasContext *ctx, arg_remw *a)
{
- REQUIRE_64BIT(ctx);
+ REQUIRE_64_OR_128BIT(ctx);
REQUIRE_EXT(ctx, RVM);
- return gen_arith_div_w(ctx, a, &gen_rem);
+ bool rv = gen_arith_div_w(ctx, a, &gen_rem);
+#if defined(TARGET_RISCV128)
+ if (is_128bit(ctx) && a->rd != 0) {
+ tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd], cpu_gpr[a->rd]);
+ }
+#endif
+ return rv;
}
static bool trans_remuw(DisasContext *ctx, arg_remuw *a)
{
- REQUIRE_64BIT(ctx);
+ REQUIRE_64_OR_128BIT(ctx);
+ REQUIRE_EXT(ctx, RVM);
+
+ bool rv = gen_arith_div_uw(ctx, a, &gen_remu);
+#if defined(TARGET_RISCV128)
+ if (is_128bit(ctx) && a->rd != 0) {
+ tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd], cpu_gpr[a->rd]);
+ }
+#endif
+ return rv;
+}
+
+static bool trans_muld(DisasContext *ctx, arg_muld *a)
+{
+ REQUIRE_EXT(ctx, RVM);
+ REQUIRE_128BIT(ctx);
+
+ bool rv = gen_arith(ctx, a, &tcg_gen_mul_tl);
+#if defined(TARGET_RISCV128)
+ if (a->rd != 0) {
+ tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd], cpu_gpr[a->rd]);
+ }
+#endif
+ return rv;
+}
+
+static bool trans_divd(DisasContext *ctx, arg_divd *a)
+{
+ REQUIRE_EXT(ctx, RVM);
+ REQUIRE_128BIT(ctx);
+
+ bool rv = gen_arith(ctx, a, &gen_div);
+#if defined(TARGET_RISCV128)
+ if (a->rd != 0) {
+ tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd], cpu_gpr[a->rd]);
+ }
+#endif
+ return rv;
+}
+
+static bool trans_divud(DisasContext *ctx, arg_divud *a)
+{
+ REQUIRE_EXT(ctx, RVM);
+ REQUIRE_128BIT(ctx);
+
+ bool rv = gen_arith(ctx, a, &gen_divu);
+#if defined(TARGET_RISCV128)
+ if (a->rd != 0) {
+ tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd], cpu_gpr[a->rd]);
+ }
+#endif
+ return rv;
+}
+
+static bool trans_remd(DisasContext *ctx, arg_remd *a)
+{
+ REQUIRE_EXT(ctx, RVM);
+ REQUIRE_128BIT(ctx);
+
+ bool rv = gen_arith(ctx, a, &gen_rem);
+#if defined(TARGET_RISCV128)
+ if (a->rd != 0) {
+ tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd], cpu_gpr[a->rd]);
+ }
+#endif
+ return rv;
+}
+
+static bool trans_remud(DisasContext *ctx, arg_remud *a)
+{
REQUIRE_EXT(ctx, RVM);
+ REQUIRE_128BIT(ctx);
- return gen_arith_div_uw(ctx, a, &gen_remu);
+ bool rv = gen_arith(ctx, a, &gen_remu);
+#if defined(TARGET_RISCV128)
+ if (a->rd != 0) {
+ tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd], cpu_gpr[a->rd]);
+ }
+#endif
+ return rv;
}
new file mode 100644
@@ -0,0 +1,301 @@
+/*
+ * RISC-V Emulation Helpers for QEMU.
+ *
+ * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
+ * Copyright (c) 2017-2018 SiFive, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "qemu/main-loop.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+
+#ifdef TARGET_RISCV128
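+/*
+ * Unsigned 128-bit division and remainder, following Knuth TAOCP
+ * vol. 2 Algorithm D on 32-bit digits.  (ul, uh) and (vl, vh) are the
+ * low/high halves of the dividend and the divisor.  The quotient goes
+ * to (ql, qh) and the remainder to (rl, rh); either output pair may be
+ * NULL when not wanted.
+ */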
+/* TODO: there is plenty of room for optimization here */
+static void divmod128(uint64_t ul, uint64_t uh,
+ uint64_t vl, uint64_t vh,
+ uint64_t *ql, uint64_t *qh,
+ uint64_t *rl, uint64_t *rh)
+{
+ const uint64_t b = ((uint64_t) 1) << 32;
+ const int m = 4;
+ uint64_t qhat, rhat, p;
+ int n, s, i;
+ int64_t j, t, k;
+
+    /* Split u and v into arrays of 32-bit digits, least significant first */
+    uint32_t u[4] = { ul & 0xffffffff, ul >> 32, uh & 0xffffffff, uh >> 32 };
+    uint32_t v[4] = { vl & 0xffffffff, vl >> 32, vh & 0xffffffff, vh >> 32 };
+
+    uint32_t q[4] = { 0 };
+    uint32_t r[4] = { 0 };
+    uint32_t un[5] = { 0 };
+    uint32_t vn[4] = { 0 };
+
+ if (v[3] != 0) {
+ n = 4;
+ } else if (v[2]) {
+ n = 3;
+ } else if (v[1]) {
+ n = 2;
+ } else if (v[0]) {
+ n = 1;
+ } else {
+        /* Cannot happen, the callers rule out zero divisors; keeps gcc quiet */
+ return;
+ }
+
+ if (n == 1) {
+ /* Take care of the case of a single-digit divisor here */
+ k = 0;
+ for (j = m - 1; j >= 0; j--) {
+ q[j] = (k * b + u[j]) / v[0];
+ k = (k * b + u[j]) - q[j] * v[0];
+ }
+        r[0] = k;
+ } else {
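+        /* Step D1 : normalize so that vn[n - 1] >= b / 2 */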
+        s = clz32(v[n - 1]); /* 0 <= s <= 31, since v[n - 1] != 0 */
+ if (s != 0) {
+ for (i = n - 1; i > 0; i--) {
+ vn[i] = ((v[i] << s) | (v[i - 1] >> (32 - s)));
+ }
+ vn[0] = v[0] << s;
+
+ un[m] = u[m - 1] >> (32 - s);
+ for (i = m - 1; i > 0; i--) {
+ un[i] = (u[i] << s) | (u[i - 1] >> (32 - s));
+ }
+ un[0] = u[0] << s;
+ } else {
+ for (i = 0; i < n; i++) {
+ vn[i] = v[i];
+ }
+
+ for (i = 0; i < m; i++) {
+ un[i] = u[i];
+ }
+ un[m] = 0;
+ }
+
+ /* Step D2 : loop on j */
+ for (j = m - n; j >= 0; j--) { /* Main loop */
+ /* Step D3 : Compute estimate qhat of q[j] */
+ qhat = (un[j + n] * b + un[j + n - 1]) / vn[n - 1];
+            /* Optimized mod vn[n - 1] */
+ rhat = (un[j + n] * b + un[j + n - 1]) - qhat * vn[n - 1];
+
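+            /*
+             * Correct the estimate: Knuth shows qhat exceeds the true
+             * digit by at most 2, so this loop runs at most twice.
+             */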
+ while (true) {
+ if (qhat == b
+ || qhat * vn[n - 2] > b * rhat + un[j + n - 2]) {
+ qhat = qhat - 1;
+ rhat = rhat + vn[n - 1];
+ if (rhat < b) {
+ continue;
+ }
+ }
+ break;
+ }
+
+ /* Step D4 : Multiply and subtract */
+ k = 0;
+ for (i = 0; i < n; i++) {
+ p = qhat * vn[i];
+ t = un[i + j] - k - (p & 0xffffffff);
+ un[i + j] = t;
+ k = (p >> 32) - (t >> 32);
+ }
+ t = un[j + n] - k;
+ un[j + n] = t;
+
+ /* Step D5 */
+ q[j] = qhat; /* Store quotient digit */
+ /* Step D6 */
+ if (t < 0) { /* If we subtracted too much, add back */
+ q[j] = q[j] - 1;
+ k = 0;
+ for (i = 0; i < n; i++) {
+ t = un[i + j] + vn[i] + k;
+ un[i + j] = t;
+ k = t >> 32;
+ }
+ un[j + n] = un[j + n] + k;
+ }
+ } /* D7 Loop */
+
+ /* Step D8 : Unnormalize */
+ if (rl && rh) {
+ if (s != 0) {
+ for (i = 0; i < n; i++) {
+ r[i] = (un[i] >> s) | (un[i + 1] << (32 - s));
+ }
+ } else {
+ for (i = 0; i < n; i++) {
+ r[i] = un[i];
+ }
+ }
+ }
+ }
+
+ if (ql && qh) {
+ *ql = q[0] | ((uint64_t)q[1] << 32);
+ *qh = q[2] | ((uint64_t)q[3] << 32);
+ }
+
+ if (rl && rh) {
+ *rl = r[0] | ((uint64_t)r[1] << 32);
+ *rh = r[2] | ((uint64_t)r[3] << 32);
+ }
+}
+
+void HELPER(idivu128)(CPURISCVState *env, uint64_t rd,
+ uint64_t ul, uint64_t uh,
+ uint64_t vl, uint64_t vh)
+{
+ uint64_t ql, qh;
+    if (vl == 0 && vh == 0) { /* Division by zero yields an all-ones quotient */
+ ql = 0xffffffffffffffff;
+ qh = ql;
+ } else {
+ /* Soft quad division */
+ divmod128(ul, uh, vl, vh, &ql, &qh, NULL, NULL);
+ }
+
+ if (rd != 0) {
+ env->gpr[rd] = ql;
+ env->gprh[rd] = qh;
+ }
+}
+
+void HELPER(iremu128)(CPURISCVState *env, uint64_t rd,
+ uint64_t ul, uint64_t uh,
+ uint64_t vl, uint64_t vh)
+{
+ uint64_t rl, rh;
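+    /* The remainder of a division by zero is the dividend itself */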
+ if (vl == 0 && vh == 0) {
+ rl = ul;
+ rh = uh;
+ } else {
+ /* Soft quad division */
+ divmod128(ul, uh, vl, vh, NULL, NULL, &rl, &rh);
+ }
+
+ if (rd != 0) {
+ env->gpr[rd] = rl;
+ env->gprh[rd] = rh;
+ }
+}
+
+static void neg128(uint64_t *valh, uint64_t *vall)
+{
+ uint64_t oneh = ~(*valh), onel = ~(*vall);
+ *vall = onel + 1;
+ /* Carry into upper 64 bits */
+ *valh = (*vall < onel) ? oneh + 1 : oneh;
+}
+
+void HELPER(idivs128)(CPURISCVState *env, uint64_t rd,
+ uint64_t ul, uint64_t uh,
+ uint64_t vl, uint64_t vh)
+{
+ uint64_t qh, ql;
+ if (vl == 0 && vh == 0) { /* Div by zero check */
+ ql = 0xffffffffffffffff;
+ qh = 0xffffffffffffffff;
+ } else if (uh == 0x8000000000000000 && ul == 0 &&
+ vh == 0xffffffffffffffff && vl == 0xffffffffffffffff) {
+ /* Signed div overflow check (-2**127 / -1) */
+ ql = ul;
+ qh = uh;
+ } else {
+        /* Use unsigned divmod to build the signed quotient */
+ bool sgnu = (uh & 0x8000000000000000),
+ sgnv = (vh & 0x8000000000000000);
+
+ if (sgnu) {
+ neg128(&uh, &ul);
+ }
+
+ if (sgnv) {
+ neg128(&vh, &vl);
+ }
+
+ divmod128(ul, uh, vl, vh, &ql, &qh, NULL, NULL);
+
+ if (sgnu != sgnv) {
+ neg128(&qh, &ql);
+ }
+ }
+
+ if (rd != 0) {
+ env->gpr[rd] = ql;
+ env->gprh[rd] = qh;
+ }
+}
+
+void HELPER(irems128)(CPURISCVState *env, uint64_t rd,
+ uint64_t ul, uint64_t uh,
+ uint64_t vl, uint64_t vh)
+{
+ uint64_t rh, rl;
+ if (vl == 0 && vh == 0) {
+ rl = ul;
+ rh = uh;
+ } else {
+        /* Use unsigned divmod to build the signed remainder */
+ bool sgnu = (uh & 0x8000000000000000),
+ sgnv = (vh & 0x8000000000000000);
+
+ if (sgnu) {
+ neg128(&uh, &ul);
+ }
+
+ if (sgnv) {
+ neg128(&vh, &vl);
+ }
+
+ divmod128(ul, uh, vl, vh, NULL, NULL, &rl, &rh);
+
+ if (sgnu) {
+ neg128(&rh, &rl);
+ }
+ }
+
+ if (rd != 0) {
+ env->gpr[rd] = rl;
+ env->gprh[rd] = rh;
+ }
+}
+#endif
@@ -16,6 +16,7 @@ riscv_ss.add(files(
'gdbstub.c',
'op_helper.c',
'vector_helper.c',
+ 'm128_helper.c',
'bitmanip_helper.c',
'translate.c',
))
Add support for the M extension on 128-bit targets. Division and remainder
are implemented as helpers, using a simple implementation of Knuth's
Algorithm D.

Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
Co-authored-by: Fabien Portas <fabien.portas@grenoble-inp.org>
---
 target/riscv/helper.h                   |   8 +
 target/riscv/insn32.decode              |   7 +
 target/riscv/insn_trans/trans_rvm.c.inc | 456 +++++++++++++++++++++++-
 target/riscv/m128_helper.c              | 301 ++++++++++++++++
 target/riscv/meson.build                |   1 +
 5 files changed, 759 insertions(+), 14 deletions(-)
 create mode 100644 target/riscv/m128_helper.c
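
For local testing, the soft division can be cross-checked against the
compiler's native 128-bit arithmetic. The sketch below is not part of the
patch: it assumes divmod128() is compiled into the same program without its
static qualifier and that the host compiler provides unsigned __int128
(gcc/clang on 64-bit hosts); the xorshift seed and iteration count are
arbitrary choices.

    #include <stdint.h>
    #include <stdio.h>

    /* Routine under test, from m128_helper.c (static removed for the test) */
    void divmod128(uint64_t ul, uint64_t uh, uint64_t vl, uint64_t vh,
                   uint64_t *ql, uint64_t *qh, uint64_t *rl, uint64_t *rh);

    static uint64_t state = 0x123456789abcdef1ULL;

    static uint64_t rnd64(void)
    {
        /* xorshift64: cheap deterministic pseudo-random 64-bit values */
        state ^= state << 13;
        state ^= state >> 7;
        state ^= state << 17;
        return state;
    }

    int main(void)
    {
        for (int i = 0; i < 1000000; i++) {
            uint64_t ul = rnd64(), uh = rnd64(), vl = rnd64(), vh = rnd64();
            uint64_t ql, qh, rl, rh;

            /* Shrink the divisor periodically to hit the 1-3 digit paths */
            switch (i % 4) {
            case 1: vh = 0; break;
            case 2: vh = 0; vl &= 0xffffffff; break;
            case 3: vh = 0; vl = (vl & 0xffff) | 1; break;
            }
            if (vl == 0 && vh == 0) {
                continue; /* zero divisors are filtered by the helpers */
            }

            divmod128(ul, uh, vl, vh, &ql, &qh, &rl, &rh);

            unsigned __int128 u = ((unsigned __int128)uh << 64) | ul;
            unsigned __int128 v = ((unsigned __int128)vh << 64) | vl;
            unsigned __int128 q = ((unsigned __int128)qh << 64) | ql;
            unsigned __int128 r = ((unsigned __int128)rh << 64) | rl;

            if (q != u / v || r != u % v) {
                fprintf(stderr, "mismatch at iteration %d\n", i);
                return 1;
            }
        }
        printf("all checks passed\n");
        return 0;
    }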