@@ -24,7 +24,7 @@
#define SIGNBIT32 0x80000000
-int64_t helper_add_saturate(CPUTLGState *env, uint64_t rsrc, uint64_t rsrcb)
+int64_t helper_add_saturate(uint64_t rsrc, uint64_t rsrcb)
{
uint32_t rdst = rsrc + rsrcb;
@@ -53,6 +53,16 @@ uint64_t helper_cnttz(uint64_t arg)
return ctz64(arg);
}
+uint64_t helper_pcnt(uint64_t arg)
+{
+ return ctpop64(arg);
+}
+
+uint64_t helper_revbytes(uint64_t arg)
+{
+ return bswap64(arg);
+}
+
/*
* Functional Description
* uint64_t a = rf[SrcA];
@@ -1,5 +1,7 @@
DEF_HELPER_2(exception, noreturn, env, i32)
DEF_HELPER_FLAGS_1(cntlz, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_1(cnttz, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(pcnt, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(revbytes, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_3(shufflebytes, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
-DEF_HELPER_3(add_saturate, s64, env, i64, i64)
+DEF_HELPER_FLAGS_2(add_saturate, TCG_CALL_NO_RWG_SE, s64, i64, i64)
@@ -302,20 +302,104 @@ static void gen_mtspr(struct DisasContext *dc, uint8_t rsrc, uint16_t imm14)
set_exception(dc, TILEGX_EXCP_OPCODE_UNIMPLEMENTED);
}
-static void extract_v1(TCGv out, TCGv in, unsigned byte)
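+/* Extract the 'count'-th 'v'-byte element of 'in' (v = 1, 2 or 4),
+ * zero-extended, into 'out'. */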
+static void extract_v(TCGv out, TCGv in, int count, int v)
{
- tcg_gen_shri_i64(out, in, byte * 8);
- tcg_gen_ext8u_i64(out, out);
+ tcg_gen_shri_i64(out, in, count * v * 8);
+ switch (v) {
+ case 1:
+ tcg_gen_ext8u_i64(out, out);
+ break;
+ case 2:
+ tcg_gen_ext16u_i64(out, out);
+ break;
+ case 4:
+ tcg_gen_ext32u_i64(out, out);
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
-static void insert_v1(TCGv out, TCGv in, unsigned byte)
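+/* Deposit the low 'v' bytes of 'in' into the 'count'-th element of 'out'. */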
+static void insert_v(TCGv out, TCGv in, int count, int v)
{
- tcg_gen_deposit_i64(out, out, in, byte * 8, 8);
+ tcg_gen_deposit_i64(out, out, in, count * v * 8, v * 8);
}
-static void gen_v1cmpi(struct DisasContext *dc,
- uint8_t rdst, uint8_t rsrc, int8_t imm8,
- TCGCond cond, const char *code)
+static void gen_vadd(struct DisasContext *dc,
+ uint8_t rdst, uint8_t rsrc, uint8_t rsrcb, int v)
+{
+ int count;
+ TCGv vdst = dest_gr(dc, rdst);
+ TCGv vsrc = load_gr(dc, rsrc);
+ TCGv vsrcb = load_gr(dc, rsrcb);
+ TCGv tmp = tcg_temp_new_i64();
+ TCGv tmpb = tcg_temp_new_i64();
+
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "v%dadd r%d, r%d, r%d\n",
+ v, rdst, rsrc, rsrcb);
+
+ tcg_gen_movi_i64(vdst, 0);
+
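+ /* Operate lane by lane; insert_v deposits only the low v bytes of each
+ * sum, so carries never cross lane boundaries. */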
+ for (count = 0; count < sizeof(uint64_t) / v; count++) {
+ extract_v(tmp, vsrc, count, v);
+ extract_v(tmpb, vsrcb, count, v);
+ tcg_gen_add_i64(tmp, tmp, tmpb);
+ insert_v(vdst, tmp, count, v);
+ }
+
+ tcg_temp_free_i64(tmpb);
+ tcg_temp_free_i64(tmp);
+}
+
+static void gen_vsub(struct DisasContext *dc,
+ uint8_t rdst, uint8_t rsrc, uint8_t rsrcb, int v)
+{
+ int count;
+ TCGv vdst = dest_gr(dc, rdst);
+ TCGv vsrc = load_gr(dc, rsrc);
+ TCGv vsrcb = load_gr(dc, rsrcb);
+ TCGv tmp = tcg_temp_new_i64();
+ TCGv tmpb = tcg_temp_new_i64();
+
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "v%dsub r%d, r%d, r%d\n",
+ v, rdst, rsrc, rsrcb);
+
+ tcg_gen_movi_i64(vdst, 0);
+
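+ /* As in gen_vadd, borrows stay confined to their own v-byte lane. */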
+ for (count = 0; count < sizeof(uint64_t) / v; count++) {
+ extract_v(tmp, vsrc, count, v);
+ extract_v(tmpb, vsrcb, count, v);
+ tcg_gen_sub_i64(tmp, tmp, tmpb);
+ insert_v(vdst, tmp, count, v);
+ }
+
+ tcg_temp_free_i64(tmpb);
+ tcg_temp_free_i64(tmp);
+}
+
+static void gen_vaddi(struct DisasContext *dc,
+ uint8_t rdst, uint8_t rsrc, int8_t imm8, int v)
+{
+ int count;
+ TCGv vdst = dest_gr(dc, rdst);
+ TCGv vsrc = load_gr(dc, rsrc);
+ TCGv tmp = tcg_temp_new_i64();
+
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "v%daddi r%d, r%d, %d\n",
+ v, rdst, rsrc, imm8);
+
+ tcg_gen_movi_i64(vdst, 0);
+ for (count = 0; count < sizeof(uint64_t) / v; count++) {
+ extract_v(tmp, vsrc, count, v);
+ tcg_gen_addi_i64(tmp, tmp, imm8);
+ insert_v(vdst, tmp, count, v);
+ }
+ tcg_temp_free_i64(tmp);
+}
+
+static void gen_vcmpi(struct DisasContext *dc,
+ uint8_t rdst, uint8_t rsrc, int8_t imm8, int v,
+ TCGCond cond, const char *code)
{
int count;
TCGv vdst = dest_gr(dc, rdst);
@@ -326,17 +410,17 @@ static void gen_v1cmpi(struct DisasContext *dc,
code, rdst, rsrc, imm8);
tcg_gen_movi_i64(vdst, 0);
- for (count = 0; count < 8; count++) {
- extract_v1(tmp, vsrc, count);
+ for (count = 0; count < sizeof(uint64_t) / v; count++) {
+ extract_v(tmp, vsrc, count, v);
tcg_gen_setcondi_i64(cond, tmp, tmp, imm8);
- insert_v1(vdst, tmp, count);
+ insert_v(vdst, tmp, count, v);
}
tcg_temp_free_i64(tmp);
}
-static void gen_v1cmp(struct DisasContext *dc,
- uint8_t rdst, uint8_t rsrc, uint8_t rsrcb,
- TCGCond cond, const char *code)
+static void gen_vcmp(struct DisasContext *dc,
+ uint8_t rdst, uint8_t rsrc, uint8_t rsrcb, int v,
+ TCGCond cond, const char *code)
{
int count;
TCGv vdst = dest_gr(dc, rdst);
@@ -349,33 +433,33 @@ static void gen_v1cmp(struct DisasContext *dc,
code, rdst, rsrc, rsrcb);
tcg_gen_movi_i64(vdst, 0);
- for (count = 0; count < 8; count++) {
- extract_v1(tmp, vsrc, count);
- extract_v1(tmp2, vsrcb, count);
+ for (count = 0; count < sizeof(uint64_t) / v; count++) {
+ extract_v(tmp, vsrc, count, v);
+ extract_v(tmp2, vsrcb, count, v);
tcg_gen_setcond_i64(cond, tmp, tmp, tmp2);
- insert_v1(vdst, tmp, count);
+ insert_v(vdst, tmp, count, v);
}
tcg_temp_free_i64(tmp2);
tcg_temp_free_i64(tmp);
}
-static void gen_v1shrui(struct DisasContext *dc,
- uint8_t rdst, uint8_t rsrc, uint8_t shamt)
+static void gen_vshrui(struct DisasContext *dc,
+ uint8_t rdst, uint8_t rsrc, uint8_t shamt, int v)
{
int count;
TCGv vdst = dest_gr(dc, rdst);
TCGv vsrc = load_gr(dc, rsrc);
TCGv tmp = tcg_temp_new_i64();
- qemu_log_mask(CPU_LOG_TB_IN_ASM, "v1shrui r%d, r%d, %u\n",
- rdst, rsrc, shamt);
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "v%dshrui r%d, r%d, %u\n",
+ v, rdst, rsrc, shamt);
-    shamt &= 7;
+    shamt &= v * 8 - 1;
tcg_gen_movi_i64(vdst, 0);
- for (count = 0; count < 8; count++) {
- extract_v1(tmp, vsrc, count);
+ for (count = 0; count < sizeof(uint64_t) / v; count++) {
+ extract_v(tmp, vsrc, count, v);
tcg_gen_shri_i64(tmp, tmp, shamt);
- insert_v1(vdst, tmp, count);
+ insert_v(vdst, tmp, count, v);
}
tcg_temp_free_i64(tmp);
}
@@ -404,8 +488,8 @@ static void gen_v1shrui(struct DisasContext *dc,
* }
* rf[Dest] = output;
*/
-static void gen_v1int_l(struct DisasContext *dc,
- uint8_t rdst, uint8_t rsrc, uint8_t rsrcb)
+static void gen_vint_l(struct DisasContext *dc,
+ uint8_t rdst, uint8_t rsrc, uint8_t rsrcb, int v)
{
int count;
TCGv vdst = dest_gr(dc, rdst);
@@ -413,19 +497,20 @@ static void gen_v1int_l(struct DisasContext *dc,
TCGv vsrcb = load_gr(dc, rsrcb);
TCGv tmp = tcg_temp_new_i64();
- qemu_log_mask(CPU_LOG_TB_IN_ASM, "v1int_l r%d, r%d, r%d\n",
- rdst, rsrc, rsrcb);
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "v%dint_l r%d, r%d, r%d\n",
+ v, rdst, rsrc, rsrcb);
tcg_gen_movi_i64(vdst, 0);
- for (count = 0; count < 4; count++) {
- extract_v1(tmp, vsrc, count);
- insert_v1(vdst, tmp, 2 * count + 1);
- extract_v1(tmp, vsrcb, count);
- insert_v1(vdst, tmp, 2 * count);
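+ /* Interleave the low halves: lanes of rsrc fill the odd elements of the
+ * result and lanes of rsrcb fill the even elements. */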
+ for (count = 0; count < sizeof(uint64_t) / (v * 2); count++) {
+ extract_v(tmp, vsrc, count, v);
+ insert_v(vdst, tmp, 2 * count + 1, v);
+ extract_v(tmp, vsrcb, count, v);
+ insert_v(vdst, tmp, 2 * count, v);
}
tcg_temp_free_i64(tmp);
}
+/* Keep the dedicated v4int_l implementation as a fast path. */
static void gen_v4int_l(struct DisasContext *dc,
uint8_t rdst, uint8_t rsrc, uint8_t rsrcb)
{
@@ -496,7 +581,7 @@ static void gen_addxsc(struct DisasContext *dc,
{
qemu_log_mask(CPU_LOG_TB_IN_ASM, "addxsc r%d, r%d, r%d\n",
rdst, rsrc, rsrcb);
- gen_helper_add_saturate(dest_gr(dc, rdst), cpu_env,
+ gen_helper_add_saturate(dest_gr(dc, rdst),
load_gr(dc, rsrc), load_gr(dc, rsrcb));
}
@@ -910,6 +995,18 @@ static void gen_cnttz(struct DisasContext *dc, uint8_t rdst, uint8_t rsrc)
gen_helper_cnttz(dest_gr(dc, rdst), load_gr(dc, rsrc));
}
+static void gen_pcnt(struct DisasContext *dc, uint8_t rdst, uint8_t rsrc)
+{
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "pcnt r%d, r%d\n", rdst, rsrc);
+ gen_helper_pcnt(dest_gr(dc, rdst), load_gr(dc, rsrc));
+}
+
+static void gen_revbytes(struct DisasContext *dc, uint8_t rdst, uint8_t rsrc)
+{
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "revbytes r%d, r%d\n", rdst, rsrc);
+ gen_helper_revbytes(dest_gr(dc, rdst), load_gr(dc, rsrc));
+}
+
static void gen_ld(struct DisasContext *dc,
uint8_t rdst, uint8_t rsrc,
TCGMemOp ops, const char *code)
@@ -1008,6 +1105,18 @@ static void gen_wh64(struct DisasContext *dc, uint8_t rsrc)
/* FIXME: Do we need any implementation for it? I guess no. */
}
+static void gen_icoh(struct DisasContext *dc, uint8_t rsrc)
+{
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "icoh r%d\n", rsrc);
+ /* FIXME: Do we need any implementation for this? Probably not. */
+}
+
+static void gen_drain(struct DisasContext *dc)
+{
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "drain\n");
+ /* FIXME: Do we need any implementation for this? Probably not. */
+}
+
static void gen_jr(struct DisasContext *dc, uint8_t rsrc)
{
qemu_log_mask(CPU_LOG_TB_IN_ASM, "jr(p) r%d\n", rsrc);
@@ -1096,6 +1205,12 @@ static void decode_u_opcode_ex_y0(struct DisasContext *dc,
case CNTTZ_UNARY_OPCODE_Y0:
gen_cnttz(dc, rdst, rsrc);
return;
+ case PCNT_UNARY_OPCODE_Y0:
+ gen_pcnt(dc, rdst, rsrc);
+ return;
+ case REVBYTES_UNARY_OPCODE_Y0:
+ gen_revbytes(dc, rdst, rsrc);
+ return;
case FNOP_UNARY_OPCODE_Y0:
case NOP_UNARY_OPCODE_Y0:
if (!rsrc && !rdst) {
@@ -1104,9 +1219,7 @@ static void decode_u_opcode_ex_y0(struct DisasContext *dc,
}
/* Fall through */
case FSINGLE_PACK1_UNARY_OPCODE_Y0:
- case PCNT_UNARY_OPCODE_Y0:
case REVBITS_UNARY_OPCODE_Y0:
- case REVBYTES_UNARY_OPCODE_Y0:
case TBLIDXB0_UNARY_OPCODE_Y0:
case TBLIDXB1_UNARY_OPCODE_Y0:
case TBLIDXB2_UNARY_OPCODE_Y0:
@@ -1603,9 +1716,7 @@ static void decode_ldst2_opcode_y2(struct DisasContext *dc,
gen_ld(dc, rsrcbdst, rsrc, MO_LEUL, "ld4u");
return;
case MODE_OPCODE_YA2:
- qemu_log_mask(LOG_UNIMP,
- "UNIMP ldst2_opcode_y2, [" FMT64X "]\n", bundle);
- set_exception(dc, TILEGX_EXCP_OPCODE_UNIMPLEMENTED);
+ gen_ld(dc, rsrcbdst, rsrc, MO_LESW, "ld2s");
return;
default:
g_assert_not_reached();
@@ -1690,25 +1801,35 @@ static void decode_imm8_opcode_x0(struct DisasContext *dc,
case ORI_IMM8_OPCODE_X0:
gen_ori(dc, rdst, rsrc, imm8);
return;
+ case V1ADDI_IMM8_OPCODE_X0:
+ gen_vaddi(dc, rdst, rsrc, imm8, 1);
+ return;
case V1CMPEQI_IMM8_OPCODE_X0:
- gen_v1cmpi(dc, rdst, rsrc, imm8, TCG_COND_EQ, "v1cmpeqi");
+ gen_vcmpi(dc, rdst, rsrc, imm8, 1, TCG_COND_EQ, "v1cmpeqi");
return;
case V1CMPLTSI_IMM8_OPCODE_X0:
- gen_v1cmpi(dc, rdst, rsrc, imm8, TCG_COND_LT, "v1cmpltsi");
+ gen_vcmpi(dc, rdst, rsrc, imm8, 1, TCG_COND_LT, "v1cmpltsi");
return;
case V1CMPLTUI_IMM8_OPCODE_X0:
- gen_v1cmpi(dc, rdst, rsrc, imm8, TCG_COND_LTU, "v1cmpltui");
+ gen_vcmpi(dc, rdst, rsrc, imm8, 1, TCG_COND_LTU, "v1cmpltui");
+ return;
+ case V2ADDI_IMM8_OPCODE_X0:
+ gen_vaddi(dc, rdst, rsrc, imm8, 2);
+ return;
+ case V2CMPEQI_IMM8_OPCODE_X0:
+ gen_vcmpi(dc, rdst, rsrc, imm8, 2, TCG_COND_EQ, "v2cmpeqi");
+ return;
+ case V2CMPLTSI_IMM8_OPCODE_X0:
+ gen_vcmpi(dc, rdst, rsrc, imm8, 2, TCG_COND_LT, "v2cmpltsi");
+ return;
+ case V2CMPLTUI_IMM8_OPCODE_X0:
+ gen_vcmpi(dc, rdst, rsrc, imm8, 2, TCG_COND_LTU, "v2cmpltui");
return;
case XORI_IMM8_OPCODE_X0:
gen_xori(dc, rdst, rsrc, imm8);
return;
- case V1ADDI_IMM8_OPCODE_X0:
case V1MAXUI_IMM8_OPCODE_X0:
case V1MINUI_IMM8_OPCODE_X0:
- case V2ADDI_IMM8_OPCODE_X0:
- case V2CMPEQI_IMM8_OPCODE_X0:
- case V2CMPLTSI_IMM8_OPCODE_X0:
- case V2CMPLTUI_IMM8_OPCODE_X0:
case V2MAXSI_IMM8_OPCODE_X0:
case V2MINSI_IMM8_OPCODE_X0:
qemu_log_mask(LOG_UNIMP,
@@ -1733,6 +1854,12 @@ static void decode_u_opcode_ex_x0(struct DisasContext *dc,
case CNTTZ_UNARY_OPCODE_X0:
gen_cnttz(dc, rdst, rsrc);
return;
+ case PCNT_UNARY_OPCODE_X0:
+ gen_pcnt(dc, rdst, rsrc);
+ return;
+ case REVBYTES_UNARY_OPCODE_X0:
+ gen_revbytes(dc, rdst, rsrc);
+ return;
case FNOP_UNARY_OPCODE_X0:
case NOP_UNARY_OPCODE_X0:
if (!rsrc && !rdst) {
@@ -1741,9 +1868,7 @@ static void decode_u_opcode_ex_x0(struct DisasContext *dc,
}
/* Fall through */
case FSINGLE_PACK1_UNARY_OPCODE_X0:
- case PCNT_UNARY_OPCODE_X0:
case REVBITS_UNARY_OPCODE_X0:
- case REVBYTES_UNARY_OPCODE_X0:
case TBLIDXB0_UNARY_OPCODE_X0:
case TBLIDXB1_UNARY_OPCODE_X0:
case TBLIDXB2_UNARY_OPCODE_X0:
@@ -1950,28 +2075,68 @@ static void decode_rrr_0_opcode_x0(struct DisasContext *dc,
case UNARY_RRR_0_OPCODE_X0:
return decode_u_opcode_ex_x0(dc, bundle);
case V1INT_L_RRR_0_OPCODE_X0:
- gen_v1int_l(dc, rdst, rsrc, rsrcb);
+ gen_vint_l(dc, rdst, rsrc, rsrcb, 1);
return;
- case V4INT_L_RRR_0_OPCODE_X0:
- gen_v4int_l(dc, rdst, rsrc, rsrcb);
+ case V1ADD_RRR_0_OPCODE_X0:
+ gen_vadd(dc, rdst, rsrc, rsrcb, 1);
+ return;
+ case V1SUB_RRR_0_OPCODE_X0:
+ gen_vsub(dc, rdst, rsrc, rsrcb, 1);
+ return;
+ case V2ADD_RRR_0_OPCODE_X0:
+ gen_vadd(dc, rdst, rsrc, rsrcb, 2);
+ return;
+ case V2SUB_RRR_0_OPCODE_X0:
+ gen_vsub(dc, rdst, rsrc, rsrcb, 2);
+ return;
+ case V4ADD_RRR_0_OPCODE_X0:
+ gen_vadd(dc, rdst, rsrc, rsrcb, 4);
+ return;
+ case V4SUB_RRR_0_OPCODE_X0:
+ gen_vsub(dc, rdst, rsrc, rsrcb, 4);
return;
case V1CMPEQ_RRR_0_OPCODE_X0:
- gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_EQ, "v1cmpeq");
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_EQ, "v1cmpeq");
return;
case V1CMPLES_RRR_0_OPCODE_X0:
- gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_LE, "v1cmples");
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_LE, "v1cmples");
return;
case V1CMPLEU_RRR_0_OPCODE_X0:
- gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_LEU, "v1cmpleu");
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_LEU, "v1cmpleu");
return;
case V1CMPLTS_RRR_0_OPCODE_X0:
- gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_LT, "v1cmplts");
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_LT, "v1cmplts");
return;
case V1CMPLTU_RRR_0_OPCODE_X0:
- gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_LTU, "v1cmpltu");
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_LTU, "v1cmpltu");
return;
case V1CMPNE_RRR_0_OPCODE_X0:
- gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_NE, "v1cmpne");
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_NE, "v1cmpne");
+ return;
+ case V2CMPEQ_RRR_0_OPCODE_X0:
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_EQ, "v2cmpeq");
+ return;
+ case V2CMPLES_RRR_0_OPCODE_X0:
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_LE, "v2cmples");
+ return;
+ case V2CMPLEU_RRR_0_OPCODE_X0:
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_LEU, "v2cmpleu");
+ return;
+ case V2CMPLTS_RRR_0_OPCODE_X0:
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_LT, "v2cmplts");
+ return;
+ case V2CMPLTU_RRR_0_OPCODE_X0:
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_LTU, "v2cmpltu");
+ return;
+ case V2CMPNE_RRR_0_OPCODE_X0:
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_NE, "v2cmpne");
+ return;
+ case V2INT_L_RRR_0_OPCODE_X0:
+ gen_vint_l(dc, rdst, rsrc, rsrcb, 2);
+ return;
+ case V4INT_L_RRR_0_OPCODE_X0:
+ /* v4int_l is a little faster than the generic vint_l */
+ gen_v4int_l(dc, rdst, rsrc, rsrcb);
return;
case XOR_RRR_0_OPCODE_X0:
gen_xor(dc, rdst, rsrc, rsrcb);
@@ -1988,8 +2153,8 @@ static void decode_rrr_0_opcode_x0(struct DisasContext *dc,
case DBLALIGN2_RRR_0_OPCODE_X0:
case DBLALIGN4_RRR_0_OPCODE_X0:
case DBLALIGN6_RRR_0_OPCODE_X0:
- case FDOUBLE_ADDSUB_RRR_0_OPCODE_X0:
case FDOUBLE_ADD_FLAGS_RRR_0_OPCODE_X0:
+ case FDOUBLE_ADDSUB_RRR_0_OPCODE_X0:
case FDOUBLE_MUL_FLAGS_RRR_0_OPCODE_X0:
case FDOUBLE_PACK1_RRR_0_OPCODE_X0:
case FDOUBLE_PACK2_RRR_0_OPCODE_X0:
@@ -2000,11 +2165,10 @@ static void decode_rrr_0_opcode_x0(struct DisasContext *dc,
case FSINGLE_ADDSUB2_RRR_0_OPCODE_X0:
case FSINGLE_MUL1_RRR_0_OPCODE_X0:
case FSINGLE_MUL2_RRR_0_OPCODE_X0:
- case FSINGLE_PACK2_RRR_0_OPCODE_X0:
case FSINGLE_SUB1_RRR_0_OPCODE_X0:
+ case FSINGLE_PACK2_RRR_0_OPCODE_X0:
case SUBXSC_RRR_0_OPCODE_X0:
case V1ADDUC_RRR_0_OPCODE_X0:
- case V1ADD_RRR_0_OPCODE_X0:
case V1ADIFFU_RRR_0_OPCODE_X0:
case V1AVGU_RRR_0_OPCODE_X0:
case V1DDOTPUSA_RRR_0_OPCODE_X0:
@@ -2026,23 +2190,13 @@ static void decode_rrr_0_opcode_x0(struct DisasContext *dc,
case V1SHRS_RRR_0_OPCODE_X0:
case V1SHRU_RRR_0_OPCODE_X0:
case V1SUBUC_RRR_0_OPCODE_X0:
- case V1SUB_RRR_0_OPCODE_X0:
case V1INT_H_RRR_0_OPCODE_X0:
- case V2INT_H_RRR_0_OPCODE_X0:
- case V2INT_L_RRR_0_OPCODE_X0:
- case V4INT_H_RRR_0_OPCODE_X0:
case V2ADDSC_RRR_0_OPCODE_X0:
- case V2ADD_RRR_0_OPCODE_X0:
case V2ADIFFS_RRR_0_OPCODE_X0:
case V2AVGS_RRR_0_OPCODE_X0:
- case V2CMPEQ_RRR_0_OPCODE_X0:
- case V2CMPLES_RRR_0_OPCODE_X0:
- case V2CMPLEU_RRR_0_OPCODE_X0:
- case V2CMPLTS_RRR_0_OPCODE_X0:
- case V2CMPLTU_RRR_0_OPCODE_X0:
- case V2CMPNE_RRR_0_OPCODE_X0:
case V2DOTPA_RRR_0_OPCODE_X0:
case V2DOTP_RRR_0_OPCODE_X0:
+ case V2INT_H_RRR_0_OPCODE_X0:
case V2MAXS_RRR_0_OPCODE_X0:
case V2MINS_RRR_0_OPCODE_X0:
case V2MNZ_RRR_0_OPCODE_X0:
@@ -2062,16 +2216,14 @@ static void decode_rrr_0_opcode_x0(struct DisasContext *dc,
case V2SHRS_RRR_0_OPCODE_X0:
case V2SHRU_RRR_0_OPCODE_X0:
case V2SUBSC_RRR_0_OPCODE_X0:
- case V2SUB_RRR_0_OPCODE_X0:
case V4ADDSC_RRR_0_OPCODE_X0:
- case V4ADD_RRR_0_OPCODE_X0:
+ case V4INT_H_RRR_0_OPCODE_X0:
case V4PACKSC_RRR_0_OPCODE_X0:
case V4SHLSC_RRR_0_OPCODE_X0:
case V4SHL_RRR_0_OPCODE_X0:
case V4SHRS_RRR_0_OPCODE_X0:
case V4SHRU_RRR_0_OPCODE_X0:
case V4SUBSC_RRR_0_OPCODE_X0:
- case V4SUB_RRR_0_OPCODE_X0:
case V1DDOTPUA_RRR_0_OPCODE_X0:
case V1DDOTPU_RRR_0_OPCODE_X0:
case V1DOTPUA_RRR_0_OPCODE_X0:
@@ -2112,13 +2264,15 @@ static void decode_shift_opcode_x0(struct DisasContext *dc,
gen_shruxi(dc, rdst, rsrc, shamt);
return;
case V1SHRUI_SHIFT_OPCODE_X0:
- gen_v1shrui(dc, rdst, rsrc, shamt);
+ gen_vshrui(dc, rdst, rsrc, shamt, 1);
+ return;
+ case V2SHRUI_SHIFT_OPCODE_X0:
+ gen_vshrui(dc, rdst, rsrc, shamt, 2);
return;
case V1SHLI_SHIFT_OPCODE_X0:
case V1SHRSI_SHIFT_OPCODE_X0:
case V2SHLI_SHIFT_OPCODE_X0:
case V2SHRSI_SHIFT_OPCODE_X0:
- case V2SHRUI_SHIFT_OPCODE_X0:
qemu_log_mask(LOG_UNIMP,
"UNIMP shift_opcode_x0, [" FMT64X "]\n", bundle);
set_exception(dc, TILEGX_EXCP_OPCODE_UNIMPLEMENTED);
@@ -2242,14 +2396,29 @@ static void decode_imm8_opcode_x1(struct DisasContext *dc,
case ST4_ADD_IMM8_OPCODE_X1:
gen_st_add(dc, rsrc, rsrcb, dimm8, MO_LEUL, "st4_add");
return;
+ case V1ADDI_IMM8_OPCODE_X1:
+ gen_vaddi(dc, rdst, rsrc, imm8, 1);
+ return;
case V1CMPEQI_IMM8_OPCODE_X1:
- gen_v1cmpi(dc, rdst, rsrc, imm8, TCG_COND_EQ, "v1cmpeqi");
+ gen_vcmpi(dc, rdst, rsrc, imm8, 1, TCG_COND_EQ, "v1cmpeqi");
return;
case V1CMPLTSI_IMM8_OPCODE_X1:
- gen_v1cmpi(dc, rdst, rsrc, imm8, TCG_COND_LT, "v1cmpltsi");
+ gen_vcmpi(dc, rdst, rsrc, imm8, 1, TCG_COND_LT, "v1cmpltsi");
return;
case V1CMPLTUI_IMM8_OPCODE_X1:
- gen_v1cmpi(dc, rdst, rsrc, imm8, TCG_COND_LTU, "v1cmpltui");
+ gen_vcmpi(dc, rdst, rsrc, imm8, 1, TCG_COND_LTU, "v1cmpltui");
+ return;
+ case V2ADDI_IMM8_OPCODE_X1:
+ gen_vaddi(dc, rdst, rsrc, imm8, 2);
+ return;
+ case V2CMPEQI_IMM8_OPCODE_X1:
+ gen_vcmpi(dc, rdst, rsrc, imm8, 2, TCG_COND_EQ, "v2cmpeqi");
+ return;
+ case V2CMPLTSI_IMM8_OPCODE_X1:
+ gen_vcmpi(dc, rdst, rsrc, imm8, 2, TCG_COND_LT, "v2cmpltsi");
+ return;
+ case V2CMPLTUI_IMM8_OPCODE_X1:
+ gen_vcmpi(dc, rdst, rsrc, imm8, 2, TCG_COND_LTU, "v2cmpltui");
return;
case XORI_IMM8_OPCODE_X1:
gen_xori(dc, rdst, rsrc, imm8);
@@ -2266,13 +2435,8 @@ static void decode_imm8_opcode_x1(struct DisasContext *dc,
case STNT2_ADD_IMM8_OPCODE_X1:
case STNT4_ADD_IMM8_OPCODE_X1:
case STNT_ADD_IMM8_OPCODE_X1:
- case V1ADDI_IMM8_OPCODE_X1:
case V1MAXUI_IMM8_OPCODE_X1:
case V1MINUI_IMM8_OPCODE_X1:
- case V2ADDI_IMM8_OPCODE_X1:
- case V2CMPEQI_IMM8_OPCODE_X1:
- case V2CMPLTSI_IMM8_OPCODE_X1:
- case V2CMPLTUI_IMM8_OPCODE_X1:
case V2MAXSI_IMM8_OPCODE_X1:
case V2MINSI_IMM8_OPCODE_X1:
qemu_log_mask(LOG_UNIMP,
@@ -2308,6 +2472,12 @@ static void decode_u_opcode_ex_x1(struct DisasContext *dc,
uint8_t rdst = get_Dest_X1(bundle);
switch (get_UnaryOpcodeExtension_X1(bundle)) {
+ case DRAIN_UNARY_OPCODE_X1:
+ if (!rdst && !rsrc) {
+ gen_drain(dc);
+ return;
+ }
+ break;
case NOP_UNARY_OPCODE_X1:
case FNOP_UNARY_OPCODE_X1:
if (!rdst && !rsrc) {
@@ -2315,6 +2485,12 @@ static void decode_u_opcode_ex_x1(struct DisasContext *dc,
return;
}
break;
+ case ICOH_UNARY_OPCODE_X1:
+ if (!rdst) {
+ gen_icoh(dc, rsrc);
+ return;
+ }
+ break;
case JALRP_UNARY_OPCODE_X1:
case JALR_UNARY_OPCODE_X1:
if (!rdst) {
@@ -2381,12 +2557,10 @@ static void decode_u_opcode_ex_x1(struct DisasContext *dc,
return;
}
break;
- case DRAIN_UNARY_OPCODE_X1:
case DTLBPR_UNARY_OPCODE_X1:
case FINV_UNARY_OPCODE_X1:
case FLUSHWB_UNARY_OPCODE_X1:
case FLUSH_UNARY_OPCODE_X1:
- case ICOH_UNARY_OPCODE_X1:
case ILL_UNARY_OPCODE_X1:
case INV_UNARY_OPCODE_X1:
case LDNT1S_UNARY_OPCODE_X1:
@@ -2576,29 +2750,69 @@ static void decode_rrr_0_opcode_x1(struct DisasContext *dc,
break;
case UNARY_RRR_0_OPCODE_X1:
return decode_u_opcode_ex_x1(dc, bundle);
- case V1INT_L_RRR_0_OPCODE_X1:
- gen_v1int_l(dc, rdst, rsrc, rsrcb);
- return;
- case V4INT_L_RRR_0_OPCODE_X1:
- gen_v4int_l(dc, rdst, rsrc, rsrcb);
+ case V1ADD_RRR_0_OPCODE_X1:
+ gen_vadd(dc, rdst, rsrc, rsrcb, 1);
return;
case V1CMPEQ_RRR_0_OPCODE_X1:
- gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_EQ, "v1cmpeq");
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_EQ, "v1cmpeq");
return;
case V1CMPLES_RRR_0_OPCODE_X1:
- gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_LE, "v1cmples");
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_LE, "v1cmples");
return;
case V1CMPLEU_RRR_0_OPCODE_X1:
- gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_LEU, "v1cmpleu");
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_LEU, "v1cmpleu");
return;
case V1CMPLTS_RRR_0_OPCODE_X1:
- gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_LT, "v1cmplts");
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_LT, "v1cmplts");
return;
case V1CMPLTU_RRR_0_OPCODE_X1:
- gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_LTU, "v1cmpltu");
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_LTU, "v1cmpltu");
return;
case V1CMPNE_RRR_0_OPCODE_X1:
- gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_NE, "v1cmpne");
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_NE, "v1cmpne");
+ return;
+ case V1INT_L_RRR_0_OPCODE_X1:
+ gen_vint_l(dc, rdst, rsrc, rsrcb, 1);
+ return;
+ case V1SUB_RRR_0_OPCODE_X1:
+ gen_vsub(dc, rdst, rsrc, rsrcb, 1);
+ return;
+ case V2ADD_RRR_0_OPCODE_X1:
+ gen_vadd(dc, rdst, rsrc, rsrcb, 2);
+ return;
+ case V2CMPEQ_RRR_0_OPCODE_X1:
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_EQ, "v2cmpeq");
+ return;
+ case V2CMPLES_RRR_0_OPCODE_X1:
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_LE, "v2cmples");
+ return;
+ case V2CMPLEU_RRR_0_OPCODE_X1:
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_LEU, "v2cmpleu");
+ return;
+ case V2CMPLTS_RRR_0_OPCODE_X1:
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_LT, "v2cmplts");
+ return;
+ case V2CMPLTU_RRR_0_OPCODE_X1:
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_LTU, "v2cmpltu");
+ return;
+ case V2CMPNE_RRR_0_OPCODE_X1:
+ gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_NE, "v2cmpne");
+ return;
+ case V2INT_L_RRR_0_OPCODE_X1:
+ gen_vint_l(dc, rdst, rsrc, rsrcb, 2);
+ return;
+ case V2SUB_RRR_0_OPCODE_X1:
+ gen_vsub(dc, rdst, rsrc, rsrcb, 2);
+ return;
+ case V4ADD_RRR_0_OPCODE_X1:
+ gen_vadd(dc, rdst, rsrc, rsrcb, 4);
+ return;
+ case V4INT_L_RRR_0_OPCODE_X1:
+ /* v4int_l is a little faster than the generic vint_l */
+ gen_v4int_l(dc, rdst, rsrc, rsrcb);
+ return;
+ case V4SUB_RRR_0_OPCODE_X1:
+ gen_vsub(dc, rdst, rsrc, rsrcb, 4);
return;
case XOR_RRR_0_OPCODE_X1:
gen_xor(dc, rdst, rsrc, rsrcb);
@@ -2613,10 +2827,8 @@ static void decode_rrr_0_opcode_x1(struct DisasContext *dc,
case SUBXSC_RRR_0_OPCODE_X1:
case V1INT_H_RRR_0_OPCODE_X1:
case V2INT_H_RRR_0_OPCODE_X1:
- case V2INT_L_RRR_0_OPCODE_X1:
case V4INT_H_RRR_0_OPCODE_X1:
case V1ADDUC_RRR_0_OPCODE_X1:
- case V1ADD_RRR_0_OPCODE_X1:
case V1MAXU_RRR_0_OPCODE_X1:
case V1MINU_RRR_0_OPCODE_X1:
case V1MNZ_RRR_0_OPCODE_X1:
@@ -2625,15 +2837,7 @@ static void decode_rrr_0_opcode_x1(struct DisasContext *dc,
case V1SHRS_RRR_0_OPCODE_X1:
case V1SHRU_RRR_0_OPCODE_X1:
case V1SUBUC_RRR_0_OPCODE_X1:
- case V1SUB_RRR_0_OPCODE_X1:
case V2ADDSC_RRR_0_OPCODE_X1:
- case V2ADD_RRR_0_OPCODE_X1:
- case V2CMPEQ_RRR_0_OPCODE_X1:
- case V2CMPLES_RRR_0_OPCODE_X1:
- case V2CMPLEU_RRR_0_OPCODE_X1:
- case V2CMPLTS_RRR_0_OPCODE_X1:
- case V2CMPLTU_RRR_0_OPCODE_X1:
- case V2CMPNE_RRR_0_OPCODE_X1:
case V2MAXS_RRR_0_OPCODE_X1:
case V2MINS_RRR_0_OPCODE_X1:
case V2MNZ_RRR_0_OPCODE_X1:
@@ -2646,16 +2850,13 @@ static void decode_rrr_0_opcode_x1(struct DisasContext *dc,
case V2SHRS_RRR_0_OPCODE_X1:
case V2SHRU_RRR_0_OPCODE_X1:
case V2SUBSC_RRR_0_OPCODE_X1:
- case V2SUB_RRR_0_OPCODE_X1:
case V4ADDSC_RRR_0_OPCODE_X1:
- case V4ADD_RRR_0_OPCODE_X1:
case V4PACKSC_RRR_0_OPCODE_X1:
case V4SHLSC_RRR_0_OPCODE_X1:
case V4SHL_RRR_0_OPCODE_X1:
case V4SHRS_RRR_0_OPCODE_X1:
case V4SHRU_RRR_0_OPCODE_X1:
case V4SUBSC_RRR_0_OPCODE_X1:
- case V4SUB_RRR_0_OPCODE_X1:
break;
default:
g_assert_not_reached();
@@ -2692,13 +2893,15 @@ static void decode_shift_opcode_x1(struct DisasContext *dc,
gen_shruxi(dc, rdst, rsrc, shamt);
return;
case V1SHRUI_SHIFT_OPCODE_X1:
- gen_v1shrui(dc, rdst, rsrc, shamt);
+ gen_vshrui(dc, rdst, rsrc, shamt, 1);
+ return;
+ case V2SHRUI_SHIFT_OPCODE_X1:
+ gen_vshrui(dc, rdst, rsrc, shamt, 2);
return;
case V1SHLI_SHIFT_OPCODE_X1:
case V1SHRSI_SHIFT_OPCODE_X1:
case V2SHLI_SHIFT_OPCODE_X1:
case V2SHRSI_SHIFT_OPCODE_X1:
- case V2SHRUI_SHIFT_OPCODE_X1:
qemu_log_mask(LOG_UNIMP,
"UNIMP shift_opcode_x1, [" FMT64X "]\n", bundle);
set_exception(dc, TILEGX_EXCP_OPCODE_UNIMPLEMENTED);
The new instructions are vectors, pcnt, revbytes, icoh, and drain.

Signed-off-by: Chen Gang <gang.chen.5i5j@gmail.com>
---
 target-tilegx/helper.c    |  12 +-
 target-tilegx/helper.h    |   4 +-
 target-tilegx/translate.c | 431 ++++++++++++++++++++++++++++++++++------------
 3 files changed, 331 insertions(+), 116 deletions(-)
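
For reference, a minimal C model of the lane-wise arithmetic that gen_vadd
and gen_vsub generate above (an illustrative sketch only, not part of the
patch; the function name is invented):

static inline uint64_t vadd_model(uint64_t a, uint64_t b, int v)
{
    /* v is the lane width in bytes: 1, 2, or 4. */
    uint64_t mask = (1ULL << (v * 8)) - 1;
    uint64_t out = 0;
    int i;

    for (i = 0; i < 8 / v; i++) {
        uint64_t la = (a >> (i * v * 8)) & mask;
        uint64_t lb = (b >> (i * v * 8)) & mask;
        /* Truncate each sum to the lane width: carries are dropped at
         * lane boundaries, mirroring extract_v/insert_v. */
        out |= ((la + lb) & mask) << (i * v * 8);
    }
    return out;
}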