From patchwork Thu Aug 20 21:48:49 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chen Gang X-Patchwork-Id: 509200 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 16700140773 for ; Fri, 21 Aug 2015 09:34:00 +1000 (AEST) Received: from localhost ([::1]:37876 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZSZLK-0008Ri-6H for incoming@patchwork.ozlabs.org; Thu, 20 Aug 2015 19:33:58 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:50018) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZSXg1-0001GQ-T8 for qemu-devel@nongnu.org; Thu, 20 Aug 2015 17:47:16 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ZSXfw-0000F7-Jn for qemu-devel@nongnu.org; Thu, 20 Aug 2015 17:47:13 -0400 Received: from blu004-omc1s8.hotmail.com ([65.55.116.19]:52074) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZSXfw-0000E7-Dv for qemu-devel@nongnu.org; Thu, 20 Aug 2015 17:47:08 -0400 Received: from BLU436-SMTP121 ([65.55.116.9]) by BLU004-OMC1S8.hotmail.com over TLS secured channel with Microsoft SMTPSVC(7.5.7601.23008); Thu, 20 Aug 2015 14:47:08 -0700 X-TMN: [g8z6QM9BFS9oOjH3/rSS/07JXRQoMvXU] X-Originating-Email: [xili_gchen_5257@hotmail.com] Message-ID: Date: Fri, 21 Aug 2015 05:48:49 +0800 From: Chen Gang User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:31.0) Gecko/20100101 Thunderbird/31.7.0 MIME-Version: 1.0 To: Peter Maydell , "rth@twiddle.net" , Chris Metcalf , =?windows-1252?Q?Andreas_F=E4rber?= , "walt@tilera.com" , Riku Voipio References: In-Reply-To: X-OriginalArrivalTime: 20 Aug 2015 21:47:05.0672 (UTC) FILETIME=[C17D9C80:01D0DB91] 
X-detected-operating-system: by eggs.gnu.org: Windows 7 or 8 [fuzzy] X-Received-From: 65.55.116.19 Cc: qemu-devel Subject: [Qemu-devel] [PATCH 16/16 v1] target-tilegx: Implement additional instructions in normal working flow X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org They are vectors, pcnt, revbytes, icoh, and drain. Signed-off-by: Chen Gang --- target-tilegx/helper.c | 12 +- target-tilegx/helper.h | 4 +- target-tilegx/translate.c | 431 ++++++++++++++++++++++++++++++++++------------ 3 files changed, 331 insertions(+), 116 deletions(-) diff --git a/target-tilegx/helper.c b/target-tilegx/helper.c index 5ab41cd..98dd805 100644 --- a/target-tilegx/helper.c +++ b/target-tilegx/helper.c @@ -24,7 +24,7 @@ #define SIGNBIT32 0x80000000 -int64_t helper_add_saturate(CPUTLGState *env, uint64_t rsrc, uint64_t rsrcb) +int64_t helper_add_saturate(uint64_t rsrc, uint64_t rsrcb) { uint32_t rdst = rsrc + rsrcb; @@ -53,6 +53,16 @@ uint64_t helper_cnttz(uint64_t arg) return ctz64(arg); } +uint64_t helper_pcnt(uint64_t arg) +{ + return ctpop64(arg); +} + +uint64_t helper_revbytes(uint64_t arg) +{ + return bswap64(arg); +} + /* * Functional Description * uint64_t a = rf[SrcA]; diff --git a/target-tilegx/helper.h b/target-tilegx/helper.h index 1411c19..fbd995a 100644 --- a/target-tilegx/helper.h +++ b/target-tilegx/helper.h @@ -1,5 +1,7 @@ DEF_HELPER_2(exception, noreturn, env, i32) DEF_HELPER_FLAGS_1(cntlz, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_FLAGS_1(cnttz, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_1(pcnt, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_1(revbytes, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_FLAGS_3(shufflebytes, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64) -DEF_HELPER_3(add_saturate, s64, env, i64, i64) 
+DEF_HELPER_FLAGS_2(add_saturate, TCG_CALL_NO_RWG_SE, s64, i64, i64) diff --git a/target-tilegx/translate.c b/target-tilegx/translate.c index 9ae1c6d..6d993b4 100644 --- a/target-tilegx/translate.c +++ b/target-tilegx/translate.c @@ -302,20 +302,104 @@ static void gen_mtspr(struct DisasContext *dc, uint8_t rsrc, uint16_t imm14) set_exception(dc, TILEGX_EXCP_OPCODE_UNIMPLEMENTED); } -static void extract_v1(TCGv out, TCGv in, unsigned byte) +static void extract_v(TCGv out, TCGv in, int count, int v) { - tcg_gen_shri_i64(out, in, byte * 8); - tcg_gen_ext8u_i64(out, out); + tcg_gen_shri_i64(out, in, count * v * 8); + switch (v) { + case 1: + tcg_gen_ext8u_i64(out, out); + break; + case 2: + tcg_gen_ext16u_i64(out, out); + break; + case 4: + tcg_gen_ext32u_i64(out, out); + break; + default: + g_assert_not_reached(); + } } -static void insert_v1(TCGv out, TCGv in, unsigned byte) +static void insert_v(TCGv out, TCGv in, int count, int v) { - tcg_gen_deposit_i64(out, out, in, byte * 8, 8); + tcg_gen_deposit_i64(out, out, in, count * v * 8, v * 8); } -static void gen_v1cmpi(struct DisasContext *dc, - uint8_t rdst, uint8_t rsrc, int8_t imm8, - TCGCond cond, const char *code) +static void gen_vadd(struct DisasContext *dc, + uint8_t rdst, uint8_t rsrc, uint8_t rsrcb, int v) +{ + int count; + TCGv vdst = dest_gr(dc, rdst); + TCGv vsrc = load_gr(dc, rsrc); + TCGv vsrcb = load_gr(dc, rsrcb); + TCGv tmp = tcg_temp_new_i64(); + TCGv tmpb = tcg_temp_new_i64(); + + qemu_log_mask(CPU_LOG_TB_IN_ASM, "v%dadd r%d, r%d, r%d\n", + v, rdst, rsrc, rsrcb); + + tcg_gen_movi_i64(vdst, 0); + + for (count = 0; count < sizeof(uint64_t) / v; count++) { + extract_v(tmp, vsrc, count, v); + extract_v(tmpb, vsrcb, count, v); + tcg_gen_add_i64(tmp, tmp, tmpb); + insert_v(vdst, tmp, count, v); + } + + tcg_temp_free_i64(tmpb); + tcg_temp_free_i64(tmp); +} + +static void gen_vsub(struct DisasContext *dc, + uint8_t rdst, uint8_t rsrc, uint8_t rsrcb, int v) +{ + int count; + TCGv vdst = dest_gr(dc, rdst); + 
TCGv vsrc = load_gr(dc, rsrc); + TCGv vsrcb = load_gr(dc, rsrcb); + TCGv tmp = tcg_temp_new_i64(); + TCGv tmpb = tcg_temp_new_i64(); + + qemu_log_mask(CPU_LOG_TB_IN_ASM, "v%dsub r%d, r%d, r%d\n", + v, rdst, rsrc, rsrcb); + + tcg_gen_movi_i64(vdst, 0); + + for (count = 0; count < sizeof(uint64_t) / v; count++) { + extract_v(tmp, vsrc, count, v); + extract_v(tmpb, vsrcb, count, v); + tcg_gen_sub_i64(tmp, tmp, tmpb); + insert_v(vdst, tmp, count, v); + } + + tcg_temp_free_i64(tmpb); + tcg_temp_free_i64(tmp); +} + +static void gen_vaddi(struct DisasContext *dc, + uint8_t rdst, uint8_t rsrc, int8_t imm8, int v) +{ + int count; + TCGv vdst = dest_gr(dc, rdst); + TCGv vsrc = load_gr(dc, rsrc); + TCGv tmp = tcg_temp_new_i64(); + + qemu_log_mask(CPU_LOG_TB_IN_ASM, "v%daddi r%d, r%d, %d\n", + v, rdst, rsrc, imm8); + + tcg_gen_movi_i64(vdst, 0); + for (count = 0; count < sizeof(uint64_t) / v; count++) { + extract_v(tmp, vsrc, count, v); + tcg_gen_addi_i64(tmp, tmp, imm8); + insert_v(vdst, tmp, count, v); + } + tcg_temp_free_i64(tmp); +} + +static void gen_vcmpi(struct DisasContext *dc, + uint8_t rdst, uint8_t rsrc, int8_t imm8, int v, + TCGCond cond, const char *code) { int count; TCGv vdst = dest_gr(dc, rdst); @@ -326,17 +410,17 @@ static void gen_v1cmpi(struct DisasContext *dc, code, rdst, rsrc, imm8); tcg_gen_movi_i64(vdst, 0); - for (count = 0; count < 8; count++) { - extract_v1(tmp, vsrc, count); + for (count = 0; count < sizeof(uint64_t) / v; count++) { + extract_v(tmp, vsrc, count, v); tcg_gen_setcondi_i64(cond, tmp, tmp, imm8); - insert_v1(vdst, tmp, count); + insert_v(vdst, tmp, count, v); } tcg_temp_free_i64(tmp); } -static void gen_v1cmp(struct DisasContext *dc, - uint8_t rdst, uint8_t rsrc, uint8_t rsrcb, - TCGCond cond, const char *code) +static void gen_vcmp(struct DisasContext *dc, + uint8_t rdst, uint8_t rsrc, uint8_t rsrcb, int v, + TCGCond cond, const char *code) { int count; TCGv vdst = dest_gr(dc, rdst); @@ -349,33 +433,33 @@ static void gen_v1cmp(struct 
DisasContext *dc, code, rdst, rsrc, rsrcb); tcg_gen_movi_i64(vdst, 0); - for (count = 0; count < 8; count++) { - extract_v1(tmp, vsrc, count); - extract_v1(tmp2, vsrcb, count); + for (count = 0; count < sizeof(uint64_t) / v; count++) { + extract_v(tmp, vsrc, count, v); + extract_v(tmp2, vsrcb, count, v); tcg_gen_setcond_i64(cond, tmp, tmp, tmp2); - insert_v1(vdst, tmp, count); + insert_v(vdst, tmp, count, v); } tcg_temp_free_i64(tmp2); tcg_temp_free_i64(tmp); } -static void gen_v1shrui(struct DisasContext *dc, - uint8_t rdst, uint8_t rsrc, uint8_t shamt) +static void gen_vshrui(struct DisasContext *dc, + uint8_t rdst, uint8_t rsrc, uint8_t shamt, int v) { int count; TCGv vdst = dest_gr(dc, rdst); TCGv vsrc = load_gr(dc, rsrc); TCGv tmp = tcg_temp_new_i64(); - qemu_log_mask(CPU_LOG_TB_IN_ASM, "v1shrui r%d, r%d, %u\n", - rdst, rsrc, shamt); + qemu_log_mask(CPU_LOG_TB_IN_ASM, "v%dshrui r%d, r%d, %u\n", + v, rdst, rsrc, shamt); - shamt &= 7; + shamt &= v * 8 - 1; /* shift count is taken modulo the lane width in bits (8 for v1, 16 for v2) */ tcg_gen_movi_i64(vdst, 0); - for (count = 0; count < 8; count++) { - extract_v1(tmp, vsrc, count); + for (count = 0; count < sizeof(uint64_t) / v; count++) { + extract_v(tmp, vsrc, count, v); tcg_gen_shri_i64(tmp, tmp, shamt); - insert_v1(vdst, tmp, count); + insert_v(vdst, tmp, count, v); } tcg_temp_free_i64(tmp); } @@ -404,8 +488,8 @@ static void gen_v1shrui(struct DisasContext *dc, * } * rf[Dest] = output; */ -static void gen_v1int_l(struct DisasContext *dc, - uint8_t rdst, uint8_t rsrc, uint8_t rsrcb) +static void gen_vint_l(struct DisasContext *dc, + uint8_t rdst, uint8_t rsrc, uint8_t rsrcb, int v) { int count; TCGv vdst = dest_gr(dc, rdst); @@ -413,19 +497,20 @@ static void gen_v1int_l(struct DisasContext *dc, TCGv vsrcb = load_gr(dc, rsrcb); TCGv tmp = tcg_temp_new_i64(); - qemu_log_mask(CPU_LOG_TB_IN_ASM, "v1int_l r%d, r%d, r%d\n", - rdst, rsrc, rsrcb); + qemu_log_mask(CPU_LOG_TB_IN_ASM, "v%dint_l r%d, r%d, r%d\n", + v, rdst, rsrc, rsrcb); tcg_gen_movi_i64(vdst, 0); - for (count = 0; count < 4; count++) { - extract_v1(tmp, 
vsrc, count); - insert_v1(vdst, tmp, 2 * count + 1); - extract_v1(tmp, vsrcb, count); - insert_v1(vdst, tmp, 2 * count); + for (count = 0; count < sizeof(uint64_t) / (v * 2); count++) { + extract_v(tmp, vsrc, count, v); + insert_v(vdst, tmp, 2 * count + 1, v); + extract_v(tmp, vsrcb, count, v); + insert_v(vdst, tmp, 2 * count, v); } tcg_temp_free_i64(tmp); } +/* Still reserve v4int_l for optimization */ static void gen_v4int_l(struct DisasContext *dc, uint8_t rdst, uint8_t rsrc, uint8_t rsrcb) { @@ -496,7 +581,7 @@ static void gen_addxsc(struct DisasContext *dc, { qemu_log_mask(CPU_LOG_TB_IN_ASM, "addxsc r%d, r%d, r%d\n", rdst, rsrc, rsrcb); - gen_helper_add_saturate(dest_gr(dc, rdst), cpu_env, + gen_helper_add_saturate(dest_gr(dc, rdst), load_gr(dc, rsrc), load_gr(dc, rsrcb)); } @@ -910,6 +995,18 @@ static void gen_cnttz(struct DisasContext *dc, uint8_t rdst, uint8_t rsrc) gen_helper_cnttz(dest_gr(dc, rdst), load_gr(dc, rsrc)); } +static void gen_pcnt(struct DisasContext *dc, uint8_t rdst, uint8_t rsrc) +{ + qemu_log_mask(CPU_LOG_TB_IN_ASM, "pcnt r%d, r%d\n", rdst, rsrc); + gen_helper_pcnt(dest_gr(dc, rdst), load_gr(dc, rsrc)); +} + +static void gen_revbytes(struct DisasContext *dc, uint8_t rdst, uint8_t rsrc) +{ + qemu_log_mask(CPU_LOG_TB_IN_ASM, "revbytes r%d, r%d\n", rdst, rsrc); + gen_helper_revbytes(dest_gr(dc, rdst), load_gr(dc, rsrc)); +} + static void gen_ld(struct DisasContext *dc, uint8_t rdst, uint8_t rsrc, TCGMemOp ops, const char *code) @@ -1008,6 +1105,18 @@ static void gen_wh64(struct DisasContext *dc, uint8_t rsrc) /* FIXME: Do we need any implementation for it? I guess no. */ } +static void gen_icoh(struct DisasContext *dc, uint8_t rsrc) +{ + qemu_log_mask(CPU_LOG_TB_IN_ASM, "icoh r%d\n", rsrc); + /* FIXME: Do we need any implementation for it? I guess no. */ +} + +static void gen_drain(struct DisasContext *dc) +{ + qemu_log_mask(CPU_LOG_TB_IN_ASM, "drain\n"); + /* FIXME: Do we need any implementation for it? I guess no. 
*/ +} + static void gen_jr(struct DisasContext *dc, uint8_t rsrc) { qemu_log_mask(CPU_LOG_TB_IN_ASM, "jr(p) r%d\n", rsrc); @@ -1096,6 +1205,12 @@ static void decode_u_opcode_ex_y0(struct DisasContext *dc, case CNTTZ_UNARY_OPCODE_Y0: gen_cnttz(dc, rdst, rsrc); return; + case PCNT_UNARY_OPCODE_Y0: + gen_pcnt(dc, rdst, rsrc); + return; + case REVBYTES_UNARY_OPCODE_Y0: + gen_revbytes(dc, rdst, rsrc); + return; case FNOP_UNARY_OPCODE_Y0: case NOP_UNARY_OPCODE_Y0: if (!rsrc && !rdst) { @@ -1104,9 +1219,7 @@ static void decode_u_opcode_ex_y0(struct DisasContext *dc, } /* Fall through */ case FSINGLE_PACK1_UNARY_OPCODE_Y0: - case PCNT_UNARY_OPCODE_Y0: case REVBITS_UNARY_OPCODE_Y0: - case REVBYTES_UNARY_OPCODE_Y0: case TBLIDXB0_UNARY_OPCODE_Y0: case TBLIDXB1_UNARY_OPCODE_Y0: case TBLIDXB2_UNARY_OPCODE_Y0: @@ -1603,9 +1716,7 @@ static void decode_ldst2_opcode_y2(struct DisasContext *dc, gen_ld(dc, rsrcbdst, rsrc, MO_LEUL, "ld4u"); return; case MODE_OPCODE_YA2: - qemu_log_mask(LOG_UNIMP, - "UNIMP ldst2_opcode_y2, [" FMT64X "]\n", bundle); - set_exception(dc, TILEGX_EXCP_OPCODE_UNIMPLEMENTED); + gen_ld(dc, rsrcbdst, rsrc, MO_LESW, "ld2s"); return; default: g_assert_not_reached(); @@ -1690,25 +1801,35 @@ static void decode_imm8_opcode_x0(struct DisasContext *dc, case ORI_IMM8_OPCODE_X0: gen_ori(dc, rdst, rsrc, imm8); return; + case V1ADDI_IMM8_OPCODE_X0: + gen_vaddi(dc, rdst, rsrc, imm8, 1); + return; case V1CMPEQI_IMM8_OPCODE_X0: - gen_v1cmpi(dc, rdst, rsrc, imm8, TCG_COND_EQ, "v1cmpeqi"); + gen_vcmpi(dc, rdst, rsrc, imm8, 1, TCG_COND_EQ, "v1cmpeqi"); return; case V1CMPLTSI_IMM8_OPCODE_X0: - gen_v1cmpi(dc, rdst, rsrc, imm8, TCG_COND_LT, "v1cmpltsi"); + gen_vcmpi(dc, rdst, rsrc, imm8, 1, TCG_COND_LT, "v1cmpltsi"); return; case V1CMPLTUI_IMM8_OPCODE_X0: - gen_v1cmpi(dc, rdst, rsrc, imm8, TCG_COND_LTU, "v1cmpltui"); + gen_vcmpi(dc, rdst, rsrc, imm8, 1, TCG_COND_LTU, "v1cmpltui"); + return; + case V2ADDI_IMM8_OPCODE_X0: + gen_vaddi(dc, rdst, rsrc, imm8, 2); + return; + case 
V2CMPEQI_IMM8_OPCODE_X0: + gen_vcmpi(dc, rdst, rsrc, imm8, 2, TCG_COND_EQ, "v2cmpeqi"); + return; + case V2CMPLTSI_IMM8_OPCODE_X0: + gen_vcmpi(dc, rdst, rsrc, imm8, 2, TCG_COND_LT, "v2cmpltsi"); + return; + case V2CMPLTUI_IMM8_OPCODE_X0: + gen_vcmpi(dc, rdst, rsrc, imm8, 2, TCG_COND_LTU, "v2cmpltui"); return; case XORI_IMM8_OPCODE_X0: gen_xori(dc, rdst, rsrc, imm8); return; - case V1ADDI_IMM8_OPCODE_X0: case V1MAXUI_IMM8_OPCODE_X0: case V1MINUI_IMM8_OPCODE_X0: - case V2ADDI_IMM8_OPCODE_X0: - case V2CMPEQI_IMM8_OPCODE_X0: - case V2CMPLTSI_IMM8_OPCODE_X0: - case V2CMPLTUI_IMM8_OPCODE_X0: case V2MAXSI_IMM8_OPCODE_X0: case V2MINSI_IMM8_OPCODE_X0: qemu_log_mask(LOG_UNIMP, @@ -1733,6 +1854,12 @@ static void decode_u_opcode_ex_x0(struct DisasContext *dc, case CNTTZ_UNARY_OPCODE_X0: gen_cnttz(dc, rdst, rsrc); return; + case PCNT_UNARY_OPCODE_X0: + gen_pcnt(dc, rdst, rsrc); + return; + case REVBYTES_UNARY_OPCODE_X0: + gen_revbytes(dc, rdst, rsrc); + return; case FNOP_UNARY_OPCODE_X0: case NOP_UNARY_OPCODE_X0: if (!rsrc && !rdst) { @@ -1741,9 +1868,7 @@ static void decode_u_opcode_ex_x0(struct DisasContext *dc, } /* Fall through */ case FSINGLE_PACK1_UNARY_OPCODE_X0: - case PCNT_UNARY_OPCODE_X0: case REVBITS_UNARY_OPCODE_X0: - case REVBYTES_UNARY_OPCODE_X0: case TBLIDXB0_UNARY_OPCODE_X0: case TBLIDXB1_UNARY_OPCODE_X0: case TBLIDXB2_UNARY_OPCODE_X0: @@ -1950,28 +2075,68 @@ static void decode_rrr_0_opcode_x0(struct DisasContext *dc, case UNARY_RRR_0_OPCODE_X0: return decode_u_opcode_ex_x0(dc, bundle); case V1INT_L_RRR_0_OPCODE_X0: - gen_v1int_l(dc, rdst, rsrc, rsrcb); + gen_vint_l(dc, rdst, rsrc, rsrcb, 1); return; - case V4INT_L_RRR_0_OPCODE_X0: - gen_v4int_l(dc, rdst, rsrc, rsrcb); + case V1ADD_RRR_0_OPCODE_X0: + gen_vadd(dc, rdst, rsrc, rsrcb, 1); + return; + case V1SUB_RRR_0_OPCODE_X0: + gen_vsub(dc, rdst, rsrc, rsrcb, 1); + return; + case V2ADD_RRR_0_OPCODE_X0: + gen_vadd(dc, rdst, rsrc, rsrcb, 2); + return; + case V2SUB_RRR_0_OPCODE_X0: + gen_vsub(dc, rdst, rsrc, rsrcb, 
2); + return; + case V4ADD_RRR_0_OPCODE_X0: + gen_vadd(dc, rdst, rsrc, rsrcb, 4); + return; + case V4SUB_RRR_0_OPCODE_X0: + gen_vsub(dc, rdst, rsrc, rsrcb, 4); return; case V1CMPEQ_RRR_0_OPCODE_X0: - gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_EQ, "v1cmpeq"); + gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_EQ, "v1cmpeq"); return; case V1CMPLES_RRR_0_OPCODE_X0: - gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_LE, "v1cmples"); + gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_LE, "v1cmples"); return; case V1CMPLEU_RRR_0_OPCODE_X0: - gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_LEU, "v1cmpleu"); + gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_LEU, "v1cmpleu"); return; case V1CMPLTS_RRR_0_OPCODE_X0: - gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_LT, "v1cmplts"); + gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_LT, "v1cmplts"); return; case V1CMPLTU_RRR_0_OPCODE_X0: - gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_LTU, "v1cmpltu"); + gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_LTU, "v1cmpltu"); return; case V1CMPNE_RRR_0_OPCODE_X0: - gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_NE, "v1cmpne"); + gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_NE, "v1cmpne"); + return; + case V2CMPEQ_RRR_0_OPCODE_X0: + gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_EQ, "v2cmpeq"); + return; + case V2CMPLES_RRR_0_OPCODE_X0: + gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_LE, "v2cmples"); + return; + case V2CMPLEU_RRR_0_OPCODE_X0: + gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_LEU, "v2cmpleu"); + return; + case V2CMPLTS_RRR_0_OPCODE_X0: + gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_LT, "v2cmplts"); + return; + case V2CMPLTU_RRR_0_OPCODE_X0: + gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_LTU, "v2cmpltu"); + return; + case V2CMPNE_RRR_0_OPCODE_X0: + gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_NE, "v2cmpne"); + return; + case V2INT_L_RRR_0_OPCODE_X0: + gen_vint_l(dc, rdst, rsrc, rsrcb, 2); + return; + case V4INT_L_RRR_0_OPCODE_X0: + /* v4int_l is a little faster than the generic vint_l */ + gen_v4int_l(dc, rdst, rsrc, rsrcb); return; case 
XOR_RRR_0_OPCODE_X0: gen_xor(dc, rdst, rsrc, rsrcb); @@ -1988,8 +2153,8 @@ static void decode_rrr_0_opcode_x0(struct DisasContext *dc, case DBLALIGN2_RRR_0_OPCODE_X0: case DBLALIGN4_RRR_0_OPCODE_X0: case DBLALIGN6_RRR_0_OPCODE_X0: - case FDOUBLE_ADDSUB_RRR_0_OPCODE_X0: case FDOUBLE_ADD_FLAGS_RRR_0_OPCODE_X0: + case FDOUBLE_ADDSUB_RRR_0_OPCODE_X0: case FDOUBLE_MUL_FLAGS_RRR_0_OPCODE_X0: case FDOUBLE_PACK1_RRR_0_OPCODE_X0: case FDOUBLE_PACK2_RRR_0_OPCODE_X0: @@ -2000,11 +2165,10 @@ static void decode_rrr_0_opcode_x0(struct DisasContext *dc, case FSINGLE_ADDSUB2_RRR_0_OPCODE_X0: case FSINGLE_MUL1_RRR_0_OPCODE_X0: case FSINGLE_MUL2_RRR_0_OPCODE_X0: - case FSINGLE_PACK2_RRR_0_OPCODE_X0: case FSINGLE_SUB1_RRR_0_OPCODE_X0: + case FSINGLE_PACK2_RRR_0_OPCODE_X0: case SUBXSC_RRR_0_OPCODE_X0: case V1ADDUC_RRR_0_OPCODE_X0: - case V1ADD_RRR_0_OPCODE_X0: case V1ADIFFU_RRR_0_OPCODE_X0: case V1AVGU_RRR_0_OPCODE_X0: case V1DDOTPUSA_RRR_0_OPCODE_X0: @@ -2026,23 +2190,13 @@ static void decode_rrr_0_opcode_x0(struct DisasContext *dc, case V1SHRS_RRR_0_OPCODE_X0: case V1SHRU_RRR_0_OPCODE_X0: case V1SUBUC_RRR_0_OPCODE_X0: - case V1SUB_RRR_0_OPCODE_X0: case V1INT_H_RRR_0_OPCODE_X0: - case V2INT_H_RRR_0_OPCODE_X0: - case V2INT_L_RRR_0_OPCODE_X0: - case V4INT_H_RRR_0_OPCODE_X0: case V2ADDSC_RRR_0_OPCODE_X0: - case V2ADD_RRR_0_OPCODE_X0: case V2ADIFFS_RRR_0_OPCODE_X0: case V2AVGS_RRR_0_OPCODE_X0: - case V2CMPEQ_RRR_0_OPCODE_X0: - case V2CMPLES_RRR_0_OPCODE_X0: - case V2CMPLEU_RRR_0_OPCODE_X0: - case V2CMPLTS_RRR_0_OPCODE_X0: - case V2CMPLTU_RRR_0_OPCODE_X0: - case V2CMPNE_RRR_0_OPCODE_X0: case V2DOTPA_RRR_0_OPCODE_X0: case V2DOTP_RRR_0_OPCODE_X0: + case V2INT_H_RRR_0_OPCODE_X0: case V2MAXS_RRR_0_OPCODE_X0: case V2MINS_RRR_0_OPCODE_X0: case V2MNZ_RRR_0_OPCODE_X0: @@ -2062,16 +2216,14 @@ static void decode_rrr_0_opcode_x0(struct DisasContext *dc, case V2SHRS_RRR_0_OPCODE_X0: case V2SHRU_RRR_0_OPCODE_X0: case V2SUBSC_RRR_0_OPCODE_X0: - case V2SUB_RRR_0_OPCODE_X0: case V4ADDSC_RRR_0_OPCODE_X0: 
- case V4ADD_RRR_0_OPCODE_X0: + case V4INT_H_RRR_0_OPCODE_X0: case V4PACKSC_RRR_0_OPCODE_X0: case V4SHLSC_RRR_0_OPCODE_X0: case V4SHL_RRR_0_OPCODE_X0: case V4SHRS_RRR_0_OPCODE_X0: case V4SHRU_RRR_0_OPCODE_X0: case V4SUBSC_RRR_0_OPCODE_X0: - case V4SUB_RRR_0_OPCODE_X0: case V1DDOTPUA_RRR_0_OPCODE_X0: case V1DDOTPU_RRR_0_OPCODE_X0: case V1DOTPUA_RRR_0_OPCODE_X0: @@ -2112,13 +2264,15 @@ static void decode_shift_opcode_x0(struct DisasContext *dc, gen_shruxi(dc, rdst, rsrc, shamt); return; case V1SHRUI_SHIFT_OPCODE_X0: - gen_v1shrui(dc, rdst, rsrc, shamt); + gen_vshrui(dc, rdst, rsrc, shamt, 1); + return; + case V2SHRUI_SHIFT_OPCODE_X0: + gen_vshrui(dc, rdst, rsrc, shamt, 2); return; case V1SHLI_SHIFT_OPCODE_X0: case V1SHRSI_SHIFT_OPCODE_X0: case V2SHLI_SHIFT_OPCODE_X0: case V2SHRSI_SHIFT_OPCODE_X0: - case V2SHRUI_SHIFT_OPCODE_X0: qemu_log_mask(LOG_UNIMP, "UNIMP shift_opcode_x0, [" FMT64X "]\n", bundle); set_exception(dc, TILEGX_EXCP_OPCODE_UNIMPLEMENTED); @@ -2242,14 +2396,29 @@ static void decode_imm8_opcode_x1(struct DisasContext *dc, case ST4_ADD_IMM8_OPCODE_X1: gen_st_add(dc, rsrc, rsrcb, dimm8, MO_LEUL, "st4_add"); return; + case V1ADDI_IMM8_OPCODE_X1: + gen_vaddi(dc, rdst, rsrc, imm8, 1); + return; case V1CMPEQI_IMM8_OPCODE_X1: - gen_v1cmpi(dc, rdst, rsrc, imm8, TCG_COND_EQ, "v1cmpeqi"); + gen_vcmpi(dc, rdst, rsrc, imm8, 1, TCG_COND_EQ, "v1cmpeqi"); return; case V1CMPLTSI_IMM8_OPCODE_X1: - gen_v1cmpi(dc, rdst, rsrc, imm8, TCG_COND_LT, "v1cmpltsi"); + gen_vcmpi(dc, rdst, rsrc, imm8, 1, TCG_COND_LT, "v1cmpltsi"); return; case V1CMPLTUI_IMM8_OPCODE_X1: - gen_v1cmpi(dc, rdst, rsrc, imm8, TCG_COND_LTU, "v1cmpltui"); + gen_vcmpi(dc, rdst, rsrc, imm8, 1, TCG_COND_LTU, "v1cmpltui"); + return; + case V2ADDI_IMM8_OPCODE_X1: + gen_vaddi(dc, rdst, rsrc, imm8, 2); + return; + case V2CMPEQI_IMM8_OPCODE_X1: + gen_vcmpi(dc, rdst, rsrc, imm8, 2, TCG_COND_EQ, "v2cmpeqi"); + return; + case V2CMPLTSI_IMM8_OPCODE_X1: + gen_vcmpi(dc, rdst, rsrc, imm8, 2, TCG_COND_LT, "v2cmpltsi"); + 
return; + case V2CMPLTUI_IMM8_OPCODE_X1: + gen_vcmpi(dc, rdst, rsrc, imm8, 2, TCG_COND_LTU, "v2cmpltui"); return; case XORI_IMM8_OPCODE_X1: gen_xori(dc, rdst, rsrc, imm8); @@ -2266,13 +2435,8 @@ static void decode_imm8_opcode_x1(struct DisasContext *dc, case STNT2_ADD_IMM8_OPCODE_X1: case STNT4_ADD_IMM8_OPCODE_X1: case STNT_ADD_IMM8_OPCODE_X1: - case V1ADDI_IMM8_OPCODE_X1: case V1MAXUI_IMM8_OPCODE_X1: case V1MINUI_IMM8_OPCODE_X1: - case V2ADDI_IMM8_OPCODE_X1: - case V2CMPEQI_IMM8_OPCODE_X1: - case V2CMPLTSI_IMM8_OPCODE_X1: - case V2CMPLTUI_IMM8_OPCODE_X1: case V2MAXSI_IMM8_OPCODE_X1: case V2MINSI_IMM8_OPCODE_X1: qemu_log_mask(LOG_UNIMP, @@ -2308,6 +2472,12 @@ static void decode_u_opcode_ex_x1(struct DisasContext *dc, uint8_t rdst = get_Dest_X1(bundle); switch (get_UnaryOpcodeExtension_X1(bundle)) { + case DRAIN_UNARY_OPCODE_X1: + if (!rdst && !rsrc) { + gen_drain(dc); + return; + } + break; case NOP_UNARY_OPCODE_X1: case FNOP_UNARY_OPCODE_X1: if (!rdst && !rsrc) { @@ -2315,6 +2485,12 @@ static void decode_u_opcode_ex_x1(struct DisasContext *dc, return; } break; + case ICOH_UNARY_OPCODE_X1: + if (!rdst) { + gen_icoh(dc, rsrc); + return; + } + break; case JALRP_UNARY_OPCODE_X1: case JALR_UNARY_OPCODE_X1: if (!rdst) { @@ -2381,12 +2557,10 @@ static void decode_u_opcode_ex_x1(struct DisasContext *dc, return; } break; - case DRAIN_UNARY_OPCODE_X1: case DTLBPR_UNARY_OPCODE_X1: case FINV_UNARY_OPCODE_X1: case FLUSHWB_UNARY_OPCODE_X1: case FLUSH_UNARY_OPCODE_X1: - case ICOH_UNARY_OPCODE_X1: case ILL_UNARY_OPCODE_X1: case INV_UNARY_OPCODE_X1: case LDNT1S_UNARY_OPCODE_X1: @@ -2576,29 +2750,69 @@ static void decode_rrr_0_opcode_x1(struct DisasContext *dc, break; case UNARY_RRR_0_OPCODE_X1: return decode_u_opcode_ex_x1(dc, bundle); - case V1INT_L_RRR_0_OPCODE_X1: - gen_v1int_l(dc, rdst, rsrc, rsrcb); - return; - case V4INT_L_RRR_0_OPCODE_X1: - gen_v4int_l(dc, rdst, rsrc, rsrcb); + case V1ADD_RRR_0_OPCODE_X1: + gen_vadd(dc, rdst, rsrc, rsrcb, 1); return; case 
V1CMPEQ_RRR_0_OPCODE_X1: - gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_EQ, "v1cmpeq"); + gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_EQ, "v1cmpeq"); return; case V1CMPLES_RRR_0_OPCODE_X1: - gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_LE, "v1cmples"); + gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_LE, "v1cmples"); return; case V1CMPLEU_RRR_0_OPCODE_X1: - gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_LEU, "v1cmpleu"); + gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_LEU, "v1cmpleu"); return; case V1CMPLTS_RRR_0_OPCODE_X1: - gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_LT, "v1cmplts"); + gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_LT, "v1cmplts"); return; case V1CMPLTU_RRR_0_OPCODE_X1: - gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_LTU, "v1cmpltu"); + gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_LTU, "v1cmpltu"); return; case V1CMPNE_RRR_0_OPCODE_X1: - gen_v1cmp(dc, rdst, rsrc, rsrcb, TCG_COND_NE, "v1cmpne"); + gen_vcmp(dc, rdst, rsrc, rsrcb, 1, TCG_COND_NE, "v1cmpne"); + return; + case V1INT_L_RRR_0_OPCODE_X1: + gen_vint_l(dc, rdst, rsrc, rsrcb, 1); + return; + case V1SUB_RRR_0_OPCODE_X1: + gen_vsub(dc, rdst, rsrc, rsrcb, 1); + return; + case V2ADD_RRR_0_OPCODE_X1: + gen_vadd(dc, rdst, rsrc, rsrcb, 2); + return; + case V2CMPEQ_RRR_0_OPCODE_X1: + gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_EQ, "v2cmpeq"); + return; + case V2CMPLES_RRR_0_OPCODE_X1: + gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_LE, "v2cmples"); + return; + case V2CMPLEU_RRR_0_OPCODE_X1: + gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_LEU, "v2cmpleu"); + return; + case V2CMPLTS_RRR_0_OPCODE_X1: + gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_LT, "v2cmplts"); + return; + case V2CMPLTU_RRR_0_OPCODE_X1: + gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_LTU, "v2cmpltu"); + return; + case V2CMPNE_RRR_0_OPCODE_X1: + gen_vcmp(dc, rdst, rsrc, rsrcb, 2, TCG_COND_NE, "v2cmpne"); + return; + case V2INT_L_RRR_0_OPCODE_X1: + gen_vint_l(dc, rdst, rsrc, rsrcb, 2); + return; + case V2SUB_RRR_0_OPCODE_X1: + gen_vsub(dc, rdst, rsrc, rsrcb, 2); + 
return; + case V4ADD_RRR_0_OPCODE_X1: + gen_vadd(dc, rdst, rsrc, rsrcb, 4); + return; + case V4INT_L_RRR_0_OPCODE_X1: + /* v4int_l is a little faster than the generic vint_l */ + gen_v4int_l(dc, rdst, rsrc, rsrcb); + return; + case V4SUB_RRR_0_OPCODE_X1: + gen_vsub(dc, rdst, rsrc, rsrcb, 4); return; case XOR_RRR_0_OPCODE_X1: gen_xor(dc, rdst, rsrc, rsrcb); @@ -2613,10 +2827,8 @@ static void decode_rrr_0_opcode_x1(struct DisasContext *dc, case SUBXSC_RRR_0_OPCODE_X1: case V1INT_H_RRR_0_OPCODE_X1: case V2INT_H_RRR_0_OPCODE_X1: - case V2INT_L_RRR_0_OPCODE_X1: case V4INT_H_RRR_0_OPCODE_X1: case V1ADDUC_RRR_0_OPCODE_X1: - case V1ADD_RRR_0_OPCODE_X1: case V1MAXU_RRR_0_OPCODE_X1: case V1MINU_RRR_0_OPCODE_X1: case V1MNZ_RRR_0_OPCODE_X1: @@ -2625,15 +2837,7 @@ static void decode_rrr_0_opcode_x1(struct DisasContext *dc, case V1SHRS_RRR_0_OPCODE_X1: case V1SHRU_RRR_0_OPCODE_X1: case V1SUBUC_RRR_0_OPCODE_X1: - case V1SUB_RRR_0_OPCODE_X1: case V2ADDSC_RRR_0_OPCODE_X1: - case V2ADD_RRR_0_OPCODE_X1: - case V2CMPEQ_RRR_0_OPCODE_X1: - case V2CMPLES_RRR_0_OPCODE_X1: - case V2CMPLEU_RRR_0_OPCODE_X1: - case V2CMPLTS_RRR_0_OPCODE_X1: - case V2CMPLTU_RRR_0_OPCODE_X1: - case V2CMPNE_RRR_0_OPCODE_X1: case V2MAXS_RRR_0_OPCODE_X1: case V2MINS_RRR_0_OPCODE_X1: case V2MNZ_RRR_0_OPCODE_X1: @@ -2646,16 +2850,13 @@ static void decode_rrr_0_opcode_x1(struct DisasContext *dc, case V2SHRS_RRR_0_OPCODE_X1: case V2SHRU_RRR_0_OPCODE_X1: case V2SUBSC_RRR_0_OPCODE_X1: - case V2SUB_RRR_0_OPCODE_X1: case V4ADDSC_RRR_0_OPCODE_X1: - case V4ADD_RRR_0_OPCODE_X1: case V4PACKSC_RRR_0_OPCODE_X1: case V4SHLSC_RRR_0_OPCODE_X1: case V4SHL_RRR_0_OPCODE_X1: case V4SHRS_RRR_0_OPCODE_X1: case V4SHRU_RRR_0_OPCODE_X1: case V4SUBSC_RRR_0_OPCODE_X1: - case V4SUB_RRR_0_OPCODE_X1: break; default: g_assert_not_reached(); @@ -2692,13 +2893,15 @@ static void decode_shift_opcode_x1(struct DisasContext *dc, gen_shruxi(dc, rdst, rsrc, shamt); return; case V1SHRUI_SHIFT_OPCODE_X1: - gen_v1shrui(dc, rdst, rsrc, shamt); + gen_vshrui(dc, 
rdst, rsrc, shamt, 1); + return; + case V2SHRUI_SHIFT_OPCODE_X1: + gen_vshrui(dc, rdst, rsrc, shamt, 2); return; case V1SHLI_SHIFT_OPCODE_X1: case V1SHRSI_SHIFT_OPCODE_X1: case V2SHLI_SHIFT_OPCODE_X1: case V2SHRSI_SHIFT_OPCODE_X1: - case V2SHRUI_SHIFT_OPCODE_X1: qemu_log_mask(LOG_UNIMP, "UNIMP shift_opcode_x1, [" FMT64X "]\n", bundle); set_exception(dc, TILEGX_EXCP_OPCODE_UNIMPLEMENTED);