@@ -278,6 +278,8 @@ enum {
OPC_SUB_A3 = 0x10128000000ull,
OPC_UNPACK4_L_I2 = 0x0f860000000ull,
OPC_XMA_L_F2 = 0x1d000000000ull,
+ OPC_XMA_H_F2 = 0x1dc00000000ull,
+ OPC_XMA_HU_F2 = 0x1d800000000ull,
OPC_XOR_A1 = 0x10078000000ull,
OPC_ZXT1_I29 = 0x00080000000ull,
OPC_ZXT2_I29 = 0x00088000000ull,
@@ -1098,6 +1100,79 @@ static inline void tcg_out_mul(TCGContext *s, TCGArg ret,
tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
}
+static inline void tcg_out_mul2_i32(TCGContext *s, TCGArg retl, TCGArg reth,
+ TCGArg arg1, int const_arg1,
+ TCGArg arg2, int const_arg2,
+ int is_signed)
+{
+ uint64_t opc2, opc3;
+
+ if (const_arg1 && arg1 != 0) {
+ opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
+ TCG_REG_R2, arg1, TCG_REG_R0);
+ } else if (is_signed) {
+ opc2 = tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R2, arg1);
+ } else {
+ opc2 = tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R2, arg1);
+ }
+ if (const_arg2 && arg2 != 0) {
+ opc3 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
+ TCG_REG_R3, arg2, TCG_REG_R0);
+ } else if (is_signed) {
+ opc3 = tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R3, arg2);
+ } else {
+ opc3 = tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R3, arg2);
+ }
+
+ tcg_out_bundle(s, miI,
+ tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+ opc2,
+ opc3);
+ tcg_out_bundle(s, mmI,
+ tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18,
+ TCG_REG_F6, TCG_REG_R2),
+ tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18,
+ TCG_REG_F7, TCG_REG_R3),
+ tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+ tcg_out_bundle(s, mmF,
+ tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+ tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+ tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F6,
+ TCG_REG_F7, TCG_REG_F0));
+ tcg_out_bundle(s, MmI,
+ tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, retl, TCG_REG_F6),
+ tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+ tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, reth, retl, 32, 31));
+}
+
+static inline void tcg_out_mul2_i64(TCGContext *s, TCGArg retl,
+ TCGArg reth, TCGArg arg1, TCGArg arg2,
+ int is_signed)
+{
+ uint64_t opc_xma_h_f2;
+
+ opc_xma_h_f2 = is_signed ? OPC_XMA_H_F2 : OPC_XMA_HU_F2;
+
+ tcg_out_bundle(s, mmI,
+ tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F8, arg1),
+ tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F9, arg2),
+ tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+ tcg_out_bundle(s, mmf,
+ tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+ tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+ tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F8,
+ TCG_REG_F9, TCG_REG_F0));
+ tcg_out_bundle(s, mmF,
+ tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+ tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+ tcg_opc_f2 (TCG_REG_P0, opc_xma_h_f2, TCG_REG_F7, TCG_REG_F8,
+ TCG_REG_F9, TCG_REG_F0));
+ tcg_out_bundle(s, mmI,
+ tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, retl, TCG_REG_F6),
+ tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, reth, TCG_REG_F7),
+ tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+}
+
static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
TCGArg arg2, int const_arg2)
{
@@ -2107,6 +2182,20 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_mul_i64:
tcg_out_mul(s, args[0], args[1], args[2]);
break;
+ case INDEX_op_mulu2_i32:
+ tcg_out_mul2_i32(s, args[0], args[1], args[2], const_args[2],
+ args[3], const_args[3], 0);
+ break;
+ case INDEX_op_mulu2_i64:
+ tcg_out_mul2_i64(s, args[0], args[1], args[2], args[3], 0);
+ break;
+ case INDEX_op_muls2_i32:
+ tcg_out_mul2_i32(s, args[0], args[1], args[2], const_args[2],
+ args[3], const_args[3], 1);
+ break;
+ case INDEX_op_muls2_i64:
+ tcg_out_mul2_i64(s, args[0], args[1], args[2], args[3], 1);
+ break;
case INDEX_op_sar_i32:
tcg_out_sar_i32(s, args[0], args[1], args[2], const_args[2]);
@@ -2275,6 +2364,8 @@ static const TCGTargetOpDef ia64_op_defs[] = {
{ INDEX_op_xor_i32, { "r", "rI", "rI" } },
{ INDEX_op_mul_i32, { "r", "rZ", "rZ" } },
+ { INDEX_op_mulu2_i32, { "r", "r", "rI", "rI" } },
+ { INDEX_op_muls2_i32, { "r", "r", "rI", "rI" } },
{ INDEX_op_sar_i32, { "r", "rZ", "ri" } },
{ INDEX_op_shl_i32, { "r", "rZ", "ri" } },
@@ -2322,6 +2413,8 @@ static const TCGTargetOpDef ia64_op_defs[] = {
{ INDEX_op_xor_i64, { "r", "rI", "rI" } },
{ INDEX_op_mul_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_mulu2_i64, { "r", "r", "rZ", "rZ" } },
+ { INDEX_op_muls2_i64, { "r", "r", "rZ", "rZ" } },
{ INDEX_op_sar_i64, { "r", "rZ", "ri" } },
{ INDEX_op_shl_i64, { "r", "rZ", "ri" } },
@@ -140,10 +140,10 @@ typedef enum {
#define TCG_TARGET_HAS_add2_i64 0
#define TCG_TARGET_HAS_sub2_i32 0
#define TCG_TARGET_HAS_sub2_i64 0
-#define TCG_TARGET_HAS_mulu2_i32 0
-#define TCG_TARGET_HAS_mulu2_i64 0
-#define TCG_TARGET_HAS_muls2_i32 0
-#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_mulu2_i64 1
+#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muls2_i64 1
#define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
#define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
Add 32-bit and 64-bit mulu2 and muls2 TCG ops.

On IA64, 32-bit ops should just ignore the 32 most significant bits of
registers, and can leave them with non-zero values. This means registers
should be zero- or sign-extended before doing the actual multiplication.
This leaves some slots in the bundle to possibly load a constant.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/ia64/tcg-target.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/ia64/tcg-target.h |  8 ++---
 2 files changed, 97 insertions(+), 4 deletions(-)