@@ -237,6 +237,8 @@ enum {
OPC_CMP4_LT_A6 = 0x18400000000ull,
OPC_CMP4_LTU_A6 = 0x1a400000000ull,
OPC_CMP4_EQ_A6 = 0x1c400000000ull,
+ OPC_DEP_I14 = 0x0ae00000000ull,
+ OPC_DEP_I15 = 0x08000000000ull,
OPC_DEP_Z_I12 = 0x0a600000000ull,
OPC_EXTR_I11 = 0x0a400002000ull,
OPC_EXTR_U_I11 = 0x0a400000000ull,
@@ -508,6 +510,32 @@ static inline uint64_t tcg_opc_i12(int qp, uint64_t opc, int r1,
| (qp & 0x3f);
}
+/*
+ * Encode an I14-format instruction (used here for OPC_DEP_I14, the
+ * "deposit 1-bit immediate" form of dep).
+ *
+ *   qp    value for bits 0..5 (callers pass TCG_REG_P0)
+ *   opc   base opcode bits, OR'ed in unchanged
+ *   r1    7-bit register number, bits 6..12
+ *   imm1  1-bit immediate, bit 36
+ *   r3    7-bit register number, bits 20..26
+ *   len   6-bit length field, bits 27..32 (callers pass length - 1)
+ *   cpos  6-bit position field, bits 14..19 (callers pass 63 - offset)
+ *
+ * Out-of-range arguments are silently truncated to their field width.
+ */
+static inline uint64_t tcg_opc_i14(int qp, uint64_t opc, int r1, int imm1,
+                                   int r3, uint64_t len,
+                                   uint64_t cpos)
+{
+    /* Unlike I15, the I14 length field is a full 6 bits wide; masking it
+       with 0x0f would corrupt every deposit longer than 16 bits.  */
+    return opc
+           | ((uint64_t)(imm1 & 1) << 36)
+           | ((len & 0x3f) << 27)
+           | ((uint64_t)(r3 & 0x7f) << 20)
+           | ((cpos & 0x3f) << 14)
+           | ((uint64_t)(r1 & 0x7f) << 6)
+           | (uint64_t)(qp & 0x3f);
+}
+
+/*
+ * Encode an I15-format instruction (used here for OPC_DEP_I15, the
+ * register form of dep).
+ *
+ *   qp    value for bits 0..5 (callers pass TCG_REG_P0)
+ *   opc   base opcode bits, OR'ed in unchanged
+ *   r1    7-bit register number, bits 6..12
+ *   r2    7-bit register number, bits 13..19
+ *   r3    7-bit register number, bits 20..26
+ *   len   4-bit length field, bits 27..30 (callers pass length - 1,
+ *         so at most 16 bits can be deposited)
+ *   cpos  6-bit position field, bits 31..36 (callers pass 63 - offset)
+ *
+ * Out-of-range arguments are silently truncated to their field width.
+ */
+static inline uint64_t tcg_opc_i15(int qp, uint64_t opc, int r1,
+                                   int r2, int r3, uint64_t len,
+                                   uint64_t cpos)
+{
+    uint64_t insn = opc;
+
+    insn |= (uint64_t)(qp & 0x3f);
+    insn |= (uint64_t)(r1 & 0x7f) << 6;
+    insn |= (uint64_t)(r2 & 0x7f) << 13;
+    insn |= (uint64_t)(r3 & 0x7f) << 20;
+    insn |= (len & 0x0f) << 27;
+    insn |= (cpos & 0x3f) << 31;
+
+    return insn;
+}
+
static inline uint64_t tcg_opc_i18(int qp, uint64_t opc, uint64_t imm)
{
return opc
@@ -1335,6 +1363,84 @@ static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg)
tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, arg, 0xb));
}
+/*
+ * Emit a 32-bit deposit: OUT receives IN with LEN bits starting at bit OFS
+ * replaced by bits taken from VAL.
+ *
+ * Register number 0 for IN or VAL selects a shorter encoding (dep.z, or
+ * dep with a zero immediate); presumably that is r0 as allowed by the
+ * "rZ" operand constraints.  The register form of dep has only a 4-bit
+ * length field, so fields of 17..32 bits are deposited in two steps
+ * using TCG_REG_R2 as a scratch register.
+ */
+static void tcg_out_deposit_i32(TCGContext *s, TCGArg out, TCGArg in,
+                                TCGArg val, unsigned ofs, unsigned len)
+{
+    const uint64_t filler = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+    const unsigned cpos = 63 - ofs;
+    uint64_t insn;
+
+    if (in != 0 && val != 0 && len > 16) {
+        /* First bundle: deposit the low 16 bits of VAL and, alongside
+           it, pull bits 16..31 of VAL into the scratch register.  */
+        tcg_out_bundle(s, miI, filler,
+                       tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2,
+                                   val, 16, 31 - 16),
+                       tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, out, val, in,
+                                   16 - 1, cpos));
+
+        /* Second bundle: deposit the remaining LEN - 16 bits from the
+           scratch register, 16 bits further up in OUT.  */
+        insn = tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, out, TCG_REG_R2,
+                           out, len - 16 - 1, cpos - 16);
+        tcg_out_bundle(s, mmI, filler, filler, insn);
+        return;
+    }
+
+    /* Everything else fits in a single instruction.  */
+    if (in == 0) {
+        insn = tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, out, val,
+                           len - 1, cpos);
+    } else if (val == 0) {
+        insn = tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, out, val,
+                           in, len - 1, cpos);
+    } else {
+        insn = tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, out, val, in,
+                           len - 1, cpos);
+    }
+    tcg_out_bundle(s, mmI, filler, filler, insn);
+}
+
+/*
+ * Emit a 64-bit deposit: OUT receives IN with LEN bits starting at bit OFS
+ * replaced by the low LEN bits of VAL.
+ *
+ * Register number 0 for IN or VAL selects a shorter encoding (dep.z, or
+ * dep with a zero immediate); presumably that is r0 as allowed by the
+ * "rZ" operand constraints.  Fields of up to 16 bits use the register
+ * form of dep; wider fields are assembled from shrp funnel shifts, using
+ * TCG_REG_R2 as a scratch register when OFS is non-zero.
+ */
+static void tcg_out_deposit_i64(TCGContext *s, TCGArg out, TCGArg in,
+                                TCGArg val, unsigned ofs, unsigned len)
+{
+    uint64_t nop_m = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+
+    if (in == 0) {
+        /* Depositing into zero: the source of dep.z must be VAL.  Using
+           IN (register 0) here would always yield zero.  */
+        tcg_out_bundle(s, mmI, nop_m, nop_m,
+                       tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, out, val,
+                                   len - 1, 63 - ofs));
+    } else if (val == 0) {
+        tcg_out_bundle(s, mmI, nop_m, nop_m,
+                       tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, out, val,
+                                   in, len - 1, 63 - ofs));
+    } else if (len <= 16) {
+        tcg_out_bundle(s, mmI, nop_m, nop_m,
+                       tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, out, val, in,
+                                   len - 1, 63 - ofs));
+    } else {
+        /* The field is too wide for the 4-bit length of dep, so:
+             1. rotate IN right by OFS, bringing the field to be replaced
+                down to the low LEN bits;
+             2. shift the 128-bit pair VAL:IN right by LEN, which drops
+                that field and leaves the low LEN bits of VAL in the top
+                LEN bits of the result;
+             3. rotate left by (OFS + LEN) mod 64 to put every bit back
+                at its final position.  */
+        uint64_t ror = 0, shrp, rol = 0;
+
+        if (ofs) {
+            ror = tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, TCG_REG_R2,
+                              in, in, ofs);
+            in = TCG_REG_R2;
+        }
+
+        /* NOTE(review): assumes tcg_opc_i10 takes (r1, r2, r3, count) and
+           that shrp treats r2 as the high half of the pair, so VAL must
+           come first -- confirm against the helper's definition.  */
+        shrp = tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, out, val, in, len);
+
+        /* Rotate-left amount; zero when the field ends at bit 63.  */
+        ofs = (ofs + len) & 63;
+        if (ofs) {
+            /* A left rotate by OFS is a right funnel shift by 64 - OFS.  */
+            rol = tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, out, out, out,
+                              64 - ofs);
+        }
+
+        /* Pack the two or three dependent instructions into as few
+           bundles as the templates used here allow.  */
+        if (ror) {
+            tcg_out_bundle(s, mII, nop_m, ror, shrp);
+            if (rol) {
+                tcg_out_bundle(s, mmI, nop_m, nop_m, rol);
+            }
+        } else if (rol) {
+            tcg_out_bundle(s, mII, nop_m, shrp, rol);
+        } else {
+            tcg_out_bundle(s, mmI, nop_m, nop_m, shrp);
+        }
+    }
+}
+
static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1,
TCGArg arg2, int cmp4)
{
@@ -2063,6 +2169,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_rotr_i64(s, args[0], args[1], args[2], const_args[2]);
break;
+ case INDEX_op_deposit_i32:
+ tcg_out_deposit_i32(s, args[0], args[1], args[2], args[3], args[4]);
+ break;
+ case INDEX_op_deposit_i64:
+ tcg_out_deposit_i64(s, args[0], args[1], args[2], args[3], args[4]);
+ break;
+
case INDEX_op_ext8s_i32:
case INDEX_op_ext8s_i64:
tcg_out_ext(s, OPC_SXT1_I29, args[0], args[1]);
@@ -2192,6 +2305,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
{ INDEX_op_shr_i32, { "r", "rZ", "ri" } },
{ INDEX_op_rotl_i32, { "r", "rZ", "ri" } },
{ INDEX_op_rotr_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_deposit_i32, { "r", "rZ", "rZ" } },
{ INDEX_op_ext8s_i32, { "r", "rZ"} },
{ INDEX_op_ext8u_i32, { "r", "rZ"} },
@@ -2238,6 +2352,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
{ INDEX_op_shr_i64, { "r", "rZ", "ri" } },
{ INDEX_op_rotl_i64, { "r", "rZ", "ri" } },
{ INDEX_op_rotr_i64, { "r", "rZ", "ri" } },
+ { INDEX_op_deposit_i64, { "r", "rZ", "rZ" } },
{ INDEX_op_ext8s_i64, { "r", "rZ"} },
{ INDEX_op_ext8u_i64, { "r", "rZ"} },
@@ -131,6 +131,8 @@ enum {
#define TCG_TARGET_HAS_orc_i64
#define TCG_TARGET_HAS_rot_i32
#define TCG_TARGET_HAS_rot_i64
+/* deposit ops: emitted by tcg_out_deposit_i32/_i64 in tcg-target.c */
+#define TCG_TARGET_HAS_deposit_i32
+#define TCG_TARGET_HAS_deposit_i64
/* optional instructions automatically implemented */
#undef TCG_TARGET_HAS_neg_i32 /* sub r1, r0, r3 */
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c |  115 +++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/ia64/tcg-target.h |    2 +
 2 files changed, 117 insertions(+), 0 deletions(-)