@@ -32,63 +32,124 @@
#include "tcg-op.h"
typedef enum {
- TCG_TEMP_UNDEF = 0,
+ TCG_TEMP_VAR = 0,
TCG_TEMP_CONST,
TCG_TEMP_COPY,
- TCG_TEMP_ANY
} tcg_temp_state;
struct tcg_temp_info {
tcg_temp_state state;
- uint16_t num_copies;
+ uint16_t prev_copy;
+ uint16_t next_copy;
tcg_target_ulong val;
};
-/* Reset TEMP's state to TCG_TEMP_ANY. If TEMP was a representative of some
- class of equivalent temp's, a new representative should be chosen in this
- class. */
-static void reset_temp(struct tcg_temp_info *temps, TCGArg temp, int nb_temps,
- int nb_globals)
+/* Array TEMPS has an element for each temp.
+ If a temp holds a constant, its value is kept in .VAL.
+ If a temp is a copy of another temp, the representative of its
+ equivalence class is kept in .VAL. */
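+/* Copies of one value form a circular doubly-linked list through
+ PREV_COPY/NEXT_COPY; a temp that belongs to no class points back
+ at itself. */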
+static struct tcg_temp_info temps[TCG_MAX_TEMPS];
+
+/* Avoid some of the rampant if-deffery related to 32 vs 64-bit operations. */
+#if TCG_TARGET_REG_BITS == 64
+#define CASE_OP_32_64(x) \
+ glue(glue(case INDEX_op_, x), _i32): \
+ glue(glue(case INDEX_op_, x), _i64)
+#else
+#define CASE_OP_32_64(x) \
+ glue(glue(case INDEX_op_, x), _i32)
+#endif
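+
+/* For example, CASE_OP_32_64(add) expands to "case INDEX_op_add_i32:
+ case INDEX_op_add_i64:" when TCG_TARGET_REG_BITS == 64, and to the
+ _i32 case alone otherwise. */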
+
+
+/* Reset all temporaries to TCG_TEMP_VAR. */
+static void reset_all_temps(int nb_temps)
{
int i;
- TCGArg new_base;
- if (temps[temp].state == TCG_TEMP_COPY) {
- temps[temps[temp].val].num_copies--;
+ for (i = 0; i < nb_temps; ++i) {
+ temps[i].state = TCG_TEMP_VAR;
+ temps[i].prev_copy = i;
+ temps[i].next_copy = i;
}
- if (temps[temp].num_copies > 0) {
- new_base = (TCGArg)-1;
- for (i = nb_globals; i < nb_temps; i++) {
- if (temps[i].state == TCG_TEMP_COPY && temps[i].val == temp) {
- if (new_base == ((TCGArg)-1)) {
- new_base = (TCGArg)i;
- temps[i].state = TCG_TEMP_ANY;
- temps[i].num_copies = 0;
- } else {
- temps[i].val = new_base;
- temps[new_base].num_copies++;
- }
+}
+
+/* Reset T's state to TCG_TEMP_VAR. If T was the representative of
+ some class of equivalent temps, a new representative is chosen
+ from that class. */
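+/* E.g. resetting T in the class T -> A -> B -> T promotes A to be
+ the representative, repoints B's .val at A, and leaves the class
+ A -> B -> A. */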
+static void reset_temp(int t)
+{
+ int prev = temps[t].prev_copy;
+ int next = temps[t].next_copy;
+
+ switch (temps[t].state) {
+ case TCG_TEMP_VAR:
+ /* If NEXT (and PREV) equals T, there are no copies. Otherwise,
+ promote NEXT to be the new representative of the equivalence
+ class. */
+ if (next != t) {
+ int i;
+ temps[next].state = TCG_TEMP_VAR;
+ for (i = temps[next].next_copy; i != t; i = temps[i].next_copy) {
+ temps[i].val = next;
}
}
- for (i = 0; i < nb_globals; i++) {
- if (temps[i].state == TCG_TEMP_COPY && temps[i].val == temp) {
- if (new_base == ((TCGArg)-1)) {
- temps[i].state = TCG_TEMP_ANY;
- temps[i].num_copies = 0;
- } else {
- temps[i].val = new_base;
- temps[new_base].num_copies++;
- }
- }
+ /* FALLTHRU */
+
+ case TCG_TEMP_COPY:
+ /* Unlink T from the equivalence class. */
+ temps[prev].next_copy = next;
+ temps[next].prev_copy = prev;
+ break;
+
+ case TCG_TEMP_CONST:
+ break;
+
+ default:
+ tcg_abort();
+ }
+
+ temps[t].state = TCG_TEMP_VAR;
+ temps[t].prev_copy = t;
+ temps[t].next_copy = t;
+}
+
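+/* Flush the state of all non-local temporaries; used at the end
+ of a basic block, since such temps do not survive it. */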
+static void reset_temps_bb(TCGContext *s)
+{
+ int i, nb_temps = s->nb_temps;
+
+ for (i = s->nb_globals; i < nb_temps; ++i) {
+ if (!s->temps[i].temp_local) {
+ reset_temp(i);
}
}
- temps[temp].state = TCG_TEMP_ANY;
- temps[temp].num_copies = 0;
}
-static int op_bits(int op)
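+/* Make DEST a copy of SRC, linking DEST into the equivalence class
+ of SRC's representative. */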
+static void make_copy(int dest, int src)
+{
+ int prev, next, head = src;
+
+ if (temps[head].state == TCG_TEMP_COPY) {
+ head = temps[head].val;
+ }
+
+ temps[dest].state = TCG_TEMP_COPY;
+ temps[dest].val = head;
+
+ /* Insert at the tail of the list. This will tend to choose
+ the next-oldest copy when HEAD gets flushed. */
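+ /* E.g. if HEAD's class is HEAD -> X -> HEAD, inserting D gives
+ HEAD -> X -> D -> HEAD; resetting HEAD then promotes X, the
+ oldest remaining copy, to representative. */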
+ prev = temps[head].prev_copy;
+ next = head;
+
+ temps[prev].next_copy = dest;
+ temps[dest].prev_copy = prev;
+ temps[dest].next_copy = next;
+ temps[next].prev_copy = dest;
+}
+
+static int op_bits(TCGOpcode op)
{
switch (op) {
case INDEX_op_mov_i32:
+ case INDEX_op_movi_i32:
case INDEX_op_add_i32:
case INDEX_op_sub_i32:
case INDEX_op_mul_i32:
@@ -101,6 +162,7 @@ static int op_bits(int op)
case INDEX_op_rotl_i32:
case INDEX_op_rotr_i32:
case INDEX_op_not_i32:
+ case INDEX_op_neg_i32:
case INDEX_op_ext8s_i32:
case INDEX_op_ext16s_i32:
case INDEX_op_ext8u_i32:
@@ -108,6 +170,7 @@ static int op_bits(int op)
return 32;
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_mov_i64:
+ case INDEX_op_movi_i64:
case INDEX_op_add_i64:
case INDEX_op_sub_i64:
case INDEX_op_mul_i64:
@@ -120,6 +183,7 @@ static int op_bits(int op)
case INDEX_op_rotl_i64:
case INDEX_op_rotr_i64:
case INDEX_op_not_i64:
+ case INDEX_op_neg_i64:
case INDEX_op_ext8s_i64:
case INDEX_op_ext16s_i64:
case INDEX_op_ext32s_i64:
@@ -134,163 +198,105 @@ static int op_bits(int op)
}
}
-static int op_to_movi(int op)
+static int op_to_movi(TCGOpcode op)
{
- if (op_bits(op) == 32) {
+ switch (op_bits(op)) {
+ case 32:
return INDEX_op_movi_i32;
- }
#if TCG_TARGET_REG_BITS == 64
- if (op_bits(op) == 64) {
+ case 64:
return INDEX_op_movi_i64;
- }
#endif
+ }
tcg_abort();
}
-static int op_to_mov(int op)
+static int op_to_mov(TCGOpcode op)
{
- if (op_bits(op) == 32) {
+ switch (op_bits(op)) {
+ case 32:
return INDEX_op_mov_i32;
- }
#if TCG_TARGET_REG_BITS == 64
- if (op_bits(op) == 64) {
+ case 64:
return INDEX_op_mov_i64;
- }
#endif
+ }
tcg_abort();
}
-static TCGArg do_constant_folding_2(int op, TCGArg x, TCGArg y)
+static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
switch (op) {
- case INDEX_op_add_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_add_i64:
-#endif
+ CASE_OP_32_64(add):
return x + y;
-
- case INDEX_op_sub_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_sub_i64:
-#endif
+ CASE_OP_32_64(sub):
return x - y;
-
- case INDEX_op_mul_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_mul_i64:
-#endif
+ CASE_OP_32_64(mul):
return x * y;
-
- case INDEX_op_and_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_and_i64:
-#endif
+ CASE_OP_32_64(and):
return x & y;
-
- case INDEX_op_or_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_or_i64:
-#endif
+ CASE_OP_32_64(or):
return x | y;
-
- case INDEX_op_xor_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_xor_i64:
-#endif
+ CASE_OP_32_64(xor):
return x ^ y;
- case INDEX_op_shl_i32:
-#if TCG_TARGET_REG_BITS == 64
- y &= 0xffffffff;
- case INDEX_op_shl_i64:
-#endif
- return x << y;
+ CASE_OP_32_64(neg):
+ return -x;
+ CASE_OP_32_64(not):
+ return ~x;
+
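+ /* Shift and rotate counts are masked to the operation width;
+ the extra masking below also keeps the C shift amounts in
+ range. */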
+ CASE_OP_32_64(shl):
+ return x << (y & (op_bits(op) - 1));
case INDEX_op_shr_i32:
+ return (uint32_t)x >> (y & 31);
#if TCG_TARGET_REG_BITS == 64
- x &= 0xffffffff;
- y &= 0xffffffff;
case INDEX_op_shr_i64:
+ return (uint64_t)x >> (y & 63);
#endif
- /* Assuming TCGArg to be unsigned */
- return x >> y;
case INDEX_op_sar_i32:
-#if TCG_TARGET_REG_BITS == 64
- x &= 0xffffffff;
- y &= 0xffffffff;
-#endif
- return (int32_t)x >> (int32_t)y;
-
+ return (int32_t)x >> (y & 31);
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_sar_i64:
- return (int64_t)x >> (int64_t)y;
+ return (int64_t)x >> (y & 63);
#endif
case INDEX_op_rotr_i32:
-#if TCG_TARGET_REG_BITS == 64
- x &= 0xffffffff;
- y &= 0xffffffff;
-#endif
- x = (x << (32 - y)) | (x >> y);
+ y &= 31;
+ x = ((uint32_t)x << ((32 - y) & 31)) | ((uint32_t)x >> y);
return x;
-
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_rotr_i64:
- x = (x << (64 - y)) | (x >> y);
+ y &= 63;
+ x = ((uint64_t)x << ((64 - y) & 63)) | ((uint64_t)x >> y);
return x;
#endif
case INDEX_op_rotl_i32:
-#if TCG_TARGET_REG_BITS == 64
- x &= 0xffffffff;
- y &= 0xffffffff;
-#endif
- x = (x << y) | (x >> (32 - y));
+ y &= 31;
+ x = ((uint32_t)x << y) | ((uint32_t)x >> ((32 - y) & 31));
return x;
-
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_rotl_i64:
- x = (x << y) | (x >> (64 - y));
+ y &= 63;
+ x = ((uint64_t)x << y) | ((uint64_t)x >> ((64 - y) & 63));
return x;
#endif
- case INDEX_op_not_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_not_i64:
-#endif
- return ~x;
-
- case INDEX_op_ext8s_i32:
- return (int32_t)(int8_t)x;
-
- case INDEX_op_ext16s_i32:
- return (int32_t)(int16_t)x;
-
- case INDEX_op_ext8u_i32:
- return (uint32_t)(uint8_t)x;
-
- case INDEX_op_ext16u_i32:
- return (uint32_t)(uint16_t)x;
-
+ CASE_OP_32_64(ext8s):
+ return (int8_t)x;
+ CASE_OP_32_64(ext8u):
+ return (uint8_t)x;
+ CASE_OP_32_64(ext16s):
+ return (int16_t)x;
+ CASE_OP_32_64(ext16u):
+ return (uint16_t)x;
#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_ext8s_i64:
- return (int64_t)(int8_t)x;
-
- case INDEX_op_ext16s_i64:
- return (int64_t)(int16_t)x;
-
case INDEX_op_ext32s_i64:
- return (int64_t)(int32_t)x;
-
- case INDEX_op_ext8u_i64:
- return (uint64_t)(uint8_t)x;
-
- case INDEX_op_ext16u_i64:
- return (uint64_t)(uint16_t)x;
-
+ return (int32_t)x;
case INDEX_op_ext32u_i64:
- return (uint64_t)(uint32_t)x;
+ return (uint32_t)x;
#endif
default:
@@ -300,7 +306,7 @@ static TCGArg do_constant_folding_2(int op, TCGArg x, TCGArg y)
}
}
-static TCGArg do_constant_folding(int op, TCGArg x, TCGArg y)
+static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
TCGArg res = do_constant_folding_2(op, x, y);
#if TCG_TARGET_REG_BITS == 64
@@ -315,310 +321,320 @@ static TCGArg do_constant_folding(int op, TCGArg x, TCGArg y)
static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
TCGArg *args, TCGOpDef *tcg_op_defs)
{
- int i, nb_ops, op_index, op, nb_temps, nb_globals;
- const TCGOpDef *def;
+ int i, nb_ops, op_index, nb_temps, nb_globals;
TCGArg *gen_args;
- TCGArg tmp;
- /* Array VALS has an element for each temp.
- If this temp holds a constant then its value is kept in VALS' element.
- If this temp is a copy of other ones then this equivalence class'
- representative is kept in VALS' element.
- If this temp is neither copy nor constant then corresponding VALS'
- element is unused. */
- struct tcg_temp_info temps[TCG_MAX_TEMPS];
nb_temps = s->nb_temps;
nb_globals = s->nb_globals;
- memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+ reset_all_temps(nb_temps);
nb_ops = tcg_opc_ptr - gen_opc_buf;
gen_args = args;
for (op_index = 0; op_index < nb_ops; op_index++) {
- op = gen_opc_buf[op_index];
- def = &tcg_op_defs[op];
- /* Do copy propagation */
- if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {
- assert(op != INDEX_op_call);
- for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
- if (temps[args[i]].state == TCG_TEMP_COPY
- && !(def->args_ct[i].ct & TCG_CT_IALIAS)
- && (def->args_ct[i].ct & TCG_CT_REG)) {
- args[i] = temps[args[i]].val;
+ TCGOpcode op = gen_opc_buf[op_index];
+ const TCGOpDef *def = &tcg_op_defs[op];
+ TCGArg arg0, arg1, arg2, tmp;
+
+ /* The format of CALL operations is special: the leading argument
+ packs the output count into the high 16 bits and the input count
+ into the low 16 bits, followed by the outputs, the inputs, and
+ the constant arguments (flags first). */
+ if (op == INDEX_op_call) {
+ int n_in, n_out, flags;
+
+ n_in = *gen_args++ = *args++; /* in/out argument counts */
+ n_out = n_in >> 16;
+ n_in &= 0xffff;
+
+ /* Neither pure nor const functions modify globals. */
+ flags = args[n_out + n_in];
+ if ((flags & (TCG_CALL_PURE | TCG_CALL_CONST)) == 0) {
+ for (i = 0; i < nb_globals; i++) {
+ reset_temp(i);
}
}
- }
- /* Simplify expression if possible. */
- switch (op) {
- case INDEX_op_add_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_add_i64:
-#endif
- if (temps[args[1]].state == TCG_TEMP_CONST) {
- /* 0 + x == x + 0 */
- tmp = args[1];
- args[1] = args[2];
- args[2] = tmp;
+ /* All outputs are now variable. */
+ for (i = 0; i < n_out; i++) {
+ tmp = *args++;
+ *gen_args++ = tmp;
+ reset_temp(tmp);
}
- /* Fallthrough */
- case INDEX_op_sub_i32:
- case INDEX_op_shl_i32:
- case INDEX_op_shr_i32:
- case INDEX_op_sar_i32:
- case INDEX_op_rotl_i32:
- case INDEX_op_rotr_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_sub_i64:
- case INDEX_op_shl_i64:
- case INDEX_op_shr_i64:
- case INDEX_op_sar_i64:
- case INDEX_op_rotl_i64:
- case INDEX_op_rotr_i64:
-#endif
- if (temps[args[1]].state == TCG_TEMP_CONST) {
- /* Proceed with possible constant folding. */
- break;
- }
- if (temps[args[2]].state == TCG_TEMP_CONST
- && temps[args[2]].val == 0) {
- if ((temps[args[0]].state == TCG_TEMP_COPY
- && temps[args[0]].val == args[1])
- || args[0] == args[1]) {
- args += 3;
- gen_opc_buf[op_index] = INDEX_op_nop;
- } else {
- reset_temp(temps, args[0], nb_temps, nb_globals);
- if (args[1] >= s->nb_globals) {
- temps[args[0]].state = TCG_TEMP_COPY;
- temps[args[0]].val = args[1];
- temps[args[1]].num_copies++;
- }
- gen_opc_buf[op_index] = op_to_mov(op);
- gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args += 2;
- args += 3;
+
+ /* Copy propagate into input arguments. */
+ for (i = 0; i < n_in; i++) {
+ tmp = *args++;
+ if (temps[tmp].state == TCG_TEMP_COPY) {
+ tmp = temps[tmp].val;
}
- continue;
+ *gen_args++ = tmp;
}
- break;
- case INDEX_op_mul_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_mul_i64:
-#endif
- if ((temps[args[1]].state == TCG_TEMP_CONST
- && temps[args[1]].val == 0)
- || (temps[args[2]].state == TCG_TEMP_CONST
- && temps[args[2]].val == 0)) {
- reset_temp(temps, args[0], nb_temps, nb_globals);
- temps[args[0]].state = TCG_TEMP_CONST;
- temps[args[0]].val = 0;
- assert(temps[args[0]].num_copies == 0);
- gen_opc_buf[op_index] = op_to_movi(op);
- gen_args[0] = args[0];
- gen_args[1] = 0;
- args += 3;
- gen_args += 2;
- continue;
+
+ for (i = def->nb_cargs; i > 0; --i) {
+ *gen_args++ = *args++;
}
- break;
- case INDEX_op_or_i32:
- case INDEX_op_and_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_and_i64:
- case INDEX_op_or_i64:
-#endif
- if (args[1] == args[2]) {
- if (args[1] == args[0]) {
- args += 3;
- gen_opc_buf[op_index] = INDEX_op_nop;
- } else {
- reset_temp(temps, args[0], nb_temps, nb_globals);
- if (args[1] >= s->nb_globals) {
- temps[args[0]].state = TCG_TEMP_COPY;
- temps[args[0]].val = args[1];
- assert(temps[args[0]].num_copies == 0);
- }
- gen_opc_buf[op_index] = op_to_mov(op);
- gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args += 2;
- args += 3;
- }
- continue;
+ continue;
+ }
+
+ /* Do copy propagation. */
+ for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
+ tmp = args[i];
+ if (temps[tmp].state == TCG_TEMP_COPY
+ && (def->args_ct[i].ct & TCG_CT_REG)) {
+ args[i] = temps[tmp].val;
}
- break;
}
- /* Propagate constants through copy operations and do constant
- folding. Constants will be substituted to arguments by register
- allocator where needed and possible. Also detect copies. */
+ /* Almost everything below operates on unary or binary ops, so
+ pull their arguments into local variables now. At the same
+ time, filter out any ops that we do not handle at all. */
+ arg2 = 0;
switch (op) {
- case INDEX_op_mov_i32:
+ CASE_OP_32_64(add):
+ CASE_OP_32_64(sub):
+ CASE_OP_32_64(mul):
+ CASE_OP_32_64(and):
+ CASE_OP_32_64(or):
+ CASE_OP_32_64(xor):
+ CASE_OP_32_64(shl):
+ CASE_OP_32_64(shr):
+ CASE_OP_32_64(sar):
+ CASE_OP_32_64(rotl):
+ CASE_OP_32_64(rotr):
+ arg2 = args[2];
+ /* FALLTHRU */
+
+ CASE_OP_32_64(mov):
+ CASE_OP_32_64(movi):
+ CASE_OP_32_64(not):
+ CASE_OP_32_64(neg):
+ CASE_OP_32_64(ext8s):
+ CASE_OP_32_64(ext8u):
+ CASE_OP_32_64(ext16s):
+ CASE_OP_32_64(ext16u):
#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_mov_i64:
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext32u_i64:
#endif
- if ((temps[args[1]].state == TCG_TEMP_COPY
- && temps[args[1]].val == args[0])
- || args[0] == args[1]) {
- args += 2;
- gen_opc_buf[op_index] = INDEX_op_nop;
- break;
+ arg0 = args[0];
+ arg1 = args[1];
+ break;
+
+ case INDEX_op_set_label:
+ /* Flush info at the beginning of an extended basic block. */
+ reset_all_temps(nb_temps);
+ *gen_args++ = *args++;
+ continue;
+
+ default:
+ /* At the end of a basic block (though not of an extended one),
+ we must flush all bb temporaries. */
+ if (def->flags & TCG_OPF_BB_END) {
+ reset_temps_bb(s);
}
- if (temps[args[1]].state != TCG_TEMP_CONST) {
- reset_temp(temps, args[0], nb_temps, nb_globals);
- if (args[1] >= s->nb_globals) {
- temps[args[0]].state = TCG_TEMP_COPY;
- temps[args[0]].val = args[1];
- temps[args[1]].num_copies++;
- }
- gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args += 2;
- args += 2;
- break;
+
+ /* An unhandled opcode. Outputs are variable. */
+ for (i = 0; i < def->nb_oargs; i++) {
+ reset_temp(args[i]);
+ }
+ for (i = def->nb_args; i > 0; --i) {
+ *gen_args++ = *args++;
+ }
+ continue;
+ }
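+
+ /* Each op that remains flows through up to four steps: full
+ constant folding, canonicalization of commutative operands,
+ algebraic simplification, and final emission as a movi, mov,
+ or the original operation. */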
+
+ /* Do full constant folding. */
+ switch (op) {
+ CASE_OP_32_64(add):
+ CASE_OP_32_64(sub):
+ CASE_OP_32_64(mul):
+ CASE_OP_32_64(and):
+ CASE_OP_32_64(or):
+ CASE_OP_32_64(xor):
+ CASE_OP_32_64(shl):
+ CASE_OP_32_64(shr):
+ CASE_OP_32_64(sar):
+ CASE_OP_32_64(rotl):
+ CASE_OP_32_64(rotr):
+ if (temps[arg1].state == TCG_TEMP_CONST
+ && temps[arg2].state == TCG_TEMP_CONST) {
+ arg1 = do_constant_folding(op, temps[arg1].val,
+ temps[arg2].val);
+ gen_opc_buf[op_index] = op_to_movi(op);
+ goto do_const_prop;
}
- /* Source argument is constant. Rewrite the operation and
- let movi case handle it. */
- op = op_to_movi(op);
- gen_opc_buf[op_index] = op;
- args[1] = temps[args[1]].val;
- /* fallthrough */
- case INDEX_op_movi_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_movi_i64:
-#endif
- reset_temp(temps, args[0], nb_temps, nb_globals);
- temps[args[0]].state = TCG_TEMP_CONST;
- temps[args[0]].val = args[1];
- assert(temps[args[0]].num_copies == 0);
- gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args += 2;
- args += 2;
break;
- case INDEX_op_not_i32:
- case INDEX_op_ext8s_i32:
- case INDEX_op_ext16s_i32:
- case INDEX_op_ext8u_i32:
- case INDEX_op_ext16u_i32:
+
+ CASE_OP_32_64(not):
+ CASE_OP_32_64(neg):
+ CASE_OP_32_64(ext8s):
+ CASE_OP_32_64(ext8u):
+ CASE_OP_32_64(ext16s):
+ CASE_OP_32_64(ext16u):
#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_not_i64:
- case INDEX_op_ext8s_i64:
- case INDEX_op_ext16s_i64:
case INDEX_op_ext32s_i64:
- case INDEX_op_ext8u_i64:
- case INDEX_op_ext16u_i64:
case INDEX_op_ext32u_i64:
#endif
- if (temps[args[1]].state == TCG_TEMP_CONST) {
+ if (temps[arg1].state == TCG_TEMP_CONST) {
+ arg1 = do_constant_folding(op, temps[arg1].val, 0);
gen_opc_buf[op_index] = op_to_movi(op);
- gen_args[0] = args[0];
- gen_args[1] = do_constant_folding(op, temps[args[1]].val, 0);
- reset_temp(temps, gen_args[0], nb_temps, nb_globals);
- temps[gen_args[0]].state = TCG_TEMP_CONST;
- temps[gen_args[0]].val = gen_args[1];
- assert(temps[gen_args[0]].num_copies == 0);
- gen_args += 2;
- args += 2;
- break;
- } else {
- reset_temp(temps, args[0], nb_temps, nb_globals);
- gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args += 2;
- args += 2;
- break;
+ goto do_const_prop;
}
- case INDEX_op_or_i32:
- case INDEX_op_and_i32:
- case INDEX_op_xor_i32:
- case INDEX_op_add_i32:
- case INDEX_op_sub_i32:
- case INDEX_op_mul_i32:
- case INDEX_op_shl_i32:
- case INDEX_op_shr_i32:
- case INDEX_op_sar_i32:
- case INDEX_op_rotl_i32:
- case INDEX_op_rotr_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_and_i64:
- case INDEX_op_or_i64:
- case INDEX_op_xor_i64:
- case INDEX_op_add_i64:
- case INDEX_op_sub_i64:
- case INDEX_op_mul_i64:
- case INDEX_op_shl_i64:
- case INDEX_op_shr_i64:
- case INDEX_op_sar_i64:
- case INDEX_op_rotl_i64:
- case INDEX_op_rotr_i64:
-#endif
- if (temps[args[1]].state == TCG_TEMP_CONST
- && temps[args[2]].state == TCG_TEMP_CONST) {
- gen_opc_buf[op_index] = op_to_movi(op);
- gen_args[0] = args[0];
- gen_args[1] =
- do_constant_folding(op, temps[args[1]].val,
- temps[args[2]].val);
- reset_temp(temps, gen_args[0], nb_temps, nb_globals);
- temps[gen_args[0]].state = TCG_TEMP_CONST;
- temps[gen_args[0]].val = gen_args[1];
- assert(temps[gen_args[0]].num_copies == 0);
- gen_args += 2;
- args += 3;
- break;
- } else {
- reset_temp(temps, args[0], nb_temps, nb_globals);
- gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args[2] = args[2];
- gen_args += 3;
- args += 3;
- break;
+ break;
+
+ default:
+ break;
+ }
+
+ /* For commutative operations, put the constant second; the
+ simplifications below only look for a constant there. Also swap
+ the operands when the destination matches the second source,
+ which helps targets where an output must match an input. */
+ switch (op) {
+ CASE_OP_32_64(add):
+ CASE_OP_32_64(mul):
+ CASE_OP_32_64(and):
+ CASE_OP_32_64(or):
+ CASE_OP_32_64(xor):
+ if (temps[arg1].state == TCG_TEMP_CONST || arg0 == arg2) {
+ tmp = arg1;
+ arg1 = arg2;
+ arg2 = tmp;
}
- case INDEX_op_call:
- for (i = 0; i < nb_globals; i++) {
- reset_temp(temps, i, nb_temps, nb_globals);
+ break;
+
+ default:
+ break;
+ }
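+
+ /* E.g. "add t0, c, t1" becomes "add t0, t1, c", and
+ "add t0, t1, t0" becomes "add t0, t0, t1". */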
+
+ /* Simplify expressions involving one constant. */
+ switch (op) {
+ CASE_OP_32_64(add):
+ CASE_OP_32_64(sub):
+ CASE_OP_32_64(shl):
+ CASE_OP_32_64(shr):
+ CASE_OP_32_64(sar):
+ CASE_OP_32_64(rotl):
+ CASE_OP_32_64(rotr):
+ CASE_OP_32_64(or):
+ CASE_OP_32_64(xor):
+ if (temps[arg2].state == TCG_TEMP_CONST && temps[arg2].val == 0) {
+ goto do_copy_const_prop;
}
- for (i = 0; i < (args[0] >> 16); i++) {
- reset_temp(temps, args[i + 1], nb_temps, nb_globals);
+ break;
+
+ CASE_OP_32_64(mul):
+ CASE_OP_32_64(and):
+ if (temps[arg2].state == TCG_TEMP_CONST && temps[arg2].val == 0) {
+ arg1 = 0;
+ goto do_const_prop;
}
- i = (args[0] >> 16) + (args[0] & 0xffff) + 3;
- while (i) {
- *gen_args = *args;
- args++;
- gen_args++;
- i--;
+ break;
+
+ default:
+ break;
+ }
+
+ /* Simplify expressions involving no constants. */
+ switch (op) {
+ CASE_OP_32_64(and):
+ CASE_OP_32_64(or):
+ if (arg1 == arg2) {
+ goto do_copy_const_prop;
}
break;
- case INDEX_op_set_label:
- case INDEX_op_jmp:
- case INDEX_op_br:
- case INDEX_op_brcond_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_brcond_i64:
-#endif
- memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
- for (i = 0; i < def->nb_args; i++) {
- *gen_args = *args;
- args++;
- gen_args++;
+
+ CASE_OP_32_64(xor):
+ if (arg1 == arg2) {
+ arg1 = 0;
+ goto do_const_prop;
}
break;
+
default:
- /* Default case: we do know nothing about operation so no
- propagation is done. We only trash output args. */
- for (i = 0; i < def->nb_oargs; i++) {
- reset_temp(temps, args[i], nb_temps, nb_globals);
+ break;
+ }
+
+ /* Finish constant and copy propagation. */
+ switch (op) {
+ CASE_OP_32_64(mov):
+ do_copy_const_prop:
+ /* Notice copies back to self. */
+ if (arg0 == arg1) {
+ do_nop:
+ gen_opc_buf[op_index] = INDEX_op_nop;
+ break;
+ }
+
+ if (temps[arg1].state == TCG_TEMP_CONST) {
+ arg1 = temps[arg1].val;
+ goto do_const_prop;
}
- for (i = 0; i < def->nb_args; i++) {
- gen_args[i] = args[i];
+
+ /* Notice copies through another member of the equivalence class. */
+ for (i = temps[arg1].next_copy; i != arg1; i = temps[i].next_copy) {
+ if (i == arg0) {
+ goto do_nop;
+ }
}
- args += def->nb_args;
- gen_args += def->nb_args;
+
+ /* Set up the equivalence class for copy propagation. */
+ reset_temp(arg0);
+ make_copy(arg0, arg1);
+
+ /* Emit the move. Note that we can arrive here via any path
+ that simplifies to a move, so OP may not itself be a move. */
+ gen_opc_buf[op_index] = op_to_mov(op);
+ gen_args[0] = arg0;
+ gen_args[1] = arg1;
+ gen_args += 2;
break;
+
+ CASE_OP_32_64(movi):
+ do_const_prop:
+ reset_temp(arg0);
+ temps[arg0].state = TCG_TEMP_CONST;
+ temps[arg0].val = arg1;
+
+ gen_opc_buf[op_index] = op_to_movi(op);
+ gen_args[0] = arg0;
+ gen_args[1] = arg1;
+ gen_args += 2;
+ break;
+
+ CASE_OP_32_64(add):
+ CASE_OP_32_64(sub):
+ CASE_OP_32_64(mul):
+ CASE_OP_32_64(and):
+ CASE_OP_32_64(or):
+ CASE_OP_32_64(xor):
+ CASE_OP_32_64(shl):
+ CASE_OP_32_64(shr):
+ CASE_OP_32_64(sar):
+ CASE_OP_32_64(rotl):
+ CASE_OP_32_64(rotr):
+ /* Given that the opcode is already as simple as we can make it,
+ and the result is not a simple copy or constant, the output
+ must now be variable. */
+ reset_temp(arg0);
+ gen_args[0] = arg0;
+ gen_args[1] = arg1;
+ gen_args[2] = arg2;
+ gen_args += 3;
+ break;
+
+ CASE_OP_32_64(not):
+ CASE_OP_32_64(neg):
+ CASE_OP_32_64(ext8s):
+ CASE_OP_32_64(ext8u):
+ CASE_OP_32_64(ext16s):
+ CASE_OP_32_64(ext16u):
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext32u_i64:
+#endif
+ reset_temp(arg0);
+ gen_args[0] = arg0;
+ gen_args[1] = arg1;
+ gen_args += 2;
+ break;
+
+ default:
+ tcg_abort();
}
+ args += def->nb_args;
}
return gen_args;