Message ID | 4763ae5461ae14adbb6aca5c925fa0fe81f4f214.1305889001.git.batuzovk@ispras.ru |
---|---|
State | New |
Headers | show |
On 05/20/2011 05:39 AM, Kirill Batuzov wrote: > + static tcg_target_ulong vals[TCG_MAX_TEMPS]; > + static tcg_temp_state state[TCG_MAX_TEMPS]; Any particular reason to make these static? It's 4-6k on the host stack depending on sizeof tcg_target_ulong. Large, but not excessive. I think it's just confusing to have them static, myself. I think it might be clearer to have these two in a structure, rather than two separate arrays. That does waste a bit of memory in padding for 64-bit target, but I think it might clean up the code a bit. > nb_temps = s->nb_temps; > nb_globals = s->nb_globals; > + memset(state, 0, nb_temps * sizeof(tcg_temp_state)); Of course, instead of allocating static structures, you could alloca the memory in the appropriate size... > + case INDEX_op_mov_i32: > +#if TCG_TARGET_REG_BITS == 64 > + case INDEX_op_mov_i64: > +#endif > + if ((state[args[1]] == TCG_TEMP_COPY > + && vals[args[1]] == args[0]) > + || args[0] == args[1]) { > + args += 2; > + gen_opc_buf[op_index] = INDEX_op_nop; > + break; Here, for example, INDEX_op_nop2 would be more appropriate, obviating the need for the arg copy loop from patch 1. > + if (state[args[1]] != TCG_TEMP_CONST) { > + } else { > + /* Source argument is constant. Rewrite the operation and > + let movi case handle it. */ > + } FWIW, I think writing positive tests is clearer than writing negative tests. I.e. reverse the condition and the if/else bodies. > case INDEX_op_brcond_i64: > #endif > + memset(state, 0, nb_temps * sizeof(tcg_temp_state)); Why are you resetting at the branch, rather than at the label? Seems reasonable enough to handle the extended basic block when possible... r~
On 05/20/2011 08:22 PM, Richard Henderson wrote: > I think it might be clearer to have these two in a structure, rather > than two separate arrays. That does waste a bit of memory in padding > for 64-bit target, but I think it might clean up the code a bit. You can use the padding to implement a generation count. O(1) clearing of the array might help performance somewhat. At this point making the arrays static makes sense again, because it allows you to bump the generation count at the beginning of tcg_constant_folding (if you put it on the stack you have to zero the state). It could also help to add a num_copies flag tracking whether a register is a copy of this one. If not, you can avoid processing the output registers in the default case. That would be something like struct { uint16_t state; uint16_t num_copies; uint32_t gen; tcg_target_ulong val; }; Paolo
On Fri, May 20, 2011 at 04:39:29PM +0400, Kirill Batuzov wrote: > Make tcg_constant_folding do copy and constant propagation. It is a > preparational work before actual constant folding. > > Signed-off-by: Kirill Batuzov <batuzovk@ispras.ru> > --- > tcg/optimize.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 files changed, 123 insertions(+), 0 deletions(-) > > diff --git a/tcg/optimize.c b/tcg/optimize.c > index cf31d18..a761c51 100644 > --- a/tcg/optimize.c > +++ b/tcg/optimize.c > @@ -31,22 +31,139 @@ > #include "qemu-common.h" > #include "tcg-op.h" > > +typedef enum { > + TCG_TEMP_UNDEF = 0, > + TCG_TEMP_CONST, > + TCG_TEMP_COPY, > + TCG_TEMP_ANY > +} tcg_temp_state; > + > +static int mov_to_movi(int op) > +{ > + switch (op) { > + case INDEX_op_mov_i32: return INDEX_op_movi_i32; > +#if TCG_TARGET_REG_BITS == 64 > + case INDEX_op_mov_i64: return INDEX_op_movi_i64; > +#endif > + default: > + fprintf(stderr, "Unrecognized operation %d in mov_to_movi.\n", op); > + tcg_abort(); > + } > +} > + > +/* Reset TEMP's state to TCG_TEMP_ANY. If TEMP was a representative of some > + class of equivalent temp's, a new representative should be chosen in this > + class. */ > +static void reset_temp(tcg_temp_state *state, tcg_target_ulong *vals, > + TCGArg temp, int nb_temps, int nb_globals) > +{ > + int i; > + TCGArg new_base; > + new_base = (TCGArg)-1; > + for (i = nb_globals; i < nb_temps; i++) { > + if (state[i] == TCG_TEMP_COPY && vals[i] == temp) { > + if (new_base == ((TCGArg)-1)) { > + new_base = (TCGArg)i; > + state[i] = TCG_TEMP_ANY; > + } else { > + vals[i] = new_base; > + } > + } > + } > + for (i = 0; i < nb_globals; i++) { > + if (state[i] == TCG_TEMP_COPY && vals[i] == temp) { > + if (new_base == ((TCGArg)-1)) { > + state[i] = TCG_TEMP_ANY; > + } else { > + vals[i] = new_base; > + } > + } > + } > + state[temp] = TCG_TEMP_ANY; > +} > + > +/* Propagate constants and copies, fold constant expressions. 
*/ > static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, > TCGArg *args, TCGOpDef *tcg_op_defs) > { > int i, nb_ops, op_index, op, nb_temps, nb_globals; > const TCGOpDef *def; > TCGArg *gen_args; > + /* Array VALS has an element for each temp. > + If this temp holds a constant then its value is kept in VALS' element. > + If this temp is a copy of other ones then this equivalence class' > + representative is kept in VALS' element. > + If this temp is neither copy nor constant then corresponding VALS' > + element is unused. */ > + static tcg_target_ulong vals[TCG_MAX_TEMPS]; > + static tcg_temp_state state[TCG_MAX_TEMPS]; > > nb_temps = s->nb_temps; > nb_globals = s->nb_globals; > + memset(state, 0, nb_temps * sizeof(tcg_temp_state)); > > nb_ops = tcg_opc_ptr - gen_opc_buf; > gen_args = args; > for (op_index = 0; op_index < nb_ops; op_index++) { > op = gen_opc_buf[op_index]; > def = &tcg_op_defs[op]; > + /* Do copy propagation */ > + if (op != INDEX_op_call) { > + for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) { > + if (state[args[i]] == TCG_TEMP_COPY > + && !(def->args_ct[i].ct & TCG_CT_IALIAS) > + && (def->args_ct[i].ct & TCG_CT_REG)) { > + args[i] = vals[args[i]]; > + } > + } > + } > + > + /* Propagate constants through copy operations and do constant > + folding. Constants will be substituted to arguments by register > + allocator where needed and possible. Also detect copies. 
*/ > switch (op) { > + case INDEX_op_mov_i32: > +#if TCG_TARGET_REG_BITS == 64 > + case INDEX_op_mov_i64: > +#endif > + if ((state[args[1]] == TCG_TEMP_COPY > + && vals[args[1]] == args[0]) > + || args[0] == args[1]) { > + args += 2; > + gen_opc_buf[op_index] = INDEX_op_nop; > + break; > + } > + if (state[args[1]] != TCG_TEMP_CONST) { > + reset_temp(state, vals, args[0], nb_temps, nb_globals); > + if (args[1] >= s->nb_globals) { > + state[args[0]] = TCG_TEMP_COPY; > + vals[args[0]] = args[1]; > + } > + gen_args[0] = args[0]; > + gen_args[1] = args[1]; > + gen_args += 2; > + args += 2; > + break; > + } else { > + /* Source argument is constant. Rewrite the operation and > + let movi case handle it. */ > + op = mov_to_movi(op); > + gen_opc_buf[op_index] = op; > + args[1] = vals[args[1]]; > + /* fallthrough */ > + } > + case INDEX_op_movi_i32: > +#if TCG_TARGET_REG_BITS == 64 > + case INDEX_op_movi_i64: > +#endif > + reset_temp(state, vals, args[0], nb_temps, nb_globals); > + state[args[0]] = TCG_TEMP_CONST; > + vals[args[0]] = args[1]; > + gen_args[0] = args[0]; > + gen_args[1] = args[1]; > + gen_args += 2; > + args += 2; > + break; > case INDEX_op_call: > case INDEX_op_jmp: > case INDEX_op_br: > @@ -55,6 +172,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, > #if TCG_TARGET_REG_BITS == 64 > case INDEX_op_brcond_i64: > #endif > + memset(state, 0, nb_temps * sizeof(tcg_temp_state)); > i = (op == INDEX_op_call) ? > (args[0] >> 16) + (args[0] & 0xffff) + 3 : > def->nb_args; > @@ -66,6 +184,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, > } > break; > default: > + /* Default case: we do know nothing about operation so no > + propagation is done. We only trash output args. 
*/ > + for (i = 0; i < def->nb_oargs; i++) { > + reset_temp(state, vals, args[i], nb_temps, nb_globals); > + } > for (i = 0; i < def->nb_args; i++) { > gen_args[i] = args[i]; > } It's not possible to do any optimization across certain ops that have side effect or helper which access globals directly. For helpers this can be easily detected looking at the TCG_CALL_PURE and TCG_CALL_CONST flags. For ops, you should look at TCG_OPF_BB_END, TCG_OPF_CALL_CLOBBER and TCG_OPF_SIDE_EFFECTS flags.
diff --git a/tcg/optimize.c b/tcg/optimize.c index cf31d18..a761c51 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -31,22 +31,139 @@ #include "qemu-common.h" #include "tcg-op.h" +typedef enum { + TCG_TEMP_UNDEF = 0, + TCG_TEMP_CONST, + TCG_TEMP_COPY, + TCG_TEMP_ANY +} tcg_temp_state; + +static int mov_to_movi(int op) +{ + switch (op) { + case INDEX_op_mov_i32: return INDEX_op_movi_i32; +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_mov_i64: return INDEX_op_movi_i64; +#endif + default: + fprintf(stderr, "Unrecognized operation %d in mov_to_movi.\n", op); + tcg_abort(); + } +} + +/* Reset TEMP's state to TCG_TEMP_ANY. If TEMP was a representative of some + class of equivalent temp's, a new representative should be chosen in this + class. */ +static void reset_temp(tcg_temp_state *state, tcg_target_ulong *vals, + TCGArg temp, int nb_temps, int nb_globals) +{ + int i; + TCGArg new_base; + new_base = (TCGArg)-1; + for (i = nb_globals; i < nb_temps; i++) { + if (state[i] == TCG_TEMP_COPY && vals[i] == temp) { + if (new_base == ((TCGArg)-1)) { + new_base = (TCGArg)i; + state[i] = TCG_TEMP_ANY; + } else { + vals[i] = new_base; + } + } + } + for (i = 0; i < nb_globals; i++) { + if (state[i] == TCG_TEMP_COPY && vals[i] == temp) { + if (new_base == ((TCGArg)-1)) { + state[i] = TCG_TEMP_ANY; + } else { + vals[i] = new_base; + } + } + } + state[temp] = TCG_TEMP_ANY; +} + +/* Propagate constants and copies, fold constant expressions. */ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, TCGArg *args, TCGOpDef *tcg_op_defs) { int i, nb_ops, op_index, op, nb_temps, nb_globals; const TCGOpDef *def; TCGArg *gen_args; + /* Array VALS has an element for each temp. + If this temp holds a constant then its value is kept in VALS' element. + If this temp is a copy of other ones then this equivalence class' + representative is kept in VALS' element. + If this temp is neither copy nor constant then corresponding VALS' + element is unused. 
*/ + static tcg_target_ulong vals[TCG_MAX_TEMPS]; + static tcg_temp_state state[TCG_MAX_TEMPS]; nb_temps = s->nb_temps; nb_globals = s->nb_globals; + memset(state, 0, nb_temps * sizeof(tcg_temp_state)); nb_ops = tcg_opc_ptr - gen_opc_buf; gen_args = args; for (op_index = 0; op_index < nb_ops; op_index++) { op = gen_opc_buf[op_index]; def = &tcg_op_defs[op]; + /* Do copy propagation */ + if (op != INDEX_op_call) { + for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) { + if (state[args[i]] == TCG_TEMP_COPY + && !(def->args_ct[i].ct & TCG_CT_IALIAS) + && (def->args_ct[i].ct & TCG_CT_REG)) { + args[i] = vals[args[i]]; + } + } + } + + /* Propagate constants through copy operations and do constant + folding. Constants will be substituted to arguments by register + allocator where needed and possible. Also detect copies. */ switch (op) { + case INDEX_op_mov_i32: +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_mov_i64: +#endif + if ((state[args[1]] == TCG_TEMP_COPY + && vals[args[1]] == args[0]) + || args[0] == args[1]) { + args += 2; + gen_opc_buf[op_index] = INDEX_op_nop; + break; + } + if (state[args[1]] != TCG_TEMP_CONST) { + reset_temp(state, vals, args[0], nb_temps, nb_globals); + if (args[1] >= s->nb_globals) { + state[args[0]] = TCG_TEMP_COPY; + vals[args[0]] = args[1]; + } + gen_args[0] = args[0]; + gen_args[1] = args[1]; + gen_args += 2; + args += 2; + break; + } else { + /* Source argument is constant. Rewrite the operation and + let movi case handle it. 
*/ + op = mov_to_movi(op); + gen_opc_buf[op_index] = op; + args[1] = vals[args[1]]; + /* fallthrough */ + } + case INDEX_op_movi_i32: +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_movi_i64: +#endif + reset_temp(state, vals, args[0], nb_temps, nb_globals); + state[args[0]] = TCG_TEMP_CONST; + vals[args[0]] = args[1]; + gen_args[0] = args[0]; + gen_args[1] = args[1]; + gen_args += 2; + args += 2; + break; case INDEX_op_call: case INDEX_op_jmp: case INDEX_op_br: @@ -55,6 +172,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, #if TCG_TARGET_REG_BITS == 64 case INDEX_op_brcond_i64: #endif + memset(state, 0, nb_temps * sizeof(tcg_temp_state)); i = (op == INDEX_op_call) ? (args[0] >> 16) + (args[0] & 0xffff) + 3 : def->nb_args; @@ -66,6 +184,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } break; default: + /* Default case: we do know nothing about operation so no + propagation is done. We only trash output args. */ + for (i = 0; i < def->nb_oargs; i++) { + reset_temp(state, vals, args[i], nb_temps, nb_globals); + } for (i = 0; i < def->nb_args; i++) { gen_args[i] = args[i]; }
Make tcg_constant_folding do copy and constant propagation. It is a preparational work before actual constant folding. Signed-off-by: Kirill Batuzov <batuzovk@ispras.ru> --- tcg/optimize.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 123 insertions(+), 0 deletions(-)