Message ID | 1365479139-18737-1-git-send-email-lig.fnst@cn.fujitsu.com |
---|---|
State | New |
Headers | show |
Hi liguang, Just to be curious, how much performance improvement this patch can get? Regards, chenwj On Tue, Apr 09, 2013 at 11:45:39AM +0800, liguang wrote: > Signed-off-by: liguang <lig.fnst@cn.fujitsu.com> > --- > target-arm/translate.c | 17 ++++++++--------- > target-i386/translate.c | 17 ++++++++--------- > target-mips/translate.c | 16 ++++++++-------- > 3 files changed, 24 insertions(+), 26 deletions(-) > > diff --git a/target-arm/translate.c b/target-arm/translate.c > index 35a21be..c0c080d 100644 > --- a/target-arm/translate.c > +++ b/target-arm/translate.c > @@ -9806,11 +9806,10 @@ static inline void gen_intermediate_code_internal(CPUARMState *env, > cpu_M0 = tcg_temp_new_i64(); > next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; > lj = -1; > - num_insns = 0; > max_insns = tb->cflags & CF_COUNT_MASK; > - if (max_insns == 0) > + if (max_insns == 0) { > max_insns = CF_COUNT_MASK; > - > + } > gen_tb_start(); > > tcg_clear_temp_count(); > @@ -9889,9 +9888,9 @@ static inline void gen_intermediate_code_internal(CPUARMState *env, > if (search_pc) { > j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf; > if (lj < j) { > - lj++; > - while (lj < j) > - tcg_ctx.gen_opc_instr_start[lj++] = 0; > + while (++lj < j) { > + tcg_ctx.gen_opc_instr_start[lj] = 0; > + } > } > tcg_ctx.gen_opc_pc[lj] = dc->pc; > gen_opc_condexec_bits[lj] = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1); > @@ -10028,9 +10027,9 @@ done_generating: > #endif > if (search_pc) { > j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf; > - lj++; > - while (lj <= j) > - tcg_ctx.gen_opc_instr_start[lj++] = 0; > + while (++lj <= j) { > + tcg_ctx.gen_opc_instr_start[lj] = 0; > + } > } else { > tb->size = dc->pc - pc_start; > tb->icount = num_insns; > diff --git a/target-i386/translate.c b/target-i386/translate.c > index 7596a90..9c5e1a3 100644 > --- a/target-i386/translate.c > +++ b/target-i386/translate.c > @@ -8319,11 +8319,10 @@ static inline void gen_intermediate_code_internal(CPUX86State *env, > dc->is_jmp = DISAS_NEXT; > pc_ptr = pc_start; > lj = -1; > - num_insns = 0; > max_insns = tb->cflags & CF_COUNT_MASK; > - if (max_insns == 0) > + if (max_insns == 0) { > max_insns = CF_COUNT_MASK; > - > + } > gen_tb_start(); > for(;;) { > if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) { > @@ -8338,9 +8337,9 @@ static inline void gen_intermediate_code_internal(CPUX86State *env, > if (search_pc) { > j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf; > if (lj < j) { > - lj++; > - while (lj < j) > - tcg_ctx.gen_opc_instr_start[lj++] = 0; > + while (++lj < j) { > + tcg_ctx.gen_opc_instr_start[lj] = 0; > + } > } > tcg_ctx.gen_opc_pc[lj] = pc_ptr; > gen_opc_cc_op[lj] = dc->cc_op; > @@ -8387,9 +8386,9 @@ static inline void gen_intermediate_code_internal(CPUX86State *env, > /* we don't forget to fill the last values */ > if (search_pc) { > j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf; > - lj++; > - while (lj <= j) > - tcg_ctx.gen_opc_instr_start[lj++] = 0; > + while (++lj <= j) { > + tcg_ctx.gen_opc_instr_start[lj] = 0; > + } > } > > #ifdef DEBUG_DISAS > diff --git a/target-mips/translate.c b/target-mips/translate.c > index b7f8203..d1e5d84 100644 > --- a/target-mips/translate.c > +++ b/target-mips/translate.c > @@ -15571,10 +15571,10 @@ gen_intermediate_code_internal (CPUMIPSState *env, TranslationBlock *tb, > #else > ctx.mem_idx = ctx.hflags & MIPS_HFLAG_KSU; > #endif > - num_insns = 0; > max_insns = tb->cflags & CF_COUNT_MASK; > - if (max_insns == 0) > + if (max_insns == 0) { > max_insns = CF_COUNT_MASK; > + } > LOG_DISAS("\ntb %p idx %d hflags %04x\n", tb, ctx.mem_idx, ctx.hflags); > gen_tb_start(); > while (ctx.bstate == BS_NONE) { > @@ -15595,9 +15595,9 @@ gen_intermediate_code_internal (CPUMIPSState *env, TranslationBlock *tb, > if (search_pc) { > j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf; > if (lj < j) { > - lj++; > - while (lj < j) > - tcg_ctx.gen_opc_instr_start[lj++] = 0; > + while (++lj < j) { > + tcg_ctx.gen_opc_instr_start[lj] = 0; > + } > } > tcg_ctx.gen_opc_pc[lj] = ctx.pc; > gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK; > @@ -15678,9 +15678,9 @@ done_generating: > *tcg_ctx.gen_opc_ptr = INDEX_op_end; > if (search_pc) { > j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf; > - lj++; > - while (lj <= j) > - tcg_ctx.gen_opc_instr_start[lj++] = 0; > + while (++lj <= j) { > + tcg_ctx.gen_opc_instr_start[lj] = 0; > + } > } else { > tb->size = ctx.pc - pc_start; > tb->icount = num_insns; > -- > 1.7.2.5 >
Il 09/04/2013 10:05, 陳韋任 (Wei-Ren Chen) ha scritto: > Hi liguang, > > Just to be curious, how much performance improvement this patch can get? I think zero. It is indeed making the code a tiny bit more readable in the 2nd/3rd/5th/6th hunks. But the 1st and 4th hunks, which do - num_insns = 0; are wrong. I'm surprised the compiler doesn't report usage of an uninitialized variable. liguang, how did you test them? Paolo > Regards, > chenwj
在 2013-04-09二的 10:11 +0200,Paolo Bonzini写道: > Il 09/04/2013 10:05, 陳韋任 (Wei-Ren Chen) ha scritto: > > Hi liguang, > > > > Just to be curious, how much performance improvement this patch can get? > > I think zero. It is indeed making the code a tiny bit more readable in > the 2nd/3rd/5th/6th hunks. I think maybe a little greater than 0, for it optimizes a variable 'lj' increment. > > But the 1st and 4th hunks, which do > > - num_insns = 0; > > are wrong. I'm surprised the compiler doesn't report usage of an > uninitialized variable. liguang, how did you test them? I do normal compile and run guest OS for TCG, I think 'max_insns = tb->cflags & CF_COUNT_MASK;' is enough to feed compiler. > > Paolo > > > > Regards, > > chenwj >
On Tue, Apr 09, 2013 at 10:11:37AM +0200, Paolo Bonzini wrote: > Il 09/04/2013 10:05, 陳韋任 (Wei-Ren Chen) ha scritto: > > Hi liguang, > > > > Just to be curious, how much performance improvement this patch can get? > > I think zero. It is indeed making the code a tiny bit more readable in > the 2nd/3rd/5th/6th hunks. Then I think the subject should be changed from "optimize" to "readable". "Optimize" is a kind of misleading... > But the 1st and 4th hunks, which do > > - num_insns = 0; > > are wrong. I'm surprised the compiler doesn't report usage of an > uninitialized variable. liguang, how did you test them? > > Paolo > > > > Regards, > > chenwj >
On 9 April 2013 09:21, li guang <lig.fnst@cn.fujitsu.com> wrote: > 在 2013-04-09二的 10:11 +0200,Paolo Bonzini写道: >> Il 09/04/2013 10:05, 陳韋任 (Wei-Ren Chen) ha scritto: >> > Hi liguang, >> > >> > Just to be curious, how much performance improvement this patch can get? >> >> I think zero. It is indeed making the code a tiny bit more readable in >> the 2nd/3rd/5th/6th hunks. > > I think maybe a little greater than 0, > for it optimizes a variable 'lj' increment. (1) The compiler is easily smart enough to know that both ways of writing the code are equivalent, and to generate whatever code is better (2) This is in a comparatively rare code path anyway, because it only happens when we take an unexpected exception [eg fault on a guest load/store] (3) All optimisation should start with profiling, otherwise you have no idea whether you're even looking at the right places to improve. -- PMM
Hi liguang, On Tue, Apr 09, 2013 at 04:21:10PM +0800, li guang wrote: > 在 2013-04-09二的 10:11 +0200,Paolo Bonzini写道: > > Il 09/04/2013 10:05, 陳韋任 (Wei-Ren Chen) ha scritto: > > > Hi liguang, > > > > > > Just to be curious, how much performance improvement this patch can get? > > > > I think zero. It is indeed making the code a tiny bit more readable in > > the 2nd/3rd/5th/6th hunks. > > I think maybe a little greater than 0, > for it optimizes a variable 'lj' increment. As Peter said, only profiling can show if there is a performance improvement. > > But the 1st and 4th hunks, which do > > > > - num_insns = 0; > > > > are wrong. I'm surprised the compiler doesn't report usage of an > > uninitialized variable. liguang, how did you test them? > > I do normal compile and run guest OS for TCG, > I think 'max_insns = tb->cflags & CF_COUNT_MASK;' is enough > to feed compiler. num_insns and max_insns are two different variables, right? So this assignment does not do anything with num_insns. Regards, chenwj
在 2013-04-09二的 10:08 +0100,Peter Maydell写道: > On 9 April 2013 09:21, li guang <lig.fnst@cn.fujitsu.com> wrote: > > 在 2013-04-09二的 10:11 +0200,Paolo Bonzini写道: > >> Il 09/04/2013 10:05, 陳韋任 (Wei-Ren Chen) ha scritto: > >> > Hi liguang, > >> > > >> > Just to be curious, how much performance improvement this patch can get? > >> > >> I think zero. It is indeed making the code a tiny bit more readable in > >> the 2nd/3rd/5th/6th hunks. > > > > I think maybe a little greater than 0, > > for it optimizes a variable 'lj' increment. > > (1) The compiler is easily smart enough to know that both ways > of writing the code are equivalent, and to generate whatever > code is better > (2) This is in a comparatively rare code path anyway, because > it only happens when we take an unexpected exception [eg fault > on a guest load/store] > (3) All optimisation should start with profiling, otherwise > you have no idea whether you're even looking at the right > places to improve. > OK, thanks! but, please make no mistake, I'm not saying this patch promote performance of TCG, this just optimize code writing of this function, of course code writing is not deemed to promote its performance. or, we shouldn't do any code change if they benefit little in performance?
Hi liguang, > OK, thanks! > but, please make no mistake, > I'm not saying this patch promote performance of TCG, > this just optimize code writing of this function, of > course code writing is not deemed to promote its performance. > or, we shouldn't do any code change if they benefit little > in performance? IMO, if you are saying the patch can improve performance, then you should run benchmark to convince others. If your patch is trying to make code clear, then "optimize" might not be a good term to describe your patch. Regards, chenwj
在 2013-04-10三的 10:31 +0800,陳韋任 (Wei-Ren Chen)写道: > Hi liguang, > > > OK, thanks! > > but, please make no mistake, > > I'm not saying this patch promote performance of TCG, > > this just optimize code writing of this function, of > > course code writing is not deemed to promote its performance. > > or, we shouldn't do any code change if they benefit little > > in performance? > > IMO, if you are saying the patch can improve performance, sorry, I'm not saying that :-) > then you should run benchmark to convince others. If your patch is trying > to make code clear, then "optimize" might not be a good term to describe > your patch. > you're right, seems the word 'optimize' is not good here. but what should I use? you know, even compiler like gcc will use -O for code optimization.
> you're right, seems the word 'optimize' is not good here. > but what should I use? > you know, even compiler like gcc will use -O for code optimization. "translate: code cleanup in gen_intermediate_code_internal" would be better, I think. :) Regards, chenwj
在 2013-04-10三的 10:58 +0800,陳韋任 (Wei-Ren Chen)写道: > > you're right, seems the word 'optimize' is not good here. > > but what should I use? > > you know, even compiler like gcc will use -O for code optimization. > > "translate: code cleanup in gen_intermediate_code_internal" would be > better, I think. :) > OK, thanks!
在 2013-04-09二的 17:20 +0800,陳韋任 (Wei-Ren Chen)写道: > Hi liguang, > > On Tue, Apr 09, 2013 at 04:21:10PM +0800, li guang wrote: > > 在 2013-04-09二的 10:11 +0200,Paolo Bonzini写道: > > > Il 09/04/2013 10:05, 陳韋任 (Wei-Ren Chen) ha scritto: > > > > Hi liguang, > > > > > > > > Just to be curious, how much performance improvement this patch can get? > > > > > > I think zero. It is indeed making the code a tiny bit more readable in > > > the 2nd/3rd/5th/6th hunks. > > > > I think maybe a little greater than 0, > > for it optimizes a variable 'lj' increment. > > As Peter said, only profiling can show if there is a performance > improvement. > > > > But the 1st and 4th hunks, which do > > > > > > - num_insns = 0; > > > > > > are wrong. I'm surprised the compiler doesn't report usage of an > > > uninitialized variable. liguang, how did you test them? > > > > I do normal compile and run guest OS for TCG, > > I think 'max_insns = tb->cflags & CF_COUNT_MASK;' is enough > > to feed compiler. > > num_insns and max_insns are two different variables, right? So this > assignment does not do anything with num_insns. Yes, you're right. Thanks! > > Regards, > chenwj >
diff --git a/target-arm/translate.c b/target-arm/translate.c index 35a21be..c0c080d 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -9806,11 +9806,10 @@ static inline void gen_intermediate_code_internal(CPUARMState *env, cpu_M0 = tcg_temp_new_i64(); next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; lj = -1; - num_insns = 0; max_insns = tb->cflags & CF_COUNT_MASK; - if (max_insns == 0) + if (max_insns == 0) { max_insns = CF_COUNT_MASK; - + } gen_tb_start(); tcg_clear_temp_count(); @@ -9889,9 +9888,9 @@ static inline void gen_intermediate_code_internal(CPUARMState *env, if (search_pc) { j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf; if (lj < j) { - lj++; - while (lj < j) - tcg_ctx.gen_opc_instr_start[lj++] = 0; + while (++lj < j) { + tcg_ctx.gen_opc_instr_start[lj] = 0; + } } tcg_ctx.gen_opc_pc[lj] = dc->pc; gen_opc_condexec_bits[lj] = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1); @@ -10028,9 +10027,9 @@ done_generating: #endif if (search_pc) { j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf; - lj++; - while (lj <= j) - tcg_ctx.gen_opc_instr_start[lj++] = 0; + while (++lj <= j) { + tcg_ctx.gen_opc_instr_start[lj] = 0; + } } else { tb->size = dc->pc - pc_start; tb->icount = num_insns; diff --git a/target-i386/translate.c b/target-i386/translate.c index 7596a90..9c5e1a3 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -8319,11 +8319,10 @@ static inline void gen_intermediate_code_internal(CPUX86State *env, dc->is_jmp = DISAS_NEXT; pc_ptr = pc_start; lj = -1; - num_insns = 0; max_insns = tb->cflags & CF_COUNT_MASK; - if (max_insns == 0) + if (max_insns == 0) { max_insns = CF_COUNT_MASK; - + } gen_tb_start(); for(;;) { if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) { @@ -8338,9 +8337,9 @@ static inline void gen_intermediate_code_internal(CPUX86State *env, if (search_pc) { j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf; if (lj < j) { - lj++; - while (lj < j) - tcg_ctx.gen_opc_instr_start[lj++] = 0; + while (++lj < j) { + tcg_ctx.gen_opc_instr_start[lj] = 0; + } } tcg_ctx.gen_opc_pc[lj] = pc_ptr; gen_opc_cc_op[lj] = dc->cc_op; @@ -8387,9 +8386,9 @@ static inline void gen_intermediate_code_internal(CPUX86State *env, /* we don't forget to fill the last values */ if (search_pc) { j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf; - lj++; - while (lj <= j) - tcg_ctx.gen_opc_instr_start[lj++] = 0; + while (++lj <= j) { + tcg_ctx.gen_opc_instr_start[lj] = 0; + } } #ifdef DEBUG_DISAS diff --git a/target-mips/translate.c b/target-mips/translate.c index b7f8203..d1e5d84 100644 --- a/target-mips/translate.c +++ b/target-mips/translate.c @@ -15571,10 +15571,10 @@ gen_intermediate_code_internal (CPUMIPSState *env, TranslationBlock *tb, #else ctx.mem_idx = ctx.hflags & MIPS_HFLAG_KSU; #endif - num_insns = 0; max_insns = tb->cflags & CF_COUNT_MASK; - if (max_insns == 0) + if (max_insns == 0) { max_insns = CF_COUNT_MASK; + } LOG_DISAS("\ntb %p idx %d hflags %04x\n", tb, ctx.mem_idx, ctx.hflags); gen_tb_start(); while (ctx.bstate == BS_NONE) { @@ -15595,9 +15595,9 @@ gen_intermediate_code_internal (CPUMIPSState *env, TranslationBlock *tb, if (search_pc) { j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf; if (lj < j) { - lj++; - while (lj < j) - tcg_ctx.gen_opc_instr_start[lj++] = 0; + while (++lj < j) { + tcg_ctx.gen_opc_instr_start[lj] = 0; + } } tcg_ctx.gen_opc_pc[lj] = ctx.pc; gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK; @@ -15678,9 +15678,9 @@ done_generating: *tcg_ctx.gen_opc_ptr = INDEX_op_end; if (search_pc) { j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf; - lj++; - while (lj <= j) - tcg_ctx.gen_opc_instr_start[lj++] = 0; + while (++lj <= j) { + tcg_ctx.gen_opc_instr_start[lj] = 0; + } } else { tb->size = ctx.pc - pc_start; tb->icount = num_insns;
Signed-off-by: liguang <lig.fnst@cn.fujitsu.com> --- target-arm/translate.c | 17 ++++++++--------- target-i386/translate.c | 17 ++++++++--------- target-mips/translate.c | 16 ++++++++-------- 3 files changed, 24 insertions(+), 26 deletions(-)