Message ID | 1319750100-7592-1-git-send-email-rth@twiddle.net |
---|---|
State | New |
Headers | show |
Thanks, applied. On Thu, Oct 27, 2011 at 21:15, Richard Henderson <rth@twiddle.net> wrote: > If the deposit replaces the entire word, optimize to a move. > > If we're inserting to the top of the word, avoid the mask of arg2 > as we'll be shifting out all of the garbage and shifting in zeros. > > If the host is 32-bit, reduce a 64-bit deposit to a 32-bit deposit > when possible. > > Signed-off-by: Richard Henderson <rth@twiddle.net> > --- > tcg/tcg-op.h | 65 +++++++++++++++++++++++++++++++++++++++++++++------------ > 1 files changed, 51 insertions(+), 14 deletions(-) > > V2: checkpatch errors fixed. > > diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h > index fea5983..24ec7fc 100644 > --- a/tcg/tcg-op.h > +++ b/tcg/tcg-op.h > @@ -2045,38 +2045,75 @@ static inline void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, > TCGv_i32 arg2, unsigned int ofs, > unsigned int len) > { > + uint32_t mask; > + TCGv_i32 t1; > + > + if (ofs == 0 && len == 32) { > + tcg_gen_mov_i32(ret, arg2); > + return; > + } > if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) { > tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len); > - } else { > - uint32_t mask = (1u << len) - 1; > - TCGv_i32 t1 = tcg_temp_new_i32 (); > + return; > + } > + > + mask = (1u << len) - 1; > + t1 = tcg_temp_new_i32(); > > + if (ofs + len < 32) { > tcg_gen_andi_i32(t1, arg2, mask); > tcg_gen_shli_i32(t1, t1, ofs); > - tcg_gen_andi_i32(ret, arg1, ~(mask << ofs)); > - tcg_gen_or_i32(ret, ret, t1); > - > - tcg_temp_free_i32(t1); > + } else { > + tcg_gen_shli_i32(t1, arg2, ofs); > } > + tcg_gen_andi_i32(ret, arg1, ~(mask << ofs)); > + tcg_gen_or_i32(ret, ret, t1); > + > + tcg_temp_free_i32(t1); > } > > static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, > TCGv_i64 arg2, unsigned int ofs, > unsigned int len) > { > + uint64_t mask; > + TCGv_i64 t1; > + > + if (ofs == 0 && len == 64) { > + tcg_gen_mov_i64(ret, arg2); > + return; > + } > if (TCG_TARGET_HAS_deposit_i64 && 
TCG_TARGET_deposit_i64_valid(ofs, len)) { > tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len); > - } else { > - uint64_t mask = (1ull << len) - 1; > - TCGv_i64 t1 = tcg_temp_new_i64 (); > + return; > + } > > +#if TCG_TARGET_REG_BITS == 32 > + if (ofs >= 32) { > + tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), > + TCGV_LOW(arg2), ofs - 32, len); > + return; > + } > + if (ofs + len <= 32) { > + tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(arg1), > + TCGV_LOW(arg2), ofs, len); > + return; > + } > +#endif > + > + mask = (1ull << len) - 1; > + t1 = tcg_temp_new_i64(); > + > + if (ofs + len < 64) { > tcg_gen_andi_i64(t1, arg2, mask); > tcg_gen_shli_i64(t1, t1, ofs); > - tcg_gen_andi_i64(ret, arg1, ~(mask << ofs)); > - tcg_gen_or_i64(ret, ret, t1); > - > - tcg_temp_free_i64(t1); > + } else { > + tcg_gen_shli_i64(t1, arg2, ofs); > } > + tcg_gen_andi_i64(ret, arg1, ~(mask << ofs)); > + tcg_gen_or_i64(ret, ret, t1); > + > + tcg_temp_free_i64(t1); > } > > /***************************************/ > -- > 1.7.4.4 > >
On 27.10.2011, at 23:15, Richard Henderson wrote:

> If the deposit replaces the entire word, optimize to a move.
>
> If we're inserting to the top of the word, avoid the mask of arg2
> as we'll be shifting out all of the garbage and shifting in zeros.
>
> If the host is 32-bit, reduce a 64-bit deposit to a 32-bit deposit
> when possible.

This patch breaks qemu-system-ppc64 on ppc32 hosts:

IN:
0x00000000fff08618: mfmsr r0
0x00000000fff0861c: ori r0,r0,48
0x00000000fff08620: mtmsr r0

OP:
---- 0xfff08618
mov_i32 r0_0,msr_0
mov_i32 r0_1,msr_1

---- 0xfff0861c
movi_i32 tmp0,$0x30
or_i32 r0_0,r0_0,tmp0

---- 0xfff08620
movi_i32 nip_0,$0xfff08624
movi_i32 nip_1,$0x0
mov_i32 tmp1,r0_0
movi_i32 tmp0,$store_msr
call tmp0,$0x0,$0,tmp2,tmp1
movi_i32 nip_0,$0xfff08624
movi_i32 nip_1,$0x0
exit_tb $0x0

OP after liveness analysis:
---- 0xfff08618
mov_i32 r0_0,msr_0
mov_i32 r0_1,msr_1

---- 0xfff0861c
movi_i32 tmp0,$0x30
or_i32 r0_0,r0_0,tmp0

---- 0xfff08620
movi_i32 nip_0,$0xfff08624
movi_i32 nip_1,$0x0
mov_i32 tmp1,r0_0
movi_i32 tmp0,$store_msr
call tmp0,$0x0,$0,tmp2,tmp1
movi_i32 nip_0,$0xfff08624
movi_i32 nip_1,$0x0
exit_tb $0x0
end


agraf@lychee:/home/agraf/release/qemu> ./ppc64-softmmu/qemu-system-ppc64 -kernel /boot/vmlinux -initrd /boot/initrd -nographic -d in_asm,cpu,int,op,op_opt,out_asm
/home/agraf/release/qemu/tcg/tcg.c:1929: tcg fatal error
Aborted



Alex
On Mon, Oct 31, 2011 at 03:47, Alexander Graf <agraf@suse.de> wrote:
>
> On 27.10.2011, at 23:15, Richard Henderson wrote:
>
>> If the deposit replaces the entire word, optimize to a move.
>>
>> If we're inserting to the top of the word, avoid the mask of arg2
>> as we'll be shifting out all of the garbage and shifting in zeros.
>>
>> If the host is 32-bit, reduce a 64-bit deposit to a 32-bit deposit
>> when possible.
>
> This patch breaks qemu-system-ppc64 on ppc32 hosts:
>
> IN:
> 0x00000000fff08618: mfmsr r0
> 0x00000000fff0861c: ori r0,r0,48
> 0x00000000fff08620: mtmsr r0
>
> OP:
> ---- 0xfff08618
> mov_i32 r0_0,msr_0
> mov_i32 r0_1,msr_1
>
> ---- 0xfff0861c
> movi_i32 tmp0,$0x30
> or_i32 r0_0,r0_0,tmp0
>
> ---- 0xfff08620
> movi_i32 nip_0,$0xfff08624
> movi_i32 nip_1,$0x0
> mov_i32 tmp1,r0_0
> movi_i32 tmp0,$store_msr
> call tmp0,$0x0,$0,tmp2,tmp1

tmp2 is not defined. Where does it come from?

The patch still looks fine to me. Maybe the problem is with the
optimizer, or a different bug is exposed by one of these. Can you try
if #undefining USE_TCG_OPTIMIZATIONS changes anything?

> movi_i32 nip_0,$0xfff08624
> movi_i32 nip_1,$0x0
> exit_tb $0x0
>
> OP after liveness analysis:
> ---- 0xfff08618
> mov_i32 r0_0,msr_0
> mov_i32 r0_1,msr_1
>
> ---- 0xfff0861c
> movi_i32 tmp0,$0x30
> or_i32 r0_0,r0_0,tmp0
>
> ---- 0xfff08620
> movi_i32 nip_0,$0xfff08624
> movi_i32 nip_1,$0x0
> mov_i32 tmp1,r0_0
> movi_i32 tmp0,$store_msr
> call tmp0,$0x0,$0,tmp2,tmp1
> movi_i32 nip_0,$0xfff08624
> movi_i32 nip_1,$0x0
> exit_tb $0x0
> end
>
>
> agraf@lychee:/home/agraf/release/qemu> ./ppc64-softmmu/qemu-system-ppc64 -kernel /boot/vmlinux -initrd /boot/initrd -nographic -d in_asm,cpu,int,op,op_opt,out_asm
> /home/agraf/release/qemu/tcg/tcg.c:1929: tcg fatal error
> Aborted
>
>
>
> Alex
>
>
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index fea5983..24ec7fc 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -2045,38 +2045,75 @@ static inline void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, unsigned int ofs, unsigned int len) { + uint32_t mask; + TCGv_i32 t1; + + if (ofs == 0 && len == 32) { + tcg_gen_mov_i32(ret, arg2); + return; + } if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) { tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len); - } else { - uint32_t mask = (1u << len) - 1; - TCGv_i32 t1 = tcg_temp_new_i32 (); + return; + } + + mask = (1u << len) - 1; + t1 = tcg_temp_new_i32(); + if (ofs + len < 32) { tcg_gen_andi_i32(t1, arg2, mask); tcg_gen_shli_i32(t1, t1, ofs); - tcg_gen_andi_i32(ret, arg1, ~(mask << ofs)); - tcg_gen_or_i32(ret, ret, t1); - - tcg_temp_free_i32(t1); + } else { + tcg_gen_shli_i32(t1, arg2, ofs); } + tcg_gen_andi_i32(ret, arg1, ~(mask << ofs)); + tcg_gen_or_i32(ret, ret, t1); + + tcg_temp_free_i32(t1); } static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, unsigned int ofs, unsigned int len) { + uint64_t mask; + TCGv_i64 t1; + + if (ofs == 0 && len == 64) { + tcg_gen_mov_i64(ret, arg2); + return; + } if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) { tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len); - } else { - uint64_t mask = (1ull << len) - 1; - TCGv_i64 t1 = tcg_temp_new_i64 (); + return; + } +#if TCG_TARGET_REG_BITS == 32 + if (ofs >= 32) { + tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), + TCGV_LOW(arg2), ofs - 32, len); + return; + } + if (ofs + len <= 32) { + tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(arg1), + TCGV_LOW(arg2), ofs, len); + return; + } +#endif + + mask = (1ull << len) - 1; + t1 = tcg_temp_new_i64(); + + if (ofs + len < 64) { tcg_gen_andi_i64(t1, arg2, mask); tcg_gen_shli_i64(t1, t1, ofs); - tcg_gen_andi_i64(ret, arg1, ~(mask << ofs)); - tcg_gen_or_i64(ret, ret, t1); - - 
tcg_temp_free_i64(t1); + } else { + tcg_gen_shli_i64(t1, arg2, ofs); } + tcg_gen_andi_i64(ret, arg1, ~(mask << ofs)); + tcg_gen_or_i64(ret, ret, t1); + + tcg_temp_free_i64(t1); } /***************************************/
If the deposit replaces the entire word, optimize to a move.

If we're inserting to the top of the word, avoid the mask of arg2
as we'll be shifting out all of the garbage and shifting in zeros.

If the host is 32-bit, reduce a 64-bit deposit to a 32-bit deposit
when possible.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/tcg-op.h | 65 +++++++++++++++++++++++++++++++++++++++++++++------------
1 files changed, 51 insertions(+), 14 deletions(-)

V2: checkpatch errors fixed.