| Message ID | D1FE725F-C260-49B5-9E87-543B280DBDAF@nokia.com |
|------------|------------------------------------------------|
| State      | New                                            |
Reduce the number of TCG ops generated from NEON vld/vst instructions by simplifying the code generation.

Signed-off-by: Juha Riihimäki <juha.riihimaki@nokia.com>
---

```diff
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 1734fae..fa03df8 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -3692,6 +3692,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
     TCGv tmp;
     TCGv tmp2;
     TCGv_i64 tmp64;
+    TCGv stride_v;
 
     if (!vfp_enabled(env))
       return 1;
@@ -3710,6 +3711,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
         interleave = neon_ls_element_type[op].interleave;
         load_reg_var(s, addr, rn);
         stride = (1 << size) * interleave;
+        stride_v = tcg_const_i32(stride);
         for (reg = 0; reg < nregs; reg++) {
             if (interleave > 2 || (interleave == 2 && nregs == 2)) {
                 load_reg_var(s, addr, rn);
@@ -3728,7 +3730,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     neon_load_reg64(tmp64, rd);
                     gen_st64(tmp64, addr, IS_USER(s));
                 }
-                tcg_gen_addi_i32(addr, addr, stride);
+                tcg_gen_add_i32(addr, addr, stride_v);
             } else {
                 for (pass = 0; pass < 2; pass++) {
                     if (size == 2) {
@@ -3739,58 +3741,57 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             tmp = neon_load_reg(rd, pass);
                             gen_st32(tmp, addr, IS_USER(s));
                         }
-                        tcg_gen_addi_i32(addr, addr, stride);
+                        tcg_gen_add_i32(addr, addr, stride_v);
                     } else if (size == 1) {
                         if (load) {
                             tmp = gen_ld16u(addr, IS_USER(s));
                             tcg_gen_addi_i32(addr, addr, stride);
                             tmp2 = gen_ld16u(addr, IS_USER(s));
-                            tcg_gen_addi_i32(addr, addr, stride);
-                            gen_bfi(tmp, tmp, tmp2, 16, 0xffff);
+                            tcg_gen_add_i32(addr, addr, stride_v);
+                            tcg_gen_shli_i32(tmp2, tmp2, 16);
+                            tcg_gen_or_i32(tmp, tmp, tmp2);
                             dead_tmp(tmp2);
                             neon_store_reg(rd, pass, tmp);
                         } else {
                             tmp = neon_load_reg(rd, pass);
-                            tmp2 = new_tmp();
-                            tcg_gen_shri_i32(tmp2, tmp, 16);
-                            gen_st16(tmp, addr, IS_USER(s));
-                            tcg_gen_addi_i32(addr, addr, stride);
-                            gen_st16(tmp2, addr, IS_USER(s));
-                            tcg_gen_addi_i32(addr, addr, stride);
+                            tcg_gen_qemu_st16(tmp, addr, IS_USER(s));
+                            tcg_gen_add_i32(addr, addr, stride_v);
+                            tcg_gen_shri_i32(tmp, tmp, 16);
+                            tcg_gen_qemu_st16(tmp, addr, IS_USER(s));
+                            tcg_gen_add_i32(addr, addr, stride_v);
+                            dead_tmp(tmp);
                         }
                     } else /* size == 0 */ {
                         if (load) {
-                            TCGV_UNUSED(tmp2);
-                            for (n = 0; n < 4; n++) {
-                                tmp = gen_ld8u(addr, IS_USER(s));
-                                tcg_gen_addi_i32(addr, addr, stride);
-                                if (n == 0) {
-                                    tmp2 = tmp;
-                                } else {
-                                    gen_bfi(tmp2, tmp2, tmp, n * 8, 0xff);
-                                    dead_tmp(tmp);
-                                }
+                            tmp = gen_ld8u(addr, IS_USER(s));
+                            tcg_gen_add_i32(addr, addr, stride_v);
+                            for (n = 1; n < 4; n++) {
+                                tmp2 = gen_ld8u(addr, IS_USER(s));
+                                tcg_gen_add_i32(addr, addr, stride_v);
+                                tcg_gen_shli_i32(tmp2, tmp2, n * 8);
+                                tcg_gen_or_i32(tmp, tmp, tmp2);
+                                dead_tmp(tmp2);
                             }
-                            neon_store_reg(rd, pass, tmp2);
+                            neon_store_reg(rd, pass, tmp);
                         } else {
-                            tmp2 = neon_load_reg(rd, pass);
-                            for (n = 0; n < 4; n++) {
-                                tmp = new_tmp();
-                                if (n == 0) {
-                                    tcg_gen_mov_i32(tmp, tmp2);
-                                } else {
-                                    tcg_gen_shri_i32(tmp, tmp2, n * 8);
-                                }
-                                gen_st8(tmp, addr, IS_USER(s));
-                                tcg_gen_addi_i32(addr, addr, stride);
+                            tmp2 = tcg_const_i32(8);
+                            tmp = neon_load_reg(rd, pass);
+                            for (n = 0; n < 3; n++) {
+                                tcg_gen_qemu_st8(tmp, addr, IS_USER(s));
+                                tcg_gen_add_i32(addr, addr, stride_v);
+                                tcg_gen_shr_i32(tmp, tmp, tmp2);
                             }
-                            dead_tmp(tmp2);
+                            tcg_gen_qemu_st8(tmp, addr, IS_USER(s));
+                            tcg_gen_add_i32(addr, addr, stride_v);
+                            dead_tmp(tmp);
+                            tcg_temp_free_i32(tmp2);
                         }
                     }
                 }
             }
             rd += neon_ls_element_type[op].spacing;
         }
+        tcg_temp_free_i32(stride_v);
         stride = nregs * 8;
     } else {
         size = (insn >> 10) & 3;
```
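For reference, the size == 0 load path now assembles each 32-bit element with plain shifts and ORs instead of gen_bfi bit-field inserts, and the per-element address bumps reuse a single stride_v constant instead of emitting a fresh immediate add each time. The host-side sketch below (an illustration only, not QEMU code; the byte array stands in for the four gen_ld8u results) shows the value the emitted shift/or sequence computes:

```c
#include <stdint.h>
#include <stdio.h>

/* Mirror of the sequence emitted for size == 0 loads: start from the
 * first byte, then OR in each following byte shifted left by n * 8
 * (tcg_gen_shli_i32 followed by tcg_gen_or_i32 in the patch).
 */
static uint32_t assemble_element(const uint8_t b[4])
{
    uint32_t tmp = b[0];                    /* first gen_ld8u result */
    for (int n = 1; n < 4; n++) {
        uint32_t tmp2 = b[n];               /* next gen_ld8u result */
        tmp |= (uint32_t)tmp2 << (n * 8);   /* shift + or, no bit-field insert */
    }
    return tmp;
}

int main(void)
{
    const uint8_t bytes[4] = { 0x11, 0x22, 0x33, 0x44 };
    printf("0x%08x\n", assemble_element(bytes));   /* prints 0x44332211 */
    return 0;
}
```

The store path is the mirror image: the register value is written out one byte at a time and shifted right by 8 after each store, so no per-lane temporaries are needed.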