From patchwork Sat Oct 24 12:19:08 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Juha.Riihimaki@nokia.com X-Patchwork-Id: 36837 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [199.232.76.165]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id DE3AEB7BC7 for ; Sat, 24 Oct 2009 23:36:08 +1100 (EST) Received: from localhost ([127.0.0.1]:45375 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1N1fqn-0007m1-VI for incoming@patchwork.ozlabs.org; Sat, 24 Oct 2009 08:36:05 -0400 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1N1faq-0008JY-6K for qemu-devel@nongnu.org; Sat, 24 Oct 2009 08:19:36 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1N1fai-0008EM-AO for qemu-devel@nongnu.org; Sat, 24 Oct 2009 08:19:32 -0400 Received: from [199.232.76.173] (port=58458 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1N1fah-0008EE-JZ for qemu-devel@nongnu.org; Sat, 24 Oct 2009 08:19:27 -0400 Received: from smtp.nokia.com ([192.100.122.230]:58028 helo=mgw-mx03.nokia.com) by monty-python.gnu.org with esmtps (TLS-1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.60) (envelope-from ) id 1N1fag-0004Ig-Rd for qemu-devel@nongnu.org; Sat, 24 Oct 2009 08:19:27 -0400 Received: from esebh105.NOE.Nokia.com (esebh105.ntc.nokia.com [172.21.138.211]) by mgw-mx03.nokia.com (Switch-3.3.3/Switch-3.3.3) with ESMTP id n9OCJH3M011602 for ; Sat, 24 Oct 2009 15:19:24 +0300 Received: from vaebh104.NOE.Nokia.com ([10.160.244.30]) by esebh105.NOE.Nokia.com with Microsoft SMTPSVC(6.0.3790.3959); Sat, 24 Oct 2009 15:19:22 +0300 Received: from mgw-sa02.ext.nokia.com ([147.243.1.48]) by vaebh104.NOE.Nokia.com over TLS secured channel with Microsoft SMTPSVC(6.0.3790.3959); 
Sat, 24 Oct 2009 15:19:21 +0300 Received: from localhost.localdomain (essapo-nirac252105.europe.nokia.com [10.162.252.105]) by mgw-sa02.ext.nokia.com (Switch-3.3.3/Switch-3.3.3) with ESMTP id n9OCJ8qF022164 for ; Sat, 24 Oct 2009 15:19:20 +0300 From: juha.riihimaki@nokia.com To: qemu-devel@nongnu.org Date: Sat, 24 Oct 2009 15:19:08 +0300 Message-Id: <1256386749-85299-10-git-send-email-juha.riihimaki@nokia.com> X-Mailer: git-send-email 1.6.5 In-Reply-To: <1256386749-85299-1-git-send-email-juha.riihimaki@nokia.com> References: <1256386749-85299-1-git-send-email-juha.riihimaki@nokia.com> MIME-Version: 1.0 X-OriginalArrivalTime: 24 Oct 2009 12:19:21.0406 (UTC) FILETIME=[37629DE0:01CA54A4] X-Nokia-AV: Clean X-MIME-Autoconverted: from 8bit to quoted-printable by mgw-mx03.nokia.com id n9OCJH3M011602 X-detected-operating-system: by monty-python.gnu.org: GNU/Linux 2.6 (newer, 1) Subject: [Qemu-devel] [PATCH v2 09/10] target-arm: optimize neon vld/vst ops X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org From: Juha Riihimäki Reduce the number of TCG ops generated for NEON vld/vst instructions by simplifying the code generation. 
Signed-off-by: Juha Riihimäki --- target-arm/translate.c | 67 ++++++++++++++++++++++++----------------------- 1 files changed, 34 insertions(+), 33 deletions(-) diff --git a/target-arm/translate.c b/target-arm/translate.c index f262758..55d6377 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -3708,6 +3708,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) TCGv tmp; TCGv tmp2; TCGv_i64 tmp64; + TCGv stride_var; if (!vfp_enabled(env)) return 1; @@ -3729,6 +3730,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) return 1; load_reg_var(s, addr, rn); stride = (1 << size) * interleave; + stride_var = tcg_const_i32(stride); for (reg = 0; reg < nregs; reg++) { if (interleave > 2 || (interleave == 2 && nregs == 2)) { load_reg_var(s, addr, rn); @@ -3747,7 +3749,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) neon_load_reg64(tmp64, rd); gen_st64(tmp64, addr, IS_USER(s)); } - tcg_gen_addi_i32(addr, addr, stride); + tcg_gen_add_i32(addr, addr, stride_var); } else { for (pass = 0; pass < 2; pass++) { if (size == 2) { @@ -3758,58 +3760,57 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) tmp = neon_load_reg(rd, pass); gen_st32(tmp, addr, IS_USER(s)); } - tcg_gen_addi_i32(addr, addr, stride); + tcg_gen_add_i32(addr, addr, stride_var); } else if (size == 1) { if (load) { tmp = gen_ld16u(addr, IS_USER(s)); - tcg_gen_addi_i32(addr, addr, stride); + tcg_gen_add_i32(addr, addr, stride_var); tmp2 = gen_ld16u(addr, IS_USER(s)); - tcg_gen_addi_i32(addr, addr, stride); - gen_bfi(tmp, tmp, tmp2, 16, 0xffff); + tcg_gen_add_i32(addr, addr, stride_var); + tcg_gen_shli_i32(tmp2, tmp2, 16); + tcg_gen_or_i32(tmp, tmp, tmp2); dead_tmp(tmp2); neon_store_reg(rd, pass, tmp); } else { tmp = neon_load_reg(rd, pass); - tmp2 = new_tmp(); - tcg_gen_shri_i32(tmp2, tmp, 16); - gen_st16(tmp, addr, IS_USER(s)); - tcg_gen_addi_i32(addr, addr, stride); - 
gen_st16(tmp2, addr, IS_USER(s)); - tcg_gen_addi_i32(addr, addr, stride); + tcg_gen_qemu_st16(tmp, addr, IS_USER(s)); + tcg_gen_add_i32(addr, addr, stride_var); + tcg_gen_shri_i32(tmp, tmp, 16); + tcg_gen_qemu_st16(tmp, addr, IS_USER(s)); + tcg_gen_add_i32(addr, addr, stride_var); + dead_tmp(tmp); } } else /* size == 0 */ { if (load) { - TCGV_UNUSED(tmp2); - for (n = 0; n < 4; n++) { - tmp = gen_ld8u(addr, IS_USER(s)); - tcg_gen_addi_i32(addr, addr, stride); - if (n == 0) { - tmp2 = tmp; - } else { - gen_bfi(tmp2, tmp2, tmp, n * 8, 0xff); - dead_tmp(tmp); - } + tmp = gen_ld8u(addr, IS_USER(s)); + tcg_gen_add_i32(addr, addr, stride_var); + for (n = 1; n < 4; n++) { + tmp2 = gen_ld8u(addr, IS_USER(s)); + tcg_gen_add_i32(addr, addr, stride_var); + tcg_gen_shli_i32(tmp2, tmp2, n * 8); + tcg_gen_or_i32(tmp, tmp, tmp2); + dead_tmp(tmp2); } - neon_store_reg(rd, pass, tmp2); + neon_store_reg(rd, pass, tmp); } else { - tmp2 = neon_load_reg(rd, pass); - for (n = 0; n < 4; n++) { - tmp = new_tmp(); - if (n == 0) { - tcg_gen_mov_i32(tmp, tmp2); - } else { - tcg_gen_shri_i32(tmp, tmp2, n * 8); - } - gen_st8(tmp, addr, IS_USER(s)); - tcg_gen_addi_i32(addr, addr, stride); + tmp2 = tcg_const_i32(8); + tmp = neon_load_reg(rd, pass); + for (n = 0; n < 3; n++) { + tcg_gen_qemu_st8(tmp, addr, IS_USER(s)); + tcg_gen_add_i32(addr, addr, stride_var); + tcg_gen_shr_i32(tmp, tmp, tmp2); } - dead_tmp(tmp2); + tcg_gen_qemu_st8(tmp, addr, IS_USER(s)); + tcg_gen_add_i32(addr, addr, stride_var); + dead_tmp(tmp); + tcg_temp_free_i32(tmp2); } } } } rd += spacing; } + tcg_temp_free_i32(stride_var); stride = nregs * 8; } else { size = (insn >> 10) & 3;