From patchwork Sun Aug 22 05:28:50 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Carrot Wei X-Patchwork-Id: 62360 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 1114BB6EFF for ; Sun, 22 Aug 2010 15:32:12 +1000 (EST) Received: (qmail 6317 invoked by alias); 22 Aug 2010 05:32:09 -0000 Received: (qmail 6307 invoked by uid 22791); 22 Aug 2010 05:32:07 -0000 X-SWARE-Spam-Status: No, hits=-1.5 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, SPF_SOFTFAIL, TW_QE X-Spam-Check-By: sourceware.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (140.186.70.92) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Sun, 22 Aug 2010 05:32:01 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.69) (envelope-from ) id 1On39y-0003YZ-8C for gcc-patches@gcc.gnu.org; Sun, 22 Aug 2010 01:31:59 -0400 Received: from smtp-out.google.com ([216.239.44.51]:39787) by eggs.gnu.org with esmtp (Exim 4.69) (envelope-from ) id 1On39y-0003Y3-39 for gcc-patches@gcc.gnu.org; Sun, 22 Aug 2010 01:31:58 -0400 Received: from wpaz9.hot.corp.google.com (wpaz9.hot.corp.google.com [172.24.198.73]) by smtp-out.google.com with ESMTP id o7M5Sqob013739 for ; Sat, 21 Aug 2010 22:28:52 -0700 Received: from gwb20 (gwb20.prod.google.com [10.200.2.20]) by wpaz9.hot.corp.google.com with ESMTP id o7M5SpeA029455 for ; Sat, 21 Aug 2010 22:28:51 -0700 Received: by gwb20 with SMTP id 20so2060844gwb.31 for ; Sat, 21 Aug 2010 22:28:51 -0700 (PDT) MIME-Version: 1.0 Received: by 10.150.13.15 with SMTP id 15mr3937860ybm.40.1282454931026; Sat, 21 Aug 2010 22:28:51 -0700 (PDT) Received: by 10.151.129.10 with HTTP; Sat, 21 Aug 2010 22:28:50 -0700 (PDT) Date: Sun, 22 Aug 2010 13:28:50 +0800 Message-ID: Subject: [PATCH: ARM] PR 45335 Use ldrd and strd 
to access two consecutive words From: Carrot Wei To: gcc-patches@gcc.gnu.org X-System-Of-Record: true X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.6 (newer, 3) Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Hi, The current ARM compiler can't merge two consecutive loads or stores into a single ldrd or strd. This patch adds new insn patterns for ldrd and strd, and new peephole2 rules to do the optimization. This patch handles Thumb-2 instructions only. For ARM instructions there are more constraints on register usage, so that case is better handled before register allocation. This patch has been tested on qemu with Thumb-2 instructions. ChangeLog: 2010-08-22 Wei Guozhi PR target/45335 * gcc/config/arm/thumb2.md (*thumb2_ldrd and peephole2): New insn pattern and related peephole2. (*thumb2_strd and peephole2): New insn pattern and related peephole2. * gcc/config/arm/arm.c (thumb2_ldrd_addr): New function. * gcc/config/arm/arm-protos.h (thumb2_ldrd_addr): New prototype. 2010-08-22 Wei Guozhi PR target/45335 * gcc.target/arm/pr45335.c: New test. 
thanks Wei Guozhi Index: thumb2.md =================================================================== --- thumb2.md (revision 163363) +++ thumb2.md (working copy) @@ -1257,3 +1257,69 @@ " operands[2] = GEN_INT (32 - INTVAL (operands[2])); ") + +(define_insn "*thumb2_ldrd" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (mem:SI (match_operand:SI 2 "" ""))) + (set (match_operand:SI 1 "s_register_operand" "") + (mem:SI (match_operand:SI 3 "" "")))])] + "TARGET_THUMB2 && + thumb2_ldrd_addr (operands[0], operands[1], operands[2], operands[3], 1)" + "* + { + rtx ldrd_addr = thumb2_ldrd_addr (operands[0], operands[1], + operands[2], operands[3], 1); + operands[4] = gen_rtx_MEM (SImode, ldrd_addr); + if (ldrd_addr == operands[3]) + return \"ldrd\\t%1, %0, %4\"; + else + return \"ldrd\\t%0, %1, %4\"; + }" +) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (mem:SI (match_operand:SI 2 "" ""))) + (set (match_operand:SI 1 "s_register_operand" "") + (mem:SI (match_operand:SI 3 "" "")))] + "TARGET_THUMB2 && + thumb2_ldrd_addr (operands[0], operands[1], operands[2], operands[3], 1)" + [(parallel [(set (match_operand:SI 0 "s_register_operand" "") + (mem:SI (match_operand:SI 2 "" ""))) + (set (match_operand:SI 1 "s_register_operand" "") + (mem:SI (match_operand:SI 3 "" "")))])] + "" +) + +(define_insn "*thumb2_strd" + [(parallel [(set (mem:SI (match_operand:SI 2 "" "")) + (match_operand:SI 0 "s_register_operand" "")) + (set (mem:SI (match_operand:SI 3 "" "")) + (match_operand:SI 1 "s_register_operand" ""))])] + "TARGET_THUMB2 && + thumb2_ldrd_addr (operands[0], operands[1], operands[2], operands[3], 0)" + "* + { + rtx strd_addr = thumb2_ldrd_addr (operands[0], operands[1], + operands[2], operands[3], 0); + operands[4] = gen_rtx_MEM (SImode, strd_addr); + if (strd_addr == operands[3]) + return \"strd\\t%1, %0, %4\"; + else + return \"strd\\t%0, %1, %4\"; + }" +) + +(define_peephole2 + [(set (mem:SI (match_operand:SI 2 "" "")) + 
(match_operand:SI 0 "s_register_operand" "")) + (set (mem:SI (match_operand:SI 3 "" "")) + (match_operand:SI 1 "s_register_operand" ""))] + "TARGET_THUMB2 && + thumb2_ldrd_addr (operands[0], operands[1], operands[2], operands[3], 0)" + [(parallel [(set (mem:SI (match_operand:SI 2 "" "")) + (match_operand:SI 0 "s_register_operand" "")) + (set (mem:SI (match_operand:SI 3 "" "")) + (match_operand:SI 1 "s_register_operand" ""))])] + "" +) Index: arm.c =================================================================== --- arm.c (revision 163363) +++ arm.c (working copy) @@ -22959,4 +22959,76 @@ arm_expand_sync (enum machine_mode mode, } } +/* Check if the two memory addresses can be accessed by an ldrd instruction. + That is they use the same base register, and the gap between constant + offsets should be 4. It can also be used for strd instruction. + If so return the lower address, otherwise return NULL. */ +rtx +thumb2_ldrd_addr (rtx dest1, rtx dest2, rtx addr1, rtx addr2, bool ldrd) +{ + rtx reg1, reg2, op0, op1; + rtx addr = NULL; + HOST_WIDE_INT offset1 = 0; + HOST_WIDE_INT offset2 = 0; + + switch (GET_CODE (addr1)) + { + case REG: + reg1 = addr1; + break; + + case PLUS: + op0 = XEXP (addr1, 0); + op1 = XEXP (addr1, 1); + if ((GET_CODE (op0) != REG) || (GET_CODE (op1) != CONST_INT)) + return NULL; + reg1 = op0; + offset1 = INTVAL (op1); + break; + + default: + return NULL; + } + + switch (GET_CODE (addr2)) + { + case REG: + reg2 = addr2; + break; + + case PLUS: + op0 = XEXP (addr2, 0); + op1 = XEXP (addr2, 1); + if ((GET_CODE (op0) != REG) || (GET_CODE (op1) != CONST_INT)) + return NULL; + reg2 = op0; + offset2 = INTVAL (op1); + break; + + default: + return NULL; + } + + if (reg1 != reg2) + return NULL; + + if (ldrd && ((dest1 == dest2) || (dest1 == reg1))) + return NULL; + + if ((offset1 + 4) == offset2) + addr = addr1; + else if ((offset2 + 4) == offset1) + { + addr = addr2; + offset1 = offset2; + } + else + return NULL; + + if (((offset1 % 4) != 0) || (offset1 
> 1020) || (offset1 < -1020)) + return NULL; + + return addr; +} + #include "gt-arm.h" Index: arm-protos.h =================================================================== --- arm-protos.h (revision 163363) +++ arm-protos.h (working copy) @@ -149,7 +149,7 @@ extern void arm_expand_sync (enum machin extern const char *arm_output_memory_barrier (rtx *); extern const char *arm_output_sync_insn (rtx, rtx *); extern unsigned int arm_sync_loop_insns (rtx , rtx *); - +extern rtx thumb2_ldrd_addr (rtx, rtx, rtx, rtx, bool); extern bool arm_output_addr_const_extra (FILE *, rtx); #if defined TREE_CODE Index: pr45335.c =================================================================== --- pr45335.c (revision 0) +++ pr45335.c (revision 0) @@ -0,0 +1,20 @@ +/* { dg-options "-mthumb -O2" } */ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-final { scan-assembler "ldrd" } } */ +/* { dg-final { scan-assembler "strd" } } */ + +struct S +{ + void* p1; + void* p2; + void* p3; + void* p4; +}; + +void foo1(struct S* fp, struct S* otherSaveArea) +{ + struct S* saveA = fp - 1; + printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea); + printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n", + saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp); +}