From patchwork Fri Oct 22 20:11:48 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pat Haugen X-Patchwork-Id: 68957 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id A09ABB6F0B for ; Sat, 23 Oct 2010 07:12:14 +1100 (EST) Received: (qmail 4982 invoked by alias); 22 Oct 2010 20:12:04 -0000 Received: (qmail 4747 invoked by uid 22791); 22 Oct 2010 20:12:00 -0000 X-SWARE-Spam-Status: No, hits=-1.0 required=5.0 tests=AWL, BAYES_40, T_RP_MATCHES_RCVD X-Spam-Check-By: sourceware.org Received: from e33.co.us.ibm.com (HELO e33.co.us.ibm.com) (32.97.110.151) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Fri, 22 Oct 2010 20:11:53 +0000 Received: from d03relay02.boulder.ibm.com (d03relay02.boulder.ibm.com [9.17.195.227]) by e33.co.us.ibm.com (8.14.4/8.13.1) with ESMTP id o9MK6TbY021685 for ; Fri, 22 Oct 2010 14:06:29 -0600 Received: from d03av06.boulder.ibm.com (d03av06.boulder.ibm.com [9.17.195.245]) by d03relay02.boulder.ibm.com (8.13.8/8.13.8/NCO v9.1) with ESMTP id o9MKBoiN260920 for ; Fri, 22 Oct 2010 14:11:50 -0600 Received: from d03av06.boulder.ibm.com (loopback [127.0.0.1]) by d03av06.boulder.ibm.com (8.14.4/8.13.1/NCO v10.0 AVout) with ESMTP id o9MKFjNT010881 for ; Fri, 22 Oct 2010 14:15:45 -0600 Received: from [127.0.0.1] (sig-9-65-26-27.mts.ibm.com [9.65.26.27]) by d03av06.boulder.ibm.com (8.14.4/8.13.1/NCO v10.0 AVin) with ESMTP id o9MKFh4H010822; Fri, 22 Oct 2010 14:15:45 -0600 Message-ID: <4CC1F004.2000505@linux.vnet.ibm.com> Date: Fri, 22 Oct 2010 15:11:48 -0500 From: Pat Haugen User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2.11) Gecko/20101013 Thunderbird/3.1.5 MIME-Version: 1.0 To: gcc-patches@gcc.gnu.org CC: David Edelsohn Subject: [PATCH] Bump alignment for small loops on PowerPC X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org The following patch increases the alignment for certain small loops (5-8 insns) on PowerPC for improved performance. Small loops can benefit by being contained within a single 32-byte icache sector. Spec testing was neutral, but it should eliminate variations in performance I have seen in the past due to whether one of these loops crossed a 32-byte boundary. Bootstrap/regtested on powerpc64-linux with no new regressions. OK for trunk? 2010-10-22 Pat Haugen * final.c (compute_alignments): Compute/free loop info all the time. * config/rs6000/rs6000.h (LOOP_ALIGN): Define. * config/rs6000/rs6000-protos.h (rs6000_loop_align): Declare. * config/rs6000/t-rs6000 (rs6000.o): Add cfgloop.h. * config/rs6000/rs6000.c (cfgloop.h): Include. (can_override_loop_align): New. (rs6000_option_override_internal): Set it. (TARGET_ASM_LOOP_ALIGN_MAX_SKIP): Define target hook. (rs6000_loop_align): New function. (rs6000_loop_align_max_skip): Likewise. * testsuite/gcc.target/powerpc/loop_align.c: New. -Pat Index: gcc/final.c =================================================================== --- gcc/final.c (revision 165738) +++ gcc/final.c (working copy) @@ -715,8 +715,8 @@ compute_alignments (void) { dump_flow_info (dump_file, TDF_DETAILS); flow_loops_dump (dump_file, NULL, 1); - loop_optimizer_init (AVOID_CFG_MODIFICATIONS); } + loop_optimizer_init (AVOID_CFG_MODIFICATIONS); FOR_EACH_BB (bb) if (bb->frequency > freq_max) freq_max = bb->frequency; @@ -808,11 +808,8 @@ compute_alignments (void) LABEL_TO_MAX_SKIP (label) = max_skip; } - if (dump_file) - { - loop_optimizer_finalize (); - free_dominance_info (CDI_DOMINATORS); - } + loop_optimizer_finalize (); + free_dominance_info (CDI_DOMINATORS); return 0; } Index: gcc/config/rs6000/rs6000.h =================================================================== --- gcc/config/rs6000/rs6000.h (revision 165738) +++ gcc/config/rs6000/rs6000.h (working copy) @@ -2388,6 +2388,9 @@ extern char rs6000_reg_names[][8]; /* re if ((LOG) != 0) \ fprintf (FILE, "\t.align %d\n", (LOG)) +/* How to align the given loop. */ +#define LOOP_ALIGN(LABEL) rs6000_loop_align(LABEL) + /* Pick up the return address upon entry to a procedure. Used for dwarf2 unwind information. This also enables the table driven mechanism. */ Index: gcc/config/rs6000/rs6000-protos.h =================================================================== --- gcc/config/rs6000/rs6000-protos.h (revision 165738) +++ gcc/config/rs6000/rs6000-protos.h (working copy) @@ -131,6 +131,7 @@ extern rtx rs6000_machopic_legitimize_pi extern rtx rs6000_address_for_fpconvert (rtx); extern rtx rs6000_allocate_stack_temp (enum machine_mode, bool, bool); extern void rs6000_expand_convert_si_to_sfdf (rtx, rtx, bool); +extern int rs6000_loop_align (rtx); #endif /* RTX_CODE */ #ifdef TREE_CODE Index: gcc/config/rs6000/t-rs6000 =================================================================== --- gcc/config/rs6000/t-rs6000 (revision 165738) +++ gcc/config/rs6000/t-rs6000 (working copy) @@ -27,7 +27,7 @@ rs6000.o: $(CONFIG_H) $(SYSTEM_H) corety $(OBSTACK_H) $(TREE_H) $(EXPR_H) $(OPTABS_H) except.h function.h \ output.h $(BASIC_BLOCK_H) $(INTEGRATE_H) toplev.h $(GGC_H) $(HASHTAB_H) \ $(TM_P_H) $(TARGET_H) $(TARGET_DEF_H) langhooks.h reload.h gt-rs6000.h \ - cfglayout.h + cfglayout.h cfgloop.h rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c \ $(srcdir)/config/rs6000/rs6000-protos.h \ Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 165738) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -51,6 +51,7 @@ #include "langhooks.h" #include "reload.h" #include "cfglayout.h" +#include "cfgloop.h" #include "sched-int.h" #include "gimple.h" #include "tree-flow.h" @@ -154,6 +155,9 @@ static GTY(()) bool rs6000_sched_groups; /* Align branch targets. */ static GTY(()) bool rs6000_align_branch_targets; +/* Non-zero to allow overriding loop alignment. */ +static int can_override_loop_align = 0; + /* Support for -msched-costly-dep option. */ const char *rs6000_sched_costly_dep_str; enum rs6000_dependence_cost rs6000_sched_costly_dep; @@ -1139,6 +1143,7 @@ static void rs6000_option_override (void static void rs6000_option_init_struct (struct gcc_options *); static void rs6000_option_default_params (void); static bool rs6000_handle_option (size_t, const char *, int); +static int rs6000_loop_align_max_skip (rtx); static void rs6000_parse_tls_size_option (void); static void rs6000_parse_yes_no_option (const char *, const char *, int *); static int first_altivec_reg_to_save (void); @@ -1599,6 +1604,9 @@ static const struct attribute_spec rs600 #undef TARGET_HANDLE_OPTION #define TARGET_HANDLE_OPTION rs6000_handle_option +#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP +#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip + #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE rs6000_option_override @@ -3025,7 +3033,10 @@ rs6000_option_override_internal (const c if (align_jumps <= 0) align_jumps = 16; if (align_loops <= 0) - align_loops = 16; + { + can_override_loop_align = 1; + align_loops = 16; + } } if (align_jumps_max_skip <= 0) align_jumps_max_skip = 15; @@ -3270,6 +3281,38 @@ rs6000_builtin_mask_for_load (void) return 0; } +/* Implement LOOP_ALIGN. */ +int +rs6000_loop_align (rtx label) +{ + basic_block bb; + int ninsns; + + /* Don't override loop alignment if -falign-loops was specified. */ + if (!can_override_loop_align) + return align_loops_log; + + bb = BLOCK_FOR_INSN (label); + ninsns = num_loop_insns(bb->loop_father); + + /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */ + if (ninsns > 4 && ninsns <= 8 + && (rs6000_cpu == PROCESSOR_POWER4 + || rs6000_cpu == PROCESSOR_POWER5 + || rs6000_cpu == PROCESSOR_POWER6 + || rs6000_cpu == PROCESSOR_POWER7)) + return 5; + else + return align_loops_log; +} + +/* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */ +static int +rs6000_loop_align_max_skip (rtx label) +{ + return (1 << rs6000_loop_align (label)) - 1; +} + /* Implement targetm.vectorize.builtin_conversion. Returns a decl of a function that implements conversion of an integer vector into a floating-point vector, or vice-versa. DEST_TYPE is the Index: gcc/testsuite/gcc.target/powerpc/loop_align.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/loop_align.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/loop_align.c (revision 0) @@ -0,0 +1,10 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-options "-O2 -mcpu=power7 -falign-functions=16" } */ +/* { dg-final { scan-assembler ".p2align 5,,31" } } */ + +void f(double *a, double *b, double *c, int n) { + int i; + for (i=0; i < n; i++) + a[i] = b[i] + c[i]; +}