Message ID | D4C76825A6780047854A11E93CDE84D02F7759@SAUSEXMBP01.amd.com |
---|---|
State | New |
On Wed, Jun 30, 2010 at 2:06 AM, Fang, Changpeng <Changpeng.Fang@amd.com> wrote:
> Hi,
>
> Attached is the patch that partially fixes bug 44576: testsuite/gfortran.dg/zero_sized_1.f90 with huge compile
> time on prefetching + peeling.
>
> The cost of compute_miss_rate in tree-ssa-loop-prefetch.c is found to be very high, and it results in an exploding
> compilation time when there are thousands of memory references in the loop.
>
> compute_miss_rate computes the probability (miss_rate) that two memory references fall on different cache
> lines.  We insert prefetches for both references if the computed miss rate is greater than the experimentally
> determined ACCEPTABLE_MISS_RATE.
>
> In this patch, compute_miss_rate is renamed to is_miss_rate_acceptable and returns early as soon as the computed
> miss rate exceeds ACCEPTABLE_MISS_RATE, which significantly reduces the cost in that case.  Note that the
> worst-case cost is not affected by this patch.
>
> This patch reduces the compile time of the test case from 1m20s to 1m03s on an amd-linux64 system.
> Note that without -fprefetch-loop-arrays, the compile time on the same system is 0m30s.  The extra 33s is mostly
> spent in "compute_all_dependences (datarefs, &dependences, vloops, true)".  I am not sure whether the cost of
> dependence analysis for a loop should be that high (with several thousand memory references in the loop) or
> whether there is a bug in the dependence computation.

Well, it seems that you compute the same information over and over
by doing

unsigned int
tree_ssa_prefetch_arrays (void)
{
...
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Processing loop %d:\n", loop->num);

      unrolled |= loop_prefetch_arrays (loop);

static bool
loop_prefetch_arrays (struct loop *loop)
{
...
  determine_loop_nest_reuse (loop, refs, no_other_refs);

static void
determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs,
                           bool no_other_refs)
{
...
  if (loop->inner)
    return;
...
  /* Find the outermost loop of the loop nest of loop (we require that
     there are no sibling loops inside the nest).  */
  nest = loop;
  while (1)
    {
      aloop = loop_outer (nest);

      if (aloop == current_loops->tree_root
          || aloop->inner->next)
        break;

      nest = aloop;
    }
...
  compute_all_dependences (datarefs, &dependences, vloops, true);


In fact you should iterate _only_ over innermost loops like

  FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)

does that make a difference?

>
> The patch passed bootstrapping and regression tests.
>
> Is this patch OK to commit?

Ok.

Thanks,
Richard.

> Thanks,
>
> Changpeng
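For illustration, a minimal standalone C sketch of the early-return idea described in the patch summary above. The function name miss_rate_acceptable, the simplified types, and the ACCEPTABLE_MISS_RATE value used here are placeholders for this sketch rather than the actual GCC code; the real change is the diff at the end of the thread.

```c
#include <stdbool.h>
#include <stdio.h>

/* Illustrative threshold (per mille); the value is a placeholder.  */
#define ACCEPTABLE_MISS_RATE 50

/* Simplified model of is_miss_rate_acceptable: instead of computing the
   full miss rate and comparing it afterwards, return false as soon as the
   number of missing positions exceeds the acceptable maximum.  */
static bool
miss_rate_acceptable (long cache_line_size, long step, long delta,
                      long distinct_iters, long align_unit)
{
  /* References further apart than a cache line always miss.  */
  if (delta >= cache_line_size)
    return false;

  long total_positions = (cache_line_size / align_unit) * distinct_iters;
  long max_allowed_misses = ACCEPTABLE_MISS_RATE * total_positions / 1000;
  long miss_positions = 0;

  /* Try all alignments of the first reference within its cache line.  */
  for (long align = 0; align < cache_line_size; align += align_unit)
    for (long iter = 0; iter < distinct_iters; iter++)
      {
        long address1 = align + step * iter;
        long address2 = address1 + delta;
        if (address1 / cache_line_size != address2 / cache_line_size)
          {
            miss_positions++;
            if (miss_positions > max_allowed_misses)
              return false;   /* early return: acceptable count exceeded */
          }
      }
  return true;
}

int
main (void)
{
  /* 64-byte cache lines, 256-byte stride, references 8 bytes apart.  */
  printf ("acceptable: %d\n",
          miss_rate_acceptable (64, 256, 8, 4, 8));
  return 0;
}
```

The point is that the old compute_miss_rate always visited all (cache_line_size / align_unit) * distinct_iters positions before comparing against the threshold; with the early return, pairs that clearly miss too often stop after only a few iterations, which is where the 1m20s to 1m03s improvement comes from.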
> FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)

> does that make a difference?

This doesn't help, because "compute_all_dependences" was called the same number of times
as before.

(BTW, should we limit prefetching to innermost loops only?)

In this test case, there are 6 large loops, where each loop has 729 memory references.
It takes 4~5 seconds to "compute_all_dependences" for one such loop.

> The patch passed bootstrapping and regression tests.
>
> Is this patch OK to commit?

> Ok.

Thank you.

Changpeng
On Wed, Jun 30, 2010 at 7:34 PM, Fang, Changpeng <Changpeng.Fang@amd.com> wrote:
>> FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
>
>> does that make a difference?
>
> This doesn't help, because "compute_all_dependences" was called the same number of times
> as before.
>
> (BTW, should we limit prefetching to innermost loops only?)
>
> In this test case, there are 6 large loops, where each loop has 729 memory references.
> It takes 4~5 seconds to "compute_all_dependences" for one such loop.

It shouldn't take that long.  Can you gather a more detailed profile?

>
>> The patch passed bootstrapping and regression tests.
>>
>> Is this patch OK to commit?
>
>> Ok.
>
> Thank you.
>
> Changpeng
>
Hi,

> On Wed, Jun 30, 2010 at 7:34 PM, Fang, Changpeng <Changpeng.Fang@amd.com> wrote:
> >> FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
> >
> >> does that make a difference?
> >
> > This doesn't help, because "compute_all_dependences" was called the same number of times
> > as before.
> >
> > (BTW, should we limit prefetching to innermost loops only?)
> >
> > In this test case, there are 6 large loops, where each loop has 729 memory references.
> > It takes 4~5 seconds to "compute_all_dependences" for one such loop.
>
> It shouldn't take that long.  Can you gather a more detailed profile?

actually, compute_all_dependences is quadratic in the number of memory
references, and in more complicated cases it can perform rather complex
computations, so 5 seconds on 729 references does seem like a realistic time.
Of course, we need to either speed up the analysis or add a cut-off to avoid
it on loops with too many memory references (or both),

Zdenek
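To put the quadratic estimate in concrete numbers, here is a quick standalone back-of-the-envelope check using only figures quoted in this thread (729 references per loop, roughly 5 seconds of dependence analysis per loop); the per-pair time is derived, not measured.

```c
#include <stdio.h>

int
main (void)
{
  long refs  = 729;                     /* memory references in one loop   */
  long pairs = refs * (refs - 1) / 2;   /* unordered reference pairs       */
  double seconds_per_loop = 5.0;        /* reported 4~5 s, rough figure    */

  printf ("pairs analysed per loop: %ld\n", pairs);          /* 265356     */
  printf ("approx. time per pair:   %.1f microseconds\n",
          seconds_per_loop / pairs * 1e6);                   /* ~19 us     */
  printf ("total for 6 such loops:  ~%.0f seconds\n",
          6 * seconds_per_loop);        /* close to the extra ~33 s seen   */
  return 0;
}
```

So even a modest cost per pair multiplies into tens of seconds once the pair count reaches the hundreds of thousands, which is consistent with both Zdenek's estimate and the profile posted later in the thread.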
On Wed, Jun 30, 2010 at 8:01 PM, Zdenek Dvorak <rakdver@kam.mff.cuni.cz> wrote:
> Hi,
>
>> On Wed, Jun 30, 2010 at 7:34 PM, Fang, Changpeng <Changpeng.Fang@amd.com> wrote:
>> >> FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
>> >
>> >> does that make a difference?
>> >
>> > This doesn't help, because "compute_all_dependences" was called the same number of times
>> > as before.
>> >
>> > (BTW, should we limit prefetching to innermost loops only?)
>> >
>> > In this test case, there are 6 large loops, where each loop has 729 memory references.
>> > It takes 4~5 seconds to "compute_all_dependences" for one such loop.
>>
>> It shouldn't take that long.  Can you gather a more detailed profile?
>
> actually, compute_all_dependences is quadratic in the number of memory
> references, and in more complicated cases it can perform rather complex
> computations, so 5 seconds on 729 references does seem like a realistic time.
> Of course, we need to either speed up the analysis or add a cut-off to avoid
> it on loops with too many memory references (or both),

Well, but at -O3 the vectorizer computes dependences as well, and it
doesn't take that much time, so there must be something obvious
going wrong.

Richard.

> Zdenek
>
On Wed, Jun 30, 2010 at 13:05, Richard Guenther
<richard.guenther@gmail.com> wrote:
> On Wed, Jun 30, 2010 at 8:01 PM, Zdenek Dvorak <rakdver@kam.mff.cuni.cz> wrote:
>> Hi,
>>
>>> On Wed, Jun 30, 2010 at 7:34 PM, Fang, Changpeng <Changpeng.Fang@amd.com> wrote:
>>> >> FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
>>> >
>>> >> does that make a difference?
>>> >
>>> > This doesn't help, because "compute_all_dependences" was called the same number of times
>>> > as before.
>>> >
>>> > (BTW, should we limit prefetching to innermost loops only?)
>>> >
>>> > In this test case, there are 6 large loops, where each loop has 729 memory references.
>>> > It takes 4~5 seconds to "compute_all_dependences" for one such loop.
>>>
>>> It shouldn't take that long.  Can you gather a more detailed profile?
>>
>> actually, compute_all_dependences is quadratic in the number of memory
>> references, and in more complicated cases it can perform rather complex
>> computations, so 5 seconds on 729 references does seem like a realistic time.
>> Of course, we need to either speed up the analysis or add a cut-off to avoid
>> it on loops with too many memory references (or both),
>
> Well, but at -O3 the vectorizer computes dependences as well, and it
> doesn't take that much time, so there must be something obvious
> going wrong.
>

The dependence analysis in the vectorizer is done only in the innermost
loop that is vectorized, whereas prefetch does the analysis of data deps
for every loop.

Sebastian
On Wed, Jun 30, 2010 at 8:10 PM, Sebastian Pop <sebpop@gmail.com> wrote:
> On Wed, Jun 30, 2010 at 13:05, Richard Guenther
> <richard.guenther@gmail.com> wrote:
>> On Wed, Jun 30, 2010 at 8:01 PM, Zdenek Dvorak <rakdver@kam.mff.cuni.cz> wrote:
>>> Hi,
>>>
>>>> On Wed, Jun 30, 2010 at 7:34 PM, Fang, Changpeng <Changpeng.Fang@amd.com> wrote:
>>>> >> FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
>>>> >
>>>> >> does that make a difference?
>>>> >
>>>> > This doesn't help, because "compute_all_dependences" was called the same number of times
>>>> > as before.
>>>> >
>>>> > (BTW, should we limit prefetching to innermost loops only?)
>>>> >
>>>> > In this test case, there are 6 large loops, where each loop has 729 memory references.
>>>> > It takes 4~5 seconds to "compute_all_dependences" for one such loop.
>>>>
>>>> It shouldn't take that long.  Can you gather a more detailed profile?
>>>
>>> actually, compute_all_dependences is quadratic in the number of memory
>>> references, and in more complicated cases it can perform rather complex
>>> computations, so 5 seconds on 729 references does seem like a realistic time.
>>> Of course, we need to either speed up the analysis or add a cut-off to avoid
>>> it on loops with too many memory references (or both),
>>
>> Well, but at -O3 the vectorizer computes dependences as well, and it
>> doesn't take that much time, so there must be something obvious
>> going wrong.
>>
>
> The dependence analysis in the vectorizer is done only in the innermost
> loop that is vectorized, whereas prefetch does the analysis of data deps
> for every loop.

The vectorizer also does dependence analysis for outer-loop vectorization.
I guess prefetch analysis should restrict itself to data dependences on a
maximum loop nest depth (of say 2 or 3).

Still, dependence analysis looks overly costly here.

Richard.

> Sebastian
>
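A standalone toy sketch of what capping the analysed nest depth could look like, modelled on the outward walk in determine_loop_nest_reuse quoted earlier in the thread. The struct, the field names, the helper outermost_reuse_nest, and MAX_NEST_DEPTH_FOR_REUSE are all invented for illustration; this is not a proposed patch, and the real code works on GCC's struct loop via loop_outer ().

```c
#include <stddef.h>

/* Toy loop tree, only what the walk below needs.  */
struct loop
{
  struct loop *outer;   /* enclosing loop, NULL at the root      */
  struct loop *inner;   /* first child loop, NULL if innermost   */
  struct loop *next;    /* next sibling loop                     */
};

#define MAX_NEST_DEPTH_FOR_REUSE 3   /* hypothetical limit of "2 or 3" */

/* Walk outwards from LOOP as long as the enclosing loop exists, has no
   sibling loops inside the nest, and the depth cap is not reached.  */
static struct loop *
outermost_reuse_nest (struct loop *loop)
{
  struct loop *nest = loop;
  int depth = 1;

  while (depth < MAX_NEST_DEPTH_FOR_REUSE)
    {
      struct loop *aloop = nest->outer;

      if (aloop == NULL || (aloop->inner && aloop->inner->next))
        break;

      nest = aloop;
      depth++;
    }

  return nest;
}
```

The only difference from the walk quoted earlier is the depth counter and its bound; the sibling check and the termination at the root mirror the existing code.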
>>> Well, but at -O3 the vectorizer computes dependences as well, and it
>>> doesn't take that much time, so there must be something obvious
>>> going wrong.
>>>
>>
>> The dependence analysis in the vectorizer is done only in the innermost
>> loop that is vectorized, whereas prefetch does the analysis of data deps
>> for every loop.

> The vectorizer also does dependence analysis for outer-loop vectorization.
> I guess prefetch analysis should restrict itself to data dependences on a
> maximum loop nest depth (of say 2 or 3).

For the vectorizer (and tree-loop-linear), we are just lucky that the loop could
not be vectorized, so dependence analysis was not invoked at all for this loop.
For this test case, prefetching is the only pass that invokes dependence analysis.

> Still, dependence analysis looks overly costly here.

Yes, we need to understand and reduce the dependence analysis cost.

Thanks,

Changpeng
> actually, compute_all_dependences is quadratic in the number of memory
> references, and in more complicated cases it can perform rather complex
> computations, so 5 seconds on 729 references does seem like a realistic time.
> Of course, we need to either speed up the analysis or add a cut-off to avoid
> it on loops with too many memory references (or both),

Actually, these 4~5 seconds are evenly distributed, and I do think they are due
to the large number of memory references.

Let's take a close look at this:

determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs,
                           bool no_other_refs)
{
  ...
  compute_all_dependences (datarefs, &dependences, vloops, true);   /* 3+ seconds */

  for (i = 0; VEC_iterate (ddr_p, dependences, i, dep); i++)        /* 1+ second */
    {
      ...
    }
}

The 4~5 seconds are spent in compute_all_dependences and the loop after it that
extracts the dependence info.  The loop iterates 265356 times and takes 1+ second.
The loop body is simple, so that cost is reasonable.

Now, let's look at compute_all_dependences:

compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
                         VEC (ddr_p, heap) **dependence_relations,
                         VEC (loop_p, heap) *loop_nest,
                         bool compute_self_and_rr)
{
  ...
  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
    for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
      if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
        {
          ddr = initialize_data_dependence_relation (a, b, loop_nest);   /* 1+ second */
          VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
          if (loop_nest)
            compute_affine_dependence (ddr,
                                       VEC_index (loop_p, loop_nest, 0));  /* 1+ second */
        }
  ...
}

Each of initialize_data_dependence_relation and compute_affine_dependence is called
265356 times, and each of them costs 1+ second in total over those 265356 calls.
Again, initialize_data_dependence_relation itself is simple.

As a result, I conclude that the large number of memory references is the reason
for the slowdown.  So, it is better to add a cut-off (for prefetching at least).

Thanks,

Changpeng
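A minimal sketch of what such a cut-off could look like, under an assumed budget; the macro name MAX_DEPENDENCE_PAIRS and its value are invented for illustration and are not part of the committed patch (a real change would more likely expose such a limit as a tunable --param).

```c
#include <stdbool.h>
#include <stddef.h>

/* Hypothetical budget: the largest number of reference pairs we are
   willing to hand to compute_all_dependences for a single loop nest.  */
#define MAX_DEPENDENCE_PAIRS 10000

/* The analysis examines n*(n-1)/2 pairs for n memory references; skip it
   (and conservatively assume no reuse) when that exceeds the budget.
   With the value above, the cut-off kicks in around n = 142, far below
   the 729 references per loop seen in this test case.  */
static bool
worth_analyzing_reuse (size_t n_mem_refs)
{
  size_t pairs = n_mem_refs * (n_mem_refs - 1) / 2;
  return pairs <= MAX_DEPENDENCE_PAIRS;
}
```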
On Wed, Jun 30, 2010 at 03:52, Richard Guenther <richard.guenther@gmail.com> wrote:
> On Wed, Jun 30, 2010 at 2:06 AM, Fang, Changpeng <Changpeng.Fang@amd.com> wrote:
>> Hi,
>>
>> Attached is the patch that partially fixes bug 44576: testsuite/gfortran.dg/zero_sized_1.f90 with huge compile
>> time on prefetching + peeling.
>>
>> The cost of compute_miss_rate in tree-ssa-loop-prefetch.c is found to be very high, and it results in an exploding
>> compilation time when there are thousands of memory references in the loop.
>>
>> compute_miss_rate computes the probability (miss_rate) that two memory references fall on different cache
>> lines.  We insert prefetches for both references if the computed miss rate is greater than the experimentally
>> determined ACCEPTABLE_MISS_RATE.
>>
>> In this patch, compute_miss_rate is renamed to is_miss_rate_acceptable and returns early as soon as the computed
>> miss rate exceeds ACCEPTABLE_MISS_RATE, which significantly reduces the cost in that case.  Note that the
>> worst-case cost is not affected by this patch.
>>
>> This patch reduces the compile time of the test case from 1m20s to 1m03s on an amd-linux64 system.
>> Note that without -fprefetch-loop-arrays, the compile time on the same system is 0m30s.  The extra 33s is mostly
>> spent in "compute_all_dependences (datarefs, &dependences, vloops, true)".  I am not sure whether the cost of
>> dependence analysis for a loop should be that high (with several thousand memory references in the loop) or
>> whether there is a bug in the dependence computation.
>
> Well, it seems that you compute the same information over and over
> by doing
>
> unsigned int
> tree_ssa_prefetch_arrays (void)
> {
> ...
>   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
>     {
>       if (dump_file && (dump_flags & TDF_DETAILS))
>         fprintf (dump_file, "Processing loop %d:\n", loop->num);
>
>       unrolled |= loop_prefetch_arrays (loop);
>
> static bool
> loop_prefetch_arrays (struct loop *loop)
> {
> ...
>   determine_loop_nest_reuse (loop, refs, no_other_refs);
>
> static void
> determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs,
>                            bool no_other_refs)
> {
> ...
>   if (loop->inner)
>     return;
> ...
>   /* Find the outermost loop of the loop nest of loop (we require that
>      there are no sibling loops inside the nest).  */
>   nest = loop;
>   while (1)
>     {
>       aloop = loop_outer (nest);
>
>       if (aloop == current_loops->tree_root
>           || aloop->inner->next)
>         break;
>
>       nest = aloop;
>     }
> ...
>   compute_all_dependences (datarefs, &dependences, vloops, true);
>
>
> In fact you should iterate _only_ over innermost loops like
>
>   FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
>
> does that make a difference?
>
>>
>> The patch passed bootstrapping and regression tests.
>>
>> Is this patch OK to commit?
>
> Ok.
>

Committed r161728
From 3dbd1a84428979b5e73b2532c6389d31377b929a Mon Sep 17 00:00:00 2001
From: Changpeng Fang <chfang@pathscale.(none)>
Date: Mon, 28 Jun 2010 17:20:10 -0700
Subject: [PATCH 5/5] Reduce the cost in miss rate computation

	* tree-ssa-loop-prefetch.c (compute_miss_rate): Rename to
	is_miss_rate_acceptable.  Pull total_positions computation
	out of the loops.  Early return if miss_positions exceeds
	the acceptable threshold.
	* tree-ssa-loop-prefetch.c (prune_ref_by_group_reuse): Call
	is_miss_rate_acceptable after renaming of compute_miss_rate.

---
 gcc/tree-ssa-loop-prefetch.c |   41 +++++++++++++++++++++--------------------
 1 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
index 27e2b42..f2c95ac 100644
--- a/gcc/tree-ssa-loop-prefetch.c
+++ b/gcc/tree-ssa-loop-prefetch.c
@@ -640,27 +640,29 @@ ddown (HOST_WIDE_INT x, unsigned HOST_WIDE_INT by)
 
 /* Given a CACHE_LINE_SIZE and two inductive memory references
    with a common STEP greater than CACHE_LINE_SIZE and an address
    difference DELTA, compute the probability that they will fall
-   in different cache lines.  DISTINCT_ITERS is the number of
-   distinct iterations after which the pattern repeats itself.
+   in different cache lines.  Return true if the computed miss rate
+   is not greater than the ACCEPTABLE_MISS_RATE.  DISTINCT_ITERS is the
+   number of distinct iterations after which the pattern repeats itself.
    ALIGN_UNIT is the unit of alignment in bytes.  */
 
-static int
-compute_miss_rate (unsigned HOST_WIDE_INT cache_line_size,
+static bool
+is_miss_rate_acceptable (unsigned HOST_WIDE_INT cache_line_size,
 		   HOST_WIDE_INT step, HOST_WIDE_INT delta,
 		   unsigned HOST_WIDE_INT distinct_iters,
 		   int align_unit)
 {
   unsigned align, iter;
-  int total_positions, miss_positions, miss_rate;
+  int total_positions, miss_positions, max_allowed_miss_positions;
   int address1, address2, cache_line1, cache_line2;
 
   /* It always misses if delta is greater than or equal to the cache
-     line size.  */
-  if (delta >= cache_line_size)
-    return 1000;
+     line size.  */
+  if (delta >= (HOST_WIDE_INT) cache_line_size)
+    return false;
 
-  total_positions = 0;
   miss_positions = 0;
+  total_positions = (cache_line_size / align_unit) * distinct_iters;
+  max_allowed_miss_positions = (ACCEPTABLE_MISS_RATE * total_positions) / 1000;
 
   /* Iterate through all possible alignments of the first
      memory reference within its cache line.  */
@@ -673,12 +675,14 @@ compute_miss_rate (unsigned HOST_WIDE_INT cache_line_size,
       address2 = address1 + delta;
       cache_line1 = address1 / cache_line_size;
       cache_line2 = address2 / cache_line_size;
-      total_positions += 1;
       if (cache_line1 != cache_line2)
-	miss_positions += 1;
+	{
+	  miss_positions += 1;
+	  if (miss_positions > max_allowed_miss_positions)
+	    return false;
+	}
     }
-  miss_rate = 1000 * miss_positions / total_positions;
-  return miss_rate;
+  return true;
 }
 
 /* Prune the prefetch candidate REF using the reuse with BY.
@@ -694,7 +698,6 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by,
   HOST_WIDE_INT delta = delta_b - delta_r;
   HOST_WIDE_INT hit_from;
   unsigned HOST_WIDE_INT prefetch_before, prefetch_block;
-  int miss_rate;
   HOST_WIDE_INT reduced_step;
   unsigned HOST_WIDE_INT reduced_prefetch_block;
   tree ref_type;
@@ -793,9 +796,8 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by,
   delta %= step;
   ref_type = TREE_TYPE (ref->mem);
   align_unit = TYPE_ALIGN (ref_type) / 8;
-  miss_rate = compute_miss_rate(prefetch_block, step, delta,
-				reduced_prefetch_block, align_unit);
-  if (miss_rate <= ACCEPTABLE_MISS_RATE)
+  if (is_miss_rate_acceptable (prefetch_block, step, delta,
+			       reduced_prefetch_block, align_unit))
     {
       /* Do not reduce prefetch_before if we meet beyond cache size.  */
       if (prefetch_before > L2_CACHE_SIZE_BYTES / PREFETCH_BLOCK)
@@ -809,9 +811,8 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by,
       /* Try also the following iteration.  */
       prefetch_before++;
       delta = step - delta;
-      miss_rate = compute_miss_rate(prefetch_block, step, delta,
-				    reduced_prefetch_block, align_unit);
-      if (miss_rate <= ACCEPTABLE_MISS_RATE)
+      if (is_miss_rate_acceptable (prefetch_block, step, delta,
+				   reduced_prefetch_block, align_unit))
 	{
 	  if (prefetch_before < ref->prefetch_before)
 	    ref->prefetch_before = prefetch_before;
-- 
1.6.3.3