Message ID: 5615AADE.4030306@yahoo.com
State: New
Hi Abe,

could you please avoid double negations, and
please use early returns rather than huge right indentations:

+  if (! not_a_scratchpad_candidate)
+    {
+      if (MEM_SIZE_KNOWN_P (orig_x))
+        {
+          const size_t size_of_MEM = MEM_SIZE (orig_x);
+
+          if (size_of_MEM <= SCRATCHPAD_MAX_SIZE)
+            {
[...]
+            }
+        }
+    }
+  return FALSE;

Just rewrite as:

  if (not_a_scratchpad_candidate
      || !MEM_SIZE_KNOWN_P (orig_x))
    return FALSE;

  const size_t size_of_MEM = MEM_SIZE (orig_x);
  if (size_of_MEM > SCRATCHPAD_MAX_SIZE)
    return FALSE;

That will save 3 levels of indent.

Also some of your braces do not seem to be correctly placed.
Please use clang-format on your patch to solve the indentation issues.

Thanks,
Sebastian

On Wed, Oct 7, 2015 at 6:29 PM, Abe <abe_skolnik@yahoo.com> wrote:
> Dear all,
>
> Attached please find my revised patch to the RTL if converter. This patch
> enables the if-conversion of half-hammocks with a store in them that the
> internal GCC machinery otherwise considers too hazardous to if-convert.
> This is made safe by using the "scratchpad" technique, i.e. throwing away
> the store into a safe location where nothing of any importance is
> currently stored. The scratchpads are allocated in the stack frame.
>
> Here is an example of code which is newly converted with this patch,
> at least when targeting AArch64:
>
> int A[10];
>
> void half_hammock() {
>   if (A[0])
>     A[1] = 2;
> }
>
> Both tested against trunk and bootstrapped OK with defaults* on
> AMD64-AKA-"x86_64" GNU/Linux.
>
> '*': [except for "--prefix"]
>
> I`m sending the patch as an attachment to avoid it
> being corrupted/reformatted by any e-mail troubles.
>
> I look forward to your feedback.
>
> Regards,
>
> Abe
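Sebastian's guard-clause rewrite above can be shown as a self-contained sketch. The names here (`candidate_ok`, `MAX_SIZE`, the parameters) are illustrative stand-ins, not identifiers from the patch:

```c
#include <stdbool.h>
#include <stddef.h>

#define MAX_SIZE 128  /* stand-in for SCRATCHPAD_MAX_SIZE */

/* Guard-clause style: reject the uninteresting cases early so the
   real work that follows needs no extra levels of indentation.  */
static bool
candidate_ok (bool excluded, bool size_known, size_t size)
{
  if (excluded || !size_known)
    return false;
  if (size > MAX_SIZE)
    return false;
  /* ... the real work would go here, at the top indentation level ... */
  return true;
}
```

The positive condition (`excluded`) replaces the double-negated `! not_a_scratchpad_candidate`, which is the other half of Sebastian's request.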
Abe, please avoid comments that are not needed.

+          /* We must copy the insns between the start of the THEN block
+             and the set of 'a', if they exist, since they may be needed
+             for the converted code as well, but we must not copy a
+             start-of-BB note if one is present, nor debug "insn"s. */
+
+          for (rtx_insn* insn = BB_HEAD (then_bb); insn && insn != insn_a
+               && insn != BB_END (then_bb); insn=NEXT_INSN (insn))
+            {

Please remove the braces: the loop body is a single stmt.

+              if (! (NOTE_INSN_BASIC_BLOCK_P (insn) || DEBUG_INSN_P (insn)))
+                duplicate_insn_chain (insn, insn);
+              /* A return of 0 from "duplicate_insn_chain" is _not_
+                 a failure; it just returns the "NEXT_INSN" of the
+                 last insn it duplicated. */

Please remove this comment.

+            }
+
+          /* Done copying the needed insns between the start of the
+             THEN block and the set of 'a', if any. */

This comment duplicates the same content as the comment before the loop.
Please remove.

On Thu, Oct 8, 2015 at 8:08 AM, Sebastian Pop <sebpop@gmail.com> wrote:
> [full quote of the previous message trimmed]
On 10/08/2015 01:29 AM, Abe wrote:
> Attached please find my revised patch to the RTL if converter. This
> patch enables the if-conversion of half-hammocks with a store in them
> that the internal GCC machinery otherwise considers too hazardous to
> if-convert. This is made safe by using the "scratchpad" technique,
> i.e. throwing away the store into a safe location where nothing of any
> importance is currently stored. The scratchpads are allocated in the
> stack frame.

So, one conceptual issue first. Obviously this increases the size of the
stack frame, which makes the transformation more expensive. The patch
does not appear to attempt to estimate costs. However, why do we need to
allocate anything in the first place? If you want to store something
that will be thrown away, just pick an address below the stack pointer.
I think some ports may need different strategies due to stack bias or
red zones, so a target hook is in order, with one safe default to fail,
and one default implementation that can be used by most targets, and
then specialized versions in target-dependent code where necessary:

rtx
default_get_scratchpad_fail (HOST_WIDE_INT size)
{
  return NULL_RTX;
}

rtx
default_get_scratchpad (HOST_WIDE_INT size)
{
  /* Possibly also take STACK_BOUNDARY into account so as
     to not make unaligned locations.  */
  if (size >= param (SCRATCHPAD_MAX_SIZE))
    return NULL_RTX;
  return plus_constant (stack_pointer_rtx, gen_int_mode (-size, Pmode));
}

With that, I think all the code to keep track of scratchpads can just be
deleted. There's this preexisting comment:

  /* ??? This is overconservative. Storing to two different mems is
     as easy as conditionally computing the address. Storing to a
     single mem merely requires a scratch memory to use as one of the
     destination addresses; often the memory immediately below the
     stack pointer is available for this.  */

suggesting that it ought to be possible to generalize the technique to
stores to different addresses.

To the patch itself.
The code still has many stylistic problems and does not follow the
required guidelines.

> +#include "diagnostic-color.h"

Why?

> -/* Return true if a write into MEM may trap or fault. */
> +/* Return true if a write into MEM may trap or fault
> +   even in the presence of scratchpad support. */
>
> +/* Return true if a write into MEM may trap or fault
> +   without scratchpad support. */

Please explain the rationale for these changes. What exactly is
different with scratchpads?

> +      /* The next "if": quoting "noce_emit_cmove":
> +         If we can't create new pseudos, though, don't bother. */
> +      if (reload_completed)
> +        return FALSE;
> +
> +      if (optimize<2)
> +        return FALSE;
> +
> +      if (optimize_function_for_size_p (cfun))
> +        return FALSE;
> +
> +      if (targetm.have_conditional_execution () || ! HAVE_conditional_move)
> +        return FALSE;

Merge the conditions into one if. Watch spacing around operators.

> +      const bool not_a_scratchpad_candidate =
> +        noce_mem_write_may_trap_or_fault_p_1 (orig_x);
> +      if (! not_a_scratchpad_candidate)

The = should start a line, but what you really should do is just put the
condition into the if and eliminate the variable.

> +              const size_t size_of_MEM = MEM_SIZE (orig_x);

Identifiers are still too verbose. This is typically just called size,
or memsize if there are other sizes to keep track of.

> +      for (rtx_insn* insn = BB_HEAD (then_bb); insn && insn != insn_a
> +           && insn != BB_END (then_bb); insn=NEXT_INSN (insn))
> +        {
> +          if (! (NOTE_INSN_BASIC_BLOCK_P (insn) || DEBUG_INSN_P (insn)))

There are six different coding style violations in this block. Please
identify and fix them (elsewhere as well). In addition, I think it would
be better to start each part of the for statement on its own line for
clarity.

I still need to figure out what is going on in this insn-copying loop.

> +          /* Done copying the needed insns between the start of the
> +             THEN block and the set of 'a', if any. */
>
> +      if (CONSTANT_P (XEXP (cond, 0)) && CONSTANT_P (XEXP (cond, 1)))
> +        {
> +          end_sequence ();
> +          return FALSE;
> +        }

This should be done earlier before you go to the effort of copying
insns.

> +  MEM_NOTRAP_P (mem) = true;

So I'm still not entirely sure which cases you are trying to optimize
and which ones not, but couldn't this technique allow a trapping store
here?

Bernd
> +          /* We must copy the insns between the start of the THEN block
> +             and the set of 'a', if they exist, since they may be needed
> +             for the converted code as well, but we must not copy a
> +             start-of-BB note if one is present, nor debug "insn"s. */
> +
> +          for (rtx_insn* insn = BB_HEAD (then_bb); insn && insn != insn_a
> +               && insn != BB_END (then_bb); insn=NEXT_INSN (insn))
> +            {
>
> Please remove the braces: the loop body is a single stmt.

Oh, I miscounted. That makes seven then.

Bernd
[Bernd wrote:]
> Obviously this increases the size of the stack frame,

_Potentially_ so, yes. However, GCC is free to put the allocation into
an otherwise-unused part of the stack frame.

> The patch does not appear to attempt to estimate costs.

There is nothing new relating to cost estimation in my patch, that`s
true. As some of you already know, James Greenhalgh has been working on
cost analysis in "ifcvt.c" recently, as can be seen, for example, here:

https://gcc.gnu.org/ml/gcc-patches/2015-09/msg01971.html

> However, why do we need to allocate anything in the first place?
> If you want to store something that will be thrown away,
> just pick an address below the stack pointer.

Because allocating a scratchpad should work on all relevant targets. We
do not have the resources to test on all GCC-supported CPU ISAs and on
all GCC-supported OSes, and we would like to have an optimization that
works on as many targets as makes sense [those with cmove-like ability
and withOUT full-blown conditional execution]. In other words, we chose
to "play it safe". We considered the "just past the stack pointer"
technique early on in this project, but judged it as too risky to apply
to all targets w/o testing either on those targets or at least on an
emulator/simulator of those targets.

I agree that your suggestion of having one global default scratchpad
allocation policy plus per-target overrides that are more efficient _is_
a good one, but it will have to wait a while for implementation if
that`s to be done by me. In the meantime, the existing allocation policy
is compatible with multiple targets and costs very little space in the
stack frame, if and when any at all.

Thanks for your [Bernd`s] other feedback, not copied here; I have
applied lots of corrections and improvements as a result.
>> +  MEM_NOTRAP_P (mem) = true;
> So I'm still not entirely sure which cases you are trying to optimize
> and which ones not,

The current patch focuses entirely on half-hammock writes with stores to
addresses about which GCC "feels nervous", i.e. "may trap or fault"; for
example:

  if (condition)
    *pointer = 9;
  // no "else" or "else if"

> but couldn't this technique allow a trapping store here?

The purpose of the new if-conversion is to take a may-trap-or-fault
store and replace it with a store that will be OK if the original
program was OK with respect to the current execution`s inputs,
environment, PRNG results, etc. For example, the only way the
if-converted code would dereference a null pointer is if/when the
original program would have done the same thing under the same
conditions.

Regards,

Abe
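At the source level, the transformation Abe describes can be sketched roughly like this. This is an editorial illustration only (the real patch works on RTL, and `before`/`after`/`scratchpad` are made-up names); it shows the claimed safety property, namely that the converted form dereferences `pointer` only when the original would:

```c
/* Before: a half-hammock whose store GCC considers hazardous.  */
static void
before (int condition, int *pointer)
{
  if (condition)
    *pointer = 9;
  /* no "else" or "else if" */
}

/* After: the store executes unconditionally, but its destination is
   chosen by a conditional move; when the condition is false the value
   lands in a stack-frame scratchpad whose contents nobody reads.  */
static void
after (int condition, int *pointer)
{
  int scratchpad;                            /* throwaway slot */
  int *dest = condition ? pointer : &scratchpad;
  *dest = 9;  /* touches *pointer only if the original would have */
}
```

For every value of `condition`, both versions leave `*pointer` in the same final state, and `after` never dereferences `pointer` unless `before` would have done so under the same conditions.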
> _Potentially_ so, yes. However, GCC is free to put the allocation into
> an otherwise-unused part of the stack frame.

Well, I looked at code generation changes, and it usually seems to come
with an increase in stack frame size - sometimes causing extra
instructions to be emitted.

>> However, why do we need to allocate anything in the first place?
>> If you want to store something that will be thrown away,
>> just pick an address below the stack pointer.
>
> Because allocating a scratchpad should work on all relevant targets. We
> do not have the resources to test on all GCC-supported CPU ISAs and on
> all GCC-supported OSes, and we would like to have an optimization that
> works on as many targets as makes sense [those with cmove-like ability
> and withOUT full-blown conditional execution].

Yeah, but if you put in a new facility like this, chances are
maintainers for active targets will pick it up and add the necessary
hooks. That's certainly what happened with shrink-wrapping. So I don't
think this is a concern.

> I agree that your suggestion of having one global default scratchpad
> allocation policy plus per-target overrides that are more efficient
> _is_ a good one, but it will have to wait a while for implementation
> if that`s to be done by me. In the meantime, the existing allocation
> policy is compatible with multiple targets and costs very little space
> in the stack frame, if and when any at all.

I'm afraid I'll have to reject the patch then, on these grounds:
* it may pessimize code
* it does not even estimate costs to attempt avoiding this
* a much simpler, more efficient implementation is possible.

>>> +  MEM_NOTRAP_P (mem) = true;
>> So I'm still not entirely sure which cases you are trying to optimize
>> and which ones not,
>
> The current patch focuses entirely on half-hammock writes with stores
> to addresses about which GCC "feels nervous", i.e. "may trap or
> fault"; for example:
>
> if (condition)
>   *pointer = 9;
> // no "else" or "else if"
>
>> but couldn't this technique allow a trapping store here?
>
> The purpose of the new if-conversion is to take a may-trap-or-fault
> store and replace it with a store that will be OK if the original
> program was OK with respect to the current execution`s inputs,
> environment, PRNG results, etc. For example, the only way the
> if-converted code would dereference a null pointer is if/when the
> original program would have done the same thing under the same
> conditions.

Yeah, but it could still trap if the original program had an error. So I
don't think setting MEM_NOTRAP_P is right.

Bernd
On 10/09/2015 12:23 AM, Bernd Schmidt wrote:
> On 10/08/2015 01:29 AM, Abe wrote:
>> [quoted description of the scratchpad patch trimmed]
>
> So, one conceptual issue first. Obviously this increases the size of
> the stack frame, which makes the transformation more expensive. The
> patch does not appear to attempt to estimate costs. However, why do we
> need to allocate anything in the first place? If you want to store
> something that will be thrown away, just pick an address below the
> stack pointer.

If you're using one of the switches that checks for stack overflow at
the start of the function, you certainly don't want to do any such
stores.

r~
On 10/14/2015 12:05 PM, Richard Henderson wrote:
> If you're using one of the switches that checks for stack overflow at
> the start of the function, you certainly don't want to do any such
> stores.

Oh, and for a given target the kernel may consider any write to the
stack vma below the stack pointer as invalid. The x86 kernels will at
least handle "enter $65535, $31", which can write to a bit more than 64k
below %esp before %esp gets updated, but that's probably not going to be
true of most risc targets.

r~
> If you're using one of the switches that checks for stack overflow at
> the start of the function, you certainly don't want to do any such
> stores.

There is a protection area for -fstack-check (STACK_CHECK_PROTECT bytes)
so you can do stores just below the stack pointer as far as it's
concerned.

There is indeed the issue of the mere writing below the stack pointer.
Our experience with various OSes and architectures shows that this
almost always works. The only problematic case is x86{-64}/Linux
historically, where you cannot write below the page pointed to by the
stack pointer (that's why there is a specific implementation of
-fstack-check for x86{-64}/Linux).
On 10/13/2015 02:16 PM, Bernd Schmidt wrote:
>> _Potentially_ so, yes. However, GCC is free to put the allocation into
>> an otherwise-unused part of the stack frame.
>
> Well, I looked at code generation changes, and it usually seems to come
> with an increase in stack frame size - sometimes causing extra
> instructions to be emitted.

I think that's essentially unavoidable when we end up using the
scratchpad.

>>> However, why do we need to allocate anything in the first place?
>>> If you want to store something that will be thrown away,
>>> just pick an address below the stack pointer.
>>
>> Because allocating a scratchpad should work on all relevant targets.
>> We do not have the resources to test on all GCC-supported CPU ISAs and
>> on all GCC-supported OSes, and we would like to have an optimization
>> that works on as many targets as makes sense [those with cmove-like
>> ability and withOUT full-blown conditional execution].
>
> Yeah, but if you put in a new facility like this, chances are
> maintainers for active targets will pick it up and add the necessary
> hooks. That's certainly what happened with shrink-wrapping. So I don't
> think this is a concern.

But won't you get valgrind warnings if the code loads/stores outside the
defined stack? While we know it's safe, the warnings from valgrind will
likely cause a backlash of user complaints.

> I'm afraid I'll have to reject the patch then, on these grounds:
> * it may pessimize code
> * it does not even estimate costs to attempt avoiding this
> * a much simpler, more efficient implementation is possible.

Noted. I think the pessimization is the area where folks are most
concerned. Obviously some pessimization relative to current code is
necessary to fix some of the problems WRT thread safety and avoiding
things like introducing faults in code which did not previously fault.
However, pessimization of safe code is, err, um, bad and needs to be
avoided.

Jeff
On 10/14/2015 02:28 AM, Eric Botcazou wrote:
>> If you're using one of the switches that checks for stack overflow at
>> the start of the function, you certainly don't want to do any such
>> stores.
>
> There is a protection area for -fstack-check (STACK_CHECK_PROTECT
> bytes) so you can do stores just below the stack pointer as far as
> it's concerned.
>
> There is indeed the issue of the mere writing below the stack pointer.
> Our experience with various OSes and architectures shows that this
> almost always works. The only problematic case is x86{-64}/Linux
> historically, where you cannot write below the page pointed to by the
> stack pointer (that's why there is a specific implementation of
> -fstack-check for x86{-64}/Linux).

It was problematical on the PA, but I can't recall precisely why.

The thing we need to remember here is that if we do something like use
space just below the stack pointer, valgrind is probably going to start
complaining (and legitimately so). While we know the result is
throw-away, valgrind doesn't, and the complaints and noise from this
would IMHO outweigh the benefits from using the trick of reading outside
the defined stack area.

jeff
On 10/14/2015 07:43 PM, Jeff Law wrote:
> Obviously some pessimization relative to current code is necessary to
> fix some of the problems WRT thread safety and avoiding things like
> introducing faults in code which did not previously fault.

Huh? This patch is purely an (attempt at) optimization, not something
that fixes any problems.

> However, pessimization of safe code is, err, um, bad and needs to be
> avoided.

Here's an example:

>   subq $16, %rsp
  [...]
>   leaq 8(%rsp), %r8
>   leaq 256(%rax), %rdx
    cmpq 256(%rax), %rcx    |    cmpq 256(%rax), %rsi
    jne .L97                <
    movq $0, 256(%rax)      <
  .L97:                     <
>   movq %rdx, %rax
>   cmovne %r8, %rax
>   movq $0, (%rax)
  [...]
>   addq $16, %rsp

In the worst case that executes six more instructions, and always causes
unnecessary stack frame bloat. This on x86 where AFAIK it's doubtful
whether cmov is a win at all anyway. I think this shows the approach is
just bad, even ignoring problems like that it could allocate multiple
scratchpads when one would suffice, or allocate one and end up not using
it because the transformation fails.

I can't test valgrind right now, it fails to run on my machine, but I
guess it could adapt to allow stores slightly below the stack (maybe
warning once)? It seems like a bit of an edge case to worry about, but
if supporting it is critical and it can't be changed to adapt to new
optimizations, then I think we're probably better off entirely without
this scratchpad transformation.

Alternatively I can think of a few other possible approaches which
wouldn't require this kind of bloat:
* add support for allocating space in the stack redzone. That could be
  interesting for the register allocator as well. Would help only
  x86_64, but that's a large fraction of gcc's userbase.
* add support for opportunistically finding unused alignment padding
  in the existing stack frame. Less likely to work but would produce
  better results when it does.
* on embedded targets we probably don't have to worry about valgrind,
  so do the optimal (sp - x) thing there
* allocate a single global as the dummy target. Might be more
  expensive to load the address on some targets though.
* at least find a way to express costs for this transformation.
  Difficult since you don't yet necessarily know if the function is
  going to have a stack frame. Hence, IMO this approach is flawed.
  (You'll still want cost estimates even when not allocating stuff in
  the normal stack frame, because generated code will still execute
  between two and four extra instructions).

Bernd
On Wed, Oct 14, 2015 at 9:15 PM, Bernd Schmidt <bschmidt@redhat.com> wrote:
> [full quote of the previous message trimmed]

Btw, I agree with all your concerns and indeed finding a better place we
can always store to is required. I wonder if we can do the actual
location assignment after reload (searching for (temporarily) unused
already allocated stack space for example). Using the red-zone also
crossed my mind.

As for performance of the resulting code I'd allow the target to decide
whether to never do this, whether to do this only with profile-feedback
(and 50%/50% chance) or whether to do this always. Most branch
predictors have issues with high branch density, so if you have a
sequence of

  if (a) *p = ..;
  if (b) *q = ..;
  if (c) *r = ..;

then the jump form may have a big penalty (or a lot of padding to avoid
it). I'm not sure whether conditional moves in that case behave better
(no idea if they also enter the prediction machinery for speculative
execution).

Richard.
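Richard's branch-density example can be made concrete at the source level. The sketch below is editorial (the function names are made up) and contrasts the jump form with a pointer-select form that a cmove-capable target could emit; it also uses the "single global as the dummy target" idea from Bernd's list of alternatives:

```c
static int scratch;  /* shared dummy target, per the "single global" idea */

/* Jump form: three conditional branches in a row, which dense-branch
   predictors may handle poorly.  */
static void
jumpy (int a, int b, int c, int *p, int *q, int *r)
{
  if (a) *p = 1;
  if (b) *q = 2;
  if (c) *r = 3;
}

/* Branchless form: every store executes; a conditional select steers
   the dead stores into the scratchpad instead of branching around them.  */
static void
branchless (int a, int b, int c, int *p, int *q, int *r)
{
  *(a ? p : &scratch) = 1;
  *(b ? q : &scratch) = 2;
  *(c ? r : &scratch) = 3;
}
```

Both functions produce identical results in single-threaded code; the open question in the thread is whether the extra unconditional stores cost more than the avoided branches on a given target.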
On 10/14/2015 01:15 PM, Bernd Schmidt wrote:
> On 10/14/2015 07:43 PM, Jeff Law wrote:
>> Obviously some pessimization relative to current code is necessary to
>> fix some of the problems WRT thread safety and avoiding things like
>> introducing faults in code which did not previously fault.
>
> Huh? This patch is purely an (attempt at) optimization, not something
> that fixes any problems.

Then I must be mentally merging two things Abe has been working on then.
He's certainly had an if-converter patch that was designed to avoid
introducing races in code that didn't previously have races.

Looking back through the archives that appears to be the case. His
patches to avoid racing are for the tree level if converter, not the RTL
if converter.

Sigh, sorry for the confusion. It's totally my fault. Assuming Abe
doesn't have a correctness case at all here, then I don't see any way
for the code to go forward as-is since it's likely making things
significantly worse.

> [quoted list of alternative approaches trimmed]

One could argue these should all be on the table. However, I tend to
really dislike using area beyond the current stack. I realize it's
throw-away data, but it just seems like a bad idea to me -- even on
embedded targets that don't support valgrind.
On Tue, Oct 20, 2015 at 7:43 AM, Jeff Law <law@redhat.com> wrote:
> Then I must be mentally merging two things Abe has been working on
> then. He's certainly had an if-converter patch that was designed to
> avoid introducing races in code that didn't previously have races.
>
> Looking back through the archives that appears to be the case. His
> patches to avoid racing are for the tree level if converter, not the
> RTL if converter.

Even for the tree level this wasn't the case, he just ran into a bug of
the existing converter that I've fixed meanwhile.

> Sigh, sorry for the confusion. It's totally my fault. Assuming Abe
> doesn't have a correctness case at all here, then I don't see any way
> for the code to go forward as-is since it's likely making things
> significantly worse.
>
> [remainder of quoted thread trimmed]
--- ifcvt.c	2015-09-01 12:54:38.234108158 -0500
+++ ifcvt.c	2015-10-07 11:14:11.606439645 -0500
@@ -47,6 +47,7 @@
 #include "insn-codes.h"
 #include "optabs.h"
 #include "diagnostic-core.h"
+#include "diagnostic-color.h"
 #include "tm_p.h"
 #include "cfgloop.h"
 #include "target.h"
@@ -56,6 +57,8 @@
 #include "rtl-iter.h"
 #include "ifcvt.h"
 
+#include <utility>
+
 #ifndef MAX_CONDITIONAL_EXECUTE
 #define MAX_CONDITIONAL_EXECUTE \
   (BRANCH_COST (optimize_function_for_speed_p (cfun), false) \
@@ -66,6 +69,9 @@
 
 #define NULL_BLOCK ((basic_block) NULL)
 
+/* An arbitrary inclusive maximum size (in bytes) for each scratchpad.  */
+#define SCRATCHPAD_MAX_SIZE 128
+
 /* True if after combine pass.  */
 static bool ifcvt_after_combine;
 
@@ -110,6 +116,8 @@ static int dead_or_predicable (basic_blo
 			       edge, int);
 static void noce_emit_move_insn (rtx, rtx);
 static rtx_insn *block_has_only_trap (basic_block);
+
+static auto_vec<std::pair<rtx, unsigned int> > scratchpads;
 
 /* Count the number of non-jump active insns in BB.  */
 
@@ -2784,19 +2792,16 @@ noce_operand_ok (const_rtx op)
   return ! may_trap_p (op);
 }
 
-/* Return true if a write into MEM may trap or fault. */
+/* Return true if a write into MEM may trap or fault
+   even in the presence of scratchpad support. */
 
 static bool
-noce_mem_write_may_trap_or_fault_p (const_rtx mem)
+noce_mem_write_may_trap_or_fault_p_1 (const_rtx mem)
 {
-  rtx addr;
-
   if (MEM_READONLY_P (mem))
     return true;
 
-  if (may_trap_or_fault_p (mem))
-    return true;
-
+  rtx addr;
   addr = XEXP (mem, 0);
 
   /* Call target hook to avoid the effects of -fpic etc....  */
@@ -2837,6 +2842,18 @@ noce_mem_write_may_trap_or_fault_p (cons
   return false;
 }
 
+/* Return true if a write into MEM may trap or fault
+   without scratchpad support. */
+
+static bool
+noce_mem_write_may_trap_or_fault_p (const_rtx mem)
+{
+  if (may_trap_or_fault_p (mem))
+    return true;
+
+  return noce_mem_write_may_trap_or_fault_p_1 (mem);
+}
+
 /* Return whether we can use store speculation for MEM.
    TOP_BB is the basic block above the conditional block
    where we are considering doing the speculative store.  We look for
   whether MEM is set
@@ -3156,17 +3173,149 @@ noce_process_if_block (struct noce_if_in
   if (!set_b && MEM_P (orig_x))
     {
-      /* Disallow the "if (...) x = a;" form (implicit "else x = x;")
-	 for optimizations if writing to x may trap or fault,
-	 i.e. it's a memory other than a static var or a stack slot,
-	 is misaligned on strict aligned machines or is read-only. If
-	 x is a read-only memory, then the program is valid only if we
-	 avoid the store into it. If there are stores on both the
-	 THEN and ELSE arms, then we can go ahead with the conversion;
-	 either the program is broken, or the condition is always
-	 false such that the other memory is selected. */
+      /* Disallow the "if (...) x = a;" form (with no "else") for optimizations
+	 when x is misaligned on strict-alignment machines or is read-only.
+	 If x is a memory other than a static var or a stack slot: for targets
+	 _with_ conditional move and _without_ conditional execution,
+	 convert using the scratchpad technique, otherwise don`t convert.
+	 If x is a read-only memory, then the program is valid only if we avoid
+	 the store into it. If there are stores on both the THEN and ELSE arms,
+	 then we can go ahead with the conversion; either the program is broken,
+	 or the condition is always false such that the other memory is selected.
+	 The non-scratchpad-based conversion here has an implicit "else x = x;". */
       if (noce_mem_write_may_trap_or_fault_p (orig_x))
-	return FALSE;
+	{
+	  /* The next "if": quoting "noce_emit_cmove":
+	     If we can't create new pseudos, though, don't bother. */
+	  if (reload_completed)
+	    return FALSE;
+
+	  if (optimize<2)
+	    return FALSE;
+
+	  if (optimize_function_for_size_p (cfun))
+	    return FALSE;
+
+	  if (targetm.have_conditional_execution () || ! HAVE_conditional_move)
+	    return FALSE;
+
+	  const bool not_a_scratchpad_candidate =
+	    noce_mem_write_may_trap_or_fault_p_1 (orig_x);
+
+	  if (! not_a_scratchpad_candidate)
+	    {
+	      if (MEM_SIZE_KNOWN_P (orig_x))
+		{
+		  const size_t size_of_MEM = MEM_SIZE (orig_x);
+
+		  if (size_of_MEM <= SCRATCHPAD_MAX_SIZE)
+		    {
+		      rtx biggest_scratchpad = 0;
+		      unsigned int biggest_scratchpad_size = 0;
+		      if (size_t vec_len = scratchpads.length ())
+			{
+			  std::pair<rtx, unsigned> tmp_pair = scratchpads[vec_len-1];
+			  biggest_scratchpad = tmp_pair.first;
+			  biggest_scratchpad_size = tmp_pair.second;
+			}
+		      if (size_of_MEM > biggest_scratchpad_size)
+			{
+			  biggest_scratchpad_size = size_of_MEM;
+			  biggest_scratchpad = assign_stack_local
+			    (GET_MODE (orig_x), size_of_MEM, 0);
+			  gcc_assert (biggest_scratchpad);
+			  scratchpads.safe_push (std::make_pair (biggest_scratchpad,
+								 size_of_MEM));
+			}
+
+		      gcc_assert (biggest_scratchpad);
+
+		      rtx reg_for_store_addr = gen_reg_rtx (Pmode);
+		      set_used_flags (reg_for_store_addr);
+
+		      start_sequence ();
+
+		      /* We must copy the insns between the start of the THEN block
+			 and the set of 'a', if they exist, since they may be needed
+			 for the converted code as well, but we must not copy a
+			 start-of-BB note if one is present, nor debug "insn"s. */
+
+		      for (rtx_insn* insn = BB_HEAD (then_bb); insn && insn != insn_a
+			   && insn != BB_END (then_bb); insn=NEXT_INSN (insn))
+			{
+			  if (! (NOTE_INSN_BASIC_BLOCK_P (insn) || DEBUG_INSN_P (insn)))
+			    duplicate_insn_chain (insn, insn);
+			  /* A return of 0 from "duplicate_insn_chain" is _not_
+			     a failure; it just returns the "NEXT_INSN" of the
+			     last insn it duplicated. */
+			}
+
+		      /* Done copying the needed insns between the start of the
+			 THEN block and the set of 'a', if any.
*/ + + if (CONSTANT_P (XEXP (cond, 0)) && CONSTANT_P (XEXP (cond, 1))) + { + end_sequence (); + return FALSE; + } + + rtx target = noce_emit_cmove (if_info, + reg_for_store_addr, + GET_CODE (cond), + XEXP (cond, 0), + XEXP (cond, 1), + XEXP (orig_x, 0), + XEXP (biggest_scratchpad, 0)); + + if (!target) + { + end_sequence (); + return FALSE; + } + if (target != reg_for_store_addr) + noce_emit_move_insn (reg_for_store_addr, target); + + rtx mem = gen_rtx_MEM (GET_MODE (orig_x), reg_for_store_addr); + MEM_NOTRAP_P (mem) = true; + MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_x); + + alias_set_type temp_alias_set = new_alias_set (); + if (MEM_ALIAS_SET (orig_x)) + record_alias_subset (MEM_ALIAS_SET (orig_x), temp_alias_set); + set_mem_alias_set (mem, temp_alias_set); + + set_mem_align (mem, + MIN (MEM_ALIGN (biggest_scratchpad), MEM_ALIGN (orig_x))); + if (MEM_ADDR_SPACE (orig_x) + != MEM_ADDR_SPACE (biggest_scratchpad)) + { + end_sequence (); + return FALSE; + } + + set_used_flags (mem); + + noce_emit_move_insn (mem, a); + + rtx_insn *seq = end_ifcvt_sequence (if_info); + if (!seq) + return FALSE; + + unshare_all_rtl_in_chain (seq); + + /* Prevent the code right after "success:" + from throwing away the changes. */ + x = orig_x; + + emit_insn_before_setloc (seq, if_info->jump, + INSN_LOCATION (if_info->insn_a)); + goto success; + + } + } + } + return FALSE; + } /* Avoid store speculation: given "if (...) x = a" where x is a MEM, we only want to do the store if x is always set @@ -4959,6 +5108,9 @@ if_convert (bool after_combine) basic_block bb; int pass; + /* Ensure that we start the scratchpads data fresh each time. */ + scratchpads.truncate (0); + if (optimize == 1) { df_live_add_problem ();
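For reference, the pad-reuse policy in the hunk above (keep only the largest pad allocated so far, grow it when a bigger store shows up, reset at the start of each if_convert run as `scratchpads.truncate (0)` does) can be sketched with plain sizes standing in for the rtx stack slots (illustrative names, not from the patch):

```c
#include <stddef.h>

static size_t biggest_pad_size;   /* 0 means no pad allocated yet */
static int num_allocations;       /* counts assign_stack_local calls */

/* Return a pad big enough for a store of STORE_SIZE bytes, allocating
   a larger one only when every existing pad is too small.  */
static size_t
get_scratchpad (size_t store_size)
{
  if (store_size > biggest_pad_size)
    {
      biggest_pad_size = store_size;   /* assign_stack_local in the patch */
      num_allocations++;
    }
  return biggest_pad_size;
}

/* Per-run reset, mirroring scratchpads.truncate (0) above.  */
static void
reset_scratchpads (void)
{
  biggest_pad_size = 0;
  num_allocations = 0;
}
```

Because the pads are throwaway storage, handing a small store a too-big pad is harmless, which is why only the largest pad ever needs to be kept.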