From patchwork Wed Oct 7 23:29:34 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Abe Skolnik X-Patchwork-Id: 527507 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id C362D140D5F for ; Thu, 8 Oct 2015 10:29:48 +1100 (AEDT) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b=t+covuh/; dkim-atps=neutral DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:subject:content-type; q= dns; s=default; b=posCFKBSdD/UFeDOAb8AiEe+ezqcC37TlQdvS8SchMSl8k uDAnx4UFj8B8eTuItT2/JlTycpLbfUUlvtbiDJ77TDuq6wFgIY9ronBVxpvolIyt Y/FQ+L1iF0UkeN/1srY4uOgd15SksmJLlcAGv7WGReyD3VQlFlGKt6i8YhC8Q= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:subject:content-type; s= default; bh=Aseg9F1hc46K7jJqangaL4hmH2U=; b=t+covuh/5vAOipUu98ra pFwqK1mb8LknUWdSdHNlaDtKS7UrBJW4gpnjRoAse6z+zBxl8u2XkHRsbPFShZyE duXdrpyM4Rjhm+sF27FRjVXhMuswj75esK/reaHDzAF3WE0d/zALkjUvWQractkO l3iUg80KJf1fAYtlikJhh2c= Received: (qmail 54068 invoked by alias); 7 Oct 2015 23:29:41 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 54058 invoked by uid 89); 7 Oct 2015 23:29:40 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-0.3 required=5.0 tests=AWL, BAYES_20, FREEMAIL_FROM, RCVD_IN_DNSWL_LOW, SPF_PASS, T_RP_MATCHES_RCVD autolearn=ham version=3.3.2 X-HELO: nm12-vm2.bullet.mail.ne1.yahoo.com Received: from nm12-vm2.bullet.mail.ne1.yahoo.com (HELO nm12-vm2.bullet.mail.ne1.yahoo.com) (98.138.91.88) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-SHA encrypted) ESMTPS; Wed, 07 Oct 2015 23:29:39 +0000 Received: from [98.138.100.115] by nm12.bullet.mail.ne1.yahoo.com with NNFMP; 07 Oct 2015 23:29:36 -0000 Received: from [98.138.226.60] by tm106.bullet.mail.ne1.yahoo.com with NNFMP; 07 Oct 2015 23:29:36 -0000 Received: from [127.0.0.1] by smtp211.mail.ne1.yahoo.com with NNFMP; 07 Oct 2015 23:29:36 -0000 X-Yahoo-SMTP: RhyaqECswBCSKHdmagqyBBwGHjobejNv Message-ID: <5615AADE.4030306@yahoo.com> Date: Wed, 07 Oct 2015 18:29:34 -0500 From: Abe User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:31.0) Gecko/20100101 Thunderbird/31.4.0 MIME-Version: 1.0 To: "gcc-patches@gcc.gnu.org" , Sebastian Pop , Bernd Schmidt , Kyrill Tkachov Subject: using scratchpads to enhance RTL-level if-conversion: revised patch Dear all, Attached please find my revised patch to the RTL if converter. This patch enables the if-conversion of half-hammocks with a store in them that the internal GCC machinery otherwise considers too hazardous to if-convert. This is made safe by using the "scratchpad" technique, i.e. throwing away the store into a safe location where nothing of any importance is currently stored. The scratchpads are allocated in the stack frame. Here is an example of code which is newly converted with this patch, at least when targeting AArch64: int A[10]; void half_hammock() { if (A[0]) A[1] = 2; } Both tested against trunk and bootstrapped OK with defaults* on AMD64-AKA-"x86_64" GNU/Linux. '*': [except for "--prefix"] I`m sending the patch as an attachment to avoid it being corrupted/reformatted by any e-mail troubles. I look forward to your feedback. Regards, Abe --- ifcvt.c 2015-09-01 12:54:38.234108158 -0500 +++ ifcvt.c 2015-10-07 11:14:11.606439645 -0500 @@ -47,6 +47,7 @@ #include "insn-codes.h" #include "optabs.h" #include "diagnostic-core.h" +#include "diagnostic-color.h" #include "tm_p.h" #include "cfgloop.h" #include "target.h" @@ -56,6 +57,8 @@ #include "rtl-iter.h" #include "ifcvt.h" +#include + #ifndef MAX_CONDITIONAL_EXECUTE #define MAX_CONDITIONAL_EXECUTE \ (BRANCH_COST (optimize_function_for_speed_p (cfun), false) \ @@ -66,6 +69,9 @@ #define NULL_BLOCK ((basic_block) NULL) +/* An arbitrary inclusive maximum size (in bytes) for each scratchpad. */ +#define SCRATCHPAD_MAX_SIZE 128 + /* True if after combine pass. */ static bool ifcvt_after_combine; @@ -110,6 +116,8 @@ static int dead_or_predicable (basic_blo edge, int); static void noce_emit_move_insn (rtx, rtx); static rtx_insn *block_has_only_trap (basic_block); + +static auto_vec > scratchpads; /* Count the number of non-jump active insns in BB. */ @@ -2784,19 +2792,16 @@ noce_operand_ok (const_rtx op) return ! may_trap_p (op); } -/* Return true if a write into MEM may trap or fault. */ +/* Return true if a write into MEM may trap or fault + even in the presence of scratchpad support. */ static bool -noce_mem_write_may_trap_or_fault_p (const_rtx mem) +noce_mem_write_may_trap_or_fault_p_1 (const_rtx mem) { - rtx addr; - if (MEM_READONLY_P (mem)) return true; - if (may_trap_or_fault_p (mem)) - return true; - + rtx addr; addr = XEXP (mem, 0); /* Call target hook to avoid the effects of -fpic etc.... */ @@ -2837,6 +2842,18 @@ noce_mem_write_may_trap_or_fault_p (cons return false; } +/* Return true if a write into MEM may trap or fault + without scratchpad support. */ + +static bool +noce_mem_write_may_trap_or_fault_p (const_rtx mem) +{ + if (may_trap_or_fault_p (mem)) + return true; + + return noce_mem_write_may_trap_or_fault_p_1 (mem); +} + /* Return whether we can use store speculation for MEM. TOP_BB is the basic block above the conditional block where we are considering doing the speculative store. We look for whether MEM is set @@ -3156,17 +3173,149 @@ noce_process_if_block (struct noce_if_in if (!set_b && MEM_P (orig_x)) { - /* Disallow the "if (...) x = a;" form (implicit "else x = x;") - for optimizations if writing to x may trap or fault, - i.e. it's a memory other than a static var or a stack slot, - is misaligned on strict aligned machines or is read-only. If - x is a read-only memory, then the program is valid only if we - avoid the store into it. If there are stores on both the - THEN and ELSE arms, then we can go ahead with the conversion; - either the program is broken, or the condition is always - false such that the other memory is selected. */ + /* Disallow the "if (...) x = a;" form (with no "else") for optimizations + when x is misaligned on strict-alignment machines or is read-only. + If x is a memory other than a static var or a stack slot: for targets + _with_ conditional move and _without_ conditional execution, + convert using the scratchpad technique, otherwise don`t convert. + If x is a read-only memory, then the program is valid only if we avoid + the store into it. If there are stores on both the THEN and ELSE arms, + then we can go ahead with the conversion; either the program is broken, + or the condition is always false such that the other memory is selected. + The non-scratchpad-based conversion here has an implicit "else x = x;". */ if (noce_mem_write_may_trap_or_fault_p (orig_x)) - return FALSE; + { + /* The next "if": quoting "noce_emit_cmove": + If we can't create new pseudos, though, don't bother. */ + if (reload_completed) + return FALSE; + + if (optimize<2) + return FALSE; + + if (optimize_function_for_size_p (cfun)) + return FALSE; + + if (targetm.have_conditional_execution () || ! HAVE_conditional_move) + return FALSE; + + const bool not_a_scratchpad_candidate = + noce_mem_write_may_trap_or_fault_p_1 (orig_x); + + if (! not_a_scratchpad_candidate) + { + if (MEM_SIZE_KNOWN_P (orig_x)) + { + const size_t size_of_MEM = MEM_SIZE (orig_x); + + if (size_of_MEM <= SCRATCHPAD_MAX_SIZE) + { + rtx biggest_scratchpad = 0; + unsigned int biggest_scratchpad_size = 0; + if (size_t vec_len = scratchpads.length ()) + { + std::pair tmp_pair = scratchpads[vec_len-1]; + biggest_scratchpad = tmp_pair.first; + biggest_scratchpad_size = tmp_pair.second; + } + if (size_of_MEM > biggest_scratchpad_size) + { + biggest_scratchpad_size = size_of_MEM; + biggest_scratchpad = assign_stack_local + (GET_MODE (orig_x), size_of_MEM, 0); + gcc_assert (biggest_scratchpad); + scratchpads.safe_push (std::make_pair (biggest_scratchpad, + size_of_MEM)); + } + + gcc_assert (biggest_scratchpad); + + rtx reg_for_store_addr = gen_reg_rtx (Pmode); + set_used_flags (reg_for_store_addr); + + start_sequence (); + + /* We must copy the insns between the start of the THEN block + and the set of 'a', if they exist, since they may be needed + for the converted code as well, but we must not copy a + start-of-BB note if one is present, nor debug "insn"s. */ + + for (rtx_insn* insn = BB_HEAD (then_bb); insn && insn != insn_a + && insn != BB_END (then_bb); insn=NEXT_INSN (insn)) + { + if (! (NOTE_INSN_BASIC_BLOCK_P (insn) || DEBUG_INSN_P (insn))) + duplicate_insn_chain (insn, insn); + /* A return of 0 from "duplicate_insn_chain" is _not_ + a failure; it just returns the "NEXT_INSN" of the + last insn it duplicated. */ + } + + /* Done copying the needed insns between the start of the + THEN block and the set of 'a', if any. */ + + if (CONSTANT_P (XEXP (cond, 0)) && CONSTANT_P (XEXP (cond, 1))) + { + end_sequence (); + return FALSE; + } + + rtx target = noce_emit_cmove (if_info, + reg_for_store_addr, + GET_CODE (cond), + XEXP (cond, 0), + XEXP (cond, 1), + XEXP (orig_x, 0), + XEXP (biggest_scratchpad, 0)); + + if (!target) + { + end_sequence (); + return FALSE; + } + if (target != reg_for_store_addr) + noce_emit_move_insn (reg_for_store_addr, target); + + rtx mem = gen_rtx_MEM (GET_MODE (orig_x), reg_for_store_addr); + MEM_NOTRAP_P (mem) = true; + MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_x); + + alias_set_type temp_alias_set = new_alias_set (); + if (MEM_ALIAS_SET (orig_x)) + record_alias_subset (MEM_ALIAS_SET (orig_x), temp_alias_set); + set_mem_alias_set (mem, temp_alias_set); + + set_mem_align (mem, + MIN (MEM_ALIGN (biggest_scratchpad), MEM_ALIGN (orig_x))); + if (MEM_ADDR_SPACE (orig_x) + != MEM_ADDR_SPACE (biggest_scratchpad)) + { + end_sequence (); + return FALSE; + } + + set_used_flags (mem); + + noce_emit_move_insn (mem, a); + + rtx_insn *seq = end_ifcvt_sequence (if_info); + if (!seq) + return FALSE; + + unshare_all_rtl_in_chain (seq); + + /* Prevent the code right after "success:" + from throwing away the changes. */ + x = orig_x; + + emit_insn_before_setloc (seq, if_info->jump, + INSN_LOCATION (if_info->insn_a)); + goto success; + + } + } + } + return FALSE; + } /* Avoid store speculation: given "if (...) x = a" where x is a MEM, we only want to do the store if x is always set @@ -4959,6 +5108,9 @@ if_convert (bool after_combine) basic_block bb; int pass; + /* Ensure that we start the scratchpads data fresh each time. */ + scratchpads.truncate (0); + if (optimize == 1) { df_live_add_problem ();