From patchwork Tue Oct 13 20:05:06 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Abe Skolnik X-Patchwork-Id: 529908 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id E6FDE1402D6 for ; Wed, 14 Oct 2015 07:05:21 +1100 (AEDT) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b=aGLkJRi/; dkim-atps=neutral DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:subject:content-type; q= dns; s=default; b=Q7egndSBOIvIrLwN8R6BfLFehqoDbQ8kJHn7qWxQI44AVU MhCMVCnD0Dwm3bPseqp5PYgdFFmctEOu0qn/D9D/3qFflK7Tx4HqEM5fQDxMCKXH ECsvyLR1NIU9/CGHIu09qFRBCoflSgX/NpOZ9CO6fHZUgQcJ9D0gwRBgjWBs4= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:subject:content-type; s= default; bh=nuRtbbRyVy2FWo4bEIjeVrpkDfQ=; b=aGLkJRi/wH9dTSR1Xmf+ tHhyYcFpvPduivcP10e9xVIh09WInBRm3KDuruuWKYVLSA60GOXGOa2ktzfzUq7K s6BUUwdN6KIqHGuSm8+k0AGAGc9MFTD+f5+bdlfmeF0W85kjBv/H+ZqycBnhySs+ CHu5v4ARcNy2aTaISK2QoAk= Received: (qmail 87708 invoked by alias); 13 Oct 2015 20:05:12 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 87697 invoked by uid 89); 13 Oct 2015 20:05:10 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.3 
required=5.0 tests=AWL, BAYES_00, FREEMAIL_FROM, RCVD_IN_DNSWL_LOW, SPF_PASS, T_RP_MATCHES_RCVD autolearn=ham version=3.3.2 X-HELO: nm10-vm5.bullet.mail.ne1.yahoo.com Received: from nm10-vm5.bullet.mail.ne1.yahoo.com (HELO nm10-vm5.bullet.mail.ne1.yahoo.com) (98.138.91.232) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-SHA encrypted) ESMTPS; Tue, 13 Oct 2015 20:05:09 +0000 Received: from [98.138.100.117] by nm10.bullet.mail.ne1.yahoo.com with NNFMP; 13 Oct 2015 20:05:07 -0000 Received: from [98.138.226.126] by tm108.bullet.mail.ne1.yahoo.com with NNFMP; 13 Oct 2015 20:05:07 -0000 Received: from [127.0.0.1] by smtp205.mail.ne1.yahoo.com with NNFMP; 13 Oct 2015 20:05:07 -0000 X-Yahoo-SMTP: RhyaqECswBCSKHdmagqyBBwGHjobejNv Message-ID: <561D63F2.2080705@yahoo.com> Date: Tue, 13 Oct 2015 15:05:06 -0500 From: Abe User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:31.0) Gecko/20100101 Thunderbird/31.4.0 MIME-Version: 1.0 To: "gcc-patches@gcc.gnu.org" , Sebastian Pop , Kyrill Tkachov , Bernd Schmidt Subject: using scratchpads to enhance RTL-level if-conversion: revised patch Attached please find my revised patch to enhance RTL-level if-conversion using scratchpads, newly optimizing only half-hammock stores for now. Bootstrapped and regression-tested on x86_64 GNU/Linux. Regards, Abe --- ifcvt.c 2015-09-01 12:54:38.234108158 -0500 +++ ifcvt.c 2015-10-13 14:22:27.935204461 -0500 @@ -56,6 +56,8 @@ #include "rtl-iter.h" #include "ifcvt.h" +#include <utility> + #ifndef MAX_CONDITIONAL_EXECUTE #define MAX_CONDITIONAL_EXECUTE \ (BRANCH_COST (optimize_function_for_speed_p (cfun), false) \ @@ -66,6 +68,9 @@ #define NULL_BLOCK ((basic_block) NULL) +/* An arbitrary inclusive maximum size (in bytes) for each scratchpad. */ +#define SCRATCHPAD_MAX_SIZE 128 + /* True if after combine pass. 
*/ static bool ifcvt_after_combine; @@ -110,6 +115,8 @@ static int dead_or_predicable (basic_blo edge, int); static void noce_emit_move_insn (rtx, rtx); static rtx_insn *block_has_only_trap (basic_block); + +static auto_vec<std::pair<rtx, unsigned int> > scratchpads; /* Count the number of non-jump active insns in BB. */ @@ -2784,19 +2791,16 @@ noce_operand_ok (const_rtx op) return ! may_trap_p (op); } -/* Return true if a write into MEM may trap or fault. */ +/* Return true if a write into MEM may trap or fault + even in the presence of scratchpad support. */ static bool -noce_mem_write_may_trap_or_fault_p (const_rtx mem) +noce_mem_write_may_trap_or_fault_p_1 (const_rtx mem) { - rtx addr; - if (MEM_READONLY_P (mem)) return true; - if (may_trap_or_fault_p (mem)) - return true; - + rtx addr; addr = XEXP (mem, 0); /* Call target hook to avoid the effects of -fpic etc.... */ @@ -2837,6 +2841,18 @@ noce_mem_write_may_trap_or_fault_p (cons return false; } +/* Return true if a write into MEM may trap or fault + without scratchpad support. */ + +static bool +noce_mem_write_may_trap_or_fault_p (const_rtx mem) +{ + if (may_trap_or_fault_p (mem)) + return true; + + return noce_mem_write_may_trap_or_fault_p_1 (mem); +} + /* Return whether we can use store speculation for MEM. TOP_BB is the basic block above the conditional block where we are considering doing the speculative store. We look for whether MEM is set @@ -3156,17 +3172,116 @@ noce_process_if_block (struct noce_if_in if (!set_b && MEM_P (orig_x)) { - /* Disallow the "if (...) x = a;" form (implicit "else x = x;") - for optimizations if writing to x may trap or fault, - i.e. it's a memory other than a static var or a stack slot, - is misaligned on strict aligned machines or is read-only. If - x is a read-only memory, then the program is valid only if we - avoid the store into it. 
If there are stores on both the - THEN and ELSE arms, then we can go ahead with the conversion; - either the program is broken, or the condition is always - false such that the other memory is selected. */ + /* Disallow the "if (...) x = a;" form (with no "else") for optimizations + when x is misaligned on strict-alignment machines or is read-only. + If x is a memory other than a static var or a stack slot: for targets + _with_ conditional move and _without_ conditional execution, + convert using the scratchpad technique, otherwise don't convert. + If x is a read-only memory, then the program is valid only if we avoid + the store into it. If there are stores on both the THEN and ELSE arms, + then we can go ahead with the conversion; either the program is broken, + or the condition is always false such that the other memory is selected. + The non-scratchpad-based conversion here has an implicit "else x = x;". */ if (noce_mem_write_may_trap_or_fault_p (orig_x)) - return FALSE; + { + if ( reload_completed + || optimize < 2 + || optimize_function_for_size_p (cfun) + || targetm.have_conditional_execution () + || !HAVE_conditional_move + || (CONSTANT_P (XEXP (cond, 0)) && CONSTANT_P (XEXP (cond, 1)))) + return FALSE; + + + if (noce_mem_write_may_trap_or_fault_p_1 (orig_x) + || !MEM_SIZE_KNOWN_P (orig_x)) + return FALSE; + + const size_t MEM_size = MEM_SIZE (orig_x); + if (MEM_size > SCRATCHPAD_MAX_SIZE) + return FALSE; + + rtx biggest_spad = 0; + unsigned int biggest_spad_size = 0; + if (size_t vec_len = scratchpads.length ()) + { + std::pair<rtx, unsigned int> tmp_pair = scratchpads[vec_len - 1]; + biggest_spad = tmp_pair.first; + biggest_spad_size = tmp_pair.second; + } + if (MEM_size > biggest_spad_size) + { + biggest_spad_size = MEM_size; + biggest_spad = assign_stack_local (GET_MODE (orig_x), MEM_size, 0); + gcc_assert (biggest_spad); + scratchpads.safe_push (std::make_pair (biggest_spad, MEM_size)); + } + + gcc_assert (biggest_spad); + + rtx reg_for_store_addr = gen_reg_rtx (Pmode); 
+ set_used_flags (reg_for_store_addr); + + start_sequence (); + + for (rtx_insn *insn = BB_HEAD (then_bb); + insn && insn != insn_a && insn != BB_END (then_bb); + insn = NEXT_INSN (insn)) + if (!(NOTE_INSN_BASIC_BLOCK_P (insn) || DEBUG_INSN_P (insn))) + duplicate_insn_chain (insn, insn); + + rtx target = noce_emit_cmove (if_info, + reg_for_store_addr, + GET_CODE (cond), + XEXP (cond, 0), + XEXP (cond, 1), + XEXP (orig_x, 0), + XEXP (biggest_spad, 0)); + + if (!target) + { + end_sequence (); + return FALSE; + } + if (target != reg_for_store_addr) + noce_emit_move_insn (reg_for_store_addr, target); + + rtx mem = gen_rtx_MEM (GET_MODE (orig_x), reg_for_store_addr); + MEM_NOTRAP_P (mem) = true; + MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_x); + + alias_set_type temp_alias_set = new_alias_set (); + if (MEM_ALIAS_SET (orig_x)) + record_alias_subset (MEM_ALIAS_SET (orig_x), temp_alias_set); + set_mem_alias_set (mem, temp_alias_set); + + set_mem_align (mem, MIN (MEM_ALIGN (biggest_spad), + MEM_ALIGN (orig_x))); + if (MEM_ADDR_SPACE (orig_x) != MEM_ADDR_SPACE (biggest_spad)) + { + end_sequence (); + return FALSE; + } + + set_used_flags (mem); + + noce_emit_move_insn (mem, a); + + rtx_insn *seq = end_ifcvt_sequence (if_info); + if (!seq) + return FALSE; + + unshare_all_rtl_in_chain (seq); + + /* Prevent the code right after "success:" + from throwing away the changes. */ + x = orig_x; + + emit_insn_before_setloc (seq, if_info->jump, + INSN_LOCATION (if_info->insn_a)); + goto success; + + } /* Avoid store speculation: given "if (...) x = a" where x is a MEM, we only want to do the store if x is always set @@ -4959,6 +5074,9 @@ if_convert (bool after_combine) basic_block bb; int pass; + /* Ensure that we start the scratchpads data fresh each time. */ + scratchpads.truncate (0); + if (optimize == 1) { df_live_add_problem ();