From patchwork Mon Oct 28 20:57:17 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andi Kleen X-Patchwork-Id: 2003506 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@legolas.ozlabs.org Authentication-Results: legolas.ozlabs.org; dkim=pass (2048-bit key; unprotected) header.d=intel.com header.i=@intel.com header.a=rsa-sha256 header.s=Intel header.b=f3ciMG7l; dkim-atps=neutral Authentication-Results: legolas.ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=gcc.gnu.org (client-ip=8.43.85.97; helo=server2.sourceware.org; envelope-from=gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=patchwork.ozlabs.org) Received: from server2.sourceware.org (server2.sourceware.org [8.43.85.97]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (secp384r1) server-digest SHA384) (No client certificate requested) by legolas.ozlabs.org (Postfix) with ESMTPS id 4Xcm1f5kBVz1xwF for ; Tue, 29 Oct 2024 07:57:57 +1100 (AEDT) Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id E26B73858429 for ; Mon, 28 Oct 2024 20:57:54 +0000 (GMT) X-Original-To: gcc-patches@gcc.gnu.org Delivered-To: gcc-patches@gcc.gnu.org Received: from mgamail.intel.com (mgamail.intel.com [192.198.163.14]) by sourceware.org (Postfix) with ESMTPS id 4DE0D3858D26; Mon, 28 Oct 2024 20:57:26 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 4DE0D3858D26 Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=linux.intel.com Authentication-Results: sourceware.org; spf=none smtp.mailfrom=linux.intel.com ARC-Filter: OpenARC Filter v1.0.0 sourceware.org 4DE0D3858D26 Authentication-Results: server2.sourceware.org; arc=none smtp.remote-ip=192.198.163.14 ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1730149048; cv=none; b=sDmhnDAom6HZdkx3Sg1hlBG7ppd3tliH2H/kmVPv2LirPxLiTin5b0IHF0VOr0SJtGhFLd3WslKQr2b8Sa3xDN7wmeD0pJpjf/5yEe7r0Zn4AbUF1fQutn1JoYl2KaJUQ7MutGnox02NUXUJmtDcWLzaOekZaD51vmZJ7WNqhes= ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1730149048; c=relaxed/simple; bh=79ETvewIrjpJ2Bskr3jhAYe9akZ2g5k8IjROAsCSpm4=; h=DKIM-Signature:From:To:Subject:Date:Message-ID:MIME-Version; b=Utn/0p16XC032UjCr1sLUL5UtroBapPlk4eQ1omOtrmmMvkVSkvD3VsMtSuRsARscHGKsCQHshQJr9lAgTMvLCJe1N/3G7r1MSmgFcmXbrF1J1ffDE7LZUES/ilBUcX7hpKtJPnQVvgZOixwHqpJhUClf3qDqqBsmLIdG0gjz6E= ARC-Authentication-Results: i=1; server2.sourceware.org DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1730149046; x=1761685046; h=from:to:cc:subject:date:message-id:mime-version: content-transfer-encoding; bh=79ETvewIrjpJ2Bskr3jhAYe9akZ2g5k8IjROAsCSpm4=; b=f3ciMG7l7NQJm3StG0CXpDQLd8aAIO/psFua9sfEBHTohOyMDn9doY5p szR54O2LBXjizSAoMO7SWy7696lRffAx7mPGBYPaiziRsHC3vX2t8uUpS PJQ/RS2CcRnD3OpI3WukXhBApaGq1emasACUADBaDq4pBchqz2JuX8vN8 hyZaR2OCTkbgR3TWzOf1idxRSbvC4Bw4gi/U4+YDUXZTR7gA1K8udznvo +1XG/G3Cfn4HHFavfMNIOON/nJY/znn1H2COCGeKGCzuN6Y3F8ybIDlGN 0uOb00/Di0tHteiag6Sat80Tz6mJKKpA+wwK/aEihzPQAio4ooahzdre+ A==; X-CSE-ConnectionGUID: 9Kd4ul+UQra0/oVQvQjS4Q== X-CSE-MsgGUID: KCCQsyTETmifP5lR5UEIlQ== X-IronPort-AV: E=McAfee;i="6700,10204,11239"; a="29975951" X-IronPort-AV: E=Sophos;i="6.11,240,1725346800"; d="scan'208";a="29975951" Received: from orviesa008.jf.intel.com ([10.64.159.148]) by fmvoesa108.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 28 Oct 2024 13:57:25 -0700 X-CSE-ConnectionGUID: 5JcSvAiwSHGzU1hc2pY9CA== X-CSE-MsgGUID: RHYJIbPwQWCwLY+HsLhsHw== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.11,240,1725346800"; d="scan'208";a="82555730" Received: from tassilo.jf.intel.com ([10.54.38.190]) by orviesa008-auth.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 28 Oct 2024 13:57:25 -0700 From: Andi Kleen To: gcc-patches@gcc.gnu.org Cc: Andi Kleen Subject: [PATCH v2 1/3] Disable -fbit-tests and -fjump-tables at -O0 Date: Mon, 28 Oct 2024 13:57:17 -0700 Message-ID: <20241028205719.685557-1-ak@linux.intel.com> X-Mailer: git-send-email 2.46.2 MIME-Version: 1.0 X-Spam-Status: No, score=-10.9 required=5.0 tests=BAYES_00, DKIMWL_WL_HIGH, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_EF, GIT_PATCH_0, KAM_NUMSUBJECT, SPF_HELO_NONE, SPF_NONE, TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.30 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org From: Andi Kleen gcc/ChangeLog: * common.opt: Enable -fbit-tests and -fjump-tables only at -O1. * opts.cc (default_options_table): Dito. --- gcc/common.opt | 4 ++-- gcc/opts.cc | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/gcc/common.opt b/gcc/common.opt index 12b25ff486de..70a22cdc71a4 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2189,11 +2189,11 @@ Common Var(flag_ivopts) Init(1) Optimization Optimize induction variables on trees. fjump-tables -Common Var(flag_jump_tables) Init(1) Optimization +Common Var(flag_jump_tables) Init(0) Optimization Use jump tables for sufficiently large switch statements. fbit-tests -Common Var(flag_bit_tests) Init(1) Optimization +Common Var(flag_bit_tests) Init(0) Optimization Use bit tests for sufficiently large switch statements. fkeep-inline-functions diff --git a/gcc/opts.cc b/gcc/opts.cc index acd53befdbfc..7adc495a7c2a 100644 --- a/gcc/opts.cc +++ b/gcc/opts.cc @@ -610,6 +610,7 @@ static const struct default_options default_options_table[] = { OPT_LEVELS_1_PLUS, OPT_fvar_tracking, NULL, 1 }, /* -O1 (and not -Og) optimizations. */ + { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fbit_tests, NULL, 1 }, { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fbranch_count_reg, NULL, 1 }, #if DELAY_SLOTS { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fdelayed_branch, NULL, 1 }, @@ -618,6 +619,7 @@ static const struct default_options default_options_table[] = { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fif_conversion, NULL, 1 }, { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fif_conversion2, NULL, 1 }, { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_finline_functions_called_once, NULL, 1 }, + { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fjump_tables, NULL, 1 }, { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fmove_loop_invariants, NULL, 1 }, { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fmove_loop_stores, NULL, 1 }, { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fssa_phiopt, NULL, 1 }, From patchwork Mon Oct 28 20:57:18 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andi Kleen X-Patchwork-Id: 2003508 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@legolas.ozlabs.org Authentication-Results: legolas.ozlabs.org; dkim=pass (2048-bit key; unprotected) header.d=intel.com header.i=@intel.com header.a=rsa-sha256 header.s=Intel header.b=XJCrXPSy; dkim-atps=neutral Authentication-Results: legolas.ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=gcc.gnu.org (client-ip=8.43.85.97; helo=server2.sourceware.org; envelope-from=gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=patchwork.ozlabs.org) Received: from server2.sourceware.org (server2.sourceware.org [8.43.85.97]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (secp384r1) server-digest SHA384) (No client certificate requested) by legolas.ozlabs.org (Postfix) with ESMTPS id 4Xcm1s2WFCz1xwF for ; Tue, 29 Oct 2024 07:58:09 +1100 (AEDT) Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id 91A1D385841D for ; Mon, 28 Oct 2024 20:58:07 +0000 (GMT) X-Original-To: gcc-patches@gcc.gnu.org Delivered-To: gcc-patches@gcc.gnu.org Received: from mgamail.intel.com (mgamail.intel.com [192.198.163.14]) by sourceware.org (Postfix) with ESMTPS id A04B53858C98; Mon, 28 Oct 2024 20:57:28 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org A04B53858C98 Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=linux.intel.com Authentication-Results: sourceware.org; spf=none smtp.mailfrom=linux.intel.com ARC-Filter: OpenARC Filter v1.0.0 sourceware.org A04B53858C98 Authentication-Results: server2.sourceware.org; arc=none smtp.remote-ip=192.198.163.14 ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1730149053; cv=none; b=LKe1NGdzjrQsCfNLRbsfZ+wNOgFmQlYoGP/DOcW3YRj/w67izkIUFbc9u72OXp7ZzB9K4I/w646FAjE1VzYlt5dQFtdhAHTbIPv2n+EhG6KxrVqHILpewYnxlLeZQlkPsHek0u9ogIKC01sx+5lMzPUz708n2ntCO26yqPIxcb4= ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1730149053; c=relaxed/simple; bh=YQTTGu+7VQg5Vf2icbchShBs5iQ9bZhlDJ0hLnnozzs=; h=DKIM-Signature:From:To:Subject:Date:Message-ID:MIME-Version; b=MkdSYr4zCydoEVOQSd3uxPmV6qG5UeUOwWaDXC9qxj0y6I0fKoOtAyKXAGjjg4aht8enk28/sDMBHxnEColM6VZriWGvG6SSGNK+G7+pMEjp2Xk03ICBNbGKnotDkWaf/sybBZiBVyexS9zHQ+IGpCvCcYJ6SScKtgqvDsg2zzE= ARC-Authentication-Results: i=1; server2.sourceware.org DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1730149049; x=1761685049; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=YQTTGu+7VQg5Vf2icbchShBs5iQ9bZhlDJ0hLnnozzs=; b=XJCrXPSywjkf2BEe2zBYWKFyoHfZ8SbM3Sn06zpGpp38A3aiK2SmLa5z p7wvtUUImlmRkIxDJ1sNexIF6jL80r1bC8gnF1mJbnp/ilD+d8jk7EFjG 2YtBRB6KcYpddy6td1Nd8Via+TN6jNX7lG0IzjbmXGk9dr0ltHqwmFxpE PrFMKbMN05ozggWK7Z21vhpilCRvSvA2CToR7KfmBJKbCgdlbbYbFoUiA zN9AimdiEbEoH5eYyeEjOEAzpp7h4522KB8CGKXfehx1nUBja7FdUdenv p//fecJcVFMBlrzD3SX3wfMvp1wdAcCUtXzOqapYummvRtSe9gnE4LKun g==; X-CSE-ConnectionGUID: spE6qfntTPW/xw00bhj5fQ== X-CSE-MsgGUID: zZs5UxcbR3e1npfR8puStg== X-IronPort-AV: E=McAfee;i="6700,10204,11239"; a="29975952" X-IronPort-AV: E=Sophos;i="6.11,240,1725346800"; d="scan'208";a="29975952" Received: from orviesa008.jf.intel.com ([10.64.159.148]) by fmvoesa108.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 28 Oct 2024 13:57:25 -0700 X-CSE-ConnectionGUID: JChEt53AT+26XmkSIO6Wmw== X-CSE-MsgGUID: oWBH9haCTNSCUsp9VN+PfA== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.11,240,1725346800"; d="scan'208";a="82555731" Received: from tassilo.jf.intel.com ([10.54.38.190]) by orviesa008-auth.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 28 Oct 2024 13:57:25 -0700 From: Andi Kleen To: gcc-patches@gcc.gnu.org Cc: Andi Kleen Subject: [PATCH v2 2/3] Only do switch bit test clustering when multiple labels point to same bb Date: Mon, 28 Oct 2024 13:57:18 -0700 Message-ID: <20241028205719.685557-2-ak@linux.intel.com> X-Mailer: git-send-email 2.46.2 In-Reply-To: <20241028205719.685557-1-ak@linux.intel.com> References: <20241028205719.685557-1-ak@linux.intel.com> MIME-Version: 1.0 X-Spam-Status: No, score=-11.2 required=5.0 tests=BAYES_00, DKIMWL_WL_HIGH, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_EF, GIT_PATCH_0, SPF_HELO_NONE, SPF_NONE, TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.30 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org From: Andi Kleen The bit cluster code generation strategy is only beneficial when multiple case labels point to the same code. Do a quick check if that is the case before trying to cluster. This fixes the switch part of PR117091 where all case labels are unique however it doesn't address the performance problems for non unique cases. gcc/ChangeLog: PR middle-end/117091 * gimple-if-to-switch.cc (if_chain::is_beneficial): Update find_bit_test call. * tree-switch-conversion.cc (bit_test_cluster::find_bit_tests): Get max_c argument and bail out early if all case labels are unique. (switch_decision_tree::compute_cases_per_edge): Record number of targets per label and return. (switch_decision_tree::analyze_switch_statement): ... pass to find_bit_tests. * tree-switch-conversion.h: Update prototypes. --- gcc/gimple-if-to-switch.cc | 2 +- gcc/tree-switch-conversion.cc | 23 ++++++++++++++++------- gcc/tree-switch-conversion.h | 5 +++-- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/gcc/gimple-if-to-switch.cc b/gcc/gimple-if-to-switch.cc index 96ce1c380a59..4151d1bb520e 100644 --- a/gcc/gimple-if-to-switch.cc +++ b/gcc/gimple-if-to-switch.cc @@ -254,7 +254,7 @@ if_chain::is_beneficial () else output.release (); - output = bit_test_cluster::find_bit_tests (filtered_clusters); + output = bit_test_cluster::find_bit_tests (filtered_clusters, 2); r = output.length () < filtered_clusters.length (); if (r) dump_clusters (&output, "BT can be built"); diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc index 00426d400006..3436c2a8b98c 100644 --- a/gcc/tree-switch-conversion.cc +++ b/gcc/tree-switch-conversion.cc @@ -1772,12 +1772,13 @@ jump_table_cluster::is_beneficial (const vec &, } /* Find bit tests of given CLUSTERS, where all members of the vector - are of type simple_cluster. New clusters are returned. */ + are of type simple_cluster. MAX_C is the approx max number of cases per + label. New clusters are returned. */ vec -bit_test_cluster::find_bit_tests (vec &clusters) +bit_test_cluster::find_bit_tests (vec &clusters, int max_c) { - if (!is_enabled ()) + if (!is_enabled () || max_c == 1) return clusters.copy (); unsigned l = clusters.length (); @@ -2206,18 +2207,26 @@ bit_test_cluster::hoist_edge_and_branch_if_true (gimple_stmt_iterator *gsip, } /* Compute the number of case labels that correspond to each outgoing edge of - switch statement. Record this information in the aux field of the edge. */ + switch statement. Record this information in the aux field of the edge. + Return the approx max number of cases per edge. */ -void +int switch_decision_tree::compute_cases_per_edge () { + int max_c = 0; reset_out_edges_aux (m_switch); int ncases = gimple_switch_num_labels (m_switch); for (int i = ncases - 1; i >= 1; --i) { edge case_edge = gimple_switch_edge (cfun, m_switch, i); case_edge->aux = (void *) ((intptr_t) (case_edge->aux) + 1); + /* For a range case add one extra. That's enough for the bit + cluster heuristic. */ + if ((intptr_t)case_edge->aux > max_c) + max_c = (intptr_t)case_edge->aux + + !!CASE_HIGH (gimple_switch_label (m_switch, i)); } + return max_c; } /* Analyze switch statement and return true when the statement is expanded @@ -2235,7 +2244,7 @@ switch_decision_tree::analyze_switch_statement () m_case_bbs.reserve (l); m_case_bbs.quick_push (default_bb); - compute_cases_per_edge (); + int max_c = compute_cases_per_edge (); for (unsigned i = 1; i < l; i++) { @@ -2256,7 +2265,7 @@ switch_decision_tree::analyze_switch_statement () reset_out_edges_aux (m_switch); /* Find bit-test clusters. */ - vec output = bit_test_cluster::find_bit_tests (clusters); + vec output = bit_test_cluster::find_bit_tests (clusters, max_c); /* Find jump table clusters. */ vec output2; diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h index 6468995eb316..e6a85fa60258 100644 --- a/gcc/tree-switch-conversion.h +++ b/gcc/tree-switch-conversion.h @@ -399,7 +399,7 @@ public: /* Find bit tests of given CLUSTERS, where all members of the vector are of type simple_cluster. New clusters are returned. */ - static vec find_bit_tests (vec &clusters); + static vec find_bit_tests (vec &clusters, int max_c); /* Return true when RANGE of case values with UNIQ labels can build a bit test. */ @@ -576,8 +576,9 @@ public: bool try_switch_expansion (vec &clusters); /* Compute the number of case labels that correspond to each outgoing edge of switch statement. Record this information in the aux field of the edge. + Returns approx max number of cases per edge. */ - void compute_cases_per_edge (); + int compute_cases_per_edge (); /* Before switch transformation, record all SSA_NAMEs defined in switch BB and used in a label basic block. */ From patchwork Mon Oct 28 20:57:19 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andi Kleen X-Patchwork-Id: 2003507 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@legolas.ozlabs.org Authentication-Results: legolas.ozlabs.org; dkim=pass (2048-bit key; unprotected) header.d=intel.com header.i=@intel.com header.a=rsa-sha256 header.s=Intel header.b=QfkQLNrU; dkim-atps=neutral Authentication-Results: legolas.ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=gcc.gnu.org (client-ip=2620:52:3:1:0:246e:9693:128c; helo=server2.sourceware.org; envelope-from=gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=patchwork.ozlabs.org) Received: from server2.sourceware.org (server2.sourceware.org [IPv6:2620:52:3:1:0:246e:9693:128c]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (secp384r1) server-digest SHA384) (No client certificate requested) by legolas.ozlabs.org (Postfix) with ESMTPS id 4Xcm1n3b4sz1xwF for ; Tue, 29 Oct 2024 07:58:04 +1100 (AEDT) Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id 9D222385841D for ; Mon, 28 Oct 2024 20:57:57 +0000 (GMT) X-Original-To: gcc-patches@gcc.gnu.org Delivered-To: gcc-patches@gcc.gnu.org Received: from mgamail.intel.com (mgamail.intel.com [192.198.163.14]) by sourceware.org (Postfix) with ESMTPS id 5F0663858C41; Mon, 28 Oct 2024 20:57:29 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 5F0663858C41 Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=linux.intel.com Authentication-Results: sourceware.org; spf=none smtp.mailfrom=linux.intel.com ARC-Filter: OpenARC Filter v1.0.0 sourceware.org 5F0663858C41 Authentication-Results: server2.sourceware.org; arc=none smtp.remote-ip=192.198.163.14 ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1730149052; cv=none; b=vZnwATw/Vi2ptOO0hG1jsx7CRLdgIK6Oi6UvKjDSR+SILeq47GmPidgm95K9ckD/n0pODZswM2MD+oVohV7KkvxXWUd8S3MhojEz8UesOrTQGFpa78hzVkTLz0Orlww6NzaEOw+wJMRRYkINzZvVnArWQpp28wc4dwSs9VYk+Us= ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1730149052; c=relaxed/simple; bh=mLXNR9NIeyuWZg12TzrjLCJfNYsZKN99QtxWM4b7Ais=; h=DKIM-Signature:From:To:Subject:Date:Message-ID:MIME-Version; b=uxFHH0VCAaIhonJIcMIe71kr3SNhMLfgO++zPz68FBVUd5LWQzeFuWdV6irCaPLe499kWbB9MFUgGu+5i9y/7HWKSzT4Fl3SD/pEnRJ6lKxorAakEqv3YNPMAygBorSFoM864QsW2x7MX051RNd4bIWsdEtHy9gsjtARoTbrZlM= ARC-Authentication-Results: i=1; server2.sourceware.org DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1730149050; x=1761685050; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=mLXNR9NIeyuWZg12TzrjLCJfNYsZKN99QtxWM4b7Ais=; b=QfkQLNrUwWH3dW/nFITjA5aSDCOMSwcYovgONo2Nl6KO7f0PX8JhydBH yOBZAPgXEZAFmqOBAPIeLPjdcYktWdk76fBEMbRhWQvgyIxnBJx/kqquN XgTmUUkdONnHdkXkjj+GNPJ4LEnyaKMKxDOAZT63Z0d129H/zysh74PMv zJsO8rkTl3mN4N/v7y5igF20qWG2GV/+tuqYBaNaWsKCETOElk+/AkV5Z NrLzMbYSQt90zRuHJ6IToOw53B2v7njsZfph5cZNGWlff8hU7iQCPhrtG ibjdiWkaBvnP1rCrm2yahXtvnKLUZ5AcK9Dj9YFVzITALBl0eaTqlMDSq w==; X-CSE-ConnectionGUID: UNsxoTrSSmeEo0D1pRZuSQ== X-CSE-MsgGUID: Q2ZJ7OOoTLK6V93JtdfrXQ== X-IronPort-AV: E=McAfee;i="6700,10204,11239"; a="29975953" X-IronPort-AV: E=Sophos;i="6.11,240,1725346800"; d="scan'208";a="29975953" Received: from orviesa008.jf.intel.com ([10.64.159.148]) by fmvoesa108.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 28 Oct 2024 13:57:25 -0700 X-CSE-ConnectionGUID: m4xsgqo2RxS7wddADlNUKQ== X-CSE-MsgGUID: WDJL2LowSE+AIRp1YS3K1w== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.11,240,1725346800"; d="scan'208";a="82555732" Received: from tassilo.jf.intel.com ([10.54.38.190]) by orviesa008-auth.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 28 Oct 2024 13:57:25 -0700 From: Andi Kleen To: gcc-patches@gcc.gnu.org Cc: Andi Kleen Subject: [PATCH v2 3/3] Simplify switch bit test clustering algorithm Date: Mon, 28 Oct 2024 13:57:19 -0700 Message-ID: <20241028205719.685557-3-ak@linux.intel.com> X-Mailer: git-send-email 2.46.2 In-Reply-To: <20241028205719.685557-1-ak@linux.intel.com> References: <20241028205719.685557-1-ak@linux.intel.com> MIME-Version: 1.0 X-Spam-Status: No, score=-11.2 required=5.0 tests=BAYES_00, DKIMWL_WL_HIGH, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_EF, GIT_PATCH_0, SPF_HELO_NONE, SPF_NONE, TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.30 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org From: Andi Kleen The current switch bit test clustering enumerates all possible case clusters combinations to find ones that fit the bit test constrains best. This causes performance problems with very large switches. For bit test clustering which happens naturally in word sized chunks I don't think such an expensive algorithm is really needed. This patch implements a simple greedy algorithm that walks the sorted list and examines word sized windows and tries to cluster them. Surprisingly the new algorithm gives consistly better clusters for the examples I tried. For example from the gcc bootstrap: old: 0-15 16-31 96-175 new: 0-31 96-175 I'm not fully sure why that is, probably some bug in the old algorithm? This shows even up in the test suite where if-to-switch-6 now can generate a switch, as well as a case in switch-1.c I don't have a proof that the new algorithm is always as good or better, but so far at least I don't see any counter examples. It also fixes the excessive compile time in PR117091, however this was already fixed by an earlier patch that doesn't run clustering when no targets have multiple values. gcc/ChangeLog: PR middle-end/117091 * tree-switch-conversion.cc (bit_test_cluster::find_bit_tests): Change clustering algorithm to simple greedy. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/if-to-switch-6.c: Allow condition chain. * gcc.dg/tree-ssa/switch-1.c: Allow more bit tests. --- .../gcc.dg/tree-ssa/if-to-switch-6.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/switch-1.c | 2 +- gcc/tree-switch-conversion.cc | 76 ++++++++++--------- 3 files changed, 42 insertions(+), 38 deletions(-) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c b/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c index b1640673eae1..657af770e438 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c @@ -39,4 +39,4 @@ int main(int argc, char **argv) return 0; } -/* { dg-final { scan-tree-dump-not "Condition chain" "iftoswitch" } } */ +/* { dg-final { scan-tree-dump "Condition chain" "iftoswitch" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c b/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c index 6f70c9de0c19..f1654aba6d99 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c @@ -107,4 +107,4 @@ int foo5 (int x) } } -/* { dg-final { scan-tree-dump ";; GIMPLE switch case clusters: BT:10-62 600-700 JT:1000-1021 111111" "switchlower1" } } */ +/* { dg-final { scan-tree-dump ";; GIMPLE switch case clusters: BT:10-62 600-700 BT:1000-1021 111111" "switchlower1" } } */ diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc index 3436c2a8b98c..b7736a9853d9 100644 --- a/gcc/tree-switch-conversion.cc +++ b/gcc/tree-switch-conversion.cc @@ -1782,55 +1782,59 @@ bit_test_cluster::find_bit_tests (vec &clusters, int max_c) return clusters.copy (); unsigned l = clusters.length (); - auto_vec min; - min.reserve (l + 1); + vec output; - min.quick_push (min_cluster_item (0, 0, 0)); + output.create (l); - for (unsigned i = 1; i <= l; i++) + unsigned end; + for (unsigned i = 0; i < l; i += end) { - /* Set minimal # of clusters with i-th item to infinite. */ - min.quick_push (min_cluster_item (INT_MAX, INT_MAX, INT_MAX)); + HOST_WIDE_INT values = 0; + hash_set targets; + cluster *start_cluster = clusters[i]; - for (unsigned j = 0; j < i; j++) + end = 0; + while (i + end < l) { - if (min[j].m_count + 1 < min[i].m_count - && can_be_handled (clusters, j, i - 1)) - min[i] = min_cluster_item (min[j].m_count + 1, j, INT_MAX); + cluster *end_cluster = clusters[i + end]; + + /* Does value range fit into the BITS_PER_WORD window? */ + HOST_WIDE_INT w = cluster::get_range (start_cluster->get_low (), + end_cluster->get_high ()); + if (w == 0 || w > BITS_PER_WORD) + break; + + /* Compute # of values tested for new case. */ + HOST_WIDE_INT r = 1; + if (!end_cluster->is_single_value_p ()) + r = cluster::get_range (end_cluster->get_low (), + end_cluster->get_high ()); + if (r == 0) + break; + + /* Check for max # of targets. */ + if (targets.elements() == m_max_case_bit_tests + && !targets.contains (end_cluster->m_case_bb)) + break; + + targets.add (end_cluster->m_case_bb); + values += r; + end++; } - gcc_checking_assert (min[i].m_count != INT_MAX); - } - - /* No result. */ - if (min[l].m_count == l) - return clusters.copy (); - - vec output; - output.create (4); - - /* Find and build the clusters. */ - for (unsigned end = l;;) - { - int start = min[end].m_start; - - if (is_beneficial (clusters, start, end - 1)) + if (is_beneficial (values, targets.elements ())) { - bool entire = start == 0 && end == clusters.length (); - output.safe_push (new bit_test_cluster (clusters, start, end - 1, - entire)); + output.safe_push (new bit_test_cluster (clusters, i, i + end - 1, + i == 0 && end == l)); } else - for (int i = end - 1; i >= start; i--) + { output.safe_push (clusters[i]); - - end = start; - - if (start <= 0) - break; + /* ??? Might be able to skip more. */ + end = 1; + } } - output.reverse (); return output; }