From patchwork Wed Jul 10 11:22:19 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Prathamesh Kulkarni X-Patchwork-Id: 1958810 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@legolas.ozlabs.org Authentication-Results: legolas.ozlabs.org; dkim=pass (2048-bit key; unprotected) header.d=Nvidia.com header.i=@Nvidia.com header.a=rsa-sha256 header.s=selector2 header.b=I4/WKZAq; dkim-atps=neutral Authentication-Results: legolas.ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=gcc.gnu.org (client-ip=2620:52:3:1:0:246e:9693:128c; helo=server2.sourceware.org; envelope-from=gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=patchwork.ozlabs.org) Received: from server2.sourceware.org (server2.sourceware.org [IPv6:2620:52:3:1:0:246e:9693:128c]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature ECDSA (secp384r1) server-digest SHA384) (No client certificate requested) by legolas.ozlabs.org (Postfix) with ESMTPS id 4WJwSs2s0sz1xqj for ; Wed, 10 Jul 2024 21:22:51 +1000 (AEST) Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id D19D2383915A for ; Wed, 10 Jul 2024 11:22:48 +0000 (GMT) X-Original-To: gcc-patches@gcc.gnu.org Delivered-To: gcc-patches@gcc.gnu.org Received: from NAM02-BN1-obe.outbound.protection.outlook.com (mail-bn1nam02on20609.outbound.protection.outlook.com [IPv6:2a01:111:f403:2407::609]) by sourceware.org (Postfix) with ESMTPS id 8A165384DEE3; Wed, 10 Jul 2024 11:22:24 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 8A165384DEE3 Authentication-Results: sourceware.org; dmarc=fail (p=reject dis=none) header.from=nvidia.com Authentication-Results: sourceware.org; spf=fail smtp.mailfrom=nvidia.com ARC-Filter: OpenARC Filter v1.0.0 sourceware.org 8A165384DEE3 Authentication-Results: server2.sourceware.org; 
arc=pass smtp.remote-ip=2a01:111:f403:2407::609 ARC-Seal: i=2; a=rsa-sha256; d=sourceware.org; s=key; t=1720610546; cv=pass; b=rBaBpW8KIr625Epk7Wpz37G112Cy04BfTax9CpBg5BtVUjFkJfc4OeyXWsPgxyDUaHWRmqBL4fpjC/ICjVgHSH6dGHuRner0Yl1RMF4HP5xpRv7d0GQtijEG4hPTGnP1Cv4Mz2AdJUI6bzvJXgX7X4IpF5wj6n1cnyKvuFh8o3Q= ARC-Message-Signature: i=2; a=rsa-sha256; d=sourceware.org; s=key; t=1720610546; c=relaxed/simple; bh=f9IZBYyJewovxrWxuaugeokBXTqve1pSsSNcjgoONKs=; h=DKIM-Signature:From:To:Subject:Date:Message-ID:MIME-Version; b=CtYAS5gUAyOQu6RzSTFr/bP0PorgtPaKbvam0RK1f5yCfPGzrBjle6Q/JtRRE6SAhpEU1mec7f4XE0CBoVSFcuKlwSRGWSVgi9lcxNRhln9+GOtz89flzkEOrJeAwRJbPMWCk8YFP6ttYoPiwo+meQo0yggAUQPnn5GfdCGbzjk= ARC-Authentication-Results: i=2; server2.sourceware.org ARC-Seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=Pwu3iJROuT99q/jruOKuTlE/jWYnZsJH/4WIlgW7zVSRdGWxqCHFeqzkKAur0U4/Ys1y8O0W2hIHLbL7+0fe0qiO5poCCqwlcypZnlvOYPCzJMw8KTIMjmv7nAn6feMkTghtln9MgWF0DtDGfGGbhqjvn25TxxTlWESRTA+8anMdynz9sOS884YBbeuxvGUpy6+LR+469E70DMmLFWzJ/Y7O004+5G48uKBjEJ89C1nh+ARQeNeYIBTtKR/rmkKvC6poYpZhpf73WUGTir889e8gcz0oYdENbvjw1B84W4Il00jfQ28weFMjqv0uCKEuGYszxcFVTjCVybq1nJWlrg== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=YIxBs4nuRez6orI8XOA+a4qtFc92K+ExWhPdwXVEzxg=; b=HEgt/WSpOflljuCwPEKPMSYbWhFVJ/t8V13pNplGW5AD00j7gkKB3FN7X9jeQod/yo3N4SeQvC6NELorpreZa8TzuJVETX66j0HMHoKPH6W1rQUJIevbQVW0tKNjc4pHgg8JfgQ8w4EWVX9wgt8xbW7X61VQuEbp68ekzPqtartoYUz3csJFkMj4UPddWJWG6si0PDpd3G8e42RZFw239KUHIMSEy4dFXyW+GaCBPOs1c7unBim7qxjyHWUV9LMiyfJB+fZzQQa2n3IZk7haQDau4J6G8yPBEcVdv/eiiQfFeqmFlap8HZOSIGksUvvv0suW3UWJa2lTNcdL3HBPcw== ARC-Authentication-Results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=nvidia.com; dmarc=pass action=none header.from=nvidia.com; dkim=pass header.d=nvidia.com; 
arc=none DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=Nvidia.com; s=selector2; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=YIxBs4nuRez6orI8XOA+a4qtFc92K+ExWhPdwXVEzxg=; b=I4/WKZAq+UTqvhxsxCAyudnOV73qFvDd6f45ew4sfrdOoODXcSVzjHlLbYQH+q1H3Z462F3jNS/+f45JrG6mUsRjz8w2oeryHLo9XdCVo5/uIDUxEys7qPrfqbdxaIxVFDbEnxRvbu0UrM88h2xEpalKmqKn5QM7xCmLW9jhrt9l1A0JiA2FnP3DWTUvNVB024tSAGrJdPV18rUxLxdSZC89iXzi/+BKwmvCf66UV3XgMjZ1YTxJvkmhxyP2HVg7x/zN80idB2FS/4+DsDuLQsbHWGCYumoxyAI4tJFEiaJWYdAelOFLN3dypjIhKxovnNfhMW1fscgkkiCa9IPhRQ== Received: from DM5PR12MB2423.namprd12.prod.outlook.com (2603:10b6:4:b3::30) by IA0PR12MB8254.namprd12.prod.outlook.com (2603:10b6:208:408::8) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.7741.36; Wed, 10 Jul 2024 11:22:20 +0000 Received: from DM5PR12MB2423.namprd12.prod.outlook.com ([fe80::3169:f56e:5635:b4fe]) by DM5PR12MB2423.namprd12.prod.outlook.com ([fe80::3169:f56e:5635:b4fe%7]) with mapi id 15.20.7741.033; Wed, 10 Jul 2024 11:22:19 +0000 From: Prathamesh Kulkarni To: "gcc-patches@gcc.gnu.org" , "fortran@gcc.gnu.org" Subject: Lower zeroing array assignment to memset for allocatable arrays Thread-Topic: Lower zeroing array assignment to memset for allocatable arrays Thread-Index: AdrROvB1I+JIVr2USiObaV6rWCIY9g== Date: Wed, 10 Jul 2024 11:22:19 +0000 Message-ID: Accept-Language: en-US Content-Language: en-US X-MS-Has-Attach: yes X-MS-TNEF-Correlator: authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=nvidia.com; x-ms-publictraffictype: Email x-ms-traffictypediagnostic: DM5PR12MB2423:EE_|IA0PR12MB8254:EE_ x-ms-office365-filtering-correlation-id: 181fecf1-6b2f-4afd-c30a-08dca0d28fcc x-ms-exchange-senderadcheck: 1 x-ms-exchange-antispam-relay: 0 x-microsoft-antispam: BCL:0; ARA:13230040|366016|1800799024|376014|38070700018; x-microsoft-antispam-message-info: 
KZo204oCj+19deddodH0gjfxcyRdvwIiKc7p5zV10BEhbD6RlMuF00oXlPvMFRu+KQR1xeIU7S31L9oT86y3BcGXpc0MyBed9002zxkdiJqvB7xt3HFHFQGvO530evNDTQOUdJp4OEPypReco8A0Sh83ukEtkTKFIqQHk7QZqgd+L0OE+yibpQ4L2/NNPmkZUBWgE3jV0nb0N6c2/AwKBI/+QXbOEcYd1CRW78lTydiSlKsOFqQPeTb0iwsNf6qWqrPmcdzDK7DTYU7lTC/sY3QCj0VdK6kMHs6dIYw3Zrl9m20qTQniWuYYUergUP2dCqfU6TuYQ0TeXrzpFyPgPqmW61CGJL7X944BhAEoMhxvXYZtUaY4aSST0OCneRIw3McAyaUNT16VEzhAkLc9eVsiyT74Ocl/e0E3LiW/l2yscS2tEYJVDNcplA2z0udVIShio3/FPB+70paMZ6VLAFOJ/M9MWUa56ZK2QM0BewkHSkW0et4fW6Mgxme6+SkhwMbvBT0C5D5iZEImd5MUxROdzq97xjmZVpZsrntQykWIEX1DhJIrQmI+0drWgy+p1q2ybzOgp0pY2Vcp0YJ3etT/GuwGAHYGWbB2tIxcFzkETNT2ge0ICdQi72hIPplqaAYu0Z4lpijIhzIQsMC3B6f56ySqlFwcqAz5udvBSjOdN6BeC7dSsk60vu1SWBH9vPwiboKrT2waW6HieEFgGPgOk+l060ikoZrN04FwGi5gMH73ilXHP8+pOfDpFVP6EDrFrmFxjbVC9V3oZGV4y2JBTNvTOAkdtl9KPKwFuDPNtCYsYUCYNJ3oxoVEKD8OV1+Pm42yz4WXzYoYNAJtVsBngYqLwczavTkR1qEM3hRMzSFbJeiEYvWPgnBSmttuO32tpst+UR+rYCss4rw6fUdhBwdK7OtqDPLFoCUE9PPRI4zpKj7wyhrPwe4m2q/jjokVqFBlG0JQktVLB8vqjHTQlY55/ArS9+MlVpC4AHnJD+Wgdhk5Ea7rOt1Csh9hB7u2ZfYtZ2Kav5aHUxOeXdbnd/CfSRCNp7gyf18yBBQQqFMGID0J5y2rRCEMXzl7YjdBe9UVVWIKdbOjdvxc4OIN1NZH0PQprc7tO+umhY5Y3ArB5nLs1gsAFB33hMdqxrgPRBRJQLHkurFsBOiEb7UdkYibuU1f0j1tx8Z4qf1h365j8ohPCa4i7Cq17mT60ng7WWoVnW2Dn0qq1N24fo4DesBPLqb06+ObnqbT4ojI8jRByVNyS6KpTlad8yNIA50un0TjHd8JuJ+zxRuyJg4dJ+PRsZ5iDvh+y3gSVSwqYTDIG9boEmycnU82HEXFa/WlJ0Wzmfu3uCVRQ9fqJN1VXlrfAgFbiDmE6QV4JWxOwWzhOYF9v1nWD8ZHY5i5LXw+1qcqhBWC8xhcLUhl9Q== x-forefront-antispam-report: CIP:255.255.255.255; CTRY:; LANG:en; SCL:1; SRV:; IPV:NLI; SFV:NSPM; H:DM5PR12MB2423.namprd12.prod.outlook.com; PTR:; CAT:NONE; SFS:(13230040)(366016)(1800799024)(376014)(38070700018); DIR:OUT; SFP:1101; x-ms-exchange-antispam-messagedata-chunkcount: 1 x-ms-exchange-antispam-messagedata-0: 
kpaDD3dqB4SezZHg8qi60DDvK65Tld7+UNI5oyQ5zCFaW6eXU4+GU2yPesus0xZwPbh+ceyqJ4VUr95kPVAw9VOWwK5vELNa/HcravLXG9GA3mxTDwEeNZjz4j24EtpKgDzLip1TEenhmriiCSZE/2uW6xOTpYgQKq3AF54gJeJYPh5ywylhwauUZnYdTwhwd8eoDK0fUsJEI8IpzQIKpLys5X0jPKUNx86w90CurQqEvNYcCOlV1rhE3kDLN6+ONnDtPftSVEojtP7Cp2sTDRaGv1ZwrolR0Uc5ekThyQCkI4I5zx7hnWTuayUoNRn7KKAneJfwTtZwokt/tmJkzQoeiUoVEGY02FCLKxPzbDpy2frd3ZROhtX5UnCA73h4qSSX8OLgc8zOBWitBA+F/ajJwyn3p/BZdwhOlSJgMrAPdRsk0Z02LQgdNnq9u2px19wMFEpvJ11aNfyXmRktluRwnkyfPqELDTOglSI55CZh4ygglpgig5CScY8e5PagdvoLxn3ysjsg6iLN08tGR90wCfBM9bj+9wQvIbmsqtjbr5J0IJaI+l3yAmu0vMiHjbENXpGW0KPwY6htVVS9EnHDarM9sNxGWeEElR2Stp/KiGpOdUIbjWtYSTAQjLQ6VuJQVc1eELc6EatMQdkxBWE4rZ0Pz85wUARGQ1Jy4PXE/b4hE5nILQI+9D63d2JpLYyivFlQkHgw8CI7I1LRM071hKLDQrV7rV6C4rx1K/sReEiIFKx5hfyL7/kw9YxqL276aMvSeWv853Hm+roehOV55ydPbCqDAdU5zTT0Au8O6Vci3d8KEV8qRt4wmWHk5ETWS4RwP6uQIBso7CjtTZ36+yUIv58Fcob+5MIB+S1Y6Q2uOaAxY1e+RgJ2XKsCCOOiX0Reem61hIXncR13U4jbgV/xUkQhVZ9Xt6t/0lQZlkGsN/TwurXT4xyrrSYzk8ZxlTcggq0f52Eib8KP82hXaE4uZF1x3t9tJngxKuJpOTBZI3D5QGbR7cST0pakG6FreSDk6fdwNnRKt51j+lwF8ux7EtA7D5lsfEuSDXj6WyPNmrGzIY2rqIMxVCJzVZvLUk/ilaLy/NoNgue0F07LQSODX8B/T0Dkr4fOmBPcmqe4KWwntP0dm3Mo7WONQenz6rx1OUb75SCpPMN7mKkn9xYh/GnXsJqbpfK96xicJCnqo3VjGBEoPuH2qLkf7QLRIG1Nj4dR/KhifYmNTVLsdP7HubyLl2HOGd9klXciVVrVs5LLSp//YifOlU3aCUTi7s3QXc+earLiXWEDfFSYODjYd+sKhecR58VFsK7CikIMB8twQGtJbRB7WebqxECBOAkT4FSg87RfhaYWlZNNoPJuLXTtLt3d2iJkBS4wBD4yEMqVJyspKxAxiA+9AyONnAxEXIuiJa9eiykYle+4A2Pm1kA36s9QxMJrMfSftRJRjTkjQgPJYOBBfWQ/GgrB7ZkkP0hgg4vcuNXaQ7zRN3fhCQR6DzfsQiHVEmZvu9lpREywQzyR0TNnNRA20qoiidBPThvGCSD2TwpQ7fWmjvtsqNNrQqFcUbfGQMHXcSccxPJv75xzJkKkgaKc MIME-Version: 1.0 X-OriginatorOrg: Nvidia.com X-MS-Exchange-CrossTenant-AuthAs: Internal X-MS-Exchange-CrossTenant-AuthSource: DM5PR12MB2423.namprd12.prod.outlook.com X-MS-Exchange-CrossTenant-Network-Message-Id: 181fecf1-6b2f-4afd-c30a-08dca0d28fcc X-MS-Exchange-CrossTenant-originalarrivaltime: 10 Jul 2024 11:22:19.5806 (UTC) X-MS-Exchange-CrossTenant-fromentityheader: Hosted 
X-MS-Exchange-CrossTenant-id: 43083d15-7273-40c1-b7db-39efd9ccc17a X-MS-Exchange-CrossTenant-mailboxtype: HOSTED X-MS-Exchange-CrossTenant-userprincipalname: uoGy/MpuszCVihD7USPIP38xtSJj4ekjnYl/+/Q4wKEngXaN1P+JdDq67JrBiMPg+xQnchhPfMxA/FkQh2DXnQ== X-MS-Exchange-Transport-CrossTenantHeadersStamped: IA0PR12MB8254 X-Spam-Status: No, score=-11.5 required=5.0 tests=BAYES_00, DKIMWL_WL_HIGH, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FORGED_SPF_HELO, GIT_PATCH_0, SPF_HELO_PASS, SPF_NONE, TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.30 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org Hi, The attached patch lowers zeroing array assignment to memset for allocatable arrays. For example: subroutine test(z, n) implicit none integer :: n real(4), allocatable :: z(:,:,:) allocate(z(n, 8192, 2048)) z = 0 end subroutine results in the following call to memset instead of 3 nested loops for z = 0: (void) __builtin_memset ((void *) z->data, 0, (unsigned long) ((((MAX_EXPR <z->dim[0].ubound - z->dim[0].lbound, -1> + 1) * (MAX_EXPR <z->dim[1].ubound - z->dim[1].lbound, -1> + 1)) * (MAX_EXPR <z->dim[2].ubound - z->dim[2].lbound, -1> + 1)) * 4)); The patch yields a significant speedup for an internal Fortran application on AArch64 -mcpu=grace (and potentially on other AArch64 cores too). Bootstrapped+tested on aarch64-linux-gnu. Does the patch look OK to commit? Signed-off-by: Prathamesh Kulkarni Thanks, Prathamesh Lower zeroing array assignment to memset for allocatable arrays. gcc/fortran/ChangeLog: * trans-expr.cc (gfc_trans_zero_assign): Handle allocatable arrays. gcc/testsuite/ChangeLog: * gfortran.dg/array_memset_3.f90: New test. 
Signed-off-by: Prathamesh Kulkarni Signed-off-by: Prathamesh Kulkarni Signed-off-by: Prathamesh Kulkarni Signed-off-by: Prathamesh Kulkarni diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc index 605434f4ddb..7773a24f9d4 100644 --- a/gcc/fortran/trans-expr.cc +++ b/gcc/fortran/trans-expr.cc @@ -11421,18 +11421,23 @@ gfc_trans_zero_assign (gfc_expr * expr) type = TREE_TYPE (dest); if (POINTER_TYPE_P (type)) type = TREE_TYPE (type); - if (!GFC_ARRAY_TYPE_P (type)) - return NULL_TREE; - - /* Determine the length of the array. */ - len = GFC_TYPE_ARRAY_SIZE (type); - if (!len || TREE_CODE (len) != INTEGER_CST) + if (GFC_ARRAY_TYPE_P (type)) + { + /* Determine the length of the array. */ + len = GFC_TYPE_ARRAY_SIZE (type); + if (!len || TREE_CODE (len) != INTEGER_CST) + return NULL_TREE; + } + else if (GFC_DESCRIPTOR_TYPE_P (type)) + { + if (POINTER_TYPE_P (TREE_TYPE (dest))) + dest = build_fold_indirect_ref_loc (input_location, dest); + len = gfc_conv_descriptor_size (dest, GFC_TYPE_ARRAY_RANK (type)); + dest = gfc_conv_descriptor_data_get (dest); + } + else return NULL_TREE; - tmp = TYPE_SIZE_UNIT (gfc_get_element_type (type)); - len = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type, len, - fold_convert (gfc_array_index_type, tmp)); - /* If we are zeroing a local array avoid taking its address by emitting a = {} instead. */ if (!POINTER_TYPE_P (TREE_TYPE (dest))) @@ -11440,6 +11445,11 @@ gfc_trans_zero_assign (gfc_expr * expr) dest, build_constructor (TREE_TYPE (dest), NULL)); + /* Multiply len by element size. */ + tmp = TYPE_SIZE_UNIT (gfc_get_element_type (type)); + len = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type, + len, fold_convert (gfc_array_index_type, tmp)); + /* Convert arguments to the correct types. 
*/ dest = fold_convert (pvoid_type_node, dest); len = fold_convert (size_type_node, len); diff --git a/gcc/testsuite/gfortran.dg/array_memset_3.f90 b/gcc/testsuite/gfortran.dg/array_memset_3.f90 new file mode 100644 index 00000000000..b750c8de67d --- /dev/null +++ b/gcc/testsuite/gfortran.dg/array_memset_3.f90 @@ -0,0 +1,31 @@ +! { dg-do compile } +! { dg-options "-O2 -fdump-tree-original" } + +subroutine test1(n) + implicit none + integer(8) :: n + real(4), allocatable :: z(:,:,:) + + allocate(z(n, 100, 200)) + z = 0 +end subroutine + +subroutine test2(n) + implicit none + integer(8) :: n + integer, allocatable :: z(:,:,:) + + allocate(z(n, 100, 200)) + z = 0 +end subroutine + +subroutine test3(n) + implicit none + integer(8) :: n + logical, allocatable :: z(:,:,:) + + allocate(z(n, 100, 200)) + z = .false. +end subroutine + +! { dg-final { scan-tree-dump-times "__builtin_memset" 3 "original" } }