From patchwork Wed Jun 29 23:07:15 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Noah Goldstein X-Patchwork-Id: 1650415 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: bilbo.ozlabs.org; dkim=pass (1024-bit key; secure) header.d=sourceware.org header.i=@sourceware.org header.a=rsa-sha256 header.s=default header.b=C6SQxooG; dkim-atps=neutral Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=sourceware.org (client-ip=2620:52:3:1:0:246e:9693:128c; helo=sourceware.org; envelope-from=libc-alpha-bounces+incoming=patchwork.ozlabs.org@sourceware.org; receiver=) Received: from sourceware.org (server2.sourceware.org [IPv6:2620:52:3:1:0:246e:9693:128c]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by bilbo.ozlabs.org (Postfix) with ESMTPS id 4LYHF94bX9z9s09 for ; Thu, 30 Jun 2022 09:08:13 +1000 (AEST) Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id 75A6A386DC7C for ; Wed, 29 Jun 2022 23:08:11 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 75A6A386DC7C DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1656544091; bh=Xm48kxjXB6rGoJuPaok07/eUF43GpzUlIU08q4Fs8jM=; h=To:Subject:Date:In-Reply-To:References:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To: From; b=C6SQxooGl/YW5noX+E8A5r3mwAtvJhRF3/+SDPSvv+ElfmtjBsBtrPFi+5gyFv4w0 sUnKNexP3R+Ha8abaDyo5V6nAeUt8YjXv7V3xD6ePlT+TAeyNR4Tyq4Fa+wCJR7pm4 6VYpPsCSxtHYYc19JiVSY74lf5pepu7nhpxl1P58= X-Original-To: libc-alpha@sourceware.org Delivered-To: libc-alpha@sourceware.org Received: from mail-pf1-x435.google.com (mail-pf1-x435.google.com [IPv6:2607:f8b0:4864:20::435]) by sourceware.org (Postfix) with ESMTPS id 3515038485A1 for ; Wed, 29 Jun 2022 23:07:19 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 3515038485A1 Received: by mail-pf1-x435.google.com with SMTP id p14so16456418pfh.6 for ; Wed, 29 Jun 2022 16:07:19 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=Xm48kxjXB6rGoJuPaok07/eUF43GpzUlIU08q4Fs8jM=; b=4YgjncJXLCc1J16GcaZNU7mUarNdxjmPuc8kgHfrUD5qIqpEt2nokiOTvc7dTnrhVa NU2huhSOg5h8igyToYtYz70G1o/7s4bgjBX17cg2yoviJWzHq/O/De8gRNDRQ5lTpfVq 1IjroHnjJ0nGqvbumzsmUccF84tXHcQGZn/Uxp4NTcD9CphAMg44dUqa382EMmtkC3Kb Dw+04YlgGdbF50PNHjolX2mY3bqHNeecY98pYCd15GdD5EMO3TgPfghpbWtp2k6d2zgJ qqOCx01P4sLucone7sl8PaRJ9ZXzhWEICUuI8/XQzZqOr+naTAAJ7u4G5lkGzt5eO2xB CfLQ== X-Gm-Message-State: AJIora+5te4vji2H8+HT5d6GojFNoU9+OMiAg8Ep6jDBz/bCeudoDYWo hMReAz9DnGASgeL/jKPTYc6BSOQZSaE= X-Google-Smtp-Source: AGRyM1vtQ+jUvKW3eK2tUfB8igUpZKhneaOiWNZSH/8GpQAEFHN8BMNApCNKddo+LMxjXdX7GU+oaA== X-Received: by 2002:aa7:900a:0:b0:525:1e54:e64e with SMTP id m10-20020aa7900a000000b005251e54e64emr12304256pfo.35.1656544037979; Wed, 29 Jun 2022 16:07:17 -0700 (PDT) Received: from noah-tgl.. ([192.55.60.43]) by smtp.gmail.com with ESMTPSA id x124-20020a626382000000b00525231e15ccsm12071537pfb.113.2022.06.29.16.07.17 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 29 Jun 2022 16:07:17 -0700 (PDT) To: libc-alpha@sourceware.org Subject: [PATCH v3 1/2] x86: Move mem{p}{mov|cpy}_{chk_}erms to its own file Date: Wed, 29 Jun 2022 16:07:15 -0700 Message-Id: <20220629230716.1264249-1-goldstein.w.n@gmail.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: <20220628152757.17922-1-goldstein.w.n@gmail.com> References: <20220628152757.17922-1-goldstein.w.n@gmail.com> MIME-Version: 1.0 X-Spam-Status: No, score=-12.1 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, KAM_SHORT, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-Patchwork-Original-From: Noah Goldstein via Libc-alpha From: Noah Goldstein Reply-To: Noah Goldstein Errors-To: libc-alpha-bounces+incoming=patchwork.ozlabs.org@sourceware.org Sender: "Libc-alpha" The primary memmove_{impl}_unaligned_erms implementations don't interact with this function. Putting them in same file both wastes space and unnecessarily bloats a hot code section. --- sysdeps/x86_64/multiarch/Makefile | 1 + sysdeps/x86_64/multiarch/memmove-erms.S | 72 +++++++++++++++++++ .../multiarch/memmove-vec-unaligned-erms.S | 50 ------------- 3 files changed, 73 insertions(+), 50 deletions(-) create mode 100644 sysdeps/x86_64/multiarch/memmove-erms.S diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 666ee4d5d6..62a4d96fb8 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -18,6 +18,7 @@ sysdep_routines += \ memmove-avx-unaligned-erms-rtm \ memmove-avx512-no-vzeroupper \ memmove-avx512-unaligned-erms \ + memmove-erms \ memmove-evex-unaligned-erms \ memmove-sse2-unaligned-erms \ memmove-ssse3 \ diff --git a/sysdeps/x86_64/multiarch/memmove-erms.S b/sysdeps/x86_64/multiarch/memmove-erms.S new file mode 100644 index 0000000000..2d3a6ccb76 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memmove-erms.S @@ -0,0 +1,72 @@ +/* memcpy/mempcpy/memmove implement with rep movsb + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + + +#include + +#if defined USE_MULTIARCH && IS_IN (libc) + .text +ENTRY (__mempcpy_chk_erms) + cmp %RDX_LP, %RCX_LP + jb HIDDEN_JUMPTARGET (__chk_fail) +END (__mempcpy_chk_erms) + +/* Only used to measure performance of REP MOVSB. */ +ENTRY (__mempcpy_erms) + mov %RDI_LP, %RAX_LP + /* Skip zero length. */ + test %RDX_LP, %RDX_LP + jz 2f + add %RDX_LP, %RAX_LP + jmp L(start_movsb) +END (__mempcpy_erms) + +ENTRY (__memmove_chk_erms) + cmp %RDX_LP, %RCX_LP + jb HIDDEN_JUMPTARGET (__chk_fail) +END (__memmove_chk_erms) + +ENTRY (__memmove_erms) + movq %rdi, %rax + /* Skip zero length. */ + test %RDX_LP, %RDX_LP + jz 2f +L(start_movsb): + mov %RDX_LP, %RCX_LP + cmp %RSI_LP, %RDI_LP + jb 1f + /* Source == destination is less common. */ + je 2f + lea (%rsi,%rcx), %RDX_LP + cmp %RDX_LP, %RDI_LP + jb L(movsb_backward) +1: + rep movsb +2: + ret +L(movsb_backward): + leaq -1(%rdi,%rcx), %rdi + leaq -1(%rsi,%rcx), %rsi + std + rep movsb + cld + ret +END (__memmove_erms) +strong_alias (__memmove_erms, __memcpy_erms) +strong_alias (__memmove_chk_erms, __memcpy_chk_erms) +#endif diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S index d1518b8bab..04747133b7 100644 --- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S @@ -239,56 +239,6 @@ L(start): #endif #if defined USE_MULTIARCH && IS_IN (libc) END (MEMMOVE_SYMBOL (__memmove, unaligned)) -# if VEC_SIZE == 16 -ENTRY (__mempcpy_chk_erms) - cmp %RDX_LP, %RCX_LP - jb HIDDEN_JUMPTARGET (__chk_fail) -END (__mempcpy_chk_erms) - -/* Only used to measure performance of REP MOVSB. */ -ENTRY (__mempcpy_erms) - mov %RDI_LP, %RAX_LP - /* Skip zero length. */ - test %RDX_LP, %RDX_LP - jz 2f - add %RDX_LP, %RAX_LP - jmp L(start_movsb) -END (__mempcpy_erms) - -ENTRY (__memmove_chk_erms) - cmp %RDX_LP, %RCX_LP - jb HIDDEN_JUMPTARGET (__chk_fail) -END (__memmove_chk_erms) - -ENTRY (__memmove_erms) - movq %rdi, %rax - /* Skip zero length. */ - test %RDX_LP, %RDX_LP - jz 2f -L(start_movsb): - mov %RDX_LP, %RCX_LP - cmp %RSI_LP, %RDI_LP - jb 1f - /* Source == destination is less common. */ - je 2f - lea (%rsi,%rcx), %RDX_LP - cmp %RDX_LP, %RDI_LP - jb L(movsb_backward) -1: - rep movsb -2: - ret -L(movsb_backward): - leaq -1(%rdi,%rcx), %rdi - leaq -1(%rsi,%rcx), %rsi - std - rep movsb - cld - ret -END (__memmove_erms) -strong_alias (__memmove_erms, __memcpy_erms) -strong_alias (__memmove_chk_erms, __memcpy_chk_erms) -# endif # ifdef SHARED ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))