Message ID | 20221015000615.126774-1-goldstein.w.n@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v8,1/6] x86: Update VEC macros to complete API for evex/evex512 impls | expand |
On Fri, Oct 14, 2022 at 5:06 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > 1) Copy so that backport will be easier. > 2) Make section only define if there is not a previous definition > 3) Add `VEC_lo` definition for proper reg-width but in the > ymm/zmm0-15 range. > 4) Add macros for accessing GPRs based on VEC_SIZE > This is to make it easier to do things like: > ``` > vpcmpb %VEC(0), %VEC(1), %k0 > kmov{d|q} %k0, %{eax|rax} > test %{eax|rax} > ``` > It adds macros s.t. any GPR can get the proper width with: > `V{upper_case_GPR_name}` > > and any mask insn can get the proper width with: > `{mask_insn_without_postfix}V` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This comment is incorrect. > > This commit does not change libc.so > > Tested build on x86-64 > --- > sysdeps/x86_64/multiarch/reg-macros.h | 168 ++++++++++++++++++ > .../multiarch/scripts/gen-reg-macros.py | 125 +++++++++++++ > sysdeps/x86_64/multiarch/x86-avx-rtm-vecs.h | 35 ++++ > sysdeps/x86_64/multiarch/x86-avx-vecs.h | 47 +++++ > .../x86_64/multiarch/x86-evex-vecs-common.h | 39 ++++ > sysdeps/x86_64/multiarch/x86-evex256-vecs.h | 38 ++++ > sysdeps/x86_64/multiarch/x86-evex512-vecs.h | 38 ++++ > sysdeps/x86_64/multiarch/x86-sse2-vecs.h | 47 +++++ > sysdeps/x86_64/multiarch/x86-vec-macros.h | 90 ++++++++++ > 9 files changed, 627 insertions(+) > create mode 100644 sysdeps/x86_64/multiarch/reg-macros.h > create mode 100644 sysdeps/x86_64/multiarch/scripts/gen-reg-macros.py > create mode 100644 sysdeps/x86_64/multiarch/x86-avx-rtm-vecs.h > create mode 100644 sysdeps/x86_64/multiarch/x86-avx-vecs.h > create mode 100644 sysdeps/x86_64/multiarch/x86-evex-vecs-common.h > create mode 100644 sysdeps/x86_64/multiarch/x86-evex256-vecs.h > create mode 100644 sysdeps/x86_64/multiarch/x86-evex512-vecs.h > create mode 100644 sysdeps/x86_64/multiarch/x86-sse2-vecs.h > create mode 100644 sysdeps/x86_64/multiarch/x86-vec-macros.h > > diff --git a/sysdeps/x86_64/multiarch/reg-macros.h 
b/sysdeps/x86_64/multiarch/reg-macros.h > new file mode 100644 > index 0000000000..c8ea330256 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/reg-macros.h > @@ -0,0 +1,168 @@ > +/* This file was generated by: gen-reg-macros.py. > + > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > +#ifndef _REG_MACROS_H > +#define _REG_MACROS_H 1 > + > +#define rax_8 al > +#define rax_16 ax > +#define rax_32 eax > +#define rax_64 rax > +#define rbx_8 bl > +#define rbx_16 bx > +#define rbx_32 ebx > +#define rbx_64 rbx > +#define rcx_8 cl > +#define rcx_16 cx > +#define rcx_32 ecx > +#define rcx_64 rcx > +#define rdx_8 dl > +#define rdx_16 dx > +#define rdx_32 edx > +#define rdx_64 rdx > +#define rbp_8 bpl > +#define rbp_16 bp > +#define rbp_32 ebp > +#define rbp_64 rbp > +#define rsp_8 spl > +#define rsp_16 sp > +#define rsp_32 esp > +#define rsp_64 rsp > +#define rsi_8 sil > +#define rsi_16 si > +#define rsi_32 esi > +#define rsi_64 rsi > +#define rdi_8 dil > +#define rdi_16 di > +#define rdi_32 edi > +#define rdi_64 rdi > +#define r8_8 r8b > +#define r8_16 r8w > +#define r8_32 r8d > +#define r8_64 r8 > +#define r9_8 r9b > +#define r9_16 r9w > +#define r9_32 r9d > +#define r9_64 r9 > +#define r10_8 r10b > +#define r10_16 r10w > +#define r10_32 r10d > +#define r10_64 r10 > +#define r11_8 r11b > +#define r11_16 r11w > +#define r11_32 r11d > +#define r11_64 r11 > +#define r12_8 r12b > +#define r12_16 r12w > +#define r12_32 r12d > +#define r12_64 r12 > +#define r13_8 r13b > +#define r13_16 r13w > +#define r13_32 r13d > +#define r13_64 r13 > +#define r14_8 r14b > +#define r14_16 r14w > +#define r14_32 r14d > +#define r14_64 r14 > +#define r15_8 r15b > +#define r15_16 r15w > +#define r15_32 r15d > +#define r15_64 r15 > + > +#define kmov_8 kmovb > +#define kmov_16 kmovw > +#define kmov_32 kmovd > +#define kmov_64 kmovq > +#define kortest_8 kortestb > +#define kortest_16 kortestw > +#define kortest_32 kortestd > +#define kortest_64 kortestq > +#define kor_8 korb > +#define kor_16 korw > +#define kor_32 kord > +#define kor_64 korq > +#define ktest_8 ktestb > +#define ktest_16 ktestw > +#define ktest_32 ktestd > +#define ktest_64 ktestq > +#define kand_8 kandb > +#define kand_16 kandw > +#define kand_32 kandd > +#define kand_64 kandq > +#define kxor_8 kxorb > 
+#define kxor_16 kxorw > +#define kxor_32 kxord > +#define kxor_64 kxorq > +#define knot_8 knotb > +#define knot_16 knotw > +#define knot_32 knotd > +#define knot_64 knotq > +#define kxnor_8 kxnorb > +#define kxnor_16 kxnorw > +#define kxnor_32 kxnord > +#define kxnor_64 kxnorq > +#define kunpack_8 kunpackbw > +#define kunpack_16 kunpackwd > +#define kunpack_32 kunpackdq > + > +/* Common API for accessing proper width GPR is V{upcase_GPR_name}. */ > +#define VRAX VGPR(rax) > +#define VRBX VGPR(rbx) > +#define VRCX VGPR(rcx) > +#define VRDX VGPR(rdx) > +#define VRBP VGPR(rbp) > +#define VRSP VGPR(rsp) > +#define VRSI VGPR(rsi) > +#define VRDI VGPR(rdi) > +#define VR8 VGPR(r8) > +#define VR9 VGPR(r9) > +#define VR10 VGPR(r10) > +#define VR11 VGPR(r11) > +#define VR12 VGPR(r12) > +#define VR13 VGPR(r13) > +#define VR14 VGPR(r14) > +#define VR15 VGPR(r15) > + > +/* Common API for accessing proper width mask insn is {upcase_mask_insn}. */ > +#define KMOV VKINSN(kmov) > +#define KORTEST VKINSN(kortest) > +#define KOR VKINSN(kor) > +#define KTEST VKINSN(ktest) > +#define KAND VKINSN(kand) > +#define KXOR VKINSN(kxor) > +#define KNOT VKINSN(knot) > +#define KXNOR VKINSN(kxnor) > +#define KUNPACK VKINSN(kunpack) These aren't register macros. Should reg-macros.h be renamed, like vec-macros.h? 
> + > +#ifdef USE_WIDE_CHAR > +# define REG_WIDTH 32 > +#else > +# define REG_WIDTH VEC_SIZE > +#endif > + > +#define VPASTER(x, y) x##_##y > +#define VEVALUATOR(x, y) VPASTER(x, y) > + > +#define VGPR_SZ(reg_name, reg_size) VEVALUATOR(reg_name, reg_size) > +#define VKINSN_SZ(insn, reg_size) VEVALUATOR(insn, reg_size) > + > +#define VGPR(reg_name) VGPR_SZ(reg_name, REG_WIDTH) > +#define VKINSN(mask_insn) VKINSN_SZ(mask_insn, REG_WIDTH) > + > +#endif > diff --git a/sysdeps/x86_64/multiarch/scripts/gen-reg-macros.py b/sysdeps/x86_64/multiarch/scripts/gen-reg-macros.py > new file mode 100644 > index 0000000000..6a05f27ff4 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/scripts/gen-reg-macros.py > @@ -0,0 +1,125 @@ > +#!/usr/bin/python3 > +# Copyright (C) 2022 Free Software Foundation, Inc. > +# This file is part of the GNU C Library. > +# > +# The GNU C Library is free software; you can redistribute it and/or > +# modify it under the terms of the GNU Lesser General Public > +# License as published by the Free Software Foundation; either > +# version 2.1 of the License, or (at your option) any later version. > +# > +# The GNU C Library is distributed in the hope that it will be useful, > +# but WITHOUT ANY WARRANTY; without even the implied warranty of > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +# Lesser General Public License for more details. > +# > +# You should have received a copy of the GNU Lesser General Public > +# License along with the GNU C Library; if not, see > +# <https://www.gnu.org/licenses/>. 
> +"""Generate macros for getting GPR name of a certain size > + > +Inputs: None > +Output: Prints header fill to stdout > + > +API: > + VGPR(reg_name) > + - Get register name VEC_SIZE component of `reg_name` > + VGPR_SZ(reg_name, reg_size) > + - Get register name `reg_size` component of `reg_name` > +""" > + > +import sys > +import os > +from datetime import datetime > + > +registers = [["rax", "eax", "ax", "al"], ["rbx", "ebx", "bx", "bl"], > + ["rcx", "ecx", "cx", "cl"], ["rdx", "edx", "dx", "dl"], > + ["rbp", "ebp", "bp", "bpl"], ["rsp", "esp", "sp", "spl"], > + ["rsi", "esi", "si", "sil"], ["rdi", "edi", "di", "dil"], > + ["r8", "r8d", "r8w", "r8b"], ["r9", "r9d", "r9w", "r9b"], > + ["r10", "r10d", "r10w", "r10b"], ["r11", "r11d", "r11w", "r11b"], > + ["r12", "r12d", "r12w", "r12b"], ["r13", "r13d", "r13w", "r13b"], > + ["r14", "r14d", "r14w", "r14b"], ["r15", "r15d", "r15w", "r15b"]] > + > +mask_insns = [ > + "kmov", > + "kortest", > + "kor", > + "ktest", > + "kand", > + "kxor", > + "knot", > + "kxnor", > +] > +mask_insns_ext = ["b", "w", "d", "q"] > + > +cr = """ > + Copyright (C) {} Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > +""" > + > +print("/* This file was generated by: {}.".format(os.path.basename( > + sys.argv[0]))) > +print(cr.format(datetime.today().year)) > + > +print("#ifndef _REG_MACROS_H") > +print("#define _REG_MACROS_H\t1") > +print("") > +for reg in registers: > + for i in range(0, 4): > + print("#define {}_{}\t{}".format(reg[0], 8 << i, reg[3 - i])) > + > +print("") > +for mask_insn in mask_insns: > + for i in range(0, 4): > + print("#define {}_{}\t{}{}".format(mask_insn, 8 << i, mask_insn, > + mask_insns_ext[i])) > +for i in range(0, 3): > + print("#define kunpack_{}\tkunpack{}{}".format(8 << i, mask_insns_ext[i], > + mask_insns_ext[i + 1])) > +mask_insns.append("kunpack") > + > +print("") > +print( > + "/* Common API for accessing proper width GPR is V{upcase_GPR_name}. */") > +for reg in registers: > + print("#define V{}\tVGPR({})".format(reg[0].upper(), reg[0])) > + > +print("") > + > +print( > + "/* Common API for accessing proper width mask insn is {upcase_mask_insn}. */" > +) > +for mask_insn in mask_insns: > + print("#define {} \tVKINSN({})".format(mask_insn.upper(), mask_insn)) > +print("") > + > +print("#ifdef USE_WIDE_CHAR") > +print("# define REG_WIDTH 32") > +print("#else") > +print("# define REG_WIDTH VEC_SIZE") > +print("#endif") > +print("") > +print("#define VPASTER(x, y)\tx##_##y") > +print("#define VEVALUATOR(x, y)\tVPASTER(x, y)") > +print("") > +print("#define VGPR_SZ(reg_name, reg_size)\tVEVALUATOR(reg_name, reg_size)") > +print("#define VKINSN_SZ(insn, reg_size)\tVEVALUATOR(insn, reg_size)") > +print("") > +print("#define VGPR(reg_name)\tVGPR_SZ(reg_name, REG_WIDTH)") > +print("#define VKINSN(mask_insn)\tVKINSN_SZ(mask_insn, REG_WIDTH)") > + > +print("\n#endif") > diff --git a/sysdeps/x86_64/multiarch/x86-avx-rtm-vecs.h b/sysdeps/x86_64/multiarch/x86-avx-rtm-vecs.h > new file mode 100644 > index 0000000000..0b326c8a70 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/x86-avx-rtm-vecs.h > @@ -0,0 +1,35 @@ > +/* Common config for AVX-RTM VECs > 
+ All versions must be listed in ifunc-impl-list.c. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#ifndef _X86_AVX_RTM_VECS_H > +#define _X86_AVX_RTM_VECS_H 1 > + > +#define COND_VZEROUPPER COND_VZEROUPPER_XTEST > +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ > + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST > + > +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) > + > +#define USE_WITH_RTM 1 > +#include "x86-avx-vecs.h" > + > +#undef SECTION > +#define SECTION(p) p##.avx.rtm > + > +#endif > diff --git a/sysdeps/x86_64/multiarch/x86-avx-vecs.h b/sysdeps/x86_64/multiarch/x86-avx-vecs.h > new file mode 100644 > index 0000000000..dca1089060 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/x86-avx-vecs.h > @@ -0,0 +1,47 @@ > +/* Common config for AVX VECs > + All versions must be listed in ifunc-impl-list.c. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. 
> + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#ifndef _X86_AVX_VECS_H > +#define _X86_AVX_VECS_H 1 > + > +#ifdef VEC_SIZE > +# error "Multiple VEC configs included!" > +#endif > + > +#define VEC_SIZE 32 > +#include "x86-vec-macros.h" > + > +#define USE_WITH_AVX 1 > +#define SECTION(p) p##.avx > + > +/* 4-byte mov instructions with AVX2. */ > +#define MOV_SIZE 4 > +/* 1 (ret) + 3 (vzeroupper). */ > +#define RET_SIZE 4 > +#define VZEROUPPER vzeroupper > + > +#define VMOVU vmovdqu > +#define VMOVA vmovdqa > +#define VMOVNT vmovntdq > + > +/* Often need to access xmm portion. */ > +#define VMM_128 VMM_any_xmm > +#define VMM VMM_any_ymm > + > +#endif > diff --git a/sysdeps/x86_64/multiarch/x86-evex-vecs-common.h b/sysdeps/x86_64/multiarch/x86-evex-vecs-common.h > new file mode 100644 > index 0000000000..f331e9d8ec > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/x86-evex-vecs-common.h > @@ -0,0 +1,39 @@ > +/* Common config for EVEX256 and EVEX512 VECs > + All versions must be listed in ifunc-impl-list.c. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#ifndef _X86_EVEX_VECS_COMMON_H > +#define _X86_EVEX_VECS_COMMON_H 1 > + > +#include "x86-vec-macros.h" > + > +/* 6-byte mov instructions with EVEX. */ > +#define MOV_SIZE 6 > +/* No vzeroupper needed. */ > +#define RET_SIZE 1 > +#define VZEROUPPER > + > +#define VMOVU vmovdqu64 > +#define VMOVA vmovdqa64 > +#define VMOVNT vmovntdq > + > +#define VMM_128 VMM_hi_xmm > +#define VMM_256 VMM_hi_ymm > +#define VMM_512 VMM_hi_zmm > + > +#endif > diff --git a/sysdeps/x86_64/multiarch/x86-evex256-vecs.h b/sysdeps/x86_64/multiarch/x86-evex256-vecs.h > new file mode 100644 > index 0000000000..8337b95504 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/x86-evex256-vecs.h > @@ -0,0 +1,38 @@ > +/* Common config for EVEX256 VECs > + All versions must be listed in ifunc-impl-list.c. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#ifndef _EVEX256_VECS_H > +#define _EVEX256_VECS_H 1 > + > +#ifdef VEC_SIZE > +# error "Multiple VEC configs included!" 
> +#endif > + > +#define VEC_SIZE 32 > +#include "x86-evex-vecs-common.h" > + > +#define USE_WITH_EVEX256 1 > + > +#ifndef SECTION > +# define SECTION(p) p##.evex > +#endif > + > +#define VMM VMM_256 > +#define VMM_lo VMM_any_ymm > +#endif > diff --git a/sysdeps/x86_64/multiarch/x86-evex512-vecs.h b/sysdeps/x86_64/multiarch/x86-evex512-vecs.h > new file mode 100644 > index 0000000000..7dc5c23ad0 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/x86-evex512-vecs.h > @@ -0,0 +1,38 @@ > +/* Common config for EVEX512 VECs > + All versions must be listed in ifunc-impl-list.c. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#ifndef _EVEX512_VECS_H > +#define _EVEX512_VECS_H 1 > + > +#ifdef VEC_SIZE > +# error "Multiple VEC configs included!" 
> +#endif > + > +#define VEC_SIZE 64 > +#include "x86-evex-vecs-common.h" > + > +#define USE_WITH_EVEX512 1 > + > +#ifndef SECTION > +# define SECTION(p) p##.evex512 > +#endif > + > +#define VMM VMM_512 > +#define VMM_lo VMM_any_zmm > +#endif > diff --git a/sysdeps/x86_64/multiarch/x86-sse2-vecs.h b/sysdeps/x86_64/multiarch/x86-sse2-vecs.h > new file mode 100644 > index 0000000000..b8bbd5dc29 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/x86-sse2-vecs.h > @@ -0,0 +1,47 @@ > +/* Common config for SSE2 VECs > + All versions must be listed in ifunc-impl-list.c. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#ifndef _X86_SSE2_VECS_H > +#define _X86_SSE2_VECS_H 1 > + > +#ifdef VEC_SIZE > +# error "Multiple VEC configs included!" > +#endif > + > +#define VEC_SIZE 16 > +#include "x86-vec-macros.h" > + > +#define USE_WITH_SSE2 1 > +#define SECTION(p) p > + > +/* 3-byte mov instructions with SSE2. */ > +#define MOV_SIZE 3 > +/* No vzeroupper needed. 
*/ > +#define RET_SIZE 1 > +#define VZEROUPPER > + > +#define VMOVU movups > +#define VMOVA movaps > +#define VMOVNT movntdq > + > +#define VMM_128 VMM_any_xmm > +#define VMM VMM_any_xmm > + > + > +#endif > diff --git a/sysdeps/x86_64/multiarch/x86-vec-macros.h b/sysdeps/x86_64/multiarch/x86-vec-macros.h > new file mode 100644 > index 0000000000..7d6bb31d55 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/x86-vec-macros.h > @@ -0,0 +1,90 @@ > +/* Macro helpers for VEC_{type}({vec_num}) > + All versions must be listed in ifunc-impl-list.c. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#ifndef _X86_VEC_MACROS_H > +#define _X86_VEC_MACROS_H 1 > + > +#ifndef VEC_SIZE > +# error "Never include this file directly. Always include a vector config." > +#endif > + > +/* Defines so we can use SSE2 / AVX2 / EVEX / EVEX512 encoding with same > + VMM(N) values. 
*/ > +#define VMM_hi_xmm0 xmm16 > +#define VMM_hi_xmm1 xmm17 > +#define VMM_hi_xmm2 xmm18 > +#define VMM_hi_xmm3 xmm19 > +#define VMM_hi_xmm4 xmm20 > +#define VMM_hi_xmm5 xmm21 > +#define VMM_hi_xmm6 xmm22 > +#define VMM_hi_xmm7 xmm23 > +#define VMM_hi_xmm8 xmm24 > +#define VMM_hi_xmm9 xmm25 > +#define VMM_hi_xmm10 xmm26 > +#define VMM_hi_xmm11 xmm27 > +#define VMM_hi_xmm12 xmm28 > +#define VMM_hi_xmm13 xmm29 > +#define VMM_hi_xmm14 xmm30 > +#define VMM_hi_xmm15 xmm31 > + > +#define VMM_hi_ymm0 ymm16 > +#define VMM_hi_ymm1 ymm17 > +#define VMM_hi_ymm2 ymm18 > +#define VMM_hi_ymm3 ymm19 > +#define VMM_hi_ymm4 ymm20 > +#define VMM_hi_ymm5 ymm21 > +#define VMM_hi_ymm6 ymm22 > +#define VMM_hi_ymm7 ymm23 > +#define VMM_hi_ymm8 ymm24 > +#define VMM_hi_ymm9 ymm25 > +#define VMM_hi_ymm10 ymm26 > +#define VMM_hi_ymm11 ymm27 > +#define VMM_hi_ymm12 ymm28 > +#define VMM_hi_ymm13 ymm29 > +#define VMM_hi_ymm14 ymm30 > +#define VMM_hi_ymm15 ymm31 > + > +#define VMM_hi_zmm0 zmm16 > +#define VMM_hi_zmm1 zmm17 > +#define VMM_hi_zmm2 zmm18 > +#define VMM_hi_zmm3 zmm19 > +#define VMM_hi_zmm4 zmm20 > +#define VMM_hi_zmm5 zmm21 > +#define VMM_hi_zmm6 zmm22 > +#define VMM_hi_zmm7 zmm23 > +#define VMM_hi_zmm8 zmm24 > +#define VMM_hi_zmm9 zmm25 > +#define VMM_hi_zmm10 zmm26 > +#define VMM_hi_zmm11 zmm27 > +#define VMM_hi_zmm12 zmm28 > +#define VMM_hi_zmm13 zmm29 > +#define VMM_hi_zmm14 zmm30 > +#define VMM_hi_zmm15 zmm31 > + > +#define PRIMITIVE_VMM(vec, num) vec##num > + > +#define VMM_any_xmm(i) PRIMITIVE_VMM(xmm, i) > +#define VMM_any_ymm(i) PRIMITIVE_VMM(ymm, i) > +#define VMM_any_zmm(i) PRIMITIVE_VMM(zmm, i) > + > +#define VMM_hi_xmm(i) PRIMITIVE_VMM(VMM_hi_xmm, i) > +#define VMM_hi_ymm(i) PRIMITIVE_VMM(VMM_hi_ymm, i) > +#define VMM_hi_zmm(i) PRIMITIVE_VMM(VMM_hi_zmm, i) > + > +#endif > -- > 2.34.1 >
On Fri, Oct 14, 2022 at 7:13 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > On Fri, Oct 14, 2022 at 5:06 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > 1) Copy so that backport will be easier. > > 2) Make section only define if there is not a previous definition > > 3) Add `VEC_lo` definition for proper reg-width but in the > > ymm/zmm0-15 range. > > 4) Add macros for accessing GPRs based on VEC_SIZE > > This is to make it easier to do things like: > > ``` > > vpcmpb %VEC(0), %VEC(1), %k0 > > kmov{d|q} %k0, %{eax|rax} > > test %{eax|rax} > > ``` > > It adds macros s.t. any GPR can get the proper width with: > > `V{upper_case_GPR_name}` > > > > and any mask insn can get the proper width with: > > `{mask_insn_without_postfix}V` > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > > This comment is incorrect. Fixed in V9. > > > > > This commit does not change libc.so > > > > Tested build on x86-64 > > --- > > sysdeps/x86_64/multiarch/reg-macros.h | 168 ++++++++++++++++++ > > .../multiarch/scripts/gen-reg-macros.py | 125 +++++++++++++ > > sysdeps/x86_64/multiarch/x86-avx-rtm-vecs.h | 35 ++++ > > sysdeps/x86_64/multiarch/x86-avx-vecs.h | 47 +++++ > > .../x86_64/multiarch/x86-evex-vecs-common.h | 39 ++++ > > sysdeps/x86_64/multiarch/x86-evex256-vecs.h | 38 ++++ > > sysdeps/x86_64/multiarch/x86-evex512-vecs.h | 38 ++++ > > sysdeps/x86_64/multiarch/x86-sse2-vecs.h | 47 +++++ > > sysdeps/x86_64/multiarch/x86-vec-macros.h | 90 ++++++++++ > > 9 files changed, 627 insertions(+) > > create mode 100644 sysdeps/x86_64/multiarch/reg-macros.h > > create mode 100644 sysdeps/x86_64/multiarch/scripts/gen-reg-macros.py > > create mode 100644 sysdeps/x86_64/multiarch/x86-avx-rtm-vecs.h > > create mode 100644 sysdeps/x86_64/multiarch/x86-avx-vecs.h > > create mode 100644 sysdeps/x86_64/multiarch/x86-evex-vecs-common.h > > create mode 100644 sysdeps/x86_64/multiarch/x86-evex256-vecs.h > > create mode 100644 sysdeps/x86_64/multiarch/x86-evex512-vecs.h > > create mode 100644 
sysdeps/x86_64/multiarch/x86-sse2-vecs.h > > create mode 100644 sysdeps/x86_64/multiarch/x86-vec-macros.h > > > > diff --git a/sysdeps/x86_64/multiarch/reg-macros.h b/sysdeps/x86_64/multiarch/reg-macros.h > > new file mode 100644 > > index 0000000000..c8ea330256 > > --- /dev/null > > +++ b/sysdeps/x86_64/multiarch/reg-macros.h > > @@ -0,0 +1,168 @@ > > +/* This file was generated by: gen-reg-macros.py. > > + > > + Copyright (C) 2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. 
*/ > > + > > +#ifndef _REG_MACROS_H > > +#define _REG_MACROS_H 1 > > + > > +#define rax_8 al > > +#define rax_16 ax > > +#define rax_32 eax > > +#define rax_64 rax > > +#define rbx_8 bl > > +#define rbx_16 bx > > +#define rbx_32 ebx > > +#define rbx_64 rbx > > +#define rcx_8 cl > > +#define rcx_16 cx > > +#define rcx_32 ecx > > +#define rcx_64 rcx > > +#define rdx_8 dl > > +#define rdx_16 dx > > +#define rdx_32 edx > > +#define rdx_64 rdx > > +#define rbp_8 bpl > > +#define rbp_16 bp > > +#define rbp_32 ebp > > +#define rbp_64 rbp > > +#define rsp_8 spl > > +#define rsp_16 sp > > +#define rsp_32 esp > > +#define rsp_64 rsp > > +#define rsi_8 sil > > +#define rsi_16 si > > +#define rsi_32 esi > > +#define rsi_64 rsi > > +#define rdi_8 dil > > +#define rdi_16 di > > +#define rdi_32 edi > > +#define rdi_64 rdi > > +#define r8_8 r8b > > +#define r8_16 r8w > > +#define r8_32 r8d > > +#define r8_64 r8 > > +#define r9_8 r9b > > +#define r9_16 r9w > > +#define r9_32 r9d > > +#define r9_64 r9 > > +#define r10_8 r10b > > +#define r10_16 r10w > > +#define r10_32 r10d > > +#define r10_64 r10 > > +#define r11_8 r11b > > +#define r11_16 r11w > > +#define r11_32 r11d > > +#define r11_64 r11 > > +#define r12_8 r12b > > +#define r12_16 r12w > > +#define r12_32 r12d > > +#define r12_64 r12 > > +#define r13_8 r13b > > +#define r13_16 r13w > > +#define r13_32 r13d > > +#define r13_64 r13 > > +#define r14_8 r14b > > +#define r14_16 r14w > > +#define r14_32 r14d > > +#define r14_64 r14 > > +#define r15_8 r15b > > +#define r15_16 r15w > > +#define r15_32 r15d > > +#define r15_64 r15 > > + > > +#define kmov_8 kmovb > > +#define kmov_16 kmovw > > +#define kmov_32 kmovd > > +#define kmov_64 kmovq > > +#define kortest_8 kortestb > > +#define kortest_16 kortestw > > +#define kortest_32 kortestd > > +#define kortest_64 kortestq > > +#define kor_8 korb > > +#define kor_16 korw > > +#define kor_32 kord > > +#define kor_64 korq > > +#define ktest_8 ktestb > > +#define ktest_16 ktestw > > +#define 
ktest_32 ktestd > > +#define ktest_64 ktestq > > +#define kand_8 kandb > > +#define kand_16 kandw > > +#define kand_32 kandd > > +#define kand_64 kandq > > +#define kxor_8 kxorb > > +#define kxor_16 kxorw > > +#define kxor_32 kxord > > +#define kxor_64 kxorq > > +#define knot_8 knotb > > +#define knot_16 knotw > > +#define knot_32 knotd > > +#define knot_64 knotq > > +#define kxnor_8 kxnorb > > +#define kxnor_16 kxnorw > > +#define kxnor_32 kxnord > > +#define kxnor_64 kxnorq > > +#define kunpack_8 kunpackbw > > +#define kunpack_16 kunpackwd > > +#define kunpack_32 kunpackdq > > + > > +/* Common API for accessing proper width GPR is V{upcase_GPR_name}. */ > > +#define VRAX VGPR(rax) > > +#define VRBX VGPR(rbx) > > +#define VRCX VGPR(rcx) > > +#define VRDX VGPR(rdx) > > +#define VRBP VGPR(rbp) > > +#define VRSP VGPR(rsp) > > +#define VRSI VGPR(rsi) > > +#define VRDI VGPR(rdi) > > +#define VR8 VGPR(r8) > > +#define VR9 VGPR(r9) > > +#define VR10 VGPR(r10) > > +#define VR11 VGPR(r11) > > +#define VR12 VGPR(r12) > > +#define VR13 VGPR(r13) > > +#define VR14 VGPR(r14) > > +#define VR15 VGPR(r15) > > + > > +/* Common API for accessing proper width mask insn is {upcase_mask_insn}. */ > > +#define KMOV VKINSN(kmov) > > +#define KORTEST VKINSN(kortest) > > +#define KOR VKINSN(kor) > > +#define KTEST VKINSN(ktest) > > +#define KAND VKINSN(kand) > > +#define KXOR VKINSN(kxor) > > +#define KNOT VKINSN(knot) > > +#define KXNOR VKINSN(kxnor) > > +#define KUNPACK VKINSN(kunpack) > > These aren't register macros. Should reg-macros.h be renamed, like > vec-macros.h? 
> > > + > > +#ifdef USE_WIDE_CHAR > > +# define REG_WIDTH 32 > > +#else > > +# define REG_WIDTH VEC_SIZE > > +#endif > > + > > +#define VPASTER(x, y) x##_##y > > +#define VEVALUATOR(x, y) VPASTER(x, y) > > + > > +#define VGPR_SZ(reg_name, reg_size) VEVALUATOR(reg_name, reg_size) > > +#define VKINSN_SZ(insn, reg_size) VEVALUATOR(insn, reg_size) > > + > > +#define VGPR(reg_name) VGPR_SZ(reg_name, REG_WIDTH) > > +#define VKINSN(mask_insn) VKINSN_SZ(mask_insn, REG_WIDTH) > > + > > +#endif > > diff --git a/sysdeps/x86_64/multiarch/scripts/gen-reg-macros.py b/sysdeps/x86_64/multiarch/scripts/gen-reg-macros.py > > new file mode 100644 > > index 0000000000..6a05f27ff4 > > --- /dev/null > > +++ b/sysdeps/x86_64/multiarch/scripts/gen-reg-macros.py > > @@ -0,0 +1,125 @@ > > +#!/usr/bin/python3 > > +# Copyright (C) 2022 Free Software Foundation, Inc. > > +# This file is part of the GNU C Library. > > +# > > +# The GNU C Library is free software; you can redistribute it and/or > > +# modify it under the terms of the GNU Lesser General Public > > +# License as published by the Free Software Foundation; either > > +# version 2.1 of the License, or (at your option) any later version. > > +# > > +# The GNU C Library is distributed in the hope that it will be useful, > > +# but WITHOUT ANY WARRANTY; without even the implied warranty of > > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > +# Lesser General Public License for more details. > > +# > > +# You should have received a copy of the GNU Lesser General Public > > +# License along with the GNU C Library; if not, see > > +# <https://www.gnu.org/licenses/>. 
> > +"""Generate macros for getting GPR name of a certain size > > + > > +Inputs: None > > +Output: Prints header fill to stdout > > + > > +API: > > + VGPR(reg_name) > > + - Get register name VEC_SIZE component of `reg_name` > > + VGPR_SZ(reg_name, reg_size) > > + - Get register name `reg_size` component of `reg_name` > > +""" > > + > > +import sys > > +import os > > +from datetime import datetime > > + > > +registers = [["rax", "eax", "ax", "al"], ["rbx", "ebx", "bx", "bl"], > > + ["rcx", "ecx", "cx", "cl"], ["rdx", "edx", "dx", "dl"], > > + ["rbp", "ebp", "bp", "bpl"], ["rsp", "esp", "sp", "spl"], > > + ["rsi", "esi", "si", "sil"], ["rdi", "edi", "di", "dil"], > > + ["r8", "r8d", "r8w", "r8b"], ["r9", "r9d", "r9w", "r9b"], > > + ["r10", "r10d", "r10w", "r10b"], ["r11", "r11d", "r11w", "r11b"], > > + ["r12", "r12d", "r12w", "r12b"], ["r13", "r13d", "r13w", "r13b"], > > + ["r14", "r14d", "r14w", "r14b"], ["r15", "r15d", "r15w", "r15b"]] > > + > > +mask_insns = [ > > + "kmov", > > + "kortest", > > + "kor", > > + "ktest", > > + "kand", > > + "kxor", > > + "knot", > > + "kxnor", > > +] > > +mask_insns_ext = ["b", "w", "d", "q"] > > + > > +cr = """ > > + Copyright (C) {} Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. 
*/ > > +""" > > + > > +print("/* This file was generated by: {}.".format(os.path.basename( > > + sys.argv[0]))) > > +print(cr.format(datetime.today().year)) > > + > > +print("#ifndef _REG_MACROS_H") > > +print("#define _REG_MACROS_H\t1") > > +print("") > > +for reg in registers: > > + for i in range(0, 4): > > + print("#define {}_{}\t{}".format(reg[0], 8 << i, reg[3 - i])) > > + > > +print("") > > +for mask_insn in mask_insns: > > + for i in range(0, 4): > > + print("#define {}_{}\t{}{}".format(mask_insn, 8 << i, mask_insn, > > + mask_insns_ext[i])) > > +for i in range(0, 3): > > + print("#define kunpack_{}\tkunpack{}{}".format(8 << i, mask_insns_ext[i], > > + mask_insns_ext[i + 1])) > > +mask_insns.append("kunpack") > > + > > +print("") > > +print( > > + "/* Common API for accessing proper width GPR is V{upcase_GPR_name}. */") > > +for reg in registers: > > + print("#define V{}\tVGPR({})".format(reg[0].upper(), reg[0])) > > + > > +print("") > > + > > +print( > > + "/* Common API for accessing proper width mask insn is {upcase_mask_insn}. 
*/" > > +) > > +for mask_insn in mask_insns: > > + print("#define {} \tVKINSN({})".format(mask_insn.upper(), mask_insn)) > > +print("") > > + > > +print("#ifdef USE_WIDE_CHAR") > > +print("# define REG_WIDTH 32") > > +print("#else") > > +print("# define REG_WIDTH VEC_SIZE") > > +print("#endif") > > +print("") > > +print("#define VPASTER(x, y)\tx##_##y") > > +print("#define VEVALUATOR(x, y)\tVPASTER(x, y)") > > +print("") > > +print("#define VGPR_SZ(reg_name, reg_size)\tVEVALUATOR(reg_name, reg_size)") > > +print("#define VKINSN_SZ(insn, reg_size)\tVEVALUATOR(insn, reg_size)") > > +print("") > > +print("#define VGPR(reg_name)\tVGPR_SZ(reg_name, REG_WIDTH)") > > +print("#define VKINSN(mask_insn)\tVKINSN_SZ(mask_insn, REG_WIDTH)") > > + > > +print("\n#endif") > > diff --git a/sysdeps/x86_64/multiarch/x86-avx-rtm-vecs.h b/sysdeps/x86_64/multiarch/x86-avx-rtm-vecs.h > > new file mode 100644 > > index 0000000000..0b326c8a70 > > --- /dev/null > > +++ b/sysdeps/x86_64/multiarch/x86-avx-rtm-vecs.h > > @@ -0,0 +1,35 @@ > > +/* Common config for AVX-RTM VECs > > + All versions must be listed in ifunc-impl-list.c. > > + Copyright (C) 2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. 
*/ > > + > > +#ifndef _X86_AVX_RTM_VECS_H > > +#define _X86_AVX_RTM_VECS_H 1 > > + > > +#define COND_VZEROUPPER COND_VZEROUPPER_XTEST > > +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ > > + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST > > + > > +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) > > + > > +#define USE_WITH_RTM 1 > > +#include "x86-avx-vecs.h" > > + > > +#undef SECTION > > +#define SECTION(p) p##.avx.rtm > > + > > +#endif > > diff --git a/sysdeps/x86_64/multiarch/x86-avx-vecs.h b/sysdeps/x86_64/multiarch/x86-avx-vecs.h > > new file mode 100644 > > index 0000000000..dca1089060 > > --- /dev/null > > +++ b/sysdeps/x86_64/multiarch/x86-avx-vecs.h > > @@ -0,0 +1,47 @@ > > +/* Common config for AVX VECs > > + All versions must be listed in ifunc-impl-list.c. > > + Copyright (C) 2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#ifndef _X86_AVX_VECS_H > > +#define _X86_AVX_VECS_H 1 > > + > > +#ifdef VEC_SIZE > > +# error "Multiple VEC configs included!" > > +#endif > > + > > +#define VEC_SIZE 32 > > +#include "x86-vec-macros.h" > > + > > +#define USE_WITH_AVX 1 > > +#define SECTION(p) p##.avx > > + > > +/* 4-byte mov instructions with AVX2. */ > > +#define MOV_SIZE 4 > > +/* 1 (ret) + 3 (vzeroupper). 
*/ > > +#define RET_SIZE 4 > > +#define VZEROUPPER vzeroupper > > + > > +#define VMOVU vmovdqu > > +#define VMOVA vmovdqa > > +#define VMOVNT vmovntdq > > + > > +/* Often need to access xmm portion. */ > > +#define VMM_128 VMM_any_xmm > > +#define VMM VMM_any_ymm > > + > > +#endif > > diff --git a/sysdeps/x86_64/multiarch/x86-evex-vecs-common.h b/sysdeps/x86_64/multiarch/x86-evex-vecs-common.h > > new file mode 100644 > > index 0000000000..f331e9d8ec > > --- /dev/null > > +++ b/sysdeps/x86_64/multiarch/x86-evex-vecs-common.h > > @@ -0,0 +1,39 @@ > > +/* Common config for EVEX256 and EVEX512 VECs > > + All versions must be listed in ifunc-impl-list.c. > > + Copyright (C) 2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#ifndef _X86_EVEX_VECS_COMMON_H > > +#define _X86_EVEX_VECS_COMMON_H 1 > > + > > +#include "x86-vec-macros.h" > > + > > +/* 6-byte mov instructions with EVEX. */ > > +#define MOV_SIZE 6 > > +/* No vzeroupper needed. 
*/ > > +#define RET_SIZE 1 > > +#define VZEROUPPER > > + > > +#define VMOVU vmovdqu64 > > +#define VMOVA vmovdqa64 > > +#define VMOVNT vmovntdq > > + > > +#define VMM_128 VMM_hi_xmm > > +#define VMM_256 VMM_hi_ymm > > +#define VMM_512 VMM_hi_zmm > > + > > +#endif > > diff --git a/sysdeps/x86_64/multiarch/x86-evex256-vecs.h b/sysdeps/x86_64/multiarch/x86-evex256-vecs.h > > new file mode 100644 > > index 0000000000..8337b95504 > > --- /dev/null > > +++ b/sysdeps/x86_64/multiarch/x86-evex256-vecs.h > > @@ -0,0 +1,38 @@ > > +/* Common config for EVEX256 VECs > > + All versions must be listed in ifunc-impl-list.c. > > + Copyright (C) 2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#ifndef _EVEX256_VECS_H > > +#define _EVEX256_VECS_H 1 > > + > > +#ifdef VEC_SIZE > > +# error "Multiple VEC configs included!" 
> > +#endif > > + > > +#define VEC_SIZE 32 > > +#include "x86-evex-vecs-common.h" > > + > > +#define USE_WITH_EVEX256 1 > > + > > +#ifndef SECTION > > +# define SECTION(p) p##.evex > > +#endif > > + > > +#define VMM VMM_256 > > +#define VMM_lo VMM_any_ymm > > +#endif > > diff --git a/sysdeps/x86_64/multiarch/x86-evex512-vecs.h b/sysdeps/x86_64/multiarch/x86-evex512-vecs.h > > new file mode 100644 > > index 0000000000..7dc5c23ad0 > > --- /dev/null > > +++ b/sysdeps/x86_64/multiarch/x86-evex512-vecs.h > > @@ -0,0 +1,38 @@ > > +/* Common config for EVEX512 VECs > > + All versions must be listed in ifunc-impl-list.c. > > + Copyright (C) 2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#ifndef _EVEX512_VECS_H > > +#define _EVEX512_VECS_H 1 > > + > > +#ifdef VEC_SIZE > > +# error "Multiple VEC configs included!" 
> > +#endif > > + > > +#define VEC_SIZE 64 > > +#include "x86-evex-vecs-common.h" > > + > > +#define USE_WITH_EVEX512 1 > > + > > +#ifndef SECTION > > +# define SECTION(p) p##.evex512 > > +#endif > > + > > +#define VMM VMM_512 > > +#define VMM_lo VMM_any_zmm > > +#endif > > diff --git a/sysdeps/x86_64/multiarch/x86-sse2-vecs.h b/sysdeps/x86_64/multiarch/x86-sse2-vecs.h > > new file mode 100644 > > index 0000000000..b8bbd5dc29 > > --- /dev/null > > +++ b/sysdeps/x86_64/multiarch/x86-sse2-vecs.h > > @@ -0,0 +1,47 @@ > > +/* Common config for SSE2 VECs > > + All versions must be listed in ifunc-impl-list.c. > > + Copyright (C) 2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#ifndef _X86_SSE2_VECS_H > > +#define _X86_SSE2_VECS_H 1 > > + > > +#ifdef VEC_SIZE > > +# error "Multiple VEC configs included!" > > +#endif > > + > > +#define VEC_SIZE 16 > > +#include "x86-vec-macros.h" > > + > > +#define USE_WITH_SSE2 1 > > +#define SECTION(p) p > > + > > +/* 3-byte mov instructions with SSE2. */ > > +#define MOV_SIZE 3 > > +/* No vzeroupper needed. 
*/ > > +#define RET_SIZE 1 > > +#define VZEROUPPER > > + > > +#define VMOVU movups > > +#define VMOVA movaps > > +#define VMOVNT movntdq > > + > > +#define VMM_128 VMM_any_xmm > > +#define VMM VMM_any_xmm > > + > > + > > +#endif > > diff --git a/sysdeps/x86_64/multiarch/x86-vec-macros.h b/sysdeps/x86_64/multiarch/x86-vec-macros.h > > new file mode 100644 > > index 0000000000..7d6bb31d55 > > --- /dev/null > > +++ b/sysdeps/x86_64/multiarch/x86-vec-macros.h > > @@ -0,0 +1,90 @@ > > +/* Macro helpers for VEC_{type}({vec_num}) > > + All versions must be listed in ifunc-impl-list.c. > > + Copyright (C) 2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#ifndef _X86_VEC_MACROS_H > > +#define _X86_VEC_MACROS_H 1 > > + > > +#ifndef VEC_SIZE > > +# error "Never include this file directly. Always include a vector config." > > +#endif > > + > > +/* Defines so we can use SSE2 / AVX2 / EVEX / EVEX512 encoding with same > > + VMM(N) values. 
*/ > > +#define VMM_hi_xmm0 xmm16 > > +#define VMM_hi_xmm1 xmm17 > > +#define VMM_hi_xmm2 xmm18 > > +#define VMM_hi_xmm3 xmm19 > > +#define VMM_hi_xmm4 xmm20 > > +#define VMM_hi_xmm5 xmm21 > > +#define VMM_hi_xmm6 xmm22 > > +#define VMM_hi_xmm7 xmm23 > > +#define VMM_hi_xmm8 xmm24 > > +#define VMM_hi_xmm9 xmm25 > > +#define VMM_hi_xmm10 xmm26 > > +#define VMM_hi_xmm11 xmm27 > > +#define VMM_hi_xmm12 xmm28 > > +#define VMM_hi_xmm13 xmm29 > > +#define VMM_hi_xmm14 xmm30 > > +#define VMM_hi_xmm15 xmm31 > > + > > +#define VMM_hi_ymm0 ymm16 > > +#define VMM_hi_ymm1 ymm17 > > +#define VMM_hi_ymm2 ymm18 > > +#define VMM_hi_ymm3 ymm19 > > +#define VMM_hi_ymm4 ymm20 > > +#define VMM_hi_ymm5 ymm21 > > +#define VMM_hi_ymm6 ymm22 > > +#define VMM_hi_ymm7 ymm23 > > +#define VMM_hi_ymm8 ymm24 > > +#define VMM_hi_ymm9 ymm25 > > +#define VMM_hi_ymm10 ymm26 > > +#define VMM_hi_ymm11 ymm27 > > +#define VMM_hi_ymm12 ymm28 > > +#define VMM_hi_ymm13 ymm29 > > +#define VMM_hi_ymm14 ymm30 > > +#define VMM_hi_ymm15 ymm31 > > + > > +#define VMM_hi_zmm0 zmm16 > > +#define VMM_hi_zmm1 zmm17 > > +#define VMM_hi_zmm2 zmm18 > > +#define VMM_hi_zmm3 zmm19 > > +#define VMM_hi_zmm4 zmm20 > > +#define VMM_hi_zmm5 zmm21 > > +#define VMM_hi_zmm6 zmm22 > > +#define VMM_hi_zmm7 zmm23 > > +#define VMM_hi_zmm8 zmm24 > > +#define VMM_hi_zmm9 zmm25 > > +#define VMM_hi_zmm10 zmm26 > > +#define VMM_hi_zmm11 zmm27 > > +#define VMM_hi_zmm12 zmm28 > > +#define VMM_hi_zmm13 zmm29 > > +#define VMM_hi_zmm14 zmm30 > > +#define VMM_hi_zmm15 zmm31 > > + > > +#define PRIMITIVE_VMM(vec, num) vec##num > > + > > +#define VMM_any_xmm(i) PRIMITIVE_VMM(xmm, i) > > +#define VMM_any_ymm(i) PRIMITIVE_VMM(ymm, i) > > +#define VMM_any_zmm(i) PRIMITIVE_VMM(zmm, i) > > + > > +#define VMM_hi_xmm(i) PRIMITIVE_VMM(VMM_hi_xmm, i) > > +#define VMM_hi_ymm(i) PRIMITIVE_VMM(VMM_hi_ymm, i) > > +#define VMM_hi_zmm(i) PRIMITIVE_VMM(VMM_hi_zmm, i) > > + > > +#endif > > -- > > 2.34.1 > > > > > -- > H.J.
diff --git a/sysdeps/x86_64/multiarch/reg-macros.h b/sysdeps/x86_64/multiarch/reg-macros.h new file mode 100644 index 0000000000..c8ea330256 --- /dev/null +++ b/sysdeps/x86_64/multiarch/reg-macros.h @@ -0,0 +1,168 @@ +/* This file was generated by: gen-reg-macros.py. + + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#ifndef _REG_MACROS_H +#define _REG_MACROS_H 1 + +#define rax_8 al +#define rax_16 ax +#define rax_32 eax +#define rax_64 rax +#define rbx_8 bl +#define rbx_16 bx +#define rbx_32 ebx +#define rbx_64 rbx +#define rcx_8 cl +#define rcx_16 cx +#define rcx_32 ecx +#define rcx_64 rcx +#define rdx_8 dl +#define rdx_16 dx +#define rdx_32 edx +#define rdx_64 rdx +#define rbp_8 bpl +#define rbp_16 bp +#define rbp_32 ebp +#define rbp_64 rbp +#define rsp_8 spl +#define rsp_16 sp +#define rsp_32 esp +#define rsp_64 rsp +#define rsi_8 sil +#define rsi_16 si +#define rsi_32 esi +#define rsi_64 rsi +#define rdi_8 dil +#define rdi_16 di +#define rdi_32 edi +#define rdi_64 rdi +#define r8_8 r8b +#define r8_16 r8w +#define r8_32 r8d +#define r8_64 r8 +#define r9_8 r9b +#define r9_16 r9w +#define r9_32 r9d +#define r9_64 r9 +#define r10_8 r10b +#define r10_16 r10w +#define r10_32 r10d +#define r10_64 r10 +#define r11_8 r11b +#define r11_16 r11w +#define r11_32 r11d +#define r11_64 r11 +#define r12_8 r12b +#define r12_16 r12w +#define r12_32 r12d +#define r12_64 r12 +#define r13_8 r13b +#define r13_16 r13w +#define r13_32 r13d +#define r13_64 r13 +#define r14_8 r14b +#define r14_16 r14w +#define r14_32 r14d +#define r14_64 r14 +#define r15_8 r15b +#define r15_16 r15w +#define r15_32 r15d +#define r15_64 r15 + +#define kmov_8 kmovb +#define kmov_16 kmovw +#define kmov_32 kmovd +#define kmov_64 kmovq +#define kortest_8 kortestb +#define kortest_16 kortestw +#define kortest_32 kortestd +#define kortest_64 kortestq +#define kor_8 korb +#define kor_16 korw +#define kor_32 kord +#define kor_64 korq +#define ktest_8 ktestb +#define ktest_16 ktestw +#define ktest_32 ktestd +#define ktest_64 ktestq +#define kand_8 kandb +#define kand_16 kandw +#define kand_32 kandd +#define kand_64 kandq +#define kxor_8 kxorb +#define kxor_16 kxorw +#define kxor_32 kxord +#define kxor_64 kxorq +#define knot_8 knotb +#define knot_16 knotw +#define knot_32 knotd +#define knot_64 knotq +#define kxnor_8 
kxnorb +#define kxnor_16 kxnorw +#define kxnor_32 kxnord +#define kxnor_64 kxnorq +#define kunpack_8 kunpackbw +#define kunpack_16 kunpackwd +#define kunpack_32 kunpackdq + +/* Common API for accessing proper width GPR is V{upcase_GPR_name}. */ +#define VRAX VGPR(rax) +#define VRBX VGPR(rbx) +#define VRCX VGPR(rcx) +#define VRDX VGPR(rdx) +#define VRBP VGPR(rbp) +#define VRSP VGPR(rsp) +#define VRSI VGPR(rsi) +#define VRDI VGPR(rdi) +#define VR8 VGPR(r8) +#define VR9 VGPR(r9) +#define VR10 VGPR(r10) +#define VR11 VGPR(r11) +#define VR12 VGPR(r12) +#define VR13 VGPR(r13) +#define VR14 VGPR(r14) +#define VR15 VGPR(r15) + +/* Common API for accessing proper width mask insn is {upcase_mask_insn}. */ +#define KMOV VKINSN(kmov) +#define KORTEST VKINSN(kortest) +#define KOR VKINSN(kor) +#define KTEST VKINSN(ktest) +#define KAND VKINSN(kand) +#define KXOR VKINSN(kxor) +#define KNOT VKINSN(knot) +#define KXNOR VKINSN(kxnor) +#define KUNPACK VKINSN(kunpack) + +#ifdef USE_WIDE_CHAR +# define REG_WIDTH 32 +#else +# define REG_WIDTH VEC_SIZE +#endif + +#define VPASTER(x, y) x##_##y +#define VEVALUATOR(x, y) VPASTER(x, y) + +#define VGPR_SZ(reg_name, reg_size) VEVALUATOR(reg_name, reg_size) +#define VKINSN_SZ(insn, reg_size) VEVALUATOR(insn, reg_size) + +#define VGPR(reg_name) VGPR_SZ(reg_name, REG_WIDTH) +#define VKINSN(mask_insn) VKINSN_SZ(mask_insn, REG_WIDTH) + +#endif diff --git a/sysdeps/x86_64/multiarch/scripts/gen-reg-macros.py b/sysdeps/x86_64/multiarch/scripts/gen-reg-macros.py new file mode 100644 index 0000000000..6a05f27ff4 --- /dev/null +++ b/sysdeps/x86_64/multiarch/scripts/gen-reg-macros.py @@ -0,0 +1,125 @@ +#!/usr/bin/python3 +# Copyright (C) 2022 Free Software Foundation, Inc. +# This file is part of the GNU C Library. 
+# +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <https://www.gnu.org/licenses/>. +"""Generate macros for getting GPR name of a certain size + +Inputs: None +Output: Prints header fill to stdout + +API: + VGPR(reg_name) + - Get register name VEC_SIZE component of `reg_name` + VGPR_SZ(reg_name, reg_size) + - Get register name `reg_size` component of `reg_name` +""" + +import sys +import os +from datetime import datetime + +registers = [["rax", "eax", "ax", "al"], ["rbx", "ebx", "bx", "bl"], + ["rcx", "ecx", "cx", "cl"], ["rdx", "edx", "dx", "dl"], + ["rbp", "ebp", "bp", "bpl"], ["rsp", "esp", "sp", "spl"], + ["rsi", "esi", "si", "sil"], ["rdi", "edi", "di", "dil"], + ["r8", "r8d", "r8w", "r8b"], ["r9", "r9d", "r9w", "r9b"], + ["r10", "r10d", "r10w", "r10b"], ["r11", "r11d", "r11w", "r11b"], + ["r12", "r12d", "r12w", "r12b"], ["r13", "r13d", "r13w", "r13b"], + ["r14", "r14d", "r14w", "r14b"], ["r15", "r15d", "r15w", "r15b"]] + +mask_insns = [ + "kmov", + "kortest", + "kor", + "ktest", + "kand", + "kxor", + "knot", + "kxnor", +] +mask_insns_ext = ["b", "w", "d", "q"] + +cr = """ + Copyright (C) {} Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ +""" + +print("/* This file was generated by: {}.".format(os.path.basename( + sys.argv[0]))) +print(cr.format(datetime.today().year)) + +print("#ifndef _REG_MACROS_H") +print("#define _REG_MACROS_H\t1") +print("") +for reg in registers: + for i in range(0, 4): + print("#define {}_{}\t{}".format(reg[0], 8 << i, reg[3 - i])) + +print("") +for mask_insn in mask_insns: + for i in range(0, 4): + print("#define {}_{}\t{}{}".format(mask_insn, 8 << i, mask_insn, + mask_insns_ext[i])) +for i in range(0, 3): + print("#define kunpack_{}\tkunpack{}{}".format(8 << i, mask_insns_ext[i], + mask_insns_ext[i + 1])) +mask_insns.append("kunpack") + +print("") +print( + "/* Common API for accessing proper width GPR is V{upcase_GPR_name}. */") +for reg in registers: + print("#define V{}\tVGPR({})".format(reg[0].upper(), reg[0])) + +print("") + +print( + "/* Common API for accessing proper width mask insn is {upcase_mask_insn}. 
*/" +) +for mask_insn in mask_insns: + print("#define {} \tVKINSN({})".format(mask_insn.upper(), mask_insn)) +print("") + +print("#ifdef USE_WIDE_CHAR") +print("# define REG_WIDTH 32") +print("#else") +print("# define REG_WIDTH VEC_SIZE") +print("#endif") +print("") +print("#define VPASTER(x, y)\tx##_##y") +print("#define VEVALUATOR(x, y)\tVPASTER(x, y)") +print("") +print("#define VGPR_SZ(reg_name, reg_size)\tVEVALUATOR(reg_name, reg_size)") +print("#define VKINSN_SZ(insn, reg_size)\tVEVALUATOR(insn, reg_size)") +print("") +print("#define VGPR(reg_name)\tVGPR_SZ(reg_name, REG_WIDTH)") +print("#define VKINSN(mask_insn)\tVKINSN_SZ(mask_insn, REG_WIDTH)") + +print("\n#endif") diff --git a/sysdeps/x86_64/multiarch/x86-avx-rtm-vecs.h b/sysdeps/x86_64/multiarch/x86-avx-rtm-vecs.h new file mode 100644 index 0000000000..0b326c8a70 --- /dev/null +++ b/sysdeps/x86_64/multiarch/x86-avx-rtm-vecs.h @@ -0,0 +1,35 @@ +/* Common config for AVX-RTM VECs + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#ifndef _X86_AVX_RTM_VECS_H +#define _X86_AVX_RTM_VECS_H 1 + +#define COND_VZEROUPPER COND_VZEROUPPER_XTEST +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define USE_WITH_RTM 1 +#include "x86-avx-vecs.h" + +#undef SECTION +#define SECTION(p) p##.avx.rtm + +#endif diff --git a/sysdeps/x86_64/multiarch/x86-avx-vecs.h b/sysdeps/x86_64/multiarch/x86-avx-vecs.h new file mode 100644 index 0000000000..dca1089060 --- /dev/null +++ b/sysdeps/x86_64/multiarch/x86-avx-vecs.h @@ -0,0 +1,47 @@ +/* Common config for AVX VECs + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _X86_AVX_VECS_H +#define _X86_AVX_VECS_H 1 + +#ifdef VEC_SIZE +# error "Multiple VEC configs included!" +#endif + +#define VEC_SIZE 32 +#include "x86-vec-macros.h" + +#define USE_WITH_AVX 1 +#define SECTION(p) p##.avx + +/* 4-byte mov instructions with AVX2. */ +#define MOV_SIZE 4 +/* 1 (ret) + 3 (vzeroupper). */ +#define RET_SIZE 4 +#define VZEROUPPER vzeroupper + +#define VMOVU vmovdqu +#define VMOVA vmovdqa +#define VMOVNT vmovntdq + +/* Often need to access xmm portion. 
*/ +#define VMM_128 VMM_any_xmm +#define VMM VMM_any_ymm + +#endif diff --git a/sysdeps/x86_64/multiarch/x86-evex-vecs-common.h b/sysdeps/x86_64/multiarch/x86-evex-vecs-common.h new file mode 100644 index 0000000000..f331e9d8ec --- /dev/null +++ b/sysdeps/x86_64/multiarch/x86-evex-vecs-common.h @@ -0,0 +1,39 @@ +/* Common config for EVEX256 and EVEX512 VECs + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _X86_EVEX_VECS_COMMON_H +#define _X86_EVEX_VECS_COMMON_H 1 + +#include "x86-vec-macros.h" + +/* 6-byte mov instructions with EVEX. */ +#define MOV_SIZE 6 +/* No vzeroupper needed. */ +#define RET_SIZE 1 +#define VZEROUPPER + +#define VMOVU vmovdqu64 +#define VMOVA vmovdqa64 +#define VMOVNT vmovntdq + +#define VMM_128 VMM_hi_xmm +#define VMM_256 VMM_hi_ymm +#define VMM_512 VMM_hi_zmm + +#endif diff --git a/sysdeps/x86_64/multiarch/x86-evex256-vecs.h b/sysdeps/x86_64/multiarch/x86-evex256-vecs.h new file mode 100644 index 0000000000..8337b95504 --- /dev/null +++ b/sysdeps/x86_64/multiarch/x86-evex256-vecs.h @@ -0,0 +1,38 @@ +/* Common config for EVEX256 VECs + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2022 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _EVEX256_VECS_H +#define _EVEX256_VECS_H 1 + +#ifdef VEC_SIZE +# error "Multiple VEC configs included!" +#endif + +#define VEC_SIZE 32 +#include "x86-evex-vecs-common.h" + +#define USE_WITH_EVEX256 1 + +#ifndef SECTION +# define SECTION(p) p##.evex +#endif + +#define VMM VMM_256 +#define VMM_lo VMM_any_ymm +#endif diff --git a/sysdeps/x86_64/multiarch/x86-evex512-vecs.h b/sysdeps/x86_64/multiarch/x86-evex512-vecs.h new file mode 100644 index 0000000000..7dc5c23ad0 --- /dev/null +++ b/sysdeps/x86_64/multiarch/x86-evex512-vecs.h @@ -0,0 +1,38 @@ +/* Common config for EVEX512 VECs + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _EVEX512_VECS_H +#define _EVEX512_VECS_H 1 + +#ifdef VEC_SIZE +# error "Multiple VEC configs included!" +#endif + +#define VEC_SIZE 64 +#include "x86-evex-vecs-common.h" + +#define USE_WITH_EVEX512 1 + +#ifndef SECTION +# define SECTION(p) p##.evex512 +#endif + +#define VMM VMM_512 +#define VMM_lo VMM_any_zmm +#endif diff --git a/sysdeps/x86_64/multiarch/x86-sse2-vecs.h b/sysdeps/x86_64/multiarch/x86-sse2-vecs.h new file mode 100644 index 0000000000..b8bbd5dc29 --- /dev/null +++ b/sysdeps/x86_64/multiarch/x86-sse2-vecs.h @@ -0,0 +1,47 @@ +/* Common config for SSE2 VECs + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _X86_SSE2_VECS_H +#define _X86_SSE2_VECS_H 1 + +#ifdef VEC_SIZE +# error "Multiple VEC configs included!" +#endif + +#define VEC_SIZE 16 +#include "x86-vec-macros.h" + +#define USE_WITH_SSE2 1 +#define SECTION(p) p + +/* 3-byte mov instructions with SSE2. */ +#define MOV_SIZE 3 +/* No vzeroupper needed. 
*/ +#define RET_SIZE 1 +#define VZEROUPPER + +#define VMOVU movups +#define VMOVA movaps +#define VMOVNT movntdq + +#define VMM_128 VMM_any_xmm +#define VMM VMM_any_xmm + + +#endif diff --git a/sysdeps/x86_64/multiarch/x86-vec-macros.h b/sysdeps/x86_64/multiarch/x86-vec-macros.h new file mode 100644 index 0000000000..7d6bb31d55 --- /dev/null +++ b/sysdeps/x86_64/multiarch/x86-vec-macros.h @@ -0,0 +1,90 @@ +/* Macro helpers for VMM_{type}({vec_num}) + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _X86_VEC_MACROS_H +#define _X86_VEC_MACROS_H 1 + +#ifndef VEC_SIZE +# error "Never include this file directly. Always include a vector config." +#endif + +/* Defines so we can use SSE2 / AVX2 / EVEX / EVEX512 encoding with same + VMM(N) values.
*/ +#define VMM_hi_xmm0 xmm16 +#define VMM_hi_xmm1 xmm17 +#define VMM_hi_xmm2 xmm18 +#define VMM_hi_xmm3 xmm19 +#define VMM_hi_xmm4 xmm20 +#define VMM_hi_xmm5 xmm21 +#define VMM_hi_xmm6 xmm22 +#define VMM_hi_xmm7 xmm23 +#define VMM_hi_xmm8 xmm24 +#define VMM_hi_xmm9 xmm25 +#define VMM_hi_xmm10 xmm26 +#define VMM_hi_xmm11 xmm27 +#define VMM_hi_xmm12 xmm28 +#define VMM_hi_xmm13 xmm29 +#define VMM_hi_xmm14 xmm30 +#define VMM_hi_xmm15 xmm31 + +#define VMM_hi_ymm0 ymm16 +#define VMM_hi_ymm1 ymm17 +#define VMM_hi_ymm2 ymm18 +#define VMM_hi_ymm3 ymm19 +#define VMM_hi_ymm4 ymm20 +#define VMM_hi_ymm5 ymm21 +#define VMM_hi_ymm6 ymm22 +#define VMM_hi_ymm7 ymm23 +#define VMM_hi_ymm8 ymm24 +#define VMM_hi_ymm9 ymm25 +#define VMM_hi_ymm10 ymm26 +#define VMM_hi_ymm11 ymm27 +#define VMM_hi_ymm12 ymm28 +#define VMM_hi_ymm13 ymm29 +#define VMM_hi_ymm14 ymm30 +#define VMM_hi_ymm15 ymm31 + +#define VMM_hi_zmm0 zmm16 +#define VMM_hi_zmm1 zmm17 +#define VMM_hi_zmm2 zmm18 +#define VMM_hi_zmm3 zmm19 +#define VMM_hi_zmm4 zmm20 +#define VMM_hi_zmm5 zmm21 +#define VMM_hi_zmm6 zmm22 +#define VMM_hi_zmm7 zmm23 +#define VMM_hi_zmm8 zmm24 +#define VMM_hi_zmm9 zmm25 +#define VMM_hi_zmm10 zmm26 +#define VMM_hi_zmm11 zmm27 +#define VMM_hi_zmm12 zmm28 +#define VMM_hi_zmm13 zmm29 +#define VMM_hi_zmm14 zmm30 +#define VMM_hi_zmm15 zmm31 + +#define PRIMITIVE_VMM(vec, num) vec##num + +#define VMM_any_xmm(i) PRIMITIVE_VMM(xmm, i) +#define VMM_any_ymm(i) PRIMITIVE_VMM(ymm, i) +#define VMM_any_zmm(i) PRIMITIVE_VMM(zmm, i) + +#define VMM_hi_xmm(i) PRIMITIVE_VMM(VMM_hi_xmm, i) +#define VMM_hi_ymm(i) PRIMITIVE_VMM(VMM_hi_ymm, i) +#define VMM_hi_zmm(i) PRIMITIVE_VMM(VMM_hi_zmm, i) + +#endif