Message ID | 20230920204451.1086900-1-goldstein.w.n@gmail.com |
---|---|
State | New |
Headers | show |
Series | x86: Add support for AVX10 preset and vec size in cpu-features | expand |
On Wed, Sep 20, 2023 at 1:44 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > This commit adds support for the new AVX10 cpu features: > https://cdrdv2-public.intel.com/784267/355989-intel-avx10-spec.pdf > > We add checks for: > - `AVX10`: Check if AVX10 is present. > - `AVX10_{X,Y,Z}MM`: Check if a given vec class has AVX10 support. > > `make check` passes and cpuid output was checked against GNR/DMR on an > emulator. > --- > manual/platform.texi | 12 ++++++++++++ > sysdeps/x86/bits/platform/x86.h | 14 ++++++++++++-- > sysdeps/x86/cpu-features.c | 25 +++++++++++++++++++++++++ > sysdeps/x86/include/cpu-features.h | 27 ++++++++++++++++++++++++++- > sysdeps/x86/tst-get-cpu-features.c | 8 ++++++++ > 5 files changed, 83 insertions(+), 3 deletions(-) > > diff --git a/manual/platform.texi b/manual/platform.texi > index 2a2d557067..478b6fdcdf 100644 > --- a/manual/platform.texi > +++ b/manual/platform.texi > @@ -222,6 +222,18 @@ Leaf (EAX = 23H). > @item > @code{AVX} -- The AVX instruction extensions. > > +@item > +@code{AVX10} -- The AVX10 instruction extensions. > + > +@item > +@code{AVX10_XMM} -- Whether AVX10 includes xmm registers. > + > +@item > +@code{AVX10_YMM} -- Whether AVX10 includes ymm registers. > + > +@item > +@code{AVX10_ZMM} -- Whether AVX10 includes zmm registers. > + > @item > @code{AVX2} -- The AVX2 instruction extensions. 
> > diff --git a/sysdeps/x86/bits/platform/x86.h b/sysdeps/x86/bits/platform/x86.h > index 88ca071aa7..1e23d53ba2 100644 > --- a/sysdeps/x86/bits/platform/x86.h > +++ b/sysdeps/x86/bits/platform/x86.h > @@ -30,7 +30,8 @@ enum > CPUID_INDEX_80000008, > CPUID_INDEX_7_ECX_1, > CPUID_INDEX_19, > - CPUID_INDEX_14_ECX_0 > + CPUID_INDEX_14_ECX_0, > + CPUID_INDEX_24_ECX_0 > }; > > struct cpuid_feature > @@ -312,6 +313,7 @@ enum > x86_cpu_AVX_NE_CONVERT = x86_cpu_index_7_ecx_1_edx + 5, > x86_cpu_AMX_COMPLEX = x86_cpu_index_7_ecx_1_edx + 8, > x86_cpu_PREFETCHI = x86_cpu_index_7_ecx_1_edx + 14, > + x86_cpu_AVX10 = x86_cpu_index_7_ecx_1_edx + 19, > x86_cpu_APX_F = x86_cpu_index_7_ecx_1_edx + 21, > > x86_cpu_index_19_ebx > @@ -325,5 +327,13 @@ enum > = (CPUID_INDEX_14_ECX_0 * 8 * 4 * sizeof (unsigned int) > + cpuid_register_index_ebx * 8 * sizeof (unsigned int)), > > - x86_cpu_PTWRITE = x86_cpu_index_14_ecx_0_ebx + 4 > + x86_cpu_PTWRITE = x86_cpu_index_14_ecx_0_ebx + 4, > + > + x86_cpu_index_24_ecx_0_ebx > + = (CPUID_INDEX_24_ECX_0 * 8 * 4 * sizeof (unsigned int) > + + cpuid_register_index_ebx * 8 * sizeof (unsigned int)), > + > + x86_cpu_AVX10_XMM = x86_cpu_index_24_ecx_0_ebx + 16, > + x86_cpu_AVX10_YMM = x86_cpu_index_24_ecx_0_ebx + 17, > + x86_cpu_AVX10_ZMM = x86_cpu_index_24_ecx_0_ebx + 18, > }; > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c > index badf088874..0bf923d48b 100644 > --- a/sysdeps/x86/cpu-features.c > +++ b/sysdeps/x86/cpu-features.c > @@ -115,11 +115,18 @@ update_active (struct cpu_features *cpu_features) > CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK); > #endif > > + enum > + { > + os_xmm = 1, > + os_ymm = 2, > + os_zmm = 4 > + } os_vector_size = os_xmm; > /* Can we call xgetbv? */ > if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE)) > { > unsigned int xcrlow; > unsigned int xcrhigh; > + CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10); > asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); > /* Is YMM and XMM state usable? 
*/ > if ((xcrlow & (bit_YMM_state | bit_XMM_state)) > @@ -128,6 +135,7 @@ update_active (struct cpu_features *cpu_features) > /* Determine if AVX is usable. */ > if (CPU_FEATURES_CPU_P (cpu_features, AVX)) > { > + os_vector_size |= os_ymm; > CPU_FEATURE_SET (cpu_features, AVX); > /* The following features depend on AVX being usable. */ > /* Determine if AVX2 is usable. */ > @@ -166,6 +174,7 @@ update_active (struct cpu_features *cpu_features) > | bit_ZMM16_31_state)) > == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state)) > { > + os_vector_size |= os_zmm; > /* Determine if AVX512F is usable. */ > if (CPU_FEATURES_CPU_P (cpu_features, AVX512F)) > { > @@ -210,6 +219,22 @@ update_active (struct cpu_features *cpu_features) > } > } > > + if (CPU_FEATURES_CPU_P (cpu_features, AVX10) > + && cpu_features->basic.max_cpuid >= 0x24) > + { > + __cpuid_count ( > + 0x24, 0, cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.eax, > + cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx, > + cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ecx, > + cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.edx); > + if (os_vector_size & os_xmm) > + CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_XMM); > + if (os_vector_size & os_ymm) > + CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_YMM); > + if (os_vector_size & os_zmm) > + CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_ZMM); > + } > + > /* Are XTILECFG and XTILEDATA states usable? 
*/ > if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state)) > == (bit_XTILECFG_state | bit_XTILEDATA_state)) > diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h > index eb30d342a6..2d7427a6c0 100644 > --- a/sysdeps/x86/include/cpu-features.h > +++ b/sysdeps/x86/include/cpu-features.h > @@ -29,7 +29,7 @@ > > enum > { > - CPUID_INDEX_MAX = CPUID_INDEX_14_ECX_0 + 1 > + CPUID_INDEX_MAX = CPUID_INDEX_24_ECX_0 + 1 > }; > > enum > @@ -319,6 +319,7 @@ enum > #define bit_cpu_AVX_NE_CONVERT (1u << 5) > #define bit_cpu_AMX_COMPLEX (1u << 8) > #define bit_cpu_PREFETCHI (1u << 14) > +#define bit_cpu_AVX10 (1u << 19) > #define bit_cpu_APX_F (1u << 21) > > /* CPUID_INDEX_19. */ > @@ -332,6 +333,13 @@ enum > /* EBX. */ > #define bit_cpu_PTWRITE (1u << 4) > > +/* CPUID_INDEX_24_ECX_0. */ > + > +/* EBX. */ > +#define bit_cpu_AVX10_XMM (1u << 16) > +#define bit_cpu_AVX10_YMM (1u << 17) > +#define bit_cpu_AVX10_ZMM (1u << 18) > + > /* CPUID_INDEX_1. */ > > /* ECX. */ > @@ -563,6 +571,7 @@ enum > #define index_cpu_AVX_NE_CONVERT CPUID_INDEX_7_ECX_1 > #define index_cpu_AMX_COMPLEX CPUID_INDEX_7_ECX_1 > #define index_cpu_PREFETCHI CPUID_INDEX_7_ECX_1 > +#define index_cpu_AVX10 CPUID_INDEX_7_ECX_1 > #define index_cpu_APX_F CPUID_INDEX_7_ECX_1 > > /* CPUID_INDEX_19. */ > @@ -576,6 +585,13 @@ enum > /* EBX. */ > #define index_cpu_PTWRITE CPUID_INDEX_14_ECX_0 > > +/* CPUID_INDEX_24_ECX_0. */ > + > +/* EBX. */ > +#define index_cpu_AVX10_XMM CPUID_INDEX_24_ECX_0 > +#define index_cpu_AVX10_YMM CPUID_INDEX_24_ECX_0 > +#define index_cpu_AVX10_ZMM CPUID_INDEX_24_ECX_0 > + > /* CPUID_INDEX_1. */ > > /* ECX. */ > @@ -809,6 +825,7 @@ enum > #define reg_AVX_NE_CONVERT edx > #define reg_AMX_COMPLEX edx > #define reg_PREFETCHI edx > +#define reg_AVX10 edx > #define reg_APX_F edx > > /* CPUID_INDEX_19. */ > @@ -822,6 +839,14 @@ enum > /* EBX. */ > #define reg_PTWRITE ebx > > +/* CPUID_INDEX_24_ECX_0. */ > + > +/* EBX. 
*/ > +#define reg_AVX10_XMM ebx > +#define reg_AVX10_YMM ebx > +#define reg_AVX10_ZMM ebx > + > + > /* PREFERRED_FEATURE_INDEX_1. First define the bitindex values > sequentially, then define the bit_arch* and index_arch_* lookup > constants. */ > diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c > index b27fa7324a..44edd18df2 100644 > --- a/sysdeps/x86/tst-get-cpu-features.c > +++ b/sysdeps/x86/tst-get-cpu-features.c > @@ -219,6 +219,7 @@ do_test (void) > CHECK_CPU_FEATURE_PRESENT (AVX_NE_CONVERT); > CHECK_CPU_FEATURE_PRESENT (AMX_COMPLEX); > CHECK_CPU_FEATURE_PRESENT (PREFETCHI); > + CHECK_CPU_FEATURE_PRESENT (AVX10); > CHECK_CPU_FEATURE_PRESENT (APX_F); > CHECK_CPU_FEATURE_PRESENT (AESKLE); > CHECK_CPU_FEATURE_PRESENT (WIDE_KL); > @@ -391,11 +392,18 @@ do_test (void) > CHECK_CPU_FEATURE_ACTIVE (AVX_NE_CONVERT); > CHECK_CPU_FEATURE_ACTIVE (AMX_COMPLEX); > CHECK_CPU_FEATURE_ACTIVE (PREFETCHI); > + CHECK_CPU_FEATURE_ACTIVE (AVX10); > CHECK_CPU_FEATURE_ACTIVE (APX_F); > CHECK_CPU_FEATURE_ACTIVE (AESKLE); > CHECK_CPU_FEATURE_ACTIVE (WIDE_KL); > CHECK_CPU_FEATURE_ACTIVE (PTWRITE); > > + if (CPU_FEATURE_ACTIVE (AVX10)) > + { > + CHECK_CPU_FEATURE_ACTIVE (AVX10_XMM); > + CHECK_CPU_FEATURE_ACTIVE (AVX10_YMM); > + CHECK_CPU_FEATURE_ACTIVE (AVX10_ZMM); > + } > return 0; > } > > -- > 2.34.1 > LGTM. Thanks.
diff --git a/manual/platform.texi b/manual/platform.texi index 2a2d557067..478b6fdcdf 100644 --- a/manual/platform.texi +++ b/manual/platform.texi @@ -222,6 +222,18 @@ Leaf (EAX = 23H). @item @code{AVX} -- The AVX instruction extensions. +@item +@code{AVX10} -- The AVX10 instruction extensions. + +@item +@code{AVX10_XMM} -- Whether AVX10 includes xmm registers. + +@item +@code{AVX10_YMM} -- Whether AVX10 includes ymm registers. + +@item +@code{AVX10_ZMM} -- Whether AVX10 includes zmm registers. + @item @code{AVX2} -- The AVX2 instruction extensions. diff --git a/sysdeps/x86/bits/platform/x86.h b/sysdeps/x86/bits/platform/x86.h index 88ca071aa7..1e23d53ba2 100644 --- a/sysdeps/x86/bits/platform/x86.h +++ b/sysdeps/x86/bits/platform/x86.h @@ -30,7 +30,8 @@ enum CPUID_INDEX_80000008, CPUID_INDEX_7_ECX_1, CPUID_INDEX_19, - CPUID_INDEX_14_ECX_0 + CPUID_INDEX_14_ECX_0, + CPUID_INDEX_24_ECX_0 }; struct cpuid_feature @@ -312,6 +313,7 @@ enum x86_cpu_AVX_NE_CONVERT = x86_cpu_index_7_ecx_1_edx + 5, x86_cpu_AMX_COMPLEX = x86_cpu_index_7_ecx_1_edx + 8, x86_cpu_PREFETCHI = x86_cpu_index_7_ecx_1_edx + 14, + x86_cpu_AVX10 = x86_cpu_index_7_ecx_1_edx + 19, x86_cpu_APX_F = x86_cpu_index_7_ecx_1_edx + 21, x86_cpu_index_19_ebx @@ -325,5 +327,13 @@ enum = (CPUID_INDEX_14_ECX_0 * 8 * 4 * sizeof (unsigned int) + cpuid_register_index_ebx * 8 * sizeof (unsigned int)), - x86_cpu_PTWRITE = x86_cpu_index_14_ecx_0_ebx + 4 + x86_cpu_PTWRITE = x86_cpu_index_14_ecx_0_ebx + 4, + + x86_cpu_index_24_ecx_0_ebx + = (CPUID_INDEX_24_ECX_0 * 8 * 4 * sizeof (unsigned int) + + cpuid_register_index_ebx * 8 * sizeof (unsigned int)), + + x86_cpu_AVX10_XMM = x86_cpu_index_24_ecx_0_ebx + 16, + x86_cpu_AVX10_YMM = x86_cpu_index_24_ecx_0_ebx + 17, + x86_cpu_AVX10_ZMM = x86_cpu_index_24_ecx_0_ebx + 18, }; diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index badf088874..0bf923d48b 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -115,11 +115,18 @@ update_active 
(struct cpu_features *cpu_features) CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK); #endif + enum + { + os_xmm = 1, + os_ymm = 2, + os_zmm = 4 + } os_vector_size = os_xmm; /* Can we call xgetbv? */ if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE)) { unsigned int xcrlow; unsigned int xcrhigh; + CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10); asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); /* Is YMM and XMM state usable? */ if ((xcrlow & (bit_YMM_state | bit_XMM_state)) @@ -128,6 +135,7 @@ update_active (struct cpu_features *cpu_features) /* Determine if AVX is usable. */ if (CPU_FEATURES_CPU_P (cpu_features, AVX)) { + os_vector_size |= os_ymm; CPU_FEATURE_SET (cpu_features, AVX); /* The following features depend on AVX being usable. */ /* Determine if AVX2 is usable. */ @@ -166,6 +174,7 @@ update_active (struct cpu_features *cpu_features) | bit_ZMM16_31_state)) == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state)) { + os_vector_size |= os_zmm; /* Determine if AVX512F is usable. */ if (CPU_FEATURES_CPU_P (cpu_features, AVX512F)) { @@ -210,6 +219,22 @@ update_active (struct cpu_features *cpu_features) } } + if (CPU_FEATURES_CPU_P (cpu_features, AVX10) + && cpu_features->basic.max_cpuid >= 0x24) + { + __cpuid_count ( + 0x24, 0, cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.eax, + cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx, + cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ecx, + cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.edx); + if (os_vector_size & os_xmm) + CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_XMM); + if (os_vector_size & os_ymm) + CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_YMM); + if (os_vector_size & os_zmm) + CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_ZMM); + } + /* Are XTILECFG and XTILEDATA states usable? 
*/ if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state)) == (bit_XTILECFG_state | bit_XTILEDATA_state)) diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h index eb30d342a6..2d7427a6c0 100644 --- a/sysdeps/x86/include/cpu-features.h +++ b/sysdeps/x86/include/cpu-features.h @@ -29,7 +29,7 @@ enum { - CPUID_INDEX_MAX = CPUID_INDEX_14_ECX_0 + 1 + CPUID_INDEX_MAX = CPUID_INDEX_24_ECX_0 + 1 }; enum @@ -319,6 +319,7 @@ enum #define bit_cpu_AVX_NE_CONVERT (1u << 5) #define bit_cpu_AMX_COMPLEX (1u << 8) #define bit_cpu_PREFETCHI (1u << 14) +#define bit_cpu_AVX10 (1u << 19) #define bit_cpu_APX_F (1u << 21) /* CPUID_INDEX_19. */ @@ -332,6 +333,13 @@ enum /* EBX. */ #define bit_cpu_PTWRITE (1u << 4) +/* CPUID_INDEX_24_ECX_0. */ + +/* EBX. */ +#define bit_cpu_AVX10_XMM (1u << 16) +#define bit_cpu_AVX10_YMM (1u << 17) +#define bit_cpu_AVX10_ZMM (1u << 18) + /* CPUID_INDEX_1. */ /* ECX. */ @@ -563,6 +571,7 @@ enum #define index_cpu_AVX_NE_CONVERT CPUID_INDEX_7_ECX_1 #define index_cpu_AMX_COMPLEX CPUID_INDEX_7_ECX_1 #define index_cpu_PREFETCHI CPUID_INDEX_7_ECX_1 +#define index_cpu_AVX10 CPUID_INDEX_7_ECX_1 #define index_cpu_APX_F CPUID_INDEX_7_ECX_1 /* CPUID_INDEX_19. */ @@ -576,6 +585,13 @@ enum /* EBX. */ #define index_cpu_PTWRITE CPUID_INDEX_14_ECX_0 +/* CPUID_INDEX_24_ECX_0. */ + +/* EBX. */ +#define index_cpu_AVX10_XMM CPUID_INDEX_24_ECX_0 +#define index_cpu_AVX10_YMM CPUID_INDEX_24_ECX_0 +#define index_cpu_AVX10_ZMM CPUID_INDEX_24_ECX_0 + /* CPUID_INDEX_1. */ /* ECX. */ @@ -809,6 +825,7 @@ enum #define reg_AVX_NE_CONVERT edx #define reg_AMX_COMPLEX edx #define reg_PREFETCHI edx +#define reg_AVX10 edx #define reg_APX_F edx /* CPUID_INDEX_19. */ @@ -822,6 +839,14 @@ enum /* EBX. */ #define reg_PTWRITE ebx +/* CPUID_INDEX_24_ECX_0. */ + +/* EBX. */ +#define reg_AVX10_XMM ebx +#define reg_AVX10_YMM ebx +#define reg_AVX10_ZMM ebx + + /* PREFERRED_FEATURE_INDEX_1. 
First define the bitindex values sequentially, then define the bit_arch* and index_arch_* lookup constants. */ diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c index b27fa7324a..44edd18df2 100644 --- a/sysdeps/x86/tst-get-cpu-features.c +++ b/sysdeps/x86/tst-get-cpu-features.c @@ -219,6 +219,7 @@ do_test (void) CHECK_CPU_FEATURE_PRESENT (AVX_NE_CONVERT); CHECK_CPU_FEATURE_PRESENT (AMX_COMPLEX); CHECK_CPU_FEATURE_PRESENT (PREFETCHI); + CHECK_CPU_FEATURE_PRESENT (AVX10); CHECK_CPU_FEATURE_PRESENT (APX_F); CHECK_CPU_FEATURE_PRESENT (AESKLE); CHECK_CPU_FEATURE_PRESENT (WIDE_KL); @@ -391,11 +392,18 @@ do_test (void) CHECK_CPU_FEATURE_ACTIVE (AVX_NE_CONVERT); CHECK_CPU_FEATURE_ACTIVE (AMX_COMPLEX); CHECK_CPU_FEATURE_ACTIVE (PREFETCHI); + CHECK_CPU_FEATURE_ACTIVE (AVX10); CHECK_CPU_FEATURE_ACTIVE (APX_F); CHECK_CPU_FEATURE_ACTIVE (AESKLE); CHECK_CPU_FEATURE_ACTIVE (WIDE_KL); CHECK_CPU_FEATURE_ACTIVE (PTWRITE); + if (CPU_FEATURE_ACTIVE (AVX10)) + { + CHECK_CPU_FEATURE_ACTIVE (AVX10_XMM); + CHECK_CPU_FEATURE_ACTIVE (AVX10_YMM); + CHECK_CPU_FEATURE_ACTIVE (AVX10_ZMM); + } return 0; }