diff mbox series

[RFC] <sys/platform/x86.h>: Add initial AVX10 support

Message ID ZNzuhX0OaJIB24ZQ@gmail.com
State New
Headers show
Series [RFC] <sys/platform/x86.h>: Add initial AVX10 support | expand

Commit Message

H.J. Lu Aug. 16, 2023, 3:43 p.m. UTC
Hi,

AVX10 CPUID enumeration is different.  The AVX10 version is stored as a
byte value, but CPU_FEATURE_PRESENT/CPU_FEATURE_ACTIVE return a boolean
value and can't return the AVX10 version.  This patch therefore adds the
AVX10_VERSION and AVX10_VECTOR_SIZE macros.  Any suggestions?

Thanks.


H.J.
---
Add initial support for Intel Advanced Vector Extensions 10 (Intel AVX10):

https://cdrdv2.intel.com/v1/dl/getContent/784267

to <sys/platform/x86.h>.

1. Add CPUID_INDEX_24_ECX_0 for CPUID leaf 0x24 to store AVX10 version
and vector size info.
2. Add AVX10_VERSION and AVX10_VECTOR_SIZE for AVX10 version and vector
size.
---
 manual/platform.texi               | 27 +++++++++++++++++++++++
 sysdeps/x86/bits/platform/x86.h    | 10 +++++++--
 sysdeps/x86/cpu-features.c         | 35 ++++++++++++++++++++++++++++++
 sysdeps/x86/include/cpu-features.h |  5 ++++-
 sysdeps/x86/sys/platform/x86.h     | 28 ++++++++++++++++++++++++
 sysdeps/x86/tst-get-cpu-features.c |  8 +++++++
 6 files changed, 110 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/manual/platform.texi b/manual/platform.texi
index 2a2d557067..1567fdf255 100644
--- a/manual/platform.texi
+++ b/manual/platform.texi
@@ -222,6 +222,9 @@  Leaf (EAX = 23H).
 @item
 @code{AVX} -- The AVX instruction extensions.
 
+@item
+@code{AVX10} -- The AVX10 instruction extensions.
+
 @item
 @code{AVX2} -- The AVX2 instruction extensions.
 
@@ -760,3 +763,27 @@  avx_active (void)
   return CPU_FEATURE_ACTIVE (AVX);
 @}
 @end smallexample
+
+You could query @code{AVX10} version number with:
+
+@smallexample
+#include <sys/platform/x86.h>
+
+int
+get_avx10_version (void)
+@{
+  return AVX10_VERSION ();
+@}
+@end smallexample
+
+and @code{AVX10} vector size in bits with:
+
+@smallexample
+#include <sys/platform/x86.h>
+
+int
+get_avx10_vector_size (void)
+@{
+  return AVX10_VECTOR_SIZE ();
+@}
+@end smallexample
diff --git a/sysdeps/x86/bits/platform/x86.h b/sysdeps/x86/bits/platform/x86.h
index 88ca071aa7..dbba9c95c3 100644
--- a/sysdeps/x86/bits/platform/x86.h
+++ b/sysdeps/x86/bits/platform/x86.h
@@ -30,7 +30,8 @@  enum
   CPUID_INDEX_80000008,
   CPUID_INDEX_7_ECX_1,
   CPUID_INDEX_19,
-  CPUID_INDEX_14_ECX_0
+  CPUID_INDEX_14_ECX_0,
+  CPUID_INDEX_24_ECX_0
 };
 
 struct cpuid_feature
@@ -312,6 +313,7 @@  enum
   x86_cpu_AVX_NE_CONVERT	= x86_cpu_index_7_ecx_1_edx + 5,
   x86_cpu_AMX_COMPLEX		= x86_cpu_index_7_ecx_1_edx + 8,
   x86_cpu_PREFETCHI		= x86_cpu_index_7_ecx_1_edx + 14,
+  x86_cpu_AVX10			= x86_cpu_index_7_ecx_1_edx + 19,
   x86_cpu_APX_F			= x86_cpu_index_7_ecx_1_edx + 21,
 
   x86_cpu_index_19_ebx
@@ -325,5 +327,9 @@  enum
     = (CPUID_INDEX_14_ECX_0 * 8 * 4 * sizeof (unsigned int)
        + cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
 
-  x86_cpu_PTWRITE		= x86_cpu_index_14_ecx_0_ebx + 4
+  x86_cpu_PTWRITE		= x86_cpu_index_14_ecx_0_ebx + 4,
+
+  x86_cpu_index_24_ecx_0_ebx
+    = (CPUID_INDEX_24_ECX_0 * 8 * 4 * sizeof (unsigned int)
+       + cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
 };
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index badf088874..8dd8392586 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -120,6 +120,14 @@  update_active (struct cpu_features *cpu_features)
     {
       unsigned int xcrlow;
       unsigned int xcrhigh;
+      enum
+	{
+	  xmm = 0,
+	  ymm,
+	  zmm
+	}
+      vector_size = xmm;
+      CPU_FEATURE_SET (cpu_features, AVX10);
       asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
       /* Is YMM and XMM state usable?  */
       if ((xcrlow & (bit_YMM_state | bit_XMM_state))
@@ -128,6 +136,7 @@  update_active (struct cpu_features *cpu_features)
 	  /* Determine if AVX is usable.  */
 	  if (CPU_FEATURES_CPU_P (cpu_features, AVX))
 	    {
+	      vector_size = ymm;
 	      CPU_FEATURE_SET (cpu_features, AVX);
 	      /* The following features depend on AVX being usable.  */
 	      /* Determine if AVX2 is usable.  */
@@ -166,6 +175,7 @@  update_active (struct cpu_features *cpu_features)
 			 | bit_ZMM16_31_state))
 	      == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
 	    {
+	      vector_size = zmm;
 	      /* Determine if AVX512F is usable.  */
 	      if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
 		{
@@ -210,6 +220,31 @@  update_active (struct cpu_features *cpu_features)
 	    }
 	}
 
+      if (CPU_FEATURES_CPU_P (cpu_features, AVX10)
+	  && cpu_features->basic.max_cpuid >= 0x24)
+	{
+	  __cpuid_count (0x24, 0,
+			 cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.eax,
+			 cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx,
+			 cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ecx,
+			 cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.edx);
+	  switch (vector_size)
+	    {
+	    case zmm:
+	      break;
+	    case ymm:
+	      /* Clear the ZMM bit.  */
+	      cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx
+		&= ~(1 << 18);
+	      break;
+	    case xmm:
+	      /* Clear the YMM and ZMM bits.  */
+	      cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx
+		&= ~((1 << 17) | (1 << 18));
+	      break;
+	    }
+	}
+
       /* Are XTILECFG and XTILEDATA states usable?  */
       if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
 	  == (bit_XTILECFG_state | bit_XTILEDATA_state))
diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
index eb30d342a6..c1b1811528 100644
--- a/sysdeps/x86/include/cpu-features.h
+++ b/sysdeps/x86/include/cpu-features.h
@@ -29,7 +29,7 @@ 
 
 enum
 {
-  CPUID_INDEX_MAX = CPUID_INDEX_14_ECX_0 + 1
+  CPUID_INDEX_MAX = CPUID_INDEX_24_ECX_0 + 1
 };
 
 enum
@@ -319,6 +319,7 @@  enum
 #define bit_cpu_AVX_NE_CONVERT	(1u << 5)
 #define bit_cpu_AMX_COMPLEX	(1u << 8)
 #define bit_cpu_PREFETCHI	(1u << 14)
+#define bit_cpu_AVX10		(1u << 19)
 #define bit_cpu_APX_F		(1u << 21)
 
 /* CPUID_INDEX_19.  */
@@ -563,6 +564,7 @@  enum
 #define index_cpu_AVX_NE_CONVERT CPUID_INDEX_7_ECX_1
 #define index_cpu_AMX_COMPLEX	CPUID_INDEX_7_ECX_1
 #define index_cpu_PREFETCHI	CPUID_INDEX_7_ECX_1
+#define index_cpu_AVX10		CPUID_INDEX_7_ECX_1
 #define index_cpu_APX_F		CPUID_INDEX_7_ECX_1
 
 /* CPUID_INDEX_19.  */
@@ -809,6 +811,7 @@  enum
 #define reg_AVX_NE_CONVERT	edx
 #define reg_AMX_COMPLEX		edx
 #define reg_PREFETCHI		edx
+#define reg_AVX10		edx
 #define reg_APX_F		edx
 
 /* CPUID_INDEX_19.  */
diff --git a/sysdeps/x86/sys/platform/x86.h b/sysdeps/x86/sys/platform/x86.h
index 1ea2c5fc0b..11edf4df3e 100644
--- a/sysdeps/x86/sys/platform/x86.h
+++ b/sysdeps/x86/sys/platform/x86.h
@@ -55,10 +55,38 @@  x86_cpu_active (unsigned int __index)
   return __ptr->active_array[__reg] & (1 << __bit);
 }
 
+static __inline__ unsigned int
+x86_cpu_get_avx10_info (unsigned int __index)
+{
+  /* A leaf spans 4 registers; __index counts bits across all leaves.  */
+  const unsigned int __reg_bits = 8 * sizeof (unsigned int);
+  const unsigned int __leaf_bits = __reg_bits * 4;
+  const struct cpuid_feature *__leaf
+    = __x86_get_cpuid_feature_leaf (__index / __leaf_bits);
+
+  return __leaf->cpuid_array[(__index % __leaf_bits) / __reg_bits];
+}
+
+static __inline__ unsigned int
+x86_cpu_get_avx10_vector_size (void)
+{
+  unsigned int __ebx = x86_cpu_get_avx10_info (x86_cpu_index_24_ecx_0_ebx);
+  if ((__ebx & (1u << 18)) != 0)  /* Bit 18: report 512-bit vectors.  */
+    return 512;
+  if ((__ebx & (1u << 17)) != 0)  /* Bit 17: report 256-bit vectors.  */
+    return 256;
+  return 128;
+}
+
 /* CPU_FEATURE_PRESENT evaluates to true if CPU supports the feature.  */
 #define CPU_FEATURE_PRESENT(name) x86_cpu_present (x86_cpu_##name)
 /* CPU_FEATURE_ACTIVE evaluates to true if the feature is active.  */
 #define CPU_FEATURE_ACTIVE(name) x86_cpu_active (x86_cpu_##name)
+/* Get AVX10 version number.  */
+#define AVX10_VERSION()	\
+  (x86_cpu_get_avx10_info (x86_cpu_index_24_ecx_0_ebx) & 0xff)
+/* Get AVX10 vector size.  */
+#define AVX10_VECTOR_SIZE()	x86_cpu_get_avx10_vector_size ()
 
 __END_DECLS
 
diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c
index b27fa7324a..e788f37df2 100644
--- a/sysdeps/x86/tst-get-cpu-features.c
+++ b/sysdeps/x86/tst-get-cpu-features.c
@@ -219,6 +219,7 @@  do_test (void)
   CHECK_CPU_FEATURE_PRESENT (AVX_NE_CONVERT);
   CHECK_CPU_FEATURE_PRESENT (AMX_COMPLEX);
   CHECK_CPU_FEATURE_PRESENT (PREFETCHI);
+  CHECK_CPU_FEATURE_PRESENT (AVX10);
   CHECK_CPU_FEATURE_PRESENT (APX_F);
   CHECK_CPU_FEATURE_PRESENT (AESKLE);
   CHECK_CPU_FEATURE_PRESENT (WIDE_KL);
@@ -391,11 +392,18 @@  do_test (void)
   CHECK_CPU_FEATURE_ACTIVE (AVX_NE_CONVERT);
   CHECK_CPU_FEATURE_ACTIVE (AMX_COMPLEX);
   CHECK_CPU_FEATURE_ACTIVE (PREFETCHI);
+  CHECK_CPU_FEATURE_ACTIVE (AVX10);
   CHECK_CPU_FEATURE_ACTIVE (APX_F);
   CHECK_CPU_FEATURE_ACTIVE (AESKLE);
   CHECK_CPU_FEATURE_ACTIVE (WIDE_KL);
   CHECK_CPU_FEATURE_ACTIVE (PTWRITE);
 
+  if (CPU_FEATURE_ACTIVE (AVX10))
+    {
+      printf ("AVX10 version: %d\n", AVX10_VERSION ());
+      printf ("AVX10 vector size: %d\n", AVX10_VECTOR_SIZE ());
+    }
+
   return 0;
 }