Message ID | 1396595802-21567-1-git-send-email-ling.ma.program@gmail.com |
---|---|
State | New |
Headers | show |
On Fri, Apr 4, 2014 at 12:16 AM, <ling.ma.program@gmail.com> wrote: > From: Sihai Yao <sihai.ysh@alibaba-inc.com> > > This patch sets bit_AVX2_Usable of __cpu_features.feature by checking > COMMON_CPUID_INDEX_7 for Haswell. Architecture related assembler file > can use this bit to determine calling path. > > --- > ChangeLog | 9 +++++++++ > sysdeps/x86_64/multiarch/ifunc-defines.sym | 2 ++ > sysdeps/x86_64/multiarch/init-arch.c | 3 +++ > sysdeps/x86_64/multiarch/init-arch.h | 9 +++++++++ > 4 files changed, 23 insertions(+) > > diff --git a/ChangeLog b/ChangeLog > index da8ea6d..ab23a3a 100644 > --- a/ChangeLog > +++ b/ChangeLog > @@ -1,3 +1,12 @@ > +2014-04-04 Sihai Yao <sihai.ysh@alibaba-inc.com> > + > + * sysdeps/x86_64/multiarch/ifunc-defines.sym: Add COMMON_CPU_INDEX_7 and > + FEATURE_INDEX_7. > + * sysdeps/x86_64/multiarch/init-arch.c: Add AVX2 detection from cpu > + features word of COMMON_CPUID_INDEX_7. > + * sysdeps/x86_64/multiarch/init-arch.h: Add bit_AVX2_Usable for memset.S > + to determine calling path. > + > 2014-04-03 David Svoboda <svoboda@cert.org> > > [BZ #5666] > diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym > index eb1538a..448b8c4 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-defines.sym > +++ b/sysdeps/x86_64/multiarch/ifunc-defines.sym > @@ -17,4 +17,6 @@ FEATURE_OFFSET offsetof (struct cpu_features, feature) > FEATURE_SIZE sizeof (unsigned int) > > COMMON_CPUID_INDEX_1 > +COMMON_CPUID_INDEX_7 > FEATURE_INDEX_1 > +FEATURE_INDEX_7 > diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c > index db74d97..2bbc5eb 100644 > --- a/sysdeps/x86_64/multiarch/init-arch.c > +++ b/sysdeps/x86_64/multiarch/init-arch.c > @@ -106,6 +106,7 @@ __init_cpu_features (void) > case 0x2c: > case 0x2e: > case 0x2f: > + case 0x3c: This isn't mentioned in ChangeLog. IA Optimization reference manual shows that 0x45 and 0x46 are also Haswell. This should be in a separate patch. 
> /* Rep string instructions, copy backward, unaligned loads > and pminub are fast on Intel Core i3, i5 and i7. */ > #if index_Fast_Rep_String != index_Fast_Copy_Backward > @@ -153,6 +154,8 @@ __init_cpu_features (void) > __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx, > __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx, > __cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx); > + if (CPUID_AVX2) > + __cpu_features.feature[index_AVX2_Usable] |= bit_AVX2_Usable; ^^^^^^^^^^ This should be inside if (CPUID_OSXSAVE), similar to bit_AVX_Usable. > /* Can we call xgetbv? */ > if (CPUID_OSXSAVE) > diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h > index 793707a..e453ccc 100644 > --- a/sysdeps/x86_64/multiarch/init-arch.h > +++ b/sysdeps/x86_64/multiarch/init-arch.h > @@ -24,6 +24,7 @@ > #define bit_FMA_Usable (1 << 7) > #define bit_FMA4_Usable (1 << 8) > #define bit_Slow_SSE4_2 (1 << 9) > +#define bit_AVX2_Usable (1 << 10) > > /* CPUID Feature flags. */ > > @@ -40,6 +41,7 @@ > > /* COMMON_CPUID_INDEX_7. */ > #define bit_RTM (1 << 11) > +#define bit_AVX2 (1 << 5) > > /* XCR0 Feature flags. 
*/ > #define bit_XMM_state (1 << 1) > @@ -54,6 +56,7 @@ > # define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET > # define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET > # define index_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET > +# define index_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET > > # define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE > # define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE > @@ -64,6 +67,7 @@ > # define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE > # define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE > # define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE > +# define index_AVX2_Usable FEATURE_INDEX_7*FEATURE_SIZE > > #else /* __ASSEMBLER__ */ > > @@ -81,6 +85,7 @@ enum > enum > { > FEATURE_INDEX_1 = 0, > + FEATURE_INDEX_7, > /* Keep the following line at the end. */ > FEATURE_INDEX_MAX > }; > @@ -145,6 +150,8 @@ extern const struct cpu_features *__get_cpu_features (void) > HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4) > # define CPUID_RTM \ > HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_RTM) > +# define CPUID_AVX2 \ > + HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2) > > /* HAS_* evaluates to true if we may use the feature at runtime. 
*/ > # define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2) > @@ -153,6 +160,7 @@ extern const struct cpu_features *__get_cpu_features (void) > # define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1) > # define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2) > # define HAS_RTM HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_RTM) > +# define HAS_AVX2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_AVX2) > > # define index_Fast_Rep_String FEATURE_INDEX_1 > # define index_Fast_Copy_Backward FEATURE_INDEX_1 > @@ -163,6 +171,7 @@ extern const struct cpu_features *__get_cpu_features (void) > # define index_FMA_Usable FEATURE_INDEX_1 > # define index_FMA4_Usable FEATURE_INDEX_1 > # define index_Slow_SSE4_2 FEATURE_INDEX_1 > +# define index_AVX2_Usable FEATURE_INDEX_7 > > # define HAS_ARCH_FEATURE(name) \ > ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) > -- > 1.8.1.4 >
diff --git a/ChangeLog b/ChangeLog index da8ea6d..ab23a3a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2014-04-04 Sihai Yao <sihai.ysh@alibaba-inc.com> + + * sysdeps/x86_64/multiarch/ifunc-defines.sym: Add COMMON_CPU_INDEX_7 and + FEATURE_INDEX_7. + * sysdeps/x86_64/multiarch/init-arch.c: Add AVX2 detection from cpu + features word of COMMON_CPUID_INDEX_7. + * sysdeps/x86_64/multiarch/init-arch.h: Add bit_AVX2_Usable for memset.S + to determine calling path. + 2014-04-03 David Svoboda <svoboda@cert.org> [BZ #5666] diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym index eb1538a..448b8c4 100644 --- a/sysdeps/x86_64/multiarch/ifunc-defines.sym +++ b/sysdeps/x86_64/multiarch/ifunc-defines.sym @@ -17,4 +17,6 @@ FEATURE_OFFSET offsetof (struct cpu_features, feature) FEATURE_SIZE sizeof (unsigned int) COMMON_CPUID_INDEX_1 +COMMON_CPUID_INDEX_7 FEATURE_INDEX_1 +FEATURE_INDEX_7 diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c index db74d97..2bbc5eb 100644 --- a/sysdeps/x86_64/multiarch/init-arch.c +++ b/sysdeps/x86_64/multiarch/init-arch.c @@ -106,6 +106,7 @@ __init_cpu_features (void) case 0x2c: case 0x2e: case 0x2f: + case 0x3c: /* Rep string instructions, copy backward, unaligned loads and pminub are fast on Intel Core i3, i5 and i7. */ #if index_Fast_Rep_String != index_Fast_Copy_Backward @@ -153,6 +154,8 @@ __init_cpu_features (void) __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx, __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx, __cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx); + if (CPUID_AVX2) + __cpu_features.feature[index_AVX2_Usable] |= bit_AVX2_Usable; /* Can we call xgetbv? 
*/ if (CPUID_OSXSAVE) diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h index 793707a..e453ccc 100644 --- a/sysdeps/x86_64/multiarch/init-arch.h +++ b/sysdeps/x86_64/multiarch/init-arch.h @@ -24,6 +24,7 @@ #define bit_FMA_Usable (1 << 7) #define bit_FMA4_Usable (1 << 8) #define bit_Slow_SSE4_2 (1 << 9) +#define bit_AVX2_Usable (1 << 10) /* CPUID Feature flags. */ @@ -40,6 +41,7 @@ /* COMMON_CPUID_INDEX_7. */ #define bit_RTM (1 << 11) +#define bit_AVX2 (1 << 5) /* XCR0 Feature flags. */ #define bit_XMM_state (1 << 1) @@ -54,6 +56,7 @@ # define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET # define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET # define index_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +# define index_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET # define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE # define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE @@ -64,6 +67,7 @@ # define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE # define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE # define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX2_Usable FEATURE_INDEX_7*FEATURE_SIZE #else /* __ASSEMBLER__ */ @@ -81,6 +85,7 @@ enum enum { FEATURE_INDEX_1 = 0, + FEATURE_INDEX_7, /* Keep the following line at the end. */ FEATURE_INDEX_MAX }; @@ -145,6 +150,8 @@ extern const struct cpu_features *__get_cpu_features (void) HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4) # define CPUID_RTM \ HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_RTM) +# define CPUID_AVX2 \ + HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2) /* HAS_* evaluates to true if we may use the feature at runtime. 
*/ # define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2) @@ -153,6 +160,7 @@ extern const struct cpu_features *__get_cpu_features (void) # define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1) # define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2) # define HAS_RTM HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_RTM) +# define HAS_AVX2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_AVX2) # define index_Fast_Rep_String FEATURE_INDEX_1 # define index_Fast_Copy_Backward FEATURE_INDEX_1 @@ -163,6 +171,7 @@ extern const struct cpu_features *__get_cpu_features (void) # define index_FMA_Usable FEATURE_INDEX_1 # define index_FMA4_Usable FEATURE_INDEX_1 # define index_Slow_SSE4_2 FEATURE_INDEX_1 +# define index_AVX2_Usable FEATURE_INDEX_7 # define HAS_ARCH_FEATURE(name) \ ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
From: Sihai Yao <sihai.ysh@alibaba-inc.com> This patch sets bit_AVX2_Usable in __cpu_features.feature by checking the AVX2 bit of COMMON_CPUID_INDEX_7 for Haswell. Architecture-specific assembler files can use this bit to select the code path. --- ChangeLog | 9 +++++++++ sysdeps/x86_64/multiarch/ifunc-defines.sym | 2 ++ sysdeps/x86_64/multiarch/init-arch.c | 3 +++ sysdeps/x86_64/multiarch/init-arch.h | 9 +++++++++ 4 files changed, 23 insertions(+)