Message ID | 20230512051024.1905173-2-goldstein.w.n@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v8,1/3] x86: Increase `non_temporal_threshold` to roughly `sizeof_L3 / 4` | expand |
On Thu, May 11, 2023 at 10:11 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > This patch should have no affect on existing functionality. > > The current code, which has a single switch for model detection and > setting prefered features, is difficult to follow/extend. The cases > use magic numbers and many microarchitectures are missing. This makes > it difficult to reason about what is implemented so far and/or > how/where to add support for new features. > > This patch splits the model detection and preference setting stages so > that CPU preferences can be set based on a complete list of available > microarchitectures, rather than based on model magic numbers. > --- > sysdeps/x86/cpu-features.c | 400 +++++++++++++++++++++++++++++-------- > 1 file changed, 316 insertions(+), 84 deletions(-) > > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c > index 5bff8ec0b4..264d309dd7 100644 > --- a/sysdeps/x86/cpu-features.c > +++ b/sysdeps/x86/cpu-features.c > @@ -417,6 +417,218 @@ _Static_assert (((index_arch_Fast_Unaligned_Load > == index_arch_Fast_Copy_Backward)), > "Incorrect index_arch_Fast_Unaligned_Load"); > > + > +/* Intel Family-6 microarch list. */ > +enum > +{ > + /* Atom processors. */ > + INTEL_ATOM_BONNELL, > + INTEL_ATOM_SILVERMONT, > + INTEL_ATOM_AIRMONT, > + INTEL_ATOM_GOLDMONT, > + INTEL_ATOM_GOLDMONT_PLUS, > + INTEL_ATOM_SIERRAFOREST, > + INTEL_ATOM_GRANDRIDGE, > + INTEL_ATOM_TREMONT, > + > + /* Bigcore processors. 
*/ > + INTEL_BIGCORE_MEROM, > + INTEL_BIGCORE_PENRYN, > + INTEL_BIGCORE_DUNNINGTON, > + INTEL_BIGCORE_NEHALEM, > + INTEL_BIGCORE_WESTMERE, > + INTEL_BIGCORE_SANDYBRIDGE, > + INTEL_BIGCORE_IVYBRIDGE, > + INTEL_BIGCORE_HASWELL, > + INTEL_BIGCORE_BROADWELL, > + INTEL_BIGCORE_SKYLAKE, > + INTEL_BIGCORE_AMBERLAKE, > + INTEL_BIGCORE_COFFEELAKE, > + INTEL_BIGCORE_WHISKEYLAKE, > + INTEL_BIGCORE_KABYLAKE, > + INTEL_BIGCORE_COMETLAKE, > + INTEL_BIGCORE_SKYLAKE_AVX512, > + INTEL_BIGCORE_CANNONLAKE, > + INTEL_BIGCORE_ICELAKE, > + INTEL_BIGCORE_TIGERLAKE, > + INTEL_BIGCORE_ROCKETLAKE, > + INTEL_BIGCORE_SAPPHIRERAPIDS, > + INTEL_BIGCORE_RAPTORLAKE, > + INTEL_BIGCORE_EMERALDRAPIDS, > + INTEL_BIGCORE_METEORLAKE, > + INTEL_BIGCORE_LUNARLAKE, > + INTEL_BIGCORE_ARROWLAKE, > + INTEL_BIGCORE_GRANITERAPIDS, > + > + /* Mixed (bigcore + atom SOC). */ > + INTEL_MIXED_LAKEFIELD, > + INTEL_MIXED_ALDERLAKE, > + > + /* KNL. */ > + INTEL_KNIGHTS_MILL, > + INTEL_KNIGHTS_LANDING, > + > + /* Unknown. */ > + INTEL_UNKNOWN, > +}; > + > +static unsigned int > +intel_get_fam6_microarch (unsigned int model, unsigned int stepping) > +{ > + switch (model) > + { > + case 0x1C: > + case 0x26: > + return INTEL_ATOM_BONNELL; > + case 0x27: > + case 0x35: > + case 0x36: > + /* Really Saltwell, but Saltwell is just a die shrink of Bonnell > + (microarchitecturally identical). 
*/ > + return INTEL_ATOM_BONNELL; > + case 0x37: > + case 0x4A: > + case 0x4D: > + case 0x5D: > + return INTEL_ATOM_SILVERMONT; > + case 0x4C: > + case 0x5A: > + case 0x75: > + return INTEL_ATOM_AIRMONT; > + case 0x5C: > + case 0x5F: > + return INTEL_ATOM_GOLDMONT; > + case 0x7A: > + return INTEL_ATOM_GOLDMONT_PLUS; > + case 0xAF: > + return INTEL_ATOM_SIERRAFOREST; > + case 0xB6: > + return INTEL_ATOM_GRANDRIDGE; > + case 0x86: > + case 0x96: > + case 0x9C: > + return INTEL_ATOM_TREMONT; > + case 0x0F: > + case 0x16: > + return INTEL_BIGCORE_MEROM; > + case 0x17: > + return INTEL_BIGCORE_PENRYN; > + case 0x1D: > + return INTEL_BIGCORE_DUNNINGTON; > + case 0x1A: > + case 0x1E: > + case 0x1F: > + case 0x2E: > + return INTEL_BIGCORE_NEHALEM; > + case 0x25: > + case 0x2C: > + case 0x2F: > + return INTEL_BIGCORE_WESTMERE; > + case 0x2A: > + case 0x2D: > + return INTEL_BIGCORE_SANDYBRIDGE; > + case 0x3A: > + case 0x3E: > + return INTEL_BIGCORE_IVYBRIDGE; > + case 0x3C: > + case 0x3F: > + case 0x45: > + case 0x46: > + return INTEL_BIGCORE_HASWELL; > + case 0x3D: > + case 0x47: > + case 0x4F: > + case 0x56: > + return INTEL_BIGCORE_BROADWELL; > + case 0x4E: > + case 0x5E: > + return INTEL_BIGCORE_SKYLAKE; > + case 0x8E: > + switch (stepping) > + { > + case 0x09: > + return INTEL_BIGCORE_AMBERLAKE; > + case 0x0A: > + return INTEL_BIGCORE_COFFEELAKE; > + case 0x0B: > + case 0x0C: > + return INTEL_BIGCORE_WHISKEYLAKE; > + default: > + return INTEL_BIGCORE_KABYLAKE; > + } > + case 0x9E: > + switch (stepping) > + { > + case 0x0A: > + case 0x0B: > + case 0x0C: > + case 0x0D: > + return INTEL_BIGCORE_COFFEELAKE; > + default: > + return INTEL_BIGCORE_KABYLAKE; > + } All these stepping checks can be put in comments. 
> + case 0xA5: > + case 0xA6: > + return INTEL_BIGCORE_COMETLAKE; > + case 0x66: > + return INTEL_BIGCORE_CANNONLAKE; > + case 0x55: > + /* > + Stepping = {6, 7} > + -> Cascadelake > + Stepping = {11} > + -> Cooperlake > + else > + -> Skylake-avx512 > + > + These are all microarchitecturally indentical, so use > + Skylake-avx512 for all of them. > + */ > + return INTEL_BIGCORE_SKYLAKE_AVX512; > + case 0x6A: > + case 0x6C: > + case 0x7D: > + case 0x7E: > + case 0x9D: > + return INTEL_BIGCORE_ICELAKE; > + case 0x8C: > + case 0x8D: > + return INTEL_BIGCORE_TIGERLAKE; > + case 0xA7: > + return INTEL_BIGCORE_ROCKETLAKE; > + case 0x8F: > + return INTEL_BIGCORE_SAPPHIRERAPIDS; > + case 0xB7: > + case 0xBA: > + case 0xBF: > + return INTEL_BIGCORE_RAPTORLAKE; > + case 0xCF: > + return INTEL_BIGCORE_EMERALDRAPIDS; > + case 0xAA: > + case 0xAC: > + return INTEL_BIGCORE_METEORLAKE; > + case 0xbd: > + return INTEL_BIGCORE_LUNARLAKE; > + case 0xc6: > + return INTEL_BIGCORE_ARROWLAKE; > + case 0xAD: > + case 0xAE: > + return INTEL_BIGCORE_GRANITERAPIDS; > + case 0x8A: > + return INTEL_MIXED_LAKEFIELD; > + case 0x97: > + case 0x9A: > + case 0xBE: > + return INTEL_MIXED_ALDERLAKE; > + case 0x85: > + return INTEL_KNIGHTS_MILL; > + case 0x57: > + return INTEL_KNIGHTS_LANDING; > + default: > + return INTEL_UNKNOWN; > + } > +} > + > static inline void > init_cpu_features (struct cpu_features *cpu_features) > { > @@ -453,129 +665,149 @@ init_cpu_features (struct cpu_features *cpu_features) > if (family == 0x06) > { > model += extended_model; > - switch (model) > + unsigned int microarch > + = intel_get_fam6_microarch (model, stepping); > + > + switch (microarch) > { > - case 0x1c: > - case 0x26: > - /* BSF is slow on Atom. */ > + /* Atom / KNL tuning. */ > + case INTEL_ATOM_BONNELL: > + /* BSF is slow on Bonnell. */ > cpu_features->preferred[index_arch_Slow_BSF] > - |= bit_arch_Slow_BSF; > + |= bit_arch_Slow_BSF; > break; > > - case 0x57: > - /* Knights Landing. 
Enable Silvermont optimizations. */ > - > - case 0x7a: > - /* Unaligned load versions are faster than SSSE3 > - on Goldmont Plus. */ > - > - case 0x5c: > - case 0x5f: > /* Unaligned load versions are faster than SSSE3 > - on Goldmont. */ > + on Airmont, Silvermont, Goldmont, and Goldmont Plus. */ > + case INTEL_ATOM_AIRMONT: > + case INTEL_ATOM_SILVERMONT: > + case INTEL_ATOM_GOLDMONT: > + case INTEL_ATOM_GOLDMONT_PLUS: > > - case 0x4c: > - case 0x5a: > - case 0x75: > - /* Airmont is a die shrink of Silvermont. */ > + /* Knights Landing. Enable Silvermont optimizations. */ > + case INTEL_KNIGHTS_LANDING: > > - case 0x37: > - case 0x4a: > - case 0x4d: > - case 0x5d: > - /* Unaligned load versions are faster than SSSE3 > - on Silvermont. */ > cpu_features->preferred[index_arch_Fast_Unaligned_Load] > - |= (bit_arch_Fast_Unaligned_Load > - | bit_arch_Fast_Unaligned_Copy > - | bit_arch_Prefer_PMINUB_for_stringop > - | bit_arch_Slow_SSE4_2); > + |= (bit_arch_Fast_Unaligned_Load > + | bit_arch_Fast_Unaligned_Copy > + | bit_arch_Prefer_PMINUB_for_stringop > + | bit_arch_Slow_SSE4_2); > break; > > - case 0x86: > - case 0x96: > - case 0x9c: > + case INTEL_ATOM_TREMONT: > /* Enable rep string instructions, unaligned load, unaligned > - copy, pminub and avoid SSE 4.2 on Tremont. */ > + copy, pminub and avoid SSE 4.2 on Tremont. */ > cpu_features->preferred[index_arch_Fast_Rep_String] > - |= (bit_arch_Fast_Rep_String > - | bit_arch_Fast_Unaligned_Load > - | bit_arch_Fast_Unaligned_Copy > - | bit_arch_Prefer_PMINUB_for_stringop > - | bit_arch_Slow_SSE4_2); > + |= (bit_arch_Fast_Rep_String | bit_arch_Fast_Unaligned_Load > + | bit_arch_Fast_Unaligned_Copy > + | bit_arch_Prefer_PMINUB_for_stringop > + | bit_arch_Slow_SSE4_2); > break; > > + /* > + Default tuned KNL microarch. KNM > + case INTEL_KNIGHTS_MILL: > + */ > + > + /* > + Default tuned atom microarch. > + case INTEL_ATOM_SIERRAFOREST: > + case INTEL_ATOM_GRANDRIDGE: > + */ > + > + /* Bigcore/Default Tuning. 
*/ > default: > /* Unknown family 0x06 processors. Assuming this is one > of Core i3/i5/i7 processors if AVX is available. */ > if (!CPU_FEATURES_CPU_P (cpu_features, AVX)) > break; > - /* Fall through. */ > - > - case 0x1a: > - case 0x1e: > - case 0x1f: > - case 0x25: > - case 0x2c: > - case 0x2e: > - case 0x2f: > + case INTEL_BIGCORE_NEHALEM: > + case INTEL_BIGCORE_WESTMERE: > > /* Rep string instructions, unaligned load, unaligned copy, > and pminub are fast on Intel Core i3, i5 and i7. */ > cpu_features->preferred[index_arch_Fast_Rep_String] > - |= (bit_arch_Fast_Rep_String > - | bit_arch_Fast_Unaligned_Load > - | bit_arch_Fast_Unaligned_Copy > - | bit_arch_Prefer_PMINUB_for_stringop); > + |= (bit_arch_Fast_Rep_String | bit_arch_Fast_Unaligned_Load > + | bit_arch_Fast_Unaligned_Copy > + | bit_arch_Prefer_PMINUB_for_stringop); > break; > + > + /* > + Default tuned Bigcore microarch. > + case INTEL_BIGCORE_SANDYBRIDGE: > + case INTEL_BIGCORE_IVYBRIDGE: > + case INTEL_BIGCORE_HASWELL: > + case INTEL_BIGCORE_BROADWELL: > + case INTEL_BIGCORE_SKYLAKE: > + case INTEL_BIGCORE_AMBERLAKE: > + case INTEL_BIGCORE_COFFEELAKE: > + case INTEL_BIGCORE_WHISKEYLAKE: > + case INTEL_BIGCORE_KABYLAKE: > + case INTEL_BIGCORE_COMETLAKE: > + case INTEL_BIGCORE_SKYLAKE_AVX512: > + case INTEL_BIGCORE_CANNONLAKE: > + case INTEL_BIGCORE_ICELAKE: > + case INTEL_BIGCORE_TIGERLAKE: > + case INTEL_BIGCORE_ROCKETLAKE: > + case INTEL_BIGCORE_RAPTORLAKE: > + case INTEL_BIGCORE_METEORLAKE: > + case INTEL_BIGCORE_LUNARLAKE: > + case INTEL_BIGCORE_ARROWLAKE: > + case INTEL_BIGCORE_SAPPHIRERAPIDS: > + case INTEL_BIGCORE_EMERALDRAPIDS: > + case INTEL_BIGCORE_GRANITERAPIDS: > + */ > + > + /* > + Default tuned Mixed (bigcore + atom SOC). > + case INTEL_MIXED_LAKEFIELD: > + case INTEL_MIXED_ALDERLAKE: > + */ > } > > - /* Disable TSX on some processors to avoid TSX on kernels that > - weren't updated with the latest microcode package (which > - disables broken feature by default). 
*/ > - switch (model) > + /* Disable TSX on some processors to avoid TSX on kernels that > + weren't updated with the latest microcode package (which > + disables broken feature by default). */ > + switch (microarch) > { > - case 0x55: > + case INTEL_BIGCORE_SKYLAKE_AVX512: > + /* 0x55 (Skylake-avx512) && stepping <= 5 disable TSX. */ > if (stepping <= 5) > goto disable_tsx; > break; > - case 0x8e: > - /* NB: Although the errata documents that for model == 0x8e, > - only 0xb stepping or lower are impacted, the intention of > - the errata was to disable TSX on all client processors on > - all steppings. Include 0xc stepping which is an Intel > - Core i7-8665U, a client mobile processor. */ > - case 0x9e: > - if (stepping > 0xc) > + > + case INTEL_BIGCORE_SKYLAKE: > + case INTEL_BIGCORE_AMBERLAKE: > + case INTEL_BIGCORE_COFFEELAKE: > + case INTEL_BIGCORE_WHISKEYLAKE: > + case INTEL_BIGCORE_KABYLAKE: > + /* NB: Although the errata documents that for model == 0x8e > + (skylake client), only 0xb stepping or lower are impacted, > + the intention of the errata was to disable TSX on all client > + processors on all steppings. Include 0xc stepping which is > + an Intel Core i7-8665U, a client mobile processor. */ > + if ((model == 0x8e || model == 0x9e) && stepping > 0xc) > break; > - /* Fall through. */ > - case 0x4e: > - case 0x5e: > - { > + > /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for > processors listed in: > > https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html > */ > -disable_tsx: > + disable_tsx: > CPU_FEATURE_UNSET (cpu_features, HLE); > CPU_FEATURE_UNSET (cpu_features, RTM); > CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT); > - } > - break; > - case 0x3f: > - /* Xeon E7 v3 with stepping >= 4 has working TSX. */ > - if (stepping >= 4) > break; > - /* Fall through. 
*/ > - case 0x3c: > - case 0x45: > - case 0x46: > - /* Disable Intel TSX on Haswell processors (except Xeon E7 v3 > - with stepping >= 4) to avoid TSX on kernels that weren't > - updated with the latest microcode package (which disables > - broken feature by default). */ > - CPU_FEATURE_UNSET (cpu_features, RTM); > - break; > + > + case INTEL_BIGCORE_HASWELL: > + /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working > + TSX. Haswell also include other model numbers that have > + working TSX. */ > + if (model == 0x3f && stepping >= 4) > + break; > + > + CPU_FEATURE_UNSET (cpu_features, RTM); > + break; > } > } > > -- > 2.34.1 >
On Fri, May 12, 2023 at 5:18 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > On Thu, May 11, 2023 at 10:11 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > This patch should have no affect on existing functionality. > > > > The current code, which has a single switch for model detection and > > setting prefered features, is difficult to follow/extend. The cases > > use magic numbers and many microarchitectures are missing. This makes > > it difficult to reason about what is implemented so far and/or > > how/where to add support for new features. > > > > This patch splits the model detection and preference setting stages so > > that CPU preferences can be set based on a complete list of available > > microarchitectures, rather than based on model magic numbers. > > --- > > sysdeps/x86/cpu-features.c | 400 +++++++++++++++++++++++++++++-------- > > 1 file changed, 316 insertions(+), 84 deletions(-) > > > > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c > > index 5bff8ec0b4..264d309dd7 100644 > > --- a/sysdeps/x86/cpu-features.c > > +++ b/sysdeps/x86/cpu-features.c > > @@ -417,6 +417,218 @@ _Static_assert (((index_arch_Fast_Unaligned_Load > > == index_arch_Fast_Copy_Backward)), > > "Incorrect index_arch_Fast_Unaligned_Load"); > > > > + > > +/* Intel Family-6 microarch list. */ > > +enum > > +{ > > + /* Atom processors. */ > > + INTEL_ATOM_BONNELL, > > + INTEL_ATOM_SILVERMONT, > > + INTEL_ATOM_AIRMONT, > > + INTEL_ATOM_GOLDMONT, > > + INTEL_ATOM_GOLDMONT_PLUS, > > + INTEL_ATOM_SIERRAFOREST, > > + INTEL_ATOM_GRANDRIDGE, > > + INTEL_ATOM_TREMONT, > > + > > + /* Bigcore processors. 
*/ > > + INTEL_BIGCORE_MEROM, > > + INTEL_BIGCORE_PENRYN, > > + INTEL_BIGCORE_DUNNINGTON, > > + INTEL_BIGCORE_NEHALEM, > > + INTEL_BIGCORE_WESTMERE, > > + INTEL_BIGCORE_SANDYBRIDGE, > > + INTEL_BIGCORE_IVYBRIDGE, > > + INTEL_BIGCORE_HASWELL, > > + INTEL_BIGCORE_BROADWELL, > > + INTEL_BIGCORE_SKYLAKE, > > + INTEL_BIGCORE_AMBERLAKE, > > + INTEL_BIGCORE_COFFEELAKE, > > + INTEL_BIGCORE_WHISKEYLAKE, > > + INTEL_BIGCORE_KABYLAKE, > > + INTEL_BIGCORE_COMETLAKE, > > + INTEL_BIGCORE_SKYLAKE_AVX512, > > + INTEL_BIGCORE_CANNONLAKE, > > + INTEL_BIGCORE_ICELAKE, > > + INTEL_BIGCORE_TIGERLAKE, > > + INTEL_BIGCORE_ROCKETLAKE, > > + INTEL_BIGCORE_SAPPHIRERAPIDS, > > + INTEL_BIGCORE_RAPTORLAKE, > > + INTEL_BIGCORE_EMERALDRAPIDS, > > + INTEL_BIGCORE_METEORLAKE, > > + INTEL_BIGCORE_LUNARLAKE, > > + INTEL_BIGCORE_ARROWLAKE, > > + INTEL_BIGCORE_GRANITERAPIDS, > > + > > + /* Mixed (bigcore + atom SOC). */ > > + INTEL_MIXED_LAKEFIELD, > > + INTEL_MIXED_ALDERLAKE, > > + > > + /* KNL. */ > > + INTEL_KNIGHTS_MILL, > > + INTEL_KNIGHTS_LANDING, > > + > > + /* Unknown. */ > > + INTEL_UNKNOWN, > > +}; > > + > > +static unsigned int > > +intel_get_fam6_microarch (unsigned int model, unsigned int stepping) > > +{ > > + switch (model) > > + { > > + case 0x1C: > > + case 0x26: > > + return INTEL_ATOM_BONNELL; > > + case 0x27: > > + case 0x35: > > + case 0x36: > > + /* Really Saltwell, but Saltwell is just a die shrink of Bonnell > > + (microarchitecturally identical). 
*/ > > + return INTEL_ATOM_BONNELL; > > + case 0x37: > > + case 0x4A: > > + case 0x4D: > > + case 0x5D: > > + return INTEL_ATOM_SILVERMONT; > > + case 0x4C: > > + case 0x5A: > > + case 0x75: > > + return INTEL_ATOM_AIRMONT; > > + case 0x5C: > > + case 0x5F: > > + return INTEL_ATOM_GOLDMONT; > > + case 0x7A: > > + return INTEL_ATOM_GOLDMONT_PLUS; > > + case 0xAF: > > + return INTEL_ATOM_SIERRAFOREST; > > + case 0xB6: > > + return INTEL_ATOM_GRANDRIDGE; > > + case 0x86: > > + case 0x96: > > + case 0x9C: > > + return INTEL_ATOM_TREMONT; > > + case 0x0F: > > + case 0x16: > > + return INTEL_BIGCORE_MEROM; > > + case 0x17: > > + return INTEL_BIGCORE_PENRYN; > > + case 0x1D: > > + return INTEL_BIGCORE_DUNNINGTON; > > + case 0x1A: > > + case 0x1E: > > + case 0x1F: > > + case 0x2E: > > + return INTEL_BIGCORE_NEHALEM; > > + case 0x25: > > + case 0x2C: > > + case 0x2F: > > + return INTEL_BIGCORE_WESTMERE; > > + case 0x2A: > > + case 0x2D: > > + return INTEL_BIGCORE_SANDYBRIDGE; > > + case 0x3A: > > + case 0x3E: > > + return INTEL_BIGCORE_IVYBRIDGE; > > + case 0x3C: > > + case 0x3F: > > + case 0x45: > > + case 0x46: > > + return INTEL_BIGCORE_HASWELL; > > + case 0x3D: > > + case 0x47: > > + case 0x4F: > > + case 0x56: > > + return INTEL_BIGCORE_BROADWELL; > > + case 0x4E: > > + case 0x5E: > > + return INTEL_BIGCORE_SKYLAKE; > > + case 0x8E: > > + switch (stepping) > > + { > > + case 0x09: > > + return INTEL_BIGCORE_AMBERLAKE; > > + case 0x0A: > > + return INTEL_BIGCORE_COFFEELAKE; > > + case 0x0B: > > + case 0x0C: > > + return INTEL_BIGCORE_WHISKEYLAKE; > > + default: > > + return INTEL_BIGCORE_KABYLAKE; > > + } > > + case 0x9E: > > + switch (stepping) > > + { > > + case 0x0A: > > + case 0x0B: > > + case 0x0C: > > + case 0x0D: > > + return INTEL_BIGCORE_COFFEELAKE; > > + default: > > + return INTEL_BIGCORE_KABYLAKE; > > + } > > All these stepping checks can be put in comments. Okay. 
Should we drop commetlake / cannonlake / kabylake identifiers as well and only use skylake/skylake-avx512? > > > + case 0xA5: > > + case 0xA6: > > + return INTEL_BIGCORE_COMETLAKE; > > + case 0x66: > > + return INTEL_BIGCORE_CANNONLAKE; > > + case 0x55: > > + /* > > + Stepping = {6, 7} > > + -> Cascadelake > > + Stepping = {11} > > + -> Cooperlake > > + else > > + -> Skylake-avx512 > > + > > + These are all microarchitecturally indentical, so use > > + Skylake-avx512 for all of them. > > + */ > > + return INTEL_BIGCORE_SKYLAKE_AVX512; > > + case 0x6A: > > + case 0x6C: > > + case 0x7D: > > + case 0x7E: > > + case 0x9D: > > + return INTEL_BIGCORE_ICELAKE; > > + case 0x8C: > > + case 0x8D: > > + return INTEL_BIGCORE_TIGERLAKE; > > + case 0xA7: > > + return INTEL_BIGCORE_ROCKETLAKE; > > + case 0x8F: > > + return INTEL_BIGCORE_SAPPHIRERAPIDS; > > + case 0xB7: > > + case 0xBA: > > + case 0xBF: > > + return INTEL_BIGCORE_RAPTORLAKE; > > + case 0xCF: > > + return INTEL_BIGCORE_EMERALDRAPIDS; > > + case 0xAA: > > + case 0xAC: > > + return INTEL_BIGCORE_METEORLAKE; > > + case 0xbd: > > + return INTEL_BIGCORE_LUNARLAKE; > > + case 0xc6: > > + return INTEL_BIGCORE_ARROWLAKE; > > + case 0xAD: > > + case 0xAE: > > + return INTEL_BIGCORE_GRANITERAPIDS; > > + case 0x8A: > > + return INTEL_MIXED_LAKEFIELD; > > + case 0x97: > > + case 0x9A: > > + case 0xBE: > > + return INTEL_MIXED_ALDERLAKE; > > + case 0x85: > > + return INTEL_KNIGHTS_MILL; > > + case 0x57: > > + return INTEL_KNIGHTS_LANDING; > > + default: > > + return INTEL_UNKNOWN; > > + } > > +} > > + > > static inline void > > init_cpu_features (struct cpu_features *cpu_features) > > { > > @@ -453,129 +665,149 @@ init_cpu_features (struct cpu_features *cpu_features) > > if (family == 0x06) > > { > > model += extended_model; > > - switch (model) > > + unsigned int microarch > > + = intel_get_fam6_microarch (model, stepping); > > + > > + switch (microarch) > > { > > - case 0x1c: > > - case 0x26: > > - /* BSF is slow on Atom. 
*/ > > + /* Atom / KNL tuning. */ > > + case INTEL_ATOM_BONNELL: > > + /* BSF is slow on Bonnell. */ > > cpu_features->preferred[index_arch_Slow_BSF] > > - |= bit_arch_Slow_BSF; > > + |= bit_arch_Slow_BSF; > > break; > > > > - case 0x57: > > - /* Knights Landing. Enable Silvermont optimizations. */ > > - > > - case 0x7a: > > - /* Unaligned load versions are faster than SSSE3 > > - on Goldmont Plus. */ > > - > > - case 0x5c: > > - case 0x5f: > > /* Unaligned load versions are faster than SSSE3 > > - on Goldmont. */ > > + on Airmont, Silvermont, Goldmont, and Goldmont Plus. */ > > + case INTEL_ATOM_AIRMONT: > > + case INTEL_ATOM_SILVERMONT: > > + case INTEL_ATOM_GOLDMONT: > > + case INTEL_ATOM_GOLDMONT_PLUS: > > > > - case 0x4c: > > - case 0x5a: > > - case 0x75: > > - /* Airmont is a die shrink of Silvermont. */ > > + /* Knights Landing. Enable Silvermont optimizations. */ > > + case INTEL_KNIGHTS_LANDING: > > > > - case 0x37: > > - case 0x4a: > > - case 0x4d: > > - case 0x5d: > > - /* Unaligned load versions are faster than SSSE3 > > - on Silvermont. */ > > cpu_features->preferred[index_arch_Fast_Unaligned_Load] > > - |= (bit_arch_Fast_Unaligned_Load > > - | bit_arch_Fast_Unaligned_Copy > > - | bit_arch_Prefer_PMINUB_for_stringop > > - | bit_arch_Slow_SSE4_2); > > + |= (bit_arch_Fast_Unaligned_Load > > + | bit_arch_Fast_Unaligned_Copy > > + | bit_arch_Prefer_PMINUB_for_stringop > > + | bit_arch_Slow_SSE4_2); > > break; > > > > - case 0x86: > > - case 0x96: > > - case 0x9c: > > + case INTEL_ATOM_TREMONT: > > /* Enable rep string instructions, unaligned load, unaligned > > - copy, pminub and avoid SSE 4.2 on Tremont. */ > > + copy, pminub and avoid SSE 4.2 on Tremont. 
*/ > > cpu_features->preferred[index_arch_Fast_Rep_String] > > - |= (bit_arch_Fast_Rep_String > > - | bit_arch_Fast_Unaligned_Load > > - | bit_arch_Fast_Unaligned_Copy > > - | bit_arch_Prefer_PMINUB_for_stringop > > - | bit_arch_Slow_SSE4_2); > > + |= (bit_arch_Fast_Rep_String | bit_arch_Fast_Unaligned_Load > > + | bit_arch_Fast_Unaligned_Copy > > + | bit_arch_Prefer_PMINUB_for_stringop > > + | bit_arch_Slow_SSE4_2); > > break; > > > > + /* > > + Default tuned KNL microarch. > KNM Err, trying to refer to the whole "Knights_..." lineup. Changed to "Knights". > > + case INTEL_KNIGHTS_MILL: > > + */ > > + > > + /* > > + Default tuned atom microarch. > > + case INTEL_ATOM_SIERRAFOREST: > > + case INTEL_ATOM_GRANDRIDGE: > > + */ > > + > > + /* Bigcore/Default Tuning. */ > > default: > > /* Unknown family 0x06 processors. Assuming this is one > > of Core i3/i5/i7 processors if AVX is available. */ > > if (!CPU_FEATURES_CPU_P (cpu_features, AVX)) > > break; > > - /* Fall through. */ > > - > > - case 0x1a: > > - case 0x1e: > > - case 0x1f: > > - case 0x25: > > - case 0x2c: > > - case 0x2e: > > - case 0x2f: > > + case INTEL_BIGCORE_NEHALEM: > > + case INTEL_BIGCORE_WESTMERE: > > > > /* Rep string instructions, unaligned load, unaligned copy, > > and pminub are fast on Intel Core i3, i5 and i7. */ > > cpu_features->preferred[index_arch_Fast_Rep_String] > > - |= (bit_arch_Fast_Rep_String > > - | bit_arch_Fast_Unaligned_Load > > - | bit_arch_Fast_Unaligned_Copy > > - | bit_arch_Prefer_PMINUB_for_stringop); > > + |= (bit_arch_Fast_Rep_String | bit_arch_Fast_Unaligned_Load > > + | bit_arch_Fast_Unaligned_Copy > > + | bit_arch_Prefer_PMINUB_for_stringop); > > break; > > + > > + /* > > + Default tuned Bigcore microarch. 
> > + case INTEL_BIGCORE_SANDYBRIDGE: > > + case INTEL_BIGCORE_IVYBRIDGE: > > + case INTEL_BIGCORE_HASWELL: > > + case INTEL_BIGCORE_BROADWELL: > > + case INTEL_BIGCORE_SKYLAKE: > > + case INTEL_BIGCORE_AMBERLAKE: > > + case INTEL_BIGCORE_COFFEELAKE: > > + case INTEL_BIGCORE_WHISKEYLAKE: > > + case INTEL_BIGCORE_KABYLAKE: > > + case INTEL_BIGCORE_COMETLAKE: > > + case INTEL_BIGCORE_SKYLAKE_AVX512: > > + case INTEL_BIGCORE_CANNONLAKE: > > + case INTEL_BIGCORE_ICELAKE: > > + case INTEL_BIGCORE_TIGERLAKE: > > + case INTEL_BIGCORE_ROCKETLAKE: > > + case INTEL_BIGCORE_RAPTORLAKE: > > + case INTEL_BIGCORE_METEORLAKE: > > + case INTEL_BIGCORE_LUNARLAKE: > > + case INTEL_BIGCORE_ARROWLAKE: > > + case INTEL_BIGCORE_SAPPHIRERAPIDS: > > + case INTEL_BIGCORE_EMERALDRAPIDS: > > + case INTEL_BIGCORE_GRANITERAPIDS: > > + */ > > + > > + /* > > + Default tuned Mixed (bigcore + atom SOC). > > + case INTEL_MIXED_LAKEFIELD: > > + case INTEL_MIXED_ALDERLAKE: > > + */ > > } > > > > - /* Disable TSX on some processors to avoid TSX on kernels that > > - weren't updated with the latest microcode package (which > > - disables broken feature by default). */ > > - switch (model) > > + /* Disable TSX on some processors to avoid TSX on kernels that > > + weren't updated with the latest microcode package (which > > + disables broken feature by default). */ > > + switch (microarch) > > { > > - case 0x55: > > + case INTEL_BIGCORE_SKYLAKE_AVX512: > > + /* 0x55 (Skylake-avx512) && stepping <= 5 disable TSX. */ > > if (stepping <= 5) > > goto disable_tsx; > > break; > > - case 0x8e: > > - /* NB: Although the errata documents that for model == 0x8e, > > - only 0xb stepping or lower are impacted, the intention of > > - the errata was to disable TSX on all client processors on > > - all steppings. Include 0xc stepping which is an Intel > > - Core i7-8665U, a client mobile processor. 
*/ > > - case 0x9e: > > - if (stepping > 0xc) > > + > > + case INTEL_BIGCORE_SKYLAKE: > > + case INTEL_BIGCORE_AMBERLAKE: > > + case INTEL_BIGCORE_COFFEELAKE: > > + case INTEL_BIGCORE_WHISKEYLAKE: > > + case INTEL_BIGCORE_KABYLAKE: > > + /* NB: Although the errata documents that for model == 0x8e > > + (skylake client), only 0xb stepping or lower are impacted, > > + the intention of the errata was to disable TSX on all client > > + processors on all steppings. Include 0xc stepping which is > > + an Intel Core i7-8665U, a client mobile processor. */ > > + if ((model == 0x8e || model == 0x9e) && stepping > 0xc) > > break; > > - /* Fall through. */ > > - case 0x4e: > > - case 0x5e: > > - { > > + > > /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for > > processors listed in: > > > > https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html > > */ > > -disable_tsx: > > + disable_tsx: > > CPU_FEATURE_UNSET (cpu_features, HLE); > > CPU_FEATURE_UNSET (cpu_features, RTM); > > CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT); > > - } > > - break; > > - case 0x3f: > > - /* Xeon E7 v3 with stepping >= 4 has working TSX. */ > > - if (stepping >= 4) > > break; > > - /* Fall through. */ > > - case 0x3c: > > - case 0x45: > > - case 0x46: > > - /* Disable Intel TSX on Haswell processors (except Xeon E7 v3 > > - with stepping >= 4) to avoid TSX on kernels that weren't > > - updated with the latest microcode package (which disables > > - broken feature by default). */ > > - CPU_FEATURE_UNSET (cpu_features, RTM); > > - break; > > + > > + case INTEL_BIGCORE_HASWELL: > > + /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working > > + TSX. Haswell also include other model numbers that have > > + working TSX. */ > > + if (model == 0x3f && stepping >= 4) > > + break; > > + > > + CPU_FEATURE_UNSET (cpu_features, RTM); > > + break; > > } > > } > > > > -- > > 2.34.1 > > > > > -- > H.J.
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 5bff8ec0b4..264d309dd7 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -417,6 +417,218 @@ _Static_assert (((index_arch_Fast_Unaligned_Load == index_arch_Fast_Copy_Backward)), "Incorrect index_arch_Fast_Unaligned_Load"); + +/* Intel Family-6 microarch list. */ +enum +{ + /* Atom processors. */ + INTEL_ATOM_BONNELL, + INTEL_ATOM_SILVERMONT, + INTEL_ATOM_AIRMONT, + INTEL_ATOM_GOLDMONT, + INTEL_ATOM_GOLDMONT_PLUS, + INTEL_ATOM_SIERRAFOREST, + INTEL_ATOM_GRANDRIDGE, + INTEL_ATOM_TREMONT, + + /* Bigcore processors. */ + INTEL_BIGCORE_MEROM, + INTEL_BIGCORE_PENRYN, + INTEL_BIGCORE_DUNNINGTON, + INTEL_BIGCORE_NEHALEM, + INTEL_BIGCORE_WESTMERE, + INTEL_BIGCORE_SANDYBRIDGE, + INTEL_BIGCORE_IVYBRIDGE, + INTEL_BIGCORE_HASWELL, + INTEL_BIGCORE_BROADWELL, + INTEL_BIGCORE_SKYLAKE, + INTEL_BIGCORE_AMBERLAKE, + INTEL_BIGCORE_COFFEELAKE, + INTEL_BIGCORE_WHISKEYLAKE, + INTEL_BIGCORE_KABYLAKE, + INTEL_BIGCORE_COMETLAKE, + INTEL_BIGCORE_SKYLAKE_AVX512, + INTEL_BIGCORE_CANNONLAKE, + INTEL_BIGCORE_ICELAKE, + INTEL_BIGCORE_TIGERLAKE, + INTEL_BIGCORE_ROCKETLAKE, + INTEL_BIGCORE_SAPPHIRERAPIDS, + INTEL_BIGCORE_RAPTORLAKE, + INTEL_BIGCORE_EMERALDRAPIDS, + INTEL_BIGCORE_METEORLAKE, + INTEL_BIGCORE_LUNARLAKE, + INTEL_BIGCORE_ARROWLAKE, + INTEL_BIGCORE_GRANITERAPIDS, + + /* Mixed (bigcore + atom SOC). */ + INTEL_MIXED_LAKEFIELD, + INTEL_MIXED_ALDERLAKE, + + /* KNL. */ + INTEL_KNIGHTS_MILL, + INTEL_KNIGHTS_LANDING, + + /* Unknown. */ + INTEL_UNKNOWN, +}; + +static unsigned int +intel_get_fam6_microarch (unsigned int model, unsigned int stepping) +{ + switch (model) + { + case 0x1C: + case 0x26: + return INTEL_ATOM_BONNELL; + case 0x27: + case 0x35: + case 0x36: + /* Really Saltwell, but Saltwell is just a die shrink of Bonnell + (microarchitecturally identical). 
*/ + return INTEL_ATOM_BONNELL; + case 0x37: + case 0x4A: + case 0x4D: + case 0x5D: + return INTEL_ATOM_SILVERMONT; + case 0x4C: + case 0x5A: + case 0x75: + return INTEL_ATOM_AIRMONT; + case 0x5C: + case 0x5F: + return INTEL_ATOM_GOLDMONT; + case 0x7A: + return INTEL_ATOM_GOLDMONT_PLUS; + case 0xAF: + return INTEL_ATOM_SIERRAFOREST; + case 0xB6: + return INTEL_ATOM_GRANDRIDGE; + case 0x86: + case 0x96: + case 0x9C: + return INTEL_ATOM_TREMONT; + case 0x0F: + case 0x16: + return INTEL_BIGCORE_MEROM; + case 0x17: + return INTEL_BIGCORE_PENRYN; + case 0x1D: + return INTEL_BIGCORE_DUNNINGTON; + case 0x1A: + case 0x1E: + case 0x1F: + case 0x2E: + return INTEL_BIGCORE_NEHALEM; + case 0x25: + case 0x2C: + case 0x2F: + return INTEL_BIGCORE_WESTMERE; + case 0x2A: + case 0x2D: + return INTEL_BIGCORE_SANDYBRIDGE; + case 0x3A: + case 0x3E: + return INTEL_BIGCORE_IVYBRIDGE; + case 0x3C: + case 0x3F: + case 0x45: + case 0x46: + return INTEL_BIGCORE_HASWELL; + case 0x3D: + case 0x47: + case 0x4F: + case 0x56: + return INTEL_BIGCORE_BROADWELL; + case 0x4E: + case 0x5E: + return INTEL_BIGCORE_SKYLAKE; + case 0x8E: + switch (stepping) + { + case 0x09: + return INTEL_BIGCORE_AMBERLAKE; + case 0x0A: + return INTEL_BIGCORE_COFFEELAKE; + case 0x0B: + case 0x0C: + return INTEL_BIGCORE_WHISKEYLAKE; + default: + return INTEL_BIGCORE_KABYLAKE; + } + case 0x9E: + switch (stepping) + { + case 0x0A: + case 0x0B: + case 0x0C: + case 0x0D: + return INTEL_BIGCORE_COFFEELAKE; + default: + return INTEL_BIGCORE_KABYLAKE; + } + case 0xA5: + case 0xA6: + return INTEL_BIGCORE_COMETLAKE; + case 0x66: + return INTEL_BIGCORE_CANNONLAKE; + case 0x55: + /* + Stepping = {6, 7} + -> Cascadelake + Stepping = {11} + -> Cooperlake + else + -> Skylake-avx512 + + These are all microarchitecturally indentical, so use + Skylake-avx512 for all of them. 
+ */ + return INTEL_BIGCORE_SKYLAKE_AVX512; + case 0x6A: + case 0x6C: + case 0x7D: + case 0x7E: + case 0x9D: + return INTEL_BIGCORE_ICELAKE; + case 0x8C: + case 0x8D: + return INTEL_BIGCORE_TIGERLAKE; + case 0xA7: + return INTEL_BIGCORE_ROCKETLAKE; + case 0x8F: + return INTEL_BIGCORE_SAPPHIRERAPIDS; + case 0xB7: + case 0xBA: + case 0xBF: + return INTEL_BIGCORE_RAPTORLAKE; + case 0xCF: + return INTEL_BIGCORE_EMERALDRAPIDS; + case 0xAA: + case 0xAC: + return INTEL_BIGCORE_METEORLAKE; + case 0xbd: + return INTEL_BIGCORE_LUNARLAKE; + case 0xc6: + return INTEL_BIGCORE_ARROWLAKE; + case 0xAD: + case 0xAE: + return INTEL_BIGCORE_GRANITERAPIDS; + case 0x8A: + return INTEL_MIXED_LAKEFIELD; + case 0x97: + case 0x9A: + case 0xBE: + return INTEL_MIXED_ALDERLAKE; + case 0x85: + return INTEL_KNIGHTS_MILL; + case 0x57: + return INTEL_KNIGHTS_LANDING; + default: + return INTEL_UNKNOWN; + } +} + static inline void init_cpu_features (struct cpu_features *cpu_features) { @@ -453,129 +665,149 @@ init_cpu_features (struct cpu_features *cpu_features) if (family == 0x06) { model += extended_model; - switch (model) + unsigned int microarch + = intel_get_fam6_microarch (model, stepping); + + switch (microarch) { - case 0x1c: - case 0x26: - /* BSF is slow on Atom. */ + /* Atom / KNL tuning. */ + case INTEL_ATOM_BONNELL: + /* BSF is slow on Bonnell. */ cpu_features->preferred[index_arch_Slow_BSF] - |= bit_arch_Slow_BSF; + |= bit_arch_Slow_BSF; break; - case 0x57: - /* Knights Landing. Enable Silvermont optimizations. */ - - case 0x7a: - /* Unaligned load versions are faster than SSSE3 - on Goldmont Plus. */ - - case 0x5c: - case 0x5f: /* Unaligned load versions are faster than SSSE3 - on Goldmont. */ + on Airmont, Silvermont, Goldmont, and Goldmont Plus. */ + case INTEL_ATOM_AIRMONT: + case INTEL_ATOM_SILVERMONT: + case INTEL_ATOM_GOLDMONT: + case INTEL_ATOM_GOLDMONT_PLUS: - case 0x4c: - case 0x5a: - case 0x75: - /* Airmont is a die shrink of Silvermont. */ + /* Knights Landing. 
Enable Silvermont optimizations. */ + case INTEL_KNIGHTS_LANDING: - case 0x37: - case 0x4a: - case 0x4d: - case 0x5d: - /* Unaligned load versions are faster than SSSE3 - on Silvermont. */ cpu_features->preferred[index_arch_Fast_Unaligned_Load] - |= (bit_arch_Fast_Unaligned_Load - | bit_arch_Fast_Unaligned_Copy - | bit_arch_Prefer_PMINUB_for_stringop - | bit_arch_Slow_SSE4_2); + |= (bit_arch_Fast_Unaligned_Load + | bit_arch_Fast_Unaligned_Copy + | bit_arch_Prefer_PMINUB_for_stringop + | bit_arch_Slow_SSE4_2); break; - case 0x86: - case 0x96: - case 0x9c: + case INTEL_ATOM_TREMONT: /* Enable rep string instructions, unaligned load, unaligned - copy, pminub and avoid SSE 4.2 on Tremont. */ + copy, pminub and avoid SSE 4.2 on Tremont. */ cpu_features->preferred[index_arch_Fast_Rep_String] - |= (bit_arch_Fast_Rep_String - | bit_arch_Fast_Unaligned_Load - | bit_arch_Fast_Unaligned_Copy - | bit_arch_Prefer_PMINUB_for_stringop - | bit_arch_Slow_SSE4_2); + |= (bit_arch_Fast_Rep_String | bit_arch_Fast_Unaligned_Load + | bit_arch_Fast_Unaligned_Copy + | bit_arch_Prefer_PMINUB_for_stringop + | bit_arch_Slow_SSE4_2); break; + /* + Default tuned KNL microarch. + case INTEL_KNIGHTS_MILL: + */ + + /* + Default tuned atom microarch. + case INTEL_ATOM_SIERRAFOREST: + case INTEL_ATOM_GRANDRIDGE: + */ + + /* Bigcore/Default Tuning. */ default: /* Unknown family 0x06 processors. Assuming this is one of Core i3/i5/i7 processors if AVX is available. */ if (!CPU_FEATURES_CPU_P (cpu_features, AVX)) break; - /* Fall through. */ - - case 0x1a: - case 0x1e: - case 0x1f: - case 0x25: - case 0x2c: - case 0x2e: - case 0x2f: + case INTEL_BIGCORE_NEHALEM: + case INTEL_BIGCORE_WESTMERE: /* Rep string instructions, unaligned load, unaligned copy, and pminub are fast on Intel Core i3, i5 and i7. 
*/ cpu_features->preferred[index_arch_Fast_Rep_String] - |= (bit_arch_Fast_Rep_String - | bit_arch_Fast_Unaligned_Load - | bit_arch_Fast_Unaligned_Copy - | bit_arch_Prefer_PMINUB_for_stringop); + |= (bit_arch_Fast_Rep_String | bit_arch_Fast_Unaligned_Load + | bit_arch_Fast_Unaligned_Copy + | bit_arch_Prefer_PMINUB_for_stringop); break; + + /* + Default tuned Bigcore microarch. + case INTEL_BIGCORE_SANDYBRIDGE: + case INTEL_BIGCORE_IVYBRIDGE: + case INTEL_BIGCORE_HASWELL: + case INTEL_BIGCORE_BROADWELL: + case INTEL_BIGCORE_SKYLAKE: + case INTEL_BIGCORE_AMBERLAKE: + case INTEL_BIGCORE_COFFEELAKE: + case INTEL_BIGCORE_WHISKEYLAKE: + case INTEL_BIGCORE_KABYLAKE: + case INTEL_BIGCORE_COMETLAKE: + case INTEL_BIGCORE_SKYLAKE_AVX512: + case INTEL_BIGCORE_CANNONLAKE: + case INTEL_BIGCORE_ICELAKE: + case INTEL_BIGCORE_TIGERLAKE: + case INTEL_BIGCORE_ROCKETLAKE: + case INTEL_BIGCORE_RAPTORLAKE: + case INTEL_BIGCORE_METEORLAKE: + case INTEL_BIGCORE_LUNARLAKE: + case INTEL_BIGCORE_ARROWLAKE: + case INTEL_BIGCORE_SAPPHIRERAPIDS: + case INTEL_BIGCORE_EMERALDRAPIDS: + case INTEL_BIGCORE_GRANITERAPIDS: + */ + + /* + Default tuned Mixed (bigcore + atom SOC). + case INTEL_MIXED_LAKEFIELD: + case INTEL_MIXED_ALDERLAKE: + */ } - /* Disable TSX on some processors to avoid TSX on kernels that - weren't updated with the latest microcode package (which - disables broken feature by default). */ - switch (model) + /* Disable TSX on some processors to avoid TSX on kernels that + weren't updated with the latest microcode package (which + disables broken feature by default). */ + switch (microarch) { - case 0x55: + case INTEL_BIGCORE_SKYLAKE_AVX512: + /* 0x55 (Skylake-avx512) && stepping <= 5 disable TSX. */ if (stepping <= 5) goto disable_tsx; break; - case 0x8e: - /* NB: Although the errata documents that for model == 0x8e, - only 0xb stepping or lower are impacted, the intention of - the errata was to disable TSX on all client processors on - all steppings. 
Include 0xc stepping which is an Intel - Core i7-8665U, a client mobile processor. */ - case 0x9e: - if (stepping > 0xc) + + case INTEL_BIGCORE_SKYLAKE: + case INTEL_BIGCORE_AMBERLAKE: + case INTEL_BIGCORE_COFFEELAKE: + case INTEL_BIGCORE_WHISKEYLAKE: + case INTEL_BIGCORE_KABYLAKE: + /* NB: Although the errata documents that for model == 0x8e + (skylake client), only 0xb stepping or lower are impacted, + the intention of the errata was to disable TSX on all client + processors on all steppings. Include 0xc stepping which is + an Intel Core i7-8665U, a client mobile processor. */ + if ((model == 0x8e || model == 0x9e) && stepping > 0xc) break; - /* Fall through. */ - case 0x4e: - case 0x5e: - { + /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for processors listed in: https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html */ -disable_tsx: + disable_tsx: CPU_FEATURE_UNSET (cpu_features, HLE); CPU_FEATURE_UNSET (cpu_features, RTM); CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT); - } - break; - case 0x3f: - /* Xeon E7 v3 with stepping >= 4 has working TSX. */ - if (stepping >= 4) break; - /* Fall through. */ - case 0x3c: - case 0x45: - case 0x46: - /* Disable Intel TSX on Haswell processors (except Xeon E7 v3 - with stepping >= 4) to avoid TSX on kernels that weren't - updated with the latest microcode package (which disables - broken feature by default). */ - CPU_FEATURE_UNSET (cpu_features, RTM); - break; + + case INTEL_BIGCORE_HASWELL: + /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working + TSX. Haswell also includes other model numbers that have + working TSX. */ + if (model == 0x3f && stepping >= 4) + break; + + CPU_FEATURE_UNSET (cpu_features, RTM); + break; + } }