diff mbox series

[SRU,J,v3,1/1] x86/CPU/AMD: Improve the erratum 1386 workaround

Message ID 20240906173709.17861-3-bethany.jamison@canonical.com
State New
Headers show
Series disable XSAVES workaround | expand

Commit Message

Bethany Jamison Sept. 6, 2024, 5:37 p.m. UTC
From: "Borislav Petkov (AMD)" <bp@alien8.de>

BugLink: https://bugs.launchpad.net/bugs/2077321

Disable XSAVES only on machines which haven't loaded the microcode
revision containing the erratum fix.

This will come in handy when running archaic OSes as guests. OSes whose
brilliant programmers thought that CPUID is overrated and one should not
query it but use features directly, ala shoot first, ask questions
later... but only if you're alive after the shooting.

Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Tested-by: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Link: https://lore.kernel.org/r/20240324200525.GBZgCHhYFsBj12PrKv@fat_crate.local
(backported from commit 29ba89f1895285f06c333546882e0c5ae9a6df23)
[bjamison: context conflict from neighboring line, Jammy is missing
'fix_erratum_1386' definition]
Signed-off-by: Bethany Jamison <bethany.jamison@canonical.com>
---
 arch/x86/include/asm/cpu_device_id.h |  8 ++++++++
 arch/x86/kernel/cpu/amd.c            | 12 ++++++++++++
 2 files changed, 20 insertions(+)

Comments

Stefan Bader Sept. 9, 2024, 9:50 a.m. UTC | #1
On 06.09.24 19:37, Bethany Jamison wrote:
> From: "Borislav Petkov (AMD)" <bp@alien8.de>
> 
> BugLink: https://bugs.launchpad.net/bugs/2077321
> 
> Disable XSAVES only on machines which haven't loaded the microcode
> revision containing the erratum fix.
> 
> This will come in handy when running archaic OSes as guests. OSes whose
> brilliant programmers thought that CPUID is overrated and one should not
> query it but use features directly, ala shoot first, ask questions
> later... but only if you're alive after the shooting.
> 
> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
> Tested-by: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> Link: https://lore.kernel.org/r/20240324200525.GBZgCHhYFsBj12PrKv@fat_crate.local
> (backported from commit 29ba89f1895285f06c333546882e0c5ae9a6df23)
> [bjamison: context conflict from neighboring line, Jammy is missing
> 'fix_erratum_1386' definition]
> Signed-off-by: Bethany Jamison <bethany.jamison@canonical.com>
> ---
>   arch/x86/include/asm/cpu_device_id.h |  8 ++++++++
>   arch/x86/kernel/cpu/amd.c            | 12 ++++++++++++
>   2 files changed, 20 insertions(+)
> 
> diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
> index e8e3dbe7f1730..b6325ee308718 100644
> --- a/arch/x86/include/asm/cpu_device_id.h
> +++ b/arch/x86/include/asm/cpu_device_id.h
> @@ -288,6 +288,14 @@ struct x86_cpu_desc {
>   	.x86_microcode_rev	= (revision),			\
>   }
>   
> +#define AMD_CPU_DESC(fam, model, stepping, revision) {		\
> +	.x86_family		= (fam),			\
> +	.x86_vendor		= X86_VENDOR_AMD,		\
> +	.x86_model		= (model),			\
> +	.x86_stepping		= (stepping),			\
> +	.x86_microcode_rev	= (revision),			\
> +}
> +
>   extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
>   extern bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table);
>   
> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
> index c99a16417cf03..cb8bd6145d1bc 100644
> --- a/arch/x86/kernel/cpu/amd.c
> +++ b/arch/x86/kernel/cpu/amd.c
> @@ -13,6 +13,7 @@
>   #include <asm/apic.h>
>   #include <asm/cacheinfo.h>
>   #include <asm/cpu.h>
> +#include <asm/cpu_device_id.h>
>   #include <asm/spec-ctrl.h>
>   #include <asm/smp.h>
>   #include <asm/numa.h>
> @@ -931,6 +932,11 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
>   	clear_rdrand_cpuid_bit(c);
>   }
>   
> +static const struct x86_cpu_desc erratum_1386_microcode[] = {
> +	AMD_CPU_DESC(0x17,  0x1, 0x2, 0x0800126e),
> +	AMD_CPU_DESC(0x17, 0x31, 0x0, 0x08301052),
> +};

Hm, if above defines erratum_1386 and the missing call was 
fix_erratum_1386 I have a bad feeling this is done right in jammy...
The disable xsave call is placed in init_spectral_chicken() which is 
called from init_amd() for cpu model 0x17. After that if falls through 
to model 0x19 which calls init_amd_zn(). To me the latter sounds more 
like Zen architecture. That would have been the place to disable xsave 
for zen and even then probably should have been adjusted to exclude zen3 
(or the other way round limited to only zen1/2).

> +
>   void init_spectral_chicken(struct cpuinfo_x86 *c)
>   {
>   #ifdef CONFIG_CPU_UNRET_ENTRY
> @@ -958,7 +964,13 @@ void init_spectral_chicken(struct cpuinfo_x86 *c)
>   	 *
>   	 * Affected parts all have no supervisor XSAVE states, meaning that
>   	 * the XSAVEC instruction (which works fine) is equivalent.
> +	 *
> +	 * Clear the feature flag only on microcode revisions which
> +	 * don't have the fix.
>   	 */
> +	if (x86_cpu_has_min_microcode_rev(erratum_1386_microcode))
> +		return;
> +
>   	clear_cpu_cap(c, X86_FEATURE_XSAVES);
>   }
>
Stefan Bader Sept. 9, 2024, 1:15 p.m. UTC | #2
On 09.09.24 11:50, Stefan Bader wrote:
> On 06.09.24 19:37, Bethany Jamison wrote:
>> From: "Borislav Petkov (AMD)" <bp@alien8.de>
>>
>> BugLink: https://bugs.launchpad.net/bugs/2077321
>>
>> Disable XSAVES only on machines which haven't loaded the microcode
>> revision containing the erratum fix.
>>
>> This will come in handy when running archaic OSes as guests. OSes whose
>> brilliant programmers thought that CPUID is overrated and one should not
>> query it but use features directly, ala shoot first, ask questions
>> later... but only if you're alive after the shooting.
>>
>> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
>> Tested-by: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
>> Link: 
>> https://lore.kernel.org/r/20240324200525.GBZgCHhYFsBj12PrKv@fat_crate.local
>> (backported from commit 29ba89f1895285f06c333546882e0c5ae9a6df23)
>> [bjamison: context conflict from neighboring line, Jammy is missing
>> 'fix_erratum_1386' definition]
>> Signed-off-by: Bethany Jamison <bethany.jamison@canonical.com>
>> ---
>>   arch/x86/include/asm/cpu_device_id.h |  8 ++++++++
>>   arch/x86/kernel/cpu/amd.c            | 12 ++++++++++++
>>   2 files changed, 20 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/cpu_device_id.h 
>> b/arch/x86/include/asm/cpu_device_id.h
>> index e8e3dbe7f1730..b6325ee308718 100644
>> --- a/arch/x86/include/asm/cpu_device_id.h
>> +++ b/arch/x86/include/asm/cpu_device_id.h
>> @@ -288,6 +288,14 @@ struct x86_cpu_desc {
>>       .x86_microcode_rev    = (revision),            \
>>   }
>> +#define AMD_CPU_DESC(fam, model, stepping, revision) {        \
>> +    .x86_family        = (fam),            \
>> +    .x86_vendor        = X86_VENDOR_AMD,        \
>> +    .x86_model        = (model),            \
>> +    .x86_stepping        = (stepping),            \
>> +    .x86_microcode_rev    = (revision),            \
>> +}
>> +
>>   extern const struct x86_cpu_id *x86_match_cpu(const struct 
>> x86_cpu_id *match);
>>   extern bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc 
>> *table);
>> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
>> index c99a16417cf03..cb8bd6145d1bc 100644
>> --- a/arch/x86/kernel/cpu/amd.c
>> +++ b/arch/x86/kernel/cpu/amd.c
>> @@ -13,6 +13,7 @@
>>   #include <asm/apic.h>
>>   #include <asm/cacheinfo.h>
>>   #include <asm/cpu.h>
>> +#include <asm/cpu_device_id.h>
>>   #include <asm/spec-ctrl.h>
>>   #include <asm/smp.h>
>>   #include <asm/numa.h>
>> @@ -931,6 +932,11 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
>>       clear_rdrand_cpuid_bit(c);
>>   }
>> +static const struct x86_cpu_desc erratum_1386_microcode[] = {
>> +    AMD_CPU_DESC(0x17,  0x1, 0x2, 0x0800126e),
>> +    AMD_CPU_DESC(0x17, 0x31, 0x0, 0x08301052),
>> +};
> 
> Hm, if above defines erratum_1386 and the missing call was 
> fix_erratum_1386 I have a bad feeling this is done right in jammy...
> The disable xsave call is placed in init_spectral_chicken() which is 
> called from init_amd() for cpu model 0x17. After that if falls through 
> to model 0x19 which calls init_amd_zn(). To me the latter sounds more 
> like Zen architecture. That would have been the place to disable xsave 
> for zen and even then probably should have been adjusted to exclude zen3 
> (or the other way round limited to only zen1/2).

Looking back at the change which introduced disabling XSAVE into the 
spectral_chicken init function I found that it appears to be deliberate 
by AMD people... I guess in that case it is not on me to question that.

commit b0563468eeac88ebc70559d52a0b66efc37e4e9d upstream
   x86/CPU/AMD: Disable XSAVES on AMD family 0x17
   ...
   Ignore the XSAVES feature on all AMD Zen1/2 hardware.  The XSAVEC
   instruction (which works fine) is equivalent on affected parts.

   [ bp: Typos, move it into the F17h-specific function. ]

   Reported-by: Tavis Ormandy <taviso@gmail.com>
   Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
   Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
   ...

> 
>> +
>>   void init_spectral_chicken(struct cpuinfo_x86 *c)
>>   {
>>   #ifdef CONFIG_CPU_UNRET_ENTRY
>> @@ -958,7 +964,13 @@ void init_spectral_chicken(struct cpuinfo_x86 *c)
>>        *
>>        * Affected parts all have no supervisor XSAVE states, meaning that
>>        * the XSAVEC instruction (which works fine) is equivalent.
>> +     *
>> +     * Clear the feature flag only on microcode revisions which
>> +     * don't have the fix.
>>        */
>> +    if (x86_cpu_has_min_microcode_rev(erratum_1386_microcode))
>> +        return;
>> +
>>       clear_cpu_cap(c, X86_FEATURE_XSAVES);
>>   }
> 
>
diff mbox series

Patch

diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
index e8e3dbe7f1730..b6325ee308718 100644
--- a/arch/x86/include/asm/cpu_device_id.h
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -288,6 +288,14 @@  struct x86_cpu_desc {
 	.x86_microcode_rev	= (revision),			\
 }
 
+#define AMD_CPU_DESC(fam, model, stepping, revision) {		\
+	.x86_family		= (fam),			\
+	.x86_vendor		= X86_VENDOR_AMD,		\
+	.x86_model		= (model),			\
+	.x86_stepping		= (stepping),			\
+	.x86_microcode_rev	= (revision),			\
+}
+
 extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
 extern bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table);
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c99a16417cf03..cb8bd6145d1bc 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -13,6 +13,7 @@ 
 #include <asm/apic.h>
 #include <asm/cacheinfo.h>
 #include <asm/cpu.h>
+#include <asm/cpu_device_id.h>
 #include <asm/spec-ctrl.h>
 #include <asm/smp.h>
 #include <asm/numa.h>
@@ -931,6 +932,11 @@  static void init_amd_bd(struct cpuinfo_x86 *c)
 	clear_rdrand_cpuid_bit(c);
 }
 
+static const struct x86_cpu_desc erratum_1386_microcode[] = {
+	AMD_CPU_DESC(0x17,  0x1, 0x2, 0x0800126e),
+	AMD_CPU_DESC(0x17, 0x31, 0x0, 0x08301052),
+};
+
 void init_spectral_chicken(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_CPU_UNRET_ENTRY
@@ -958,7 +964,13 @@  void init_spectral_chicken(struct cpuinfo_x86 *c)
 	 *
 	 * Affected parts all have no supervisor XSAVE states, meaning that
 	 * the XSAVEC instruction (which works fine) is equivalent.
+	 *
+	 * Clear the feature flag only on microcode revisions which
+	 * don't have the fix.
 	 */
+	if (x86_cpu_has_min_microcode_rev(erratum_1386_microcode))
+		return;
+
 	clear_cpu_cap(c, X86_FEATURE_XSAVES);
 }