diff mbox series

[REPOST,v3,5/5] amd_iommu: report x2APIC support to the operating system

Message ID 20230411142440.8018-6-minhquangbui99@gmail.com
State New
Headers show
Series [REPOST,v3,1/5] i386/tcg: implement x2APIC registers MSR access | expand

Commit Message

Bui Quang Minh April 11, 2023, 2:24 p.m. UTC
This commit adds XTSup configuration to let user choose to whether enable
this feature or not. When XTSup is enabled, additional bytes in IRTE with
enabled guest virtual VAPIC are used to support 32-bit destination id.

Additionally, this commit changes to use IVHD type 0x11 in ACPI table for
feature report to operating system. This is because Linux does not use
XTSup in IOMMU Feature Reporting field of IVHD type 0x10 but only use XTSup
bit in EFR Register Image of IVHD 0x11 to indicate x2APIC support (see
init_iommu_one in linux/drivers/iommu/amd/init.c)

Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
---
 hw/i386/acpi-build.c | 28 ++++++++++++++--------------
 hw/i386/amd_iommu.c  | 21 +++++++++++++++++++--
 hw/i386/amd_iommu.h  | 16 +++++++++++-----
 3 files changed, 44 insertions(+), 21 deletions(-)

Comments

Michael S. Tsirkin May 12, 2023, 2:39 p.m. UTC | #1
On Tue, Apr 11, 2023 at 09:24:40PM +0700, Bui Quang Minh wrote:
> This commit adds XTSup configuration to let user choose to whether enable
> this feature or not. When XTSup is enabled, additional bytes in IRTE with
> enabled guest virtual VAPIC are used to support 32-bit destination id.
> 
> Additionally, this commit changes to use IVHD type 0x11 in ACPI table for
> feature report to operating system. This is because Linux does not use
> XTSup in IOMMU Feature Reporting field of IVHD type 0x10 but only use XTSup
> bit in EFR Register Image of IVHD 0x11 to indicate x2APIC support (see
> init_iommu_one in linux/drivers/iommu/amd/init.c)
> 
> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>

I'm concerned that switching to type 11 will break some older guests.
It would be better if we could export both type 10 and type 11
ivhd. A question however would be how does this interact
with older guests. For example:
https://lists.linuxfoundation.org/pipermail/iommu/2016-January/015310.html
it looks like linux before 2016 only expected one ivhd entry?

Some research and testing here would be benefitial.
Similarly for windows guests.

Thanks!



> ---
>  hw/i386/acpi-build.c | 28 ++++++++++++++--------------
>  hw/i386/amd_iommu.c  | 21 +++++++++++++++++++--
>  hw/i386/amd_iommu.h  | 16 +++++++++++-----
>  3 files changed, 44 insertions(+), 21 deletions(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index ec857a117e..72d6bb2892 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -2339,7 +2339,7 @@ static void
>  build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
>                  const char *oem_table_id)
>  {
> -    int ivhd_table_len = 24;
> +    int ivhd_table_len = 40;
>      AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
>      GArray *ivhd_blob = g_array_new(false, true, 1);
>      AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
> @@ -2349,18 +2349,19 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
>      /* IVinfo - IO virtualization information common to all
>       * IOMMU units in a system
>       */
> -    build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
> +    build_append_int_noprefix(table_data,
> +                             (1UL << 0) | /* EFRSup */
> +                             (40UL << 8), /* PASize */
> +                             4);
>      /* reserved */
>      build_append_int_noprefix(table_data, 0, 8);
>  
> -    /* IVHD definition - type 10h */
> -    build_append_int_noprefix(table_data, 0x10, 1);
> +    /* IVHD definition - type 11h */
> +    build_append_int_noprefix(table_data, 0x11, 1);
>      /* virtualization flags */
>      build_append_int_noprefix(table_data,
>                               (1UL << 0) | /* HtTunEn      */
> -                             (1UL << 4) | /* iotblSup     */

btw this should have been iotlbsup?

> -                             (1UL << 6) | /* PrefSup      */
> -                             (1UL << 7),  /* PPRSup       */
> +                             (1UL << 4),  /* iotblSup     */
>                               1);
>  
>      /*

hmm why are you removing these other flags?

> @@ -2404,13 +2405,12 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
>      build_append_int_noprefix(table_data, 0, 2);
>      /* IOMMU info */
>      build_append_int_noprefix(table_data, 0, 2);
> -    /* IOMMU Feature Reporting */
> -    build_append_int_noprefix(table_data,
> -                             (48UL << 30) | /* HATS   */
> -                             (48UL << 28) | /* GATS   */
> -                             (1UL << 2)   | /* GTSup  */
> -                             (1UL << 6),    /* GASup  */
> -                             4);
> +    /* IOMMU Attributes */
> +    build_append_int_noprefix(table_data, 0, 4);
> +    /* EFR Register Image */
> +    build_append_int_noprefix(table_data, s->efr_reg, 8);
> +    /* EFR Register Image 2 */
> +    build_append_int_noprefix(table_data, 0, 8);


here too. what's going on?

>  
>      /* IVHD entries as found above */
>      g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
> diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
> index bcd016f5c5..5dfa93d945 100644
> --- a/hw/i386/amd_iommu.c
> +++ b/hw/i386/amd_iommu.c
> @@ -31,6 +31,7 @@
>  #include "hw/i386/apic_internal.h"
>  #include "trace.h"
>  #include "hw/i386/apic-msidef.h"
> +#include "hw/qdev-properties.h"
>  
>  /* used AMD-Vi MMIO registers */
>  const char *amdvi_mmio_low[] = {
> @@ -1155,7 +1156,12 @@ static int amdvi_int_remap_ga(AMDVIState *iommu,
>      irq->vector = irte.hi.fields.vector;
>      irq->dest_mode = irte.lo.fields_remap.dm;
>      irq->redir_hint = irte.lo.fields_remap.rq_eoi;
> -    irq->dest = irte.lo.fields_remap.destination;
> +    if (!iommu->xtsup) {
> +        irq->dest = irte.lo.fields_remap.destination & 0xff;
> +    } else {
> +        irq->dest = irte.lo.fields_remap.destination |
> +                    (irte.hi.fields.destination_hi << 24);
> +    }

Weird way to put it. Why not if (iommu->xtsup) ... ?

>  
>      return 0;
>  }
> @@ -1503,10 +1509,15 @@ static void amdvi_init(AMDVIState *s)
>      s->enabled = false;
>      s->ats_enabled = false;
>      s->cmdbuf_enabled = false;
> +    s->efr_reg = AMDVI_DEFAULT_EXT_FEATURES;
> +
> +    if (s->xtsup) {
> +        s->efr_reg |= AMDVI_FEATURE_XT;
> +    }
>  
>      /* reset MMIO */
>      memset(s->mmior, 0, AMDVI_MMIO_SIZE);
> -    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES,
> +    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, s->efr_reg,
>              0xffffffffffffffef, 0);
>      amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
>  
> @@ -1586,6 +1597,11 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
>      amdvi_init(s);
>  }
>  
> +static Property amdvi_properties[] = {
> +    DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
>  static const VMStateDescription vmstate_amdvi_sysbus = {
>      .name = "amd-iommu",
>      .unmigratable = 1
> @@ -1612,6 +1628,7 @@ static void amdvi_sysbus_class_init(ObjectClass *klass, void *data)
>      dc->user_creatable = true;
>      set_bit(DEVICE_CATEGORY_MISC, dc->categories);
>      dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
> +    device_class_set_props(dc, amdvi_properties);
>  }
>  
>  static const TypeInfo amdvi_sysbus = {
> diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
> index 79d38a3e41..96df7b0400 100644
> --- a/hw/i386/amd_iommu.h
> +++ b/hw/i386/amd_iommu.h
> @@ -154,6 +154,7 @@
>  
>  #define AMDVI_FEATURE_PREFETCH            (1ULL << 0) /* page prefetch       */
>  #define AMDVI_FEATURE_PPR                 (1ULL << 1) /* PPR Support         */
> +#define AMDVI_FEATURE_XT                  (1ULL << 2) /* x2APIC Support      */
>  #define AMDVI_FEATURE_GT                  (1ULL << 4) /* Guest Translation   */
>  #define AMDVI_FEATURE_IA                  (1ULL << 6) /* inval all support   */
>  #define AMDVI_FEATURE_GA                  (1ULL << 7) /* guest VAPIC support */
> @@ -173,8 +174,9 @@
>  #define AMDVI_IOTLB_MAX_SIZE 1024
>  #define AMDVI_DEVID_SHIFT    36
>  
> -/* extended feature support */
> -#define AMDVI_EXT_FEATURES (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \
> +/* default extended feature */
> +#define AMDVI_DEFAULT_EXT_FEATURES \
> +        (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \
>          AMDVI_FEATURE_IA | AMDVI_FEATURE_GT | AMDVI_FEATURE_HE | \
>          AMDVI_GATS_MODE | AMDVI_HATS_MODE | AMDVI_FEATURE_GA)
>  
> @@ -278,8 +280,8 @@ union irte_ga_lo {
>                  dm:1,
>                  /* ------ */
>                  guest_mode:1,
> -                destination:8,
> -                rsvd_1:48;
> +                destination:24,
> +                rsvd_1:32;
>    } fields_remap;
>  };
>  
> @@ -287,7 +289,8 @@ union irte_ga_hi {
>    uint64_t val;
>    struct {
>        uint64_t  vector:8,
> -                rsvd_2:56;
> +                rsvd_2:48,
> +                destination_hi:8;
>    } fields;
>  };
>  
> @@ -367,6 +370,9 @@ struct AMDVIState {
>  
>      /* Interrupt remapping */
>      bool ga_enabled;
> +    bool xtsup;
> +
> +    uint64_t efr_reg;            /* extended feature register */
>  };
>  
>  #endif
> -- 
> 2.25.1
Bui Quang Minh May 14, 2023, 8:55 a.m. UTC | #2
On 5/12/23 21:39, Michael S. Tsirkin wrote:
> On Tue, Apr 11, 2023 at 09:24:40PM +0700, Bui Quang Minh wrote:
>> This commit adds XTSup configuration to let user choose to whether enable
>> this feature or not. When XTSup is enabled, additional bytes in IRTE with
>> enabled guest virtual VAPIC are used to support 32-bit destination id.
>>
>> Additionally, this commit changes to use IVHD type 0x11 in ACPI table for
>> feature report to operating system. This is because Linux does not use
>> XTSup in IOMMU Feature Reporting field of IVHD type 0x10 but only use XTSup
>> bit in EFR Register Image of IVHD 0x11 to indicate x2APIC support (see
>> init_iommu_one in linux/drivers/iommu/amd/init.c)
>>
>> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
> 
> I'm concerned that switching to type 11 will break some older guests.
> It would be better if we could export both type 10 and type 11
> ivhd. A question however would be how does this interact
> with older guests. For example:
> https://lists.linuxfoundation.org/pipermail/iommu/2016-January/015310.html
> it looks like linux before 2016 only expected one ivhd entry?

Export both type 0x10 and 0x11 looks reasonable to me. Before the above 
commit, I see that Linux still loops through multiple ivhd but only 
handles one with type 0x10. On newer kernel, it will choose to handle 
the type that appears last corresponding the first devid, which is weird 
in my opinion.

+static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
+{
+	u8 *base = (u8 *)ivrs;
+	struct ivhd_header *ivhd = (struct ivhd_header *)
+					(base + IVRS_HEADER_LENGTH);
+	u8 last_type = ivhd->type;
+	u16 devid = ivhd->devid;
+
+	while (((u8 *)ivhd - base < ivrs->length) &&
+	       (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
+		u8 *p = (u8 *) ivhd;
+
+		if (ivhd->devid == devid)
+			last_type = ivhd->type;
+		ivhd = (struct ivhd_header *)(p + ivhd->length);
+	}
+
+	return last_type;
+}

So when exposing type 0x10 following by 0x11, old kernel only parses 
0x10 and does not support x2APIC while new kernel parse 0x11 and support 
x2APIC. I will expose both types in the next version.

> Some research and testing here would be benefitial.
> Similarly for windows guests.
> 
> Thanks!
> 
> 
> 
>> ---
>>   hw/i386/acpi-build.c | 28 ++++++++++++++--------------
>>   hw/i386/amd_iommu.c  | 21 +++++++++++++++++++--
>>   hw/i386/amd_iommu.h  | 16 +++++++++++-----
>>   3 files changed, 44 insertions(+), 21 deletions(-)
>>
>> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
>> index ec857a117e..72d6bb2892 100644
>> --- a/hw/i386/acpi-build.c
>> +++ b/hw/i386/acpi-build.c
>> @@ -2339,7 +2339,7 @@ static void
>>   build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
>>                   const char *oem_table_id)
>>   {
>> -    int ivhd_table_len = 24;
>> +    int ivhd_table_len = 40;
>>       AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
>>       GArray *ivhd_blob = g_array_new(false, true, 1);
>>       AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
>> @@ -2349,18 +2349,19 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
>>       /* IVinfo - IO virtualization information common to all
>>        * IOMMU units in a system
>>        */
>> -    build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
>> +    build_append_int_noprefix(table_data,
>> +                             (1UL << 0) | /* EFRSup */
>> +                             (40UL << 8), /* PASize */
>> +                             4);
>>       /* reserved */
>>       build_append_int_noprefix(table_data, 0, 8);
>>   
>> -    /* IVHD definition - type 10h */
>> -    build_append_int_noprefix(table_data, 0x10, 1);
>> +    /* IVHD definition - type 11h */
>> +    build_append_int_noprefix(table_data, 0x11, 1);
>>       /* virtualization flags */
>>       build_append_int_noprefix(table_data,
>>                                (1UL << 0) | /* HtTunEn      */
>> -                             (1UL << 4) | /* iotblSup     */
> 
> btw this should have been iotlbsup?
> 
>> -                             (1UL << 6) | /* PrefSup      */
>> -                             (1UL << 7),  /* PPRSup       */
>> +                             (1UL << 4),  /* iotblSup     */
>>                                1);
>>   
>>       /*
> 
> hmm why are you removing these other flags?

According to the AMD IOMMU specification, the bit 6, 7 are reserved in 
type 0x11 which are PerfSup, PPRSup respectively in type 0x10 so I 
remove those flags when changing to type 0x11. In type 0x11, these 
feature are reported via the below EFR Register Image I believe.

> 
>> @@ -2404,13 +2405,12 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
>>       build_append_int_noprefix(table_data, 0, 2);
>>       /* IOMMU info */
>>       build_append_int_noprefix(table_data, 0, 2);
>> -    /* IOMMU Feature Reporting */
>> -    build_append_int_noprefix(table_data,
>> -                             (48UL << 30) | /* HATS   */
>> -                             (48UL << 28) | /* GATS   */
>> -                             (1UL << 2)   | /* GTSup  */
>> -                             (1UL << 6),    /* GASup  */
>> -                             4);
>> +    /* IOMMU Attributes */
>> +    build_append_int_noprefix(table_data, 0, 4);
>> +    /* EFR Register Image */
>> +    build_append_int_noprefix(table_data, s->efr_reg, 8);
>> +    /* EFR Register Image 2 */
>> +    build_append_int_noprefix(table_data, 0, 8);
> 
> 
> here too. what's going on?

Same as the above, the structure of type 0x11 is different from 0x10. At 
offset 20 it is not IOMMU feature reporting but IOMMU attributes. And 
there are 2 more fields: EFR Register Image, EFR Register Image 2 before 
IVHD device entries.

>>   
>>       /* IVHD entries as found above */
>>       g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
>> diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
>> index bcd016f5c5..5dfa93d945 100644
>> --- a/hw/i386/amd_iommu.c
>> +++ b/hw/i386/amd_iommu.c
>> @@ -31,6 +31,7 @@
>>   #include "hw/i386/apic_internal.h"
>>   #include "trace.h"
>>   #include "hw/i386/apic-msidef.h"
>> +#include "hw/qdev-properties.h"
>>   
>>   /* used AMD-Vi MMIO registers */
>>   const char *amdvi_mmio_low[] = {
>> @@ -1155,7 +1156,12 @@ static int amdvi_int_remap_ga(AMDVIState *iommu,
>>       irq->vector = irte.hi.fields.vector;
>>       irq->dest_mode = irte.lo.fields_remap.dm;
>>       irq->redir_hint = irte.lo.fields_remap.rq_eoi;
>> -    irq->dest = irte.lo.fields_remap.destination;
>> +    if (!iommu->xtsup) {
>> +        irq->dest = irte.lo.fields_remap.destination & 0xff;
>> +    } else {
>> +        irq->dest = irte.lo.fields_remap.destination |
>> +                    (irte.hi.fields.destination_hi << 24);
>> +    }
> 
> Weird way to put it. Why not if (iommu->xtsup) ... ?

Okay, I'll fix this in next version.

>>   
>>       return 0;
>>   }
>> @@ -1503,10 +1509,15 @@ static void amdvi_init(AMDVIState *s)
>>       s->enabled = false;
>>       s->ats_enabled = false;
>>       s->cmdbuf_enabled = false;
>> +    s->efr_reg = AMDVI_DEFAULT_EXT_FEATURES;
>> +
>> +    if (s->xtsup) {
>> +        s->efr_reg |= AMDVI_FEATURE_XT;
>> +    }
>>   
>>       /* reset MMIO */
>>       memset(s->mmior, 0, AMDVI_MMIO_SIZE);
>> -    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES,
>> +    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, s->efr_reg,
>>               0xffffffffffffffef, 0);
>>       amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
>>   
>> @@ -1586,6 +1597,11 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
>>       amdvi_init(s);
>>   }
>>   
>> +static Property amdvi_properties[] = {
>> +    DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false),
>> +    DEFINE_PROP_END_OF_LIST(),
>> +};
>> +
>>   static const VMStateDescription vmstate_amdvi_sysbus = {
>>       .name = "amd-iommu",
>>       .unmigratable = 1
>> @@ -1612,6 +1628,7 @@ static void amdvi_sysbus_class_init(ObjectClass *klass, void *data)
>>       dc->user_creatable = true;
>>       set_bit(DEVICE_CATEGORY_MISC, dc->categories);
>>       dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
>> +    device_class_set_props(dc, amdvi_properties);
>>   }
>>   
>>   static const TypeInfo amdvi_sysbus = {
>> diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
>> index 79d38a3e41..96df7b0400 100644
>> --- a/hw/i386/amd_iommu.h
>> +++ b/hw/i386/amd_iommu.h
>> @@ -154,6 +154,7 @@
>>   
>>   #define AMDVI_FEATURE_PREFETCH            (1ULL << 0) /* page prefetch       */
>>   #define AMDVI_FEATURE_PPR                 (1ULL << 1) /* PPR Support         */
>> +#define AMDVI_FEATURE_XT                  (1ULL << 2) /* x2APIC Support      */
>>   #define AMDVI_FEATURE_GT                  (1ULL << 4) /* Guest Translation   */
>>   #define AMDVI_FEATURE_IA                  (1ULL << 6) /* inval all support   */
>>   #define AMDVI_FEATURE_GA                  (1ULL << 7) /* guest VAPIC support */
>> @@ -173,8 +174,9 @@
>>   #define AMDVI_IOTLB_MAX_SIZE 1024
>>   #define AMDVI_DEVID_SHIFT    36
>>   
>> -/* extended feature support */
>> -#define AMDVI_EXT_FEATURES (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \
>> +/* default extended feature */
>> +#define AMDVI_DEFAULT_EXT_FEATURES \
>> +        (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \
>>           AMDVI_FEATURE_IA | AMDVI_FEATURE_GT | AMDVI_FEATURE_HE | \
>>           AMDVI_GATS_MODE | AMDVI_HATS_MODE | AMDVI_FEATURE_GA)
>>   
>> @@ -278,8 +280,8 @@ union irte_ga_lo {
>>                   dm:1,
>>                   /* ------ */
>>                   guest_mode:1,
>> -                destination:8,
>> -                rsvd_1:48;
>> +                destination:24,
>> +                rsvd_1:32;
>>     } fields_remap;
>>   };
>>   
>> @@ -287,7 +289,8 @@ union irte_ga_hi {
>>     uint64_t val;
>>     struct {
>>         uint64_t  vector:8,
>> -                rsvd_2:56;
>> +                rsvd_2:48,
>> +                destination_hi:8;
>>     } fields;
>>   };
>>   
>> @@ -367,6 +370,9 @@ struct AMDVIState {
>>   
>>       /* Interrupt remapping */
>>       bool ga_enabled;
>> +    bool xtsup;
>> +
>> +    uint64_t efr_reg;            /* extended feature register */
>>   };
>>   
>>   #endif
>> -- 
>> 2.25.1
> 

Thanks,
Quang Minh.
Michael S. Tsirkin May 14, 2023, 8:44 p.m. UTC | #3
On Sun, May 14, 2023 at 03:55:11PM +0700, Bui Quang Minh wrote:
> On 5/12/23 21:39, Michael S. Tsirkin wrote:
> > On Tue, Apr 11, 2023 at 09:24:40PM +0700, Bui Quang Minh wrote:
> > > This commit adds XTSup configuration to let user choose to whether enable
> > > this feature or not. When XTSup is enabled, additional bytes in IRTE with
> > > enabled guest virtual VAPIC are used to support 32-bit destination id.
> > > 
> > > Additionally, this commit changes to use IVHD type 0x11 in ACPI table for
> > > feature report to operating system. This is because Linux does not use
> > > XTSup in IOMMU Feature Reporting field of IVHD type 0x10 but only use XTSup
> > > bit in EFR Register Image of IVHD 0x11 to indicate x2APIC support (see
> > > init_iommu_one in linux/drivers/iommu/amd/init.c)
> > > 
> > > Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
> > 
> > I'm concerned that switching to type 11 will break some older guests.
> > It would be better if we could export both type 10 and type 11
> > ivhd. A question however would be how does this interact
> > with older guests. For example:
> > https://lists.linuxfoundation.org/pipermail/iommu/2016-January/015310.html
> > it looks like linux before 2016 only expected one ivhd entry?
> 
> Export both type 0x10 and 0x11 looks reasonable to me. Before the above
> commit, I see that Linux still loops through multiple ivhd but only handles
> one with type 0x10. On newer kernel, it will choose to handle the type that
> appears last corresponding the first devid, which is weird in my opinion.
> +static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
> +{
> +	u8 *base = (u8 *)ivrs;
> +	struct ivhd_header *ivhd = (struct ivhd_header *)
> +					(base + IVRS_HEADER_LENGTH);
> +	u8 last_type = ivhd->type;
> +	u16 devid = ivhd->devid;
> +
> +	while (((u8 *)ivhd - base < ivrs->length) &&
> +	       (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
> +		u8 *p = (u8 *) ivhd;
> +
> +		if (ivhd->devid == devid)
> +			last_type = ivhd->type;
> +		ivhd = (struct ivhd_header *)(p + ivhd->length);
> +	}
> +
> +	return last_type;
> +}

Yes I don't get the logic here either.
Talk to kernel devs who wrote this?

commit 8c7142f56fedfc6824b5bca56fee1f443e01746b
Author: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
Date:   Fri Apr 1 09:05:59 2016 -0400

    iommu/amd: Use the most comprehensive IVHD type that the driver can support
    
    The IVRS in more recent AMD system usually contains multiple
    IVHD block types (e.g. 0x10, 0x11, and 0x40) for each IOMMU.
    The newer IVHD types provide more information (e.g. new features
    specified in the IOMMU spec), while maintain compatibility with
    the older IVHD type.
    
    Having multiple IVHD type allows older IOMMU drivers to still function
    (e.g. using the older IVHD type 0x10) while the newer IOMMU driver can use
    the newer IVHD types (e.g. 0x11 and 0x40). Therefore, the IOMMU driver
    should only make use of the newest IVHD type that it can support.
    
    This patch adds new logic to determine the highest level of IVHD type
    it can support, and use it throughout the to initialize the driver.
    This requires adding another pass to the IVRS parsing to determine
    appropriate IVHD type (see function get_highest_supported_ivhd_type())
    before parsing the contents.
    
    [Vincent: fix the build error of IVHD_DEV_ACPI_HID flag not found]
    
    Signed-off-by: Wan Zongshun <vincent.wan@amd.com>
    Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
    Signed-off-by: Joerg Roedel <jroedel@suse.de>



> 
> So when exposing type 0x10 following by 0x11, old kernel only parses 0x10
> and does not support x2APIC while new kernel parse 0x11 and support x2APIC.
> I will expose both types in the next version.
> 
> > Some research and testing here would be benefitial.
> > Similarly for windows guests.
> > 
> > Thanks!
> > 
> > 
> > 
> > > ---
> > >   hw/i386/acpi-build.c | 28 ++++++++++++++--------------
> > >   hw/i386/amd_iommu.c  | 21 +++++++++++++++++++--
> > >   hw/i386/amd_iommu.h  | 16 +++++++++++-----
> > >   3 files changed, 44 insertions(+), 21 deletions(-)
> > > 
> > > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> > > index ec857a117e..72d6bb2892 100644
> > > --- a/hw/i386/acpi-build.c
> > > +++ b/hw/i386/acpi-build.c
> > > @@ -2339,7 +2339,7 @@ static void
> > >   build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
> > >                   const char *oem_table_id)
> > >   {
> > > -    int ivhd_table_len = 24;
> > > +    int ivhd_table_len = 40;
> > >       AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
> > >       GArray *ivhd_blob = g_array_new(false, true, 1);
> > >       AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
> > > @@ -2349,18 +2349,19 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
> > >       /* IVinfo - IO virtualization information common to all
> > >        * IOMMU units in a system
> > >        */
> > > -    build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
> > > +    build_append_int_noprefix(table_data,
> > > +                             (1UL << 0) | /* EFRSup */
> > > +                             (40UL << 8), /* PASize */
> > > +                             4);
> > >       /* reserved */
> > >       build_append_int_noprefix(table_data, 0, 8);
> > > -    /* IVHD definition - type 10h */
> > > -    build_append_int_noprefix(table_data, 0x10, 1);
> > > +    /* IVHD definition - type 11h */
> > > +    build_append_int_noprefix(table_data, 0x11, 1);
> > >       /* virtualization flags */
> > >       build_append_int_noprefix(table_data,
> > >                                (1UL << 0) | /* HtTunEn      */
> > > -                             (1UL << 4) | /* iotblSup     */
> > 
> > btw this should have been iotlbsup?
> > 
> > > -                             (1UL << 6) | /* PrefSup      */
> > > -                             (1UL << 7),  /* PPRSup       */
> > > +                             (1UL << 4),  /* iotblSup     */
> > >                                1);
> > >       /*
> > 
> > hmm why are you removing these other flags?
> 
> According to the AMD IOMMU specification, the bit 6, 7 are reserved in type
> 0x11 which are PerfSup, PPRSup respectively in type 0x10 so I remove those
> flags when changing to type 0x11. In type 0x11, these feature are reported
> via the below EFR Register Image I believe.
> 
> > 
> > > @@ -2404,13 +2405,12 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
> > >       build_append_int_noprefix(table_data, 0, 2);
> > >       /* IOMMU info */
> > >       build_append_int_noprefix(table_data, 0, 2);
> > > -    /* IOMMU Feature Reporting */
> > > -    build_append_int_noprefix(table_data,
> > > -                             (48UL << 30) | /* HATS   */
> > > -                             (48UL << 28) | /* GATS   */
> > > -                             (1UL << 2)   | /* GTSup  */
> > > -                             (1UL << 6),    /* GASup  */
> > > -                             4);
> > > +    /* IOMMU Attributes */
> > > +    build_append_int_noprefix(table_data, 0, 4);
> > > +    /* EFR Register Image */
> > > +    build_append_int_noprefix(table_data, s->efr_reg, 8);
> > > +    /* EFR Register Image 2 */
> > > +    build_append_int_noprefix(table_data, 0, 8);
> > 
> > 
> > here too. what's going on?
> 
> Same as the above, the structure of type 0x11 is different from 0x10. At
> offset 20 it is not IOMMU feature reporting but IOMMU attributes. And there
> are 2 more fields: EFR Register Image, EFR Register Image 2 before IVHD
> device entries.
> 
> > >       /* IVHD entries as found above */
> > >       g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
> > > diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
> > > index bcd016f5c5..5dfa93d945 100644
> > > --- a/hw/i386/amd_iommu.c
> > > +++ b/hw/i386/amd_iommu.c
> > > @@ -31,6 +31,7 @@
> > >   #include "hw/i386/apic_internal.h"
> > >   #include "trace.h"
> > >   #include "hw/i386/apic-msidef.h"
> > > +#include "hw/qdev-properties.h"
> > >   /* used AMD-Vi MMIO registers */
> > >   const char *amdvi_mmio_low[] = {
> > > @@ -1155,7 +1156,12 @@ static int amdvi_int_remap_ga(AMDVIState *iommu,
> > >       irq->vector = irte.hi.fields.vector;
> > >       irq->dest_mode = irte.lo.fields_remap.dm;
> > >       irq->redir_hint = irte.lo.fields_remap.rq_eoi;
> > > -    irq->dest = irte.lo.fields_remap.destination;
> > > +    if (!iommu->xtsup) {
> > > +        irq->dest = irte.lo.fields_remap.destination & 0xff;
> > > +    } else {
> > > +        irq->dest = irte.lo.fields_remap.destination |
> > > +                    (irte.hi.fields.destination_hi << 24);
> > > +    }
> > 
> > Weird way to put it. Why not if (iommu->xtsup) ... ?
> 
> Okay, I'll fix this in next version.
> 
> > >       return 0;
> > >   }
> > > @@ -1503,10 +1509,15 @@ static void amdvi_init(AMDVIState *s)
> > >       s->enabled = false;
> > >       s->ats_enabled = false;
> > >       s->cmdbuf_enabled = false;
> > > +    s->efr_reg = AMDVI_DEFAULT_EXT_FEATURES;
> > > +
> > > +    if (s->xtsup) {
> > > +        s->efr_reg |= AMDVI_FEATURE_XT;
> > > +    }
> > >       /* reset MMIO */
> > >       memset(s->mmior, 0, AMDVI_MMIO_SIZE);
> > > -    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES,
> > > +    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, s->efr_reg,
> > >               0xffffffffffffffef, 0);
> > >       amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
> > > @@ -1586,6 +1597,11 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
> > >       amdvi_init(s);
> > >   }
> > > +static Property amdvi_properties[] = {
> > > +    DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false),
> > > +    DEFINE_PROP_END_OF_LIST(),
> > > +};
> > > +
> > >   static const VMStateDescription vmstate_amdvi_sysbus = {
> > >       .name = "amd-iommu",
> > >       .unmigratable = 1
> > > @@ -1612,6 +1628,7 @@ static void amdvi_sysbus_class_init(ObjectClass *klass, void *data)
> > >       dc->user_creatable = true;
> > >       set_bit(DEVICE_CATEGORY_MISC, dc->categories);
> > >       dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
> > > +    device_class_set_props(dc, amdvi_properties);
> > >   }
> > >   static const TypeInfo amdvi_sysbus = {
> > > diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
> > > index 79d38a3e41..96df7b0400 100644
> > > --- a/hw/i386/amd_iommu.h
> > > +++ b/hw/i386/amd_iommu.h
> > > @@ -154,6 +154,7 @@
> > >   #define AMDVI_FEATURE_PREFETCH            (1ULL << 0) /* page prefetch       */
> > >   #define AMDVI_FEATURE_PPR                 (1ULL << 1) /* PPR Support         */
> > > +#define AMDVI_FEATURE_XT                  (1ULL << 2) /* x2APIC Support      */
> > >   #define AMDVI_FEATURE_GT                  (1ULL << 4) /* Guest Translation   */
> > >   #define AMDVI_FEATURE_IA                  (1ULL << 6) /* inval all support   */
> > >   #define AMDVI_FEATURE_GA                  (1ULL << 7) /* guest VAPIC support */
> > > @@ -173,8 +174,9 @@
> > >   #define AMDVI_IOTLB_MAX_SIZE 1024
> > >   #define AMDVI_DEVID_SHIFT    36
> > > -/* extended feature support */
> > > -#define AMDVI_EXT_FEATURES (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \
> > > +/* default extended feature */
> > > +#define AMDVI_DEFAULT_EXT_FEATURES \
> > > +        (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \
> > >           AMDVI_FEATURE_IA | AMDVI_FEATURE_GT | AMDVI_FEATURE_HE | \
> > >           AMDVI_GATS_MODE | AMDVI_HATS_MODE | AMDVI_FEATURE_GA)
> > > @@ -278,8 +280,8 @@ union irte_ga_lo {
> > >                   dm:1,
> > >                   /* ------ */
> > >                   guest_mode:1,
> > > -                destination:8,
> > > -                rsvd_1:48;
> > > +                destination:24,
> > > +                rsvd_1:32;
> > >     } fields_remap;
> > >   };
> > > @@ -287,7 +289,8 @@ union irte_ga_hi {
> > >     uint64_t val;
> > >     struct {
> > >         uint64_t  vector:8,
> > > -                rsvd_2:56;
> > > +                rsvd_2:48,
> > > +                destination_hi:8;
> > >     } fields;
> > >   };
> > > @@ -367,6 +370,9 @@ struct AMDVIState {
> > >       /* Interrupt remapping */
> > >       bool ga_enabled;
> > > +    bool xtsup;
> > > +
> > > +    uint64_t efr_reg;            /* extended feature register */
> > >   };
> > >   #endif
> > > -- 
> > > 2.25.1
> > 
> 
> Thanks,
> Quang Minh.
Bui Quang Minh May 15, 2023, 2:41 p.m. UTC | #4
On 5/15/23 03:44, Michael S. Tsirkin wrote:
> On Sun, May 14, 2023 at 03:55:11PM +0700, Bui Quang Minh wrote:
>> On 5/12/23 21:39, Michael S. Tsirkin wrote:
>>> On Tue, Apr 11, 2023 at 09:24:40PM +0700, Bui Quang Minh wrote:
>>>> This commit adds XTSup configuration to let user choose to whether enable
>>>> this feature or not. When XTSup is enabled, additional bytes in IRTE with
>>>> enabled guest virtual VAPIC are used to support 32-bit destination id.
>>>>
>>>> Additionally, this commit changes to use IVHD type 0x11 in ACPI table for
>>>> feature report to operating system. This is because Linux does not use
>>>> XTSup in IOMMU Feature Reporting field of IVHD type 0x10 but only use XTSup
>>>> bit in EFR Register Image of IVHD 0x11 to indicate x2APIC support (see
>>>> init_iommu_one in linux/drivers/iommu/amd/init.c)
>>>>
>>>> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
>>>
>>> I'm concerned that switching to type 11 will break some older guests.
>>> It would be better if we could export both type 10 and type 11
>>> ivhd. A question however would be how does this interact
>>> with older guests. For example:
>>> https://lists.linuxfoundation.org/pipermail/iommu/2016-January/015310.html
>>> it looks like linux before 2016 only expected one ivhd entry?
>>
>> Export both type 0x10 and 0x11 looks reasonable to me. Before the above
>> commit, I see that Linux still loops through multiple ivhd but only handles
>> one with type 0x10. On newer kernel, it will choose to handle the type that
>> appears last corresponding the first devid, which is weird in my opinion.
>> +static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
>> +{
>> +	u8 *base = (u8 *)ivrs;
>> +	struct ivhd_header *ivhd = (struct ivhd_header *)
>> +					(base + IVRS_HEADER_LENGTH);
>> +	u8 last_type = ivhd->type;
>> +	u16 devid = ivhd->devid;
>> +
>> +	while (((u8 *)ivhd - base < ivrs->length) &&
>> +	       (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
>> +		u8 *p = (u8 *) ivhd;
>> +
>> +		if (ivhd->devid == devid)
>> +			last_type = ivhd->type;
>> +		ivhd = (struct ivhd_header *)(p + ivhd->length);
>> +	}
>> +
>> +	return last_type;
>> +}
> 
> Yes I don't get the logic here either.
> Talk to kernel devs who wrote this?
> 
> commit 8c7142f56fedfc6824b5bca56fee1f443e01746b
> Author: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
> Date:   Fri Apr 1 09:05:59 2016 -0400
> 
>      iommu/amd: Use the most comprehensive IVHD type that the driver can support
>      
>      The IVRS in more recent AMD system usually contains multiple
>      IVHD block types (e.g. 0x10, 0x11, and 0x40) for each IOMMU.
>      The newer IVHD types provide more information (e.g. new features
>      specified in the IOMMU spec), while maintain compatibility with
>      the older IVHD type.
>      
>      Having multiple IVHD type allows older IOMMU drivers to still function
>      (e.g. using the older IVHD type 0x10) while the newer IOMMU driver can use
>      the newer IVHD types (e.g. 0x11 and 0x40). Therefore, the IOMMU driver
>      should only make use of the newest IVHD type that it can support.
>      
>      This patch adds new logic to determine the highest level of IVHD type
>      it can support, and use it throughout the to initialize the driver.
>      This requires adding another pass to the IVRS parsing to determine
>      appropriate IVHD type (see function get_highest_supported_ivhd_type())
>      before parsing the contents.
>      
>      [Vincent: fix the build error of IVHD_DEV_ACPI_HID flag not found]
>      
>      Signed-off-by: Wan Zongshun <vincent.wan@amd.com>
>      Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
>      Signed-off-by: Joerg Roedel <jroedel@suse.de>

I've sent a email to talk to kernel developers about this function. Here 
is the link to the email: 
https://lore.kernel.org/all/e8a87c2b-a29a-ccf9-49c6-3cfceaa208bb@gmail.com/
Bui Quang Minh May 22, 2023, 4:59 p.m. UTC | #5
On 5/14/23 15:55, Bui Quang Minh wrote:
> On 5/12/23 21:39, Michael S. Tsirkin wrote:
>> On Tue, Apr 11, 2023 at 09:24:40PM +0700, Bui Quang Minh wrote:
>>> This commit adds XTSup configuration to let user choose to whether 
>>> enable
>>> this feature or not. When XTSup is enabled, additional bytes in IRTE 
>>> with
>>> enabled guest virtual VAPIC are used to support 32-bit destination id.
>>>
>>> Additionally, this commit changes to use IVHD type 0x11 in ACPI table 
>>> for
>>> feature report to operating system. This is because Linux does not use
>>> XTSup in IOMMU Feature Reporting field of IVHD type 0x10 but only use 
>>> XTSup
>>> bit in EFR Register Image of IVHD 0x11 to indicate x2APIC support (see
>>> init_iommu_one in linux/drivers/iommu/amd/init.c)
>>>
>>> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
>>
>> I'm concerned that switching to type 11 will break some older guests.
>> It would be better if we could export both type 10 and type 11
>> ivhd. A question however would be how does this interact
>> with older guests. For example:
>> https://lists.linuxfoundation.org/pipermail/iommu/2016-January/015310.html
>> it looks like linux before 2016 only expected one ivhd entry?
> 
> Export both type 0x10 and 0x11 looks reasonable to me. Before the above 
> commit, I see that Linux still loops through multiple ivhd but only 
> handles one with type 0x10. On newer kernel, it will choose to handle 
> the type that appears last corresponding the first devid, which is weird 
> in my opinion.
> 
> +static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
> +{
> +    u8 *base = (u8 *)ivrs;
> +    struct ivhd_header *ivhd = (struct ivhd_header *)
> +                    (base + IVRS_HEADER_LENGTH);
> +    u8 last_type = ivhd->type;
> +    u16 devid = ivhd->devid;
> +
> +    while (((u8 *)ivhd - base < ivrs->length) &&
> +           (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
> +        u8 *p = (u8 *) ivhd;
> +
> +        if (ivhd->devid == devid)
> +            last_type = ivhd->type;
> +        ivhd = (struct ivhd_header *)(p + ivhd->length);
> +    }
> +
> +    return last_type;
> +}
> 
> So when exposing type 0x10 following by 0x11, old kernel only parses 
> 0x10 and does not support x2APIC while new kernel parse 0x11 and support 
> x2APIC. I will expose both types in the next version.
> 
>> Some research and testing here would be benefitial.
>> Similarly for windows guests.
>>
>> Thanks!
>>
>>
>>
>>> ---
>>>   hw/i386/acpi-build.c | 28 ++++++++++++++--------------
>>>   hw/i386/amd_iommu.c  | 21 +++++++++++++++++++--
>>>   hw/i386/amd_iommu.h  | 16 +++++++++++-----
>>>   3 files changed, 44 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
>>> index ec857a117e..72d6bb2892 100644
>>> --- a/hw/i386/acpi-build.c
>>> +++ b/hw/i386/acpi-build.c
>>> @@ -2339,7 +2339,7 @@ static void
>>>   build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char 
>>> *oem_id,
>>>                   const char *oem_table_id)
>>>   {
>>> -    int ivhd_table_len = 24;
>>> +    int ivhd_table_len = 40;
>>>       AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
>>>       GArray *ivhd_blob = g_array_new(false, true, 1);
>>>       AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
>>> @@ -2349,18 +2349,19 @@ build_amd_iommu(GArray *table_data, 
>>> BIOSLinker *linker, const char *oem_id,
>>>       /* IVinfo - IO virtualization information common to all
>>>        * IOMMU units in a system
>>>        */
>>> -    build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
>>> +    build_append_int_noprefix(table_data,
>>> +                             (1UL << 0) | /* EFRSup */
>>> +                             (40UL << 8), /* PASize */
>>> +                             4);
>>>       /* reserved */
>>>       build_append_int_noprefix(table_data, 0, 8);
>>> -    /* IVHD definition - type 10h */
>>> -    build_append_int_noprefix(table_data, 0x10, 1);
>>> +    /* IVHD definition - type 11h */
>>> +    build_append_int_noprefix(table_data, 0x11, 1);
>>>       /* virtualization flags */
>>>       build_append_int_noprefix(table_data,
>>>                                (1UL << 0) | /* HtTunEn      */
>>> -                             (1UL << 4) | /* iotblSup     */
>>
>> btw this should have been iotlbsup?
>>
>>> -                             (1UL << 6) | /* PrefSup      */
>>> -                             (1UL << 7),  /* PPRSup       */
>>> +                             (1UL << 4),  /* iotblSup     */
>>>                                1);
>>>       /*
>>
>> hmm why are you removing these other flags?
> 
> According to the AMD IOMMU specification, the bit 6, 7 are reserved in 
> type 0x11 which are PerfSup, PPRSup respectively in type 0x10 so I 
> remove those flags when changing to type 0x11. In type 0x11, these 
> feature are reported via the below EFR Register Image I believe.
> 
>>
>>> @@ -2404,13 +2405,12 @@ build_amd_iommu(GArray *table_data, 
>>> BIOSLinker *linker, const char *oem_id,
>>>       build_append_int_noprefix(table_data, 0, 2);
>>>       /* IOMMU info */
>>>       build_append_int_noprefix(table_data, 0, 2);
>>> -    /* IOMMU Feature Reporting */
>>> -    build_append_int_noprefix(table_data,
>>> -                             (48UL << 30) | /* HATS   */
>>> -                             (48UL << 28) | /* GATS   */
>>> -                             (1UL << 2)   | /* GTSup  */
>>> -                             (1UL << 6),    /* GASup  */
>>> -                             4);
>>> +    /* IOMMU Attributes */
>>> +    build_append_int_noprefix(table_data, 0, 4);
>>> +    /* EFR Register Image */
>>> +    build_append_int_noprefix(table_data, s->efr_reg, 8);
>>> +    /* EFR Register Image 2 */
>>> +    build_append_int_noprefix(table_data, 0, 8);
>>
>>
>> here too. what's going on?
> 
> Same as the above, the structure of type 0x11 is different from 0x10. At 
> offset 20 it is not IOMMU feature reporting but IOMMU attributes. And 
> there are 2 more fields: EFR Register Image, EFR Register Image 2 before 
> IVHD device entries.
> 
>>>       /* IVHD entries as found above */
>>>       g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
>>> diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
>>> index bcd016f5c5..5dfa93d945 100644
>>> --- a/hw/i386/amd_iommu.c
>>> +++ b/hw/i386/amd_iommu.c
>>> @@ -31,6 +31,7 @@
>>>   #include "hw/i386/apic_internal.h"
>>>   #include "trace.h"
>>>   #include "hw/i386/apic-msidef.h"
>>> +#include "hw/qdev-properties.h"
>>>   /* used AMD-Vi MMIO registers */
>>>   const char *amdvi_mmio_low[] = {
>>> @@ -1155,7 +1156,12 @@ static int amdvi_int_remap_ga(AMDVIState *iommu,
>>>       irq->vector = irte.hi.fields.vector;
>>>       irq->dest_mode = irte.lo.fields_remap.dm;
>>>       irq->redir_hint = irte.lo.fields_remap.rq_eoi;
>>> -    irq->dest = irte.lo.fields_remap.destination;
>>> +    if (!iommu->xtsup) {
>>> +        irq->dest = irte.lo.fields_remap.destination & 0xff;
>>> +    } else {
>>> +        irq->dest = irte.lo.fields_remap.destination |
>>> +                    (irte.hi.fields.destination_hi << 24);
>>> +    }
>>
>> Weird way to put it. Why not if (iommu->xtsup) ... ?
> 
> Okay, I'll fix this in next version.
> 
>>>       return 0;
>>>   }
>>> @@ -1503,10 +1509,15 @@ static void amdvi_init(AMDVIState *s)
>>>       s->enabled = false;
>>>       s->ats_enabled = false;
>>>       s->cmdbuf_enabled = false;
>>> +    s->efr_reg = AMDVI_DEFAULT_EXT_FEATURES;
>>> +
>>> +    if (s->xtsup) {
>>> +        s->efr_reg |= AMDVI_FEATURE_XT;
>>> +    }
>>>       /* reset MMIO */
>>>       memset(s->mmior, 0, AMDVI_MMIO_SIZE);
>>> -    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES,
>>> +    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, s->efr_reg,
>>>               0xffffffffffffffef, 0);
>>>       amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
>>> @@ -1586,6 +1597,11 @@ static void amdvi_sysbus_realize(DeviceState 
>>> *dev, Error **errp)
>>>       amdvi_init(s);
>>>   }
>>> +static Property amdvi_properties[] = {
>>> +    DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false),
>>> +    DEFINE_PROP_END_OF_LIST(),
>>> +};
>>> +
>>>   static const VMStateDescription vmstate_amdvi_sysbus = {
>>>       .name = "amd-iommu",
>>>       .unmigratable = 1
>>> @@ -1612,6 +1628,7 @@ static void amdvi_sysbus_class_init(ObjectClass 
>>> *klass, void *data)
>>>       dc->user_creatable = true;
>>>       set_bit(DEVICE_CATEGORY_MISC, dc->categories);
>>>       dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
>>> +    device_class_set_props(dc, amdvi_properties);
>>>   }
>>>   static const TypeInfo amdvi_sysbus = {
>>> diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
>>> index 79d38a3e41..96df7b0400 100644
>>> --- a/hw/i386/amd_iommu.h
>>> +++ b/hw/i386/amd_iommu.h
>>> @@ -154,6 +154,7 @@
>>>   #define AMDVI_FEATURE_PREFETCH            (1ULL << 0) /* page 
>>> prefetch       */
>>>   #define AMDVI_FEATURE_PPR                 (1ULL << 1) /* PPR 
>>> Support         */
>>> +#define AMDVI_FEATURE_XT                  (1ULL << 2) /* x2APIC 
>>> Support      */
>>>   #define AMDVI_FEATURE_GT                  (1ULL << 4) /* Guest 
>>> Translation   */
>>>   #define AMDVI_FEATURE_IA                  (1ULL << 6) /* inval all 
>>> support   */
>>>   #define AMDVI_FEATURE_GA                  (1ULL << 7) /* guest 
>>> VAPIC support */
>>> @@ -173,8 +174,9 @@
>>>   #define AMDVI_IOTLB_MAX_SIZE 1024
>>>   #define AMDVI_DEVID_SHIFT    36
>>> -/* extended feature support */
>>> -#define AMDVI_EXT_FEATURES (AMDVI_FEATURE_PREFETCH | 
>>> AMDVI_FEATURE_PPR | \
>>> +/* default extended feature */
>>> +#define AMDVI_DEFAULT_EXT_FEATURES \
>>> +        (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \
>>>           AMDVI_FEATURE_IA | AMDVI_FEATURE_GT | AMDVI_FEATURE_HE | \
>>>           AMDVI_GATS_MODE | AMDVI_HATS_MODE | AMDVI_FEATURE_GA)
>>> @@ -278,8 +280,8 @@ union irte_ga_lo {
>>>                   dm:1,
>>>                   /* ------ */
>>>                   guest_mode:1,
>>> -                destination:8,
>>> -                rsvd_1:48;
>>> +                destination:24,
>>> +                rsvd_1:32;
>>>     } fields_remap;
>>>   };
>>> @@ -287,7 +289,8 @@ union irte_ga_hi {
>>>     uint64_t val;
>>>     struct {
>>>         uint64_t  vector:8,
>>> -                rsvd_2:56;
>>> +                rsvd_2:48,
>>> +                destination_hi:8;
>>>     } fields;
>>>   };
>>> @@ -367,6 +370,9 @@ struct AMDVIState {
>>>       /* Interrupt remapping */
>>>       bool ga_enabled;
>>> +    bool xtsup;
>>> +
>>> +    uint64_t efr_reg;            /* extended feature register */
>>>   };
>>>   #endif
>>> -- 
>>> 2.25.1

I have posted version 4 to export both IVHD types and flip the 
iommu->xtsup for readability.

I have tested with old Linux before this commit 
8c7142f56fedfc6824b5bca56fee1f443e01746b (iommu/amd: Use the most 
comprehensive IVHD type that the driver can support) (Linux 4.6-rc2) 
that does not support IVHD type 0x11, it can still detect AMD IOMMU 
device via IVHD 0x10 but cannot detect x2APIC support. With newer Linux, 
it successfully detects x2APIC support.

With Windows, I don't have much experience, I have tried to get the 
driver for reverse engineer but looks like the driver can be installed 
via the installer only. And I don't have the device so installer aborts 
the driver installation.

Thanks,
Quang Minh.
diff mbox series

Patch

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index ec857a117e..72d6bb2892 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2339,7 +2339,7 @@  static void
 build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
                 const char *oem_table_id)
 {
-    int ivhd_table_len = 24;
+    int ivhd_table_len = 40;
     AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
     GArray *ivhd_blob = g_array_new(false, true, 1);
     AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id,
@@ -2349,18 +2349,19 @@  build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
     /* IVinfo - IO virtualization information common to all
      * IOMMU units in a system
      */
-    build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+    build_append_int_noprefix(table_data,
+                             (1UL << 0) | /* EFRSup */
+                             (40UL << 8), /* PASize */
+                             4);
     /* reserved */
     build_append_int_noprefix(table_data, 0, 8);
 
-    /* IVHD definition - type 10h */
-    build_append_int_noprefix(table_data, 0x10, 1);
+    /* IVHD definition - type 11h */
+    build_append_int_noprefix(table_data, 0x11, 1);
     /* virtualization flags */
     build_append_int_noprefix(table_data,
                              (1UL << 0) | /* HtTunEn      */
-                             (1UL << 4) | /* iotblSup     */
-                             (1UL << 6) | /* PrefSup      */
-                             (1UL << 7),  /* PPRSup       */
+                             (1UL << 4),  /* iotblSup     */
                              1);
 
     /*
@@ -2404,13 +2405,12 @@  build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
     build_append_int_noprefix(table_data, 0, 2);
     /* IOMMU info */
     build_append_int_noprefix(table_data, 0, 2);
-    /* IOMMU Feature Reporting */
-    build_append_int_noprefix(table_data,
-                             (48UL << 30) | /* HATS   */
-                             (48UL << 28) | /* GATS   */
-                             (1UL << 2)   | /* GTSup  */
-                             (1UL << 6),    /* GASup  */
-                             4);
+    /* IOMMU Attributes */
+    build_append_int_noprefix(table_data, 0, 4);
+    /* EFR Register Image */
+    build_append_int_noprefix(table_data, s->efr_reg, 8);
+    /* EFR Register Image 2 */
+    build_append_int_noprefix(table_data, 0, 8);
 
     /* IVHD entries as found above */
     g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len);
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index bcd016f5c5..5dfa93d945 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -31,6 +31,7 @@ 
 #include "hw/i386/apic_internal.h"
 #include "trace.h"
 #include "hw/i386/apic-msidef.h"
+#include "hw/qdev-properties.h"
 
 /* used AMD-Vi MMIO registers */
 const char *amdvi_mmio_low[] = {
@@ -1155,7 +1156,12 @@  static int amdvi_int_remap_ga(AMDVIState *iommu,
     irq->vector = irte.hi.fields.vector;
     irq->dest_mode = irte.lo.fields_remap.dm;
     irq->redir_hint = irte.lo.fields_remap.rq_eoi;
-    irq->dest = irte.lo.fields_remap.destination;
+    if (!iommu->xtsup) {
+        irq->dest = irte.lo.fields_remap.destination & 0xff;
+    } else {
+        irq->dest = irte.lo.fields_remap.destination |
+                    (irte.hi.fields.destination_hi << 24);
+    }
 
     return 0;
 }
@@ -1503,10 +1509,15 @@  static void amdvi_init(AMDVIState *s)
     s->enabled = false;
     s->ats_enabled = false;
     s->cmdbuf_enabled = false;
+    s->efr_reg = AMDVI_DEFAULT_EXT_FEATURES;
+
+    if (s->xtsup) {
+        s->efr_reg |= AMDVI_FEATURE_XT;
+    }
 
     /* reset MMIO */
     memset(s->mmior, 0, AMDVI_MMIO_SIZE);
-    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES,
+    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, s->efr_reg,
             0xffffffffffffffef, 0);
     amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
 
@@ -1586,6 +1597,11 @@  static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
     amdvi_init(s);
 }
 
+static Property amdvi_properties[] = {
+    DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static const VMStateDescription vmstate_amdvi_sysbus = {
     .name = "amd-iommu",
     .unmigratable = 1
@@ -1612,6 +1628,7 @@  static void amdvi_sysbus_class_init(ObjectClass *klass, void *data)
     dc->user_creatable = true;
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
     dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
+    device_class_set_props(dc, amdvi_properties);
 }
 
 static const TypeInfo amdvi_sysbus = {
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 79d38a3e41..96df7b0400 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -154,6 +154,7 @@ 
 
 #define AMDVI_FEATURE_PREFETCH            (1ULL << 0) /* page prefetch       */
 #define AMDVI_FEATURE_PPR                 (1ULL << 1) /* PPR Support         */
+#define AMDVI_FEATURE_XT                  (1ULL << 2) /* x2APIC Support      */
 #define AMDVI_FEATURE_GT                  (1ULL << 4) /* Guest Translation   */
 #define AMDVI_FEATURE_IA                  (1ULL << 6) /* inval all support   */
 #define AMDVI_FEATURE_GA                  (1ULL << 7) /* guest VAPIC support */
@@ -173,8 +174,9 @@ 
 #define AMDVI_IOTLB_MAX_SIZE 1024
 #define AMDVI_DEVID_SHIFT    36
 
-/* extended feature support */
-#define AMDVI_EXT_FEATURES (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \
+/* default extended feature */
+#define AMDVI_DEFAULT_EXT_FEATURES \
+        (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \
         AMDVI_FEATURE_IA | AMDVI_FEATURE_GT | AMDVI_FEATURE_HE | \
         AMDVI_GATS_MODE | AMDVI_HATS_MODE | AMDVI_FEATURE_GA)
 
@@ -278,8 +280,8 @@  union irte_ga_lo {
                 dm:1,
                 /* ------ */
                 guest_mode:1,
-                destination:8,
-                rsvd_1:48;
+                destination:24,
+                rsvd_1:32;
   } fields_remap;
 };
 
@@ -287,7 +289,8 @@  union irte_ga_hi {
   uint64_t val;
   struct {
       uint64_t  vector:8,
-                rsvd_2:56;
+                rsvd_2:48,
+                destination_hi:8;
   } fields;
 };
 
@@ -367,6 +370,9 @@  struct AMDVIState {
 
     /* Interrupt remapping */
     bool ga_enabled;
+    bool xtsup;
+
+    uint64_t efr_reg;            /* extended feature register */
 };
 
 #endif