diff mbox series

[v2,2/3] hw/acpi: Implement the SRAT GI affinity structure

Message ID 20231007201740.30335-3-ankita@nvidia.com
State New
Headers show
Series [v2,1/3] qom: new object to associate device to numa node | expand

Commit Message

Ankit Agrawal Oct. 7, 2023, 8:17 p.m. UTC
From: Ankit Agrawal <ankita@nvidia.com>

The ACPI spec provides a scheme to associate "Generic Initiators" [1]
(e.g. heterogeneous processors and accelerators, GPUs, and I/O devices with
integrated compute or DMA engines) with Proximity Domains. This is
achieved using the Generic Initiator Affinity Structure in SRAT. During bootup,
the Linux kernel parses the ACPI SRAT to determine the PXM IDs and creates a
NUMA node for each unique PXM ID encountered. QEMU currently does not implement
these structures while building SRAT.

Add GI structures while building the VM ACPI SRAT. The association between
a device and its PXM is stored in an acpi-generic-initiator object. Look up
all such objects and use them to build these structures.

The structure needs a PCI device handle [2] that consists of the device BDF.
The vfio-pci-nohotplug device corresponding to the acpi-generic-initiator
object is located to determine the BDF.

[1] ACPI Spec 6.5, Section 5.2.16.6
[2] ACPI Spec 6.5, Table 5.66

Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
---
 hw/acpi/acpi-generic-initiator.c         | 78 ++++++++++++++++++++++++
 hw/arm/virt-acpi-build.c                 |  3 +
 hw/vfio/pci.c                            |  2 -
 hw/vfio/pci.h                            |  2 +
 include/hw/acpi/acpi-generic-initiator.h | 22 +++++++
 5 files changed, 105 insertions(+), 2 deletions(-)

Comments

Alex Williamson Oct. 9, 2023, 9:16 p.m. UTC | #1
On Sun, 8 Oct 2023 01:47:39 +0530
<ankita@nvidia.com> wrote:

> From: Ankit Agrawal <ankita@nvidia.com>
> 
> ACPI spec provides a scheme to associate "Generic Initiators" [1]
> (e.g. heterogeneous processors and accelerators, GPUs, and I/O devices with
> integrated compute or DMA engines GPUs) with Proximity Domains. This is
> achieved using Generic Initiator Affinity Structure in SRAT. During bootup,
> Linux kernel parse the ACPI SRAT to determine the PXM ids and create a NUMA
> node for each unique PXM ID encountered. Qemu currently do not implement
> these structures while building SRAT.
> 
> Add GI structures while building VM ACPI SRAT. The association between
> devices and PXM are stored using acpi-generic-initiator object. Lookup
> presence of all such objects and use them to build these structures.
> 
> The structure needs a PCI device handle [2] that consists of the device BDF.
> The vfio-pci-nohotplug device corresponding to the acpi-generic-initiator
> object is located to determine the BDF.
> 
> [1] ACPI Spec 6.5, Section 5.2.16.6
> [2] ACPI Spec 6.5, Table 5.66
> 
> Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
> ---
>  hw/acpi/acpi-generic-initiator.c         | 78 ++++++++++++++++++++++++
>  hw/arm/virt-acpi-build.c                 |  3 +
>  hw/vfio/pci.c                            |  2 -
>  hw/vfio/pci.h                            |  2 +
>  include/hw/acpi/acpi-generic-initiator.h | 22 +++++++
>  5 files changed, 105 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/acpi/acpi-generic-initiator.c b/hw/acpi/acpi-generic-initiator.c
> index 6406736090..1ae79639be 100644
> --- a/hw/acpi/acpi-generic-initiator.c
> +++ b/hw/acpi/acpi-generic-initiator.c
> @@ -72,3 +72,81 @@ static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data)
>                                NULL, acpi_generic_initiator_set_node, NULL,
>                                NULL);
>  }
> +
> +static int acpi_generic_initiator_list(Object *obj, void *opaque)
> +{
> +    GSList **list = opaque;
> +
> +    if (object_dynamic_cast(obj, TYPE_ACPI_GENERIC_INITIATOR)) {
> +        *list = g_slist_append(*list, ACPI_GENERIC_INITIATOR(obj));
> +    }
> +
> +    object_child_foreach(obj, acpi_generic_initiator_list, opaque);
> +    return 0;
> +}
> +
> +/*
> + * Identify Generic Initiator objects and link them into the list which is
> + * returned to the caller.
> + *
> + * Note: it is the caller's responsibility to free the list to avoid
> + * memory leak.
> + */
> +static GSList *acpi_generic_initiator_get_list(void)
> +{
> +    GSList *list = NULL;
> +
> +    object_child_foreach(object_get_root(), acpi_generic_initiator_list, &list);
> +    return list;
> +}
> +
> +/*
> + * ACPI spec, Revision 6.5
> + * 5.2.16.6 Generic Initiator Affinity Structure
> + */
> +static void build_srat_generic_initiator_affinity(GArray *table_data, int node,
> +                                                  PCIDeviceHandle *handle,
> +                                                  GenericAffinityFlags flags)
> +{
> +    build_append_int_noprefix(table_data, 5, 1);     /* Type */
> +    build_append_int_noprefix(table_data, 32, 1);    /* Length */
> +    build_append_int_noprefix(table_data, 0, 1);     /* Reserved */
> +    build_append_int_noprefix(table_data, 1, 1);     /* Device Handle Type */
> +    build_append_int_noprefix(table_data, node, 4);  /* Proximity Domain */
> +    build_append_int_noprefix(table_data, handle->segment, 2);
> +    build_append_int_noprefix(table_data, handle->bdf, 2);
> +    build_append_int_noprefix(table_data, handle->res0, 4);
> +    build_append_int_noprefix(table_data, handle->res1, 8);

Why are we storing reserved fields in the PCIDeviceHandle?  This
function is already specific to building a PCI Device Handle, so we
could just loop build_append_byte() with a fixed zero value here.

> +    build_append_int_noprefix(table_data, flags, 4); /* Flags */
> +    build_append_int_noprefix(table_data, 0, 4);     /* Reserved */
> +}
> +
> +void build_srat_generic_initiator(GArray *table_data)
> +{
> +    GSList *gi_list, *list = acpi_generic_initiator_get_list();
> +    for (gi_list = list; gi_list; gi_list = gi_list->next) {
> +        AcpiGenericInitiator *gi = gi_list->data;
> +        Object *o;
> +        int count;
> +
> +        if (gi->node == MAX_NODES) {
> +            continue;
> +        }

Why do we have uninitialized AcpiGenericInitiator objects lingering?

> +
> +        o = object_resolve_path_type(gi->device, TYPE_VFIO_PCI_NOHOTPLUG, NULL);

TYPE_PCI_DEVICE?  Maybe you could check hotpluggable from the device
class, but certainly the generic code should not be dependent on being
a vfio-pci-nohotplug device.  The spec also supports an ACPI object
description, so should this be build_srat_generic_pci_initiator()?


> +        if (!o) {
> +            continue;
> +        }
> +
> +        for (count = 0; count < gi->node_count; count++) {
> +            PCIDeviceHandle dev_handle = {0};
> +            PCIDevice *pci_dev = PCI_DEVICE(o);
> +
> +            dev_handle.bdf = pci_dev->devfn;

Where does the bus part of the bdf get filled in?

> +            build_srat_generic_initiator_affinity(table_data,
> +                                                  gi->node + count, &dev_handle,
> +                                                  GEN_AFFINITY_ENABLED);

Seems like the code that built the AcpiGenericInitiator object should
supply the flags.  In fact the flag GEN_AFFINITY_ENABLED might be a
better indicator to populate the SRAT with the GI than the node value.
Thanks,

Alex

> +        }
> +    }
> +    g_slist_free(list);
> +}
> diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
> index 6b674231c2..7337d8076b 100644
> --- a/hw/arm/virt-acpi-build.c
> +++ b/hw/arm/virt-acpi-build.c
> @@ -58,6 +58,7 @@
>  #include "migration/vmstate.h"
>  #include "hw/acpi/ghes.h"
>  #include "hw/acpi/viot.h"
> +#include "hw/acpi/acpi-generic-initiator.h"
>  
>  #define ARM_SPI_BASE 32
>  
> @@ -558,6 +559,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
>          }
>      }
>  
> +    build_srat_generic_initiator(table_data);
> +
>      if (ms->nvdimms_state->is_enabled) {
>          nvdimm_build_srat(table_data);
>      }
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index a205c6b113..5e2a7c650a 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -43,8 +43,6 @@
>  #include "migration/blocker.h"
>  #include "migration/qemu-file.h"
>  
> -#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
> -
>  /* Protected by BQL */
>  static KVMRouteChange vfio_route_change;
>  
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index a2771b9ff3..74ac77a260 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -118,6 +118,8 @@ typedef struct VFIOMSIXInfo {
>  #define TYPE_VFIO_PCI "vfio-pci"
>  OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI)
>  
> +#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
> +
>  struct VFIOPCIDevice {
>      PCIDevice pdev;
>      VFIODevice vbasedev;
> diff --git a/include/hw/acpi/acpi-generic-initiator.h b/include/hw/acpi/acpi-generic-initiator.h
> index e67e6e23b1..e8e2670309 100644
> --- a/include/hw/acpi/acpi-generic-initiator.h
> +++ b/include/hw/acpi/acpi-generic-initiator.h
> @@ -27,4 +27,26 @@ typedef struct AcpiGenericInitiatorClass {
>          ObjectClass parent_class;
>  } AcpiGenericInitiatorClass;
>  
> +/*
> + * ACPI 6.5: Table 5-68 Flags - Generic Initiator
> + */
> +typedef enum {
> +    GEN_AFFINITY_NOFLAGS = 0,
> +    GEN_AFFINITY_ENABLED = (1 << 0),
> +    GEN_AFFINITY_ARCH_TRANS = (1 << 1),
> +} GenericAffinityFlags;
> +
> +/*
> + * ACPI 6.5: Table 5-66 Device Handle - PCI
> + * Device Handle definition
> + */
> +typedef struct PCIDeviceHandle {
> +    uint16_t segment;
> +    uint16_t bdf;
> +    uint32_t res0;
> +    uint64_t res1;
> +} PCIDeviceHandle;
> +
> +void build_srat_generic_initiator(GArray *table_data);
> +
>  #endif
Ankit Agrawal Oct. 17, 2023, 1:51 p.m. UTC | #2
>> +static void build_srat_generic_initiator_affinity(GArray *table_data, int node,
>> +                                                  PCIDeviceHandle *handle,
>> +                                                  GenericAffinityFlags flags)
>> +{
>> +    build_append_int_noprefix(table_data, 5, 1);     /* Type */
>> +    build_append_int_noprefix(table_data, 32, 1);    /* Length */
>> +    build_append_int_noprefix(table_data, 0, 1);     /* Reserved */
>> +    build_append_int_noprefix(table_data, 1, 1);     /* Device Handle Type */
>> +    build_append_int_noprefix(table_data, node, 4);  /* Proximity Domain */
>> +    build_append_int_noprefix(table_data, handle->segment, 2);
>> +    build_append_int_noprefix(table_data, handle->bdf, 2);
>> +    build_append_int_noprefix(table_data, handle->res0, 4);
>> +    build_append_int_noprefix(table_data, handle->res1, 8);
>
> Why are we storing reserved fields in the PCIDeviceHandle?  This
> function is already specific to building a PCI Device Handle, so we
> could just loop build_append_byte() with a fixed zero value here.

Good point, will make the change.


>> +void build_srat_generic_initiator(GArray *table_data)
>> +{
>> +    GSList *gi_list, *list = acpi_generic_initiator_get_list();
>> +    for (gi_list = list; gi_list; gi_list = gi_list->next) {
>> +        AcpiGenericInitiator *gi = gi_list->data;
>> +        Object *o;
>> +        int count;
>> +
>> +        if (gi->node == MAX_NODES) {
>> +            continue;
>> +        }
>
> Why do we have uninitialized AcpiGenericInitiator objects lingering?

Right, we don't need the check.

>> +
>> +        o = object_resolve_path_type(gi->device, TYPE_VFIO_PCI_NOHOTPLUG, NULL);
>
> TYPE_PCI_DEVICE?  Maybe you could check hotpluggable from the device
> class, but certainly the generic code should not be dependent on being
> a vfio-pci-nohotplug device.  

Understood.

> The spec also supports an ACPI object
> description, so should this be build_srat_generic_pci_initiator()?

Sure, makes sense.

>> +        if (!o) {
>> +            continue;
>> +        }
>> +
>> +        for (count = 0; count < gi->node_count; count++) {
>> +            PCIDeviceHandle dev_handle = {0};
>> +            PCIDevice *pci_dev = PCI_DEVICE(o);
>> +
>> +            dev_handle.bdf = pci_dev->devfn;
>
> Where does the bus part of the bdf get filled in?

Good catch, there should be code to add the bus.

>> +            build_srat_generic_initiator_affinity(table_data,
>> +                                                  gi->node + count, &dev_handle,
>> +                                                  GEN_AFFINITY_ENABLED);
>
> Seems like the code that built the AcpiGenericInitiator object should
> supply the flags.  In fact the flag GEN_AFFINITY_ENABLED might be a
> better indicator to populate the SRAT with the GI than the node value.

Yeah, sure.
diff mbox series

Patch

diff --git a/hw/acpi/acpi-generic-initiator.c b/hw/acpi/acpi-generic-initiator.c
index 6406736090..1ae79639be 100644
--- a/hw/acpi/acpi-generic-initiator.c
+++ b/hw/acpi/acpi-generic-initiator.c
@@ -72,3 +72,81 @@  static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data)
                               NULL, acpi_generic_initiator_set_node, NULL,
                               NULL);
 }
+
+/* object_child_foreach() visitor: gather GI objects from @obj's subtree. */
+static int acpi_generic_initiator_list(Object *obj, void *opaque)
+{
+    GSList **gi_objs = opaque;
+    Object *gi = object_dynamic_cast(obj, TYPE_ACPI_GENERIC_INITIATOR);
+    if (gi) {
+        *gi_objs = g_slist_append(*gi_objs, ACPI_GENERIC_INITIATOR(gi));
+    }
+    object_child_foreach(obj, acpi_generic_initiator_list, opaque);
+    return 0;
+}
+
+/*
+ * Walk the QOM tree below the root and collect every Generic Initiator
+ * object found into a newly built list.
+ *
+ * Returns the list head (NULL when no such object exists). The caller owns
+ * the list and has to release it with g_slist_free().
+ */
+static GSList *acpi_generic_initiator_get_list(void)
+{
+    GSList *gi_objs = NULL;
+    Object *root = object_get_root();
+    object_child_foreach(root, acpi_generic_initiator_list, &gi_objs);
+    return gi_objs;
+}
+
+/*
+ * ACPI 6.5, 5.2.16.6 GI Affinity Structure; PCI handle: bus byte, then devfn
+ */
+static void build_srat_generic_initiator_affinity(GArray *table_data, int node,
+                                                  PCIDeviceHandle *handle,
+                                                  GenericAffinityFlags flags)
+{
+    build_append_int_noprefix(table_data, 5, 1);     /* Type */
+    build_append_int_noprefix(table_data, 32, 1);    /* Length */
+    build_append_int_noprefix(table_data, 0, 1);     /* Reserved */
+    build_append_int_noprefix(table_data, 1, 1);     /* Device Handle Type */
+    build_append_int_noprefix(table_data, node, 4);  /* Proximity Domain */
+    build_append_int_noprefix(table_data, handle->segment, 2); /* Segment */
+    build_append_int_noprefix(table_data, handle->bdf >> 8, 1);   /* Bus */
+    build_append_int_noprefix(table_data, handle->bdf & 0xff, 1); /* Dev/Fn */
+    build_append_int_noprefix(table_data, 0, 4);     /* Handle reserved, */
+    build_append_int_noprefix(table_data, 0, 8);     /* 12 bytes of zero */
+    build_append_int_noprefix(table_data, flags, 4); /* Flags */
+    build_append_int_noprefix(table_data, 0, 4);     /* Reserved */
+}
+
+void build_srat_generic_initiator(GArray *table_data)
+{
+    GSList *gi_list, *list = acpi_generic_initiator_get_list();
+
+    /* One entry per node in [node, node + node_count) for each usable GI. */
+    for (gi_list = list; gi_list; gi_list = gi_list->next) {
+        AcpiGenericInitiator *gi = gi_list->data;
+        PCIDeviceHandle dev_handle = {0};
+        Object *o;
+        int count;
+
+        if (gi->node == MAX_NODES) {
+            continue;
+        }
+        o = object_resolve_path_type(gi->device, TYPE_VFIO_PCI_NOHOTPLUG, NULL);
+        if (!o) {
+            continue;
+        }
+        /* devfn alone drops the bus number; pci_get_bdf() carries both. */
+        dev_handle.bdf = pci_get_bdf(PCI_DEVICE(o));
+
+        for (count = 0; count < gi->node_count; count++) {
+            build_srat_generic_initiator_affinity(table_data,
+                                                  gi->node + count, &dev_handle,
+                                                  GEN_AFFINITY_ENABLED);
+        }
+    }
+    g_slist_free(list);
+}
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 6b674231c2..7337d8076b 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -58,6 +58,7 @@ 
 #include "migration/vmstate.h"
 #include "hw/acpi/ghes.h"
 #include "hw/acpi/viot.h"
+#include "hw/acpi/acpi-generic-initiator.h"
 
 #define ARM_SPI_BASE 32
 
@@ -558,6 +559,8 @@  build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
         }
     }
 
+    build_srat_generic_initiator(table_data);
+
     if (ms->nvdimms_state->is_enabled) {
         nvdimm_build_srat(table_data);
     }
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a205c6b113..5e2a7c650a 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -43,8 +43,6 @@ 
 #include "migration/blocker.h"
 #include "migration/qemu-file.h"
 
-#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
-
 /* Protected by BQL */
 static KVMRouteChange vfio_route_change;
 
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index a2771b9ff3..74ac77a260 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -118,6 +118,8 @@  typedef struct VFIOMSIXInfo {
 #define TYPE_VFIO_PCI "vfio-pci"
 OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI)
 
+#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
+
 struct VFIOPCIDevice {
     PCIDevice pdev;
     VFIODevice vbasedev;
diff --git a/include/hw/acpi/acpi-generic-initiator.h b/include/hw/acpi/acpi-generic-initiator.h
index e67e6e23b1..e8e2670309 100644
--- a/include/hw/acpi/acpi-generic-initiator.h
+++ b/include/hw/acpi/acpi-generic-initiator.h
@@ -27,4 +27,26 @@  typedef struct AcpiGenericInitiatorClass {
         ObjectClass parent_class;
 } AcpiGenericInitiatorClass;
 
+/*
+ * ACPI 6.5: Table 5-68 Flags - Generic Initiator (4-byte SRAT Flags field)
+ */
+typedef enum {
+    GEN_AFFINITY_NOFLAGS = 0,               /* no flag bits set */
+    GEN_AFFINITY_ENABLED = (1 << 0),        /* bit 0: Enabled */
+    GEN_AFFINITY_ARCH_TRANS = (1 << 1),     /* bit 1: Architectural Transactions */
+} GenericAffinityFlags;
+
+/*
+ * ACPI 6.5: Table 5-66 Device Handle - PCI
+ * Device Handle definition (2 + 2 + 4 + 8 = 16 bytes in the SRAT entry)
+ */
+typedef struct PCIDeviceHandle {
+    uint16_t segment;   /* PCI segment number */
+    uint16_t bdf;       /* PCI bus/device/function number */
+    uint32_t res0;      /* reserved, 4 bytes */
+    uint64_t res1;      /* reserved, 8 bytes */
+} PCIDeviceHandle;
+
+void build_srat_generic_initiator(GArray *table_data);
+
 #endif