diff mbox series

[v6,18/19] intel_iommu: Implement [set|unset]_iommu_device() callbacks

Message ID 20240603061023.269738-19-zhenzhong.duan@intel.com
State New
Headers show
Series Add a host IOMMU device abstraction to check with vIOMMU | expand

Commit Message

Duan, Zhenzhong June 3, 2024, 6:10 a.m. UTC
From: Yi Liu <yi.l.liu@intel.com>

Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU.
In set call, a new structure VTDHostIOMMUDevice which holds
a reference to HostIOMMUDevice is stored in hash table
indexed by PCI BDF.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/i386/intel_iommu_internal.h |  9 ++++
 include/hw/i386/intel_iommu.h  |  2 +
 hw/i386/intel_iommu.c          | 76 ++++++++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+)

Comments

CLEMENT MATHIEU--DRIF June 3, 2024, 10:12 a.m. UTC | #1
On 03/06/2024 08:10, Zhenzhong Duan wrote:
> Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
>
>
> From: Yi Liu <yi.l.liu@intel.com>
>
> Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU.
> In set call, a new structure VTDHostIOMMUDevice which holds
> a reference to HostIOMMUDevice is stored in hash table
> indexed by PCI BDF.
>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   hw/i386/intel_iommu_internal.h |  9 ++++
>   include/hw/i386/intel_iommu.h  |  2 +
>   hw/i386/intel_iommu.c          | 76 ++++++++++++++++++++++++++++++++++
>   3 files changed, 87 insertions(+)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index f8cf99bddf..b800d62ca0 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -28,6 +28,7 @@
>   #ifndef HW_I386_INTEL_IOMMU_INTERNAL_H
>   #define HW_I386_INTEL_IOMMU_INTERNAL_H
>   #include "hw/i386/intel_iommu.h"
> +#include "sysemu/host_iommu_device.h"
>
>   /*
>    * Intel IOMMU register specification
> @@ -537,4 +538,12 @@ typedef struct VTDRootEntry VTDRootEntry;
>   #define VTD_SL_IGN_COM              0xbff0000000000000ULL
>   #define VTD_SL_TM                   (1ULL << 62)
>
> +
> +typedef struct VTDHostIOMMUDevice {
> +    IntelIOMMUState *iommu_state;
> +    PCIBus *bus;
> +    uint8_t devfn;
> +    HostIOMMUDevice *dev;
> +    QLIST_ENTRY(VTDHostIOMMUDevice) next;
> +} VTDHostIOMMUDevice;
>   #endif
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 7d694b0813..2bbde41e45 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -293,6 +293,8 @@ struct IntelIOMMUState {
>       /* list of registered notifiers */
>       QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>
> +    GHashTable *vtd_host_iommu_dev;             /* VTDHostIOMMUDevice */
> +
>       /* interrupt remapping */
>       bool intr_enabled;              /* Whether guest enabled IR */
>       dma_addr_t intr_root;           /* Interrupt remapping table pointer */
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 519063c8f8..747c988bc4 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2)
>              (key1->pasid == key2->pasid);
>   }
>
> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
> +{
> +    const struct vtd_as_key *key1 = v1;
> +    const struct vtd_as_key *key2 = v2;
> +
> +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
> +}
>   /*
>    * Note that we use pointer to PCIBus as the key, so hashing/shifting
>    * based on the pointer value is intended. Note that we deal with
> @@ -3812,6 +3819,70 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>       return vtd_dev_as;
>   }
>
> +static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
> +                                     HostIOMMUDevice *hiod, Error **errp)
> +{
> +    IntelIOMMUState *s = opaque;
> +    VTDHostIOMMUDevice *vtd_hdev;
> +    struct vtd_as_key key = {
> +        .bus = bus,
> +        .devfn = devfn,
> +    };
> +    struct vtd_as_key *new_key;
> +
> +    assert(hiod);
> +
> +    vtd_iommu_lock(s);
> +
> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
> +
> +    if (vtd_hdev) {
> +        error_setg(errp, "IOMMUFD device already exist");
> +        vtd_iommu_unlock(s);
> +        return false;
> +    }
> +
> +    vtd_hdev = g_malloc0(sizeof(VTDHostIOMMUDevice));
> +    vtd_hdev->bus = bus;
> +    vtd_hdev->devfn = (uint8_t)devfn;
> +    vtd_hdev->iommu_state = s;
> +    vtd_hdev->dev = hiod;
> +
> +    new_key = g_malloc(sizeof(*new_key));
> +    new_key->bus = bus;
> +    new_key->devfn = devfn;
> +
> +    object_ref(hiod);
> +    g_hash_table_insert(s->vtd_host_iommu_dev, new_key, vtd_hdev);
> +
> +    vtd_iommu_unlock(s);
> +
> +    return true;
> +}
> +
> +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
> +{
> +    IntelIOMMUState *s = opaque;
> +    VTDHostIOMMUDevice *vtd_hdev;
> +    struct vtd_as_key key = {
> +        .bus = bus,
> +        .devfn = devfn,
> +    };
> +
> +    vtd_iommu_lock(s);
> +
> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
> +    if (!vtd_hdev) {
> +        vtd_iommu_unlock(s);
> +        return;
> +    }
> +
> +    g_hash_table_remove(s->vtd_host_iommu_dev, &key);
> +    object_unref(vtd_hdev->dev);
Not sure but isn't that a potential use after free?
> +
> +    vtd_iommu_unlock(s);
> +}
> +
>   /* Unmap the whole range in the notifier's scope. */
>   static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
>   {
> @@ -4116,6 +4187,8 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>
>   static PCIIOMMUOps vtd_iommu_ops = {
>       .get_address_space = vtd_host_dma_iommu,
> +    .set_iommu_device = vtd_dev_set_iommu_device,
> +    .unset_iommu_device = vtd_dev_unset_iommu_device,
>   };
>
>   static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
> @@ -4235,6 +4308,9 @@ static void vtd_realize(DeviceState *dev, Error **errp)
>                                        g_free, g_free);
>       s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
>                                         g_free, g_free);
> +    s->vtd_host_iommu_dev = g_hash_table_new_full(vtd_as_hash,
> +                                                  vtd_as_idev_equal,
> +                                                  g_free, g_free);
>       vtd_init(s);
>       pci_setup_iommu(bus, &vtd_iommu_ops, dev);
>       /* Pseudo address space under root PCI bus. */
> --
> 2.34.1
>
Duan, Zhenzhong June 3, 2024, 11:02 a.m. UTC | #2
>-----Original Message-----
>From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@eviden.com>
>Subject: Re: [PATCH v6 18/19] intel_iommu: Implement
>[set|unset]_iommu_device() callbacks
>
>
>On 03/06/2024 08:10, Zhenzhong Duan wrote:
>> Caution: External email. Do not open attachments or click links, unless this
>email comes from a known sender and you know the content is safe.
>>
>>
>> From: Yi Liu <yi.l.liu@intel.com>
>>
>> Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU.
>> In set call, a new structure VTDHostIOMMUDevice which holds
>> a reference to HostIOMMUDevice is stored in hash table
>> indexed by PCI BDF.
>>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>   hw/i386/intel_iommu_internal.h |  9 ++++
>>   include/hw/i386/intel_iommu.h  |  2 +
>>   hw/i386/intel_iommu.c          | 76
>++++++++++++++++++++++++++++++++++
>>   3 files changed, 87 insertions(+)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h
>b/hw/i386/intel_iommu_internal.h
>> index f8cf99bddf..b800d62ca0 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -28,6 +28,7 @@
>>   #ifndef HW_I386_INTEL_IOMMU_INTERNAL_H
>>   #define HW_I386_INTEL_IOMMU_INTERNAL_H
>>   #include "hw/i386/intel_iommu.h"
>> +#include "sysemu/host_iommu_device.h"
>>
>>   /*
>>    * Intel IOMMU register specification
>> @@ -537,4 +538,12 @@ typedef struct VTDRootEntry VTDRootEntry;
>>   #define VTD_SL_IGN_COM              0xbff0000000000000ULL
>>   #define VTD_SL_TM                   (1ULL << 62)
>>
>> +
>> +typedef struct VTDHostIOMMUDevice {
>> +    IntelIOMMUState *iommu_state;
>> +    PCIBus *bus;
>> +    uint8_t devfn;
>> +    HostIOMMUDevice *dev;
>> +    QLIST_ENTRY(VTDHostIOMMUDevice) next;
>> +} VTDHostIOMMUDevice;
>>   #endif
>> diff --git a/include/hw/i386/intel_iommu.h
>b/include/hw/i386/intel_iommu.h
>> index 7d694b0813..2bbde41e45 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -293,6 +293,8 @@ struct IntelIOMMUState {
>>       /* list of registered notifiers */
>>       QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>>
>> +    GHashTable *vtd_host_iommu_dev;             /* VTDHostIOMMUDevice
>*/
>> +
>>       /* interrupt remapping */
>>       bool intr_enabled;              /* Whether guest enabled IR */
>>       dma_addr_t intr_root;           /* Interrupt remapping table pointer */
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 519063c8f8..747c988bc4 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1,
>gconstpointer v2)
>>              (key1->pasid == key2->pasid);
>>   }
>>
>> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
>> +{
>> +    const struct vtd_as_key *key1 = v1;
>> +    const struct vtd_as_key *key2 = v2;
>> +
>> +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
>> +}
>>   /*
>>    * Note that we use pointer to PCIBus as the key, so hashing/shifting
>>    * based on the pointer value is intended. Note that we deal with
>> @@ -3812,6 +3819,70 @@ VTDAddressSpace
>*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>>       return vtd_dev_as;
>>   }
>>
>> +static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int
>devfn,
>> +                                     HostIOMMUDevice *hiod, Error **errp)
>> +{
>> +    IntelIOMMUState *s = opaque;
>> +    VTDHostIOMMUDevice *vtd_hdev;
>> +    struct vtd_as_key key = {
>> +        .bus = bus,
>> +        .devfn = devfn,
>> +    };
>> +    struct vtd_as_key *new_key;
>> +
>> +    assert(hiod);
>> +
>> +    vtd_iommu_lock(s);
>> +
>> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
>> +
>> +    if (vtd_hdev) {
>> +        error_setg(errp, "IOMMUFD device already exist");
>> +        vtd_iommu_unlock(s);
>> +        return false;
>> +    }
>> +
>> +    vtd_hdev = g_malloc0(sizeof(VTDHostIOMMUDevice));
>> +    vtd_hdev->bus = bus;
>> +    vtd_hdev->devfn = (uint8_t)devfn;
>> +    vtd_hdev->iommu_state = s;
>> +    vtd_hdev->dev = hiod;
>> +
>> +    new_key = g_malloc(sizeof(*new_key));
>> +    new_key->bus = bus;
>> +    new_key->devfn = devfn;
>> +
>> +    object_ref(hiod);
>> +    g_hash_table_insert(s->vtd_host_iommu_dev, new_key, vtd_hdev);
>> +
>> +    vtd_iommu_unlock(s);
>> +
>> +    return true;
>> +}
>> +
>> +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int
>devfn)
>> +{
>> +    IntelIOMMUState *s = opaque;
>> +    VTDHostIOMMUDevice *vtd_hdev;
>> +    struct vtd_as_key key = {
>> +        .bus = bus,
>> +        .devfn = devfn,
>> +    };
>> +
>> +    vtd_iommu_lock(s);
>> +
>> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
>> +    if (!vtd_hdev) {
>> +        vtd_iommu_unlock(s);
>> +        return;
>> +    }
>> +
>> +    g_hash_table_remove(s->vtd_host_iommu_dev, &key);
>> +    object_unref(vtd_hdev->dev);
>Not sure but isn't that a potential use after free?

Good catch! Will fix. Should be:

object_unref(vtd_hdev->dev);
g_hash_table_remove(s->vtd_host_iommu_dev, &key);

Thanks
Zhenzhong

>> +
>> +    vtd_iommu_unlock(s);
>> +}
>> +
>>   /* Unmap the whole range in the notifier's scope. */
>>   static void vtd_address_space_unmap(VTDAddressSpace *as,
>IOMMUNotifier *n)
>>   {
>> @@ -4116,6 +4187,8 @@ static AddressSpace
>*vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>
>>   static PCIIOMMUOps vtd_iommu_ops = {
>>       .get_address_space = vtd_host_dma_iommu,
>> +    .set_iommu_device = vtd_dev_set_iommu_device,
>> +    .unset_iommu_device = vtd_dev_unset_iommu_device,
>>   };
>>
>>   static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
>> @@ -4235,6 +4308,9 @@ static void vtd_realize(DeviceState *dev, Error
>**errp)
>>                                        g_free, g_free);
>>       s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash,
>vtd_as_equal,
>>                                         g_free, g_free);
>> +    s->vtd_host_iommu_dev = g_hash_table_new_full(vtd_as_hash,
>> +                                                  vtd_as_idev_equal,
>> +                                                  g_free, g_free);
>>       vtd_init(s);
>>       pci_setup_iommu(bus, &vtd_iommu_ops, dev);
>>       /* Pseudo address space under root PCI bus. */
>> --
>> 2.34.1
>>
CĂ©dric Le Goater June 3, 2024, 12:56 p.m. UTC | #3
On 6/3/24 13:02, Duan, Zhenzhong wrote:
> 
> 
>> -----Original Message-----
>> From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@eviden.com>
>> Subject: Re: [PATCH v6 18/19] intel_iommu: Implement
>> [set|unset]_iommu_device() callbacks
>>
>>
>> On 03/06/2024 08:10, Zhenzhong Duan wrote:
>>> Caution: External email. Do not open attachments or click links, unless this
>> email comes from a known sender and you know the content is safe.
>>>
>>>
>>> From: Yi Liu <yi.l.liu@intel.com>
>>>
>>> Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU.
>>> In set call, a new structure VTDHostIOMMUDevice which holds
>>> a reference to HostIOMMUDevice is stored in hash table
>>> indexed by PCI BDF.
>>>
>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>>    hw/i386/intel_iommu_internal.h |  9 ++++
>>>    include/hw/i386/intel_iommu.h  |  2 +
>>>    hw/i386/intel_iommu.c          | 76
>> ++++++++++++++++++++++++++++++++++
>>>    3 files changed, 87 insertions(+)
>>>
>>> diff --git a/hw/i386/intel_iommu_internal.h
>> b/hw/i386/intel_iommu_internal.h
>>> index f8cf99bddf..b800d62ca0 100644
>>> --- a/hw/i386/intel_iommu_internal.h
>>> +++ b/hw/i386/intel_iommu_internal.h
>>> @@ -28,6 +28,7 @@
>>>    #ifndef HW_I386_INTEL_IOMMU_INTERNAL_H
>>>    #define HW_I386_INTEL_IOMMU_INTERNAL_H
>>>    #include "hw/i386/intel_iommu.h"
>>> +#include "sysemu/host_iommu_device.h"
>>>
>>>    /*
>>>     * Intel IOMMU register specification
>>> @@ -537,4 +538,12 @@ typedef struct VTDRootEntry VTDRootEntry;
>>>    #define VTD_SL_IGN_COM              0xbff0000000000000ULL
>>>    #define VTD_SL_TM                   (1ULL << 62)
>>>
>>> +
>>> +typedef struct VTDHostIOMMUDevice {
>>> +    IntelIOMMUState *iommu_state;
>>> +    PCIBus *bus;
>>> +    uint8_t devfn;
>>> +    HostIOMMUDevice *dev;
>>> +    QLIST_ENTRY(VTDHostIOMMUDevice) next;
>>> +} VTDHostIOMMUDevice;
>>>    #endif
>>> diff --git a/include/hw/i386/intel_iommu.h
>> b/include/hw/i386/intel_iommu.h
>>> index 7d694b0813..2bbde41e45 100644
>>> --- a/include/hw/i386/intel_iommu.h
>>> +++ b/include/hw/i386/intel_iommu.h
>>> @@ -293,6 +293,8 @@ struct IntelIOMMUState {
>>>        /* list of registered notifiers */
>>>        QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>>>
>>> +    GHashTable *vtd_host_iommu_dev;             /* VTDHostIOMMUDevice
>> */
>>> +
>>>        /* interrupt remapping */
>>>        bool intr_enabled;              /* Whether guest enabled IR */
>>>        dma_addr_t intr_root;           /* Interrupt remapping table pointer */
>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>> index 519063c8f8..747c988bc4 100644
>>> --- a/hw/i386/intel_iommu.c
>>> +++ b/hw/i386/intel_iommu.c
>>> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1,
>> gconstpointer v2)
>>>               (key1->pasid == key2->pasid);
>>>    }
>>>
>>> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
>>> +{
>>> +    const struct vtd_as_key *key1 = v1;
>>> +    const struct vtd_as_key *key2 = v2;
>>> +
>>> +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
>>> +}
>>>    /*
>>>     * Note that we use pointer to PCIBus as the key, so hashing/shifting
>>>     * based on the pointer value is intended. Note that we deal with
>>> @@ -3812,6 +3819,70 @@ VTDAddressSpace
>> *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>>>        return vtd_dev_as;
>>>    }
>>>
>>> +static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int
>> devfn,
>>> +                                     HostIOMMUDevice *hiod, Error **errp)
>>> +{
>>> +    IntelIOMMUState *s = opaque;
>>> +    VTDHostIOMMUDevice *vtd_hdev;
>>> +    struct vtd_as_key key = {
>>> +        .bus = bus,
>>> +        .devfn = devfn,
>>> +    };
>>> +    struct vtd_as_key *new_key;
>>> +
>>> +    assert(hiod);
>>> +
>>> +    vtd_iommu_lock(s);
>>> +
>>> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
>>> +
>>> +    if (vtd_hdev) {
>>> +        error_setg(errp, "IOMMUFD device already exist");
>>> +        vtd_iommu_unlock(s);
>>> +        return false;
>>> +    }
>>> +
>>> +    vtd_hdev = g_malloc0(sizeof(VTDHostIOMMUDevice));
>>> +    vtd_hdev->bus = bus;
>>> +    vtd_hdev->devfn = (uint8_t)devfn;
>>> +    vtd_hdev->iommu_state = s;
>>> +    vtd_hdev->dev = hiod;
>>> +
>>> +    new_key = g_malloc(sizeof(*new_key));
>>> +    new_key->bus = bus;
>>> +    new_key->devfn = devfn;
>>> +
>>> +    object_ref(hiod);
>>> +    g_hash_table_insert(s->vtd_host_iommu_dev, new_key, vtd_hdev);
>>> +
>>> +    vtd_iommu_unlock(s);
>>> +
>>> +    return true;
>>> +}
>>> +
>>> +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int
>> devfn)
>>> +{
>>> +    IntelIOMMUState *s = opaque;
>>> +    VTDHostIOMMUDevice *vtd_hdev;
>>> +    struct vtd_as_key key = {
>>> +        .bus = bus,
>>> +        .devfn = devfn,
>>> +    };
>>> +
>>> +    vtd_iommu_lock(s);
>>> +
>>> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
>>> +    if (!vtd_hdev) {
>>> +        vtd_iommu_unlock(s);
>>> +        return;
>>> +    }
>>> +
>>> +    g_hash_table_remove(s->vtd_host_iommu_dev, &key);
>>> +    object_unref(vtd_hdev->dev);
>> Not sure but isn't that a potential use after free?
> 
> Good catch! Will fix. Should be:
> 
> object_unref(vtd_hdev->dev);
> g_hash_table_remove(s->vtd_host_iommu_dev, &key);

you could also implement a custom destroy hash function.


Thanks,

C.


> 
> Thanks
> Zhenzhong
> 
>>> +
>>> +    vtd_iommu_unlock(s);
>>> +}
>>> +
>>>    /* Unmap the whole range in the notifier's scope. */
>>>    static void vtd_address_space_unmap(VTDAddressSpace *as,
>> IOMMUNotifier *n)
>>>    {
>>> @@ -4116,6 +4187,8 @@ static AddressSpace
>> *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>>
>>>    static PCIIOMMUOps vtd_iommu_ops = {
>>>        .get_address_space = vtd_host_dma_iommu,
>>> +    .set_iommu_device = vtd_dev_set_iommu_device,
>>> +    .unset_iommu_device = vtd_dev_unset_iommu_device,
>>>    };
>>>
>>>    static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
>>> @@ -4235,6 +4308,9 @@ static void vtd_realize(DeviceState *dev, Error
>> **errp)
>>>                                         g_free, g_free);
>>>        s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash,
>> vtd_as_equal,
>>>                                          g_free, g_free);
>>> +    s->vtd_host_iommu_dev = g_hash_table_new_full(vtd_as_hash,
>>> +                                                  vtd_as_idev_equal,
>>> +                                                  g_free, g_free);
>>>        vtd_init(s);
>>>        pci_setup_iommu(bus, &vtd_iommu_ops, dev);
>>>        /* Pseudo address space under root PCI bus. */
>>> --
>>> 2.34.1
>>>
Eric Auger June 3, 2024, 2:13 p.m. UTC | #4
Hi Zhenzhong,

On 6/3/24 08:10, Zhenzhong Duan wrote:
> From: Yi Liu <yi.l.liu@intel.com>
>
> Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU.
> In set call, a new structure VTDHostIOMMUDevice which holds
> a reference to HostIOMMUDevice is stored in hash table
> indexed by PCI BDF.
maybe precise that this is not the aliased one?
>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>  hw/i386/intel_iommu_internal.h |  9 ++++
>  include/hw/i386/intel_iommu.h  |  2 +
>  hw/i386/intel_iommu.c          | 76 ++++++++++++++++++++++++++++++++++
>  3 files changed, 87 insertions(+)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index f8cf99bddf..b800d62ca0 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -28,6 +28,7 @@
>  #ifndef HW_I386_INTEL_IOMMU_INTERNAL_H
>  #define HW_I386_INTEL_IOMMU_INTERNAL_H
>  #include "hw/i386/intel_iommu.h"
> +#include "sysemu/host_iommu_device.h"
>  
>  /*
>   * Intel IOMMU register specification
> @@ -537,4 +538,12 @@ typedef struct VTDRootEntry VTDRootEntry;
>  #define VTD_SL_IGN_COM              0xbff0000000000000ULL
>  #define VTD_SL_TM                   (1ULL << 62)
>  
> +
> +typedef struct VTDHostIOMMUDevice {
> +    IntelIOMMUState *iommu_state;
Why do you need the iommu_state?
> +    PCIBus *bus;
> +    uint8_t devfn;
> +    HostIOMMUDevice *dev;
> +    QLIST_ENTRY(VTDHostIOMMUDevice) next;
> +} VTDHostIOMMUDevice;
How VTD specific is it?
>  #endif
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 7d694b0813..2bbde41e45 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -293,6 +293,8 @@ struct IntelIOMMUState {
>      /* list of registered notifiers */
>      QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>  
> +    GHashTable *vtd_host_iommu_dev;             /* VTDHostIOMMUDevice */
> +
>      /* interrupt remapping */
>      bool intr_enabled;              /* Whether guest enabled IR */
>      dma_addr_t intr_root;           /* Interrupt remapping table pointer */
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 519063c8f8..747c988bc4 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2)
>             (key1->pasid == key2->pasid);
>  }
>  
> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
> +{
> +    const struct vtd_as_key *key1 = v1;
> +    const struct vtd_as_key *key2 = v2;
> +
> +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
> +}
can't you reuse the key with pasid?
>  /*
>   * Note that we use pointer to PCIBus as the key, so hashing/shifting
>   * based on the pointer value is intended. Note that we deal with
> @@ -3812,6 +3819,70 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>      return vtd_dev_as;
>  }
>  
> +static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
> +                                     HostIOMMUDevice *hiod, Error **errp)
> +{
> +    IntelIOMMUState *s = opaque;
> +    VTDHostIOMMUDevice *vtd_hdev;
> +    struct vtd_as_key key = {
> +        .bus = bus,
> +        .devfn = devfn,
> +    };
> +    struct vtd_as_key *new_key;
> +
> +    assert(hiod);
> +
> +    vtd_iommu_lock(s);
> +
> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
> +
> +    if (vtd_hdev) {
> +        error_setg(errp, "IOMMUFD device already exist");
s/IOMMUFD/Host IOMMU device?
> +        vtd_iommu_unlock(s);
> +        return false;
> +    }
> +
> +    vtd_hdev = g_malloc0(sizeof(VTDHostIOMMUDevice));
> +    vtd_hdev->bus = bus;
> +    vtd_hdev->devfn = (uint8_t)devfn;
> +    vtd_hdev->iommu_state = s;
> +    vtd_hdev->dev = hiod;
> +
> +    new_key = g_malloc(sizeof(*new_key));
> +    new_key->bus = bus;
> +    new_key->devfn = devfn;
> +
> +    object_ref(hiod);
> +    g_hash_table_insert(s->vtd_host_iommu_dev, new_key, vtd_hdev);
> +
> +    vtd_iommu_unlock(s);
> +
> +    return true;
> +}
> +
> +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
> +{
> +    IntelIOMMUState *s = opaque;
> +    VTDHostIOMMUDevice *vtd_hdev;
> +    struct vtd_as_key key = {
> +        .bus = bus,
> +        .devfn = devfn,
> +    };
> +
> +    vtd_iommu_lock(s);
> +
> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
> +    if (!vtd_hdev) {
> +        vtd_iommu_unlock(s);
> +        return;
> +    }
> +
> +    g_hash_table_remove(s->vtd_host_iommu_dev, &key);
> +    object_unref(vtd_hdev->dev);
> +
> +    vtd_iommu_unlock(s);
> +}
> +
>  /* Unmap the whole range in the notifier's scope. */
>  static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
>  {
> @@ -4116,6 +4187,8 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>  
>  static PCIIOMMUOps vtd_iommu_ops = {
>      .get_address_space = vtd_host_dma_iommu,
> +    .set_iommu_device = vtd_dev_set_iommu_device,
> +    .unset_iommu_device = vtd_dev_unset_iommu_device,
>  };
>  
>  static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
> @@ -4235,6 +4308,9 @@ static void vtd_realize(DeviceState *dev, Error **errp)
>                                       g_free, g_free);
>      s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
>                                        g_free, g_free);
> +    s->vtd_host_iommu_dev = g_hash_table_new_full(vtd_as_hash,
> +                                                  vtd_as_idev_equal,
> +                                                  g_free, g_free);
>      vtd_init(s);
>      pci_setup_iommu(bus, &vtd_iommu_ops, dev);
>      /* Pseudo address space under root PCI bus. */
Thanks

Eric
Eric Auger June 3, 2024, 2:47 p.m. UTC | #5
On 6/3/24 08:10, Zhenzhong Duan wrote:
> From: Yi Liu <yi.l.liu@intel.com>
>
> Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU.
> In set call, a new structure VTDHostIOMMUDevice which holds
> a reference to HostIOMMUDevice is stored in hash table
> indexed by PCI BDF.
>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>  hw/i386/intel_iommu_internal.h |  9 ++++
>  include/hw/i386/intel_iommu.h  |  2 +
>  hw/i386/intel_iommu.c          | 76 ++++++++++++++++++++++++++++++++++
>  3 files changed, 87 insertions(+)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index f8cf99bddf..b800d62ca0 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -28,6 +28,7 @@
>  #ifndef HW_I386_INTEL_IOMMU_INTERNAL_H
>  #define HW_I386_INTEL_IOMMU_INTERNAL_H
>  #include "hw/i386/intel_iommu.h"
> +#include "sysemu/host_iommu_device.h"
>  
>  /*
>   * Intel IOMMU register specification
> @@ -537,4 +538,12 @@ typedef struct VTDRootEntry VTDRootEntry;
>  #define VTD_SL_IGN_COM              0xbff0000000000000ULL
>  #define VTD_SL_TM                   (1ULL << 62)
>  
> +
> +typedef struct VTDHostIOMMUDevice {
> +    IntelIOMMUState *iommu_state;
> +    PCIBus *bus;
> +    uint8_t devfn;
> +    HostIOMMUDevice *dev;
> +    QLIST_ENTRY(VTDHostIOMMUDevice) next;
> +} VTDHostIOMMUDevice;
>  #endif
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 7d694b0813..2bbde41e45 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -293,6 +293,8 @@ struct IntelIOMMUState {
>      /* list of registered notifiers */
>      QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>  
> +    GHashTable *vtd_host_iommu_dev;             /* VTDHostIOMMUDevice */
> +
>      /* interrupt remapping */
>      bool intr_enabled;              /* Whether guest enabled IR */
>      dma_addr_t intr_root;           /* Interrupt remapping table pointer */
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 519063c8f8..747c988bc4 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2)
>             (key1->pasid == key2->pasid);
>  }
>  
> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
> +{
> +    const struct vtd_as_key *key1 = v1;
> +    const struct vtd_as_key *key2 = v2;
> +
> +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
> +}
>  /*
>   * Note that we use pointer to PCIBus as the key, so hashing/shifting
>   * based on the pointer value is intended. Note that we deal with
> @@ -3812,6 +3819,70 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>      return vtd_dev_as;
>  }
>  
> +static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
> +                                     HostIOMMUDevice *hiod, Error **errp)
> +{
> +    IntelIOMMUState *s = opaque;
> +    VTDHostIOMMUDevice *vtd_hdev;
> +    struct vtd_as_key key = {
> +        .bus = bus,
> +        .devfn = devfn,
> +    };
> +    struct vtd_as_key *new_key;
> +
> +    assert(hiod);
> +
> +    vtd_iommu_lock(s);
> +
> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
> +
> +    if (vtd_hdev) {
> +        error_setg(errp, "IOMMUFD device already exist");
> +        vtd_iommu_unlock(s);
> +        return false;
> +    }
> +
> +    vtd_hdev = g_malloc0(sizeof(VTDHostIOMMUDevice));
> +    vtd_hdev->bus = bus;
> +    vtd_hdev->devfn = (uint8_t)devfn;
> +    vtd_hdev->iommu_state = s;
> +    vtd_hdev->dev = hiod;
I am still not totally clear about why we couldn't reuse VTDAddressSpace
instance for this bus/devid. Is it a matter of aliased versus non
aliased bus/devfn, or a matter of pasid diff. An AddressSpace could back
an assigned device in which case a HostIOMMUDevice could be added to
this latter. I think this should be explained in the commit msg

Eric
> +
> +    new_key = g_malloc(sizeof(*new_key));
> +    new_key->bus = bus;
> +    new_key->devfn = devfn;
> +
> +    object_ref(hiod);
> +    g_hash_table_insert(s->vtd_host_iommu_dev, new_key, vtd_hdev);
> +
> +    vtd_iommu_unlock(s);
> +
> +    return true;
> +}
> +
> +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
> +{
> +    IntelIOMMUState *s = opaque;
> +    VTDHostIOMMUDevice *vtd_hdev;
> +    struct vtd_as_key key = {
> +        .bus = bus,
> +        .devfn = devfn,
> +    };
> +
> +    vtd_iommu_lock(s);
> +
> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
> +    if (!vtd_hdev) {
> +        vtd_iommu_unlock(s);
> +        return;
> +    }
> +
> +    g_hash_table_remove(s->vtd_host_iommu_dev, &key);
> +    object_unref(vtd_hdev->dev);
> +
> +    vtd_iommu_unlock(s);
> +}
> +
>  /* Unmap the whole range in the notifier's scope. */
>  static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
>  {
> @@ -4116,6 +4187,8 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>  
>  static PCIIOMMUOps vtd_iommu_ops = {
>      .get_address_space = vtd_host_dma_iommu,
> +    .set_iommu_device = vtd_dev_set_iommu_device,
> +    .unset_iommu_device = vtd_dev_unset_iommu_device,
>  };
>  
>  static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
> @@ -4235,6 +4308,9 @@ static void vtd_realize(DeviceState *dev, Error **errp)
>                                       g_free, g_free);
>      s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
>                                        g_free, g_free);
> +    s->vtd_host_iommu_dev = g_hash_table_new_full(vtd_as_hash,
> +                                                  vtd_as_idev_equal,
> +                                                  g_free, g_free);
>      vtd_init(s);
>      pci_setup_iommu(bus, &vtd_iommu_ops, dev);
>      /* Pseudo address space under root PCI bus. */
Duan, Zhenzhong June 4, 2024, 3:46 a.m. UTC | #6
>-----Original Message-----
>From: CĂ©dric Le Goater <clg@redhat.com>
>Subject: Re: [PATCH v6 18/19] intel_iommu: Implement
>[set|unset]_iommu_device() callbacks
>
>On 6/3/24 13:02, Duan, Zhenzhong wrote:
>>
>>
>>> -----Original Message-----
>>> From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@eviden.com>
>>> Subject: Re: [PATCH v6 18/19] intel_iommu: Implement
>>> [set|unset]_iommu_device() callbacks
>>>
>>>
>>> On 03/06/2024 08:10, Zhenzhong Duan wrote:
>>>> Caution: External email. Do not open attachments or click links, unless
>this
>>> email comes from a known sender and you know the content is safe.
>>>>
>>>>
>>>> From: Yi Liu <yi.l.liu@intel.com>
>>>>
>>>> Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU.
>>>> In set call, a new structure VTDHostIOMMUDevice which holds
>>>> a reference to HostIOMMUDevice is stored in hash table
>>>> indexed by PCI BDF.
>>>>
>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>> ---
>>>>    hw/i386/intel_iommu_internal.h |  9 ++++
>>>>    include/hw/i386/intel_iommu.h  |  2 +
>>>>    hw/i386/intel_iommu.c          | 76
>>> ++++++++++++++++++++++++++++++++++
>>>>    3 files changed, 87 insertions(+)
>>>>
>>>> diff --git a/hw/i386/intel_iommu_internal.h
>>> b/hw/i386/intel_iommu_internal.h
>>>> index f8cf99bddf..b800d62ca0 100644
>>>> --- a/hw/i386/intel_iommu_internal.h
>>>> +++ b/hw/i386/intel_iommu_internal.h
>>>> @@ -28,6 +28,7 @@
>>>>    #ifndef HW_I386_INTEL_IOMMU_INTERNAL_H
>>>>    #define HW_I386_INTEL_IOMMU_INTERNAL_H
>>>>    #include "hw/i386/intel_iommu.h"
>>>> +#include "sysemu/host_iommu_device.h"
>>>>
>>>>    /*
>>>>     * Intel IOMMU register specification
>>>> @@ -537,4 +538,12 @@ typedef struct VTDRootEntry VTDRootEntry;
>>>>    #define VTD_SL_IGN_COM              0xbff0000000000000ULL
>>>>    #define VTD_SL_TM                   (1ULL << 62)
>>>>
>>>> +
>>>> +typedef struct VTDHostIOMMUDevice {
>>>> +    IntelIOMMUState *iommu_state;
>>>> +    PCIBus *bus;
>>>> +    uint8_t devfn;
>>>> +    HostIOMMUDevice *dev;
>>>> +    QLIST_ENTRY(VTDHostIOMMUDevice) next;
>>>> +} VTDHostIOMMUDevice;
>>>>    #endif
>>>> diff --git a/include/hw/i386/intel_iommu.h
>>> b/include/hw/i386/intel_iommu.h
>>>> index 7d694b0813..2bbde41e45 100644
>>>> --- a/include/hw/i386/intel_iommu.h
>>>> +++ b/include/hw/i386/intel_iommu.h
>>>> @@ -293,6 +293,8 @@ struct IntelIOMMUState {
>>>>        /* list of registered notifiers */
>>>>        QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>>>>
>>>> +    GHashTable *vtd_host_iommu_dev;             /*
>VTDHostIOMMUDevice
>>> */
>>>> +
>>>>        /* interrupt remapping */
>>>>        bool intr_enabled;              /* Whether guest enabled IR */
>>>>        dma_addr_t intr_root;           /* Interrupt remapping table pointer
>*/
>>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>>> index 519063c8f8..747c988bc4 100644
>>>> --- a/hw/i386/intel_iommu.c
>>>> +++ b/hw/i386/intel_iommu.c
>>>> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer
>v1,
>>> gconstpointer v2)
>>>>               (key1->pasid == key2->pasid);
>>>>    }
>>>>
>>>> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
>>>> +{
>>>> +    const struct vtd_as_key *key1 = v1;
>>>> +    const struct vtd_as_key *key2 = v2;
>>>> +
>>>> +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
>>>> +}
>>>>    /*
>>>>     * Note that we use pointer to PCIBus as the key, so hashing/shifting
>>>>     * based on the pointer value is intended. Note that we deal with
>>>> @@ -3812,6 +3819,70 @@ VTDAddressSpace
>>> *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>>>>        return vtd_dev_as;
>>>>    }
>>>>
>>>> +static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int
>>> devfn,
>>>> +                                     HostIOMMUDevice *hiod, Error **errp)
>>>> +{
>>>> +    IntelIOMMUState *s = opaque;
>>>> +    VTDHostIOMMUDevice *vtd_hdev;
>>>> +    struct vtd_as_key key = {
>>>> +        .bus = bus,
>>>> +        .devfn = devfn,
>>>> +    };
>>>> +    struct vtd_as_key *new_key;
>>>> +
>>>> +    assert(hiod);
>>>> +
>>>> +    vtd_iommu_lock(s);
>>>> +
>>>> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
>>>> +
>>>> +    if (vtd_hdev) {
>>>> +        error_setg(errp, "IOMMUFD device already exist");
>>>> +        vtd_iommu_unlock(s);
>>>> +        return false;
>>>> +    }
>>>> +
>>>> +    vtd_hdev = g_malloc0(sizeof(VTDHostIOMMUDevice));
>>>> +    vtd_hdev->bus = bus;
>>>> +    vtd_hdev->devfn = (uint8_t)devfn;
>>>> +    vtd_hdev->iommu_state = s;
>>>> +    vtd_hdev->dev = hiod;
>>>> +
>>>> +    new_key = g_malloc(sizeof(*new_key));
>>>> +    new_key->bus = bus;
>>>> +    new_key->devfn = devfn;
>>>> +
>>>> +    object_ref(hiod);
>>>> +    g_hash_table_insert(s->vtd_host_iommu_dev, new_key, vtd_hdev);
>>>> +
>>>> +    vtd_iommu_unlock(s);
>>>> +
>>>> +    return true;
>>>> +}
>>>> +
>>>> +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque,
>int
>>> devfn)
>>>> +{
>>>> +    IntelIOMMUState *s = opaque;
>>>> +    VTDHostIOMMUDevice *vtd_hdev;
>>>> +    struct vtd_as_key key = {
>>>> +        .bus = bus,
>>>> +        .devfn = devfn,
>>>> +    };
>>>> +
>>>> +    vtd_iommu_lock(s);
>>>> +
>>>> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
>>>> +    if (!vtd_hdev) {
>>>> +        vtd_iommu_unlock(s);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    g_hash_table_remove(s->vtd_host_iommu_dev, &key);
>>>> +    object_unref(vtd_hdev->dev);
>>> Not sure but isn't that a potential use after free?
>>
>> Good catch! Will fix. Should be:
>>
>> object_unref(vtd_hdev->dev);
>> g_hash_table_remove(s->vtd_host_iommu_dev, &key);
>
>you could also implement a custom destroy hash function.

Yes, but I'd like to have it to match object_ref() call in vtd_dev_set_iommu_device()

Thanks
Zhenzhong
Duan, Zhenzhong June 4, 2024, 5:40 a.m. UTC | #7
>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Subject: Re: [PATCH v6 18/19] intel_iommu: Implement
>[set|unset]_iommu_device() callbacks
>
>Hi Zhenzhong,
>
>On 6/3/24 08:10, Zhenzhong Duan wrote:
>> From: Yi Liu <yi.l.liu@intel.com>
>>
>> Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU.
>> In set call, a new structure VTDHostIOMMUDevice which holds
>> a reference to HostIOMMUDevice is stored in hash table
>> indexed by PCI BDF.
>maybe precise that this is not the aliased one?

Sure.

>>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>  hw/i386/intel_iommu_internal.h |  9 ++++
>>  include/hw/i386/intel_iommu.h  |  2 +
>>  hw/i386/intel_iommu.c          | 76
>++++++++++++++++++++++++++++++++++
>>  3 files changed, 87 insertions(+)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h
>b/hw/i386/intel_iommu_internal.h
>> index f8cf99bddf..b800d62ca0 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -28,6 +28,7 @@
>>  #ifndef HW_I386_INTEL_IOMMU_INTERNAL_H
>>  #define HW_I386_INTEL_IOMMU_INTERNAL_H
>>  #include "hw/i386/intel_iommu.h"
>> +#include "sysemu/host_iommu_device.h"
>>
>>  /*
>>   * Intel IOMMU register specification
>> @@ -537,4 +538,12 @@ typedef struct VTDRootEntry VTDRootEntry;
>>  #define VTD_SL_IGN_COM              0xbff0000000000000ULL
>>  #define VTD_SL_TM                   (1ULL << 62)
>>
>> +
>> +typedef struct VTDHostIOMMUDevice {
>> +    IntelIOMMUState *iommu_state;
>Why do you need the iommu_state?

It is used in nesting series.

>> +    PCIBus *bus;
>> +    uint8_t devfn;
>> +    HostIOMMUDevice *dev;
>> +    QLIST_ENTRY(VTDHostIOMMUDevice) next;
>> +} VTDHostIOMMUDevice;
>How VTD specific is it?

In nesting series, it has element iommu_state and errata
which are VTD specific.

>>  #endif
>> diff --git a/include/hw/i386/intel_iommu.h
>b/include/hw/i386/intel_iommu.h
>> index 7d694b0813..2bbde41e45 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -293,6 +293,8 @@ struct IntelIOMMUState {
>>      /* list of registered notifiers */
>>      QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>>
>> +    GHashTable *vtd_host_iommu_dev;             /* VTDHostIOMMUDevice
>*/
>> +
>>      /* interrupt remapping */
>>      bool intr_enabled;              /* Whether guest enabled IR */
>>      dma_addr_t intr_root;           /* Interrupt remapping table pointer */
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 519063c8f8..747c988bc4 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1,
>gconstpointer v2)
>>             (key1->pasid == key2->pasid);
>>  }
>>
>> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
>> +{
>> +    const struct vtd_as_key *key1 = v1;
>> +    const struct vtd_as_key *key2 = v2;
>> +
>> +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
>> +}
>can't you reuse the key with pasid?

s->vtd_host_iommu_dev isn't indexed by pasid but only BDF.
Maybe I'd better to define its own key struct, hash() and equal() functions.

>>  /*
>>   * Note that we use pointer to PCIBus as the key, so hashing/shifting
>>   * based on the pointer value is intended. Note that we deal with
>> @@ -3812,6 +3819,70 @@ VTDAddressSpace
>*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>>      return vtd_dev_as;
>>  }
>>
>> +static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int
>devfn,
>> +                                     HostIOMMUDevice *hiod, Error **errp)
>> +{
>> +    IntelIOMMUState *s = opaque;
>> +    VTDHostIOMMUDevice *vtd_hdev;
>> +    struct vtd_as_key key = {
>> +        .bus = bus,
>> +        .devfn = devfn,
>> +    };
>> +    struct vtd_as_key *new_key;
>> +
>> +    assert(hiod);
>> +
>> +    vtd_iommu_lock(s);
>> +
>> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
>> +
>> +    if (vtd_hdev) {
>> +        error_setg(errp, "IOMMUFD device already exist");
>s/IOMMUFD/Host IOMMU device?

Good catch, will fix.

Thanks
Zhenzhong

>> +        vtd_iommu_unlock(s);
>> +        return false;
>> +    }
>> +
>> +    vtd_hdev = g_malloc0(sizeof(VTDHostIOMMUDevice));
>> +    vtd_hdev->bus = bus;
>> +    vtd_hdev->devfn = (uint8_t)devfn;
>> +    vtd_hdev->iommu_state = s;
>> +    vtd_hdev->dev = hiod;
>> +
>> +    new_key = g_malloc(sizeof(*new_key));
>> +    new_key->bus = bus;
>> +    new_key->devfn = devfn;
>> +
>> +    object_ref(hiod);
>> +    g_hash_table_insert(s->vtd_host_iommu_dev, new_key, vtd_hdev);
>> +
>> +    vtd_iommu_unlock(s);
>> +
>> +    return true;
>> +}
>> +
>> +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int
>devfn)
>> +{
>> +    IntelIOMMUState *s = opaque;
>> +    VTDHostIOMMUDevice *vtd_hdev;
>> +    struct vtd_as_key key = {
>> +        .bus = bus,
>> +        .devfn = devfn,
>> +    };
>> +
>> +    vtd_iommu_lock(s);
>> +
>> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
>> +    if (!vtd_hdev) {
>> +        vtd_iommu_unlock(s);
>> +        return;
>> +    }
>> +
>> +    g_hash_table_remove(s->vtd_host_iommu_dev, &key);
>> +    object_unref(vtd_hdev->dev);
>> +
>> +    vtd_iommu_unlock(s);
>> +}
>> +
>>  /* Unmap the whole range in the notifier's scope. */
>>  static void vtd_address_space_unmap(VTDAddressSpace *as,
>IOMMUNotifier *n)
>>  {
>> @@ -4116,6 +4187,8 @@ static AddressSpace
>*vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>
>>  static PCIIOMMUOps vtd_iommu_ops = {
>>      .get_address_space = vtd_host_dma_iommu,
>> +    .set_iommu_device = vtd_dev_set_iommu_device,
>> +    .unset_iommu_device = vtd_dev_unset_iommu_device,
>>  };
>>
>>  static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
>> @@ -4235,6 +4308,9 @@ static void vtd_realize(DeviceState *dev, Error
>**errp)
>>                                       g_free, g_free);
>>      s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash,
>vtd_as_equal,
>>                                        g_free, g_free);
>> +    s->vtd_host_iommu_dev = g_hash_table_new_full(vtd_as_hash,
>> +                                                  vtd_as_idev_equal,
>> +                                                  g_free, g_free);
>>      vtd_init(s);
>>      pci_setup_iommu(bus, &vtd_iommu_ops, dev);
>>      /* Pseudo address space under root PCI bus. */
>Thanks
>
>Eric
Duan, Zhenzhong June 4, 2024, 5:46 a.m. UTC | #8
>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Subject: Re: [PATCH v6 18/19] intel_iommu: Implement
>[set|unset]_iommu_device() callbacks
>
>
>
>On 6/3/24 08:10, Zhenzhong Duan wrote:
>> From: Yi Liu <yi.l.liu@intel.com>
>>
>> Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU.
>> In set call, a new structure VTDHostIOMMUDevice which holds
>> a reference to HostIOMMUDevice is stored in hash table
>> indexed by PCI BDF.
>>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>  hw/i386/intel_iommu_internal.h |  9 ++++
>>  include/hw/i386/intel_iommu.h  |  2 +
>>  hw/i386/intel_iommu.c          | 76
>++++++++++++++++++++++++++++++++++
>>  3 files changed, 87 insertions(+)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h
>b/hw/i386/intel_iommu_internal.h
>> index f8cf99bddf..b800d62ca0 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -28,6 +28,7 @@
>>  #ifndef HW_I386_INTEL_IOMMU_INTERNAL_H
>>  #define HW_I386_INTEL_IOMMU_INTERNAL_H
>>  #include "hw/i386/intel_iommu.h"
>> +#include "sysemu/host_iommu_device.h"
>>
>>  /*
>>   * Intel IOMMU register specification
>> @@ -537,4 +538,12 @@ typedef struct VTDRootEntry VTDRootEntry;
>>  #define VTD_SL_IGN_COM              0xbff0000000000000ULL
>>  #define VTD_SL_TM                   (1ULL << 62)
>>
>> +
>> +typedef struct VTDHostIOMMUDevice {
>> +    IntelIOMMUState *iommu_state;
>> +    PCIBus *bus;
>> +    uint8_t devfn;
>> +    HostIOMMUDevice *dev;
>> +    QLIST_ENTRY(VTDHostIOMMUDevice) next;
>> +} VTDHostIOMMUDevice;
>>  #endif
>> diff --git a/include/hw/i386/intel_iommu.h
>b/include/hw/i386/intel_iommu.h
>> index 7d694b0813..2bbde41e45 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -293,6 +293,8 @@ struct IntelIOMMUState {
>>      /* list of registered notifiers */
>>      QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>>
>> +    GHashTable *vtd_host_iommu_dev;             /* VTDHostIOMMUDevice
>*/
>> +
>>      /* interrupt remapping */
>>      bool intr_enabled;              /* Whether guest enabled IR */
>>      dma_addr_t intr_root;           /* Interrupt remapping table pointer */
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 519063c8f8..747c988bc4 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1,
>gconstpointer v2)
>>             (key1->pasid == key2->pasid);
>>  }
>>
>> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
>> +{
>> +    const struct vtd_as_key *key1 = v1;
>> +    const struct vtd_as_key *key2 = v2;
>> +
>> +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
>> +}
>>  /*
>>   * Note that we use pointer to PCIBus as the key, so hashing/shifting
>>   * based on the pointer value is intended. Note that we deal with
>> @@ -3812,6 +3819,70 @@ VTDAddressSpace
>*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>>      return vtd_dev_as;
>>  }
>>
>> +static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int
>devfn,
>> +                                     HostIOMMUDevice *hiod, Error **errp)
>> +{
>> +    IntelIOMMUState *s = opaque;
>> +    VTDHostIOMMUDevice *vtd_hdev;
>> +    struct vtd_as_key key = {
>> +        .bus = bus,
>> +        .devfn = devfn,
>> +    };
>> +    struct vtd_as_key *new_key;
>> +
>> +    assert(hiod);
>> +
>> +    vtd_iommu_lock(s);
>> +
>> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
>> +
>> +    if (vtd_hdev) {
>> +        error_setg(errp, "IOMMUFD device already exist");
>> +        vtd_iommu_unlock(s);
>> +        return false;
>> +    }
>> +
>> +    vtd_hdev = g_malloc0(sizeof(VTDHostIOMMUDevice));
>> +    vtd_hdev->bus = bus;
>> +    vtd_hdev->devfn = (uint8_t)devfn;
>> +    vtd_hdev->iommu_state = s;
>> +    vtd_hdev->dev = hiod;
>I am still not totally clear about why we couldn't reuse VTDAddressSpace
>instance for this bus/devid. Is it a matter of aliased versus non
>aliased bus/devfn, or a matter of pasid diff. An AddressSpace could back
>an assigned device in which case a HostIOMMUDevice could be added to
>this latter. I think this should be explained in the commit msg

Yes, as you said, it's a matter of aliased vs non aliased BDF.

VTDAddressSpace is per aliased BDF while VTDHostIOMMUDevice is per non aliased BDF.
There can be multiple assigned devices under same virtual iommu group and share same 
VTDAddressSpace, but they have their own VTDHostIOMMUDevice.

Will refine commit msg.

Thanks
Zhenzhong

>
>Eric
>> +
>> +    new_key = g_malloc(sizeof(*new_key));
>> +    new_key->bus = bus;
>> +    new_key->devfn = devfn;
>> +
>> +    object_ref(hiod);
>> +    g_hash_table_insert(s->vtd_host_iommu_dev, new_key, vtd_hdev);
>> +
>> +    vtd_iommu_unlock(s);
>> +
>> +    return true;
>> +}
>> +
>> +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int
>devfn)
>> +{
>> +    IntelIOMMUState *s = opaque;
>> +    VTDHostIOMMUDevice *vtd_hdev;
>> +    struct vtd_as_key key = {
>> +        .bus = bus,
>> +        .devfn = devfn,
>> +    };
>> +
>> +    vtd_iommu_lock(s);
>> +
>> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
>> +    if (!vtd_hdev) {
>> +        vtd_iommu_unlock(s);
>> +        return;
>> +    }
>> +
>> +    g_hash_table_remove(s->vtd_host_iommu_dev, &key);
>> +    object_unref(vtd_hdev->dev);
>> +
>> +    vtd_iommu_unlock(s);
>> +}
>> +
>>  /* Unmap the whole range in the notifier's scope. */
>>  static void vtd_address_space_unmap(VTDAddressSpace *as,
>IOMMUNotifier *n)
>>  {
>> @@ -4116,6 +4187,8 @@ static AddressSpace
>*vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>
>>  static PCIIOMMUOps vtd_iommu_ops = {
>>      .get_address_space = vtd_host_dma_iommu,
>> +    .set_iommu_device = vtd_dev_set_iommu_device,
>> +    .unset_iommu_device = vtd_dev_unset_iommu_device,
>>  };
>>
>>  static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
>> @@ -4235,6 +4308,9 @@ static void vtd_realize(DeviceState *dev, Error
>**errp)
>>                                       g_free, g_free);
>>      s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash,
>vtd_as_equal,
>>                                        g_free, g_free);
>> +    s->vtd_host_iommu_dev = g_hash_table_new_full(vtd_as_hash,
>> +                                                  vtd_as_idev_equal,
>> +                                                  g_free, g_free);
>>      vtd_init(s);
>>      pci_setup_iommu(bus, &vtd_iommu_ops, dev);
>>      /* Pseudo address space under root PCI bus. */
Eric Auger June 4, 2024, 8:14 a.m. UTC | #9
On 6/4/24 07:40, Duan, Zhenzhong wrote:
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Subject: Re: [PATCH v6 18/19] intel_iommu: Implement
>> [set|unset]_iommu_device() callbacks
>>
>> Hi Zhenzhong,
>>
>> On 6/3/24 08:10, Zhenzhong Duan wrote:
>>> From: Yi Liu <yi.l.liu@intel.com>
>>>
>>> Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU.
>>> In set call, a new structure VTDHostIOMMUDevice which holds
>>> a reference to HostIOMMUDevice is stored in hash table
>>> indexed by PCI BDF.
>> maybe precise that this is not the aliased one?
> Sure.
>
>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>>  hw/i386/intel_iommu_internal.h |  9 ++++
>>>  include/hw/i386/intel_iommu.h  |  2 +
>>>  hw/i386/intel_iommu.c          | 76
>> ++++++++++++++++++++++++++++++++++
>>>  3 files changed, 87 insertions(+)
>>>
>>> diff --git a/hw/i386/intel_iommu_internal.h
>> b/hw/i386/intel_iommu_internal.h
>>> index f8cf99bddf..b800d62ca0 100644
>>> --- a/hw/i386/intel_iommu_internal.h
>>> +++ b/hw/i386/intel_iommu_internal.h
>>> @@ -28,6 +28,7 @@
>>>  #ifndef HW_I386_INTEL_IOMMU_INTERNAL_H
>>>  #define HW_I386_INTEL_IOMMU_INTERNAL_H
>>>  #include "hw/i386/intel_iommu.h"
>>> +#include "sysemu/host_iommu_device.h"
>>>
>>>  /*
>>>   * Intel IOMMU register specification
>>> @@ -537,4 +538,12 @@ typedef struct VTDRootEntry VTDRootEntry;
>>>  #define VTD_SL_IGN_COM              0xbff0000000000000ULL
>>>  #define VTD_SL_TM                   (1ULL << 62)
>>>
>>> +
>>> +typedef struct VTDHostIOMMUDevice {
>>> +    IntelIOMMUState *iommu_state;
>> Why do you need the iommu_state?
> It is used in nesting series.
>
>>> +    PCIBus *bus;
>>> +    uint8_t devfn;
>>> +    HostIOMMUDevice *dev;
>>> +    QLIST_ENTRY(VTDHostIOMMUDevice) next;
>>> +} VTDHostIOMMUDevice;
>> How VTD specific is it?
> In nesting series, it has element iommu_state and errata
> which are VTD specific.

so at least I would add a comment in the commit message explaining this.


>
>>>  #endif
>>> diff --git a/include/hw/i386/intel_iommu.h
>> b/include/hw/i386/intel_iommu.h
>>> index 7d694b0813..2bbde41e45 100644
>>> --- a/include/hw/i386/intel_iommu.h
>>> +++ b/include/hw/i386/intel_iommu.h
>>> @@ -293,6 +293,8 @@ struct IntelIOMMUState {
>>>      /* list of registered notifiers */
>>>      QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>>>
>>> +    GHashTable *vtd_host_iommu_dev;             /* VTDHostIOMMUDevice
>> */
>>> +
>>>      /* interrupt remapping */
>>>      bool intr_enabled;              /* Whether guest enabled IR */
>>>      dma_addr_t intr_root;           /* Interrupt remapping table pointer */
>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>> index 519063c8f8..747c988bc4 100644
>>> --- a/hw/i386/intel_iommu.c
>>> +++ b/hw/i386/intel_iommu.c
>>> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1,
>> gconstpointer v2)
>>>             (key1->pasid == key2->pasid);
>>>  }
>>>
>>> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
>>> +{
>>> +    const struct vtd_as_key *key1 = v1;
>>> +    const struct vtd_as_key *key2 = v2;
>>> +
>>> +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
>>> +}
>> can't you reuse the key with pasid?
> s->vtd_host_iommu_dev isn't indexed by pasid but only BDF.
> Maybe I'd better to define its own key struct, hash() and equal() functions.
you could set a default pasid. But up to you

Eric
>
>>>  /*
>>>   * Note that we use pointer to PCIBus as the key, so hashing/shifting
>>>   * based on the pointer value is intended. Note that we deal with
>>> @@ -3812,6 +3819,70 @@ VTDAddressSpace
>> *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>>>      return vtd_dev_as;
>>>  }
>>>
>>> +static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int
>> devfn,
>>> +                                     HostIOMMUDevice *hiod, Error **errp)
>>> +{
>>> +    IntelIOMMUState *s = opaque;
>>> +    VTDHostIOMMUDevice *vtd_hdev;
>>> +    struct vtd_as_key key = {
>>> +        .bus = bus,
>>> +        .devfn = devfn,
>>> +    };
>>> +    struct vtd_as_key *new_key;
>>> +
>>> +    assert(hiod);
>>> +
>>> +    vtd_iommu_lock(s);
>>> +
>>> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
>>> +
>>> +    if (vtd_hdev) {
>>> +        error_setg(errp, "IOMMUFD device already exist");
>> s/IOMMUFD/Host IOMMU device?
> Good catch, will fix.
>
> Thanks
> Zhenzhong
>
>>> +        vtd_iommu_unlock(s);
>>> +        return false;
>>> +    }
>>> +
>>> +    vtd_hdev = g_malloc0(sizeof(VTDHostIOMMUDevice));
>>> +    vtd_hdev->bus = bus;
>>> +    vtd_hdev->devfn = (uint8_t)devfn;
>>> +    vtd_hdev->iommu_state = s;
>>> +    vtd_hdev->dev = hiod;
>>> +
>>> +    new_key = g_malloc(sizeof(*new_key));
>>> +    new_key->bus = bus;
>>> +    new_key->devfn = devfn;
>>> +
>>> +    object_ref(hiod);
>>> +    g_hash_table_insert(s->vtd_host_iommu_dev, new_key, vtd_hdev);
>>> +
>>> +    vtd_iommu_unlock(s);
>>> +
>>> +    return true;
>>> +}
>>> +
>>> +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int
>> devfn)
>>> +{
>>> +    IntelIOMMUState *s = opaque;
>>> +    VTDHostIOMMUDevice *vtd_hdev;
>>> +    struct vtd_as_key key = {
>>> +        .bus = bus,
>>> +        .devfn = devfn,
>>> +    };
>>> +
>>> +    vtd_iommu_lock(s);
>>> +
>>> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
>>> +    if (!vtd_hdev) {
>>> +        vtd_iommu_unlock(s);
>>> +        return;
>>> +    }
>>> +
>>> +    g_hash_table_remove(s->vtd_host_iommu_dev, &key);
>>> +    object_unref(vtd_hdev->dev);
>>> +
>>> +    vtd_iommu_unlock(s);
>>> +}
>>> +
>>>  /* Unmap the whole range in the notifier's scope. */
>>>  static void vtd_address_space_unmap(VTDAddressSpace *as,
>> IOMMUNotifier *n)
>>>  {
>>> @@ -4116,6 +4187,8 @@ static AddressSpace
>> *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>>  static PCIIOMMUOps vtd_iommu_ops = {
>>>      .get_address_space = vtd_host_dma_iommu,
>>> +    .set_iommu_device = vtd_dev_set_iommu_device,
>>> +    .unset_iommu_device = vtd_dev_unset_iommu_device,
>>>  };
>>>
>>>  static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
>>> @@ -4235,6 +4308,9 @@ static void vtd_realize(DeviceState *dev, Error
>> **errp)
>>>                                       g_free, g_free);
>>>      s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash,
>> vtd_as_equal,
>>>                                        g_free, g_free);
>>> +    s->vtd_host_iommu_dev = g_hash_table_new_full(vtd_as_hash,
>>> +                                                  vtd_as_idev_equal,
>>> +                                                  g_free, g_free);
>>>      vtd_init(s);
>>>      pci_setup_iommu(bus, &vtd_iommu_ops, dev);
>>>      /* Pseudo address space under root PCI bus. */
>> Thanks
>>
>> Eric
Eric Auger June 4, 2024, 8:17 a.m. UTC | #10
On 6/4/24 07:46, Duan, Zhenzhong wrote:
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Subject: Re: [PATCH v6 18/19] intel_iommu: Implement
>> [set|unset]_iommu_device() callbacks
>>
>>
>>
>> On 6/3/24 08:10, Zhenzhong Duan wrote:
>>> From: Yi Liu <yi.l.liu@intel.com>
>>>
>>> Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU.
>>> In set call, a new structure VTDHostIOMMUDevice which holds
>>> a reference to HostIOMMUDevice is stored in hash table
>>> indexed by PCI BDF.
>>>
>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>>  hw/i386/intel_iommu_internal.h |  9 ++++
>>>  include/hw/i386/intel_iommu.h  |  2 +
>>>  hw/i386/intel_iommu.c          | 76
>> ++++++++++++++++++++++++++++++++++
>>>  3 files changed, 87 insertions(+)
>>>
>>> diff --git a/hw/i386/intel_iommu_internal.h
>> b/hw/i386/intel_iommu_internal.h
>>> index f8cf99bddf..b800d62ca0 100644
>>> --- a/hw/i386/intel_iommu_internal.h
>>> +++ b/hw/i386/intel_iommu_internal.h
>>> @@ -28,6 +28,7 @@
>>>  #ifndef HW_I386_INTEL_IOMMU_INTERNAL_H
>>>  #define HW_I386_INTEL_IOMMU_INTERNAL_H
>>>  #include "hw/i386/intel_iommu.h"
>>> +#include "sysemu/host_iommu_device.h"
>>>
>>>  /*
>>>   * Intel IOMMU register specification
>>> @@ -537,4 +538,12 @@ typedef struct VTDRootEntry VTDRootEntry;
>>>  #define VTD_SL_IGN_COM              0xbff0000000000000ULL
>>>  #define VTD_SL_TM                   (1ULL << 62)
>>>
>>> +
>>> +typedef struct VTDHostIOMMUDevice {
>>> +    IntelIOMMUState *iommu_state;
>>> +    PCIBus *bus;
>>> +    uint8_t devfn;
>>> +    HostIOMMUDevice *dev;
>>> +    QLIST_ENTRY(VTDHostIOMMUDevice) next;
>>> +} VTDHostIOMMUDevice;
>>>  #endif
>>> diff --git a/include/hw/i386/intel_iommu.h
>> b/include/hw/i386/intel_iommu.h
>>> index 7d694b0813..2bbde41e45 100644
>>> --- a/include/hw/i386/intel_iommu.h
>>> +++ b/include/hw/i386/intel_iommu.h
>>> @@ -293,6 +293,8 @@ struct IntelIOMMUState {
>>>      /* list of registered notifiers */
>>>      QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>>>
>>> +    GHashTable *vtd_host_iommu_dev;             /* VTDHostIOMMUDevice
>> */
>>> +
>>>      /* interrupt remapping */
>>>      bool intr_enabled;              /* Whether guest enabled IR */
>>>      dma_addr_t intr_root;           /* Interrupt remapping table pointer */
>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>> index 519063c8f8..747c988bc4 100644
>>> --- a/hw/i386/intel_iommu.c
>>> +++ b/hw/i386/intel_iommu.c
>>> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1,
>> gconstpointer v2)
>>>             (key1->pasid == key2->pasid);
>>>  }
>>>
>>> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
>>> +{
>>> +    const struct vtd_as_key *key1 = v1;
>>> +    const struct vtd_as_key *key2 = v2;
>>> +
>>> +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
>>> +}
>>>  /*
>>>   * Note that we use pointer to PCIBus as the key, so hashing/shifting
>>>   * based on the pointer value is intended. Note that we deal with
>>> @@ -3812,6 +3819,70 @@ VTDAddressSpace
>> *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>>>      return vtd_dev_as;
>>>  }
>>>
>>> +static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int
>> devfn,
>>> +                                     HostIOMMUDevice *hiod, Error **errp)
>>> +{
>>> +    IntelIOMMUState *s = opaque;
>>> +    VTDHostIOMMUDevice *vtd_hdev;
>>> +    struct vtd_as_key key = {
>>> +        .bus = bus,
>>> +        .devfn = devfn,
>>> +    };
>>> +    struct vtd_as_key *new_key;
>>> +
>>> +    assert(hiod);
>>> +
>>> +    vtd_iommu_lock(s);
>>> +
>>> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
>>> +
>>> +    if (vtd_hdev) {
>>> +        error_setg(errp, "IOMMUFD device already exist");
>>> +        vtd_iommu_unlock(s);
>>> +        return false;
>>> +    }
>>> +
>>> +    vtd_hdev = g_malloc0(sizeof(VTDHostIOMMUDevice));
>>> +    vtd_hdev->bus = bus;
>>> +    vtd_hdev->devfn = (uint8_t)devfn;
>>> +    vtd_hdev->iommu_state = s;
>>> +    vtd_hdev->dev = hiod;
>> I am still not totally clear about why we couldn't reuse VTDAddressSpace
>> instance for this bus/devid. Is it a matter of aliased versus non
>> aliased bus/devfn, or a matter of pasid diff. An AddressSpace could back
>> an assigned device in which case a HostIOMMUDevice could be added to
>> this latter. I think this should be explained in the commit msg
> Yes, as you said, it's a matter of aliased vs non aliased BDF.
>
> VTDAddressSpace is per aliased BDF while VTDHostIOMMUDevice is per non aliased BDF.
> There can be multiple assigned devices under same virtual iommu group and share same 
> VTDAddressSpace, but they have their own VTDHostIOMMUDevice.
>
> Will refine commit msg.

OK thank you for the confirmation. A general concern is this is the kind
of code we are going to duplicate in each vIOMMU. This is beyond the
scope of this series but we shall really think about introducing a
common base object for vIOMMU. Unfortunately there are issues related to
multiple inheritence that may prevent us from using usual QOM
inheritence but just as we have done for backends and HostIOMMUDevice we
may implement inheritence another way.

Eric
>
> Thanks
> Zhenzhong
>
>> Eric
>>> +
>>> +    new_key = g_malloc(sizeof(*new_key));
>>> +    new_key->bus = bus;
>>> +    new_key->devfn = devfn;
>>> +
>>> +    object_ref(hiod);
>>> +    g_hash_table_insert(s->vtd_host_iommu_dev, new_key, vtd_hdev);
>>> +
>>> +    vtd_iommu_unlock(s);
>>> +
>>> +    return true;
>>> +}
>>> +
>>> +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int
>> devfn)
>>> +{
>>> +    IntelIOMMUState *s = opaque;
>>> +    VTDHostIOMMUDevice *vtd_hdev;
>>> +    struct vtd_as_key key = {
>>> +        .bus = bus,
>>> +        .devfn = devfn,
>>> +    };
>>> +
>>> +    vtd_iommu_lock(s);
>>> +
>>> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
>>> +    if (!vtd_hdev) {
>>> +        vtd_iommu_unlock(s);
>>> +        return;
>>> +    }
>>> +
>>> +    g_hash_table_remove(s->vtd_host_iommu_dev, &key);
>>> +    object_unref(vtd_hdev->dev);
>>> +
>>> +    vtd_iommu_unlock(s);
>>> +}
>>> +
>>>  /* Unmap the whole range in the notifier's scope. */
>>>  static void vtd_address_space_unmap(VTDAddressSpace *as,
>> IOMMUNotifier *n)
>>>  {
>>> @@ -4116,6 +4187,8 @@ static AddressSpace
>> *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>>  static PCIIOMMUOps vtd_iommu_ops = {
>>>      .get_address_space = vtd_host_dma_iommu,
>>> +    .set_iommu_device = vtd_dev_set_iommu_device,
>>> +    .unset_iommu_device = vtd_dev_unset_iommu_device,
>>>  };
>>>
>>>  static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
>>> @@ -4235,6 +4308,9 @@ static void vtd_realize(DeviceState *dev, Error
>> **errp)
>>>                                       g_free, g_free);
>>>      s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash,
>> vtd_as_equal,
>>>                                        g_free, g_free);
>>> +    s->vtd_host_iommu_dev = g_hash_table_new_full(vtd_as_hash,
>>> +                                                  vtd_as_idev_equal,
>>> +                                                  g_free, g_free);
>>>      vtd_init(s);
>>>      pci_setup_iommu(bus, &vtd_iommu_ops, dev);
>>>      /* Pseudo address space under root PCI bus. */
Duan, Zhenzhong June 4, 2024, 8:48 a.m. UTC | #11
>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Subject: Re: [PATCH v6 18/19] intel_iommu: Implement
>[set|unset]_iommu_device() callbacks
>
>
>
>On 6/4/24 07:40, Duan, Zhenzhong wrote:
>>
>>> -----Original Message-----
>>> From: Eric Auger <eric.auger@redhat.com>
>>> Subject: Re: [PATCH v6 18/19] intel_iommu: Implement
>>> [set|unset]_iommu_device() callbacks
>>>
>>> Hi Zhenzhong,
>>>
>>> On 6/3/24 08:10, Zhenzhong Duan wrote:
>>>> From: Yi Liu <yi.l.liu@intel.com>
>>>>
>>>> Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU.
>>>> In set call, a new structure VTDHostIOMMUDevice which holds
>>>> a reference to HostIOMMUDevice is stored in hash table
>>>> indexed by PCI BDF.
>>> maybe precise that this is not the aliased one?
>> Sure.
>>
>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>> ---
>>>>  hw/i386/intel_iommu_internal.h |  9 ++++
>>>>  include/hw/i386/intel_iommu.h  |  2 +
>>>>  hw/i386/intel_iommu.c          | 76
>>> ++++++++++++++++++++++++++++++++++
>>>>  3 files changed, 87 insertions(+)
>>>>
>>>> diff --git a/hw/i386/intel_iommu_internal.h
>>> b/hw/i386/intel_iommu_internal.h
>>>> index f8cf99bddf..b800d62ca0 100644
>>>> --- a/hw/i386/intel_iommu_internal.h
>>>> +++ b/hw/i386/intel_iommu_internal.h
>>>> @@ -28,6 +28,7 @@
>>>>  #ifndef HW_I386_INTEL_IOMMU_INTERNAL_H
>>>>  #define HW_I386_INTEL_IOMMU_INTERNAL_H
>>>>  #include "hw/i386/intel_iommu.h"
>>>> +#include "sysemu/host_iommu_device.h"
>>>>
>>>>  /*
>>>>   * Intel IOMMU register specification
>>>> @@ -537,4 +538,12 @@ typedef struct VTDRootEntry VTDRootEntry;
>>>>  #define VTD_SL_IGN_COM              0xbff0000000000000ULL
>>>>  #define VTD_SL_TM                   (1ULL << 62)
>>>>
>>>> +
>>>> +typedef struct VTDHostIOMMUDevice {
>>>> +    IntelIOMMUState *iommu_state;
>>> Why do you need the iommu_state?
>> It is used in nesting series.
>>
>>>> +    PCIBus *bus;
>>>> +    uint8_t devfn;
>>>> +    HostIOMMUDevice *dev;
>>>> +    QLIST_ENTRY(VTDHostIOMMUDevice) next;
>>>> +} VTDHostIOMMUDevice;
>>> How VTD specific is it?
>> In nesting series, it has element iommu_state and errata
>> which are VTD specific.
>
>so at least I would add a comment in the commit message explaining this.

I'd like to drop it and reintroduce it in nesting series.

Thanks
Zhenzhong
Eric Auger June 4, 2024, 9:38 a.m. UTC | #12
On 6/4/24 10:48, Duan, Zhenzhong wrote:
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Subject: Re: [PATCH v6 18/19] intel_iommu: Implement
>> [set|unset]_iommu_device() callbacks
>>
>>
>>
>> On 6/4/24 07:40, Duan, Zhenzhong wrote:
>>>> -----Original Message-----
>>>> From: Eric Auger <eric.auger@redhat.com>
>>>> Subject: Re: [PATCH v6 18/19] intel_iommu: Implement
>>>> [set|unset]_iommu_device() callbacks
>>>>
>>>> Hi Zhenzhong,
>>>>
>>>> On 6/3/24 08:10, Zhenzhong Duan wrote:
>>>>> From: Yi Liu <yi.l.liu@intel.com>
>>>>>
>>>>> Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU.
>>>>> In set call, a new structure VTDHostIOMMUDevice which holds
>>>>> a reference to HostIOMMUDevice is stored in hash table
>>>>> indexed by PCI BDF.
>>>> maybe precise that this is not the aliased one?
>>> Sure.
>>>
>>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>>> ---
>>>>>  hw/i386/intel_iommu_internal.h |  9 ++++
>>>>>  include/hw/i386/intel_iommu.h  |  2 +
>>>>>  hw/i386/intel_iommu.c          | 76
>>>> ++++++++++++++++++++++++++++++++++
>>>>>  3 files changed, 87 insertions(+)
>>>>>
>>>>> diff --git a/hw/i386/intel_iommu_internal.h
>>>> b/hw/i386/intel_iommu_internal.h
>>>>> index f8cf99bddf..b800d62ca0 100644
>>>>> --- a/hw/i386/intel_iommu_internal.h
>>>>> +++ b/hw/i386/intel_iommu_internal.h
>>>>> @@ -28,6 +28,7 @@
>>>>>  #ifndef HW_I386_INTEL_IOMMU_INTERNAL_H
>>>>>  #define HW_I386_INTEL_IOMMU_INTERNAL_H
>>>>>  #include "hw/i386/intel_iommu.h"
>>>>> +#include "sysemu/host_iommu_device.h"
>>>>>
>>>>>  /*
>>>>>   * Intel IOMMU register specification
>>>>> @@ -537,4 +538,12 @@ typedef struct VTDRootEntry VTDRootEntry;
>>>>>  #define VTD_SL_IGN_COM              0xbff0000000000000ULL
>>>>>  #define VTD_SL_TM                   (1ULL << 62)
>>>>>
>>>>> +
>>>>> +typedef struct VTDHostIOMMUDevice {
>>>>> +    IntelIOMMUState *iommu_state;
>>>> Why do you need the iommu_state?
>>> It is used in nesting series.
>>>
>>>>> +    PCIBus *bus;
>>>>> +    uint8_t devfn;
>>>>> +    HostIOMMUDevice *dev;
>>>>> +    QLIST_ENTRY(VTDHostIOMMUDevice) next;
>>>>> +} VTDHostIOMMUDevice;
>>>> How VTD specific is it?
>>> In nesting series, it has element iommu_state and errata
>>> which are VTD specific.
>> so at least I would add a comment in the commit message explaining this.
> I'd like to drop it and reintroduce it in nesting series.

OK then just justify the choice of introducing another struct  because
other VTD specific fields will be introduced later on

Eric
>
> Thanks
> Zhenzhong
>
Duan, Zhenzhong June 6, 2024, 4:04 a.m. UTC | #13
>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Subject: Re: [PATCH v6 18/19] intel_iommu: Implement
>[set|unset]_iommu_device() callbacks
>
>
>
>On 6/4/24 07:46, Duan, Zhenzhong wrote:
>>
>>> -----Original Message-----
>>> From: Eric Auger <eric.auger@redhat.com>
>>> Subject: Re: [PATCH v6 18/19] intel_iommu: Implement
>>> [set|unset]_iommu_device() callbacks
>>>
>>>
>>>
>>> On 6/3/24 08:10, Zhenzhong Duan wrote:
>>>> From: Yi Liu <yi.l.liu@intel.com>
>>>>
>>>> Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU.
>>>> In set call, a new structure VTDHostIOMMUDevice which holds
>>>> a reference to HostIOMMUDevice is stored in hash table
>>>> indexed by PCI BDF.
>>>>
>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>> ---
>>>>  hw/i386/intel_iommu_internal.h |  9 ++++
>>>>  include/hw/i386/intel_iommu.h  |  2 +
>>>>  hw/i386/intel_iommu.c          | 76
>>> ++++++++++++++++++++++++++++++++++
>>>>  3 files changed, 87 insertions(+)
>>>>
>>>> diff --git a/hw/i386/intel_iommu_internal.h
>>> b/hw/i386/intel_iommu_internal.h
>>>> index f8cf99bddf..b800d62ca0 100644
>>>> --- a/hw/i386/intel_iommu_internal.h
>>>> +++ b/hw/i386/intel_iommu_internal.h
>>>> @@ -28,6 +28,7 @@
>>>>  #ifndef HW_I386_INTEL_IOMMU_INTERNAL_H
>>>>  #define HW_I386_INTEL_IOMMU_INTERNAL_H
>>>>  #include "hw/i386/intel_iommu.h"
>>>> +#include "sysemu/host_iommu_device.h"
>>>>
>>>>  /*
>>>>   * Intel IOMMU register specification
>>>> @@ -537,4 +538,12 @@ typedef struct VTDRootEntry VTDRootEntry;
>>>>  #define VTD_SL_IGN_COM              0xbff0000000000000ULL
>>>>  #define VTD_SL_TM                   (1ULL << 62)
>>>>
>>>> +
>>>> +typedef struct VTDHostIOMMUDevice {
>>>> +    IntelIOMMUState *iommu_state;
>>>> +    PCIBus *bus;
>>>> +    uint8_t devfn;
>>>> +    HostIOMMUDevice *dev;
>>>> +    QLIST_ENTRY(VTDHostIOMMUDevice) next;
>>>> +} VTDHostIOMMUDevice;
>>>>  #endif
>>>> diff --git a/include/hw/i386/intel_iommu.h
>>> b/include/hw/i386/intel_iommu.h
>>>> index 7d694b0813..2bbde41e45 100644
>>>> --- a/include/hw/i386/intel_iommu.h
>>>> +++ b/include/hw/i386/intel_iommu.h
>>>> @@ -293,6 +293,8 @@ struct IntelIOMMUState {
>>>>      /* list of registered notifiers */
>>>>      QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>>>>
>>>> +    GHashTable *vtd_host_iommu_dev;             /*
>VTDHostIOMMUDevice
>>> */
>>>> +
>>>>      /* interrupt remapping */
>>>>      bool intr_enabled;              /* Whether guest enabled IR */
>>>>      dma_addr_t intr_root;           /* Interrupt remapping table pointer */
>>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>>> index 519063c8f8..747c988bc4 100644
>>>> --- a/hw/i386/intel_iommu.c
>>>> +++ b/hw/i386/intel_iommu.c
>>>> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer
>v1,
>>> gconstpointer v2)
>>>>             (key1->pasid == key2->pasid);
>>>>  }
>>>>
>>>> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
>>>> +{
>>>> +    const struct vtd_as_key *key1 = v1;
>>>> +    const struct vtd_as_key *key2 = v2;
>>>> +
>>>> +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
>>>> +}
>>>>  /*
>>>>   * Note that we use pointer to PCIBus as the key, so hashing/shifting
>>>>   * based on the pointer value is intended. Note that we deal with
>>>> @@ -3812,6 +3819,70 @@ VTDAddressSpace
>>> *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>>>>      return vtd_dev_as;
>>>>  }
>>>>
>>>> +static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int
>>> devfn,
>>>> +                                     HostIOMMUDevice *hiod, Error **errp)
>>>> +{
>>>> +    IntelIOMMUState *s = opaque;
>>>> +    VTDHostIOMMUDevice *vtd_hdev;
>>>> +    struct vtd_as_key key = {
>>>> +        .bus = bus,
>>>> +        .devfn = devfn,
>>>> +    };
>>>> +    struct vtd_as_key *new_key;
>>>> +
>>>> +    assert(hiod);
>>>> +
>>>> +    vtd_iommu_lock(s);
>>>> +
>>>> +    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
>>>> +
>>>> +    if (vtd_hdev) {
>>>> +        error_setg(errp, "IOMMUFD device already exist");
>>>> +        vtd_iommu_unlock(s);
>>>> +        return false;
>>>> +    }
>>>> +
>>>> +    vtd_hdev = g_malloc0(sizeof(VTDHostIOMMUDevice));
>>>> +    vtd_hdev->bus = bus;
>>>> +    vtd_hdev->devfn = (uint8_t)devfn;
>>>> +    vtd_hdev->iommu_state = s;
>>>> +    vtd_hdev->dev = hiod;
>>> I am still not totally clear about why we couldn't reuse VTDAddressSpace
>>> instance for this bus/devid. Is it a matter of aliased versus non
>>> aliased bus/devfn, or a matter of pasid diff. An AddressSpace could back
>>> an assigned device in which case a HostIOMMUDevice could be added to
>>> this latter. I think this should be explained in the commit msg
>> Yes, as you said, it's a matter of aliased vs non aliased BDF.
>>
>> VTDAddressSpace is per aliased BDF while VTDHostIOMMUDevice is per
>non aliased BDF.
>> There can be multiple assigned devices under same virtual iommu group
>and share same
>> VTDAddressSpace, but they have their own VTDHostIOMMUDevice.
>>
>> Will refine commit msg.
>
>OK thank you for the confirmation. A general concern is this is the kind
>of code we are going to duplicate in each vIOMMU.

The hash table code can be common, but will we have other duplicate code?
I feel most of the codes are VTD specific.

> This is beyond the
>scope of this series but we shall really think about introducing a
>common base object for vIOMMU. Unfortunately there are issues related to
>multiple inheritence that may prevent us from using usual QOM
>inheritence but just as we have done for backends and HostIOMMUDevice
>we may implement inheritence another way.

Yes, virtio-iommu is different from others, it inherits from TYPE_VIRTIO_DEVICE.
Not get about the another way, could you explain a bit?

Thanks
Zhenzhong
diff mbox series

Patch

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index f8cf99bddf..b800d62ca0 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -28,6 +28,7 @@ 
 #ifndef HW_I386_INTEL_IOMMU_INTERNAL_H
 #define HW_I386_INTEL_IOMMU_INTERNAL_H
 #include "hw/i386/intel_iommu.h"
+#include "sysemu/host_iommu_device.h"
 
 /*
  * Intel IOMMU register specification
@@ -537,4 +538,12 @@  typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_SL_IGN_COM              0xbff0000000000000ULL
 #define VTD_SL_TM                   (1ULL << 62)
 
+
+typedef struct VTDHostIOMMUDevice {
+    IntelIOMMUState *iommu_state;
+    PCIBus *bus;
+    uint8_t devfn;
+    HostIOMMUDevice *dev;
+    QLIST_ENTRY(VTDHostIOMMUDevice) next;
+} VTDHostIOMMUDevice;
 #endif
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 7d694b0813..2bbde41e45 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -293,6 +293,8 @@  struct IntelIOMMUState {
     /* list of registered notifiers */
     QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
 
+    GHashTable *vtd_host_iommu_dev;             /* VTDHostIOMMUDevice */
+
     /* interrupt remapping */
     bool intr_enabled;              /* Whether guest enabled IR */
     dma_addr_t intr_root;           /* Interrupt remapping table pointer */
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 519063c8f8..747c988bc4 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -237,6 +237,13 @@  static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2)
            (key1->pasid == key2->pasid);
 }
 
+static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
+{
+    const struct vtd_as_key *key1 = v1;
+    const struct vtd_as_key *key2 = v2;
+
+    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
+}
 /*
  * Note that we use pointer to PCIBus as the key, so hashing/shifting
  * based on the pointer value is intended. Note that we deal with
@@ -3812,6 +3819,70 @@  VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
     return vtd_dev_as;
 }
 
+static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
+                                     HostIOMMUDevice *hiod, Error **errp)
+{
+    IntelIOMMUState *s = opaque;
+    VTDHostIOMMUDevice *vtd_hdev;
+    struct vtd_as_key key = {
+        .bus = bus,
+        .devfn = devfn,
+    };
+    struct vtd_as_key *new_key;
+
+    assert(hiod);
+
+    vtd_iommu_lock(s);
+
+    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
+
+    if (vtd_hdev) {
+        error_setg(errp, "IOMMUFD device already exist");
+        vtd_iommu_unlock(s);
+        return false;
+    }
+
+    vtd_hdev = g_malloc0(sizeof(VTDHostIOMMUDevice));
+    vtd_hdev->bus = bus;
+    vtd_hdev->devfn = (uint8_t)devfn;
+    vtd_hdev->iommu_state = s;
+    vtd_hdev->dev = hiod;
+
+    new_key = g_malloc(sizeof(*new_key));
+    new_key->bus = bus;
+    new_key->devfn = devfn;
+
+    object_ref(hiod);
+    g_hash_table_insert(s->vtd_host_iommu_dev, new_key, vtd_hdev);
+
+    vtd_iommu_unlock(s);
+
+    return true;
+}
+
+static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
+{
+    IntelIOMMUState *s = opaque;
+    VTDHostIOMMUDevice *vtd_hdev;
+    struct vtd_as_key key = {
+        .bus = bus,
+        .devfn = devfn,
+    };
+
+    vtd_iommu_lock(s);
+
+    vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
+    if (!vtd_hdev) {
+        vtd_iommu_unlock(s);
+        return;
+    }
+
+    g_hash_table_remove(s->vtd_host_iommu_dev, &key);
+    object_unref(vtd_hdev->dev);
+
+    vtd_iommu_unlock(s);
+}
+
 /* Unmap the whole range in the notifier's scope. */
 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
 {
@@ -4116,6 +4187,8 @@  static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
 
 static PCIIOMMUOps vtd_iommu_ops = {
     .get_address_space = vtd_host_dma_iommu,
+    .set_iommu_device = vtd_dev_set_iommu_device,
+    .unset_iommu_device = vtd_dev_unset_iommu_device,
 };
 
 static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
@@ -4235,6 +4308,9 @@  static void vtd_realize(DeviceState *dev, Error **errp)
                                      g_free, g_free);
     s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
                                       g_free, g_free);
+    s->vtd_host_iommu_dev = g_hash_table_new_full(vtd_as_hash,
+                                                  vtd_as_idev_equal,
+                                                  g_free, g_free);
     vtd_init(s);
     pci_setup_iommu(bus, &vtd_iommu_ops, dev);
     /* Pseudo address space under root PCI bus. */