diff mbox series

[v4,04/17] intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb invalidation

Message ID 20240930092631.2997543-5-zhenzhong.duan@intel.com
State New
Headers show
Series intel_iommu: Enable stage-1 translation for emulated device | expand

Commit Message

Duan, Zhenzhong Sept. 30, 2024, 9:26 a.m. UTC
Per spec 6.5.2.4, PADID-selective PASID-based iotlb invalidation will
flush stage-2 iotlb entries with matching domain id and pasid.

With scalable modern mode introduced, guest could send PASID-selective
PASID-based iotlb invalidation to flush both stage-1 and stage-2 entries.

By this chance, remove old IOTLB related definitions which were unused.

Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 hw/i386/intel_iommu_internal.h | 14 ++++--
 hw/i386/intel_iommu.c          | 88 +++++++++++++++++++++++++++++++++-
 2 files changed, 96 insertions(+), 6 deletions(-)

Comments

Yi Liu Nov. 4, 2024, 2:49 a.m. UTC | #1
On 2024/9/30 17:26, Zhenzhong Duan wrote:
> Per spec 6.5.2.4, PADID-selective PASID-based iotlb invalidation will
> flush stage-2 iotlb entries with matching domain id and pasid.

Also, call out it's per table Table 21. PASID-based-IOTLB Invalidation of
VT-d spec 4.1.

> With scalable modern mode introduced, guest could send PASID-selective
> PASID-based iotlb invalidation to flush both stage-1 and stage-2 entries.
> 
> By this chance, remove old IOTLB related definitions which were unused.


> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
> Acked-by: Jason Wang <jasowang@redhat.com>
> ---
>   hw/i386/intel_iommu_internal.h | 14 ++++--
>   hw/i386/intel_iommu.c          | 88 +++++++++++++++++++++++++++++++++-
>   2 files changed, 96 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index d0f9d4589d..eec8090190 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -403,11 +403,6 @@ typedef union VTDInvDesc VTDInvDesc;
>   #define VTD_INV_DESC_IOTLB_AM(val)      ((val) & 0x3fULL)
>   #define VTD_INV_DESC_IOTLB_RSVD_LO      0xffffffff0000f100ULL
>   #define VTD_INV_DESC_IOTLB_RSVD_HI      0xf80ULL
> -#define VTD_INV_DESC_IOTLB_PASID_PASID  (2ULL << 4)
> -#define VTD_INV_DESC_IOTLB_PASID_PAGE   (3ULL << 4)
> -#define VTD_INV_DESC_IOTLB_PASID(val)   (((val) >> 32) & VTD_PASID_ID_MASK)
> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO      0xfff00000000001c0ULL
> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI      0xf80ULL
>   
>   /* Mask for Device IOTLB Invalidate Descriptor */
>   #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL)
> @@ -433,6 +428,15 @@ typedef union VTDInvDesc VTDInvDesc;
>   #define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw) \
>           (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
>   
> +/* Masks for PIOTLB Invalidate Descriptor */
> +#define VTD_INV_DESC_PIOTLB_G             (3ULL << 4)
> +#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID  (2ULL << 4)
> +#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID  (3ULL << 4)
> +#define VTD_INV_DESC_PIOTLB_DID(val)      (((val) >> 16) & VTD_DOMAIN_ID_MASK)
> +#define VTD_INV_DESC_PIOTLB_PASID(val)    (((val) >> 32) & 0xfffffULL)
> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL0     0xfff000000000f1c0ULL
> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
> +
>   /* Information about page-selective IOTLB invalidate */
>   struct VTDIOTLBPageInvInfo {
>       uint16_t domain_id;
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 9e6ef0cb99..72c9c91d4f 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -2656,6 +2656,86 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
>       return true;
>   }
>   
> +static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer value,
> +                                         gpointer user_data)
> +{
> +    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
> +    VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
> +
> +    return ((entry->domain_id == info->domain_id) &&
> +            (entry->pasid == info->pasid));
> +}
> +
> +static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
> +                                        uint16_t domain_id, uint32_t pasid)
> +{
> +    VTDIOTLBPageInvInfo info;
> +    VTDAddressSpace *vtd_as;
> +    VTDContextEntry ce;
> +
> +    info.domain_id = domain_id;
> +    info.pasid = pasid;
> +
> +    vtd_iommu_lock(s);
> +    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid,
> +                                &info);
> +    vtd_iommu_unlock(s);
> +
> +    QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
> +        if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
> +                                      vtd_as->devfn, &ce) &&
> +            domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
> +            uint32_t rid2pasid = VTD_CE_GET_RID2PASID(&ce);
> +
> +            if ((vtd_as->pasid != PCI_NO_PASID || pasid != rid2pasid) &&
> +                vtd_as->pasid != pasid) {
> +                continue;
> +            }
> +
> +            if (!s->scalable_modern) {
> +                vtd_address_space_sync(vtd_as);
> +            }
> +        }
> +    }
> +}
> +
> +static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
> +                                    VTDInvDesc *inv_desc)
> +{
> +    uint16_t domain_id;
> +    uint32_t pasid;
> +
> +    if ((inv_desc->val[0] & VTD_INV_DESC_PIOTLB_RSVD_VAL0) ||
> +        (inv_desc->val[1] & VTD_INV_DESC_PIOTLB_RSVD_VAL1) ||
> +        inv_desc->val[2] || inv_desc->val[3]) {
> +        error_report_once("%s: invalid piotlb inv desc val[3]=0x%"PRIx64
> +                          " val[2]=0x%"PRIx64" val[1]=0x%"PRIx64
> +                          " val[0]=0x%"PRIx64" (reserved bits unzero)",
> +                          __func__, inv_desc->val[3], inv_desc->val[2],
> +                          inv_desc->val[1], inv_desc->val[0]);
> +        return false;
> +    }

Need to consider the below behaviour as well.

"
This
descriptor is a 256-bit descriptor and will result in an invalid descriptor 
error if submitted in an IQ that
is setup to provide hardware with 128-bit descriptors (IQA_REG.DW=0)
"

Also there are descriptions about the old inv desc types (e.g.
iotlb_inv_desc) that can be either 128bits or 256bits.

"If a 128-bit
version of this descriptor is submitted into an IQ that is setup to provide 
hardware with 256-bit
descriptors or vice-versa it will result in an invalid descriptor error.
"

If DW==1, vIOMMU fetches 32 bytes per desc. In such case, if the guest
submits 128bits desc, then the high 128bits would be non-zero if there is
more than one desc. But if there is only one desc in the queue, then the
high 128bits would be zero as well. While, it may be captured by the
tail register update. Bit4 is reserved when DW==1, and guest would use
bit4 when it only submits one desc.

If DW==0, vIOMMU fetchs 16bytes per desc. If guest submits 256bits desc,
it would appear to be two descs from vIOMMU p.o.v. The first 128bits
can be identified as valid except for the types that does not requires
256bits. The higher 128bits would be subjected to the desc sanity check
as well.

Based on the above, I think you may need to add two more checks. If DW==0,
vIOMMU should fail the inv types that requires 256bits; If DW==1, you
should check the inv_desc->val[2] and inv_desc->val[3]. You've already
done it in this patch.

Thoughts are welcomed here.

> +
> +    domain_id = VTD_INV_DESC_PIOTLB_DID(inv_desc->val[0]);
> +    pasid = VTD_INV_DESC_PIOTLB_PASID(inv_desc->val[0]);
> +    switch (inv_desc->val[0] & VTD_INV_DESC_PIOTLB_G) {
> +    case VTD_INV_DESC_PIOTLB_ALL_IN_PASID:
> +        vtd_piotlb_pasid_invalidate(s, domain_id, pasid);
> +        break;
> +
> +    case VTD_INV_DESC_PIOTLB_PSI_IN_PASID:
> +        break;
> +
> +    default:
> +        error_report_once("%s: invalid piotlb inv desc: hi=0x%"PRIx64
> +                          ", lo=0x%"PRIx64" (type mismatch: 0x%llx)",
> +                          __func__, inv_desc->val[1], inv_desc->val[0],
> +                          inv_desc->val[0] & VTD_INV_DESC_IOTLB_G);
> +        return false;
> +    }
> +    return true;
> +}
> +
>   static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
>                                        VTDInvDesc *inv_desc)
>   {
> @@ -2766,6 +2846,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
>           }
>           break;
>   
> +    case VTD_INV_DESC_PIOTLB:
> +        trace_vtd_inv_desc("p-iotlb", inv_desc.val[1], inv_desc.val[0]);
> +        if (!vtd_process_piotlb_desc(s, &inv_desc)) {
> +            return false;
> +        }
> +        break;
> +
>       case VTD_INV_DESC_WAIT:
>           trace_vtd_inv_desc("wait", inv_desc.hi, inv_desc.lo);
>           if (!vtd_process_wait_desc(s, &inv_desc)) {
> @@ -2793,7 +2880,6 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
>        * iommu driver) work, just return true is enough so far.
>        */
>       case VTD_INV_DESC_PC:
> -    case VTD_INV_DESC_PIOTLB:
>           if (s->scalable_mode) {
>               break;
>           }
CLEMENT MATHIEU--DRIF Nov. 4, 2024, 7:37 a.m. UTC | #2
On 04/11/2024 03:49, Yi Liu wrote:
> Caution: External email. Do not open attachments or click links, unless 
> this email comes from a known sender and you know the content is safe.
> 
> 
> On 2024/9/30 17:26, Zhenzhong Duan wrote:
>> Per spec 6.5.2.4, PADID-selective PASID-based iotlb invalidation will
>> flush stage-2 iotlb entries with matching domain id and pasid.
> 
> Also, call out it's per table Table 21. PASID-based-IOTLB Invalidation of
> VT-d spec 4.1.
> 
>> With scalable modern mode introduced, guest could send PASID-selective
>> PASID-based iotlb invalidation to flush both stage-1 and stage-2 entries.
>>
>> By this chance, remove old IOTLB related definitions which were unused.
> 
> 
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
>> Acked-by: Jason Wang <jasowang@redhat.com>
>> ---
>>   hw/i386/intel_iommu_internal.h | 14 ++++--
>>   hw/i386/intel_iommu.c          | 88 +++++++++++++++++++++++++++++++++-
>>   2 files changed, 96 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/ 
>> intel_iommu_internal.h
>> index d0f9d4589d..eec8090190 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -403,11 +403,6 @@ typedef union VTDInvDesc VTDInvDesc;
>>   #define VTD_INV_DESC_IOTLB_AM(val)      ((val) & 0x3fULL)
>>   #define VTD_INV_DESC_IOTLB_RSVD_LO      0xffffffff0000f100ULL
>>   #define VTD_INV_DESC_IOTLB_RSVD_HI      0xf80ULL
>> -#define VTD_INV_DESC_IOTLB_PASID_PASID  (2ULL << 4)
>> -#define VTD_INV_DESC_IOTLB_PASID_PAGE   (3ULL << 4)
>> -#define VTD_INV_DESC_IOTLB_PASID(val)   (((val) >> 32) & 
>> VTD_PASID_ID_MASK)
>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO      0xfff00000000001c0ULL
>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI      0xf80ULL
>>
>>   /* Mask for Device IOTLB Invalidate Descriptor */
>>   #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 
>> 0xfffffffffffff000ULL)
>> @@ -433,6 +428,15 @@ typedef union VTDInvDesc VTDInvDesc;
>>   #define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw) \
>>           (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
>>
>> +/* Masks for PIOTLB Invalidate Descriptor */
>> +#define VTD_INV_DESC_PIOTLB_G             (3ULL << 4)
>> +#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID  (2ULL << 4)
>> +#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID  (3ULL << 4)
>> +#define VTD_INV_DESC_PIOTLB_DID(val)      (((val) >> 16) & 
>> VTD_DOMAIN_ID_MASK)
>> +#define VTD_INV_DESC_PIOTLB_PASID(val)    (((val) >> 32) & 0xfffffULL)
>> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL0     0xfff000000000f1c0ULL
>> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
>> +
>>   /* Information about page-selective IOTLB invalidate */
>>   struct VTDIOTLBPageInvInfo {
>>       uint16_t domain_id;
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 9e6ef0cb99..72c9c91d4f 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -2656,6 +2656,86 @@ static bool 
>> vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
>>       return true;
>>   }
>>
>> +static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer value,
>> +                                         gpointer user_data)
>> +{
>> +    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
>> +    VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
>> +
>> +    return ((entry->domain_id == info->domain_id) &&
>> +            (entry->pasid == info->pasid));
>> +}
>> +
>> +static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
>> +                                        uint16_t domain_id, uint32_t 
>> pasid)
>> +{
>> +    VTDIOTLBPageInvInfo info;
>> +    VTDAddressSpace *vtd_as;
>> +    VTDContextEntry ce;
>> +
>> +    info.domain_id = domain_id;
>> +    info.pasid = pasid;
>> +
>> +    vtd_iommu_lock(s);
>> +    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid,
>> +                                &info);
>> +    vtd_iommu_unlock(s);
>> +
>> +    QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
>> +        if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
>> +                                      vtd_as->devfn, &ce) &&
>> +            domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
>> +            uint32_t rid2pasid = VTD_CE_GET_RID2PASID(&ce);
>> +
>> +            if ((vtd_as->pasid != PCI_NO_PASID || pasid != rid2pasid) &&
>> +                vtd_as->pasid != pasid) {
>> +                continue;
>> +            }
>> +
>> +            if (!s->scalable_modern) {
>> +                vtd_address_space_sync(vtd_as);
>> +            }
>> +        }
>> +    }
>> +}
>> +
>> +static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
>> +                                    VTDInvDesc *inv_desc)
>> +{
>> +    uint16_t domain_id;
>> +    uint32_t pasid;
>> +
>> +    if ((inv_desc->val[0] & VTD_INV_DESC_PIOTLB_RSVD_VAL0) ||
>> +        (inv_desc->val[1] & VTD_INV_DESC_PIOTLB_RSVD_VAL1) ||
>> +        inv_desc->val[2] || inv_desc->val[3]) {
>> +        error_report_once("%s: invalid piotlb inv desc val[3]=0x%"PRIx64
>> +                          " val[2]=0x%"PRIx64" val[1]=0x%"PRIx64
>> +                          " val[0]=0x%"PRIx64" (reserved bits unzero)",
>> +                          __func__, inv_desc->val[3], inv_desc->val[2],
>> +                          inv_desc->val[1], inv_desc->val[0]);
>> +        return false;
>> +    }
> 
> Need to consider the below behaviour as well.
> 
> "
> This
> descriptor is a 256-bit descriptor and will result in an invalid descriptor
> error if submitted in an IQ that
> is setup to provide hardware with 128-bit descriptors (IQA_REG.DW=0)
> "
> 
> Also there are descriptions about the old inv desc types (e.g.
> iotlb_inv_desc) that can be either 128bits or 256bits.
> 
> "If a 128-bit
> version of this descriptor is submitted into an IQ that is setup to provide
> hardware with 256-bit
> descriptors or vice-versa it will result in an invalid descriptor error.
> "
> 
> If DW==1, vIOMMU fetches 32 bytes per desc. In such case, if the guest
> submits 128bits desc, then the high 128bits would be non-zero if there is
> more than one desc. But if there is only one desc in the queue, then the
> high 128bits would be zero as well. While, it may be captured by the
> tail register update. Bit4 is reserved when DW==1, and guest would use
> bit4 when it only submits one desc.
> 
> If DW==0, vIOMMU fetchs 16bytes per desc. If guest submits 256bits desc,
> it would appear to be two descs from vIOMMU p.o.v. The first 128bits
> can be identified as valid except for the types that does not requires
> 256bits. The higher 128bits would be subjected to the desc sanity check
> as well.
> 
> Based on the above, I think you may need to add two more checks. If DW==0,
> vIOMMU should fail the inv types that requires 256bits; If DW==1, you
> should check the inv_desc->val[2] and inv_desc->val[3]. You've already
> done it in this patch.
> 
> Thoughts are welcomed here.

Good catch,
I think we should write the check in vtd_process_inv_desc
rather than updating the handlers.

What are your thoughts?

> 
>> +
>> +    domain_id = VTD_INV_DESC_PIOTLB_DID(inv_desc->val[0]);
>> +    pasid = VTD_INV_DESC_PIOTLB_PASID(inv_desc->val[0]);
>> +    switch (inv_desc->val[0] & VTD_INV_DESC_PIOTLB_G) {
>> +    case VTD_INV_DESC_PIOTLB_ALL_IN_PASID:
>> +        vtd_piotlb_pasid_invalidate(s, domain_id, pasid);
>> +        break;
>> +
>> +    case VTD_INV_DESC_PIOTLB_PSI_IN_PASID:
>> +        break;
>> +
>> +    default:
>> +        error_report_once("%s: invalid piotlb inv desc: hi=0x%"PRIx64
>> +                          ", lo=0x%"PRIx64" (type mismatch: 0x%llx)",
>> +                          __func__, inv_desc->val[1], inv_desc->val[0],
>> +                          inv_desc->val[0] & VTD_INV_DESC_IOTLB_G);
>> +        return false;
>> +    }
>> +    return true;
>> +}
>> +
>>   static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
>>                                        VTDInvDesc *inv_desc)
>>   {
>> @@ -2766,6 +2846,13 @@ static bool 
>> vtd_process_inv_desc(IntelIOMMUState *s)
>>           }
>>           break;
>>
>> +    case VTD_INV_DESC_PIOTLB:
>> +        trace_vtd_inv_desc("p-iotlb", inv_desc.val[1], inv_desc.val[0]);
>> +        if (!vtd_process_piotlb_desc(s, &inv_desc)) {
>> +            return false;
>> +        }
>> +        break;
>> +
>>       case VTD_INV_DESC_WAIT:
>>           trace_vtd_inv_desc("wait", inv_desc.hi, inv_desc.lo);
>>           if (!vtd_process_wait_desc(s, &inv_desc)) {
>> @@ -2793,7 +2880,6 @@ static bool vtd_process_inv_desc(IntelIOMMUState 
>> *s)
>>        * iommu driver) work, just return true is enough so far.
>>        */
>>       case VTD_INV_DESC_PC:
>> -    case VTD_INV_DESC_PIOTLB:
>>           if (s->scalable_mode) {
>>               break;
>>           }
> 
> -- 
> Regards,
> Yi Liu
Yi Liu Nov. 4, 2024, 8:45 a.m. UTC | #3
On 2024/11/4 15:37, CLEMENT MATHIEU--DRIF wrote:
> 
> 
> On 04/11/2024 03:49, Yi Liu wrote:
>> Caution: External email. Do not open attachments or click links, unless
>> this email comes from a known sender and you know the content is safe.
>>
>>
>> On 2024/9/30 17:26, Zhenzhong Duan wrote:
>>> Per spec 6.5.2.4, PADID-selective PASID-based iotlb invalidation will
>>> flush stage-2 iotlb entries with matching domain id and pasid.
>>
>> Also, call out it's per table Table 21. PASID-based-IOTLB Invalidation of
>> VT-d spec 4.1.
>>
>>> With scalable modern mode introduced, guest could send PASID-selective
>>> PASID-based iotlb invalidation to flush both stage-1 and stage-2 entries.
>>>
>>> By this chance, remove old IOTLB related definitions which were unused.
>>
>>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
>>> Acked-by: Jason Wang <jasowang@redhat.com>
>>> ---
>>>    hw/i386/intel_iommu_internal.h | 14 ++++--
>>>    hw/i386/intel_iommu.c          | 88 +++++++++++++++++++++++++++++++++-
>>>    2 files changed, 96 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/
>>> intel_iommu_internal.h
>>> index d0f9d4589d..eec8090190 100644
>>> --- a/hw/i386/intel_iommu_internal.h
>>> +++ b/hw/i386/intel_iommu_internal.h
>>> @@ -403,11 +403,6 @@ typedef union VTDInvDesc VTDInvDesc;
>>>    #define VTD_INV_DESC_IOTLB_AM(val)      ((val) & 0x3fULL)
>>>    #define VTD_INV_DESC_IOTLB_RSVD_LO      0xffffffff0000f100ULL
>>>    #define VTD_INV_DESC_IOTLB_RSVD_HI      0xf80ULL
>>> -#define VTD_INV_DESC_IOTLB_PASID_PASID  (2ULL << 4)
>>> -#define VTD_INV_DESC_IOTLB_PASID_PAGE   (3ULL << 4)
>>> -#define VTD_INV_DESC_IOTLB_PASID(val)   (((val) >> 32) &
>>> VTD_PASID_ID_MASK)
>>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO      0xfff00000000001c0ULL
>>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI      0xf80ULL
>>>
>>>    /* Mask for Device IOTLB Invalidate Descriptor */
>>>    #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) &
>>> 0xfffffffffffff000ULL)
>>> @@ -433,6 +428,15 @@ typedef union VTDInvDesc VTDInvDesc;
>>>    #define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw) \
>>>            (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
>>>
>>> +/* Masks for PIOTLB Invalidate Descriptor */
>>> +#define VTD_INV_DESC_PIOTLB_G             (3ULL << 4)
>>> +#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID  (2ULL << 4)
>>> +#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID  (3ULL << 4)
>>> +#define VTD_INV_DESC_PIOTLB_DID(val)      (((val) >> 16) &
>>> VTD_DOMAIN_ID_MASK)
>>> +#define VTD_INV_DESC_PIOTLB_PASID(val)    (((val) >> 32) & 0xfffffULL)
>>> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL0     0xfff000000000f1c0ULL
>>> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
>>> +
>>>    /* Information about page-selective IOTLB invalidate */
>>>    struct VTDIOTLBPageInvInfo {
>>>        uint16_t domain_id;
>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>> index 9e6ef0cb99..72c9c91d4f 100644
>>> --- a/hw/i386/intel_iommu.c
>>> +++ b/hw/i386/intel_iommu.c
>>> @@ -2656,6 +2656,86 @@ static bool
>>> vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
>>>        return true;
>>>    }
>>>
>>> +static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer value,
>>> +                                         gpointer user_data)
>>> +{
>>> +    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
>>> +    VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
>>> +
>>> +    return ((entry->domain_id == info->domain_id) &&
>>> +            (entry->pasid == info->pasid));
>>> +}
>>> +
>>> +static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
>>> +                                        uint16_t domain_id, uint32_t
>>> pasid)
>>> +{
>>> +    VTDIOTLBPageInvInfo info;
>>> +    VTDAddressSpace *vtd_as;
>>> +    VTDContextEntry ce;
>>> +
>>> +    info.domain_id = domain_id;
>>> +    info.pasid = pasid;
>>> +
>>> +    vtd_iommu_lock(s);
>>> +    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid,
>>> +                                &info);
>>> +    vtd_iommu_unlock(s);
>>> +
>>> +    QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
>>> +        if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
>>> +                                      vtd_as->devfn, &ce) &&
>>> +            domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
>>> +            uint32_t rid2pasid = VTD_CE_GET_RID2PASID(&ce);
>>> +
>>> +            if ((vtd_as->pasid != PCI_NO_PASID || pasid != rid2pasid) &&
>>> +                vtd_as->pasid != pasid) {
>>> +                continue;
>>> +            }
>>> +
>>> +            if (!s->scalable_modern) {
>>> +                vtd_address_space_sync(vtd_as);
>>> +            }
>>> +        }
>>> +    }
>>> +}
>>> +
>>> +static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
>>> +                                    VTDInvDesc *inv_desc)
>>> +{
>>> +    uint16_t domain_id;
>>> +    uint32_t pasid;
>>> +
>>> +    if ((inv_desc->val[0] & VTD_INV_DESC_PIOTLB_RSVD_VAL0) ||
>>> +        (inv_desc->val[1] & VTD_INV_DESC_PIOTLB_RSVD_VAL1) ||
>>> +        inv_desc->val[2] || inv_desc->val[3]) {
>>> +        error_report_once("%s: invalid piotlb inv desc val[3]=0x%"PRIx64
>>> +                          " val[2]=0x%"PRIx64" val[1]=0x%"PRIx64
>>> +                          " val[0]=0x%"PRIx64" (reserved bits unzero)",
>>> +                          __func__, inv_desc->val[3], inv_desc->val[2],
>>> +                          inv_desc->val[1], inv_desc->val[0]);
>>> +        return false;
>>> +    }
>>
>> Need to consider the below behaviour as well.
>>
>> "
>> This
>> descriptor is a 256-bit descriptor and will result in an invalid descriptor
>> error if submitted in an IQ that
>> is setup to provide hardware with 128-bit descriptors (IQA_REG.DW=0)
>> "
>>
>> Also there are descriptions about the old inv desc types (e.g.
>> iotlb_inv_desc) that can be either 128bits or 256bits.
>>
>> "If a 128-bit
>> version of this descriptor is submitted into an IQ that is setup to provide
>> hardware with 256-bit
>> descriptors or vice-versa it will result in an invalid descriptor error.
>> "
>>
>> If DW==1, vIOMMU fetches 32 bytes per desc. In such case, if the guest
>> submits 128bits desc, then the high 128bits would be non-zero if there is
>> more than one desc. But if there is only one desc in the queue, then the
>> high 128bits would be zero as well. While, it may be captured by the
>> tail register update. Bit4 is reserved when DW==1, and guest would use
>> bit4 when it only submits one desc.
>>
>> If DW==0, vIOMMU fetchs 16bytes per desc. If guest submits 256bits desc,
>> it would appear to be two descs from vIOMMU p.o.v. The first 128bits
>> can be identified as valid except for the types that does not requires
>> 256bits. The higher 128bits would be subjected to the desc sanity check
>> as well.
>>
>> Based on the above, I think you may need to add two more checks. If DW==0,
>> vIOMMU should fail the inv types that requires 256bits; If DW==1, you
>> should check the inv_desc->val[2] and inv_desc->val[3]. You've already
>> done it in this patch.
>>
>> Thoughts are welcomed here.
> 
> Good catch,
> I think we should write the check in vtd_process_inv_desc
> rather than updating the handlers.
> 
> What are your thoughts?

the first check can be done in vtd_process_inv_desc(). The second may
be better in the handlers as the handlers have the reserved bits check.
But given that none of the inv types use the high 128bits, so it is also
acceptable to do it in vtd_process_inv_desc(). Do add proper comment.
Duan, Zhenzhong Nov. 4, 2024, 11:46 a.m. UTC | #4
>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Sent: Monday, November 4, 2024 4:45 PM
>Subject: Re: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-
>selective PASID-based iotlb invalidation
>
>On 2024/11/4 15:37, CLEMENT MATHIEU--DRIF wrote:
>>
>>
>> On 04/11/2024 03:49, Yi Liu wrote:
>>> Caution: External email. Do not open attachments or click links, unless
>>> this email comes from a known sender and you know the content is safe.
>>>
>>>
>>> On 2024/9/30 17:26, Zhenzhong Duan wrote:
>>>> Per spec 6.5.2.4, PADID-selective PASID-based iotlb invalidation will
>>>> flush stage-2 iotlb entries with matching domain id and pasid.
>>>
>>> Also, call out it's per table Table 21. PASID-based-IOTLB Invalidation of
>>> VT-d spec 4.1.
>>>
>>>> With scalable modern mode introduced, guest could send PASID-selective
>>>> PASID-based iotlb invalidation to flush both stage-1 and stage-2 entries.
>>>>
>>>> By this chance, remove old IOTLB related definitions which were unused.
>>>
>>>
>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>> Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
>>>> Acked-by: Jason Wang <jasowang@redhat.com>
>>>> ---
>>>>    hw/i386/intel_iommu_internal.h | 14 ++++--
>>>>    hw/i386/intel_iommu.c          | 88 +++++++++++++++++++++++++++++++++-
>>>>    2 files changed, 96 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/
>>>> intel_iommu_internal.h
>>>> index d0f9d4589d..eec8090190 100644
>>>> --- a/hw/i386/intel_iommu_internal.h
>>>> +++ b/hw/i386/intel_iommu_internal.h
>>>> @@ -403,11 +403,6 @@ typedef union VTDInvDesc VTDInvDesc;
>>>>    #define VTD_INV_DESC_IOTLB_AM(val)      ((val) & 0x3fULL)
>>>>    #define VTD_INV_DESC_IOTLB_RSVD_LO      0xffffffff0000f100ULL
>>>>    #define VTD_INV_DESC_IOTLB_RSVD_HI      0xf80ULL
>>>> -#define VTD_INV_DESC_IOTLB_PASID_PASID  (2ULL << 4)
>>>> -#define VTD_INV_DESC_IOTLB_PASID_PAGE   (3ULL << 4)
>>>> -#define VTD_INV_DESC_IOTLB_PASID(val)   (((val) >> 32) &
>>>> VTD_PASID_ID_MASK)
>>>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO      0xfff00000000001c0ULL
>>>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI      0xf80ULL
>>>>
>>>>    /* Mask for Device IOTLB Invalidate Descriptor */
>>>>    #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) &
>>>> 0xfffffffffffff000ULL)
>>>> @@ -433,6 +428,15 @@ typedef union VTDInvDesc VTDInvDesc;
>>>>    #define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw) \
>>>>            (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
>>>>
>>>> +/* Masks for PIOTLB Invalidate Descriptor */
>>>> +#define VTD_INV_DESC_PIOTLB_G             (3ULL << 4)
>>>> +#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID  (2ULL << 4)
>>>> +#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID  (3ULL << 4)
>>>> +#define VTD_INV_DESC_PIOTLB_DID(val)      (((val) >> 16) &
>>>> VTD_DOMAIN_ID_MASK)
>>>> +#define VTD_INV_DESC_PIOTLB_PASID(val)    (((val) >> 32) & 0xfffffULL)
>>>> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL0     0xfff000000000f1c0ULL
>>>> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
>>>> +
>>>>    /* Information about page-selective IOTLB invalidate */
>>>>    struct VTDIOTLBPageInvInfo {
>>>>        uint16_t domain_id;
>>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>>> index 9e6ef0cb99..72c9c91d4f 100644
>>>> --- a/hw/i386/intel_iommu.c
>>>> +++ b/hw/i386/intel_iommu.c
>>>> @@ -2656,6 +2656,86 @@ static bool
>>>> vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
>>>>        return true;
>>>>    }
>>>>
>>>> +static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer value,
>>>> +                                         gpointer user_data)
>>>> +{
>>>> +    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
>>>> +    VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
>>>> +
>>>> +    return ((entry->domain_id == info->domain_id) &&
>>>> +            (entry->pasid == info->pasid));
>>>> +}
>>>> +
>>>> +static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
>>>> +                                        uint16_t domain_id, uint32_t
>>>> pasid)
>>>> +{
>>>> +    VTDIOTLBPageInvInfo info;
>>>> +    VTDAddressSpace *vtd_as;
>>>> +    VTDContextEntry ce;
>>>> +
>>>> +    info.domain_id = domain_id;
>>>> +    info.pasid = pasid;
>>>> +
>>>> +    vtd_iommu_lock(s);
>>>> +    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid,
>>>> +                                &info);
>>>> +    vtd_iommu_unlock(s);
>>>> +
>>>> +    QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
>>>> +        if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
>>>> +                                      vtd_as->devfn, &ce) &&
>>>> +            domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
>>>> +            uint32_t rid2pasid = VTD_CE_GET_RID2PASID(&ce);
>>>> +
>>>> +            if ((vtd_as->pasid != PCI_NO_PASID || pasid != rid2pasid) &&
>>>> +                vtd_as->pasid != pasid) {
>>>> +                continue;
>>>> +            }
>>>> +
>>>> +            if (!s->scalable_modern) {
>>>> +                vtd_address_space_sync(vtd_as);
>>>> +            }
>>>> +        }
>>>> +    }
>>>> +}
>>>> +
>>>> +static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
>>>> +                                    VTDInvDesc *inv_desc)
>>>> +{
>>>> +    uint16_t domain_id;
>>>> +    uint32_t pasid;
>>>> +
>>>> +    if ((inv_desc->val[0] & VTD_INV_DESC_PIOTLB_RSVD_VAL0) ||
>>>> +        (inv_desc->val[1] & VTD_INV_DESC_PIOTLB_RSVD_VAL1) ||
>>>> +        inv_desc->val[2] || inv_desc->val[3]) {
>>>> +        error_report_once("%s: invalid piotlb inv desc val[3]=0x%"PRIx64
>>>> +                          " val[2]=0x%"PRIx64" val[1]=0x%"PRIx64
>>>> +                          " val[0]=0x%"PRIx64" (reserved bits unzero)",
>>>> +                          __func__, inv_desc->val[3], inv_desc->val[2],
>>>> +                          inv_desc->val[1], inv_desc->val[0]);
>>>> +        return false;
>>>> +    }
>>>
>>> Need to consider the below behaviour as well.
>>>
>>> "
>>> This
>>> descriptor is a 256-bit descriptor and will result in an invalid descriptor
>>> error if submitted in an IQ that
>>> is setup to provide hardware with 128-bit descriptors (IQA_REG.DW=0)
>>> "
>>>
>>> Also there are descriptions about the old inv desc types (e.g.
>>> iotlb_inv_desc) that can be either 128bits or 256bits.
>>>
>>> "If a 128-bit
>>> version of this descriptor is submitted into an IQ that is setup to provide
>>> hardware with 256-bit
>>> descriptors or vice-versa it will result in an invalid descriptor error.
>>> "
>>>
>>> If DW==1, vIOMMU fetches 32 bytes per desc. In such case, if the guest
>>> submits 128bits desc, then the high 128bits would be non-zero if there is
>>> more than one desc. But if there is only one desc in the queue, then the
>>> high 128bits would be zero as well. While, it may be captured by the
>>> tail register update. Bit4 is reserved when DW==1, and guest would use
>>> bit4 when it only submits one desc.
>>>
>>> If DW==0, vIOMMU fetchs 16bytes per desc. If guest submits 256bits desc,
>>> it would appear to be two descs from vIOMMU p.o.v. The first 128bits
>>> can be identified as valid except for the types that does not requires
>>> 256bits. The higher 128bits would be subjected to the desc sanity check
>>> as well.
>>>
>>> Based on the above, I think you may need to add two more checks. If DW==0,
>>> vIOMMU should fail the inv types that requires 256bits; If DW==1, you
>>> should check the inv_desc->val[2] and inv_desc->val[3]. You've already
>>> done it in this patch.
>>>
>>> Thoughts are welcomed here.
>>
>> Good catch,
>> I think we should write the check in vtd_process_inv_desc
>> rather than updating the handlers.
>>
>> What are your thoughts?
>
>the first check can be done in vtd_process_inv_desc(). The second may
>be better in the handlers as the handlers have the reserved bits check.
>But given that none of the inv types use the high 128bits, so it is also
>acceptable to do it in vtd_process_inv_desc(). Do add proper comment.

Thanks Yi and Clement's suggestion, I'll send a small series to fix that
for upstream.

BRs.
Zhenzhong
Michael S. Tsirkin Nov. 4, 2024, 11:50 a.m. UTC | #5
On Mon, Nov 04, 2024 at 11:46:00AM +0000, Duan, Zhenzhong wrote:
> 
> 
> >-----Original Message-----
> >From: Liu, Yi L <yi.l.liu@intel.com>
> >Sent: Monday, November 4, 2024 4:45 PM
> >Subject: Re: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-
> >selective PASID-based iotlb invalidation
> >
> >On 2024/11/4 15:37, CLEMENT MATHIEU--DRIF wrote:
> >>
> >>
> >> On 04/11/2024 03:49, Yi Liu wrote:
> >>> Caution: External email. Do not open attachments or click links, unless
> >>> this email comes from a known sender and you know the content is safe.
> >>>
> >>>
> >>> On 2024/9/30 17:26, Zhenzhong Duan wrote:
> >>>> Per spec 6.5.2.4, PADID-selective PASID-based iotlb invalidation will
> >>>> flush stage-2 iotlb entries with matching domain id and pasid.
> >>>
> >>> Also, call out it's per table Table 21. PASID-based-IOTLB Invalidation of
> >>> VT-d spec 4.1.
> >>>
> >>>> With scalable modern mode introduced, guest could send PASID-selective
> >>>> PASID-based iotlb invalidation to flush both stage-1 and stage-2 entries.
> >>>>
> >>>> By this chance, remove old IOTLB related definitions which were unused.
> >>>
> >>>
> >>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> >>>> Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
> >>>> Acked-by: Jason Wang <jasowang@redhat.com>
> >>>> ---
> >>>>    hw/i386/intel_iommu_internal.h | 14 ++++--
> >>>>    hw/i386/intel_iommu.c          | 88 +++++++++++++++++++++++++++++++++-
> >>>>    2 files changed, 96 insertions(+), 6 deletions(-)
> >>>>
> >>>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/
> >>>> intel_iommu_internal.h
> >>>> index d0f9d4589d..eec8090190 100644
> >>>> --- a/hw/i386/intel_iommu_internal.h
> >>>> +++ b/hw/i386/intel_iommu_internal.h
> >>>> @@ -403,11 +403,6 @@ typedef union VTDInvDesc VTDInvDesc;
> >>>>    #define VTD_INV_DESC_IOTLB_AM(val)      ((val) & 0x3fULL)
> >>>>    #define VTD_INV_DESC_IOTLB_RSVD_LO      0xffffffff0000f100ULL
> >>>>    #define VTD_INV_DESC_IOTLB_RSVD_HI      0xf80ULL
> >>>> -#define VTD_INV_DESC_IOTLB_PASID_PASID  (2ULL << 4)
> >>>> -#define VTD_INV_DESC_IOTLB_PASID_PAGE   (3ULL << 4)
> >>>> -#define VTD_INV_DESC_IOTLB_PASID(val)   (((val) >> 32) &
> >>>> VTD_PASID_ID_MASK)
> >>>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO      0xfff00000000001c0ULL
> >>>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI      0xf80ULL
> >>>>
> >>>>    /* Mask for Device IOTLB Invalidate Descriptor */
> >>>>    #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) &
> >>>> 0xfffffffffffff000ULL)
> >>>> @@ -433,6 +428,15 @@ typedef union VTDInvDesc VTDInvDesc;
> >>>>    #define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw) \
> >>>>            (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
> >>>>
> >>>> +/* Masks for PIOTLB Invalidate Descriptor */
> >>>> +#define VTD_INV_DESC_PIOTLB_G             (3ULL << 4)
> >>>> +#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID  (2ULL << 4)
> >>>> +#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID  (3ULL << 4)
> >>>> +#define VTD_INV_DESC_PIOTLB_DID(val)      (((val) >> 16) &
> >>>> VTD_DOMAIN_ID_MASK)
> >>>> +#define VTD_INV_DESC_PIOTLB_PASID(val)    (((val) >> 32) & 0xfffffULL)
> >>>> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL0     0xfff000000000f1c0ULL
> >>>> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
> >>>> +
> >>>>    /* Information about page-selective IOTLB invalidate */
> >>>>    struct VTDIOTLBPageInvInfo {
> >>>>        uint16_t domain_id;
> >>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> >>>> index 9e6ef0cb99..72c9c91d4f 100644
> >>>> --- a/hw/i386/intel_iommu.c
> >>>> +++ b/hw/i386/intel_iommu.c
> >>>> @@ -2656,6 +2656,86 @@ static bool
> >>>> vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
> >>>>        return true;
> >>>>    }
> >>>>
> >>>> +static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer value,
> >>>> +                                         gpointer user_data)
> >>>> +{
> >>>> +    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
> >>>> +    VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
> >>>> +
> >>>> +    return ((entry->domain_id == info->domain_id) &&
> >>>> +            (entry->pasid == info->pasid));
> >>>> +}
> >>>> +
> >>>> +static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
> >>>> +                                        uint16_t domain_id, uint32_t
> >>>> pasid)
> >>>> +{
> >>>> +    VTDIOTLBPageInvInfo info;
> >>>> +    VTDAddressSpace *vtd_as;
> >>>> +    VTDContextEntry ce;
> >>>> +
> >>>> +    info.domain_id = domain_id;
> >>>> +    info.pasid = pasid;
> >>>> +
> >>>> +    vtd_iommu_lock(s);
> >>>> +    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid,
> >>>> +                                &info);
> >>>> +    vtd_iommu_unlock(s);
> >>>> +
> >>>> +    QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
> >>>> +        if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
> >>>> +                                      vtd_as->devfn, &ce) &&
> >>>> +            domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
> >>>> +            uint32_t rid2pasid = VTD_CE_GET_RID2PASID(&ce);
> >>>> +
> >>>> +            if ((vtd_as->pasid != PCI_NO_PASID || pasid != rid2pasid) &&
> >>>> +                vtd_as->pasid != pasid) {
> >>>> +                continue;
> >>>> +            }
> >>>> +
> >>>> +            if (!s->scalable_modern) {
> >>>> +                vtd_address_space_sync(vtd_as);
> >>>> +            }
> >>>> +        }
> >>>> +    }
> >>>> +}
> >>>> +
> >>>> +static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
> >>>> +                                    VTDInvDesc *inv_desc)
> >>>> +{
> >>>> +    uint16_t domain_id;
> >>>> +    uint32_t pasid;
> >>>> +
> >>>> +    if ((inv_desc->val[0] & VTD_INV_DESC_PIOTLB_RSVD_VAL0) ||
> >>>> +        (inv_desc->val[1] & VTD_INV_DESC_PIOTLB_RSVD_VAL1) ||
> >>>> +        inv_desc->val[2] || inv_desc->val[3]) {
> >>>> +        error_report_once("%s: invalid piotlb inv desc val[3]=0x%"PRIx64
> >>>> +                          " val[2]=0x%"PRIx64" val[1]=0x%"PRIx64
> >>>> +                          " val[0]=0x%"PRIx64" (reserved bits unzero)",
> >>>> +                          __func__, inv_desc->val[3], inv_desc->val[2],
> >>>> +                          inv_desc->val[1], inv_desc->val[0]);
> >>>> +        return false;
> >>>> +    }
> >>>
> >>> Need to consider the below behaviour as well.
> >>>
> >>> "
> >>> This
> >>> descriptor is a 256-bit descriptor and will result in an invalid descriptor
> >>> error if submitted in an IQ that
> >>> is setup to provide hardware with 128-bit descriptors (IQA_REG.DW=0)
> >>> "
> >>>
> >>> Also there are descriptions about the old inv desc types (e.g.
> >>> iotlb_inv_desc) that can be either 128bits or 256bits.
> >>>
> >>> "If a 128-bit
> >>> version of this descriptor is submitted into an IQ that is setup to provide
> >>> hardware with 256-bit
> >>> descriptors or vice-versa it will result in an invalid descriptor error.
> >>> "
> >>>
> >>> If DW==1, vIOMMU fetches 32 bytes per desc. In such case, if the guest
> >>> submits 128bits desc, then the high 128bits would be non-zero if there is
> >>> more than one desc. But if there is only one desc in the queue, then the
> >>> high 128bits would be zero as well. While, it may be captured by the
> >>> tail register update. Bit4 is reserved when DW==1, and guest would use
> >>> bit4 when it only submits one desc.
> >>>
> >>> If DW==0, vIOMMU fetchs 16bytes per desc. If guest submits 256bits desc,
> >>> it would appear to be two descs from vIOMMU p.o.v. The first 128bits
> >>> can be identified as valid except for the types that does not requires
> >>> 256bits. The higher 128bits would be subjected to the desc sanity check
> >>> as well.
> >>>
> >>> Based on the above, I think you may need to add two more checks. If DW==0,
> >>> vIOMMU should fail the inv types that requires 256bits; If DW==1, you
> >>> should check the inv_desc->val[2] and inv_desc->val[3]. You've already
> >>> done it in this patch.
> >>>
> >>> Thoughts are welcomed here.
> >>
> >> Good catch,
> >> I think we should write the check in vtd_process_inv_desc
> >> rather than updating the handlers.
> >>
> >> What are your thoughts?
> >
> >the first check can be done in vtd_process_inv_desc(). The second may
> >be better in the handlers as the handlers have the reserved bits check.
> >But given that none of the inv types use the high 128bits, so it is also
> >acceptable to do it in vtd_process_inv_desc(). Do add proper comment.
> 
> Thanks Yi and Clement's suggestion, I'll send a small series to fix that
> for upstream.
> 
> BRs.
> Zhenzhong

Ok so you will send v5?
Duan, Zhenzhong Nov. 4, 2024, 11:55 a.m. UTC | #6
>-----Original Message-----
>From: Michael S. Tsirkin <mst@redhat.com>
>Sent: Monday, November 4, 2024 7:51 PM
>Subject: Re: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-
>selective PASID-based iotlb invalidation
>
>On Mon, Nov 04, 2024 at 11:46:00AM +0000, Duan, Zhenzhong wrote:
>>
>>
>> >-----Original Message-----
>> >From: Liu, Yi L <yi.l.liu@intel.com>
>> >Sent: Monday, November 4, 2024 4:45 PM
>> >Subject: Re: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-
>> >selective PASID-based iotlb invalidation
>> >
>> >On 2024/11/4 15:37, CLEMENT MATHIEU--DRIF wrote:
>> >>
>> >>
>> >> On 04/11/2024 03:49, Yi Liu wrote:
>> >>> Caution: External email. Do not open attachments or click links, unless
>> >>> this email comes from a known sender and you know the content is safe.
>> >>>
>> >>>
>> >>> On 2024/9/30 17:26, Zhenzhong Duan wrote:
>> >>>> Per spec 6.5.2.4, PADID-selective PASID-based iotlb invalidation will
>> >>>> flush stage-2 iotlb entries with matching domain id and pasid.
>> >>>
>> >>> Also, call out it's per table Table 21. PASID-based-IOTLB Invalidation of
>> >>> VT-d spec 4.1.
>> >>>
>> >>>> With scalable modern mode introduced, guest could send PASID-selective
>> >>>> PASID-based iotlb invalidation to flush both stage-1 and stage-2 entries.
>> >>>>
>> >>>> By this chance, remove old IOTLB related definitions which were unused.
>> >>>
>> >>>
>> >>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> >>>> Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
>> >>>> Acked-by: Jason Wang <jasowang@redhat.com>
>> >>>> ---
>> >>>>    hw/i386/intel_iommu_internal.h | 14 ++++--
>> >>>>    hw/i386/intel_iommu.c          | 88
>+++++++++++++++++++++++++++++++++-
>> >>>>    2 files changed, 96 insertions(+), 6 deletions(-)
>> >>>>
>> >>>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/
>> >>>> intel_iommu_internal.h
>> >>>> index d0f9d4589d..eec8090190 100644
>> >>>> --- a/hw/i386/intel_iommu_internal.h
>> >>>> +++ b/hw/i386/intel_iommu_internal.h
>> >>>> @@ -403,11 +403,6 @@ typedef union VTDInvDesc VTDInvDesc;
>> >>>>    #define VTD_INV_DESC_IOTLB_AM(val)      ((val) & 0x3fULL)
>> >>>>    #define VTD_INV_DESC_IOTLB_RSVD_LO      0xffffffff0000f100ULL
>> >>>>    #define VTD_INV_DESC_IOTLB_RSVD_HI      0xf80ULL
>> >>>> -#define VTD_INV_DESC_IOTLB_PASID_PASID  (2ULL << 4)
>> >>>> -#define VTD_INV_DESC_IOTLB_PASID_PAGE   (3ULL << 4)
>> >>>> -#define VTD_INV_DESC_IOTLB_PASID(val)   (((val) >> 32) &
>> >>>> VTD_PASID_ID_MASK)
>> >>>> -#define
>VTD_INV_DESC_IOTLB_PASID_RSVD_LO      0xfff00000000001c0ULL
>> >>>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI      0xf80ULL
>> >>>>
>> >>>>    /* Mask for Device IOTLB Invalidate Descriptor */
>> >>>>    #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) &
>> >>>> 0xfffffffffffff000ULL)
>> >>>> @@ -433,6 +428,15 @@ typedef union VTDInvDesc VTDInvDesc;
>> >>>>    #define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw) \
>> >>>>            (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
>> >>>>
>> >>>> +/* Masks for PIOTLB Invalidate Descriptor */
>> >>>> +#define VTD_INV_DESC_PIOTLB_G             (3ULL << 4)
>> >>>> +#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID  (2ULL << 4)
>> >>>> +#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID  (3ULL << 4)
>> >>>> +#define VTD_INV_DESC_PIOTLB_DID(val)      (((val) >> 16) &
>> >>>> VTD_DOMAIN_ID_MASK)
>> >>>> +#define VTD_INV_DESC_PIOTLB_PASID(val)    (((val) >> 32) & 0xfffffULL)
>> >>>> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL0     0xfff000000000f1c0ULL
>> >>>> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
>> >>>> +
>> >>>>    /* Information about page-selective IOTLB invalidate */
>> >>>>    struct VTDIOTLBPageInvInfo {
>> >>>>        uint16_t domain_id;
>> >>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> >>>> index 9e6ef0cb99..72c9c91d4f 100644
>> >>>> --- a/hw/i386/intel_iommu.c
>> >>>> +++ b/hw/i386/intel_iommu.c
>> >>>> @@ -2656,6 +2656,86 @@ static bool
>> >>>> vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
>> >>>>        return true;
>> >>>>    }
>> >>>>
>> >>>> +static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer value,
>> >>>> +                                         gpointer user_data)
>> >>>> +{
>> >>>> +    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
>> >>>> +    VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
>> >>>> +
>> >>>> +    return ((entry->domain_id == info->domain_id) &&
>> >>>> +            (entry->pasid == info->pasid));
>> >>>> +}
>> >>>> +
>> >>>> +static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
>> >>>> +                                        uint16_t domain_id, uint32_t
>> >>>> pasid)
>> >>>> +{
>> >>>> +    VTDIOTLBPageInvInfo info;
>> >>>> +    VTDAddressSpace *vtd_as;
>> >>>> +    VTDContextEntry ce;
>> >>>> +
>> >>>> +    info.domain_id = domain_id;
>> >>>> +    info.pasid = pasid;
>> >>>> +
>> >>>> +    vtd_iommu_lock(s);
>> >>>> +    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid,
>> >>>> +                                &info);
>> >>>> +    vtd_iommu_unlock(s);
>> >>>> +
>> >>>> +    QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
>> >>>> +        if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
>> >>>> +                                      vtd_as->devfn, &ce) &&
>> >>>> +            domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
>> >>>> +            uint32_t rid2pasid = VTD_CE_GET_RID2PASID(&ce);
>> >>>> +
>> >>>> +            if ((vtd_as->pasid != PCI_NO_PASID || pasid != rid2pasid) &&
>> >>>> +                vtd_as->pasid != pasid) {
>> >>>> +                continue;
>> >>>> +            }
>> >>>> +
>> >>>> +            if (!s->scalable_modern) {
>> >>>> +                vtd_address_space_sync(vtd_as);
>> >>>> +            }
>> >>>> +        }
>> >>>> +    }
>> >>>> +}
>> >>>> +
>> >>>> +static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
>> >>>> +                                    VTDInvDesc *inv_desc)
>> >>>> +{
>> >>>> +    uint16_t domain_id;
>> >>>> +    uint32_t pasid;
>> >>>> +
>> >>>> +    if ((inv_desc->val[0] & VTD_INV_DESC_PIOTLB_RSVD_VAL0) ||
>> >>>> +        (inv_desc->val[1] & VTD_INV_DESC_PIOTLB_RSVD_VAL1) ||
>> >>>> +        inv_desc->val[2] || inv_desc->val[3]) {
>> >>>> +        error_report_once("%s: invalid piotlb inv desc val[3]=0x%"PRIx64
>> >>>> +                          " val[2]=0x%"PRIx64" val[1]=0x%"PRIx64
>> >>>> +                          " val[0]=0x%"PRIx64" (reserved bits unzero)",
>> >>>> +                          __func__, inv_desc->val[3], inv_desc->val[2],
>> >>>> +                          inv_desc->val[1], inv_desc->val[0]);
>> >>>> +        return false;
>> >>>> +    }
>> >>>
>> >>> Need to consider the below behaviour as well.
>> >>>
>> >>> "
>> >>> This
>> >>> descriptor is a 256-bit descriptor and will result in an invalid descriptor
>> >>> error if submitted in an IQ that
>> >>> is setup to provide hardware with 128-bit descriptors (IQA_REG.DW=0)
>> >>> "
>> >>>
>> >>> Also there are descriptions about the old inv desc types (e.g.
>> >>> iotlb_inv_desc) that can be either 128bits or 256bits.
>> >>>
>> >>> "If a 128-bit
>> >>> version of this descriptor is submitted into an IQ that is setup to provide
>> >>> hardware with 256-bit
>> >>> descriptors or vice-versa it will result in an invalid descriptor error.
>> >>> "
>> >>>
>> >>> If DW==1, vIOMMU fetches 32 bytes per desc. In such case, if the guest
>> >>> submits 128bits desc, then the high 128bits would be non-zero if there is
>> >>> more than one desc. But if there is only one desc in the queue, then the
>> >>> high 128bits would be zero as well. While, it may be captured by the
>> >>> tail register update. Bit4 is reserved when DW==1, and guest would use
>> >>> bit4 when it only submits one desc.
>> >>>
>> >>> If DW==0, vIOMMU fetchs 16bytes per desc. If guest submits 256bits desc,
>> >>> it would appear to be two descs from vIOMMU p.o.v. The first 128bits
>> >>> can be identified as valid except for the types that does not requires
>> >>> 256bits. The higher 128bits would be subjected to the desc sanity check
>> >>> as well.
>> >>>
>> >>> Based on the above, I think you may need to add two more checks. If
>DW==0,
>> >>> vIOMMU should fail the inv types that requires 256bits; If DW==1, you
>> >>> should check the inv_desc->val[2] and inv_desc->val[3]. You've already
>> >>> done it in this patch.
>> >>>
>> >>> Thoughts are welcomed here.
>> >>
>> >> Good catch,
>> >> I think we should write the check in vtd_process_inv_desc
>> >> rather than updating the handlers.
>> >>
>> >> What are your thoughts?
>> >
>> >the first check can be done in vtd_process_inv_desc(). The second may
>> >be better in the handlers as the handlers have the reserved bits check.
>> >But given that none of the inv types use the high 128bits, so it is also
>> >acceptable to do it in vtd_process_inv_desc(). Do add proper comment.
>>
>> Thanks Yi and Clement's suggestion, I'll send a small series to fix that
>> for upstream.
>>
>> BRs.
>> Zhenzhong
>
>Ok so you will send v5?

No, what Yi pointed out is an upstream issue, I'll send a small series(3 patches)
to fix that issue for upstream.

Thanks
Zhenzhong
Michael S. Tsirkin Nov. 4, 2024, 12:01 p.m. UTC | #7
On Mon, Nov 04, 2024 at 11:55:39AM +0000, Duan, Zhenzhong wrote:
> 
> 
> >-----Original Message-----
> >From: Michael S. Tsirkin <mst@redhat.com>
> >Sent: Monday, November 4, 2024 7:51 PM
> >Subject: Re: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-
> >selective PASID-based iotlb invalidation
> >
> >On Mon, Nov 04, 2024 at 11:46:00AM +0000, Duan, Zhenzhong wrote:
> >>
> >>
> >> >-----Original Message-----
> >> >From: Liu, Yi L <yi.l.liu@intel.com>
> >> >Sent: Monday, November 4, 2024 4:45 PM
> >> >Subject: Re: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-
> >> >selective PASID-based iotlb invalidation
> >> >
> >> >On 2024/11/4 15:37, CLEMENT MATHIEU--DRIF wrote:
> >> >>
> >> >>
> >> >> On 04/11/2024 03:49, Yi Liu wrote:
> >> >>> Caution: External email. Do not open attachments or click links, unless
> >> >>> this email comes from a known sender and you know the content is safe.
> >> >>>
> >> >>>
> >> >>> On 2024/9/30 17:26, Zhenzhong Duan wrote:
> >> >>>> Per spec 6.5.2.4, PADID-selective PASID-based iotlb invalidation will
> >> >>>> flush stage-2 iotlb entries with matching domain id and pasid.
> >> >>>
> >> >>> Also, call out it's per table Table 21. PASID-based-IOTLB Invalidation of
> >> >>> VT-d spec 4.1.
> >> >>>
> >> >>>> With scalable modern mode introduced, guest could send PASID-selective
> >> >>>> PASID-based iotlb invalidation to flush both stage-1 and stage-2 entries.
> >> >>>>
> >> >>>> By this chance, remove old IOTLB related definitions which were unused.
> >> >>>
> >> >>>
> >> >>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> >> >>>> Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
> >> >>>> Acked-by: Jason Wang <jasowang@redhat.com>
> >> >>>> ---
> >> >>>>    hw/i386/intel_iommu_internal.h | 14 ++++--
> >> >>>>    hw/i386/intel_iommu.c          | 88
> >+++++++++++++++++++++++++++++++++-
> >> >>>>    2 files changed, 96 insertions(+), 6 deletions(-)
> >> >>>>
> >> >>>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/
> >> >>>> intel_iommu_internal.h
> >> >>>> index d0f9d4589d..eec8090190 100644
> >> >>>> --- a/hw/i386/intel_iommu_internal.h
> >> >>>> +++ b/hw/i386/intel_iommu_internal.h
> >> >>>> @@ -403,11 +403,6 @@ typedef union VTDInvDesc VTDInvDesc;
> >> >>>>    #define VTD_INV_DESC_IOTLB_AM(val)      ((val) & 0x3fULL)
> >> >>>>    #define VTD_INV_DESC_IOTLB_RSVD_LO      0xffffffff0000f100ULL
> >> >>>>    #define VTD_INV_DESC_IOTLB_RSVD_HI      0xf80ULL
> >> >>>> -#define VTD_INV_DESC_IOTLB_PASID_PASID  (2ULL << 4)
> >> >>>> -#define VTD_INV_DESC_IOTLB_PASID_PAGE   (3ULL << 4)
> >> >>>> -#define VTD_INV_DESC_IOTLB_PASID(val)   (((val) >> 32) &
> >> >>>> VTD_PASID_ID_MASK)
> >> >>>> -#define
> >VTD_INV_DESC_IOTLB_PASID_RSVD_LO      0xfff00000000001c0ULL
> >> >>>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI      0xf80ULL
> >> >>>>
> >> >>>>    /* Mask for Device IOTLB Invalidate Descriptor */
> >> >>>>    #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) &
> >> >>>> 0xfffffffffffff000ULL)
> >> >>>> @@ -433,6 +428,15 @@ typedef union VTDInvDesc VTDInvDesc;
> >> >>>>    #define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw) \
> >> >>>>            (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
> >> >>>>
> >> >>>> +/* Masks for PIOTLB Invalidate Descriptor */
> >> >>>> +#define VTD_INV_DESC_PIOTLB_G             (3ULL << 4)
> >> >>>> +#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID  (2ULL << 4)
> >> >>>> +#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID  (3ULL << 4)
> >> >>>> +#define VTD_INV_DESC_PIOTLB_DID(val)      (((val) >> 16) &
> >> >>>> VTD_DOMAIN_ID_MASK)
> >> >>>> +#define VTD_INV_DESC_PIOTLB_PASID(val)    (((val) >> 32) & 0xfffffULL)
> >> >>>> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL0     0xfff000000000f1c0ULL
> >> >>>> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
> >> >>>> +
> >> >>>>    /* Information about page-selective IOTLB invalidate */
> >> >>>>    struct VTDIOTLBPageInvInfo {
> >> >>>>        uint16_t domain_id;
> >> >>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> >> >>>> index 9e6ef0cb99..72c9c91d4f 100644
> >> >>>> --- a/hw/i386/intel_iommu.c
> >> >>>> +++ b/hw/i386/intel_iommu.c
> >> >>>> @@ -2656,6 +2656,86 @@ static bool
> >> >>>> vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
> >> >>>>        return true;
> >> >>>>    }
> >> >>>>
> >> >>>> +static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer value,
> >> >>>> +                                         gpointer user_data)
> >> >>>> +{
> >> >>>> +    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
> >> >>>> +    VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
> >> >>>> +
> >> >>>> +    return ((entry->domain_id == info->domain_id) &&
> >> >>>> +            (entry->pasid == info->pasid));
> >> >>>> +}
> >> >>>> +
> >> >>>> +static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
> >> >>>> +                                        uint16_t domain_id, uint32_t
> >> >>>> pasid)
> >> >>>> +{
> >> >>>> +    VTDIOTLBPageInvInfo info;
> >> >>>> +    VTDAddressSpace *vtd_as;
> >> >>>> +    VTDContextEntry ce;
> >> >>>> +
> >> >>>> +    info.domain_id = domain_id;
> >> >>>> +    info.pasid = pasid;
> >> >>>> +
> >> >>>> +    vtd_iommu_lock(s);
> >> >>>> +    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid,
> >> >>>> +                                &info);
> >> >>>> +    vtd_iommu_unlock(s);
> >> >>>> +
> >> >>>> +    QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
> >> >>>> +        if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
> >> >>>> +                                      vtd_as->devfn, &ce) &&
> >> >>>> +            domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
> >> >>>> +            uint32_t rid2pasid = VTD_CE_GET_RID2PASID(&ce);
> >> >>>> +
> >> >>>> +            if ((vtd_as->pasid != PCI_NO_PASID || pasid != rid2pasid) &&
> >> >>>> +                vtd_as->pasid != pasid) {
> >> >>>> +                continue;
> >> >>>> +            }
> >> >>>> +
> >> >>>> +            if (!s->scalable_modern) {
> >> >>>> +                vtd_address_space_sync(vtd_as);
> >> >>>> +            }
> >> >>>> +        }
> >> >>>> +    }
> >> >>>> +}
> >> >>>> +
> >> >>>> +static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
> >> >>>> +                                    VTDInvDesc *inv_desc)
> >> >>>> +{
> >> >>>> +    uint16_t domain_id;
> >> >>>> +    uint32_t pasid;
> >> >>>> +
> >> >>>> +    if ((inv_desc->val[0] & VTD_INV_DESC_PIOTLB_RSVD_VAL0) ||
> >> >>>> +        (inv_desc->val[1] & VTD_INV_DESC_PIOTLB_RSVD_VAL1) ||
> >> >>>> +        inv_desc->val[2] || inv_desc->val[3]) {
> >> >>>> +        error_report_once("%s: invalid piotlb inv desc val[3]=0x%"PRIx64
> >> >>>> +                          " val[2]=0x%"PRIx64" val[1]=0x%"PRIx64
> >> >>>> +                          " val[0]=0x%"PRIx64" (reserved bits unzero)",
> >> >>>> +                          __func__, inv_desc->val[3], inv_desc->val[2],
> >> >>>> +                          inv_desc->val[1], inv_desc->val[0]);
> >> >>>> +        return false;
> >> >>>> +    }
> >> >>>
> >> >>> Need to consider the below behaviour as well.
> >> >>>
> >> >>> "
> >> >>> This
> >> >>> descriptor is a 256-bit descriptor and will result in an invalid descriptor
> >> >>> error if submitted in an IQ that
> >> >>> is setup to provide hardware with 128-bit descriptors (IQA_REG.DW=0)
> >> >>> "
> >> >>>
> >> >>> Also there are descriptions about the old inv desc types (e.g.
> >> >>> iotlb_inv_desc) that can be either 128bits or 256bits.
> >> >>>
> >> >>> "If a 128-bit
> >> >>> version of this descriptor is submitted into an IQ that is setup to provide
> >> >>> hardware with 256-bit
> >> >>> descriptors or vice-versa it will result in an invalid descriptor error.
> >> >>> "
> >> >>>
> >> >>> If DW==1, vIOMMU fetches 32 bytes per desc. In such case, if the guest
> >> >>> submits 128bits desc, then the high 128bits would be non-zero if there is
> >> >>> more than one desc. But if there is only one desc in the queue, then the
> >> >>> high 128bits would be zero as well. While, it may be captured by the
> >> >>> tail register update. Bit4 is reserved when DW==1, and guest would use
> >> >>> bit4 when it only submits one desc.
> >> >>>
> >> >>> If DW==0, vIOMMU fetchs 16bytes per desc. If guest submits 256bits desc,
> >> >>> it would appear to be two descs from vIOMMU p.o.v. The first 128bits
> >> >>> can be identified as valid except for the types that does not requires
> >> >>> 256bits. The higher 128bits would be subjected to the desc sanity check
> >> >>> as well.
> >> >>>
> >> >>> Based on the above, I think you may need to add two more checks. If
> >DW==0,
> >> >>> vIOMMU should fail the inv types that requires 256bits; If DW==1, you
> >> >>> should check the inv_desc->val[2] and inv_desc->val[3]. You've already
> >> >>> done it in this patch.
> >> >>>
> >> >>> Thoughts are welcomed here.
> >> >>
> >> >> Good catch,
> >> >> I think we should write the check in vtd_process_inv_desc
> >> >> rather than updating the handlers.
> >> >>
> >> >> What are your thoughts?
> >> >
> >> >the first check can be done in vtd_process_inv_desc(). The second may
> >> >be better in the handlers as the handlers have the reserved bits check.
> >> >But given that none of the inv types use the high 128bits, so it is also
> >> >acceptable to do it in vtd_process_inv_desc(). Do add proper comment.
> >>
> >> Thanks Yi and Clement's suggestion, I'll send a small series to fix that
> >> for upstream.
> >>
> >> BRs.
> >> Zhenzhong
> >
> >Ok so you will send v5?
> 
> No, what Yi pointed out is an upstream issue, I'll send a small series(3 patches)
> to fix that issue for upstream.
> 
> Thanks
> Zhenzhong

Also ok. There's still gonnu be v5 because of other comments, right?
Duan, Zhenzhong Nov. 4, 2024, 12:03 p.m. UTC | #8
>-----Original Message-----
>From: Michael S. Tsirkin <mst@redhat.com>
>Sent: Monday, November 4, 2024 8:01 PM
>Subject: Re: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-
>selective PASID-based iotlb invalidation
>
>On Mon, Nov 04, 2024 at 11:55:39AM +0000, Duan, Zhenzhong wrote:
>>
>>
>> >-----Original Message-----
>> >From: Michael S. Tsirkin <mst@redhat.com>
>> >Sent: Monday, November 4, 2024 7:51 PM
>> >Subject: Re: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-
>> >selective PASID-based iotlb invalidation
>> >
>> >On Mon, Nov 04, 2024 at 11:46:00AM +0000, Duan, Zhenzhong wrote:
>> >>
>> >>
>> >> >-----Original Message-----
>> >> >From: Liu, Yi L <yi.l.liu@intel.com>
>> >> >Sent: Monday, November 4, 2024 4:45 PM
>> >> >Subject: Re: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-
>> >> >selective PASID-based iotlb invalidation
>> >> >
>> >> >On 2024/11/4 15:37, CLEMENT MATHIEU--DRIF wrote:
>> >> >>
>> >> >>
>> >> >> On 04/11/2024 03:49, Yi Liu wrote:
>> >> >>> Caution: External email. Do not open attachments or click links, unless
>> >> >>> this email comes from a known sender and you know the content is safe.
>> >> >>>
>> >> >>>
>> >> >>> On 2024/9/30 17:26, Zhenzhong Duan wrote:
>> >> >>>> Per spec 6.5.2.4, PADID-selective PASID-based iotlb invalidation will
>> >> >>>> flush stage-2 iotlb entries with matching domain id and pasid.
>> >> >>>
>> >> >>> Also, call out it's per table Table 21. PASID-based-IOTLB Invalidation of
>> >> >>> VT-d spec 4.1.
>> >> >>>
>> >> >>>> With scalable modern mode introduced, guest could send PASID-
>selective
>> >> >>>> PASID-based iotlb invalidation to flush both stage-1 and stage-2 entries.
>> >> >>>>
>> >> >>>> By this chance, remove old IOTLB related definitions which were
>unused.
>> >> >>>
>> >> >>>
>> >> >>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> >> >>>> Reviewed-by: Clément Mathieu--Drif<clement.mathieu--
>drif@eviden.com>
>> >> >>>> Acked-by: Jason Wang <jasowang@redhat.com>
>> >> >>>> ---
>> >> >>>>    hw/i386/intel_iommu_internal.h | 14 ++++--
>> >> >>>>    hw/i386/intel_iommu.c          | 88
>> >+++++++++++++++++++++++++++++++++-
>> >> >>>>    2 files changed, 96 insertions(+), 6 deletions(-)
>> >> >>>>
>> >> >>>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/
>> >> >>>> intel_iommu_internal.h
>> >> >>>> index d0f9d4589d..eec8090190 100644
>> >> >>>> --- a/hw/i386/intel_iommu_internal.h
>> >> >>>> +++ b/hw/i386/intel_iommu_internal.h
>> >> >>>> @@ -403,11 +403,6 @@ typedef union VTDInvDesc VTDInvDesc;
>> >> >>>>    #define VTD_INV_DESC_IOTLB_AM(val)      ((val) & 0x3fULL)
>> >> >>>>    #define VTD_INV_DESC_IOTLB_RSVD_LO      0xffffffff0000f100ULL
>> >> >>>>    #define VTD_INV_DESC_IOTLB_RSVD_HI      0xf80ULL
>> >> >>>> -#define VTD_INV_DESC_IOTLB_PASID_PASID  (2ULL << 4)
>> >> >>>> -#define VTD_INV_DESC_IOTLB_PASID_PAGE   (3ULL << 4)
>> >> >>>> -#define VTD_INV_DESC_IOTLB_PASID(val)   (((val) >> 32) &
>> >> >>>> VTD_PASID_ID_MASK)
>> >> >>>> -#define
>> >VTD_INV_DESC_IOTLB_PASID_RSVD_LO      0xfff00000000001c0ULL
>> >> >>>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI      0xf80ULL
>> >> >>>>
>> >> >>>>    /* Mask for Device IOTLB Invalidate Descriptor */
>> >> >>>>    #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) &
>> >> >>>> 0xfffffffffffff000ULL)
>> >> >>>> @@ -433,6 +428,15 @@ typedef union VTDInvDesc VTDInvDesc;
>> >> >>>>    #define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw) \
>> >> >>>>            (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
>> >> >>>>
>> >> >>>> +/* Masks for PIOTLB Invalidate Descriptor */
>> >> >>>> +#define VTD_INV_DESC_PIOTLB_G             (3ULL << 4)
>> >> >>>> +#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID  (2ULL << 4)
>> >> >>>> +#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID  (3ULL << 4)
>> >> >>>> +#define VTD_INV_DESC_PIOTLB_DID(val)      (((val) >> 16) &
>> >> >>>> VTD_DOMAIN_ID_MASK)
>> >> >>>> +#define VTD_INV_DESC_PIOTLB_PASID(val)    (((val) >> 32) &
>0xfffffULL)
>> >> >>>> +#define
>VTD_INV_DESC_PIOTLB_RSVD_VAL0     0xfff000000000f1c0ULL
>> >> >>>> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
>> >> >>>> +
>> >> >>>>    /* Information about page-selective IOTLB invalidate */
>> >> >>>>    struct VTDIOTLBPageInvInfo {
>> >> >>>>        uint16_t domain_id;
>> >> >>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> >> >>>> index 9e6ef0cb99..72c9c91d4f 100644
>> >> >>>> --- a/hw/i386/intel_iommu.c
>> >> >>>> +++ b/hw/i386/intel_iommu.c
>> >> >>>> @@ -2656,6 +2656,86 @@ static bool
>> >> >>>> vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
>> >> >>>>        return true;
>> >> >>>>    }
>> >> >>>>
>> >> >>>> +static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer
>value,
>> >> >>>> +                                         gpointer user_data)
>> >> >>>> +{
>> >> >>>> +    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
>> >> >>>> +    VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
>> >> >>>> +
>> >> >>>> +    return ((entry->domain_id == info->domain_id) &&
>> >> >>>> +            (entry->pasid == info->pasid));
>> >> >>>> +}
>> >> >>>> +
>> >> >>>> +static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
>> >> >>>> +                                        uint16_t domain_id, uint32_t
>> >> >>>> pasid)
>> >> >>>> +{
>> >> >>>> +    VTDIOTLBPageInvInfo info;
>> >> >>>> +    VTDAddressSpace *vtd_as;
>> >> >>>> +    VTDContextEntry ce;
>> >> >>>> +
>> >> >>>> +    info.domain_id = domain_id;
>> >> >>>> +    info.pasid = pasid;
>> >> >>>> +
>> >> >>>> +    vtd_iommu_lock(s);
>> >> >>>> +    g_hash_table_foreach_remove(s->iotlb,
>vtd_hash_remove_by_pasid,
>> >> >>>> +                                &info);
>> >> >>>> +    vtd_iommu_unlock(s);
>> >> >>>> +
>> >> >>>> +    QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
>> >> >>>> +        if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
>> >> >>>> +                                      vtd_as->devfn, &ce) &&
>> >> >>>> +            domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
>> >> >>>> +            uint32_t rid2pasid = VTD_CE_GET_RID2PASID(&ce);
>> >> >>>> +
>> >> >>>> +            if ((vtd_as->pasid != PCI_NO_PASID || pasid != rid2pasid) &&
>> >> >>>> +                vtd_as->pasid != pasid) {
>> >> >>>> +                continue;
>> >> >>>> +            }
>> >> >>>> +
>> >> >>>> +            if (!s->scalable_modern) {
>> >> >>>> +                vtd_address_space_sync(vtd_as);
>> >> >>>> +            }
>> >> >>>> +        }
>> >> >>>> +    }
>> >> >>>> +}
>> >> >>>> +
>> >> >>>> +static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
>> >> >>>> +                                    VTDInvDesc *inv_desc)
>> >> >>>> +{
>> >> >>>> +    uint16_t domain_id;
>> >> >>>> +    uint32_t pasid;
>> >> >>>> +
>> >> >>>> +    if ((inv_desc->val[0] & VTD_INV_DESC_PIOTLB_RSVD_VAL0) ||
>> >> >>>> +        (inv_desc->val[1] & VTD_INV_DESC_PIOTLB_RSVD_VAL1) ||
>> >> >>>> +        inv_desc->val[2] || inv_desc->val[3]) {
>> >> >>>> +        error_report_once("%s: invalid piotlb inv desc val[3]=0x%"PRIx64
>> >> >>>> +                          " val[2]=0x%"PRIx64" val[1]=0x%"PRIx64
>> >> >>>> +                          " val[0]=0x%"PRIx64" (reserved bits unzero)",
>> >> >>>> +                          __func__, inv_desc->val[3], inv_desc->val[2],
>> >> >>>> +                          inv_desc->val[1], inv_desc->val[0]);
>> >> >>>> +        return false;
>> >> >>>> +    }
>> >> >>>
>> >> >>> Need to consider the below behaviour as well.
>> >> >>>
>> >> >>> "
>> >> >>> This
>> >> >>> descriptor is a 256-bit descriptor and will result in an invalid descriptor
>> >> >>> error if submitted in an IQ that
>> >> >>> is setup to provide hardware with 128-bit descriptors (IQA_REG.DW=0)
>> >> >>> "
>> >> >>>
>> >> >>> Also there are descriptions about the old inv desc types (e.g.
>> >> >>> iotlb_inv_desc) that can be either 128bits or 256bits.
>> >> >>>
>> >> >>> "If a 128-bit
>> >> >>> version of this descriptor is submitted into an IQ that is setup to provide
>> >> >>> hardware with 256-bit
>> >> >>> descriptors or vice-versa it will result in an invalid descriptor error.
>> >> >>> "
>> >> >>>
>> >> >>> If DW==1, vIOMMU fetches 32 bytes per desc. In such case, if the guest
>> >> >>> submits 128bits desc, then the high 128bits would be non-zero if there is
>> >> >>> more than one desc. But if there is only one desc in the queue, then the
>> >> >>> high 128bits would be zero as well. While, it may be captured by the
>> >> >>> tail register update. Bit4 is reserved when DW==1, and guest would use
>> >> >>> bit4 when it only submits one desc.
>> >> >>>
>> >> >>> If DW==0, vIOMMU fetchs 16bytes per desc. If guest submits 256bits
>desc,
>> >> >>> it would appear to be two descs from vIOMMU p.o.v. The first 128bits
>> >> >>> can be identified as valid except for the types that does not requires
>> >> >>> 256bits. The higher 128bits would be subjected to the desc sanity check
>> >> >>> as well.
>> >> >>>
>> >> >>> Based on the above, I think you may need to add two more checks. If
>> >DW==0,
>> >> >>> vIOMMU should fail the inv types that requires 256bits; If DW==1, you
>> >> >>> should check the inv_desc->val[2] and inv_desc->val[3]. You've already
>> >> >>> done it in this patch.
>> >> >>>
>> >> >>> Thoughts are welcomed here.
>> >> >>
>> >> >> Good catch,
>> >> >> I think we should write the check in vtd_process_inv_desc
>> >> >> rather than updating the handlers.
>> >> >>
>> >> >> What are your thoughts?
>> >> >
>> >> >the first check can be done in vtd_process_inv_desc(). The second may
>> >> >be better in the handlers as the handlers have the reserved bits check.
>> >> >But given that none of the inv types use the high 128bits, so it is also
>> >> >acceptable to do it in vtd_process_inv_desc(). Do add proper comment.
>> >>
>> >> Thanks Yi and Clement's suggestion, I'll send a small series to fix that
>> >> for upstream.
>> >>
>> >> BRs.
>> >> Zhenzhong
>> >
>> >Ok so you will send v5?
>>
>> No, what Yi pointed out is an upstream issue, I'll send a small series(3 patches)
>> to fix that issue for upstream.
>>
>> Thanks
>> Zhenzhong
>
>Also ok. There's still gonnu be v5 because of other comments, right?

Right.

Thanks
Zhenzhong
diff mbox series

Patch

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index d0f9d4589d..eec8090190 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -403,11 +403,6 @@  typedef union VTDInvDesc VTDInvDesc;
 #define VTD_INV_DESC_IOTLB_AM(val)      ((val) & 0x3fULL)
 #define VTD_INV_DESC_IOTLB_RSVD_LO      0xffffffff0000f100ULL
 #define VTD_INV_DESC_IOTLB_RSVD_HI      0xf80ULL
-#define VTD_INV_DESC_IOTLB_PASID_PASID  (2ULL << 4)
-#define VTD_INV_DESC_IOTLB_PASID_PAGE   (3ULL << 4)
-#define VTD_INV_DESC_IOTLB_PASID(val)   (((val) >> 32) & VTD_PASID_ID_MASK)
-#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO      0xfff00000000001c0ULL
-#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI      0xf80ULL
 
 /* Mask for Device IOTLB Invalidate Descriptor */
 #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL)
@@ -433,6 +428,15 @@  typedef union VTDInvDesc VTDInvDesc;
 #define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw) \
         (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
 
+/* Masks for PIOTLB Invalidate Descriptor */
+#define VTD_INV_DESC_PIOTLB_G             (3ULL << 4)
+#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID  (2ULL << 4)
+#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID  (3ULL << 4)
+#define VTD_INV_DESC_PIOTLB_DID(val)      (((val) >> 16) & VTD_DOMAIN_ID_MASK)
+#define VTD_INV_DESC_PIOTLB_PASID(val)    (((val) >> 32) & 0xfffffULL)
+#define VTD_INV_DESC_PIOTLB_RSVD_VAL0     0xfff000000000f1c0ULL
+#define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
+
 /* Information about page-selective IOTLB invalidate */
 struct VTDIOTLBPageInvInfo {
     uint16_t domain_id;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 9e6ef0cb99..72c9c91d4f 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2656,6 +2656,86 @@  static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
     return true;
 }
 
+static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer value,
+                                         gpointer user_data)
+{
+    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
+    VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
+
+    return ((entry->domain_id == info->domain_id) &&
+            (entry->pasid == info->pasid));
+}
+
+static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
+                                        uint16_t domain_id, uint32_t pasid)
+{
+    VTDIOTLBPageInvInfo info;
+    VTDAddressSpace *vtd_as;
+    VTDContextEntry ce;
+
+    info.domain_id = domain_id;
+    info.pasid = pasid;
+
+    vtd_iommu_lock(s);
+    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid,
+                                &info);
+    vtd_iommu_unlock(s);
+
+    QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
+        if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
+                                      vtd_as->devfn, &ce) &&
+            domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
+            uint32_t rid2pasid = VTD_CE_GET_RID2PASID(&ce);
+
+            if ((vtd_as->pasid != PCI_NO_PASID || pasid != rid2pasid) &&
+                vtd_as->pasid != pasid) {
+                continue;
+            }
+
+            if (!s->scalable_modern) {
+                vtd_address_space_sync(vtd_as);
+            }
+        }
+    }
+}
+
+static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
+                                    VTDInvDesc *inv_desc)
+{
+    uint16_t domain_id;
+    uint32_t pasid;
+
+    if ((inv_desc->val[0] & VTD_INV_DESC_PIOTLB_RSVD_VAL0) ||
+        (inv_desc->val[1] & VTD_INV_DESC_PIOTLB_RSVD_VAL1) ||
+        inv_desc->val[2] || inv_desc->val[3]) {
+        error_report_once("%s: invalid piotlb inv desc val[3]=0x%"PRIx64
+                          " val[2]=0x%"PRIx64" val[1]=0x%"PRIx64
+                          " val[0]=0x%"PRIx64" (reserved bits unzero)",
+                          __func__, inv_desc->val[3], inv_desc->val[2],
+                          inv_desc->val[1], inv_desc->val[0]);
+        return false;
+    }
+
+    domain_id = VTD_INV_DESC_PIOTLB_DID(inv_desc->val[0]);
+    pasid = VTD_INV_DESC_PIOTLB_PASID(inv_desc->val[0]);
+    switch (inv_desc->val[0] & VTD_INV_DESC_PIOTLB_G) {
+    case VTD_INV_DESC_PIOTLB_ALL_IN_PASID:
+        vtd_piotlb_pasid_invalidate(s, domain_id, pasid);
+        break;
+
+    case VTD_INV_DESC_PIOTLB_PSI_IN_PASID:
+        break;
+
+    default:
+        error_report_once("%s: invalid piotlb inv desc: hi=0x%"PRIx64
+                          ", lo=0x%"PRIx64" (type mismatch: 0x%llx)",
+                          __func__, inv_desc->val[1], inv_desc->val[0],
+                          inv_desc->val[0] & VTD_INV_DESC_IOTLB_G);
+        return false;
+    }
+    return true;
+}
+
 static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
                                      VTDInvDesc *inv_desc)
 {
@@ -2766,6 +2846,13 @@  static bool vtd_process_inv_desc(IntelIOMMUState *s)
         }
         break;
 
+    case VTD_INV_DESC_PIOTLB:
+        trace_vtd_inv_desc("p-iotlb", inv_desc.val[1], inv_desc.val[0]);
+        if (!vtd_process_piotlb_desc(s, &inv_desc)) {
+            return false;
+        }
+        break;
+
     case VTD_INV_DESC_WAIT:
         trace_vtd_inv_desc("wait", inv_desc.hi, inv_desc.lo);
         if (!vtd_process_wait_desc(s, &inv_desc)) {
@@ -2793,7 +2880,6 @@  static bool vtd_process_inv_desc(IntelIOMMUState *s)
      * iommu driver) work, just return true is enough so far.
      */
     case VTD_INV_DESC_PC:
-    case VTD_INV_DESC_PIOTLB:
         if (s->scalable_mode) {
             break;
         }