diff mbox

[RFC,v3,01/14] IOMMU: add option to enable VTD_CAP_CM to vIOMMU capility exposoed to guest

Message ID 1484276800-26814-2-git-send-email-peterx@redhat.com
State New
Headers show

Commit Message

Peter Xu Jan. 13, 2017, 3:06 a.m. UTC
From: Aviv Ben-David <bd.aviv@gmail.com>

This capability asks the guest to invalidate cache before each map operation.
We can use this invalidation to trap map operations in the hypervisor.

Signed-off-by: Aviv Ben-David <bd.aviv@gmail.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
---
 hw/i386/intel_iommu.c          | 5 +++++
 hw/i386/intel_iommu_internal.h | 1 +
 include/hw/i386/intel_iommu.h  | 2 ++
 3 files changed, 8 insertions(+)

Comments

Tian, Kevin Jan. 20, 2017, 8:32 a.m. UTC | #1
> From: Peter Xu [mailto:peterx@redhat.com]
> Sent: Friday, January 13, 2017 11:06 AM
> 
> From: Aviv Ben-David <bd.aviv@gmail.com>
> 
> This capability asks the guest to invalidate cache before each map operation.
> We can use this invalidation to trap map operations in the hypervisor.
> 
> Signed-off-by: Aviv Ben-David <bd.aviv@gmail.com>
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  hw/i386/intel_iommu.c          | 5 +++++
>  hw/i386/intel_iommu_internal.h | 1 +
>  include/hw/i386/intel_iommu.h  | 2 ++
>  3 files changed, 8 insertions(+)
> 
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index ec62239..2868e37 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -2107,6 +2107,7 @@ static Property vtd_properties[] = {
>      DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim,
>                              ON_OFF_AUTO_AUTO),
>      DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),
> +    DEFINE_PROP_BOOL("cache-mode", IntelIOMMUState, cache_mode_enabled,
> FALSE),
>      DEFINE_PROP_END_OF_LIST(),
>  };
> 
> @@ -2488,6 +2489,10 @@ static void vtd_init(IntelIOMMUState *s)
>          s->ecap |= VTD_ECAP_DT;
>      }
> 
> +    if (s->cache_mode_enabled) {
> +        s->cap |= VTD_CAP_CM;
> +    }
> +

I think some of my old comments are not answered:

1) Better to use caching_mode to follow spec

2) Does it make sense to automatically set this flag if any VFIO device
has been statically assigned when starting Qemu? Also for hot-add
device path, some check of caching mode is required. If not set, 
should we fail hot-add operation? I don't think we have such physical
platform with some devices behind IOMMU while others not.

>      vtd_reset_context_cache(s);
>      vtd_reset_iotlb(s);
> 
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 356f188..4104121 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -202,6 +202,7 @@
>  #define VTD_CAP_MAMV                (VTD_MAMV << 48)
>  #define VTD_CAP_PSI                 (1ULL << 39)
>  #define VTD_CAP_SLLPS               ((1ULL << 34) | (1ULL << 35))
> +#define VTD_CAP_CM                  (1ULL << 7)
> 
>  /* Supported Adjusted Guest Address Widths */
>  #define VTD_CAP_SAGAW_SHIFT         8
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 405c9d1..749eef9 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -257,6 +257,8 @@ struct IntelIOMMUState {
>      uint8_t womask[DMAR_REG_SIZE];  /* WO (write only - read returns 0) */
>      uint32_t version;
> 
> +    bool cache_mode_enabled;        /* RO - is cap CM enabled? */
> +
>      dma_addr_t root;                /* Current root table pointer */
>      bool root_extended;             /* Type of root table (extended or not) */
>      bool dmar_enabled;              /* Set if DMA remapping is enabled */
> --
> 2.7.4
Peter Xu Jan. 20, 2017, 8:54 a.m. UTC | #2
On Fri, Jan 20, 2017 at 08:32:06AM +0000, Tian, Kevin wrote:
> > From: Peter Xu [mailto:peterx@redhat.com]
> > Sent: Friday, January 13, 2017 11:06 AM
> > 
> > From: Aviv Ben-David <bd.aviv@gmail.com>
> > 
> > This capability asks the guest to invalidate cache before each map operation.
> > We can use this invalidation to trap map operations in the hypervisor.
> > 
> > Signed-off-by: Aviv Ben-David <bd.aviv@gmail.com>
> > Signed-off-by: Peter Xu <peterx@redhat.com>
> > ---
> >  hw/i386/intel_iommu.c          | 5 +++++
> >  hw/i386/intel_iommu_internal.h | 1 +
> >  include/hw/i386/intel_iommu.h  | 2 ++
> >  3 files changed, 8 insertions(+)
> > 
> > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> > index ec62239..2868e37 100644
> > --- a/hw/i386/intel_iommu.c
> > +++ b/hw/i386/intel_iommu.c
> > @@ -2107,6 +2107,7 @@ static Property vtd_properties[] = {
> >      DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim,
> >                              ON_OFF_AUTO_AUTO),
> >      DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),
> > +    DEFINE_PROP_BOOL("cache-mode", IntelIOMMUState, cache_mode_enabled,
> > FALSE),
> >      DEFINE_PROP_END_OF_LIST(),
> >  };
> > 
> > @@ -2488,6 +2489,10 @@ static void vtd_init(IntelIOMMUState *s)
> >          s->ecap |= VTD_ECAP_DT;
> >      }
> > 
> > +    if (s->cache_mode_enabled) {
> > +        s->cap |= VTD_CAP_CM;
> > +    }
> > +
> 
> I think some of my old comments are not answered:
> 
> 1) Better to use caching_mode to follow spec

Sure.

> 
> 2) Does it make sense to automatically set this flag if any VFIO device
> has been statically assigned when starting Qemu?

I'm okay with both, considering that people using this flag will be
possibly advanced users. So I would like to hear others' opinion.

> Also for hot-add
> device path, some check of caching mode is required. If not set, 
> should we fail hot-add operation? I don't think we have such physical
> platform with some devices behind IOMMU while others not.

Could you explain in what case will we fail a hot plug?

Thanks,

-- peterx
Tian, Kevin Jan. 20, 2017, 8:59 a.m. UTC | #3
> From: Peter Xu [mailto:peterx@redhat.com]

> Sent: Friday, January 20, 2017 4:55 PM

> 

> On Fri, Jan 20, 2017 at 08:32:06AM +0000, Tian, Kevin wrote:

> > > From: Peter Xu [mailto:peterx@redhat.com]

> > > Sent: Friday, January 13, 2017 11:06 AM

> > >

> > > From: Aviv Ben-David <bd.aviv@gmail.com>

> > >

> > > This capability asks the guest to invalidate cache before each map operation.

> > > We can use this invalidation to trap map operations in the hypervisor.

> > >

> > > Signed-off-by: Aviv Ben-David <bd.aviv@gmail.com>

> > > Signed-off-by: Peter Xu <peterx@redhat.com>

> > > ---

> > >  hw/i386/intel_iommu.c          | 5 +++++

> > >  hw/i386/intel_iommu_internal.h | 1 +

> > >  include/hw/i386/intel_iommu.h  | 2 ++

> > >  3 files changed, 8 insertions(+)

> > >

> > > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c

> > > index ec62239..2868e37 100644

> > > --- a/hw/i386/intel_iommu.c

> > > +++ b/hw/i386/intel_iommu.c

> > > @@ -2107,6 +2107,7 @@ static Property vtd_properties[] = {

> > >      DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim,

> > >                              ON_OFF_AUTO_AUTO),

> > >      DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),

> > > +    DEFINE_PROP_BOOL("cache-mode", IntelIOMMUState, cache_mode_enabled,

> > > FALSE),

> > >      DEFINE_PROP_END_OF_LIST(),

> > >  };

> > >

> > > @@ -2488,6 +2489,10 @@ static void vtd_init(IntelIOMMUState *s)

> > >          s->ecap |= VTD_ECAP_DT;

> > >      }

> > >

> > > +    if (s->cache_mode_enabled) {

> > > +        s->cap |= VTD_CAP_CM;

> > > +    }

> > > +

> >

> > I think some of my old comments are not answered:

> >

> > 1) Better to use caching_mode to follow spec

> 

> Sure.

> 

> >

> > 2) Does it make sense to automatically set this flag if any VFIO device

> > has been statically assigned when starting Qemu?

> 

> I'm okay with both, considering that people using this flag will be

> possibly advanced users. So I would like to hear others' opinion.

> 

> > Also for hot-add

> > device path, some check of caching mode is required. If not set,

> > should we fail hot-add operation? I don't think we have such physical

> > platform with some devices behind IOMMU while others not.

> 

> Could you explain in what case will we fail a hot plug?

> 


user enables intel-iommu, but don't set caching mode.

Then later user hot-add a PCI device to the VM. Guest will assume
newly assigned device also behind the default vIOMMU, and thus
needs to setup IOVA mappings, which is then broken...

Thanks
Kevin
Peter Xu Jan. 20, 2017, 9:11 a.m. UTC | #4
On Fri, Jan 20, 2017 at 08:59:01AM +0000, Tian, Kevin wrote:

[...]

> > > Also for hot-add
> > > device path, some check of caching mode is required. If not set,
> > > should we fail hot-add operation? I don't think we have such physical
> > > platform with some devices behind IOMMU while others not.
> > 
> > Could you explain in what case will we fail a hot plug?
> > 
> 
> user enables intel-iommu, but don't set caching mode.
> 
> Then later user hot-add a PCI device to the VM. Guest will assume
> newly assigned device also behind the default vIOMMU, and thus
> needs to setup IOVA mappings, which is then broken...

Is the newly added device a vfio-pci device? If so, we should hit
this and VM will stops to work:

    if (!s->cache_mode_enabled && new & IOMMU_NOTIFIER_MAP) {
        error_report("We need to set cache_mode=1 for intel-iommu to enable "
                     "device assignment with IOMMU protection.");
        exit(1);
    }

I admit this is not user-friendly, and a better way may be that we
disallow the hot-plug in that case, telling the user about the error,
rather than crashing the VM. But, I think that can be a patch outside
this series, considering (again) that this only affects advanced
users.

Thanks,

-- peterx
Tian, Kevin Jan. 20, 2017, 9:20 a.m. UTC | #5
> From: Peter Xu [mailto:peterx@redhat.com]

> Sent: Friday, January 20, 2017 5:12 PM

> 

> On Fri, Jan 20, 2017 at 08:59:01AM +0000, Tian, Kevin wrote:

> 

> [...]

> 

> > > > Also for hot-add

> > > > device path, some check of caching mode is required. If not set,

> > > > should we fail hot-add operation? I don't think we have such physical

> > > > platform with some devices behind IOMMU while others not.

> > >

> > > Could you explain in what case will we fail a hot plug?

> > >

> >

> > user enables intel-iommu, but don't set caching mode.

> >

> > Then later user hot-add a PCI device to the VM. Guest will assume

> > newly assigned device also behind the default vIOMMU, and thus

> > needs to setup IOVA mappings, which is then broken...

> 

> Is the newly added device a vfio-pci device? If so, we should hit

> this and VM will stops to work:

> 

>     if (!s->cache_mode_enabled && new & IOMMU_NOTIFIER_MAP) {

>         error_report("We need to set cache_mode=1 for intel-iommu to enable "

>                      "device assignment with IOMMU protection.");

>         exit(1);

>     }


sorry I didn't found this code. In which code path is it hit?

> 

> I admit this is not user-friendly, and a better way may be that we

> disallow the hot-plug in that case, telling the user about the error,

> rather than crashing the VM. But, I think that can be a patch outside

> this series, considering (again) that this only affects advanced

> users.

> 


Crashing VM is bad.... but anyway, I'll leave maintainer to decide
whether they'd like it fixed now or later. :-)

Thanks
Kevin
Peter Xu Jan. 20, 2017, 9:30 a.m. UTC | #6
On Fri, Jan 20, 2017 at 09:20:01AM +0000, Tian, Kevin wrote:
> > From: Peter Xu [mailto:peterx@redhat.com]
> > Sent: Friday, January 20, 2017 5:12 PM
> > 
> > On Fri, Jan 20, 2017 at 08:59:01AM +0000, Tian, Kevin wrote:
> > 
> > [...]
> > 
> > > > > Also for hot-add
> > > > > device path, some check of caching mode is required. If not set,
> > > > > should we fail hot-add operation? I don't think we have such physical
> > > > > platform with some devices behind IOMMU while others not.
> > > >
> > > > Could you explain in what case will we fail a hot plug?
> > > >
> > >
> > > user enables intel-iommu, but don't set caching mode.
> > >
> > > Then later user hot-add a PCI device to the VM. Guest will assume
> > > newly assigned device also behind the default vIOMMU, and thus
> > > needs to setup IOVA mappings, which is then broken...
> > 
> > Is the newly added device a vfio-pci device? If so, we should hit
> > this and VM will stops to work:
> > 
> >     if (!s->cache_mode_enabled && new & IOMMU_NOTIFIER_MAP) {
> >         error_report("We need to set cache_mode=1 for intel-iommu to enable "
> >                      "device assignment with IOMMU protection.");
> >         exit(1);
> >     }
> 
> sorry I didn't found this code. In which code path is it hit?

It's in patch 14/14 of this series.

> 
> > 
> > I admit this is not user-friendly, and a better way may be that we
> > disallow the hot-plug in that case, telling the user about the error,
> > rather than crashing the VM. But, I think that can be a patch outside
> > this series, considering (again) that this only affects advanced
> > users.
> > 
> 
> Crashing VM is bad.... but anyway, I'll leave maintainer to decide
> whether they'd like it fixed now or later. :-)

Sure. Thanks,

-- peterx
Eric Blake Jan. 20, 2017, 3:42 p.m. UTC | #7
On 01/12/2017 09:06 PM, Peter Xu wrote:
> From: Aviv Ben-David <bd.aviv@gmail.com>

Long subject line, please try to keep it around 60 or so characters
(look at 'git shortlog -30' for comparison).  Also, fix the typos:
s/capility exposoed/capability exposed/

> 
> This capability asks the guest to invalidate cache before each map operation.
> We can use this invalidation to trap map operations in the hypervisor.
> 
> Signed-off-by: Aviv Ben-David <bd.aviv@gmail.com>
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
Peter Xu Jan. 22, 2017, 2:32 a.m. UTC | #8
On Fri, Jan 20, 2017 at 09:42:25AM -0600, Eric Blake wrote:
> On 01/12/2017 09:06 PM, Peter Xu wrote:
> > From: Aviv Ben-David <bd.aviv@gmail.com>
> 
> Long subject line, please try to keep it around 60 or so characters
> (look at 'git shortlog -30' for comparison).  Also, fix the typos:
> s/capility exposoed/capability exposed/

Will fix this and repost this single patch as v4.1 based on v4 series.

Thanks!

> 
> > 
> > This capability asks the guest to invalidate cache before each map operation.
> > We can use this invalidation to trap map operations in the hypervisor.
> > 
> > Signed-off-by: Aviv Ben-David <bd.aviv@gmail.com>
> > Signed-off-by: Peter Xu <peterx@redhat.com>
> > ---
> 
> -- 
> Eric Blake   eblake redhat com    +1-919-301-3266
> Libvirt virtualization library http://libvirt.org
> 

-- peterx
diff mbox

Patch

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index ec62239..2868e37 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2107,6 +2107,7 @@  static Property vtd_properties[] = {
     DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim,
                             ON_OFF_AUTO_AUTO),
     DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),
+    DEFINE_PROP_BOOL("cache-mode", IntelIOMMUState, cache_mode_enabled, FALSE),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -2488,6 +2489,10 @@  static void vtd_init(IntelIOMMUState *s)
         s->ecap |= VTD_ECAP_DT;
     }
 
+    if (s->cache_mode_enabled) {
+        s->cap |= VTD_CAP_CM;
+    }
+
     vtd_reset_context_cache(s);
     vtd_reset_iotlb(s);
 
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 356f188..4104121 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -202,6 +202,7 @@ 
 #define VTD_CAP_MAMV                (VTD_MAMV << 48)
 #define VTD_CAP_PSI                 (1ULL << 39)
 #define VTD_CAP_SLLPS               ((1ULL << 34) | (1ULL << 35))
+#define VTD_CAP_CM                  (1ULL << 7)
 
 /* Supported Adjusted Guest Address Widths */
 #define VTD_CAP_SAGAW_SHIFT         8
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 405c9d1..749eef9 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -257,6 +257,8 @@  struct IntelIOMMUState {
     uint8_t womask[DMAR_REG_SIZE];  /* WO (write only - read returns 0) */
     uint32_t version;
 
+    bool cache_mode_enabled;        /* RO - is cap CM enabled? */
+
     dma_addr_t root;                /* Current root table pointer */
     bool root_extended;             /* Type of root table (extended or not) */
     bool dmar_enabled;              /* Set if DMA remapping is enabled */