diff mbox

[2/3] memory: add iommu_notify_flag

Message ID 1473060081-17835-3-git-send-email-peterx@redhat.com
State New
Headers show

Commit Message

Peter Xu Sept. 5, 2016, 7:21 a.m. UTC
When there are active IOMMU notify registers, the iommu_notify_flag will
cache existing notify flag. When notifiers are triggered, flag will be
checked against the cached result.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 hw/ppc/spapr_iommu.c     | 2 +-
 hw/s390x/s390-pci-inst.c | 2 +-
 include/exec/memory.h    | 6 +++++-
 memory.c                 | 6 +++++-
 4 files changed, 12 insertions(+), 4 deletions(-)

Comments

Paolo Bonzini Sept. 5, 2016, 8:04 a.m. UTC | #1
On 05/09/2016 09:21, Peter Xu wrote:
>  void memory_region_notify_iommu(MemoryRegion *mr,
> -                                IOMMUTLBEntry entry)
> +                                IOMMUTLBEntry entry, IOMMUAccessFlags flag)
>  {
>      assert(memory_region_is_iommu(mr));
> +    assert(flag == mr->iommu_notify_flag);
>      notifier_list_notify(&mr->iommu_notify, &entry);
>  }

Shouldn't it be possible to have IOMMU_RW and IOMMU_NONE on the same
IOMMU, if the IOMMU supports IOMMU_RW at all?

Thanks,

Paolo
Peter Xu Sept. 5, 2016, 8:38 a.m. UTC | #2
On Mon, Sep 05, 2016 at 10:04:42AM +0200, Paolo Bonzini wrote:
> 
> 
> On 05/09/2016 09:21, Peter Xu wrote:
> >  void memory_region_notify_iommu(MemoryRegion *mr,
> > -                                IOMMUTLBEntry entry)
> > +                                IOMMUTLBEntry entry, IOMMUAccessFlags flag)
> >  {
> >      assert(memory_region_is_iommu(mr));
> > +    assert(flag == mr->iommu_notify_flag);
> >      notifier_list_notify(&mr->iommu_notify, &entry);
> >  }
> 
> Shouldn't it be possible to have IOMMU_RW and IOMMU_NONE on the same
> IOMMU, if the IOMMU supports IOMMU_RW at all?

Yeah, this is a good point...

If we see IOMMU_NONE as a subset of IOMMU_RW, we should allow notify
IOMMU_NONE even if the cached flag is IOMMU_RW.

However in this patch I was not meant to do that. I made it an
exclusive flag to identify two different use cases. I don't know
whether this is good, but at least for Intel IOMMU's current use case,
these two types should be totally isolated from each other:

- IOMMU_NONE notification is used by future DMAR-enabled vhost, it
  should only be notified with device-IOTLB invalidations, this will
  only require "Device IOTLB" capability for Intel IOMMUs, and be
  notified in Device IOTLB invalidation handlers.

- IOMMU_RW notifications should only be used for vfio-pci, notified
  with IOTLB invalidations. This will only require Cache Mode (CM=1)
  capability, and will be notified in common IOTLB invalidations (no
  matter whether it's an cache invalidation or not, we will all use
  IOMMU_RW flag for this kind of notifies).

Maybe here naming the flags as IOMMU_{RW_NONE} is a little bit
confusing (just to leverage existing access flags), but what I was
trying to do is to make the two things not overlapped at all, since I
didn't find a mixture use case.

Thanks,

-- peterx
David Gibson Sept. 6, 2016, 5:12 a.m. UTC | #3
On Mon, Sep 05, 2016 at 03:21:20PM +0800, Peter Xu wrote:
> When there are active IOMMU notify registers, the iommu_notify_flag will
> cache existing notify flag. When notifiers are triggered, flag will be
> checked against the cached result.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  hw/ppc/spapr_iommu.c     | 2 +-
>  hw/s390x/s390-pci-inst.c | 2 +-
>  include/exec/memory.h    | 6 +++++-
>  memory.c                 | 6 +++++-
>  4 files changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> index 09660fc..14b1f00 100644
> --- a/hw/ppc/spapr_iommu.c
> +++ b/hw/ppc/spapr_iommu.c
> @@ -411,7 +411,7 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
>      entry.translated_addr = tce & page_mask;
>      entry.addr_mask = ~page_mask;
>      entry.perm = spapr_tce_iommu_access_flags(tce);
> -    memory_region_notify_iommu(&tcet->iommu, entry);
> +    memory_region_notify_iommu(&tcet->iommu, entry, IOMMU_RW);
>  
>      return H_SUCCESS;
>  }
> diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
> index f069b11..29ef345 100644
> --- a/hw/s390x/s390-pci-inst.c
> +++ b/hw/s390x/s390-pci-inst.c
> @@ -616,7 +616,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2)
>              goto out;
>          }
>  
> -        memory_region_notify_iommu(mr, entry);
> +        memory_region_notify_iommu(mr, entry, IOMMU_RW);
>          start += entry.addr_mask + 1;
>      }
>  
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 6c16a86..c67b3da 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -57,6 +57,7 @@ typedef enum {
>      IOMMU_RO   = 1,
>      IOMMU_WO   = 2,
>      IOMMU_RW   = 3,
> +    IOMMU_ACCESS_INVALID = 4,
>  } IOMMUAccessFlags;
>  
>  struct IOMMUTLBEntry {
> @@ -202,6 +203,7 @@ struct MemoryRegion {
>      unsigned ioeventfd_nb;
>      MemoryRegionIoeventfd *ioeventfds;
>      NotifierList iommu_notify;
> +    IOMMUAccessFlags iommu_notify_flag;
>  };
>  
>  /**
> @@ -611,9 +613,11 @@ uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr);
>   * @entry: the new entry in the IOMMU translation table.  The entry
>   *         replaces all old entries for the same virtual I/O address range.
>   *         Deleted entries have .@perm == 0.
> + * @flag: type of IOMMU notification (IOMMU_RW, IOMMU_NONE)

This makes no sense.  The overall type of the notifier is already
noted in register / notify_start.  The permission on this specific
mapping is already included in the IOMMUTLBEntry.

>   */
>  void memory_region_notify_iommu(MemoryRegion *mr,
> -                                IOMMUTLBEntry entry);
> +                                IOMMUTLBEntry entry,
> +                                IOMMUAccessFlags flag);
>  
>  /**
>   * memory_region_register_iommu_notifier: register a notifier for changes to
> diff --git a/memory.c b/memory.c
> index 747a9ec..5b50d15 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -1418,6 +1418,7 @@ void memory_region_init_iommu(MemoryRegion *mr,
>      memory_region_init(mr, owner, name, size);
>      mr->iommu_ops = ops,
>      mr->terminates = true;  /* then re-forwards */
> +    mr->iommu_notify_flag = IOMMU_ACCESS_INVALID;
>      notifier_list_init(&mr->iommu_notify);
>  }
>  
> @@ -1520,6 +1521,7 @@ void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n,
>      if (mr->iommu_ops->notify_started &&
>          QLIST_EMPTY(&mr->iommu_notify.notifiers)) {
>          mr->iommu_ops->notify_started(mr, flag);
> +        mr->iommu_notify_flag = flag;
>      }
>      notifier_list_add(&mr->iommu_notify, n);
>  }
> @@ -1560,13 +1562,15 @@ void memory_region_unregister_iommu_notifier(MemoryRegion *mr, Notifier *n)
>      if (mr->iommu_ops->notify_stopped &&
>          QLIST_EMPTY(&mr->iommu_notify.notifiers)) {
>          mr->iommu_ops->notify_stopped(mr);
> +        mr->iommu_notify_flag = IOMMU_ACCESS_INVALID;
>      }
>  }
>  
>  void memory_region_notify_iommu(MemoryRegion *mr,
> -                                IOMMUTLBEntry entry)
> +                                IOMMUTLBEntry entry, IOMMUAccessFlags flag)
>  {
>      assert(memory_region_is_iommu(mr));
> +    assert(flag == mr->iommu_notify_flag);
>      notifier_list_notify(&mr->iommu_notify, &entry);
>  }
>
David Gibson Sept. 6, 2016, 5:18 a.m. UTC | #4
On Mon, Sep 05, 2016 at 04:38:04PM +0800, Peter Xu wrote:
> On Mon, Sep 05, 2016 at 10:04:42AM +0200, Paolo Bonzini wrote:
> > 
> > 
> > On 05/09/2016 09:21, Peter Xu wrote:
> > >  void memory_region_notify_iommu(MemoryRegion *mr,
> > > -                                IOMMUTLBEntry entry)
> > > +                                IOMMUTLBEntry entry, IOMMUAccessFlags flag)
> > >  {
> > >      assert(memory_region_is_iommu(mr));
> > > +    assert(flag == mr->iommu_notify_flag);
> > >      notifier_list_notify(&mr->iommu_notify, &entry);
> > >  }
> > 
> > Shouldn't it be possible to have IOMMU_RW and IOMMU_NONE on the same
> > IOMMU, if the IOMMU supports IOMMU_RW at all?
> 
> Yeah, this is a good point...
> 
> If we see IOMMU_NONE as a subset of IOMMU_RW, we should allow notify
> IOMMU_NONE even if the cached flag is IOMMU_RW.
> 
> However in this patch I was not meant to do that. I made it an
> exclusive flag to identify two different use cases. I don't know
> whether this is good, but at least for Intel IOMMU's current use case,
> these two types should be totally isolated from each other:

It's not good, it's horrible.  This makes a hideous interface that's
just a collection of separate use cases without any coherent
underlying semantics.

> - IOMMU_NONE notification is used by future DMAR-enabled vhost, it
>   should only be notified with device-IOTLB invalidations, this will
>   only require "Device IOTLB" capability for Intel IOMMUs, and be
>   notified in Device IOTLB invalidation handlers.
> 
> - IOMMU_RW notifications should only be used for vfio-pci, notified
>   with IOTLB invalidations. This will only require Cache Mode (CM=1)
>   capability, and will be notified in common IOTLB invalidations (no
>   matter whether it's an cache invalidation or not, we will all use
>   IOMMU_RW flag for this kind of notifies).
> 
> Maybe here naming the flags as IOMMU_{RW_NONE} is a little bit
> confusing (just to leverage existing access flags), but what I was
> trying to do is to make the two things not overlapped at all, since I
> didn't find a mixture use case.

Maybe not now, but a common use case is absolutely possible.  If you
had a single (guest) bus with a single IO address space, with vIOMMU
and both VFIO and vhost devices on it, the same vIOMMU would need to
send all notifications to VFIO and (at least) unmap notifications to
vhost.
Peter Xu Sept. 6, 2016, 5:33 a.m. UTC | #5
On Tue, Sep 06, 2016 at 03:12:26PM +1000, David Gibson wrote:
> >  /**
> > @@ -611,9 +613,11 @@ uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr);
> >   * @entry: the new entry in the IOMMU translation table.  The entry
> >   *         replaces all old entries for the same virtual I/O address range.
> >   *         Deleted entries have .@perm == 0.
> > + * @flag: type of IOMMU notification (IOMMU_RW, IOMMU_NONE)
> 
> This makes no sense.  The overall type of the notifier is already
> noted in register / notify_start.  The permission on this specific
> mapping is already included in the IOMMUTLBEntry.

This is not meant to be the same as the one in IOMMUTLBEntry. For
example, we can have:

  memory_region_notify_iommu(..., entry, IOMMU_RW);

This means that the caller of notification will notify all changed
entries (will only be used for checking, see the assert() in
memory_region_notify_iommu()). While within the entry, we can have:

  entry.perm == IOMMU_NONE

Which means that this specific invalidation is an unmap() operation.

The naming is bad.

We can put this aside and see whether we can have better solution in
general...

Thanks,

-- peterx
Peter Xu Sept. 6, 2016, 5:55 a.m. UTC | #6
On Tue, Sep 06, 2016 at 03:18:46PM +1000, David Gibson wrote:
> > Maybe here naming the flags as IOMMU_{RW_NONE} is a little bit
> > confusing (just to leverage existing access flags), but what I was
> > trying to do is to make the two things not overlapped at all, since I
> > didn't find a mixture use case.
> 
> Maybe not now, but a common use case is absolutely possible.  If you
> had a single (guest) bus with a single IO address space, with vIOMMU
> and both VFIO and vhost devices on it, the same vIOMMU would need to
> send all notifications to VFIO and (at least) unmap notifications to
> vhost.

Yeah this is a good one... Thanks to point out.

Then I agree that splitting the use cases won't be enough... We may
need to exactly register IOMMU notifiers with notification type
(invalidations only, or we also need entry additions), and we just
selectively notify the consumers depending on what kind of
notification it is.

Or any smarter way to do it.

-- peterx
diff mbox

Patch

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 09660fc..14b1f00 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -411,7 +411,7 @@  static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
     entry.translated_addr = tce & page_mask;
     entry.addr_mask = ~page_mask;
     entry.perm = spapr_tce_iommu_access_flags(tce);
-    memory_region_notify_iommu(&tcet->iommu, entry);
+    memory_region_notify_iommu(&tcet->iommu, entry, IOMMU_RW);
 
     return H_SUCCESS;
 }
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index f069b11..29ef345 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -616,7 +616,7 @@  int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2)
             goto out;
         }
 
-        memory_region_notify_iommu(mr, entry);
+        memory_region_notify_iommu(mr, entry, IOMMU_RW);
         start += entry.addr_mask + 1;
     }
 
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 6c16a86..c67b3da 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -57,6 +57,7 @@  typedef enum {
     IOMMU_RO   = 1,
     IOMMU_WO   = 2,
     IOMMU_RW   = 3,
+    IOMMU_ACCESS_INVALID = 4,
 } IOMMUAccessFlags;
 
 struct IOMMUTLBEntry {
@@ -202,6 +203,7 @@  struct MemoryRegion {
     unsigned ioeventfd_nb;
     MemoryRegionIoeventfd *ioeventfds;
     NotifierList iommu_notify;
+    IOMMUAccessFlags iommu_notify_flag;
 };
 
 /**
@@ -611,9 +613,11 @@  uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr);
  * @entry: the new entry in the IOMMU translation table.  The entry
  *         replaces all old entries for the same virtual I/O address range.
  *         Deleted entries have .@perm == 0.
+ * @flag: type of IOMMU notification (IOMMU_RW, IOMMU_NONE)
  */
 void memory_region_notify_iommu(MemoryRegion *mr,
-                                IOMMUTLBEntry entry);
+                                IOMMUTLBEntry entry,
+                                IOMMUAccessFlags flag);
 
 /**
  * memory_region_register_iommu_notifier: register a notifier for changes to
diff --git a/memory.c b/memory.c
index 747a9ec..5b50d15 100644
--- a/memory.c
+++ b/memory.c
@@ -1418,6 +1418,7 @@  void memory_region_init_iommu(MemoryRegion *mr,
     memory_region_init(mr, owner, name, size);
     mr->iommu_ops = ops,
     mr->terminates = true;  /* then re-forwards */
+    mr->iommu_notify_flag = IOMMU_ACCESS_INVALID;
     notifier_list_init(&mr->iommu_notify);
 }
 
@@ -1520,6 +1521,7 @@  void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n,
     if (mr->iommu_ops->notify_started &&
         QLIST_EMPTY(&mr->iommu_notify.notifiers)) {
         mr->iommu_ops->notify_started(mr, flag);
+        mr->iommu_notify_flag = flag;
     }
     notifier_list_add(&mr->iommu_notify, n);
 }
@@ -1560,13 +1562,15 @@  void memory_region_unregister_iommu_notifier(MemoryRegion *mr, Notifier *n)
     if (mr->iommu_ops->notify_stopped &&
         QLIST_EMPTY(&mr->iommu_notify.notifiers)) {
         mr->iommu_ops->notify_stopped(mr);
+        mr->iommu_notify_flag = IOMMU_ACCESS_INVALID;
     }
 }
 
 void memory_region_notify_iommu(MemoryRegion *mr,
-                                IOMMUTLBEntry entry)
+                                IOMMUTLBEntry entry, IOMMUAccessFlags flag)
 {
     assert(memory_region_is_iommu(mr));
+    assert(flag == mr->iommu_notify_flag);
     notifier_list_notify(&mr->iommu_notify, &entry);
 }