@@ -185,6 +185,107 @@ static int vhost_kernel_vsock_set_running(struct vhost_dev *dev, int start)
}
#endif /* CONFIG_VHOST_VSOCK */
+static void vhost_kernel_iotlb_read(void *opaque)
+{
+ struct vhost_dev *dev = opaque;
+ struct vhost_msg msg;
+ ssize_t len;
+
+ while ((len = read((uintptr_t)dev->opaque, &msg, sizeof msg)) > 0) {
+ struct vhost_iotlb_msg *imsg = &msg.iotlb;
+ if (len < sizeof msg) {
+ error_report("Wrong vhost message len: %d", (int)len);
+ break;
+ }
+ if (msg.type != VHOST_IOTLB_MSG) {
+ error_report("Unknown vhost iotlb message type");
+ break;
+ }
+ switch (imsg->type) {
+ case VHOST_IOTLB_MISS:
+ vhost_device_iotlb_miss(dev, imsg->iova,
+ imsg->perm != VHOST_ACCESS_RO);
+ break;
+ case VHOST_IOTLB_UPDATE:
+ case VHOST_IOTLB_INVALIDATE:
+ error_report("Unexpected IOTLB message type");
+ break;
+ case VHOST_IOTLB_ACCESS_FAIL:
+ /* FIXME: report device iotlb error */
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static int vhost_kernel_update_device_iotlb(struct vhost_dev *dev,
+ uint64_t iova, uint64_t uaddr,
+ uint64_t len,
+ IOMMUAccessFlags perm)
+{
+ struct vhost_msg msg = {
+ .type = VHOST_IOTLB_MSG,
+ .iotlb = {
+ .iova = iova,
+ .uaddr = uaddr,
+ .size = len,
+ .type = VHOST_IOTLB_UPDATE,
+ }
+ };
+
+ switch (perm) {
+ case IOMMU_RO:
+ msg.iotlb.perm = VHOST_ACCESS_RO;
+ break;
+ case IOMMU_WO:
+ msg.iotlb.perm = VHOST_ACCESS_WO;
+ break;
+ case IOMMU_RW:
+ msg.iotlb.perm = VHOST_ACCESS_RW;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (write((uintptr_t)dev->opaque, &msg, sizeof msg) != sizeof msg) {
+ error_report("Fail to update device iotlb");
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int vhost_kernel_invalidate_device_iotlb(struct vhost_dev *dev,
+ uint64_t iova, uint64_t len)
+{
+ struct vhost_msg msg = {
+ .type = VHOST_IOTLB_MSG,
+ .iotlb = {
+ .iova = iova,
+ .size = len,
+ .type = VHOST_IOTLB_INVALIDATE,
+ }
+ };
+
+ if (write((uintptr_t)dev->opaque, &msg, sizeof msg) != sizeof msg) {
+ error_report("Fail to invalidate device iotlb");
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static void vhost_kernel_set_iotlb_callback(struct vhost_dev *dev,
+ int enabled)
+{
+ if (enabled)
+ qemu_set_fd_handler((uintptr_t)dev->opaque,
+ vhost_kernel_iotlb_read, NULL, dev);
+ else
+ qemu_set_fd_handler((uintptr_t)dev->opaque, NULL, NULL, NULL);
+}
+
static const VhostOps kernel_ops = {
.backend_type = VHOST_BACKEND_TYPE_KERNEL,
.vhost_backend_init = vhost_kernel_init,
@@ -214,6 +315,9 @@ static const VhostOps kernel_ops = {
.vhost_vsock_set_guest_cid = vhost_kernel_vsock_set_guest_cid,
.vhost_vsock_set_running = vhost_kernel_vsock_set_running,
#endif /* CONFIG_VHOST_VSOCK */
+ .vhost_set_iotlb_callback = vhost_kernel_set_iotlb_callback,
+ .vhost_update_device_iotlb = vhost_kernel_update_device_iotlb,
+ .vhost_invalidate_device_iotlb = vhost_kernel_invalidate_device_iotlb,
};
int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type)
@@ -421,6 +421,33 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
dev->log_size = size;
}
+static void *vhost_memory_map(VirtIODevice *vdev, hwaddr addr,
+ hwaddr *plen, int is_write)
+{
+ AddressSpace *dma_as = virtio_get_dma_as(vdev);
+
+ if (!memory_region_is_iommu(dma_as->root)) {
+ return dma_memory_map(dma_as, addr, plen, is_write ?
+ DMA_DIRECTION_FROM_DEVICE :
+ DMA_DIRECTION_TO_DEVICE);
+ } else {
+ return (void *)(uintptr_t)addr;
+ }
+}
+
+static void vhost_memory_unmap(VirtIODevice *vdev, void *buffer,
+ hwaddr len, int is_write,
+ hwaddr access_len)
+{
+ AddressSpace *dma_as = virtio_get_dma_as(vdev);
+
+ if (!memory_region_is_iommu(dma_as->root)) {
+ dma_memory_unmap(dma_as, buffer, len, is_write ?
+ DMA_DIRECTION_FROM_DEVICE : DMA_DIRECTION_TO_DEVICE,
+ access_len);
+ }
+}
+
static int vhost_verify_ring_mappings(struct vhost_dev *dev,
uint64_t start_addr,
uint64_t size)
@@ -437,7 +464,7 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,
continue;
}
l = vq->ring_size;
- p = cpu_physical_memory_map(vq->ring_phys, &l, 1);
+ p = vhost_memory_map(dev->vdev, vq->ring_phys, &l, 1);
if (!p || l != vq->ring_size) {
error_report("Unable to map ring buffer for ring %d", i);
r = -ENOMEM;
@@ -446,7 +473,7 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,
error_report("Ring buffer relocated for ring %d", i);
r = -EBUSY;
}
- cpu_physical_memory_unmap(p, l, 0, 0);
+ vhost_memory_unmap(dev->vdev, p, l, 0, 0);
}
return r;
}
@@ -674,13 +701,27 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
return 0;
}
-static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log)
+static int vhost_dev_has_iommu(struct vhost_dev *dev)
+{
+ VirtIODevice *vdev = dev->vdev;
+ AddressSpace *dma_as = virtio_get_dma_as(vdev);
+
+ return memory_region_is_iommu(dma_as->root) &&
+ virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+}
+
+static int vhost_dev_set_features(struct vhost_dev *dev,
+ bool enable_log)
{
uint64_t features = dev->acked_features;
+ bool has_iommu = vhost_dev_has_iommu(dev);
int r;
if (enable_log) {
features |= 0x1ULL << VHOST_F_LOG_ALL;
}
+ if (has_iommu) {
+ features |= 0x1ULL << VIRTIO_F_IOMMU_PLATFORM;
+ }
r = dev->vhost_ops->vhost_set_features(dev, features);
if (r < 0) {
VHOST_OPS_DEBUG("vhost_set_features failed");
@@ -817,6 +858,56 @@ static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev,
return -errno;
}
+static int vhost_memory_region_lookup(struct vhost_dev *hdev,
+ uint64_t gpa, uint64_t *uaddr,
+ uint64_t *len)
+{
+ int i;
+
+ for (i = 0; i < hdev->mem->nregions; i++) {
+ struct vhost_memory_region *reg = hdev->mem->regions + i;
+
+ if (gpa >= reg->guest_phys_addr &&
+ reg->guest_phys_addr + reg->memory_size > gpa) {
+ *uaddr = reg->userspace_addr + gpa - reg->guest_phys_addr;
+ *len = reg->guest_phys_addr + reg->memory_size - gpa;
+ return 0;
+ }
+ }
+
+ return -EFAULT;
+}
+
+void vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
+{
+ IOMMUTLBEntry iotlb;
+ uint64_t uaddr, len;
+
+ rcu_read_lock();
+
+ iotlb = address_space_get_iotlb_entry(virtio_get_dma_as(dev->vdev),
+ iova, write);
+ if (iotlb.target_as != NULL) {
+ if (vhost_memory_region_lookup(dev, iotlb.translated_addr,
+ &uaddr, &len)) {
+ error_report("Fail to lookup the translated address "
+ "%"PRIx64, iotlb.translated_addr);
+ goto out;
+ }
+
+ len = MIN(iotlb.addr_mask + 1, len);
+ iova = iova & ~iotlb.addr_mask;
+
+ if (dev->vhost_ops->vhost_update_device_iotlb(dev, iova, uaddr,
+ len, iotlb.perm)) {
+ error_report("Fail to update device iotlb");
+ goto out;
+ }
+ }
+out:
+ rcu_read_unlock();
+}
+
static int vhost_virtqueue_start(struct vhost_dev *dev,
struct VirtIODevice *vdev,
struct vhost_virtqueue *vq,
@@ -862,21 +953,21 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
s = l = virtio_queue_get_desc_size(vdev, idx);
a = virtio_queue_get_desc_addr(vdev, idx);
- vq->desc = cpu_physical_memory_map(a, &l, 0);
+ vq->desc = vhost_memory_map(vdev, a, &l, 0);
if (!vq->desc || l != s) {
r = -ENOMEM;
goto fail_alloc_desc;
}
s = l = virtio_queue_get_avail_size(vdev, idx);
a = virtio_queue_get_avail_addr(vdev, idx);
- vq->avail = cpu_physical_memory_map(a, &l, 0);
+ vq->avail = vhost_memory_map(vdev, a, &l, 0);
if (!vq->avail || l != s) {
r = -ENOMEM;
goto fail_alloc_avail;
}
vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
- vq->used = cpu_physical_memory_map(a, &l, 1);
+ vq->used = vhost_memory_map(vdev, a, &l, 1);
if (!vq->used || l != s) {
r = -ENOMEM;
goto fail_alloc_used;
@@ -884,7 +975,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
vq->ring_size = s = l = virtio_queue_get_ring_size(vdev, idx);
vq->ring_phys = a = virtio_queue_get_ring_addr(vdev, idx);
- vq->ring = cpu_physical_memory_map(a, &l, 1);
+ vq->ring = vhost_memory_map(vdev, a, &l, 1);
if (!vq->ring || l != s) {
r = -ENOMEM;
goto fail_alloc_ring;
@@ -930,17 +1021,17 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
fail_vector:
fail_kick:
fail_alloc:
- cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
- 0, 0);
+ vhost_memory_unmap(vdev, vq->ring, virtio_queue_get_ring_size(vdev, idx),
+ 0, 0);
fail_alloc_ring:
- cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
- 0, 0);
+ vhost_memory_unmap(vdev, vq->used, virtio_queue_get_used_size(vdev, idx),
+ 0, 0);
fail_alloc_used:
- cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
- 0, 0);
+ vhost_memory_unmap(vdev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
+ 0, 0);
fail_alloc_avail:
- cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
- 0, 0);
+ vhost_memory_unmap(vdev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
+ 0, 0);
fail_alloc_desc:
return r;
}
@@ -973,14 +1064,14 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev,
vhost_vq_index);
}
- cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
- 0, virtio_queue_get_ring_size(vdev, idx));
- cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
- 1, virtio_queue_get_used_size(vdev, idx));
- cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
- 0, virtio_queue_get_avail_size(vdev, idx));
- cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
- 0, virtio_queue_get_desc_size(vdev, idx));
+ vhost_memory_unmap(vdev, vq->ring, virtio_queue_get_ring_size(vdev, idx),
+ 0, virtio_queue_get_ring_size(vdev, idx));
+ vhost_memory_unmap(vdev, vq->used, virtio_queue_get_used_size(vdev, idx),
+ 1, virtio_queue_get_used_size(vdev, idx));
+ vhost_memory_unmap(vdev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
+ 0, virtio_queue_get_avail_size(vdev, idx));
+ vhost_memory_unmap(vdev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
+ 0, virtio_queue_get_desc_size(vdev, idx));
}
static void vhost_eventfd_add(MemoryListener *listener,
@@ -1037,6 +1128,9 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
r = -errno;
goto fail_call;
}
+
+ vq->dev = dev;
+
return 0;
fail_call:
event_notifier_cleanup(&vq->masked_notifier);
@@ -1048,12 +1142,24 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
event_notifier_cleanup(&vq->masked_notifier);
}
+static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+{
+ struct vhost_dev *hdev = container_of(n, struct vhost_dev, n);
+
+ if (hdev->vhost_ops->vhost_invalidate_device_iotlb(hdev,
+ iotlb->iova,
+ iotlb->addr_mask + 1)) {
+ error_report("Fail to invalidate device iotlb");
+ }
+}
+
int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
VhostBackendType backend_type, uint32_t busyloop_timeout)
{
uint64_t features;
int i, r, n_initialized_vqs = 0;
+ hdev->vdev = NULL;
hdev->migration_blocker = NULL;
r = vhost_set_backend_type(hdev, backend_type);
@@ -1118,6 +1224,9 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
.priority = 10
};
+ hdev->n.notify = vhost_iommu_unmap_notify;
+ hdev->n.notifier_flags = IOMMU_NOTIFIER_UNMAP;
+
if (hdev->migration_blocker == NULL) {
if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
error_setg(&hdev->migration_blocker,
@@ -1310,11 +1419,18 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
assert(hdev->vhost_ops);
hdev->started = true;
+ hdev->vdev = vdev;
r = vhost_dev_set_features(hdev, hdev->log_enabled);
if (r < 0) {
goto fail_features;
}
+
+ if (vhost_dev_has_iommu(hdev)) {
+ memory_region_register_iommu_notifier(virtio_get_dma_as(vdev)->root,
+ &hdev->n);
+ }
+
r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem);
if (r < 0) {
VHOST_OPS_DEBUG("vhost_set_mem_table failed");
@@ -1348,6 +1464,16 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
}
}
+ hdev->vhost_ops->vhost_set_iotlb_callback(hdev, true);
+
+ if (vhost_dev_has_iommu(hdev)) {
+ /* Update used ring information for IOTLB to work correctly,
+ * vhost-kernel code requires for this.*/
+ for (i = 0; i < hdev->nvqs; ++i) {
+ struct vhost_virtqueue *vq = hdev->vqs + i;
+ vhost_device_iotlb_miss(hdev, vq->used_phys, true);
+ }
+ }
return 0;
fail_log:
vhost_log_put(hdev, false);
@@ -1359,6 +1485,7 @@ fail_vq:
hdev->vq_index + i);
}
i = hdev->nvqs;
+
fail_mem:
fail_features:
@@ -1373,6 +1500,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
/* should only be called after backend is connected */
assert(hdev->vhost_ops);
+ hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false);
for (i = 0; i < hdev->nvqs; ++i) {
vhost_virtqueue_stop(hdev,
@@ -1381,8 +1509,13 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
hdev->vq_index + i);
}
+ if (vhost_dev_has_iommu(hdev)) {
+ memory_region_unregister_iommu_notifier(virtio_get_dma_as(vdev)->root,
+ &hdev->n);
+ }
vhost_log_put(hdev, true);
hdev->started = false;
+ hdev->vdev = NULL;
}
int vhost_net_set_backend(struct vhost_dev *hdev,
@@ -11,6 +11,8 @@
#ifndef VHOST_BACKEND_H
#define VHOST_BACKEND_H
+#include "exec/memory.h"
+
typedef enum VhostBackendType {
VHOST_BACKEND_TYPE_NONE = 0,
VHOST_BACKEND_TYPE_KERNEL = 1,
@@ -76,6 +78,14 @@ typedef bool (*vhost_backend_can_merge_op)(struct vhost_dev *dev,
typedef int (*vhost_vsock_set_guest_cid_op)(struct vhost_dev *dev,
uint64_t guest_cid);
typedef int (*vhost_vsock_set_running_op)(struct vhost_dev *dev, int start);
+typedef void (*vhost_set_iotlb_callback_op)(struct vhost_dev *dev,
+ int enabled);
+typedef int (*vhost_update_device_iotlb_op)(struct vhost_dev *dev,
+ uint64_t iova, uint64_t uaddr,
+ uint64_t len,
+ IOMMUAccessFlags perm);
+typedef int (*vhost_invalidate_device_iotlb_op)(struct vhost_dev *dev,
+ uint64_t iova, uint64_t len);
typedef struct VhostOps {
VhostBackendType backend_type;
@@ -107,6 +117,9 @@ typedef struct VhostOps {
vhost_backend_can_merge_op vhost_backend_can_merge;
vhost_vsock_set_guest_cid_op vhost_vsock_set_guest_cid;
vhost_vsock_set_running_op vhost_vsock_set_running;
+ vhost_set_iotlb_callback_op vhost_set_iotlb_callback;
+ vhost_update_device_iotlb_op vhost_update_device_iotlb;
+ vhost_invalidate_device_iotlb_op vhost_invalidate_device_iotlb;
} VhostOps;
extern const VhostOps user_ops;
@@ -20,6 +20,7 @@ struct vhost_virtqueue {
unsigned long long ring_phys;
unsigned ring_size;
EventNotifier masked_notifier;
+ struct vhost_dev *dev;
};
typedef unsigned long vhost_log_chunk_t;
@@ -37,6 +38,7 @@ struct vhost_log {
struct vhost_memory;
struct vhost_dev {
+ VirtIODevice *vdev;
MemoryListener memory_listener;
struct vhost_memory *mem;
int n_mem_sections;
@@ -61,6 +63,7 @@ struct vhost_dev {
void *opaque;
struct vhost_log *log;
QLIST_ENTRY(vhost_dev) entry;
+ IOMMUNotifier n;
};
int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
@@ -90,4 +93,5 @@ bool vhost_has_free_slot(void);
int vhost_net_set_backend(struct vhost_dev *hdev,
struct vhost_vring_file *file);
+void vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write);
#endif
@@ -18,6 +18,7 @@
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-bus.h"
+#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(TARGET_PPC64) || defined(TARGET_ARM)
@@ -193,4 +194,5 @@ static inline void virtio_tswap64s(VirtIODevice *vdev, uint64_t *s)
{
*s = virtio_tswap64(vdev, *s);
}
-#endif /* QEMU_VIRTIO_ACCESS_H */
+
+#endif /* _QEMU_VIRTIO_ACCESS_H */
@@ -696,6 +696,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
"tap: open vhost char device failed");
return;
}
+ fcntl(vhostfd, F_SETFL, O_NONBLOCK);
}
options.opaque = (void *)(uintptr_t)vhostfd;
This patches implements Device IOTLB support for vhost kernel. This is done through: 1) switch to use dma helpers when map/unmap vrings from vhost codes 2) kernel support for Device IOTLB API: - allow vhost-net to query the IOMMU IOTLB entry through eventfd - enable the ability for qemu to update a specified mapping of vhost - through ioctl. - enable the ability to invalidate a specified range of iova for the device IOTLB of vhost through ioctl. In x86/intel_iommu case this is triggered through iommu memory region notifier from device IOTLB invalidation descriptor processing routine. With all the above, kernel vhost_net can co-operate with IOMMU. Cc: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com> --- hw/virtio/vhost-backend.c | 104 ++++++++++++++++++++++ hw/virtio/vhost.c | 179 +++++++++++++++++++++++++++++++++----- include/hw/virtio/vhost-backend.h | 13 +++ include/hw/virtio/vhost.h | 4 + include/hw/virtio/virtio-access.h | 4 +- net/tap.c | 1 + 6 files changed, 281 insertions(+), 24 deletions(-)