| Message ID | 20240202215332.118728-7-david@redhat.com |
|---|---|
| State | New |
| Series | libvhost-user: support more memslots and cleanup memslot handling code |
On Fri, Feb 2, 2024 at 4:55 PM David Hildenbrand <david@redhat.com> wrote:
>
> Let's factor it out, reducing quite some code duplication and preparing
> for further changes.
>
> If we fail to mmap a region and panic, we now simply don't add that
> (broken) region.
>
> Note that we now increment dev->nregions as we are successfully
> adding memory regions, and don't increment dev->nregions if anything went
> wrong.
>
> Signed-off-by: David Hildenbrand <david@redhat.com>

Reviewed-by: Raphael Norwitz <raphael@enfabrica.net>
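The mmap strategy that the new `_vu_add_mem_reg()` helper centralizes (see the diff below) is unchanged from the duplicated code it replaces: the file is mapped from offset 0 with a length of `memory_size + mmap_offset`, so the mapping itself only needs page (huge page) alignment, and the region's usable base sits at `mmap_addr + mmap_offset`. The following standalone C sketch illustrates that scheme in isolation; it is not part of the patch, and `region_desc`/`map_region()` are hypothetical names used only for illustration.

```c
#define _GNU_SOURCE          /* for MAP_NORESERVE on glibc */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

/* Hypothetical stand-in for the relevant VhostUserMemoryRegion fields. */
struct region_desc {
    uint64_t memory_size;   /* size of the guest-visible region */
    uint64_t mmap_offset;   /* offset of the region within the fd */
};

/*
 * Map the whole file from offset 0 instead of passing mmap_offset to
 * mmap(); the mapped address then only needs (huge) page alignment, and
 * the usable region starts mmap_offset bytes into the mapping.
 */
static void *map_region(int fd, const struct region_desc *d)
{
    void *addr = mmap(NULL, d->memory_size + d->mmap_offset,
                      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NORESERVE,
                      fd, 0);

    if (addr == MAP_FAILED) {
        fprintf(stderr, "region mmap error: %s\n", strerror(errno));
        return NULL;
    }
    /* The guest-visible part of the region begins after mmap_offset. */
    return (uint8_t *)addr + d->mmap_offset;
}
```

The sketch reports errors to its caller, whereas the library's helper panics via vu_panic() and simply skips the broken region, which is the behavioral point of this patch.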
```diff
diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c
index d9e2214ad2..a2baefe84b 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -256,6 +256,61 @@ vu_remove_all_mem_regs(VuDev *dev)
     dev->nregions = 0;
 }
 
+static void
+_vu_add_mem_reg(VuDev *dev, VhostUserMemoryRegion *msg_region, int fd)
+{
+    int prot = PROT_READ | PROT_WRITE;
+    VuDevRegion *r;
+    void *mmap_addr;
+
+    DPRINT("Adding region %d\n", dev->nregions);
+    DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
+           msg_region->guest_phys_addr);
+    DPRINT("    memory_size:     0x%016"PRIx64"\n",
+           msg_region->memory_size);
+    DPRINT("    userspace_addr   0x%016"PRIx64"\n",
+           msg_region->userspace_addr);
+    DPRINT("    mmap_offset      0x%016"PRIx64"\n",
+           msg_region->mmap_offset);
+
+    if (dev->postcopy_listening) {
+        /*
+         * In postcopy we're using PROT_NONE here to catch anyone
+         * accessing it before we userfault
+         */
+        prot = PROT_NONE;
+    }
+
+    /*
+     * We don't use offset argument of mmap() since the mapped address has
+     * to be page aligned, and we use huge pages.
+     */
+    mmap_addr = mmap(0, msg_region->memory_size + msg_region->mmap_offset,
+                     prot, MAP_SHARED | MAP_NORESERVE, fd, 0);
+    if (mmap_addr == MAP_FAILED) {
+        vu_panic(dev, "region mmap error: %s", strerror(errno));
+        return;
+    }
+    DPRINT("    mmap_addr:       0x%016"PRIx64"\n",
+           (uint64_t)(uintptr_t)mmap_addr);
+
+    r = &dev->regions[dev->nregions];
+    r->gpa = msg_region->guest_phys_addr;
+    r->size = msg_region->memory_size;
+    r->qva = msg_region->userspace_addr;
+    r->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
+    r->mmap_offset = msg_region->mmap_offset;
+    dev->nregions++;
+
+    if (dev->postcopy_listening) {
+        /*
+         * Return the address to QEMU so that it can translate the ufd
+         * fault addresses back.
+         */
+        msg_region->userspace_addr = r->mmap_addr + r->mmap_offset;
+    }
+}
+
 static void
 vmsg_close_fds(VhostUserMsg *vmsg)
 {
@@ -727,10 +782,7 @@ generate_faults(VuDev *dev) {
 static bool
 vu_add_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
     int i;
-    bool track_ramblocks = dev->postcopy_listening;
     VhostUserMemoryRegion m = vmsg->payload.memreg.region, *msg_region = &m;
-    VuDevRegion *dev_region = &dev->regions[dev->nregions];
-    void *mmap_addr;
 
     if (vmsg->fd_num != 1) {
         vmsg_close_fds(vmsg);
@@ -760,69 +812,20 @@ vu_add_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
      * we know all the postcopy client bases have been received, and we
      * should start generating faults.
      */
-    if (track_ramblocks &&
+    if (dev->postcopy_listening &&
         vmsg->size == sizeof(vmsg->payload.u64) &&
         vmsg->payload.u64 == 0) {
         (void)generate_faults(dev);
         return false;
     }
 
-    DPRINT("Adding region: %u\n", dev->nregions);
-    DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
-           msg_region->guest_phys_addr);
-    DPRINT("    memory_size:     0x%016"PRIx64"\n",
-           msg_region->memory_size);
-    DPRINT("    userspace_addr   0x%016"PRIx64"\n",
-           msg_region->userspace_addr);
-    DPRINT("    mmap_offset      0x%016"PRIx64"\n",
-           msg_region->mmap_offset);
-
-    dev_region->gpa = msg_region->guest_phys_addr;
-    dev_region->size = msg_region->memory_size;
-    dev_region->qva = msg_region->userspace_addr;
-    dev_region->mmap_offset = msg_region->mmap_offset;
-
-    /*
-     * We don't use offset argument of mmap() since the
-     * mapped address has to be page aligned, and we use huge
-     * pages.
-     */
-    if (track_ramblocks) {
-        /*
-         * In postcopy we're using PROT_NONE here to catch anyone
-         * accessing it before we userfault.
-         */
-        mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
-                         PROT_NONE, MAP_SHARED | MAP_NORESERVE,
-                         vmsg->fds[0], 0);
-    } else {
-        mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
-                         PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NORESERVE,
-                         vmsg->fds[0], 0);
-    }
-
-    if (mmap_addr == MAP_FAILED) {
-        vu_panic(dev, "region mmap error: %s", strerror(errno));
-    } else {
-        dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
-        DPRINT("    mmap_addr:       0x%016"PRIx64"\n",
-               dev_region->mmap_addr);
-    }
-
+    _vu_add_mem_reg(dev, msg_region, vmsg->fds[0]);
     close(vmsg->fds[0]);
 
-    if (track_ramblocks) {
-        /*
-         * Return the address to QEMU so that it can translate the ufd
-         * fault addresses back.
-         */
-        msg_region->userspace_addr = dev_region->mmap_addr +
-            dev_region->mmap_offset;
-
+    if (dev->postcopy_listening) {
         /* Send the message back to qemu with the addresses filled in. */
         vmsg->fd_num = 0;
         DPRINT("Successfully added new region in postcopy\n");
-        dev->nregions++;
         return true;
     } else {
         for (i = 0; i < dev->max_queues; i++) {
@@ -835,7 +838,6 @@ vu_add_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
         }
 
         DPRINT("Successfully added new region\n");
-        dev->nregions++;
         return false;
     }
 }
@@ -940,63 +942,13 @@ static bool
 vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
 {
     VhostUserMemory m = vmsg->payload.memory, *memory = &m;
-    int prot = PROT_READ | PROT_WRITE;
     unsigned int i;
 
-    if (dev->postcopy_listening) {
-        /*
-         * In postcopy we're using PROT_NONE here to catch anyone
-         * accessing it before we userfault
-         */
-        prot = PROT_NONE;
-    }
-
     vu_remove_all_mem_regs(dev);
-    dev->nregions = memory->nregions;
 
     DPRINT("Nregions: %u\n", memory->nregions);
-    for (i = 0; i < dev->nregions; i++) {
-        void *mmap_addr;
-        VhostUserMemoryRegion *msg_region = &memory->regions[i];
-        VuDevRegion *dev_region = &dev->regions[i];
-
-        DPRINT("Region %d\n", i);
-        DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
-               msg_region->guest_phys_addr);
-        DPRINT("    memory_size:     0x%016"PRIx64"\n",
-               msg_region->memory_size);
-        DPRINT("    userspace_addr   0x%016"PRIx64"\n",
-               msg_region->userspace_addr);
-        DPRINT("    mmap_offset      0x%016"PRIx64"\n",
-               msg_region->mmap_offset);
-
-        dev_region->gpa = msg_region->guest_phys_addr;
-        dev_region->size = msg_region->memory_size;
-        dev_region->qva = msg_region->userspace_addr;
-        dev_region->mmap_offset = msg_region->mmap_offset;
-
-        /* We don't use offset argument of mmap() since the
-         * mapped address has to be page aligned, and we use huge
-         * pages. */
-        mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
-                         prot, MAP_SHARED | MAP_NORESERVE, vmsg->fds[i], 0);
-
-        if (mmap_addr == MAP_FAILED) {
-            vu_panic(dev, "region mmap error: %s", strerror(errno));
-        } else {
-            dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
-            DPRINT("    mmap_addr:       0x%016"PRIx64"\n",
-                   dev_region->mmap_addr);
-        }
-
-        if (dev->postcopy_listening) {
-            /*
-             * Return the address to QEMU so that it can translate the ufd
-             * fault addresses back.
-             */
-            msg_region->userspace_addr = dev_region->mmap_addr +
-                dev_region->mmap_offset;
-        }
+    for (i = 0; i < memory->nregions; i++) {
+        _vu_add_mem_reg(dev, &memory->regions[i], vmsg->fds[i]);
         close(vmsg->fds[i]);
     }
```
Let's factor it out, reducing quite some code duplication and preparing
for further changes.

If we fail to mmap a region and panic, we now simply don't add that
(broken) region.

Note that we now increment dev->nregions as we are successfully
adding memory regions, and don't increment dev->nregions if anything went
wrong.

Signed-off-by: David Hildenbrand <david@redhat.com>

```
---
 subprojects/libvhost-user/libvhost-user.c | 168 ++++++++--------------
 1 file changed, 60 insertions(+), 108 deletions(-)
```
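A note on why counting regions only after a successful mmap matters: lookups in libvhost-user walk the first `dev->nregions` entries of `dev->regions`, so an entry that was never counted can never be handed out with an unset `mmap_addr`. The sketch below is a simplified illustration of such a walk, loosely modeled on the library's guest-address translation rather than a copy of the actual `vu_gpa_to_va()` implementation, and assumes only the `VuDevRegion` fields populated in this patch.

```c
#include <stddef.h>
#include <stdint.h>
#include "libvhost-user.h"   /* VuDev, VuDevRegion */

/*
 * Simplified guest-physical-address lookup: only entries below
 * dev->nregions are visited, so a region whose mmap failed (and which was
 * therefore never counted) can never be returned with a bogus mmap_addr.
 */
static void *gpa_to_va_sketch(VuDev *dev, uint64_t guest_addr)
{
    unsigned int i;

    for (i = 0; i < dev->nregions; i++) {
        VuDevRegion *r = &dev->regions[i];

        if (guest_addr >= r->gpa && guest_addr < r->gpa + r->size) {
            /* Offset within the region, relative to the mapped base. */
            return (void *)(uintptr_t)(r->mmap_addr + r->mmap_offset +
                                       guest_addr - r->gpa);
        }
    }
    return NULL;   /* not covered by any registered region */
}
```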