Message ID | 20221230121235.1282915-15-dwmw2@infradead.org |
---|---|
State | New |
Headers | show |
Series | Xen support under KVM | expand |
* David Woodhouse (dwmw2@infradead.org) wrote: > From: David Woodhouse <dwmw@amazon.co.uk> > > For the shared info page and for grant tables, Xen shares its own pages > from the "Xen heap" to the guest. The guest requests that a given page > from a certain address space (XENMAPSPACE_shared_info, etc.) be mapped > to a given GPA using the XENMEM_add_to_physmap hypercall. > > To support that in qemu when *emulating* Xen, create a memory region > (migratable) and allow it to be mapped as an overlay when requested. > > Xen theoretically allows the same page to be mapped multiple times > into the guest, but that's hard to track and reinstate over migration, > so we automatically *unmap* any previous mapping when creating a new > one. This approach has been used in production with.... a non-trivial > number of guests expecting true Xen, without any problems yet being > noticed. > > This adds just the shared info page for now. The grant tables will be > a larger region, and will need to be overlaid one page at a time. I > think that means I need to create separate aliases for each page of > the overall grant_frames region, so that they can be mapped individually. 
> > Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> > --- > hw/i386/kvm/meson.build | 1 + > hw/i386/kvm/xen_overlay.c | 200 ++++++++++++++++++++++++++++++++++++++ > hw/i386/kvm/xen_overlay.h | 20 ++++ > include/sysemu/kvm_xen.h | 4 + > 4 files changed, 225 insertions(+) > create mode 100644 hw/i386/kvm/xen_overlay.c > create mode 100644 hw/i386/kvm/xen_overlay.h > > diff --git a/hw/i386/kvm/meson.build b/hw/i386/kvm/meson.build > index 95467f1ded..6165cbf019 100644 > --- a/hw/i386/kvm/meson.build > +++ b/hw/i386/kvm/meson.build > @@ -4,5 +4,6 @@ i386_kvm_ss.add(when: 'CONFIG_APIC', if_true: files('apic.c')) > i386_kvm_ss.add(when: 'CONFIG_I8254', if_true: files('i8254.c')) > i386_kvm_ss.add(when: 'CONFIG_I8259', if_true: files('i8259.c')) > i386_kvm_ss.add(when: 'CONFIG_IOAPIC', if_true: files('ioapic.c')) > +i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen_overlay.c')) > > i386_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss) > diff --git a/hw/i386/kvm/xen_overlay.c b/hw/i386/kvm/xen_overlay.c > new file mode 100644 > index 0000000000..331dea6b8b > --- /dev/null > +++ b/hw/i386/kvm/xen_overlay.c > @@ -0,0 +1,200 @@ > +/* > + * QEMU Xen emulation: Shared/overlay pages support > + * > + * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. > + * > + * Authors: David Woodhouse <dwmw2@infradead.org> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. 
> + */ > + > +#include "qemu/osdep.h" > +#include "qemu/host-utils.h" > +#include "qemu/module.h" > +#include "qemu/main-loop.h" > +#include "qapi/error.h" > +#include "qom/object.h" > +#include "exec/target_page.h" > +#include "exec/address-spaces.h" > +#include "migration/vmstate.h" > + > +#include "hw/sysbus.h" > +#include "hw/xen/xen.h" > +#include "xen_overlay.h" > + > +#include "sysemu/kvm.h" > +#include "sysemu/kvm_xen.h" > +#include <linux/kvm.h> > + > +#include "standard-headers/xen/memory.h" > + > + > +#define TYPE_XEN_OVERLAY "xen-overlay" > +OBJECT_DECLARE_SIMPLE_TYPE(XenOverlayState, XEN_OVERLAY) > + > +#define XEN_PAGE_SHIFT 12 > +#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT) > + > +struct XenOverlayState { > + /*< private >*/ > + SysBusDevice busdev; > + /*< public >*/ > + > + MemoryRegion shinfo_mem; > + void *shinfo_ptr; > + uint64_t shinfo_gpa; > +}; > + > +struct XenOverlayState *xen_overlay_singleton; > + > +static void xen_overlay_map_page_locked(MemoryRegion *page, uint64_t gpa) > +{ > + /* > + * Xen allows guests to map the same page as many times as it likes > + * into guest physical frames. We don't, because it would be hard > + * to track and restore them all. One mapping of each page is > + * perfectly sufficient for all known guests... and we've tested > + * that theory on a few now in other implementations. dwmw2. > + */ > + if (memory_region_is_mapped(page)) { > + if (gpa == INVALID_GPA) { > + memory_region_del_subregion(get_system_memory(), page); > + } else { > + /* Just move it */ > + memory_region_set_address(page, gpa); > + } > + } else if (gpa != INVALID_GPA) { > + memory_region_add_subregion_overlap(get_system_memory(), gpa, page, 0); > + } > +} > + > +/* KVM is the only existing back end for now. Let's not overengineer it yet. 
*/ > +static int xen_overlay_set_be_shinfo(uint64_t gfn) > +{ > + struct kvm_xen_hvm_attr xa = { > + .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, > + .u.shared_info.gfn = gfn, > + }; > + > + return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa); > +} > + > + > +static void xen_overlay_realize(DeviceState *dev, Error **errp) > +{ > + XenOverlayState *s = XEN_OVERLAY(dev); > + > + if (xen_mode != XEN_EMULATE) { > + error_setg(errp, "Xen overlay page support is for Xen emulation"); > + return; > + } > + > + memory_region_init_ram(&s->shinfo_mem, OBJECT(dev), "xen:shared_info", > + XEN_PAGE_SIZE, &error_abort); > + memory_region_set_enabled(&s->shinfo_mem, true); > + > + s->shinfo_ptr = memory_region_get_ram_ptr(&s->shinfo_mem); > + s->shinfo_gpa = INVALID_GPA; > + memset(s->shinfo_ptr, 0, XEN_PAGE_SIZE); > +} > + > +static int xen_overlay_post_load(void *opaque, int version_id) > +{ > + XenOverlayState *s = opaque; > + > + if (s->shinfo_gpa != INVALID_GPA) { > + xen_overlay_map_page_locked(&s->shinfo_mem, s->shinfo_gpa); > + xen_overlay_set_be_shinfo(s->shinfo_gpa >> XEN_PAGE_SHIFT); > + } > + > + return 0; > +} > + > +static bool xen_overlay_is_needed(void *opaque) > +{ > + return xen_mode == XEN_EMULATE; > +} > + > +static const VMStateDescription xen_overlay_vmstate = { > + .name = "xen_overlay", > + .version_id = 1, > + .minimum_version_id = 1, > + .needed = xen_overlay_is_needed, > + .post_load = xen_overlay_post_load, > + .fields = (VMStateField[]) { > + VMSTATE_UINT64(shinfo_gpa, XenOverlayState), > + VMSTATE_END_OF_LIST() > + } > +}; > + > +static void xen_overlay_class_init(ObjectClass *klass, void *data) > +{ > + DeviceClass *dc = DEVICE_CLASS(klass); > + > + dc->realize = xen_overlay_realize; > + dc->vmsd = &xen_overlay_vmstate;

That looks OK from a migration point of view

> +} > + > +static const TypeInfo xen_overlay_info = { > + .name = TYPE_XEN_OVERLAY, > + .parent = TYPE_SYS_BUS_DEVICE, > + .instance_size = sizeof(XenOverlayState), > + .class_init =
xen_overlay_class_init, > +}; > + > +void xen_overlay_create(void) > +{ > + xen_overlay_singleton = XEN_OVERLAY(sysbus_create_simple(TYPE_XEN_OVERLAY, > + -1, NULL)); > +} > + > +static void xen_overlay_register_types(void) > +{ > + type_register_static(&xen_overlay_info); > +} > + > +type_init(xen_overlay_register_types) > + > +int xen_overlay_map_shinfo_page(uint64_t gpa) > +{ > + XenOverlayState *s = xen_overlay_singleton; > + int ret; > + > + if (!s) { > + return -ENOENT; > + } > + > + qemu_mutex_lock_iothread(); > + if (s->shinfo_gpa) { > + /* If removing shinfo page, turn the kernel magic off first */

Odd indent? Dave

> + ret = xen_overlay_set_be_shinfo(INVALID_GFN); > + if (ret) { > + goto out; > + } > + } > + > + xen_overlay_map_page_locked(&s->shinfo_mem, gpa); > + if (gpa != INVALID_GPA) { > + ret = xen_overlay_set_be_shinfo(gpa >> XEN_PAGE_SHIFT); > + if (ret) { > + goto out; > + } > + } > + s->shinfo_gpa = gpa; > + out: > + qemu_mutex_unlock_iothread(); > + > + return ret; > +} > + > +void *xen_overlay_get_shinfo_ptr(void) > +{ > + XenOverlayState *s = xen_overlay_singleton; > + > + if (!s) { > + return NULL; > + } > + > + return s->shinfo_ptr; > +} > diff --git a/hw/i386/kvm/xen_overlay.h b/hw/i386/kvm/xen_overlay.h > new file mode 100644 > index 0000000000..00cff05bb0 > --- /dev/null > +++ b/hw/i386/kvm/xen_overlay.h > @@ -0,0 +1,20 @@ > +/* > + * QEMU Xen emulation: Shared/overlay pages support > + * > + * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. > + * > + * Authors: David Woodhouse <dwmw2@infradead.org> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory.
> + */ > + > +#ifndef QEMU_XEN_OVERLAY_H > +#define QEMU_XEN_OVERLAY_H > + > +void xen_overlay_create(void); > + > +int xen_overlay_map_shinfo_page(uint64_t gpa); > +void *xen_overlay_get_shinfo_ptr(void); > + > +#endif /* QEMU_XEN_OVERLAY_H */ > diff --git a/include/sysemu/kvm_xen.h b/include/sysemu/kvm_xen.h > index 296533f2d5..3e43cd7843 100644 > --- a/include/sysemu/kvm_xen.h > +++ b/include/sysemu/kvm_xen.h > @@ -12,6 +12,10 @@ > #ifndef QEMU_SYSEMU_KVM_XEN_H > #define QEMU_SYSEMU_KVM_XEN_H > > +/* The KVM API uses these to indicate "no GPA" or "no GFN" */ > +#define INVALID_GPA UINT64_MAX > +#define INVALID_GFN UINT64_MAX > + > uint32_t kvm_xen_get_caps(void); > > #define kvm_xen_has_cap(cap) (!!(kvm_xen_get_caps() & \ > -- > 2.35.3 > >
diff --git a/hw/i386/kvm/meson.build b/hw/i386/kvm/meson.build index 95467f1ded..6165cbf019 100644 --- a/hw/i386/kvm/meson.build +++ b/hw/i386/kvm/meson.build @@ -4,5 +4,6 @@ i386_kvm_ss.add(when: 'CONFIG_APIC', if_true: files('apic.c')) i386_kvm_ss.add(when: 'CONFIG_I8254', if_true: files('i8254.c')) i386_kvm_ss.add(when: 'CONFIG_I8259', if_true: files('i8259.c')) i386_kvm_ss.add(when: 'CONFIG_IOAPIC', if_true: files('ioapic.c')) +i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen_overlay.c')) i386_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss) diff --git a/hw/i386/kvm/xen_overlay.c b/hw/i386/kvm/xen_overlay.c new file mode 100644 index 0000000000..331dea6b8b --- /dev/null +++ b/hw/i386/kvm/xen_overlay.c @@ -0,0 +1,200 @@ +/* + * QEMU Xen emulation: Shared/overlay pages support + * + * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Authors: David Woodhouse <dwmw2@infradead.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/host-utils.h" +#include "qemu/module.h" +#include "qemu/main-loop.h" +#include "qapi/error.h" +#include "qom/object.h" +#include "exec/target_page.h" +#include "exec/address-spaces.h" +#include "migration/vmstate.h" + +#include "hw/sysbus.h" +#include "hw/xen/xen.h" +#include "xen_overlay.h" + +#include "sysemu/kvm.h" +#include "sysemu/kvm_xen.h" +#include <linux/kvm.h> + +#include "standard-headers/xen/memory.h" + + +#define TYPE_XEN_OVERLAY "xen-overlay" +OBJECT_DECLARE_SIMPLE_TYPE(XenOverlayState, XEN_OVERLAY) + +#define XEN_PAGE_SHIFT 12 +#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT) + +struct XenOverlayState { + /*< private >*/ + SysBusDevice busdev; + /*< public >*/ + + MemoryRegion shinfo_mem; + void *shinfo_ptr; + uint64_t shinfo_gpa; +}; + +struct XenOverlayState *xen_overlay_singleton; + +static void xen_overlay_map_page_locked(MemoryRegion *page, uint64_t gpa) +{ + /* + * Xen allows guests to map the same page as many times as it likes + * into guest physical frames. We don't, because it would be hard + * to track and restore them all. One mapping of each page is + * perfectly sufficient for all known guests... and we've tested + * that theory on a few now in other implementations. dwmw2. + */ + if (memory_region_is_mapped(page)) { + if (gpa == INVALID_GPA) { + memory_region_del_subregion(get_system_memory(), page); + } else { + /* Just move it */ + memory_region_set_address(page, gpa); + } + } else if (gpa != INVALID_GPA) { + memory_region_add_subregion_overlap(get_system_memory(), gpa, page, 0); + } +} + +/* KVM is the only existing back end for now. Let's not overengineer it yet. 
*/ +static int xen_overlay_set_be_shinfo(uint64_t gfn) +{ + struct kvm_xen_hvm_attr xa = { + .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, + .u.shared_info.gfn = gfn, + }; + + return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa); +} + + +static void xen_overlay_realize(DeviceState *dev, Error **errp) +{ + XenOverlayState *s = XEN_OVERLAY(dev); + + if (xen_mode != XEN_EMULATE) { + error_setg(errp, "Xen overlay page support is for Xen emulation"); + return; + } + + memory_region_init_ram(&s->shinfo_mem, OBJECT(dev), "xen:shared_info", + XEN_PAGE_SIZE, &error_abort); + memory_region_set_enabled(&s->shinfo_mem, true); + + s->shinfo_ptr = memory_region_get_ram_ptr(&s->shinfo_mem); + s->shinfo_gpa = INVALID_GPA; + memset(s->shinfo_ptr, 0, XEN_PAGE_SIZE); +} + +static int xen_overlay_post_load(void *opaque, int version_id) +{ + XenOverlayState *s = opaque; + + if (s->shinfo_gpa != INVALID_GPA) { + xen_overlay_map_page_locked(&s->shinfo_mem, s->shinfo_gpa); + xen_overlay_set_be_shinfo(s->shinfo_gpa >> XEN_PAGE_SHIFT); + } + + return 0; +} + +static bool xen_overlay_is_needed(void *opaque) +{ + return xen_mode == XEN_EMULATE; +} + +static const VMStateDescription xen_overlay_vmstate = { + .name = "xen_overlay", + .version_id = 1, + .minimum_version_id = 1, + .needed = xen_overlay_is_needed, + .post_load = xen_overlay_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT64(shinfo_gpa, XenOverlayState), + VMSTATE_END_OF_LIST() + } +}; + +static void xen_overlay_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = xen_overlay_realize; + dc->vmsd = &xen_overlay_vmstate; +} + +static const TypeInfo xen_overlay_info = { + .name = TYPE_XEN_OVERLAY, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(XenOverlayState), + .class_init = xen_overlay_class_init, +}; + +void xen_overlay_create(void) +{ + xen_overlay_singleton = XEN_OVERLAY(sysbus_create_simple(TYPE_XEN_OVERLAY, + -1, NULL)); +} + +static void 
xen_overlay_register_types(void) +{ + type_register_static(&xen_overlay_info); +} + +type_init(xen_overlay_register_types) + +int xen_overlay_map_shinfo_page(uint64_t gpa) +{ + XenOverlayState *s = xen_overlay_singleton; + int ret; + + if (!s) { + return -ENOENT; + } + + qemu_mutex_lock_iothread(); + if (s->shinfo_gpa) { + /* If removing shinfo page, turn the kernel magic off first */ + ret = xen_overlay_set_be_shinfo(INVALID_GFN); + if (ret) { + goto out; + } + } + + xen_overlay_map_page_locked(&s->shinfo_mem, gpa); + if (gpa != INVALID_GPA) { + ret = xen_overlay_set_be_shinfo(gpa >> XEN_PAGE_SHIFT); + if (ret) { + goto out; + } + } + s->shinfo_gpa = gpa; + out: + qemu_mutex_unlock_iothread(); + + return ret; +} + +void *xen_overlay_get_shinfo_ptr(void) +{ + XenOverlayState *s = xen_overlay_singleton; + + if (!s) { + return NULL; + } + + return s->shinfo_ptr; +} diff --git a/hw/i386/kvm/xen_overlay.h b/hw/i386/kvm/xen_overlay.h new file mode 100644 index 0000000000..00cff05bb0 --- /dev/null +++ b/hw/i386/kvm/xen_overlay.h @@ -0,0 +1,20 @@ +/* + * QEMU Xen emulation: Shared/overlay pages support + * + * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Authors: David Woodhouse <dwmw2@infradead.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#ifndef QEMU_XEN_OVERLAY_H +#define QEMU_XEN_OVERLAY_H + +void xen_overlay_create(void); + +int xen_overlay_map_shinfo_page(uint64_t gpa); +void *xen_overlay_get_shinfo_ptr(void); + +#endif /* QEMU_XEN_OVERLAY_H */ diff --git a/include/sysemu/kvm_xen.h b/include/sysemu/kvm_xen.h index 296533f2d5..3e43cd7843 100644 --- a/include/sysemu/kvm_xen.h +++ b/include/sysemu/kvm_xen.h @@ -12,6 +12,10 @@ #ifndef QEMU_SYSEMU_KVM_XEN_H #define QEMU_SYSEMU_KVM_XEN_H +/* The KVM API uses these to indicate "no GPA" or "no GFN" */ +#define INVALID_GPA UINT64_MAX +#define INVALID_GFN UINT64_MAX + uint32_t kvm_xen_get_caps(void); #define kvm_xen_has_cap(cap) (!!(kvm_xen_get_caps() & \