diff mbox series

[v3,2/2] virt: vmgenid: introduce driver for reinitializing RNG on VM fork

Message ID 20220224133906.751587-3-Jason@zx2c4.com
State New
Headers show
Series VM fork detection for RNG | expand

Commit Message

Jason A. Donenfeld Feb. 24, 2022, 1:39 p.m. UTC
VM Generation ID is a feature from Microsoft, described at
<https://go.microsoft.com/fwlink/?LinkId=260709>, and supported by
Hyper-V and QEMU. Its usage is described in Microsoft's RNG whitepaper,
<https://aka.ms/win10rng>, as:

    If the OS is running in a VM, there is a problem that most
    hypervisors can snapshot the state of the machine and later rewind
    the VM state to the saved state. This results in the machine running
    a second time with the exact same RNG state, which leads to serious
    security problems.  To reduce the window of vulnerability, Windows
    10 on a Hyper-V VM will detect when the VM state is reset, retrieve
    a unique (not random) value from the hypervisor, and reseed the root
    RNG with that unique value.  This does not eliminate the
    vulnerability, but it greatly reduces the time during which the RNG
    system will produce the same outputs as it did during a previous
    instantiation of the same VM state.

Linux has the same issue, and given that vmgenid is supported already by
multiple hypervisors, we can implement more or less the same solution.
So this commit wires up the vmgenid ACPI notification to the RNG's newly
added add_vmfork_randomness() function.

It can be used from qemu via the `-device vmgenid,guid=auto` parameter.
After setting that, use `savevm` in the monitor to save the VM state,
then quit QEMU, start it again, and use `loadvm`. That will trigger this
driver's notify function, which hands the new UUID to the RNG. This is
described in <https://git.qemu.org/?p=qemu.git;a=blob;f=docs/specs/vmgenid.txt>.
And there are hooks for this in libvirt as well, described in
<https://libvirt.org/formatdomain.html#general-metadata>.

Note, however, that the treatment of this as a UUID is considered to be
an accidental QEMU nuance, per
<https://github.com/libguestfs/virt-v2v/blob/master/docs/vm-generation-id-across-hypervisors.txt>,
so this driver simply treats these bytes as an opaque 128-bit binary
blob, as per the spec. This doesn't really make a difference anyway,
considering that's how it ends up when handed to the RNG in the end.

This driver builds on prior work from Adrian Catangiu at Amazon, and it
is my hope that that team can resume maintenance of this driver.

Cc: Adrian Catangiu <adrian@parity.io>
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Daniel P. Berrangé <berrange@redhat.com>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/virt/Kconfig   |   9 +++
 drivers/virt/Makefile  |   1 +
 drivers/virt/vmgenid.c | 121 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+)
 create mode 100644 drivers/virt/vmgenid.c

Comments

Laszlo Ersek Feb. 25, 2022, 10:37 a.m. UTC | #1
On 02/24/22 14:39, Jason A. Donenfeld wrote:
> VM Generation ID is a feature from Microsoft, described at
> <https://go.microsoft.com/fwlink/?LinkId=260709>, and supported by
> Hyper-V and QEMU. Its usage is described in Microsoft's RNG whitepaper,
> <https://aka.ms/win10rng>, as:
> 
>     If the OS is running in a VM, there is a problem that most
>     hypervisors can snapshot the state of the machine and later rewind
>     the VM state to the saved state. This results in the machine running
>     a second time with the exact same RNG state, which leads to serious
>     security problems.  To reduce the window of vulnerability, Windows
>     10 on a Hyper-V VM will detect when the VM state is reset, retrieve
>     a unique (not random) value from the hypervisor, and reseed the root
>     RNG with that unique value.  This does not eliminate the
>     vulnerability, but it greatly reduces the time during which the RNG
>     system will produce the same outputs as it did during a previous
>     instantiation of the same VM state.
> 
> Linux has the same issue, and given that vmgenid is supported already by
> multiple hypervisors, we can implement more or less the same solution.
> So this commit wires up the vmgenid ACPI notification to the RNG's newly
> added add_vmfork_randomness() function.
> 
> It can be used from qemu via the `-device vmgenid,guid=auto` parameter.
> After setting that, use `savevm` in the monitor to save the VM state,
> then quit QEMU, start it again, and use `loadvm`. That will trigger this
> driver's notify function, which hands the new UUID to the RNG. This is
> described in <https://git.qemu.org/?p=qemu.git;a=blob;f=docs/specs/vmgenid.txt>.
> And there are hooks for this in libvirt as well, described in
> <https://libvirt.org/formatdomain.html#general-metadata>.
> 
> Note, however, that the treatment of this as a UUID is considered to be
> an accidental QEMU nuance, per
> <https://github.com/libguestfs/virt-v2v/blob/master/docs/vm-generation-id-across-hypervisors.txt>,
> so this driver simply treats these bytes as an opaque 128-bit binary
> blob, as per the spec. This doesn't really make a difference anyway,
> considering that's how it ends up when handed to the RNG in the end.
> 
> This driver builds on prior work from Adrian Catangiu at Amazon, and it
> is my hope that that team can resume maintenance of this driver.
> 
> Cc: Adrian Catangiu <adrian@parity.io>
> Cc: Laszlo Ersek <lersek@redhat.com>
> Cc: Daniel P. Berrangé <berrange@redhat.com>
> Cc: Dominik Brodowski <linux@dominikbrodowski.net>
> Cc: Ard Biesheuvel <ardb@kernel.org>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> ---
>  drivers/virt/Kconfig   |   9 +++
>  drivers/virt/Makefile  |   1 +
>  drivers/virt/vmgenid.c | 121 +++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 131 insertions(+)
>  create mode 100644 drivers/virt/vmgenid.c
> 
> diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig
> index 8061e8ef449f..d3276dc2095c 100644
> --- a/drivers/virt/Kconfig
> +++ b/drivers/virt/Kconfig
> @@ -13,6 +13,15 @@ menuconfig VIRT_DRIVERS
>  
>  if VIRT_DRIVERS
>  
> +config VMGENID
> +	tristate "Virtual Machine Generation ID driver"
> +	default y
> +	depends on ACPI
> +	help
> +	  Say Y here to use the hypervisor-provided Virtual Machine Generation ID
> +	  to reseed the RNG when the VM is cloned. This is highly recommended if
> +	  you intend to do any rollback / cloning / snapshotting of VMs.
> +
>  config FSL_HV_MANAGER
>  	tristate "Freescale hypervisor management driver"
>  	depends on FSL_SOC
> diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile
> index 3e272ea60cd9..108d0ffcc9aa 100644
> --- a/drivers/virt/Makefile
> +++ b/drivers/virt/Makefile
> @@ -4,6 +4,7 @@
>  #
>  
>  obj-$(CONFIG_FSL_HV_MANAGER)	+= fsl_hypervisor.o
> +obj-$(CONFIG_VMGENID)		+= vmgenid.o
>  obj-y				+= vboxguest/
>  
>  obj-$(CONFIG_NITRO_ENCLAVES)	+= nitro_enclaves/
> diff --git a/drivers/virt/vmgenid.c b/drivers/virt/vmgenid.c
> new file mode 100644
> index 000000000000..5da4dc8f25e3
> --- /dev/null
> +++ b/drivers/virt/vmgenid.c
> @@ -0,0 +1,121 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Virtual Machine Generation ID driver
> + *
> + * Copyright (C) 2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
> + * Copyright (C) 2020 Amazon. All rights reserved.
> + * Copyright (C) 2018 Red Hat Inc. All rights reserved.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/acpi.h>
> +#include <linux/random.h>
> +
> +ACPI_MODULE_NAME("vmgenid");
> +
> +enum { VMGENID_SIZE = 16 };
> +
> +static struct {
> +	u8 this_id[VMGENID_SIZE];
> +	u8 *next_id;
> +} state;
> +
> +static int vmgenid_acpi_add(struct acpi_device *device)
> +{
> +	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER };
> +	union acpi_object *pss;
> +	phys_addr_t phys_addr;
> +	acpi_status status;
> +	int ret = 0;
> +
> +	if (!device)
> +		return -EINVAL;
> +
> +	status = acpi_evaluate_object(device->handle, "ADDR", NULL, &buffer);
> +	if (ACPI_FAILURE(status)) {
> +		ACPI_EXCEPTION((AE_INFO, status, "Evaluating ADDR"));
> +		return -ENODEV;
> +	}
> +	pss = buffer.pointer;
> +	if (!pss || pss->type != ACPI_TYPE_PACKAGE || pss->package.count != 2 ||
> +	    pss->package.elements[0].type != ACPI_TYPE_INTEGER ||
> +	    pss->package.elements[1].type != ACPI_TYPE_INTEGER) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	phys_addr = (pss->package.elements[0].integer.value << 0) |
> +		    (pss->package.elements[1].integer.value << 32);
> +	state.next_id = acpi_os_map_memory(phys_addr, VMGENID_SIZE);
> +	if (!state.next_id) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +	device->driver_data = &state;
> +
> +	memcpy(state.this_id, state.next_id, sizeof(state.this_id));
> +	add_device_randomness(state.this_id, sizeof(state.this_id));
> +
> +out:
> +	ACPI_FREE(buffer.pointer);
> +	return ret;
> +}
> +
> +static int vmgenid_acpi_remove(struct acpi_device *device)
> +{
> +	if (!device || acpi_driver_data(device) != &state)
> +		return -EINVAL;
> +	device->driver_data = NULL;
> +	if (state.next_id)
> +		acpi_os_unmap_memory(state.next_id, VMGENID_SIZE);
> +	state.next_id = NULL;
> +	return 0;
> +}
> +
> +static void vmgenid_acpi_notify(struct acpi_device *device, u32 event)
> +{
> +	u8 old_id[VMGENID_SIZE];
> +
> +	if (!device || acpi_driver_data(device) != &state)
> +		return;
> +	memcpy(old_id, state.this_id, sizeof(old_id));
> +	memcpy(state.this_id, state.next_id, sizeof(state.this_id));
> +	if (!memcmp(old_id, state.this_id, sizeof(old_id)))
> +		return;
> +	add_vmfork_randomness(state.this_id, sizeof(state.this_id));
> +}
> +
> +static const struct acpi_device_id vmgenid_ids[] = {
> +	{"VMGENID", 0},
> +	{"QEMUVGID", 0},
> +	{ },
> +};
> +
> +static struct acpi_driver acpi_driver = {
> +	.name = "vm_generation_id",
> +	.ids = vmgenid_ids,
> +	.owner = THIS_MODULE,
> +	.ops = {
> +		.add = vmgenid_acpi_add,
> +		.remove = vmgenid_acpi_remove,
> +		.notify = vmgenid_acpi_notify,
> +	}
> +};
> +
> +static int __init vmgenid_init(void)
> +{
> +	return acpi_bus_register_driver(&acpi_driver);
> +}
> +
> +static void __exit vmgenid_exit(void)
> +{
> +	acpi_bus_unregister_driver(&acpi_driver);
> +}
> +
> +module_init(vmgenid_init);
> +module_exit(vmgenid_exit);
> +
> +MODULE_DEVICE_TABLE(acpi, vmgenid_ids);
> +MODULE_DESCRIPTION("Virtual Machine Generation ID");
> +MODULE_LICENSE("GPL v2");
> 

I'm not an experienced reviewer for the kernel.

I've made an effort to check several -- although not all -- aspects of
this patch, and it looks OK to me.

Reviewed-by: Laszlo Ersek <lersek@redhat.com>

Thanks
Laszlo
Ard Biesheuvel Feb. 25, 2022, 11:24 a.m. UTC | #2
On Thu, 24 Feb 2022 at 14:39, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
>
> VM Generation ID is a feature from Microsoft, described at
> <https://go.microsoft.com/fwlink/?LinkId=260709>, and supported by
> Hyper-V and QEMU. Its usage is described in Microsoft's RNG whitepaper,
> <https://aka.ms/win10rng>, as:
>
>     If the OS is running in a VM, there is a problem that most
>     hypervisors can snapshot the state of the machine and later rewind
>     the VM state to the saved state. This results in the machine running
>     a second time with the exact same RNG state, which leads to serious
>     security problems.  To reduce the window of vulnerability, Windows
>     10 on a Hyper-V VM will detect when the VM state is reset, retrieve
>     a unique (not random) value from the hypervisor, and reseed the root
>     RNG with that unique value.  This does not eliminate the
>     vulnerability, but it greatly reduces the time during which the RNG
>     system will produce the same outputs as it did during a previous
>     instantiation of the same VM state.
>
> Linux has the same issue, and given that vmgenid is supported already by
> multiple hypervisors, we can implement more or less the same solution.
> So this commit wires up the vmgenid ACPI notification to the RNG's newly
> added add_vmfork_randomness() function.
>
> It can be used from qemu via the `-device vmgenid,guid=auto` parameter.
> After setting that, use `savevm` in the monitor to save the VM state,
> then quit QEMU, start it again, and use `loadvm`. That will trigger this
> driver's notify function, which hands the new UUID to the RNG. This is
> described in <https://git.qemu.org/?p=qemu.git;a=blob;f=docs/specs/vmgenid.txt>.
> And there are hooks for this in libvirt as well, described in
> <https://libvirt.org/formatdomain.html#general-metadata>.
>
> Note, however, that the treatment of this as a UUID is considered to be
> an accidental QEMU nuance, per
> <https://github.com/libguestfs/virt-v2v/blob/master/docs/vm-generation-id-across-hypervisors.txt>,
> so this driver simply treats these bytes as an opaque 128-bit binary
> blob, as per the spec. This doesn't really make a difference anyway,
> considering that's how it ends up when handed to the RNG in the end.
>
> This driver builds on prior work from Adrian Catangiu at Amazon, and it
> is my hope that that team can resume maintenance of this driver.
>
> Cc: Adrian Catangiu <adrian@parity.io>
> Cc: Laszlo Ersek <lersek@redhat.com>
> Cc: Daniel P. Berrangé <berrange@redhat.com>
> Cc: Dominik Brodowski <linux@dominikbrodowski.net>
> Cc: Ard Biesheuvel <ardb@kernel.org>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> ---
>  drivers/virt/Kconfig   |   9 +++
>  drivers/virt/Makefile  |   1 +
>  drivers/virt/vmgenid.c | 121 +++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 131 insertions(+)
>  create mode 100644 drivers/virt/vmgenid.c
>
> diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig
> index 8061e8ef449f..d3276dc2095c 100644
> --- a/drivers/virt/Kconfig
> +++ b/drivers/virt/Kconfig

drivers/virt does not have a maintainer and this code needs one.

> @@ -13,6 +13,15 @@ menuconfig VIRT_DRIVERS
>
>  if VIRT_DRIVERS
>
> +config VMGENID
> +       tristate "Virtual Machine Generation ID driver"
> +       default y

Please make this default m - this code can run as a module and the
feature it relies on is discoverable by udev

> +       depends on ACPI
> +       help
> +         Say Y here to use the hypervisor-provided Virtual Machine Generation ID
> +         to reseed the RNG when the VM is cloned. This is highly recommended if
> +         you intend to do any rollback / cloning / snapshotting of VMs.
> +
>  config FSL_HV_MANAGER
>         tristate "Freescale hypervisor management driver"
>         depends on FSL_SOC
> diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile
> index 3e272ea60cd9..108d0ffcc9aa 100644
> --- a/drivers/virt/Makefile
> +++ b/drivers/virt/Makefile
> @@ -4,6 +4,7 @@
>  #
>
>  obj-$(CONFIG_FSL_HV_MANAGER)   += fsl_hypervisor.o
> +obj-$(CONFIG_VMGENID)          += vmgenid.o
>  obj-y                          += vboxguest/
>
>  obj-$(CONFIG_NITRO_ENCLAVES)   += nitro_enclaves/
> diff --git a/drivers/virt/vmgenid.c b/drivers/virt/vmgenid.c
> new file mode 100644
> index 000000000000..5da4dc8f25e3
> --- /dev/null
> +++ b/drivers/virt/vmgenid.c
> @@ -0,0 +1,121 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Virtual Machine Generation ID driver
> + *
> + * Copyright (C) 2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
> + * Copyright (C) 2020 Amazon. All rights reserved.
> + * Copyright (C) 2018 Red Hat Inc. All rights reserved.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/acpi.h>
> +#include <linux/random.h>
> +
> +ACPI_MODULE_NAME("vmgenid");
> +
> +enum { VMGENID_SIZE = 16 };
> +
> +static struct {
> +       u8 this_id[VMGENID_SIZE];
> +       u8 *next_id;
> +} state;
> +

This state is singular


> +static int vmgenid_acpi_add(struct acpi_device *device)
> +{

... whereas this may be called for multiple instances of the device.
This likely makes no sense, so it is better to reject it here.

Otherwise, the state should be allocated dynamically.

> +       struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER };
> +       union acpi_object *pss;
> +       phys_addr_t phys_addr;
> +       acpi_status status;
> +       int ret = 0;
> +
> +       if (!device)
> +               return -EINVAL;
> +
> +       status = acpi_evaluate_object(device->handle, "ADDR", NULL, &buffer);
> +       if (ACPI_FAILURE(status)) {
> +               ACPI_EXCEPTION((AE_INFO, status, "Evaluating ADDR"));
> +               return -ENODEV;
> +       }
> +       pss = buffer.pointer;
> +       if (!pss || pss->type != ACPI_TYPE_PACKAGE || pss->package.count != 2 ||
> +           pss->package.elements[0].type != ACPI_TYPE_INTEGER ||
> +           pss->package.elements[1].type != ACPI_TYPE_INTEGER) {
> +               ret = -EINVAL;
> +               goto out;
> +       }
> +
> +       phys_addr = (pss->package.elements[0].integer.value << 0) |
> +                   (pss->package.elements[1].integer.value << 32);
> +       state.next_id = acpi_os_map_memory(phys_addr, VMGENID_SIZE);

No need to use acpi_os_map_memory() here, plain memremap() should be fine.

> +       if (!state.next_id) {
> +               ret = -ENOMEM;
> +               goto out;
> +       }
> +       device->driver_data = &state;
> +
> +       memcpy(state.this_id, state.next_id, sizeof(state.this_id));
> +       add_device_randomness(state.this_id, sizeof(state.this_id));
> +
> +out:
> +       ACPI_FREE(buffer.pointer);
> +       return ret;
> +}
> +
> +static int vmgenid_acpi_remove(struct acpi_device *device)
> +{
> +       if (!device || acpi_driver_data(device) != &state)
> +               return -EINVAL;
> +       device->driver_data = NULL;
> +       if (state.next_id)
> +               acpi_os_unmap_memory(state.next_id, VMGENID_SIZE);

memunmap() here

> +       state.next_id = NULL;
> +       return 0;
> +}
> +
> +static void vmgenid_acpi_notify(struct acpi_device *device, u32 event)
> +{
> +       u8 old_id[VMGENID_SIZE];
> +
> +       if (!device || acpi_driver_data(device) != &state)
> +               return;
> +       memcpy(old_id, state.this_id, sizeof(old_id));
> +       memcpy(state.this_id, state.next_id, sizeof(state.this_id));
> +       if (!memcmp(old_id, state.this_id, sizeof(old_id)))
> +               return;

Is this little dance really necessary? I.e., can we just do

add_vmfork_randomness(state.next_id, VMGENID_SIZE)

and be done with it?

And if we cannot, is it ok to just return without some kind of
diagnostic message?

> +       add_vmfork_randomness(state.this_id, sizeof(state.this_id));
> +}
> +
> +static const struct acpi_device_id vmgenid_ids[] = {
> +       {"VMGENID", 0},
> +       {"QEMUVGID", 0},
> +       { },
> +};
> +
> +static struct acpi_driver acpi_driver = {
> +       .name = "vm_generation_id",
> +       .ids = vmgenid_ids,
> +       .owner = THIS_MODULE,
> +       .ops = {
> +               .add = vmgenid_acpi_add,
> +               .remove = vmgenid_acpi_remove,
> +               .notify = vmgenid_acpi_notify,
> +       }
> +};
> +
> +static int __init vmgenid_init(void)
> +{
> +       return acpi_bus_register_driver(&acpi_driver);
> +}
> +
> +static void __exit vmgenid_exit(void)
> +{
> +       acpi_bus_unregister_driver(&acpi_driver);
> +}
> +
> +module_init(vmgenid_init);
> +module_exit(vmgenid_exit);
> +
> +MODULE_DEVICE_TABLE(acpi, vmgenid_ids);
> +MODULE_DESCRIPTION("Virtual Machine Generation ID");
> +MODULE_LICENSE("GPL v2");
> --
> 2.35.1
>
Michael S. Tsirkin Feb. 25, 2022, 11:51 a.m. UTC | #3
On Fri, Feb 25, 2022 at 12:24:05PM +0100, Ard Biesheuvel wrote:
> On Thu, 24 Feb 2022 at 14:39, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> >
> > VM Generation ID is a feature from Microsoft, described at
> > <https://go.microsoft.com/fwlink/?LinkId=260709>, and supported by
> > Hyper-V and QEMU. Its usage is described in Microsoft's RNG whitepaper,
> > <https://aka.ms/win10rng>, as:
> >
> >     If the OS is running in a VM, there is a problem that most
> >     hypervisors can snapshot the state of the machine and later rewind
> >     the VM state to the saved state. This results in the machine running
> >     a second time with the exact same RNG state, which leads to serious
> >     security problems.  To reduce the window of vulnerability, Windows
> >     10 on a Hyper-V VM will detect when the VM state is reset, retrieve
> >     a unique (not random) value from the hypervisor, and reseed the root
> >     RNG with that unique value.  This does not eliminate the
> >     vulnerability, but it greatly reduces the time during which the RNG
> >     system will produce the same outputs as it did during a previous
> >     instantiation of the same VM state.
> >
> > Linux has the same issue, and given that vmgenid is supported already by
> > multiple hypervisors, we can implement more or less the same solution.
> > So this commit wires up the vmgenid ACPI notification to the RNG's newly
> > added add_vmfork_randomness() function.
> >
> > It can be used from qemu via the `-device vmgenid,guid=auto` parameter.
> > After setting that, use `savevm` in the monitor to save the VM state,
> > then quit QEMU, start it again, and use `loadvm`. That will trigger this
> > driver's notify function, which hands the new UUID to the RNG. This is
> > described in <https://git.qemu.org/?p=qemu.git;a=blob;f=docs/specs/vmgenid.txt>.
> > And there are hooks for this in libvirt as well, described in
> > <https://libvirt.org/formatdomain.html#general-metadata>.
> >
> > Note, however, that the treatment of this as a UUID is considered to be
> > an accidental QEMU nuance, per
> > <https://github.com/libguestfs/virt-v2v/blob/master/docs/vm-generation-id-across-hypervisors.txt>,
> > so this driver simply treats these bytes as an opaque 128-bit binary
> > blob, as per the spec. This doesn't really make a difference anyway,
> > considering that's how it ends up when handed to the RNG in the end.
> >
> > This driver builds on prior work from Adrian Catangiu at Amazon, and it
> > is my hope that that team can resume maintenance of this driver.
> >
> > Cc: Adrian Catangiu <adrian@parity.io>
> > Cc: Laszlo Ersek <lersek@redhat.com>
> > Cc: Daniel P. Berrangé <berrange@redhat.com>
> > Cc: Dominik Brodowski <linux@dominikbrodowski.net>
> > Cc: Ard Biesheuvel <ardb@kernel.org>
> > Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> > ---
> >  drivers/virt/Kconfig   |   9 +++
> >  drivers/virt/Makefile  |   1 +
> >  drivers/virt/vmgenid.c | 121 +++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 131 insertions(+)
> >  create mode 100644 drivers/virt/vmgenid.c
> >
> > diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig
> > index 8061e8ef449f..d3276dc2095c 100644
> > --- a/drivers/virt/Kconfig
> > +++ b/drivers/virt/Kconfig
> 
> drivers/virt does not have a maintainer and this code needs one.
> 
> > @@ -13,6 +13,15 @@ menuconfig VIRT_DRIVERS
> >
> >  if VIRT_DRIVERS
> >
> > +config VMGENID
> > +       tristate "Virtual Machine Generation ID driver"
> > +       default y
> 
> Please make this default m - this code can run as a module and the
> feature it relies on is discoverable by udev

Or don't supply a default - I don't see why this has any preference.

> > +       depends on ACPI
> > +       help
> > +         Say Y here to use the hypervisor-provided Virtual Machine Generation ID
> > +         to reseed the RNG when the VM is cloned. This is highly recommended if
> > +         you intend to do any rollback / cloning / snapshotting of VMs.
> > +
> >  config FSL_HV_MANAGER
> >         tristate "Freescale hypervisor management driver"
> >         depends on FSL_SOC
> > diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile
> > index 3e272ea60cd9..108d0ffcc9aa 100644
> > --- a/drivers/virt/Makefile
> > +++ b/drivers/virt/Makefile
> > @@ -4,6 +4,7 @@
> >  #
> >
> >  obj-$(CONFIG_FSL_HV_MANAGER)   += fsl_hypervisor.o
> > +obj-$(CONFIG_VMGENID)          += vmgenid.o
> >  obj-y                          += vboxguest/
> >
> >  obj-$(CONFIG_NITRO_ENCLAVES)   += nitro_enclaves/
> > diff --git a/drivers/virt/vmgenid.c b/drivers/virt/vmgenid.c
> > new file mode 100644
> > index 000000000000..5da4dc8f25e3
> > --- /dev/null
> > +++ b/drivers/virt/vmgenid.c
> > @@ -0,0 +1,121 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Virtual Machine Generation ID driver
> > + *
> > + * Copyright (C) 2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
> > + * Copyright (C) 2020 Amazon. All rights reserved.
> > + * Copyright (C) 2018 Red Hat Inc. All rights reserved.
> > + */
> > +
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/acpi.h>
> > +#include <linux/random.h>
> > +
> > +ACPI_MODULE_NAME("vmgenid");
> > +
> > +enum { VMGENID_SIZE = 16 };
> > +
> > +static struct {
> > +       u8 this_id[VMGENID_SIZE];
> > +       u8 *next_id;
> > +} state;
> > +
> 
> This state is singular
> 
> 
> > +static int vmgenid_acpi_add(struct acpi_device *device)
> > +{
> 
> ... whereas this may be called for multiple instances of the device.
> This likely makes no sense, so it is better to reject it here.
> 
> Otherwise, the state should be allocated dynamically.
> 
> > +       struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER };
> > +       union acpi_object *pss;
> > +       phys_addr_t phys_addr;
> > +       acpi_status status;
> > +       int ret = 0;
> > +
> > +       if (!device)
> > +               return -EINVAL;
> > +
> > +       status = acpi_evaluate_object(device->handle, "ADDR", NULL, &buffer);
> > +       if (ACPI_FAILURE(status)) {
> > +               ACPI_EXCEPTION((AE_INFO, status, "Evaluating ADDR"));
> > +               return -ENODEV;
> > +       }
> > +       pss = buffer.pointer;
> > +       if (!pss || pss->type != ACPI_TYPE_PACKAGE || pss->package.count != 2 ||
> > +           pss->package.elements[0].type != ACPI_TYPE_INTEGER ||
> > +           pss->package.elements[1].type != ACPI_TYPE_INTEGER) {
> > +               ret = -EINVAL;
> > +               goto out;
> > +       }
> > +
> > +       phys_addr = (pss->package.elements[0].integer.value << 0) |
> > +                   (pss->package.elements[1].integer.value << 32);
> > +       state.next_id = acpi_os_map_memory(phys_addr, VMGENID_SIZE);
> 
> No need to use acpi_os_map_memory() here, plain memremap() should be fine.
> 
> > +       if (!state.next_id) {
> > +               ret = -ENOMEM;
> > +               goto out;
> > +       }
> > +       device->driver_data = &state;
> > +
> > +       memcpy(state.this_id, state.next_id, sizeof(state.this_id));
> > +       add_device_randomness(state.this_id, sizeof(state.this_id));
> > +
> > +out:
> > +       ACPI_FREE(buffer.pointer);
> > +       return ret;
> > +}
> > +
> > +static int vmgenid_acpi_remove(struct acpi_device *device)
> > +{
> > +       if (!device || acpi_driver_data(device) != &state)
> > +               return -EINVAL;
> > +       device->driver_data = NULL;
> > +       if (state.next_id)
> > +               acpi_os_unmap_memory(state.next_id, VMGENID_SIZE);
> 
> memunmap() here
> 
> > +       state.next_id = NULL;
> > +       return 0;
> > +}
> > +
> > +static void vmgenid_acpi_notify(struct acpi_device *device, u32 event)
> > +{
> > +       u8 old_id[VMGENID_SIZE];
> > +
> > +       if (!device || acpi_driver_data(device) != &state)
> > +               return;
> > +       memcpy(old_id, state.this_id, sizeof(old_id));
> > +       memcpy(state.this_id, state.next_id, sizeof(state.this_id));
> > +       if (!memcmp(old_id, state.this_id, sizeof(old_id)))
> > +               return;
> 
> Is this little dance really necessary? I.e., can we just do
> 
> add_vmfork_randomness(state.next_id, VMGENID_SIZE)
> 
> and be done with it?
> 
> And if we cannot, is it ok to just return without some kind of
> diagnostic message?
> 
> > +       add_vmfork_randomness(state.this_id, sizeof(state.this_id));
> > +}
> > +
> > +static const struct acpi_device_id vmgenid_ids[] = {
> > +       {"VMGENID", 0},
> > +       {"QEMUVGID", 0},
> > +       { },
> > +};
> > +
> > +static struct acpi_driver acpi_driver = {
> > +       .name = "vm_generation_id",
> > +       .ids = vmgenid_ids,
> > +       .owner = THIS_MODULE,
> > +       .ops = {
> > +               .add = vmgenid_acpi_add,
> > +               .remove = vmgenid_acpi_remove,
> > +               .notify = vmgenid_acpi_notify,
> > +       }
> > +};
> > +
> > +static int __init vmgenid_init(void)
> > +{
> > +       return acpi_bus_register_driver(&acpi_driver);
> > +}
> > +
> > +static void __exit vmgenid_exit(void)
> > +{
> > +       acpi_bus_unregister_driver(&acpi_driver);
> > +}
> > +
> > +module_init(vmgenid_init);
> > +module_exit(vmgenid_exit);
> > +
> > +MODULE_DEVICE_TABLE(acpi, vmgenid_ids);
> > +MODULE_DESCRIPTION("Virtual Machine Generation ID");
> > +MODULE_LICENSE("GPL v2");
> > --
> > 2.35.1
> >
Jason A. Donenfeld Feb. 25, 2022, noon UTC | #4
On Fri, Feb 25, 2022 at 12:24 PM Ard Biesheuvel <ardb@kernel.org> wrote:
>
> > @@ -13,6 +13,15 @@ menuconfig VIRT_DRIVERS
> >
> >  if VIRT_DRIVERS
> >
> > +config VMGENID
> > +       tristate "Virtual Machine Generation ID driver"
> > +       default y
>
> Please make this default m - this code can run as a module and the
> feature it relies on is discoverable by udev

Will do.

> > index 000000000000..5da4dc8f25e3
> > --- /dev/null
> > +++ b/drivers/virt/vmgenid.c
> > @@ -0,0 +1,121 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Virtual Machine Generation ID driver
> > + *
> > + * Copyright (C) 2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
> > + * Copyright (C) 2020 Amazon. All rights reserved.
> > + * Copyright (C) 2018 Red Hat Inc. All rights reserved.
> > + */
> > +
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/acpi.h>
> > +#include <linux/random.h>
> > +
> > +ACPI_MODULE_NAME("vmgenid");
> > +
> > +enum { VMGENID_SIZE = 16 };
> > +
> > +static struct {
> > +       u8 this_id[VMGENID_SIZE];
> > +       u8 *next_id;
> > +} state;
> > +
>
> This state is singular
>
>
> > +static int vmgenid_acpi_add(struct acpi_device *device)
> > +{
>
> ... whereas this may be called for multiple instances of the device.
> This likely makes no sense, so it is better to reject it here.

Like, return early if it's called a second time? I can do that.

>
> > +       struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER };
> > +       union acpi_object *pss;
> > +       phys_addr_t phys_addr;
> > +       acpi_status status;
> > +       int ret = 0;
> > +
> > +       if (!device)
> > +               return -EINVAL;
> > +
> > +       status = acpi_evaluate_object(device->handle, "ADDR", NULL, &buffer);
> > +       if (ACPI_FAILURE(status)) {
> > +               ACPI_EXCEPTION((AE_INFO, status, "Evaluating ADDR"));
> > +               return -ENODEV;
> > +       }
> > +       pss = buffer.pointer;
> > +       if (!pss || pss->type != ACPI_TYPE_PACKAGE || pss->package.count != 2 ||
> > +           pss->package.elements[0].type != ACPI_TYPE_INTEGER ||
> > +           pss->package.elements[1].type != ACPI_TYPE_INTEGER) {
> > +               ret = -EINVAL;
> > +               goto out;
> > +       }
> > +
> > +       phys_addr = (pss->package.elements[0].integer.value << 0) |
> > +                   (pss->package.elements[1].integer.value << 32);
> > +       state.next_id = acpi_os_map_memory(phys_addr, VMGENID_SIZE);
>
> No need to use acpi_os_map_memory() here, plain memremap() should be fine.

As we discussed online, I'll use dev_memremap(), and then get rid of
the `.remove = ` function all together.

> > +               acpi_os_unmap_memory(state.next_id, VMGENID_SIZE);
>
> memunmap() here

And then as discussed, this goes away too.

>
> > +       state.next_id = NULL;
> > +       return 0;
> > +}
> > +
> > +static void vmgenid_acpi_notify(struct acpi_device *device, u32 event)
> > +{
> > +       u8 old_id[VMGENID_SIZE];
> > +
> > +       if (!device || acpi_driver_data(device) != &state)
> > +               return;
> > +       memcpy(old_id, state.this_id, sizeof(old_id));
> > +       memcpy(state.this_id, state.next_id, sizeof(state.this_id));
> > +       if (!memcmp(old_id, state.this_id, sizeof(old_id)))
> > +               return;
>
> Is this little dance really necessary? I.e., can we just do
>
> add_vmfork_randomness(state.next_id, VMGENID_SIZE)
>
> and be done with it?

Yes it is. vmfork induces a "premature-next" which we really should
not do unless it's really necessary. It torches the whole entropy
pool. In the even that this notifier fires but the ID hasn't changed,
we shouldn't touch anything.

Thanks for the review. v+1 coming up shortly.
Jason A. Donenfeld Feb. 25, 2022, 12:01 p.m. UTC | #5
On Fri, Feb 25, 2022 at 12:52 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >  if VIRT_DRIVERS
> > >
> > > +config VMGENID
> > > +       tristate "Virtual Machine Generation ID driver"
> > > +       default y
> >
> > Please make this default m - this code can run as a module and the
> > feature it relies on is discoverable by udev
>
> Or don't supply a default - I don't see why this has any preference.

It's inside of VIRT_DRIVERS. If you enabled VIRT_DRIVERS, you more
than likely want and need this.
diff mbox series

Patch

diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig
index 8061e8ef449f..d3276dc2095c 100644
--- a/drivers/virt/Kconfig
+++ b/drivers/virt/Kconfig
@@ -13,6 +13,15 @@  menuconfig VIRT_DRIVERS
 
 if VIRT_DRIVERS
 
+config VMGENID
+	tristate "Virtual Machine Generation ID driver"
+	default y
+	depends on ACPI
+	help
+	  Say Y here to use the hypervisor-provided Virtual Machine Generation ID
+	  to reseed the RNG when the VM is cloned. This is highly recommended if
+	  you intend to do any rollback / cloning / snapshotting of VMs.
+
 config FSL_HV_MANAGER
 	tristate "Freescale hypervisor management driver"
 	depends on FSL_SOC
diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile
index 3e272ea60cd9..108d0ffcc9aa 100644
--- a/drivers/virt/Makefile
+++ b/drivers/virt/Makefile
@@ -4,6 +4,7 @@ 
 #
 
 obj-$(CONFIG_FSL_HV_MANAGER)	+= fsl_hypervisor.o
+obj-$(CONFIG_VMGENID)		+= vmgenid.o
 obj-y				+= vboxguest/
 
 obj-$(CONFIG_NITRO_ENCLAVES)	+= nitro_enclaves/
diff --git a/drivers/virt/vmgenid.c b/drivers/virt/vmgenid.c
new file mode 100644
index 000000000000..5da4dc8f25e3
--- /dev/null
+++ b/drivers/virt/vmgenid.c
@@ -0,0 +1,121 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Virtual Machine Generation ID driver
+ *
+ * Copyright (C) 2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright (C) 2020 Amazon. All rights reserved.
+ * Copyright (C) 2018 Red Hat Inc. All rights reserved.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/random.h>
+
+ACPI_MODULE_NAME("vmgenid");
+
+enum { VMGENID_SIZE = 16 };
+
+static struct {
+	u8 this_id[VMGENID_SIZE];
+	u8 *next_id;
+} state;
+
+static int vmgenid_acpi_add(struct acpi_device *device)
+{
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER };
+	union acpi_object *pss;
+	phys_addr_t phys_addr;
+	acpi_status status;
+	int ret = 0;
+
+	if (!device)
+		return -EINVAL;
+
+	status = acpi_evaluate_object(device->handle, "ADDR", NULL, &buffer);
+	if (ACPI_FAILURE(status)) {
+		ACPI_EXCEPTION((AE_INFO, status, "Evaluating ADDR"));
+		return -ENODEV;
+	}
+	pss = buffer.pointer;
+	if (!pss || pss->type != ACPI_TYPE_PACKAGE || pss->package.count != 2 ||
+	    pss->package.elements[0].type != ACPI_TYPE_INTEGER ||
+	    pss->package.elements[1].type != ACPI_TYPE_INTEGER) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	phys_addr = (pss->package.elements[0].integer.value << 0) |
+		    (pss->package.elements[1].integer.value << 32);
+	state.next_id = acpi_os_map_memory(phys_addr, VMGENID_SIZE);
+	if (!state.next_id) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	device->driver_data = &state;
+
+	memcpy(state.this_id, state.next_id, sizeof(state.this_id));
+	add_device_randomness(state.this_id, sizeof(state.this_id));
+
+out:
+	ACPI_FREE(buffer.pointer);
+	return ret;
+}
+
+static int vmgenid_acpi_remove(struct acpi_device *device)
+{
+	if (!device || acpi_driver_data(device) != &state)
+		return -EINVAL;
+	device->driver_data = NULL;
+	if (state.next_id)
+		acpi_os_unmap_memory(state.next_id, VMGENID_SIZE);
+	state.next_id = NULL;
+	return 0;
+}
+
+static void vmgenid_acpi_notify(struct acpi_device *device, u32 event)
+{
+	u8 old_id[VMGENID_SIZE];
+
+	if (!device || acpi_driver_data(device) != &state)
+		return;
+	memcpy(old_id, state.this_id, sizeof(old_id));
+	memcpy(state.this_id, state.next_id, sizeof(state.this_id));
+	if (!memcmp(old_id, state.this_id, sizeof(old_id)))
+		return;
+	add_vmfork_randomness(state.this_id, sizeof(state.this_id));
+}
+
+static const struct acpi_device_id vmgenid_ids[] = {
+	{"VMGENID", 0},
+	{"QEMUVGID", 0},
+	{ },
+};
+
+static struct acpi_driver acpi_driver = {
+	.name = "vm_generation_id",
+	.ids = vmgenid_ids,
+	.owner = THIS_MODULE,
+	.ops = {
+		.add = vmgenid_acpi_add,
+		.remove = vmgenid_acpi_remove,
+		.notify = vmgenid_acpi_notify,
+	}
+};
+
+static int __init vmgenid_init(void)
+{
+	return acpi_bus_register_driver(&acpi_driver);
+}
+
+static void __exit vmgenid_exit(void)
+{
+	acpi_bus_unregister_driver(&acpi_driver);
+}
+
+module_init(vmgenid_init);
+module_exit(vmgenid_exit);
+
+MODULE_DEVICE_TABLE(acpi, vmgenid_ids);
+MODULE_DESCRIPTION("Virtual Machine Generation ID");
+MODULE_LICENSE("GPL v2");