Message ID | 1254953315-5761-10-git-send-email-glommer@redhat.com |
---|---|
State | Changes Requested |
Headers | show |
Glauber Costa wrote: > This option deprecates --enable-kvm. It is a more flexible option, > that makes use of qemu-opts, and allow us to pass on options to enable or > disable kernel irqchip, for example. > With proper qdev support, you could select kvm device models based on -device so I think this option isn't all that useful. What I'd like to see in the interim is a kvm specific machine type that's defaulted to if kvm is enabled. I think this would be useful not only for enabling things like in-kernel apic, but also for selecting a default cpu model. > Signed-off-by: Glauber Costa <glommer@redhat.com> > --- > kvm-all.c | 1 + > kvm.h | 1 + > qemu-config.c | 16 ++++++++++++++++ > qemu-config.h | 1 + > qemu-options.hx | 13 +++++++++---- > vl.c | 11 +++++++++++ > 6 files changed, 39 insertions(+), 4 deletions(-) > > diff --git a/kvm-all.c b/kvm-all.c > index f33354d..b31d085 100644 > --- a/kvm-all.c > +++ b/kvm-all.c > @@ -51,6 +51,7 @@ typedef struct KVMSlot > typedef struct kvm_dirty_log KVMDirtyLog; > > int kvm_allowed = 0; > +int kvm_use_kernel_chip = 0; > > struct KVMState > { > diff --git a/kvm.h b/kvm.h > index f0c9201..49a2b56 100644 > --- a/kvm.h > +++ b/kvm.h > @@ -20,6 +20,7 @@ > > #ifdef CONFIG_KVM > extern int kvm_allowed; > +extern int kvm_use_kernel_chip; > > #define kvm_enabled() (kvm_allowed) > #else > diff --git a/qemu-config.c b/qemu-config.c > index bafaea2..9461766 100644 > --- a/qemu-config.c > +++ b/qemu-config.c > @@ -184,12 +184,28 @@ QemuOptsList qemu_rtc_opts = { > }, > }; > > +QemuOptsList qemu_kvm_opts = { > + .name = "kvm", > + .head = QTAILQ_HEAD_INITIALIZER(qemu_kvm_opts.head), > + .desc = { > + { > + .name = "irqchip-in-kernel", > + .type = QEMU_OPT_BOOL, > + },{ > + .name = "enabled", > + .type = QEMU_OPT_BOOL, > + }, > + { /* end if list */ } > + }, > +}; > + > static QemuOptsList *lists[] = { > &qemu_drive_opts, > &qemu_chardev_opts, > &qemu_device_opts, > &qemu_net_opts, > &qemu_rtc_opts, > + &qemu_kvm_opts, > NULL, > }; > > diff --git a/qemu-config.h b/qemu-config.h > index cdad5ac..58cead2 100644 > --- a/qemu-config.h > +++ b/qemu-config.h > @@ -6,6 +6,7 @@ extern QemuOptsList qemu_chardev_opts; > extern QemuOptsList qemu_device_opts; > extern QemuOptsList qemu_net_opts; > extern QemuOptsList qemu_rtc_opts; > +extern QemuOptsList qemu_kvm_opts; > > int qemu_set_option(const char *str); > > diff --git a/qemu-options.hx b/qemu-options.hx > index 3dd76b3..1cb8431 100644 > --- a/qemu-options.hx > +++ b/qemu-options.hx > @@ -1431,15 +1431,20 @@ Set the filename for the BIOS. > ETEXI > > #ifdef CONFIG_KVM > -DEF("enable-kvm", 0, QEMU_OPTION_enable_kvm, \ > - "-enable-kvm enable KVM full virtualization support\n") > -#endif > +HXCOMM Options deprecated by -kvm > +DEF("enable-kvm", 0, QEMU_OPTION_enable_kvm, "") > + > +DEF("kvm", HAS_ARG, QEMU_OPTION_kvm, \ > + "-kvm enable=on|off,irqchip-in-kernel=on|off\n" \ > + " enable KVM full virtualization support\n") > STEXI > -@item -enable-kvm > +@item -kvm [enable=on|off][,irqchip-in-kernel=on|off] > Enable KVM full virtualization support. This option is only available > if KVM support is enabled when compiling. > ETEXI > > +#endif > + > #ifdef CONFIG_XEN > DEF("xen-domid", HAS_ARG, QEMU_OPTION_xen_domid, > "-xen-domid id specify xen guest domain id\n") > diff --git a/vl.c b/vl.c > index afe01af..a6f9eb7 100644 > --- a/vl.c > +++ b/vl.c > @@ -5353,6 +5353,17 @@ int main(int argc, char **argv, char **envp) > case QEMU_OPTION_enable_kvm: > kvm_allowed = 1; > break; > + case QEMU_OPTION_kvm: > + > + opts = qemu_opts_parse(&qemu_kvm_opts, optarg, NULL); > + if (!opts) { > + fprintf(stderr, "parse error: %s\n", optarg); > + exit(1); > + } > + > + kvm_allowed = qemu_opt_get_bool(opts, "enabled", 1); > + kvm_use_kernel_chip = qemu_opt_get_bool(opts, "irqchip-in-kernel", 1); > + break; > #endif > case QEMU_OPTION_usb: > usb_enabled = 1; >
On Wed, Oct 07, 2009 at 06:00:34PM -0500, Anthony Liguori wrote: > Glauber Costa wrote: >> This option deprecates --enable-kvm. It is a more flexible option, >> that makes use of qemu-opts, and allow us to pass on options to enable or >> disable kernel irqchip, for example. >> > > With proper qdev support, you could select kvm device models based on > -device so I think this option isn't all that useful. > > What I'd like to see in the interim is a kvm specific machine type > that's defaulted to if kvm is enabled. I think this would be useful not > only for enabling things like in-kernel apic, but also for selecting a > default cpu model. I don't really follow. even if we have qdev on the irq controllers, one could still come up with situations in which we'd like to force the use of one device over another. > >> Signed-off-by: Glauber Costa <glommer@redhat.com> >> --- >> kvm-all.c | 1 + >> kvm.h | 1 + >> qemu-config.c | 16 ++++++++++++++++ >> qemu-config.h | 1 + >> qemu-options.hx | 13 +++++++++---- >> vl.c | 11 +++++++++++ >> 6 files changed, 39 insertions(+), 4 deletions(-) >> >> diff --git a/kvm-all.c b/kvm-all.c >> index f33354d..b31d085 100644 >> --- a/kvm-all.c >> +++ b/kvm-all.c >> @@ -51,6 +51,7 @@ typedef struct KVMSlot >> typedef struct kvm_dirty_log KVMDirtyLog; >> >> int kvm_allowed = 0; >> +int kvm_use_kernel_chip = 0; >> >> struct KVMState >> { >> diff --git a/kvm.h b/kvm.h >> index f0c9201..49a2b56 100644 >> --- a/kvm.h >> +++ b/kvm.h >> @@ -20,6 +20,7 @@ >> >> #ifdef CONFIG_KVM >> extern int kvm_allowed; >> +extern int kvm_use_kernel_chip; >> >> #define kvm_enabled() (kvm_allowed) >> #else >> diff --git a/qemu-config.c b/qemu-config.c >> index bafaea2..9461766 100644 >> --- a/qemu-config.c >> +++ b/qemu-config.c >> @@ -184,12 +184,28 @@ QemuOptsList qemu_rtc_opts = { >> }, >> }; >> >> +QemuOptsList qemu_kvm_opts = { >> + .name = "kvm", >> + .head = QTAILQ_HEAD_INITIALIZER(qemu_kvm_opts.head), >> + .desc = { >> + { >> + .name = "irqchip-in-kernel", >> + .type = QEMU_OPT_BOOL, >> + },{ >> + .name = "enabled", >> + .type = QEMU_OPT_BOOL, >> + }, >> + { /* end if list */ } >> + }, >> +}; >> + >> static QemuOptsList *lists[] = { >> &qemu_drive_opts, >> &qemu_chardev_opts, >> &qemu_device_opts, >> &qemu_net_opts, >> &qemu_rtc_opts, >> + &qemu_kvm_opts, >> NULL, >> }; >> >> diff --git a/qemu-config.h b/qemu-config.h >> index cdad5ac..58cead2 100644 >> --- a/qemu-config.h >> +++ b/qemu-config.h >> @@ -6,6 +6,7 @@ extern QemuOptsList qemu_chardev_opts; >> extern QemuOptsList qemu_device_opts; >> extern QemuOptsList qemu_net_opts; >> extern QemuOptsList qemu_rtc_opts; >> +extern QemuOptsList qemu_kvm_opts; >> >> int qemu_set_option(const char *str); >> >> diff --git a/qemu-options.hx b/qemu-options.hx >> index 3dd76b3..1cb8431 100644 >> --- a/qemu-options.hx >> +++ b/qemu-options.hx >> @@ -1431,15 +1431,20 @@ Set the filename for the BIOS. >> ETEXI >> >> #ifdef CONFIG_KVM >> -DEF("enable-kvm", 0, QEMU_OPTION_enable_kvm, \ >> - "-enable-kvm enable KVM full virtualization support\n") >> -#endif >> +HXCOMM Options deprecated by -kvm >> +DEF("enable-kvm", 0, QEMU_OPTION_enable_kvm, "") >> + >> +DEF("kvm", HAS_ARG, QEMU_OPTION_kvm, \ >> + "-kvm enable=on|off,irqchip-in-kernel=on|off\n" \ >> + " enable KVM full virtualization support\n") >> STEXI >> -@item -enable-kvm >> +@item -kvm [enable=on|off][,irqchip-in-kernel=on|off] >> Enable KVM full virtualization support. This option is only available >> if KVM support is enabled when compiling. >> ETEXI >> >> +#endif >> + >> #ifdef CONFIG_XEN >> DEF("xen-domid", HAS_ARG, QEMU_OPTION_xen_domid, >> "-xen-domid id specify xen guest domain id\n") >> diff --git a/vl.c b/vl.c >> index afe01af..a6f9eb7 100644 >> --- a/vl.c >> +++ b/vl.c >> @@ -5353,6 +5353,17 @@ int main(int argc, char **argv, char **envp) >> case QEMU_OPTION_enable_kvm: >> kvm_allowed = 1; >> break; >> + case QEMU_OPTION_kvm: >> + >> + opts = qemu_opts_parse(&qemu_kvm_opts, optarg, NULL); >> + if (!opts) { >> + fprintf(stderr, "parse error: %s\n", optarg); >> + exit(1); >> + } >> + >> + kvm_allowed = qemu_opt_get_bool(opts, "enabled", 1); >> + kvm_use_kernel_chip = qemu_opt_get_bool(opts, "irqchip-in-kernel", 1); >> + break; >> #endif >> case QEMU_OPTION_usb: >> usb_enabled = 1; >> > > > -- > Regards, > > Anthony Liguori >
Glauber Costa wrote: > On Wed, Oct 07, 2009 at 06:00:34PM -0500, Anthony Liguori wrote: > >> Glauber Costa wrote: >> >>> This option deprecates --enable-kvm. It is a more flexible option, >>> that makes use of qemu-opts, and allow us to pass on options to enable or >>> disable kernel irqchip, for example. >>> >>> >> With proper qdev support, you could select kvm device models based on >> -device so I think this option isn't all that useful. >> >> What I'd like to see in the interim is a kvm specific machine type >> that's defaulted to if kvm is enabled. I think this would be useful not >> only for enabling things like in-kernel apic, but also for selecting a >> default cpu model. >> > I don't really follow. > > even if we have qdev on the irq controllers, one could still come up with > situations in which we'd like to force the use of one device over another. > Right, my assumption is that the devices will have different qdev names and therefore a user can select which one gets used. Right now, -device is just additive. I'm not sure the best way to express something like, replace this existing device with this new device. Maybe some trickery with id=. Gerd, any thoughts? Regards, Anthony Liguori
On 10/08/2009 01:00 AM, Anthony Liguori wrote: > Glauber Costa wrote: >> This option deprecates --enable-kvm. It is a more flexible option, >> that makes use of qemu-opts, and allow us to pass on options to >> enable or >> disable kernel irqchip, for example. > > With proper qdev support, you could select kvm device models based on > -device so I think this option isn't all that useful. qdev (mostly?) is about the guest interface; this is more about the host implementation, so akin to -drive file= and cache=. > What I'd like to see in the interim is a kvm specific machine type > that's defaulted to if kvm is enabled. I think this would be useful > not only for enabling things like in-kernel apic, but also for > selecting a default cpu model. We should make a distinction between guest-visible changes and accelerators like kvm-irqchip and vhost.
Avi Kivity wrote: > On 10/08/2009 01:00 AM, Anthony Liguori wrote: >> Glauber Costa wrote: >>> This option deprecates --enable-kvm. It is a more flexible option, >>> that makes use of qemu-opts, and allow us to pass on options to >>> enable or >>> disable kernel irqchip, for example. >> >> With proper qdev support, you could select kvm device models based on >> -device so I think this option isn't all that useful. > > qdev (mostly?) is about the guest interface; this is more about the > host implementation, so akin to -drive file= and cache=. If we make the kvm device a separate device, then it's about choosing which device to use. Regardless, even if it were a single device, it's still a property of the device so a qdev property would be the logical way to expose it. >> What I'd like to see in the interim is a kvm specific machine type >> that's defaulted to if kvm is enabled. I think this would be useful >> not only for enabling things like in-kernel apic, but also for >> selecting a default cpu model. > > We should make a distinction between guest-visible changes and > accelerators like kvm-irqchip and vhost. They are different device models that happen to implement the same guest-visible device. Regards, Anthony Liguori
On 10/08/2009 04:23 PM, Anthony Liguori wrote: >>> What I'd like to see in the interim is a kvm specific machine type >>> that's defaulted to if kvm is enabled. I think this would be useful >>> not only for enabling things like in-kernel apic, but also for >>> selecting a default cpu model. >> >> We should make a distinction between guest-visible changes and >> accelerators like kvm-irqchip and vhost. > > They are different device models that happen to implement the same > guest-visible device. I think this separation is artificial. From both the guest's view and the user's view, there's no difference between qemu ioapic and kvm ioapic (modulo bugs). To me this indicates they're the same device. We'll have the same problem with vhost-net, only there the duplication will be much greater if we split the implementation.
Avi Kivity wrote: > On 10/08/2009 04:23 PM, Anthony Liguori wrote: >>>> What I'd like to see in the interim is a kvm specific machine type >>>> that's defaulted to if kvm is enabled. I think this would be >>>> useful not only for enabling things like in-kernel apic, but also >>>> for selecting a default cpu model. >>> >>> We should make a distinction between guest-visible changes and >>> accelerators like kvm-irqchip and vhost. >> >> They are different device models that happen to implement the same >> guest-visible device. > > I think this separation is artificial. From both the guest's view and > the user's view, there's no difference between qemu ioapic and kvm > ioapic (modulo bugs). To me this indicates they're the same device. That's not always been true historically. For instance, the in-kernel pit does interrupt catchup whereas the userspace pit does not. > We'll have the same problem with vhost-net, only there the duplication > will be much greater if we split the implementation. I'd rather not treat vhost-net like a device model and instead treat it like a -net backend. If we can bounce requests through userspace (and we can), we should be able to use it for any device model. In the case of virtio-net, we should be able to short-circuit things such that we don't have to go through userspace. It's admittedly very hairy without a point-to-point net abstraction. Regards, Anthony Liguori
On Thu, Oct 08, 2009 at 04:30:17PM +0200, Avi Kivity wrote: > On 10/08/2009 04:23 PM, Anthony Liguori wrote: > >>>What I'd like to see in the interim is a kvm specific machine > >>>type that's defaulted to if kvm is enabled. I think this > >>>would be useful not only for enabling things like in-kernel > >>>apic, but also for selecting a default cpu model. > >> > >>We should make a distinction between guest-visible changes and > >>accelerators like kvm-irqchip and vhost. > > > >They are different device models that happen to implement the same > >guest-visible device. > > I think this separation is artificial. From both the guest's view > and the user's view, there's no difference between qemu ioapic and > kvm ioapic (modulo bugs). To me this indicates they're the same > device. > Same device but different implementation. The only two things that are used from qemu apic implementation with in-kernel apic are migration and reset. Both depends on implementation. Reusing code will require us to keep implementation in sync. This reuse is a constant source of bugs BTW. > We'll have the same problem with vhost-net, only there the > duplication will be much greater if we split the implementation. > > -- > error compiling committee.c: too many arguments to function > > -- Gleb.
On 10/08/2009 04:33 PM, Anthony Liguori wrote: > Avi Kivity wrote: >> On 10/08/2009 04:23 PM, Anthony Liguori wrote: >>>>> What I'd like to see in the interim is a kvm specific machine type >>>>> that's defaulted to if kvm is enabled. I think this would be >>>>> useful not only for enabling things like in-kernel apic, but also >>>>> for selecting a default cpu model. >>>> >>>> We should make a distinction between guest-visible changes and >>>> accelerators like kvm-irqchip and vhost. >>> >>> They are different device models that happen to implement the same >>> guest-visible device. >> >> I think this separation is artificial. From both the guest's view >> and the user's view, there's no difference between qemu ioapic and >> kvm ioapic (modulo bugs). To me this indicates they're the same device. > > That's not always been true historically. For instance, the in-kernel > pit does interrupt catchup whereas the userspace pit does not. That's a bug in the userspace pit :) I'm worried about things like 'info pit' needing dual implementations. >> We'll have the same problem with vhost-net, only there the >> duplication will be much greater if we split the implementation. > > I'd rather not treat vhost-net like a device model and instead treat > it like a -net backend. If we can bounce requests through userspace > (and we can), we should be able to use it for any device model. In > the case of virtio-net, we should be able to short-circuit things such > that we don't have to go through userspace. It's admittedly very > hairy without a point-to-point net abstraction. > Interesting idea. I think it'll be hairy even with a point-to-point net abstraction, but it's worth trying.
Avi Kivity wrote: > That's a bug in the userspace pit :) > > I'm worried about things like 'info pit' needing dual implementations. You mean info qdm? It'll Just Work. >>> We'll have the same problem with vhost-net, only there the >>> duplication will be much greater if we split the implementation. >> >> I'd rather not treat vhost-net like a device model and instead treat >> it like a -net backend. If we can bounce requests through userspace >> (and we can), we should be able to use it for any device model. In >> the case of virtio-net, we should be able to short-circuit things >> such that we don't have to go through userspace. It's admittedly >> very hairy without a point-to-point net abstraction. >> > > Interesting idea. I think it'll be hairy even with a point-to-point > net abstraction, but it's worth trying. Could be interesting trying to tie the e1000 to vhost-net performance wise... Regards, Anthony Liguori
On 10/08/2009 04:56 PM, Anthony Liguori wrote: > Avi Kivity wrote: >> That's a bug in the userspace pit :) >> >> I'm worried about things like 'info pit' needing dual implementations. > > You mean info qdm? > > It'll Just Work. > I mean (qemu) info pit pit: counter 0x1234 period 0x3453 or something. >> Interesting idea. I think it'll be hairy even with a point-to-point >> net abstraction, but it's worth trying. > > Could be interesting trying to tie the e1000 to vhost-net performance > wise... I don't think there's much chance of that - e1000 places important registers (like interrupt disable) in BARs whereas virtio puts them in the ring.
Avi Kivity wrote: > On 10/08/2009 04:56 PM, Anthony Liguori wrote: >> Avi Kivity wrote: >>> That's a bug in the userspace pit :) >>> >>> I'm worried about things like 'info pit' needing dual implementations. >> >> You mean info qdm? >> >> It'll Just Work. >> > > I mean > > (qemu) info pit > pit: counter 0x1234 period 0x3453 > > or something. When the pit is qdev converted, it will have properties. counter and period could be two of those properties in which case, 'info qtree i8254' would show the above output or 'info qtree i8254-kvm' would show it. In fact, a very good reason to have two device models is that there are certain properties that make sense for i8254-kvm that don't make sense for i8254. For instance, you could expose information about interrupt catch up. Regards, Anthony Liguori
>> (qemu) info pit >> pit: counter 0x1234 period 0x3453 >> >> or something. > > When the pit is qdev converted, it will have properties. counter and > period could be two of those properties in which case, 'info qtree > i8254' would show the above output or 'info qtree i8254-kvm' would show it. Device state belongs more into vmstate than into properties. Ideally each qdev device has the vmstate entry filled as well, so looking up the device in the tree then dump the state can be done. cheers, Gerd
On 10/08/09 01:28, Anthony Liguori wrote: > Glauber Costa wrote: >> even if we have qdev on the irq controllers, one could still come up with >> situations in which we'd like to force the use of one device over >> another. > > Right, my assumption is that the devices will have different qdev names > and therefore a user can select which one gets used. > > Right now, -device is just additive. I'm not sure the best way to > express something like, replace this existing device with this new > device. Maybe some trickery with id=. Gerd, any thoughts? -M $machine gives you a barebone machine with all core devices which belong to it. You can't easily remove and/or replace devices. Especially not something central as the IRQ controller. But also no other core components, i.e. you wouldn't stick a piix4 ide controller into a Q35 machine. Just say 'no'. That leaves two options: (1) create two devices, create new machines which use the kvm versions (aka -M pc-kvm). (2) make using the in-kernel kvm code a device property. I think (2) is the way to go. Especially with multiple devices having kvm support (apic, pit, more?) (1) becomes unmanageable. Right now there is no way to set properties for devices *not* added via -device[1]. We'll need support for that anyway though (to set rtc properties for example), so we can have a apic.kvm property and switch between user/kernel implementions using it. cheers, Gerd [1] my idea for implementing that is extending the compat property mechanism.
Gerd Hoffmann wrote: > On 10/08/09 01:28, Anthony Liguori wrote: >> Glauber Costa wrote: >>> even if we have qdev on the irq controllers, one could still come up >>> with >>> situations in which we'd like to force the use of one device over >>> another. >> >> Right, my assumption is that the devices will have different qdev names >> and therefore a user can select which one gets used. >> >> Right now, -device is just additive. I'm not sure the best way to >> express something like, replace this existing device with this new >> device. Maybe some trickery with id=. Gerd, any thoughts? > > -M $machine gives you a barebone machine with all core devices which > belong to it. You can't easily remove and/or replace devices. > Especially not something central as the IRQ controller. But also no > other core components, i.e. you wouldn't stick a piix4 ide controller > into a Q35 machine. Just say 'no'. This seems fundamentally flawed to me. If you cannot remove a device from a machine using command line options, then how do we support something like -net none? Do we make the default machine not contain a nic? The value of a machine type to a user is that it presents a useful machine--not a barebones machine. A user should not have to think about which type of nic they need or whether they want to enable usb. > > That leaves two options: > > (1) create two devices, create new machines which use the kvm > versions (aka -M pc-kvm). > (2) make using the in-kernel kvm code a device property. (3) Add the ability to remove device from a machine type.
On 10/12/09 16:04, Anthony Liguori wrote: > Gerd Hoffmann wrote: >> -M $machine gives you a barebone machine with all core devices which >> belong to it. You can't easily remove and/or replace devices. >> Especially not something central as the IRQ controller. But also no >> other core components, i.e. you wouldn't stick a piix4 ide controller >> into a Q35 machine. Just say 'no'. > > This seems fundamentally flawed to me. If you cannot remove a device > from a machine using command line options, then how do we support > something like -net none? '-net none' does not remove a nic. It makes qemu not add the default nic. > Do we make the default machine not contain a nic? > > The value of a machine type to a user is that it presents a useful > machine--not a barebones machine. A user should not have to think about > which type of nic they need or whether they want to enable usb. There are a bunch of places where qemu does something like if (user-did-not-specify-a-$foo-device) { add_$foo_device_with_defaults() } That applies (for pc) to: * nic. * vga. * serial port. * parallel port. * cdrom drive. For all of these (except cdrom) you can disable the creation of the default device via "-$dev none". I don't consider them core devices. Core devices are the ones which a machine can't live without, i.e. rtc, pic, ... You can't add/remove core stuff like rtc, interrupt controller. The virtual machine will simply not work then ... >> (1) create two devices, create new machines which use the kvm >> versions (aka -M pc-kvm). >> (2) make using the in-kernel kvm code a device property. > > (3) Add the ability to remove device from a machine type. -rtc none ? Have fun booting your machine. cheers, Gerd
Gerd Hoffmann wrote: > On 10/12/09 16:04, Anthony Liguori wrote: >> Gerd Hoffmann wrote: >>> -M $machine gives you a barebone machine with all core devices which >>> belong to it. You can't easily remove and/or replace devices. >>> Especially not something central as the IRQ controller. But also no >>> other core components, i.e. you wouldn't stick a piix4 ide controller >>> into a Q35 machine. Just say 'no'. >> >> This seems fundamentally flawed to me. If you cannot remove a device >> from a machine using command line options, then how do we support >> something like -net none? > > '-net none' does not remove a nic. It makes qemu not add the default > nic. qemu should not have the concept of a default nic. Instead, if you choose the pc machine type, by default you get an e1000. I think it's also reasonable to get a USB controller too along with the balloon driver and anything else we think a user could benefit from. If you give devices well known ids, then changing a default device is not really that big of a deal. It involves removing the device and adding a new device with that same device id. For platform devices, like the interrupt controller/pic, the same principle could be applied to switch out a userspace irqchip/pit with the kvm kernel implementations.
On 10/12/09 18:31, Anthony Liguori wrote: > Gerd Hoffmann wrote: >> On 10/12/09 16:04, Anthony Liguori wrote: >>> Gerd Hoffmann wrote: >>>> -M $machine gives you a barebone machine with all core devices which >>>> belong to it. You can't easily remove and/or replace devices. >>>> Especially not something central as the IRQ controller. But also no >>>> other core components, i.e. you wouldn't stick a piix4 ide controller >>>> into a Q35 machine. Just say 'no'. >>> >>> This seems fundamentally flawed to me. If you cannot remove a device >>> from a machine using command line options, then how do we support >>> something like -net none? >> >> '-net none' does not remove a nic. It makes qemu not add the default nic. > > qemu should not have the concept of a default nic. Well, right now it has (likewise for serial, vga, ...). And it causes problems with the qdev way of managing devices, so we have to find a way to deal with it. Right now the default devices are tied to a command line switch, i.e. in case there isn't a -serial switch specified qemu will add a default serial device, even in case one was added via -device isa-serial. > Instead, if you > choose the pc machine type, by default you get an e1000. I think it's > also reasonable to get a USB controller too along with the balloon > driver and anything else we think a user could benefit from. Hmm. It makes sense to have a usable default or example configuration. I don't think the machine type should be that though. IMO the machine type should be more like a chipset, i.e. the current pc type should include all core stuff (pit, pic, apic, rtc, ...) and the piix devices (host bridge, isa bridge, apci controller, ide controller, usb controller) but nothing else. > If you give devices well known ids, then changing a default device is > not really that big of a deal. It involves removing the device and > adding a new device with that same device id. Might be workable for anything added via -device, because you could do the operations in QemuOpts space, before actually creating qdev device instances. And the user could switch the nic type via '-set device.defaultnic.driver=rtl8139' then. I still don't like the concept though. Configuring a second nic would be different from configuring the first nic, because for the first you'll modify the default device, the second is added instead. libvirt folks will hate us for doing this. > For platform devices, like the interrupt controller/pic, the same > principle could be applied to switch out a userspace irqchip/pit with > the kvm kernel implementations. Doesn't fly. You can't simply add interrupt controllers via -device. They are tied way to much with the other core devices. cheers, Gerd
Gerd Hoffmann wrote: >> qemu should not have the concept of a default nic. > > Well, right now it has (likewise for serial, vga, ...). And it causes > problems with the qdev way of managing devices, so we have to find a > way to deal with it. There are a few separate issues that should be treated differently. For vga, it should just be a default device in the machine description. A special id should be used for the default vga card and -vga std should have the effect of modifying the a vga device of that id to be std. IOW, it should be -device-remove id=default_vga -device stdvga,id=default_vga. For serial, we default to having one port. Again, it should have a well known id and -serial none should be -device-remove id=default_serial. For nics, it's a bit complicated by the fact that -net nic doesn't require a model and by default, each machine may have a default model. We will likely have to preserve this as a config option which is unfortunate. That said, without -net none, we get a single network device. Again, it should have a well known id and -net none should remove that device. > Right now the default devices are tied to a command line switch, i.e. > in case there isn't a -serial switch specified qemu will add a default > serial device, even in case one was added via -device isa-serial. I don't think -device has to have the same property of -serial. That is, -device isa-serial doesn't need to replace the default serial device which is connected to a vc. Rather, we should allow a user to modify the char device associated with that default serial device via -set. We shouldn't think of it like we add a default -serial device if no -serial switch was used. Instead, there has always been a default isa-serial device. You can modify it via -set or you can remove it. -serial none removes it. For the first occurrence of -serial, it behaves like -set. Future occurrences of -set behave like -device isa-serial.
Gerd Hoffmann wrote: >> For platform devices, like the interrupt controller/pic, the same >> principle could be applied to switch out a userspace irqchip/pit with >> the kvm kernel implementations. > > Doesn't fly. You can't simply add interrupt controllers via -device. > They are tied way to much with the other core devices. We definitely want a pc machine type and a kvm-pc machine type. The later would change the default cpu to kvm64 and would use the in-kernel apic. If we want to support non in-kernel apic for debugging, I'd suggest that we support it by introducing a third machine type: kvm-pc-debug. When we complete the qdev conversion of pc, different machine types are just config files so it seems like a reasonable thing to do. > cheers, > Gerd
Anthony Liguori <aliguori@us.ibm.com> writes: > Gerd Hoffmann wrote: >>> qemu should not have the concept of a default nic. >> >> Well, right now it has (likewise for serial, vga, ...). And it >> causes problems with the qdev way of managing devices, so we have to >> find a way to deal with it. > > There are a few separate issues that should be treated differently. > For vga, it should just be a default device in the machine > description. A special id should be used for the default vga card and > -vga std should have the effect of modifying the a vga device of that > id to be std. IOW, it should be -device-remove id=default_vga -device > stdvga,id=default_vga. > > For serial, we default to having one port. Again, it should have a > well known id and -serial none should be -device-remove > id=default_serial. For nics, it's a bit complicated by the fact that > -net nic doesn't require a model and by default, each machine may have > a default model. We will likely have to preserve this as a config > option which is unfortunate. > > That said, without -net none, we get a single network device. Again, > it should have a well known id and -net none should remove that > device. >> Right now the default devices are tied to a command line switch, >> i.e. in case there isn't a -serial switch specified qemu will add a >> default serial device, even in case one was added via -device >> isa-serial. > > I don't think -device has to have the same property of -serial. That > is, -device isa-serial doesn't need to replace the default serial > device which is connected to a vc. Rather, we should allow a user to > modify the char device associated with that default serial device via > -set. > > We shouldn't think of it like we add a default -serial device if no > -serial switch was used. Instead, there has always been a default > isa-serial device. You can modify it via -set or you can remove it. > -serial none removes it. For the first occurrence of -serial, it > behaves like -set. Future occurrences of -set behave like -device > isa-serial. The problem with that approach is in a part of Gerd's reply you snipped: >> I still don't like the concept though. Configuring a second nic would >> be different from configuring the first nic, because for the first >> you'll modify the default device, the second is added instead. >> libvirt folks will hate us for doing this. Having to use -set for configuring the first device of a kind, and -device for the second is a bad user interface. It's made worse by the fact that you need -set only for some kinds of devices, namely the kinds where QEMU provides a default. I agree with Gerd that we should distinguish between required and optional devices. It's fine to require -set for modifying required devices like RTC. But when I configure my first and second NIC, I don't really want to know that I'm actually modifying the first NIC and adding the second NIC. What about this: give the user a default FOO (for FOO in serial, NIC, ...) if he didn't configure one (no matter how, be it -device or some legacy option to ask for FOOs). This way, you ask for your first FOO exactly like any other: -device. Extra points for providing a way to say "do not give me any optional devices I didn't explicitely ask for".
> >>> I still don't like the concept though. Configuring a second nic would >>> be different from configuring the first nic, because for the first >>> you'll modify the default device, the second is added instead. >>> libvirt folks will hate us for doing this. >>> > > Having to use -set for configuring the first device of a kind, and > -device for the second is a bad user interface. It's made worse by the > fact that you need -set only for some kinds of devices, namely the kinds > where QEMU provides a default. > The guest already has a nic, if you want to modify an existing nic, then you use -set. If you want to add a second nic, you use -device. This isn't two difference behaviors. > I agree with Gerd that we should distinguish between required and > optional devices. It's fine to require -set for modifying required > devices like RTC. But when I configure my first and second NIC, I don't > really want to know that I'm actually modifying the first NIC and adding > the second NIC. > I think what you're arguing for is a truly bare bones machine type where there are no assumptions about having a first nic. But let's look at this from a much higher level. What is a user typically going to want to do? They're probably going to want to change the networking settings globally. They'll either want to always use tap and use the default network device or they're going to want to always have two nics because they have to physical devices with separate networks. A user would either create a new machine type to use for all of their machines, or they would modify a global host config to change from slirp to tap. It shouldn't be the common case that they are manually specifying -device command line options. -device is pretty obtuse and really is an expert option for things like libvirt and as a placeholder for a proper config. > What about this: give the user a default FOO (for FOO in serial, NIC, > ...) if he didn't configure one (no matter how, be it -device or some > legacy option to ask for FOOs). This way, you ask for your first FOO > exactly like any other: -device. > You cannot express the concept "give a user FOO if they didn't ask for one" in a machine config file. That forces machine information to be baked into qemu which would be unfortunate. It suggests that you need another mechanism to configure what the type of these defaults are. Regards, Anthony Liguori
Anthony Liguori wrote: > We definitely want a pc machine type and a kvm-pc machine type. The > later would change the default cpu to kvm64 and would use the in-kernel > apic. kvm32-pc for 32-bit hosts? -- Jamie
Hi, > I think what you're arguing for is a truly bare bones machine type where > there are no assumptions about having a first nic. > > But let's look at this from a much higher level. What is a user > typically going to want to do? They're probably going to want to change > the networking settings globally. They'll either want to always use tap > and use the default network device or they're going to want to always > have two nics because they have to physical devices with separate networks. > > A user would either create a new machine type to use for all of their > machines, or they would modify a global host config to change from slirp > to tap. It shouldn't be the common case that they are manually > specifying -device command line options. -device is pretty obtuse and > really is an expert option for things like libvirt and as a placeholder > for a proper config. Time to polish and post my config file patches for discussion ... I expect we'll have *two* kinds of config files: #1 describes a machine type, i.e. -M $machine with only the really essential hardware. Users should not have to deal with this at all. Something like the dtc bits from pbrook. #2 describes the configuration of a virtual machine, i.e. what additional devices are plugged in, virtual disks, network setup, ... I have patches for #2 which essentially read/write QemuOpts to a git-style config file. We could simply ship a type #2 default config file with nic, vga, serial etc. Or two, with the second being the -nographic variant. cheers, Gerd
diff --git a/kvm-all.c b/kvm-all.c index f33354d..b31d085 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -51,6 +51,7 @@ typedef struct KVMSlot typedef struct kvm_dirty_log KVMDirtyLog; int kvm_allowed = 0; +int kvm_use_kernel_chip = 0; struct KVMState { diff --git a/kvm.h b/kvm.h index f0c9201..49a2b56 100644 --- a/kvm.h +++ b/kvm.h @@ -20,6 +20,7 @@ #ifdef CONFIG_KVM extern int kvm_allowed; +extern int kvm_use_kernel_chip; #define kvm_enabled() (kvm_allowed) #else diff --git a/qemu-config.c b/qemu-config.c index bafaea2..9461766 100644 --- a/qemu-config.c +++ b/qemu-config.c @@ -184,12 +184,28 @@ QemuOptsList qemu_rtc_opts = { }, }; +QemuOptsList qemu_kvm_opts = { + .name = "kvm", + .head = QTAILQ_HEAD_INITIALIZER(qemu_kvm_opts.head), + .desc = { + { + .name = "irqchip-in-kernel", + .type = QEMU_OPT_BOOL, + },{ + .name = "enabled", + .type = QEMU_OPT_BOOL, + }, + { /* end if list */ } + }, +}; + static QemuOptsList *lists[] = { &qemu_drive_opts, &qemu_chardev_opts, &qemu_device_opts, &qemu_net_opts, &qemu_rtc_opts, + &qemu_kvm_opts, NULL, }; diff --git a/qemu-config.h b/qemu-config.h index cdad5ac..58cead2 100644 --- a/qemu-config.h +++ b/qemu-config.h @@ -6,6 +6,7 @@ extern QemuOptsList qemu_chardev_opts; extern QemuOptsList qemu_device_opts; extern QemuOptsList qemu_net_opts; extern QemuOptsList qemu_rtc_opts; +extern QemuOptsList qemu_kvm_opts; int qemu_set_option(const char *str); diff --git a/qemu-options.hx b/qemu-options.hx index 3dd76b3..1cb8431 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -1431,15 +1431,20 @@ Set the filename for the BIOS. ETEXI #ifdef CONFIG_KVM -DEF("enable-kvm", 0, QEMU_OPTION_enable_kvm, \ - "-enable-kvm enable KVM full virtualization support\n") -#endif +HXCOMM Options deprecated by -kvm +DEF("enable-kvm", 0, QEMU_OPTION_enable_kvm, "") + +DEF("kvm", HAS_ARG, QEMU_OPTION_kvm, \ + "-kvm enable=on|off,irqchip-in-kernel=on|off\n" \ + " enable KVM full virtualization support\n") STEXI -@item -enable-kvm +@item -kvm [enable=on|off][,irqchip-in-kernel=on|off] Enable KVM full virtualization support. This option is only available if KVM support is enabled when compiling. ETEXI +#endif + #ifdef CONFIG_XEN DEF("xen-domid", HAS_ARG, QEMU_OPTION_xen_domid, "-xen-domid id specify xen guest domain id\n") diff --git a/vl.c b/vl.c index afe01af..a6f9eb7 100644 --- a/vl.c +++ b/vl.c @@ -5353,6 +5353,17 @@ int main(int argc, char **argv, char **envp) case QEMU_OPTION_enable_kvm: kvm_allowed = 1; break; + case QEMU_OPTION_kvm: + + opts = qemu_opts_parse(&qemu_kvm_opts, optarg, NULL); + if (!opts) { + fprintf(stderr, "parse error: %s\n", optarg); + exit(1); + } + + kvm_allowed = qemu_opt_get_bool(opts, "enabled", 1); + kvm_use_kernel_chip = qemu_opt_get_bool(opts, "irqchip-in-kernel", 1); + break; #endif case QEMU_OPTION_usb: usb_enabled = 1;
This option deprecates --enable-kvm. It is a more flexible option, that makes use of qemu-opts, and allow us to pass on options to enable or disable kernel irqchip, for example. Signed-off-by: Glauber Costa <glommer@redhat.com> --- kvm-all.c | 1 + kvm.h | 1 + qemu-config.c | 16 ++++++++++++++++ qemu-config.h | 1 + qemu-options.hx | 13 +++++++++---- vl.c | 11 +++++++++++ 6 files changed, 39 insertions(+), 4 deletions(-)