Message ID | 1361284822.13482.209.camel@i7.infradead.org |
---|---|
State | New |
Headers | show |
Il 19/02/2013 15:40, David Woodhouse ha scritto: > On Tue, 2013-02-19 at 13:31 +0100, Paolo Bonzini wrote: >> So you can make the hard reset line a qemu_irq (output in PIIX, input in >> i440FX) using qdev_init_gpio_in/out. The input side in the i440FX then >> can reset the PAM registers while the output side can pulse it before >> calling qemu_system_reset_request() if RCR bit 1 is set. Then you can >> connect it using qdev_connect_gpio_out/qdev_get_gpio_in in >> i440fx_common_init. > > Like this? Exactly. > I've still left i440fx_reset() hooked up as dc->reset, and > conditionally do the full reset based on a flag in the state. If I > actually *do* the reset directly from i440fx_handle_reset() rather than > just setting the flag there, it might happen too early? I think it'd be okay, but without actually tracing what happens I agree that it's safer this way. Paolo > diff --git a/hw/piix_pci.c b/hw/piix_pci.c > index 6c77e49..0c8f613 100644 > --- a/hw/piix_pci.c > +++ b/hw/piix_pci.c > @@ -71,6 +71,7 @@ typedef struct PIIX3State { > uint64_t pic_levels; > > qemu_irq *pic; > + qemu_irq reset_out; > > /* This member isn't used. Just for save/load compatibility */ > int32_t pci_irq_levels_vmstate[PIIX_NUM_PIRQS]; > @@ -92,6 +93,7 @@ struct PCII440FXState { > PAMMemoryRegion pam_regions[13]; > MemoryRegion smram_region; > uint8_t smm_enabled; > + uint8_t hard_reset; > }; > > > @@ -171,6 +173,42 @@ static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id) > return 0; > } > > +static void i440fx_reset(DeviceState *ds) > +{ > + PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, ds); > + PCII440FXState *d = DO_UPCAST(PCII440FXState, dev, dev); > + uint8_t *pci_conf = d->dev.config; > + > + /* We only reset the PAM configuration if it was a *hard* reset, > + * triggered from the RCR on the PIIX3 (or from the TRCR on the > + * i440FX itself, but we don't implement that yet and software > + * is advised not to use it when there's a PIIX). */ > + if (!d->hard_reset) > + return; > + > + d->hard_reset = 0; > + > + pci_conf[0x59] = 0x00; // Reset PAM setup > + pci_conf[0x5a] = 0x00; > + pci_conf[0x5b] = 0x00; > + pci_conf[0x5c] = 0x00; > + pci_conf[0x5d] = 0x00; > + pci_conf[0x5e] = 0x00; > + pci_conf[0x5f] = 0x00; > + pci_conf[0x72] = 0x02; // And SMM > + > + i440fx_update_memory_mappings(d); > +} > + > +static void i440fx_handle_reset(void *opaque, int n, int level) > +{ > + PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, opaque); > + PCII440FXState *d = DO_UPCAST(PCII440FXState, dev, dev); > + > + if (level) > + d->hard_reset = 1; > +} > + > static int i440fx_post_load(void *opaque, int version_id) > { > PCII440FXState *d = opaque; > @@ -216,6 +254,8 @@ static int i440fx_initfn(PCIDevice *dev) > > d->dev.config[I440FX_SMRAM] = 0x02; > > + qdev_init_gpio_in(&d->dev.qdev, i440fx_handle_reset, 1); > + > cpu_smm_register(&i440fx_set_smm, d); > return 0; > } > @@ -297,6 +337,8 @@ static PCIBus *i440fx_common_init(const char *device_name, > pci_bus_set_route_irq_fn(b, piix3_route_intx_pin_to_irq); > } > piix3->pic = pic; > + qdev_connect_gpio_out(&piix3->dev.qdev, 0, > + qdev_get_gpio_in(&f->dev.qdev, 0)); > *isa_bus = DO_UPCAST(ISABus, qbus, > qdev_get_child_bus(&piix3->dev.qdev, "isa.0")); > > @@ -521,6 +563,8 @@ static void rcr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len) > PIIX3State *d = opaque; > > if (val & 4) { > + if (val & 2) > + qemu_irq_pulse(d->reset_out); > qemu_system_reset_request(); > return; > } > @@ -550,6 +594,7 @@ static int piix3_initfn(PCIDevice *dev) > memory_region_add_subregion_overlap(pci_address_space_io(dev), RCR_IOPORT, > &d->rcr_mem, 1); > > + qdev_init_gpio_out(&dev->qdev, &d->reset_out, 1); > qemu_register_reset(piix3_reset, d); > return 0; > } > @@ -615,6 +660,7 @@ static void i440fx_class_init(ObjectClass *klass, void *data) > dc->desc = "Host bridge"; > dc->no_user = 1; > dc->vmsd = &vmstate_i440fx; > + dc->reset = i440fx_reset; > } > > static const TypeInfo i440fx_info = { > > > >
On 02/19/13 15:40, David Woodhouse wrote: > On Tue, 2013-02-19 at 13:31 +0100, Paolo Bonzini wrote: >> So you can make the hard reset line a qemu_irq (output in PIIX, input in >> i440FX) using qdev_init_gpio_in/out. The input side in the i440FX then >> can reset the PAM registers while the output side can pulse it before >> calling qemu_system_reset_request() if RCR bit 1 is set. Then you can >> connect it using qdev_connect_gpio_out/qdev_get_gpio_in in >> i440fx_common_init. > > Like this? I've still left i440fx_reset() hooked up as dc->reset, and > conditionally do the full reset based on a flag in the state. If I > actually *do* the reset directly from i440fx_handle_reset() rather than > just setting the flag there, it might happen too early? > > @@ -521,6 +563,8 @@ static void rcr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len) > PIIX3State *d = opaque; > > if (val & 4) { > + if (val & 2) > + qemu_irq_pulse(d->reset_out); > qemu_system_reset_request(); > return; > } We can't start to savevm between qemu_irq_pulse() and qemu_system_reset_request(); OK. I wonder though if we can enter do_savevm() after rcr_write() returns, but before i440fx_reset() -- the registered reset handler -- is called. Consider a monitor sitting on stdio. First we create the chardev: main() [vl.c] chardev_init_func() qemu_chr_new_from_opts() [qemu-char.c] qemu_chr_open_stdio() via funcptr qemu_chr_open_fd() registers "fd_chr_update_read_handler" Then we hook it to the monitor: main() [vl.c] mon_init_func() qemu_chr_find() [qemu-char.c] monitor_init() [monitor.c] qemu_chr_add_handlers() [qemu-char.c] fd_chr_update_read_handler() via earlier registration qemu_set_fd_handler2() [iohandler.c] append to global list "io_handlers" Then, CPU thread: qemu_kvm_cpu_thread_fn() [cpus.c] kvm_cpu_exec() [kvm-all.c] qemu_mutex_lock_iothread() kvm_handle_io() ... rcr_write() ... loops back qemu_mutex_unlock_iothread() UNLOCK IO thread (with kvm_enabled()==true): main_loop() [vl.c] main_loop_wait(nonblocking = false) [main-loop.c] os_host_main_loop_wait(timeout > 0) qemu_mutex_unlock_iothread() select() qemu_mutex_lock_iothread() LOCK glib_select_poll() qemu_iohandler_poll() [iohandler.c] scans global "io_handlers" list, monitor fd ready monitor_read() / monitor_control_read() handle_user_command() / handle_qmp_command() do_savevm() [savevm.c] via funcptr Apparently, in theory at least, the guest can trap to rcr_write() and set "PCII440FXState.hard_reset", then immediately qemu can save a VM snapshot. If we restore this snapshot, instead of the hard reset we'll do a soft reset. Consider adding "vmstate_i440fx" to "vmstate_i440fx", perhaps? :) Laszlo
On 02/19/13 18:51, Laszlo Ersek wrote: > Apparently, in theory at least, the guest can trap to rcr_write() and > set "PCII440FXState.hard_reset", then immediately qemu can save a VM > snapshot. If we restore this snapshot, instead of the hard reset we'll > do a soft reset. > > Consider adding "vmstate_i440fx" to "vmstate_i440fx", perhaps? :) Aargh. "adding hard_reset to vmstate_i440fx" of course. L.
David Woodhouse <dwmw2@infradead.org> writes: > On Tue, 2013-02-19 at 13:31 +0100, Paolo Bonzini wrote: >> So you can make the hard reset line a qemu_irq (output in PIIX, input in >> i440FX) using qdev_init_gpio_in/out. The input side in the i440FX then >> can reset the PAM registers while the output side can pulse it before >> calling qemu_system_reset_request() if RCR bit 1 is set. Then you can >> connect it using qdev_connect_gpio_out/qdev_get_gpio_in in >> i440fx_common_init. > > Like this? I've still left i440fx_reset() hooked up as dc->reset, and > conditionally do the full reset based on a flag in the state. If I > actually *do* the reset directly from i440fx_handle_reset() rather than > just setting the flag there, it might happen too early? > > diff --git a/hw/piix_pci.c b/hw/piix_pci.c > index 6c77e49..0c8f613 100644 > --- a/hw/piix_pci.c > +++ b/hw/piix_pci.c > @@ -71,6 +71,7 @@ typedef struct PIIX3State { > uint64_t pic_levels; > > qemu_irq *pic; > + qemu_irq reset_out; > > /* This member isn't used. Just for save/load compatibility */ > int32_t pci_irq_levels_vmstate[PIIX_NUM_PIRQS]; > @@ -92,6 +93,7 @@ struct PCII440FXState { > PAMMemoryRegion pam_regions[13]; > MemoryRegion smram_region; > uint8_t smm_enabled; > + uint8_t hard_reset; > }; > > > @@ -171,6 +173,42 @@ static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id) > return 0; > } > > +static void i440fx_reset(DeviceState *ds) > +{ > + PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, ds); > + PCII440FXState *d = DO_UPCAST(PCII440FXState, dev, dev); > + uint8_t *pci_conf = d->dev.config; > + > + /* We only reset the PAM configuration if it was a *hard* reset, > + * triggered from the RCR on the PIIX3 (or from the TRCR on the > + * i440FX itself, but we don't implement that yet and software > + * is advised not to use it when there's a PIIX). */ > + if (!d->hard_reset) > + return; > + > + d->hard_reset = 0; > + > + pci_conf[0x59] = 0x00; // Reset PAM setup > + pci_conf[0x5a] = 0x00; > + pci_conf[0x5b] = 0x00; > + pci_conf[0x5c] = 0x00; > + pci_conf[0x5d] = 0x00; > + pci_conf[0x5e] = 0x00; > + pci_conf[0x5f] = 0x00; > + pci_conf[0x72] = 0x02; // And SMM > + > + i440fx_update_memory_mappings(d); > +} > + > +static void i440fx_handle_reset(void *opaque, int n, int level) > +{ > + PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, opaque); > + PCII440FXState *d = DO_UPCAST(PCII440FXState, dev, dev); > + > + if (level) > + d->hard_reset = 1; > +} > + > static int i440fx_post_load(void *opaque, int version_id) > { > PCII440FXState *d = opaque; > @@ -216,6 +254,8 @@ static int i440fx_initfn(PCIDevice *dev) > > d->dev.config[I440FX_SMRAM] = 0x02; > > + qdev_init_gpio_in(&d->dev.qdev, i440fx_handle_reset, 1); > + > cpu_smm_register(&i440fx_set_smm, d); > return 0; > } > @@ -297,6 +337,8 @@ static PCIBus *i440fx_common_init(const char *device_name, > pci_bus_set_route_irq_fn(b, piix3_route_intx_pin_to_irq); > } > piix3->pic = pic; > + qdev_connect_gpio_out(&piix3->dev.qdev, 0, > + qdev_get_gpio_in(&f->dev.qdev, 0)); > *isa_bus = DO_UPCAST(ISABus, qbus, > qdev_get_child_bus(&piix3->dev.qdev, "isa.0")); > > @@ -521,6 +563,8 @@ static void rcr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len) > PIIX3State *d = opaque; > > if (val & 4) { > + if (val & 2) > + qemu_irq_pulse(d->reset_out); > qemu_system_reset_request(); This is a bit strange to me. From the spec: "Bits 1 and 2 in this register are used by PIIX4 to generate a hard reset or a soft reset. During a hard reset, PIIX4 asserts CPURST, PCIRST#, and RSTDRV, as well as reset its core and suspend well logic. During a soft reset, PIIX4 asserts INIT." Bit 2 Reset CPU (RCPU). This bit is used to initiate (transitions from 0 to 1) a hard reset (bit 1 in this register is set to 1) or a soft reset to the CPU. PIIX4 will also initiate a hard reset when PWROK is asserted. This bit cannot be read as a 1. 1 System Reset (SRST). This bit is used to select the type of reset generated when bit 2 in this register is set to 1. When SRST=1, PIIX4 initiates a hard reset to the CPU when bit 2 in this register transitions from 0 to 1. When SRST=0, PIIX4 initiates a soft reset when bit 2 in this register transitions from 0 to 1. Now this maps to two signals: INIT# and CPURST#. PCIRST# is strictly about resetting the PCI bus. PCIRST# is only asserted during hard reset. So should we even be resetting anything other than the CPU during soft reset? In the very least, shouldn't we expose qemu_irqs from the PIIX and let the i440fx decide what to do with them? In this case, it would be an INIT# and CPURST# qemu_irq corresponding to soft and hard reset respectively. Regards, Anthony Liguori > return; > } > @@ -550,6 +594,7 @@ static int piix3_initfn(PCIDevice *dev) > memory_region_add_subregion_overlap(pci_address_space_io(dev), RCR_IOPORT, > &d->rcr_mem, 1); > > + qdev_init_gpio_out(&dev->qdev, &d->reset_out, 1); > qemu_register_reset(piix3_reset, d); > return 0; > } > @@ -615,6 +660,7 @@ static void i440fx_class_init(ObjectClass *klass, void *data) > dc->desc = "Host bridge"; > dc->no_user = 1; > dc->vmsd = &vmstate_i440fx; > + dc->reset = i440fx_reset; > } > > static const TypeInfo i440fx_info = { > > > > > -- > dwmw2
Il 19/02/2013 21:29, Anthony Liguori ha scritto: > In the very least, shouldn't we expose qemu_irqs from the PIIX and let > the i440fx decide what to do with them? In this case, it would be an > INIT# and CPURST# qemu_irq corresponding to soft and hard reset > respectively. Yes, see the other patch that David posted. It only exposes PCIRST# but it's a start. Paolo
On Tue, 2013-02-19 at 14:29 -0600, Anthony Liguori wrote: > David Woodhouse <dwmw2@infradead.org> writes: > > if (val & 4) { > > + if (val & 2) > > + qemu_irq_pulse(d->reset_out); > > qemu_system_reset_request(); > > > This is a bit strange to me. The reset_out "IRQ" isn't actually what triggers the I440FX/PAM reset. That just sets a flag to say that the coming *system* reset is a hard reset and not a soft reset. I did comment about the possibility of doing the reset directly from the qemu_irq handler, and why I hadn't done it that way... > So should we even be resetting anything other than the CPU during soft > reset? I suspect not. A soft reset triggered by the RCR, keyboard controller, port 92 etc. should all just reset the CPU and nothing else. > In the very least, shouldn't we expose qemu_irqs from the PIIX and let > the i440fx decide what to do with them? In this case, it would be an > INIT# and CPURST# qemu_irq corresponding to soft and hard reset > respectively. How far down this road do we go? Do we end up wiring up the full reset topology and abandoning the special-case qemu_system_reset() altogether?
David Woodhouse <dwmw2@infradead.org> writes: > On Tue, 2013-02-19 at 14:29 -0600, Anthony Liguori wrote: >> David Woodhouse <dwmw2@infradead.org> writes: >> > if (val & 4) { >> > + if (val & 2) >> > + qemu_irq_pulse(d->reset_out); >> > qemu_system_reset_request(); >> >> >> This is a bit strange to me. > > The reset_out "IRQ" isn't actually what triggers the I440FX/PAM reset. Right, this is what's strange to me. There's no hardware analog AFAICT so I'm not sure why we're exposing it as a qemu_irq other than we want to jump through a function pointer invocation instead of making a straight funciton call :-) > That just sets a flag to say that the coming *system* reset is a hard > reset and not a soft reset. Yes, but this flag is in PIIX, not the i440fx. > I did comment about the possibility of doing > the reset directly from the qemu_irq handler, and why I hadn't done it > that way... > >> So should we even be resetting anything other than the CPU during soft >> reset? > > I suspect not. A soft reset triggered by the RCR, keyboard controller, > port 92 etc. should all just reset the CPU and nothing else. I suspect what we need to do is convert qemu_system_reset_request() into a qemu_system_cpu_reset() that takes a callback. Once the VCPUs have been reset, the callback can then be used to reset all or some of the device model. This of course means removing the reset handlers in the CPUs as they exist today. Cc'ing Andreas to get his thoughts. FWIW, I'm not expecting you to do this to fix this issue. Just thinking out loud here really. >> In the very least, shouldn't we expose qemu_irqs from the PIIX and let >> the i440fx decide what to do with them? In this case, it would be an >> INIT# and CPURST# qemu_irq corresponding to soft and hard reset >> respectively. > > How far down this road do we go? Do we end up wiring up the full reset > topology and abandoning the special-case qemu_system_reset() > altogether? Long term, yes. Short term, whatever we need that's reasonable to get the CSM happy without making things worse. I'm not terribly happy exposing an IRQ that doesn't exist in real life to "model hardware". We could just as easily call into i440fx to set the hard_reset flag without jumping through qemu_irq hoops if we're just looking to make it work. I think that's clearer if what we're doing is essentially a short term hack. If we were going to model an INIT# and CPURST# qemu_irq and raise them based on what the hardware does, I'm happy with that. But AFAICT 'reset_out' has no hardware analogy. Regards, Anthony Liguori > > -- > dwmw2
On Tue, 2013-02-19 at 16:17 -0600, Anthony Liguori wrote: > Right, this is what's strange to me. There's no hardware analog AFAICT > so I'm not sure why we're exposing it as a qemu_irq other than we want > to jump through a function pointer invocation instead of making a > straight funciton call :-) Hey, don't ask me. I was just trying to do what Paolo said in response to me first attempt — which just accessed the 'hard reset' flag in the PIIX directly from the I440FX reset handler. > > That just sets a flag to say that the coming *system* reset is a hard > > reset and not a soft reset. > > Yes, but this flag is in PIIX, not the i440fx. Yes. The Reset Control Register is in the PIIX. And the i440fx just happens to be the only other device that *cares* if it's a hard reset or a soft reset. For now. Thankfully they're implemented in the same C file and even initialised together, which lets us do a special-case hack relatively easily. > I suspect what we need to do is convert qemu_system_reset_request() into > a qemu_system_cpu_reset() that takes a callback. Once the VCPUs have > been reset, the callback can then be used to reset all or some of the > device model. This of course means removing the reset handlers in the > CPUs as they exist today. > > Cc'ing Andreas to get his thoughts. > > FWIW, I'm not expecting you to do this to fix this issue. Just thinking > out loud here really. Sounds good to me. I'm beginning to wish I'd just ignored the fact that we need a properly working "soft" reset to get back from 286 protected mode to real mode, and wired up the damn PAM reset unconditionally. I'm not convinced that the protected->real mode transition will work for anyone anyway. > I'm not terribly happy exposing an IRQ that doesn't exist in real life > to "model hardware". We could just as easily call into i440fx to set > the hard_reset flag without jumping through qemu_irq hoops if we're just > looking to make it work. I think that's clearer if what we're doing is > essentially a short term hack. That was basically my first attempt, before Paulo's feedback? I had i440fx calling into PIIX to *read* the flag, rather than PIIX calling into i440fx to *set* it, but if you feel strongly it wouldn't be hard to switch that round.
Il 19/02/2013 23:17, Anthony Liguori ha scritto: >>> >> > if (val & 4) { >>> >> > + if (val & 2) >>> >> > + qemu_irq_pulse(d->reset_out); >>> >> > qemu_system_reset_request(); >> >> >> >> >> >> This is a bit strange to me. > > > > The reset_out "IRQ" isn't actually what triggers the I440FX/PAM reset. > > Right, this is what's strange to me. There's no hardware analog AFAICT > so I'm not sure why we're exposing it as a qemu_irq other than we want > to jump through a function pointer invocation instead of making a > straight funciton call :-) True, OTOH I agreed with David's explanation that the hard reset could happen too early. IOW, doing the irq this way is a consequence of having qemu_system_reset_request() instead of qemu_system_reset(). Paolo
On 19 February 2013 22:17, Anthony Liguori <anthony@codemonkey.ws> wrote: > David Woodhouse <dwmw2@infradead.org> writes: >> On Tue, 2013-02-19 at 14:29 -0600, Anthony Liguori wrote: >>> So should we even be resetting anything other than the CPU during soft >>> reset? >> >> I suspect not. A soft reset triggered by the RCR, keyboard controller, >> port 92 etc. should all just reset the CPU and nothing else. > > I suspect what we need to do is convert qemu_system_reset_request() into > a qemu_system_cpu_reset() that takes a callback. Once the VCPUs have > been reset, the callback can then be used to reset all or some of the > device model. If we're just solving a PC problem here and it really is just "only reset the CPU, nothing else", why don't we give the x86 CPU a qemu_irq input for "reset this CPU core" and wire it up to the relevant bit of hardware on the PC board? I don't see the need for a specific 'qemu_system_cpu_reset()' here (and not having one avoids the swamp of trying to define its semantics...) >> How far down this road do we go? Do we end up wiring up the full reset >> topology and abandoning the special-case qemu_system_reset() >> altogether? > > Long term, yes. Short term, whatever we need that's reasonable to get > the CSM happy without making things worse. I definitely think we should be modelling reset lines, yes. It would be nice if we could sketch a path for how we get from here to there. Here's a strawman proposal that's probably full of holes: (1) we retain the existing 'reset' Device method as meaning "full power-cycle style reset" and qemu_system_reset_request() as meaning "power cycle entire machine". (Eventually the latter might go away as I doubt much real hardware has a "power cycle the world" wiring.) (2) we recommend that for new devices etc, where the device has one or more physical reset pins those should be modeled as qdev_gpio input lines, with the behaviour the hardware has when those are asserted. [Q: what do we do about logic-low-is-assert vs logic-high-is-assert hardware?] This reset can obviously share code with the DeviceState::reset in many cases, but it's conceptually separate. (3) when we need to implement a particular effect on a particular board (as here with the PC) we do that by: a. making sure all affected devices implement reset b. wiring up reset on the board model c. having the implementation of the 'reset' register or whatever assert the irq line (4) as and when we have time, convert existing code (ho ho) This obviously works best when the "not actually a full power cycle" reset you want in (3) is a very limited focus one, like "just reset the CPU"... It also exposes some "not there yet" features like the fact we can't have named gpio input lines so you have to have a numbering convention for smooshing all your inputs into a single array. Pins, anybody? :-) -- PMM
Hi Peter, On Wed, Feb 20, 2013 at 8:50 AM, Peter Maydell <peter.maydell@linaro.org> wrote: > On 19 February 2013 22:17, Anthony Liguori <anthony@codemonkey.ws> wrote: >> David Woodhouse <dwmw2@infradead.org> writes: >>> On Tue, 2013-02-19 at 14:29 -0600, Anthony Liguori wrote: >>>> So should we even be resetting anything other than the CPU during soft >>>> reset? >>> >>> I suspect not. A soft reset triggered by the RCR, keyboard controller, >>> port 92 etc. should all just reset the CPU and nothing else. >> >> I suspect what we need to do is convert qemu_system_reset_request() into >> a qemu_system_cpu_reset() that takes a callback. Once the VCPUs have >> been reset, the callback can then be used to reset all or some of the >> device model. > > If we're just solving a PC problem here and it really is just > "only reset the CPU, nothing else", why don't we give the > x86 CPU a qemu_irq input for "reset this CPU core" and wire it > up to the relevant bit of hardware on the PC board? I don't > see the need for a specific 'qemu_system_cpu_reset()' here > (and not having one avoids the swamp of trying to define its > semantics...) > Could we be more general and implement this on the TYPE_DEVICE level (rather than X86_CPU)? I want this GPIO-as-reset feature for all Zynq devices cpus and preihperals alike. The Zynq power controller has software controllable individual reset for every device in the system and my plan-A was to do it as GPIOs. To implement the reset gpio-ins however I was thinking do it in one swift stroke by adding the single GPIO on the TYPE_DEVICE layer that backs onto DeviceClass-.>reset. With recent QOM efforts (making CPUs DEVICEs) this catchall will also implment the feature for all CPUs. Power controllers define gpio_outs and then machine model just play connect the dots. RFC! I was planning at some stage to formally RFC this but yet to get around to it. I bring it up because the topic is hot here. >>> How far down this road do we go? Do we end up wiring up the full reset >>> topology and abandoning the special-case qemu_system_reset() >>> altogether? >> >> Long term, yes. Short term, whatever we need that's reasonable to get >> the CSM happy without making things worse. > > I definitely think we should be modelling reset lines, yes. > It would be nice if we could sketch a path for how we get from > here to there. Here's a strawman proposal that's probably full > of holes: > > (1) we retain the existing 'reset' Device method as meaning "full > power-cycle style reset" and qemu_system_reset_request() as > meaning "power cycle entire machine". (Eventually the latter > might go away as I doubt much real hardware has a "power > cycle the world" wiring.) > > (2) we recommend that for new devices etc, where the device has > one or more physical reset pins those should be modeled as > qdev_gpio input lines, with the behaviour the hardware has > when those are asserted. [Q: what do we do about logic-low-is-assert > vs logic-high-is-assert hardware?] This reset can obviously share > code with the DeviceState::reset in many cases, but it's > conceptually separate. > > (3) when we need to implement a particular effect on a particular > board (as here with the PC) we do that by: > a. making sure all affected devices implement reset > b. wiring up reset on the board model > c. having the implementation of the 'reset' register or whatever > assert the irq line > > (4) as and when we have time, convert existing code (ho ho) > The TYPE_DEVICE level implementation would give a reset pin to every device that implements DeviceClass->reset which should minimise the pain here. The hard part is devices that dont implement reset at all which are a lost cause WRT this discussion. > This obviously works best when the "not actually a full power > cycle" reset you want in (3) is a very limited focus one, > like "just reset the CPU"... > > It also exposes some "not there yet" features like the fact > we can't have named gpio input lines so you have to have a > numbering convention for smooshing all your inputs into a > single array. Pins, anybody? :-) > Yes my idea requires this so would have to bite the bullet and get this one through. Regards, Peter > -- PMM >
On 20 February 2013 12:21, Peter Crosthwaite <peter.crosthwaite@xilinx.com> wrote: > On Wed, Feb 20, 2013 at 8:50 AM, Peter Maydell <peter.maydell@linaro.org> wrote: >> If we're just solving a PC problem here and it really is just >> "only reset the CPU, nothing else", why don't we give the >> x86 CPU a qemu_irq input for "reset this CPU core" and wire it >> up to the relevant bit of hardware on the PC board? I don't >> see the need for a specific 'qemu_system_cpu_reset()' here >> (and not having one avoids the swamp of trying to define its >> semantics...) > Could we be more general and implement this on the TYPE_DEVICE level > (rather than X86_CPU)? I want this GPIO-as-reset feature for all Zynq > devices cpus and preihperals alike. The Zynq power controller has > software controllable individual reset for every device in the system > and my plan-A was to do it as GPIOs. To implement the reset gpio-ins > however I was thinking do it in one swift stroke by adding the single > GPIO on the TYPE_DEVICE layer that backs onto DeviceClass-.>reset. The trouble is that: * some devices have no reset GPIO line * some devices have more than one (eg a Cortex-A9MPx4 has 18 different reset input lines) * the reset line doesn't always match up with the DeviceClass::reset semantics I guess maybe if there was a way to say 'this device suppresses the default reset input implementation'. (Plus as you note we'd have to actually support named GPIO inputs to have the base class provide an input pin that didn't get tangled up with the device's own inputs.) -- PMM
On Wed, Feb 20, 2013 at 10:29 PM, Peter Maydell <peter.maydell@linaro.org> wrote: > On 20 February 2013 12:21, Peter Crosthwaite > <peter.crosthwaite@xilinx.com> wrote: >> On Wed, Feb 20, 2013 at 8:50 AM, Peter Maydell <peter.maydell@linaro.org> wrote: >>> If we're just solving a PC problem here and it really is just >>> "only reset the CPU, nothing else", why don't we give the >>> x86 CPU a qemu_irq input for "reset this CPU core" and wire it >>> up to the relevant bit of hardware on the PC board? I don't >>> see the need for a specific 'qemu_system_cpu_reset()' here >>> (and not having one avoids the swamp of trying to define its >>> semantics...) > >> Could we be more general and implement this on the TYPE_DEVICE level >> (rather than X86_CPU)? I want this GPIO-as-reset feature for all Zynq >> devices cpus and preihperals alike. The Zynq power controller has >> software controllable individual reset for every device in the system >> and my plan-A was to do it as GPIOs. To implement the reset gpio-ins >> however I was thinking do it in one swift stroke by adding the single >> GPIO on the TYPE_DEVICE layer that backs onto DeviceClass-.>reset. > > The trouble is that: > * some devices have no reset GPIO line Is there any harm in just not connecting the default reset GPIO? Or if you are pedantic allow the class definition to opt-out and inhibit generation of the GPIO. > * some devices have more than one (eg a Cortex-A9MPx4 has 18 different > reset input lines) Yeh you are lost in this case. But my intended semantics for the TYPE_DEVICE reset GPIO is it is a power on reset (PoR) with equivalent function to DeviceClass->reset > * the reset line doesn't always match up with the DeviceClass::reset > semantics > Then its not a PoR equivalent (and thus from QEMUs perspective not a "reset" at all). Its a device specific GPIO. The same applies to 18 lines of A9MPx4, althought that is a container object so im guessing some of those 18 resets will pass through as PoR equivalents to the subdevices? So working on that case, suppose GIC (a subcomponent of Cortex-A9MPx4) has a PoR equivalent wired directly as one of the 18 resets. The container will have to explicitly define all 18 resets, however, it can pass GICs through to the TYPE_DEVICE reset for the GIC instance, saving on having to hack up GIC to explicitly have a reset GPIO. It just strikes me as a workable solution for the 90% case then we can go you full custom GPIO solution for the harder ones. Regards, Peter > I guess maybe if there was a way to say 'this device suppresses > the default reset input implementation'. > > (Plus as you note we'd have to actually support named GPIO inputs > to have the base class provide an input pin that didn't get > tangled up with the device's own inputs.) > > -- PMM >
On 20 February 2013 12:47, Peter Crosthwaite <peter.crosthwaite@xilinx.com> wrote: > On Wed, Feb 20, 2013 at 10:29 PM, Peter Maydell > <peter.maydell@linaro.org> wrote: >> On 20 February 2013 12:21, Peter Crosthwaite >> <peter.crosthwaite@xilinx.com> wrote: >>> Could we be more general and implement this on the TYPE_DEVICE level >>> (rather than X86_CPU)? I want this GPIO-as-reset feature for all Zynq >>> devices cpus and preihperals alike. The Zynq power controller has >>> software controllable individual reset for every device in the system >>> and my plan-A was to do it as GPIOs. To implement the reset gpio-ins >>> however I was thinking do it in one swift stroke by adding the single >>> GPIO on the TYPE_DEVICE layer that backs onto DeviceClass-.>reset. >> >> The trouble is that: >> * some devices have no reset GPIO line > > Is there any harm in just not connecting the default reset GPIO? Or if > you are pedantic allow the class definition to opt-out and inhibit > generation of the GPIO. > >> * some devices have more than one (eg a Cortex-A9MPx4 has 18 different >> reset input lines) > > Yeh you are lost in this case. But my intended semantics for the > TYPE_DEVICE reset GPIO is it is a power on reset (PoR) with equivalent > function to DeviceClass->reset I don't think this is a useful thing to model. What is actually useful to model is "what does the device do when its hardware reset line is asserted" (which might be the same as power on reset but isn't required to be so). We can have a default convenience implementation which is "reset like a power cycle", but that doesn't mean the semantics of the reset GPIO line are *defined* to be "reset like a power cycle". It just means that if your reset line's semantics are different you can't use the convenience implementation. (Also that you can't rely on every device having a 'reset' GPIO line, if the hardware doesn't either. But that's not a problem I think.) If you're trying to model the case of "my power controller actually forcibly powers down subsets of the board" you should already be able to do that by calling the relevant object reset methods [possibly via some indirection through suitably constructed containers]. Powering down a section of the board isn't done by asserting input lines to each affected device; the mechanism is different and the modelling should be too. > It just strikes me as a workable solution for the 90% case then we can > go you full custom GPIO solution for the harder ones. Mmm, as long as there's a way to turn it off and/or to override the implementation it should be ok as a default/convenience method. -- PMM
On 02/19/13 23:45, David Woodhouse wrote: > I'm beginning to wish I'd just ignored the fact that > we need a properly working "soft" reset to get back from 286 protected > mode to real mode, and wired up the damn PAM reset unconditionally. I'm > not convinced that the protected->real mode transition will work for > anyone anyway. I believe currently we must be somewhere "between" soft reset & hard reset. I estimate getting closer to a well-emulated hard reset is more important than getting closer to a soft one. If you were to extend the i440FX reset handler so that it unconditionally resets the PAMs, I'd give my Rb. (Take it for what it's worth :)) The long-term solution covering all targets is being designed, but a quick shortcut would be nice, and not only for CSM development / testing. (I gather qemu_prep_reset() in SeaBIOS would not have had to be written that way, for example.) For CSM stuff we can still carry private qemu patches of course... Thanks Laszlo
Il 20/02/2013 16:18, Laszlo Ersek ha scritto: >> > I'm beginning to wish I'd just ignored the fact that >> > we need a properly working "soft" reset to get back from 286 protected >> > mode to real mode, and wired up the damn PAM reset unconditionally. I'm >> > not convinced that the protected->real mode transition will work for >> > anyone anyway. > I believe currently we must be somewhere "between" soft reset & hard > reset. I estimate getting closer to a well-emulated hard reset is more > important than getting closer to a soft one. If you were to extend the > i440FX reset handler so that it unconditionally resets the PAMs, I'd > give my Rb. (Take it for what it's worth :)) It would actually make sense. The right way to do soft reset has nothing to do with qemu_system_reset_request(). Paolo
On Wed, 2013-02-20 at 16:34 +0100, Paolo Bonzini wrote: > Il 20/02/2013 16:18, Laszlo Ersek ha scritto: > >> > I'm beginning to wish I'd just ignored the fact that > >> > we need a properly working "soft" reset to get back from 286 protected > >> > mode to real mode, and wired up the damn PAM reset unconditionally. I'm > >> > not convinced that the protected->real mode transition will work for > >> > anyone anyway. > > I believe currently we must be somewhere "between" soft reset & hard > > reset. I estimate getting closer to a well-emulated hard reset is more > > important than getting closer to a soft one. If you were to extend the > > i440FX reset handler so that it unconditionally resets the PAMs, I'd > > give my Rb. (Take it for what it's worth :)) > > It would actually make sense. The right way to do soft reset has > nothing to do with qemu_system_reset_request(). I've posted that version of the patch, with a suitable comment. Thanks.
David Woodhouse <dwmw2@infradead.org> writes: > On Tue, 2013-02-19 at 16:17 -0600, Anthony Liguori wrote: >> Right, this is what's strange to me. There's no hardware analog AFAICT >> so I'm not sure why we're exposing it as a qemu_irq other than we want >> to jump through a function pointer invocation instead of making a >> straight funciton call :-) > > Hey, don't ask me. I was just trying to do what Paolo said in response > to me first attempt — which just accessed the 'hard reset' flag in the > PIIX directly from the I440FX reset handler. > >> > That just sets a flag to say that the coming *system* reset is a hard >> > reset and not a soft reset. >> >> Yes, but this flag is in PIIX, not the i440fx. > > Yes. The Reset Control Register is in the PIIX. And the i440fx just > happens to be the only other device that *cares* if it's a hard reset or > a soft reset. For now. Thankfully they're implemented in the same C file > and even initialised together, which lets us do a special-case hack > relatively easily. > >> I suspect what we need to do is convert qemu_system_reset_request() into >> a qemu_system_cpu_reset() that takes a callback. Once the VCPUs have >> been reset, the callback can then be used to reset all or some of the >> device model. This of course means removing the reset handlers in the >> CPUs as they exist today. >> >> Cc'ing Andreas to get his thoughts. >> >> FWIW, I'm not expecting you to do this to fix this issue. Just thinking >> out loud here really. > > Sounds good to me. I'm beginning to wish I'd just ignored the fact that > we need a properly working "soft" reset to get back from 286 protected > mode to real mode, and wired up the damn PAM reset unconditionally. I'm > not convinced that the protected->real mode transition will work for > anyone anyway. Can you try out the following tree: git://github.com/aliguori/qemu.git soft-reset.2 Or better yet, post binaries of Tiano Core + SeaBIOS as a CSM for me to try out? The above implements a CPU-only soft reset that should fix the problem you're having with PAM resetting unconditionally. If it works, I'll fixup the other PC callers of reset too. Regards, Anthony Liguori
On 02/20/13 19:12, Anthony Liguori wrote: > Or better yet, post binaries of Tiano Core + SeaBIOS as a CSM for me to > try out? I tested David's recent PAM-resetting series with these: http://people.redhat.com/~lersek/csm-test.tar.xz (Debug output of OVMF, SeaBIOS and SeaVGABIOS all goes to the qemu debug console; -debugcon file:XXXX -global isa-debugcon.iobase=0x402.) > The above implements a CPU-only soft reset that should fix the problem > you're having with PAM resetting unconditionally. If it works, I'll > fixup the other PC callers of reset too. The problem I was facing on my workstation is that the PAM registers were *not* reset, unconditionally. What I needed was KVM continuing at 0xfffffff0, or to make the reset "as hard as possible": it was "too soft". So if the linked branch makes resets softer, that's the opposite direction for my problem. Thanks Laszlo
On Thu, 2013-02-21 at 00:50 +0100, Laszlo Ersek wrote: > On 02/20/13 19:12, Anthony Liguori wrote: > > > Or better yet, post binaries of Tiano Core + SeaBIOS as a CSM for me to > > try out? > > I tested David's recent PAM-resetting series with these: > > http://people.redhat.com/~lersek/csm-test.tar.xz > > (Debug output of OVMF, SeaBIOS and SeaVGABIOS all goes to the qemu debug > console; -debugcon file:XXXX -global isa-debugcon.iobase=0x402.) Oops, sorry for not noticing that request earlier. > > The above implements a CPU-only soft reset that should fix the problem > > you're having with PAM resetting unconditionally. If it works, I'll > > fixup the other PC callers of reset too. > > The problem I was facing on my workstation is that the PAM registers > were *not* reset, unconditionally. What I needed was KVM continuing at > 0xfffffff0, or to make the reset "as hard as possible": it was "too > soft". So if the linked branch makes resets softer, that's the opposite > direction for my problem. Well... your test now works because of the bug that Anthony is trying to fix :) SeaBIOS is doing a keyboard-controller reset. And that's supposed to be a soft reset and thus *shouldn't* reset the PAM configuration. We need to fix SeaBIOS to do a "PCI" (0xcf9) reset. Or, since SeaBIOS isn't supposed to be hardware-specific, we need to fix SeaBIOS to use the ACPI RESET_REG, and fix OVMF to *set* the RESET_REG in the ACPI tables it generates. I posted patches for the SeaBIOS side of that today, if you want to do the OVMF side...
On Wed, 2013-02-20 at 12:12 -0600, Anthony Liguori wrote: > The above implements a CPU-only soft reset that should fix the problem > you're having with PAM resetting unconditionally. If it works, I'll > fixup the other PC callers of reset too. To be clear: that wasn't my problem :) If people want to run OS/2, VDISK or whatever else needs to get back from protected mode to real mode with a CPU reset but *without* a full system reset, it's going to be *their* problem. And I suspect they have other problems already, so I don't feel too guilty about the final patch in my tree at git.infradead.org/users/dwmw2/qemu.git doing the PAM reset unconditionally. That works just fine for me. if (val & 4) { - qemu_system_reset_request(); + if (val & 2) { + piix_soft_reset_request(); + } else { + qemu_system_reset_request(); + } return; Those are the wrong way round, I think. If bit 1 is set, that's a hard reset and should use qemu_system_reset_request(). If it's *not* set, then it's supposed to be a soft reset (like a port 92 or keyboard controller reset).
On 02/21/13 00:55, David Woodhouse wrote: >>> The above implements a CPU-only soft reset that should fix the problem >>> you're having with PAM resetting unconditionally. If it works, I'll >>> fixup the other PC callers of reset too. >> >> The problem I was facing on my workstation is that the PAM registers >> were *not* reset, unconditionally. What I needed was KVM continuing at >> 0xfffffff0, or to make the reset "as hard as possible": it was "too >> soft". So if the linked branch makes resets softer, that's the opposite >> direction for my problem. > > Well... your test now works because of the bug that Anthony is trying to > fix :) I don't believe so, > SeaBIOS is doing a keyboard-controller reset. And that's supposed to be > a soft reset and thus *shouldn't* reset the PAM configuration. the reset I tested was requested by the Linux kernel (in one of the ways we discussed elsewhere). Granted, one of those methods may have been a keyboard-controller reset. I booted Fedora 18 from OVMF+SeaBIOS CSM to the xfce GUI, then clicked Reboot on the virt-manager window. That generates an ACPI event, which starts the shutdown sequence, at the end of which the kernel reboots the VM. SeaBIOS cannot come into the picture until after that Linux request. (And if the host machine supports unrestricted guest, or the host kernel has a very recent KVM, or qemu resets the PAMs, then SeaBIOS doesn't come into the picture at all, at least before OVMF invokes again it after reboot.) > We need to fix SeaBIOS to do a "PCI" (0xcf9) reset. Or, since SeaBIOS > isn't supposed to be hardware-specific, we need to fix SeaBIOS to use > the ACPI RESET_REG, and fix OVMF to *set* the RESET_REG in the ACPI > tables it generates. I posted patches for the SeaBIOS side of that > today, if you want to do the OVMF side... (adding Jordan & edk2-devel) We might want to discuss two things here: (1) The reset capability that OVMF exports via ACPI -- I agree that I should be effecting the 0xCF9 thing in the appropriate table. (2) There's also one point (that I've ever seen) where OVMF itself resets the platform. It is on the initial config TUI; there's a Reset System option somewhere. ... It's implemented in "IntelFrameworkModulePkg/Universal/BdsDxe/BootMaint/BootMaint.c", function BootMaintCallback(). It calls gRT->ResetSystem (EfiResetCold, EFI_SUCCESS, 0, NULL); This runtime service comes from "PcAtChipsetPkg/KbcResetDxe" for OvmfPkg, which delegates the task to whichever ResetSystemLib flavor the module being built specifies. For OvmfPkg that's "OvmfPkg/Library/ResetSystemLib", and for a cold reset it uses /** Calling this function causes a system-wide reset. This sets all circuitry within the system to its initial state. This type of reset is asynchronous to system operation and operates without regard to cycle boundaries. System reset should not return, if it returns, it means the system does not support cold reset. **/ VOID EFIAPI ResetCold ( VOID ) { IoWrite8 (0x64, 0xfe); } The same code is used for ResetWarm(), this is the keyboard reset. I guess I could change it to access 0xCF9 instead, but - that won't work on earlier qemu versions, - it turns the intermediate module "PcAtChipsetPkg/KbcResetDxe" a misnomer, since we wouldn't be using the keyboard controller any longer. (Actually, in theory, "KbcResetDxe" itself is incapable of hard reset.) OTOH if the keyboard reset gets soft in qemu, then OVMF's hard reset (the above code) will break. Maybe I could cycle between 0xCF9 and 0x64 in ResetCold(), starting with 0xCF9. (The full logic in the Linux kernel seems a bit too much, see the list of source files in <http://thread.gmane.org/gmane.comp.bios.tianocore.devel/2093/focus=195441>.) ((3) Apparently a pre-DXE reset service (PPI, Pre-EFI-Initialization-Module to Pre-Efi-Initialization-Module Interface) exists as well, called PeiResetService(). It tries to delegate to the architectural protocol designated with "gEfiPeiResetPpiGuid" (MdeModulePkg/Core/Pei/Reset/Reset.c). The protocol has one function of type EFI_PEI_RESET_SYSTEM ("This service resets the entire platform, including all processors and devices, and reboots the system"), but it doesn't seem to be implemented anywhere in edk2. It's documented in Volume 1, Chapter 4.8 of the Plat. Init. Spec. We probably don't need to provide it.) Laszlo
Il 21/02/2013 02:10, Laszlo Ersek ha scritto: > OTOH if the keyboard reset gets soft in qemu, then OVMF's hard reset > (the above code) will break. Maybe I could cycle between 0xCF9 and 0x64 > in ResetCold(), starting with 0xCF9. Yes, that's the right thing to do. Also, in QEMU you're doing: if (val & 4) { qemu_system_reset_request(); return; } d->rcr = val & 2; /* keep System Reset type only */ It looks like d->rcr should be set first to match what hardware does (writing 0x6 causes a cold reset, and that's what you usually find in the ACPI tables). Paolo
On Thu, 2013-02-21 at 09:36 +0100, Paolo Bonzini wrote: > Il 21/02/2013 02:10, Laszlo Ersek ha scritto: > > OTOH if the keyboard reset gets soft in qemu, then OVMF's hard reset > > (the above code) will break. Maybe I could cycle between 0xCF9 and 0x64 > > in ResetCold(), starting with 0xCF9. > > Yes, that's the right thing to do. > > Also, in QEMU you're doing: > > if (val & 4) { > qemu_system_reset_request(); > return; > } > d->rcr = val & 2; /* keep System Reset type only */ > > It looks like d->rcr should be set first to match what hardware does > (writing 0x6 causes a cold reset, and that's what you usually find in > the ACPI tables). What Laszlo implemented (above) is fairly much as the data sheet describes it. You're supposed to write 0x02 to set the reset type, and *then* write 0x06 to actually trigger the reset. However, in practice on real hardware that isn't necessary. Just writing 0x06 once will do the trick. Which is just as well, because otherwise the ACPI RESET_REG wouldn't be able to represent it; that only describes *one* write, not two. So yes, you're right. Once qemu starts to distinguish properly between hard and soft reset, it should honour the value of bit 1 in the write which actually triggers the reset. Anthony's patch set did that.. albeit backwards, doing hard and soft reset the wrong way round.
On Thu, 2013-02-21 at 02:10 +0100, Laszlo Ersek wrote: > On 02/21/13 00:55, David Woodhouse wrote: > > Well... your test now works because of the bug that Anthony is trying to > > fix :) > > I don't believe so, Ok, for the *specific* variant of the test that you did. But there are many tests you could have done which *do* only work because of the bug that Anthony is trying to fix. From what you say, it looks like if the kernel had used the EFI runtime services 'ResetSystem' call, that "should" have failed too since it only does a KBC soft reset. > We might want to discuss two things here: > > (1) The reset capability that OVMF exports via ACPI -- I agree that I > should be effecting the 0xCF9 thing in the appropriate table. Right. When doing that, we should bear in mind your observation (which I can confirm here) that boards in the field tend to have the RESET_REG filled in to point at 0xcf9, but *without* the corresponding flag set in the FADT flags to say that it's supported. It would be interesting to know if there's a *reason* for that, or if it's just the typical failure of BIOS engineers to actually read a specification or care about quality any further than "it boots one version of Windows when the wind is in an easterly direction". > (2) There's also one point (that I've ever seen) where OVMF itself > resets the platform. It is on the initial config TUI; there's a Reset > System option somewhere. It's a runtime services call. The kernel can use it too, and perhaps one might even argue that the kernel *should* use it by default, in preference to anything else? But again I suppose that's only true if Windows uses it; if Windows doesn't then we can expect that nobody out there bothers to *test* its implementation on real hardware :( > OTOH if the keyboard reset gets soft in qemu, then OVMF's hard reset > (the above code) will break. Maybe I could cycle between 0xCF9 and 0x64 > in ResetCold(), starting with 0xCF9. (The full logic in the Linux kernel > seems a bit too much, see the list of source files in > <http://thread.gmane.org/gmane.comp.bios.tianocore.devel/2093/focus=195441>.) Just cf9, kbc, cf9, kbc perhaps? http://mjg59.dreamwidth.org/3561.html
David Woodhouse <dwmw2@infradead.org> writes: > On Thu, 2013-02-21 at 02:10 +0100, Laszlo Ersek wrote: >> On 02/21/13 00:55, David Woodhouse wrote: >> > Well... your test now works because of the bug that Anthony is trying to >> > fix :) >> >> I don't believe so, > > Ok, for the *specific* variant of the test that you did. > > But there are many tests you could have done which *do* only work > because of the bug that Anthony is trying to fix. From what you say, it > looks like if the kernel had used the EFI runtime services 'ResetSystem' > call, that "should" have failed too since it only does a KBC soft > reset. Help me understand where we're at. If you fix the bugs in UEFI and SeaBIOS, and I correct the reset patches I pointed you at earlier, we're good? Or is the lack of big real mode at startup on non-unrestricted mode boxes also a problem? I never quite understood how the two related to each other in this thread. Regards, Anthony Liguori > >> We might want to discuss two things here: >> >> (1) The reset capability that OVMF exports via ACPI -- I agree that I >> should be effecting the 0xCF9 thing in the appropriate table. > > Right. When doing that, we should bear in mind your observation (which I > can confirm here) that boards in the field tend to have the RESET_REG > filled in to point at 0xcf9, but *without* the corresponding flag set in > the FADT flags to say that it's supported. It would be interesting to > know if there's a *reason* for that, or if it's just the typical failure > of BIOS engineers to actually read a specification or care about quality > any further than "it boots one version of Windows when the wind is in an > easterly direction". > >> (2) There's also one point (that I've ever seen) where OVMF itself >> resets the platform. It is on the initial config TUI; there's a Reset >> System option somewhere. > > It's a runtime services call. The kernel can use it too, and perhaps one > might even argue that the kernel *should* use it by default, in > preference to anything else? But again I suppose that's only true if > Windows uses it; if Windows doesn't then we can expect that nobody out > there bothers to *test* its implementation on real hardware :( > >> OTOH if the keyboard reset gets soft in qemu, then OVMF's hard reset >> (the above code) will break. Maybe I could cycle between 0xCF9 and 0x64 >> in ResetCold(), starting with 0xCF9. (The full logic in the Linux kernel >> seems a bit too much, see the list of source files in >> <http://thread.gmane.org/gmane.comp.bios.tianocore.devel/2093/focus=195441>.) > > Just cf9, kbc, cf9, kbc perhaps? http://mjg59.dreamwidth.org/3561.html > > > -- > dwmw2
On Thu, 2013-02-21 at 08:37 -0600, Anthony Liguori wrote: > > Help me understand where we're at. If you fix the bugs in UEFI and > SeaBIOS, and I correct the reset patches I pointed you at earlier, we're > good? Yes. And merge the bit which resets the PAM configuration in the i440FX on a hard reset. And make sure that suspend/resume, keyboard controller, etc., are *not* hard resets. > Or is the lack of big real mode at startup on non-unrestricted > mode boxes also a problem? No. > I never quite understood how the two related to each other in this thread. It's one or the other. If big real mode actually worked, we'd run from 0xfffffff0 after reset (*any* reset), and we wouldn't *care* what we get when we read from 0xffff0. Since big real mode is broken, we *do* care what's at 0xffff0. And thus we need to make sure that after a *hard* reset it's the ROM (OVMF), and after a *soft* reset it's still RAM (SeaBIOS after it's done its init, and can find the ACPI resume vector, etc.)
On 02/21/13 09:36, Paolo Bonzini wrote: > Il 21/02/2013 02:10, Laszlo Ersek ha scritto: >> OTOH if the keyboard reset gets soft in qemu, then OVMF's hard reset >> (the above code) will break. Maybe I could cycle between 0xCF9 and 0x64 >> in ResetCold(), starting with 0xCF9. > > Yes, that's the right thing to do. > > Also, in QEMU you're doing: > > if (val & 4) { > qemu_system_reset_request(); > return; > } > d->rcr = val & 2; /* keep System Reset type only */ > > It looks like d->rcr should be set first to match what hardware does > (writing 0x6 causes a cold reset, and that's what you usually find in > the ACPI tables). I was thinking about that, yes. The language in the PIIX3 spec in fact suggested something like this to me: if (val & 4) { if ((val & 2) || (d->rcr & 2)) { /* Either current write or the previous setting is asking * for hard reset. Don't bother storing any value since the * register will be cleared anyway during hard reset. */ request_hard_reset(); return; } /* Soft reset requested. The reset type is stored & survives the * reset, but bit 2 (value 4) can never read as 1. * * Actually if we reach this point, * (val & 2) == 0 && (d->rcr & 2) == 0, so there's nothing to * store. Other bits in d->rcr don't have to be cleared because * we never allow them to be set. */ request_soft_reset(); return; } /* No reset requested, just save the reset type for later. */ d->rcr = val & 2; (Regarding the logical disjunction before the hard reset, the PIIX3 spec says "For example, to initiate a soft reset via the CF9 Reset Control Register, write 00h then 04h, then read the CF9 register." Ie. you can't just blindly write 0x04, because the previously set SRST could still force a hard reset.) Now add to the above code: "if we reset, we always do it the hard way" -- I took that for an "axiom": if (val & 4) { if (true) { request_hard_reset(); return; } /* not reached */ } d->rcr = val & 2; And then you get what I submitted in the patch. (Plus of course if reset callbacks actually care about d->rcr, then the "don't bother" thing isn't valid anymore, but when I submitted the patch, I was just adding d->rcr so nobody could have cared.) Laszlo
On 02/21/13 15:37, Anthony Liguori wrote: > David Woodhouse <dwmw2@infradead.org> writes: > >> On Thu, 2013-02-21 at 02:10 +0100, Laszlo Ersek wrote: >>> On 02/21/13 00:55, David Woodhouse wrote: >>>> Well... your test now works because of the bug that Anthony is trying to >>>> fix :) >>> >>> I don't believe so, >> >> Ok, for the *specific* variant of the test that you did. >> >> But there are many tests you could have done which *do* only work >> because of the bug that Anthony is trying to fix. From what you say, it >> looks like if the kernel had used the EFI runtime services 'ResetSystem' >> call, that "should" have failed too since it only does a KBC soft >> reset. > > Help me understand where we're at. If you fix the bugs in UEFI and > SeaBIOS, and I correct the reset patches I pointed you at earlier, we're > good? Or is the lack of big real mode at startup on non-unrestricted > mode boxes also a problem? > > I never quite understood how the two related to each other in this thread. In addition to what David wrote, I'll try to formalize the problem I ran into. assert(firmware is OVMF + SeaBIOS CSM); assert(at 0xFFFFFFF0, OVMF reset vector / startup code is visible); assert(reset, soft or hard, was requested); if (!kvm_enabled() || (host_cpu_supports_unrestricted_guest() && !host_admin_has_disabled_unrestricted_guest()) || kvm_is_recent_and_doesnt_approximate_real_mode_with_vm86()) { jump_target = 0xFFFFFFF0; } else { /* KVM problem hits */ jump_target = 0xFFFF0; } if (reset_kind_is_hard() && qemu_is_recent_and_clears_i440FX_PAM_regs_at_hard_reset()) { /* workable */ visible_at_FFFF0h = low_image_of_OVMF_reset_vector_from_ROM; } else { /* unusable under OVMF+SeaBIOS CSM */ visible_at_FFFF0h = SeaBIOS_reset_code_retriggering_reset; } jump_to(jump_target); If kvm is disabled, or it is enabled and the host has support for unrestricted guest and the sysadmin hasn't disabled UG, or the host is old or UG is disabled BUT KVM fully-emulates real mode instead of approximating it, *then* we jump to the correct OVMF reboot code at hard reset time, and we don't care what's visible at 0xFFFF0 at all. If however the KVM problem affects us, then we *do* care about what's visible at 0xFFFF0. If we want hard reset, and qemu is recent and resets the PAM regs at hard reset, then the incorrectly low jump_target value ultimately points into good code. Otherwise at 0xFFFF0 we'll find a vestigial SeaBIOS reset vector from the CSM binary, which just re-requests a reset, and we reenter the above pseudo-code at the top. (Infinite loop, if SeaBIOS requests a soft reset *or* it requests a hard one but qemu doesn't have the PAM fix.) Since - I like kvm, and - I cannot easily change my hardware (which doesn't support UG), and - I prefer to run the RHEL-6 kernel which has "old" KVM, I depend on David's fix for the PAM registers. Laszlo
On Thu, 2013-02-21 at 18:10 +0100, Laszlo Ersek wrote: > Since > - I like kvm, and > - I cannot easily change my hardware (which doesn't support UG), and > - I prefer to run the RHEL-6 kernel which has "old" KVM, > I depend on David's fix for the PAM registers. Which means your suspend/resume is broken. Yay! :) Anthony's patchset should hopefully fix that. I wonder what it would take to work around the KVM bug in qemu though; can we actually *emulate* 16-bit mode instead of using KVM, on the kernels where KVM is broken? And then transition back to using KVM when we switch to 32-bit or 64-bit mode?
Il 21/02/2013 18:12, David Woodhouse ha scritto: >> > Since >> > - I like kvm, and >> > - I cannot easily change my hardware (which doesn't support UG), and >> > - I prefer to run the RHEL-6 kernel which has "old" KVM, >> > I depend on David's fix for the PAM registers. > Which means your suspend/resume is broken. Yay! :) > > Anthony's patchset should hopefully fix that. > > I wonder what it would take to work around the KVM bug in qemu though; > can we actually *emulate* 16-bit mode instead of using KVM, on the > kernels where KVM is broken? And then transition back to using KVM when > we switch to 32-bit or 64-bit mode? I don't think you want to do that. It would be really slow, it would trade some emulation bugs for others, and people who mind security would not appreciate the additional attack surface of TCG. (BTW, that's how the very first version of KVM worked. It didn't use hardware virtualization at all until entering 64-bit mode, IIRC). I think we just have to fix it in 1.5, and suggest a newer kernel to everyone else. Paolo
On 02/21/13 18:12, David Woodhouse wrote: > On Thu, 2013-02-21 at 18:10 +0100, Laszlo Ersek wrote: >> Since >> - I like kvm, and >> - I cannot easily change my hardware (which doesn't support UG), and >> - I prefer to run the RHEL-6 kernel which has "old" KVM, >> I depend on David's fix for the PAM registers. > > Which means your suspend/resume is broken. Yay! :) > > Anthony's patchset should hopefully fix that. > > I wonder what it would take to work around the KVM bug in qemu though; > can we actually *emulate* 16-bit mode instead of using KVM, on the > kernels where KVM is broken? And then transition back to using KVM when > we switch to 32-bit or 64-bit mode? I can but join you in wondering! :) Laszlo
David Woodhouse <dwmw2@infradead.org> writes: > On Thu, 2013-02-21 at 18:10 +0100, Laszlo Ersek wrote: >> Since >> - I like kvm, and >> - I cannot easily change my hardware (which doesn't support UG), and >> - I prefer to run the RHEL-6 kernel which has "old" KVM, >> I depend on David's fix for the PAM registers. > > Which means your suspend/resume is broken. Yay! :) > > Anthony's patchset should hopefully fix that. Will post a new version later today. Am testing now with Laszlo's mock up from a previous note. I'm also going to write a qtest test case to try and validate all of this behavior (minus the KVM bits). > I wonder what it would take to work around the KVM bug in qemu though; > can we actually *emulate* 16-bit mode instead of using KVM, on the > kernels where KVM is broken? And then transition back to using KVM when > we switch to 32-bit or 64-bit mode? It's in the realm of possibility but ugly enough that we never did it. I even worked on this way back in the day with Xen. It sucked pretty bad. The challenge in support SMP eventually scared me off of it. http://new-wiki.xen.org/old-wiki/xenwiki/HVM/V2E.html Regards, Anthony Liguori > > -- > dwmw2
> (1) The reset capability that OVMF exports via ACPI -- I agree that I > should be effecting the 0xCF9 thing in the appropriate table. On a second thought, this will require a new build -D flag, or a PCD. I'm not worried about the ACPI 1.0 --> ACPI 2.0 change in the FADT, the table struct itself forward compatible. However currently we're not saying anything about the reset capabilities of the platform. A client looking at the FADT for reset info will find nothing and follow its own logic, which may or may not include writing to 0xCF9, but we don't have any part in it. If now the FADT starts to claim 0xCF9 on a qemu version that doesn't actually support it, we could mislead the client. Unless we can interrogate qemu about the support (and I think we can't), we'll have to depend on a build-time option. (Or should I shoehorn it into -D CSM_ENABLE?) Jordan, what do you think? (Of course this *too* would go away if qemu itself prepared all ACPI tables over fw_cfg for whichever firmware the user selects. Then we wouldn't have to duplicate ACPI work between SeaBIOS and OVMF any longer. This task has been on my todo list forever, but it never seems to near the top. Maybe moving to a hermit cabin for a month and sleeping even less would make it happen. I have no clue how others do it.) Thanks! Laszlo
Paolo Bonzini <pbonzini@redhat.com> writes: > Il 21/02/2013 18:12, David Woodhouse ha scritto: >>> > Since >>> > - I like kvm, and >>> > - I cannot easily change my hardware (which doesn't support UG), and >>> > - I prefer to run the RHEL-6 kernel which has "old" KVM, >>> > I depend on David's fix for the PAM registers. >> Which means your suspend/resume is broken. Yay! :) >> >> Anthony's patchset should hopefully fix that. >> >> I wonder what it would take to work around the KVM bug in qemu though; >> can we actually *emulate* 16-bit mode instead of using KVM, on the >> kernels where KVM is broken? And then transition back to using KVM when >> we switch to 32-bit or 64-bit mode? > > I don't think you want to do that. It would be really slow, it would > trade some emulation bugs for others, and people who mind security would > not appreciate the additional attack surface of TCG. > > (BTW, that's how the very first version of KVM worked. It didn't use > hardware virtualization at all until entering 64-bit mode, IIRC). Pre-released versions, yes. Avi started with emulating up to 64-bit and worked up from there. None of the public versions ever implemented this. Regards, Anthony Liguori > > I think we just have to fix it in 1.5, and suggest a newer kernel to > everyone else. > > Paolo
Il 21/02/2013 19:24, Laszlo Ersek ha scritto: >> > (1) The reset capability that OVMF exports via ACPI -- I agree that I >> > should be effecting the 0xCF9 thing in the appropriate table. > On a second thought, this will require a new build -D flag, or a PCD. > > I'm not worried about the ACPI 1.0 --> ACPI 2.0 change in the FADT, the > table struct itself forward compatible. > > However currently we're not saying anything about the reset capabilities > of the platform. A client looking at the FADT for reset info will find > nothing and follow its own logic, which may or may not include writing > to 0xCF9, but we don't have any part in it. > > If now the FADT starts to claim 0xCF9 on a qemu version that doesn't > actually support it, we could mislead the client. Unless we can > interrogate qemu about the support (and I think we can't), we'll have to > depend on a build-time option. (Or should I shoehorn it into -D CSM_ENABLE?) > > Jordan, what do you think? ACPI tables are hosed enough on some real system that we can assume that all guests will fall back to something else---typically a keyboard controller reset. See the sequence that Windows does: ACPI, kbd, ACPI, kbd. Paolo
On 02/21/13 20:31, Paolo Bonzini wrote: > Il 21/02/2013 19:24, Laszlo Ersek ha scritto: >>>> (1) The reset capability that OVMF exports via ACPI -- I agree that I >>>> should be effecting the 0xCF9 thing in the appropriate table. >> On a second thought, this will require a new build -D flag, or a PCD. >> >> I'm not worried about the ACPI 1.0 --> ACPI 2.0 change in the FADT, the >> table struct itself forward compatible. >> >> However currently we're not saying anything about the reset capabilities >> of the platform. A client looking at the FADT for reset info will find >> nothing and follow its own logic, which may or may not include writing >> to 0xCF9, but we don't have any part in it. >> >> If now the FADT starts to claim 0xCF9 on a qemu version that doesn't >> actually support it, we could mislead the client. Unless we can >> interrogate qemu about the support (and I think we can't), we'll have to >> depend on a build-time option. (Or should I shoehorn it into -D CSM_ENABLE?) >> >> Jordan, what do you think? > > ACPI tables are hosed enough on some real system that we can assume that > all guests will fall back to something else---typically a keyboard > controller reset. That works for me. Thx. (BTW I can "plausibly justify" why a BIOS vendor would advertize 0xCF9 (or anything else) in RESET_REG, but deny it by clearing RESET_REG_SUP: they're probably not sure what hardware the BIOS will be flashed to. So RESET_REG is a hint for OSPM, but if it doesn't work, "we told you so in RESET_REG_SUP!" :) Repudiation of responsibility. Maybe we should follow suit :)) Laszlo
On 02/20/2013 07:37 AM, David Woodhouse wrote: > On Wed, 2013-02-20 at 16:34 +0100, Paolo Bonzini wrote: >> Il 20/02/2013 16:18, Laszlo Ersek ha scritto: >>>>> I'm beginning to wish I'd just ignored the fact that >>>>> we need a properly working "soft" reset to get back from 286 protected >>>>> mode to real mode, and wired up the damn PAM reset unconditionally. I'm >>>>> not convinced that the protected->real mode transition will work for >>>>> anyone anyway. >>> I believe currently we must be somewhere "between" soft reset & hard >>> reset. I estimate getting closer to a well-emulated hard reset is more >>> important than getting closer to a soft one. If you were to extend the >>> i440FX reset handler so that it unconditionally resets the PAMs, I'd >>> give my Rb. (Take it for what it's worth :)) >> >> It would actually make sense. The right way to do soft reset has >> nothing to do with qemu_system_reset_request(). > > I've posted that version of the patch, with a suitable comment. > Right... the "soft reset" described above is really INIT, which isn't even a reset in modern CPUs (it couldn't be, it has to preserve caches) but more of a special interrupt. It is also used during multiprocessor init. -hpa
diff --git a/hw/piix_pci.c b/hw/piix_pci.c index 6c77e49..0c8f613 100644 --- a/hw/piix_pci.c +++ b/hw/piix_pci.c @@ -71,6 +71,7 @@ typedef struct PIIX3State { uint64_t pic_levels; qemu_irq *pic; + qemu_irq reset_out; /* This member isn't used. Just for save/load compatibility */ int32_t pci_irq_levels_vmstate[PIIX_NUM_PIRQS]; @@ -92,6 +93,7 @@ struct PCII440FXState { PAMMemoryRegion pam_regions[13]; MemoryRegion smram_region; uint8_t smm_enabled; + uint8_t hard_reset; }; @@ -171,6 +173,42 @@ static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id) return 0; } +static void i440fx_reset(DeviceState *ds) +{ + PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, ds); + PCII440FXState *d = DO_UPCAST(PCII440FXState, dev, dev); + uint8_t *pci_conf = d->dev.config; + + /* We only reset the PAM configuration if it was a *hard* reset, + * triggered from the RCR on the PIIX3 (or from the TRCR on the + * i440FX itself, but we don't implement that yet and software + * is advised not to use it when there's a PIIX). */ + if (!d->hard_reset) + return; + + d->hard_reset = 0; + + pci_conf[0x59] = 0x00; // Reset PAM setup + pci_conf[0x5a] = 0x00; + pci_conf[0x5b] = 0x00; + pci_conf[0x5c] = 0x00; + pci_conf[0x5d] = 0x00; + pci_conf[0x5e] = 0x00; + pci_conf[0x5f] = 0x00; + pci_conf[0x72] = 0x02; // And SMM + + i440fx_update_memory_mappings(d); +} + +static void i440fx_handle_reset(void *opaque, int n, int level) +{ + PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, opaque); + PCII440FXState *d = DO_UPCAST(PCII440FXState, dev, dev); + + if (level) + d->hard_reset = 1; +} + static int i440fx_post_load(void *opaque, int version_id) { PCII440FXState *d = opaque; @@ -216,6 +254,8 @@ static int i440fx_initfn(PCIDevice *dev) d->dev.config[I440FX_SMRAM] = 0x02; + qdev_init_gpio_in(&d->dev.qdev, i440fx_handle_reset, 1); + cpu_smm_register(&i440fx_set_smm, d); return 0; } @@ -297,6 +337,8 @@ static PCIBus *i440fx_common_init(const char *device_name, pci_bus_set_route_irq_fn(b, piix3_route_intx_pin_to_irq); } piix3->pic = pic; + qdev_connect_gpio_out(&piix3->dev.qdev, 0, + qdev_get_gpio_in(&f->dev.qdev, 0)); *isa_bus = DO_UPCAST(ISABus, qbus, qdev_get_child_bus(&piix3->dev.qdev, "isa.0")); @@ -521,6 +563,8 @@ static void rcr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len) PIIX3State *d = opaque; if (val & 4) { + if (val & 2) + qemu_irq_pulse(d->reset_out); qemu_system_reset_request(); return; } @@ -550,6 +594,7 @@ static int piix3_initfn(PCIDevice *dev) memory_region_add_subregion_overlap(pci_address_space_io(dev), RCR_IOPORT, &d->rcr_mem, 1); + qdev_init_gpio_out(&dev->qdev, &d->reset_out, 1); qemu_register_reset(piix3_reset, d); return 0; } @@ -615,6 +660,7 @@ static void i440fx_class_init(ObjectClass *klass, void *data) dc->desc = "Host bridge"; dc->no_user = 1; dc->vmsd = &vmstate_i440fx; + dc->reset = i440fx_reset; } static const TypeInfo i440fx_info = {