Message ID | 20101116131452.GA23963@redhat.com |
---|---|
State | New |
Headers | show |
Am 16.11.2010 14:14, schrieb mst@redhat.com: > Although explicitly disallowed by the PCI spec, some guests read a > single byte or word from mmio. Likely a guest OS bug, but I have an OS > which reads single bytes and it works fine on real hardware. > > Signed-off-by: Bernhard Kohl<bernhard.kohl@nsn.com> > Signed-off-by: Michael S. Tsirkin<mst@redhat.com> > --- > > OK so it could like something like the below. Yes, this looks good for me. > However, my question is: > do we need to put this in or can the guest simply be fixed? > I tried to locate the code where the readw occurs, but not successful. It only occurs during booting our OS, and the virtio-net driver seems to be OK. With 4 virtio NICs we have the following readw accesses, thats all! 3 accesses per NIC and the first NIC appears twice. MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000008 MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000018 MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000028 MSI-X: msix_mmio_readw dev=0x9772c40 addr=0000000000000008 MSI-X: msix_mmio_readw dev=0x9772c40 addr=0000000000000018 MSI-X: msix_mmio_readw dev=0x9772c40 addr=0000000000000028 MSI-X: msix_mmio_readw dev=0x977dc38 addr=0000000000000008 MSI-X: msix_mmio_readw dev=0x977dc38 addr=0000000000000018 MSI-X: msix_mmio_readw dev=0x977dc38 addr=0000000000000028 MSI-X: msix_mmio_readw dev=0x9788d90 addr=0000000000000008 MSI-X: msix_mmio_readw dev=0x9788d90 addr=0000000000000018 MSI-X: msix_mmio_readw dev=0x9788d90 addr=0000000000000028 MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000008 MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000018 MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000028 Is it possible to add a stack back tace printing to the readw function? > hw/msix.c | 31 +++++++++++++++++++++++++++---- > 1 files changed, 27 insertions(+), 4 deletions(-) > > diff --git a/hw/msix.c b/hw/msix.c > index f66d255..38dff59 100644 > --- a/hw/msix.c > +++ b/hw/msix.c > @@ -102,10 +102,28 @@ static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr) > return pci_get_long(page + offset); > } > > -static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr) > + /* Note: > + * PCI spec requires that all MSI-X table accesses are either DWORD or QWORD, > + * size aligned. Some guests seem to violate this rule for read accesses, > + * performing single byte reads. Since it's easy to support this, let's do so. > + * Also support 16 bit size aligned reads, just in case. > + */ > +static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr) > { > - fprintf(stderr, "MSI-X: only dword read is allowed!\n"); > - return 0; > + PCIDevice *dev = opaque; > + unsigned int offset = addr& (MSIX_PAGE_SIZE - 1)& ~0x1; > + void *page = dev->msix_table_page; > + > + return pci_get_word(page + offset); > +} > + > +static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr) > +{ > + PCIDevice *dev = opaque; > + unsigned int offset = addr& (MSIX_PAGE_SIZE - 1); > + void *page = dev->msix_table_page; > + > + return pci_get_byte(page + offset); > } > > static uint8_t msix_pending_mask(int vector) > @@ -192,6 +210,11 @@ static void msix_mmio_writel(void *opaque, target_phys_addr_t addr, > msix_handle_mask_update(dev, vector); > } > > +/* PCI spec: > + * For all accesses to MSI-X Table and MSI-X PBA fields, software must use > + * aligned full DWORD or aligned full QWORD transactions; otherwise, the result > + * is undefined. > + */ > static void msix_mmio_write_unallowed(void *opaque, target_phys_addr_t addr, > uint32_t val) > { > @@ -203,7 +226,7 @@ static CPUWriteMemoryFunc * const msix_mmio_write[] = { > }; > > static CPUReadMemoryFunc * const msix_mmio_read[] = { > - msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl > + msix_mmio_readb, msix_mmio_readw, msix_mmio_readl > }; > > /* Should be called from device's map method. */ >
On Tue, Nov 16, 2010 at 05:43:06PM +0100, Bernhard Kohl wrote: > Am 16.11.2010 14:14, schrieb mst@redhat.com: > >Although explicitly disallowed by the PCI spec, some guests read a > >single byte or word from mmio. Likely a guest OS bug, but I have an OS > >which reads single bytes and it works fine on real hardware. > > > >Signed-off-by: Bernhard Kohl<bernhard.kohl@nsn.com> > >Signed-off-by: Michael S. Tsirkin<mst@redhat.com> > >--- > > > >OK so it could like something like the below. > > Yes, this looks good for me. Wait, the below shows single-word reads. I thought you said bytes? > > However, my question is: > >do we need to put this in or can the guest simply be fixed? > > I tried to locate the code where the readw occurs, > but not successful. It only occurs during booting our OS, > and the virtio-net driver seems to be OK. With 4 virtio > NICs we have the following readw accesses, thats all! > 3 accesses per NIC and the first NIC appears twice. > > MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000008 > MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000018 > MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000028 > MSI-X: msix_mmio_readw dev=0x9772c40 addr=0000000000000008 > MSI-X: msix_mmio_readw dev=0x9772c40 addr=0000000000000018 > MSI-X: msix_mmio_readw dev=0x9772c40 addr=0000000000000028 > MSI-X: msix_mmio_readw dev=0x977dc38 addr=0000000000000008 > MSI-X: msix_mmio_readw dev=0x977dc38 addr=0000000000000018 > MSI-X: msix_mmio_readw dev=0x977dc38 addr=0000000000000028 > MSI-X: msix_mmio_readw dev=0x9788d90 addr=0000000000000008 > MSI-X: msix_mmio_readw dev=0x9788d90 addr=0000000000000018 > MSI-X: msix_mmio_readw dev=0x9788d90 addr=0000000000000028 > MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000008 > MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000018 > MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000028 Hmm, message data is being read for some reason. > Is it possible to add a stack back tace printing to the > readw function? There's the qemu -S option, it will let you debug the guest. Still, question is, do we need work-around in qemu, because a broken guest is in production and can not be fixed, or can guest just be fixed? > > hw/msix.c | 31 +++++++++++++++++++++++++++---- > > 1 files changed, 27 insertions(+), 4 deletions(-) > > > >diff --git a/hw/msix.c b/hw/msix.c > >index f66d255..38dff59 100644 > >--- a/hw/msix.c > >+++ b/hw/msix.c > >@@ -102,10 +102,28 @@ static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr) > > return pci_get_long(page + offset); > > } > > > >-static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr) > >+ /* Note: > >+ * PCI spec requires that all MSI-X table accesses are either DWORD or QWORD, > >+ * size aligned. Some guests seem to violate this rule for read accesses, > >+ * performing single byte reads. Since it's easy to support this, let's do so. > >+ * Also support 16 bit size aligned reads, just in case. > >+ */ > >+static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr) > > { > >- fprintf(stderr, "MSI-X: only dword read is allowed!\n"); > >- return 0; > >+ PCIDevice *dev = opaque; > >+ unsigned int offset = addr& (MSIX_PAGE_SIZE - 1)& ~0x1; > >+ void *page = dev->msix_table_page; > >+ > >+ return pci_get_word(page + offset); > >+} > >+ > >+static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr) > >+{ > >+ PCIDevice *dev = opaque; > >+ unsigned int offset = addr& (MSIX_PAGE_SIZE - 1); > >+ void *page = dev->msix_table_page; > >+ > >+ return pci_get_byte(page + offset); > > } > > > > static uint8_t msix_pending_mask(int vector) > >@@ -192,6 +210,11 @@ static void msix_mmio_writel(void *opaque, target_phys_addr_t addr, > > msix_handle_mask_update(dev, vector); > > } > > > >+/* PCI spec: > >+ * For all accesses to MSI-X Table and MSI-X PBA fields, software must use > >+ * aligned full DWORD or aligned full QWORD transactions; otherwise, the result > >+ * is undefined. > >+ */ > > static void msix_mmio_write_unallowed(void *opaque, target_phys_addr_t addr, > > uint32_t val) > > { > >@@ -203,7 +226,7 @@ static CPUWriteMemoryFunc * const msix_mmio_write[] = { > > }; > > > > static CPUReadMemoryFunc * const msix_mmio_read[] = { > >- msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl > >+ msix_mmio_readb, msix_mmio_readw, msix_mmio_readl > > }; > > > > /* Should be called from device's map method. */
On Tue, Nov 16, 2010 at 05:43:06PM +0100, Bernhard Kohl wrote: > Am 16.11.2010 14:14, schrieb mst@redhat.com: > >Although explicitly disallowed by the PCI spec, some guests read a > >single byte or word from mmio. Likely a guest OS bug, but I have an OS > >which reads single bytes and it works fine on real hardware. > > > >Signed-off-by: Bernhard Kohl<bernhard.kohl@nsn.com> > >Signed-off-by: Michael S. Tsirkin<mst@redhat.com> > >--- > > > >OK so it could like something like the below. > > Yes, this looks good for me. > > > However, my question is: > >do we need to put this in or can the guest simply be fixed? > > I tried to locate the code where the readw occurs, > but not successful. It only occurs during booting our OS, > and the virtio-net driver seems to be OK. With 4 virtio > NICs we have the following readw accesses, thats all! > 3 accesses per NIC and the first NIC appears twice. > > MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000008 > MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000018 > MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000028 > MSI-X: msix_mmio_readw dev=0x9772c40 addr=0000000000000008 > MSI-X: msix_mmio_readw dev=0x9772c40 addr=0000000000000018 > MSI-X: msix_mmio_readw dev=0x9772c40 addr=0000000000000028 > MSI-X: msix_mmio_readw dev=0x977dc38 addr=0000000000000008 > MSI-X: msix_mmio_readw dev=0x977dc38 addr=0000000000000018 > MSI-X: msix_mmio_readw dev=0x977dc38 addr=0000000000000028 > MSI-X: msix_mmio_readw dev=0x9788d90 addr=0000000000000008 > MSI-X: msix_mmio_readw dev=0x9788d90 addr=0000000000000018 > MSI-X: msix_mmio_readw dev=0x9788d90 addr=0000000000000028 > MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000008 > MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000018 > MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000028 > > Is it possible to add a stack back tace printing to the > readw function? See some words of wisdom from Marcelo (this appliues to qemu-kvm.git): <mst> marcelot, there is a guest that misbehaves <mst> marcelot, so we get an fprintf in qemu <mst> marcelot, but we don't know which place in guest caused the problem, no backtrace, etc <marcelot> mst, print guest RIP there? <mst> marcelot, 1. how to? <marcelot> mst, cpu_synchronize_state(cpu_single_env) + print (env->eip) <marcelot> s/env/cpu_single_env/ <mst> marcelot, 2. can we also make it so that when io completes, guest stops with backtrace in gdb? <marcelot> mst, see kvm_show_code in qemu-kvm-x86.c <marcelot> mst, either add a breakpoint there, or call abort() and gdb core later <mst> marcelot, abort will exit qemu though. gdb core will give me qemu backtrace, not guest one, right? <mst> marcelot, I would like kvm to pretend it hit a breakpoint <mst> marcelot, or a segfault ... something <marcelot> mst, well, vmstop() then. you can see guest RIP from there. <marcelot> mst, yes gdb core will give qemu backtrace. inject a #GP to see guest backtrace (see kvm_put_vcpu_events in qemu-kvm-x86.c <marcelot> )
Am 17.11.2010 15:12, schrieb ext Michael S. Tsirkin: > On Tue, Nov 16, 2010 at 05:43:06PM +0100, Bernhard Kohl wrote: > >> Am 16.11.2010 14:14, schrieb mst@redhat.com: >> >>> Although explicitly disallowed by the PCI spec, some guests read a >>> single byte or word from mmio. Likely a guest OS bug, but I have an OS >>> which reads single bytes and it works fine on real hardware. >>> >>> Signed-off-by: Bernhard Kohl<bernhard.kohl@nsn.com> >>> Signed-off-by: Michael S. Tsirkin<mst@redhat.com> >>> --- >>> >>> OK so it could like something like the below. >>> >> Yes, this looks good for me. >> > Wait, the below shows single-word reads. I thought you said bytes? > The original patch was several month old. When posting it I added some more description text. Sorry, I mismatched words with bytes. In fact there are only these word accesses as seen below. > >>> However, my question is: >>> do we need to put this in or can the guest simply be fixed? >>> >> I tried to locate the code where the readw occurs, >> but not successful. It only occurs during booting our OS, >> and the virtio-net driver seems to be OK. With 4 virtio >> NICs we have the following readw accesses, thats all! >> 3 accesses per NIC and the first NIC appears twice. >> >> MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000008 >> MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000018 >> MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000028 >> MSI-X: msix_mmio_readw dev=0x9772c40 addr=0000000000000008 >> MSI-X: msix_mmio_readw dev=0x9772c40 addr=0000000000000018 >> MSI-X: msix_mmio_readw dev=0x9772c40 addr=0000000000000028 >> MSI-X: msix_mmio_readw dev=0x977dc38 addr=0000000000000008 >> MSI-X: msix_mmio_readw dev=0x977dc38 addr=0000000000000018 >> MSI-X: msix_mmio_readw dev=0x977dc38 addr=0000000000000028 >> MSI-X: msix_mmio_readw dev=0x9788d90 addr=0000000000000008 >> MSI-X: msix_mmio_readw dev=0x9788d90 addr=0000000000000018 >> MSI-X: msix_mmio_readw dev=0x9788d90 addr=0000000000000028 >> MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000008 >> MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000018 >> MSI-X: msix_mmio_readw dev=0x9767c58 addr=0000000000000028 >> > Hmm, message data is being read for some reason. > Yes there is a helper library in our OS which does the following (the MOVZX does 16-bit mem read): *int_level_ptr = (byte) msi_x_table_ptr->message_data; ; STATEMENT # 2593 00001FC0 640FB74008 MOVZX EAX,FS:[EAX]+8H @29: 00001FC5 0FB44D10 LFS ECX,[EBP].int_level_ptr 00001FC9 648801 MOV FS:[ECX],AL > >> Is it possible to add a stack back tace printing to the >> readw function? >> > There's the qemu -S option, it will let you debug the guest. > Still, question is, do we need work-around in qemu, > because a broken guest is in production and can not be fixed, > or can guest just be fixed? > I will fix the guest and inform you after testing. Then we can skip this patch. > >>> hw/msix.c | 31 +++++++++++++++++++++++++++---- >>> 1 files changed, 27 insertions(+), 4 deletions(-) >>> >>> diff --git a/hw/msix.c b/hw/msix.c >>> index f66d255..38dff59 100644 >>> --- a/hw/msix.c >>> +++ b/hw/msix.c >>> @@ -102,10 +102,28 @@ static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr) >>> return pci_get_long(page + offset); >>> } >>> >>> -static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr) >>> + /* Note: >>> + * PCI spec requires that all MSI-X table accesses are either DWORD or QWORD, >>> + * size aligned. Some guests seem to violate this rule for read accesses, >>> + * performing single byte reads. Since it's easy to support this, let's do so. >>> + * Also support 16 bit size aligned reads, just in case. >>> + */ >>> +static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr) >>> { >>> - fprintf(stderr, "MSI-X: only dword read is allowed!\n"); >>> - return 0; >>> + PCIDevice *dev = opaque; >>> + unsigned int offset = addr& (MSIX_PAGE_SIZE - 1)& ~0x1; >>> + void *page = dev->msix_table_page; >>> + >>> + return pci_get_word(page + offset); >>> +} >>> + >>> +static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr) >>> +{ >>> + PCIDevice *dev = opaque; >>> + unsigned int offset = addr& (MSIX_PAGE_SIZE - 1); >>> + void *page = dev->msix_table_page; >>> + >>> + return pci_get_byte(page + offset); >>> } >>> >>> static uint8_t msix_pending_mask(int vector) >>> @@ -192,6 +210,11 @@ static void msix_mmio_writel(void *opaque, target_phys_addr_t addr, >>> msix_handle_mask_update(dev, vector); >>> } >>> >>> +/* PCI spec: >>> + * For all accesses to MSI-X Table and MSI-X PBA fields, software must use >>> + * aligned full DWORD or aligned full QWORD transactions; otherwise, the result >>> + * is undefined. >>> + */ >>> static void msix_mmio_write_unallowed(void *opaque, target_phys_addr_t addr, >>> uint32_t val) >>> { >>> @@ -203,7 +226,7 @@ static CPUWriteMemoryFunc * const msix_mmio_write[] = { >>> }; >>> >>> static CPUReadMemoryFunc * const msix_mmio_read[] = { >>> - msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl >>> + msix_mmio_readb, msix_mmio_readw, msix_mmio_readl >>> }; >>> >>> /* Should be called from device's map method. */ >>> >
On Wed, Nov 17, 2010 at 05:12:12PM +0100, Bernhard Kohl wrote: > I will fix the guest and inform you after testing. > Then we can skip this patch. OK, that's best,
Am 17.11.2010 17:30, schrieb ext Michael S. Tsirkin: > On Wed, Nov 17, 2010 at 05:12:12PM +0100, Bernhard Kohl wrote: > >> I will fix the guest and inform you after testing. >> Then we can skip this patch. >> > OK, that's best, > > > Yes, the guest is fixed now. Only dword accesses to msix! Thanks for your support Bernhard
diff --git a/hw/msix.c b/hw/msix.c index f66d255..38dff59 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -102,10 +102,28 @@ static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr) return pci_get_long(page + offset); } -static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr) + /* Note: + * PCI spec requires that all MSI-X table accesses are either DWORD or QWORD, + * size aligned. Some guests seem to violate this rule for read accesses, + * performing single byte reads. Since it's easy to support this, let's do so. + * Also support 16 bit size aligned reads, just in case. + */ +static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr) { - fprintf(stderr, "MSI-X: only dword read is allowed!\n"); - return 0; + PCIDevice *dev = opaque; + unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x1; + void *page = dev->msix_table_page; + + return pci_get_word(page + offset); +} + +static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr) +{ + PCIDevice *dev = opaque; + unsigned int offset = addr & (MSIX_PAGE_SIZE - 1); + void *page = dev->msix_table_page; + + return pci_get_byte(page + offset); } static uint8_t msix_pending_mask(int vector) @@ -192,6 +210,11 @@ static void msix_mmio_writel(void *opaque, target_phys_addr_t addr, msix_handle_mask_update(dev, vector); } +/* PCI spec: + * For all accesses to MSI-X Table and MSI-X PBA fields, software must use + * aligned full DWORD or aligned full QWORD transactions; otherwise, the result + * is undefined. + */ static void msix_mmio_write_unallowed(void *opaque, target_phys_addr_t addr, uint32_t val) { @@ -203,7 +226,7 @@ static CPUWriteMemoryFunc * const msix_mmio_write[] = { }; static CPUReadMemoryFunc * const msix_mmio_read[] = { - msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl + msix_mmio_readb, msix_mmio_readw, msix_mmio_readl }; /* Should be called from device's map method. */