Message ID | 20100406111818.GA17820@redhat.com |
---|---|
State | New |
Headers | show |
On Tue, Apr 06, 2010 at 02:18:19PM +0300, Michael S. Tsirkin wrote: > exec.c has a comment 'XXX: optimize' for lduw_phys/stw_phys, > so let's do it, along the lines of stl_phys. > > The reason to address 16 bit accesses specifically is that virtio relies > on these accesses to be done atomically, using memset as we do now > breaks this assumption, which is reported to cause qemu with kvm > to read wrong index values under stress. > > https://bugzilla.redhat.com/show_bug.cgi?id=525323 The patch looks ok in principle, but I am worried by the fact it is mostly a copy and paste of ldl_phys() and stl_phys(). Wouldn't it be possible to factorize the code a bit, maybe using macros? > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > --- > exec.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ > 1 files changed, 60 insertions(+), 7 deletions(-) > > diff --git a/exec.c b/exec.c > index 33854e1..262c255 100644 > --- a/exec.c > +++ b/exec.c > @@ -3788,12 +3788,36 @@ uint32_t ldub_phys(target_phys_addr_t addr) > return val; > } > > -/* XXX: optimize */ > +/* warning: addr must be aligned */ > uint32_t lduw_phys(target_phys_addr_t addr) > { > - uint16_t val; > - cpu_physical_memory_read(addr, (uint8_t *)&val, 2); > - return tswap16(val); > + int io_index; > + uint8_t *ptr; > + uint64_t val; > + unsigned long pd; > + PhysPageDesc *p; > + > + p = phys_page_find(addr >> TARGET_PAGE_BITS); > + if (!p) { > + pd = IO_MEM_UNASSIGNED; > + } else { > + pd = p->phys_offset; > + } > + > + if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && > + !(pd & IO_MEM_ROMD)) { > + /* I/O case */ > + io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); > + if (p) > + addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset; > + val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr); > + } else { > + /* RAM case */ > + ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) + > + (addr & ~TARGET_PAGE_MASK); > + val = lduw_p(ptr); > + } > + return val; > } > > /* warning: addr must be 
aligned. The ram page is not masked as dirty > @@ -3910,11 +3934,40 @@ void stb_phys(target_phys_addr_t addr, uint32_t val) > cpu_physical_memory_write(addr, &v, 1); > } > > -/* XXX: optimize */ > +/* warning: addr must be aligned */ > void stw_phys(target_phys_addr_t addr, uint32_t val) > { > - uint16_t v = tswap16(val); > - cpu_physical_memory_write(addr, (const uint8_t *)&v, 2); > + int io_index; > + uint8_t *ptr; > + unsigned long pd; > + PhysPageDesc *p; > + > + p = phys_page_find(addr >> TARGET_PAGE_BITS); > + if (!p) { > + pd = IO_MEM_UNASSIGNED; > + } else { > + pd = p->phys_offset; > + } > + > + if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) { > + io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); > + if (p) > + addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset; > + io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val); > + } else { > + unsigned long addr1; > + addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK); > + /* RAM case */ > + ptr = phys_ram_base + addr1; > + stw_p(ptr, val); > + if (!cpu_physical_memory_is_dirty(addr1)) { > + /* invalidate code */ > + tb_invalidate_phys_page_range(addr1, addr1 + 2, 0); > + /* set dirty bit */ > + phys_ram_dirty[addr1 >> TARGET_PAGE_BITS] |= > + (0xff & ~CODE_DIRTY_FLAG); > + } > + } > } > > /* XXX: optimize */ > -- > 1.7.0.2.280.gc6f05 > > >
On Sat, Apr 10, 2010 at 12:33:53AM +0200, Aurelien Jarno wrote: > On Tue, Apr 06, 2010 at 02:18:19PM +0300, Michael S. Tsirkin wrote: > > exec.c has a comment 'XXX: optimize' for lduw_phys/stw_phys, > > so let's do it, along the lines of stl_phys. > > > > The reason to address 16 bit accesses specifically is that virtio relies > > on these accesses to be done atomically, using memset as we do now > > breaks this assumption, which is reported to cause qemu with kvm > > to read wrong index values under stress. > > > > https://bugzilla.redhat.com/show_bug.cgi?id=525323 > > The patch looks ok in principle, but I am worried by the fact it is > mostly a copy and paste of ldl_phys() and stl_phys(). Wouldn't it be > possible to factorize the code a bit, maybe using macros? I'm not sure 30-line macros are such a good idea. Patches welcome though. > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > --- > > exec.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ > > 1 files changed, 60 insertions(+), 7 deletions(-) > > > > diff --git a/exec.c b/exec.c > > index 33854e1..262c255 100644 > > --- a/exec.c > > +++ b/exec.c > > @@ -3788,12 +3788,36 @@ uint32_t ldub_phys(target_phys_addr_t addr) > > return val; > > } > > > > -/* XXX: optimize */ > > +/* warning: addr must be aligned */ > > uint32_t lduw_phys(target_phys_addr_t addr) > > { > > - uint16_t val; > > - cpu_physical_memory_read(addr, (uint8_t *)&val, 2); > > - return tswap16(val); > > + int io_index; > > + uint8_t *ptr; > > + uint64_t val; > > + unsigned long pd; > > + PhysPageDesc *p; > > + > > + p = phys_page_find(addr >> TARGET_PAGE_BITS); > > + if (!p) { > > + pd = IO_MEM_UNASSIGNED; > > + } else { > > + pd = p->phys_offset; > > + } > > + > > + if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && > > + !(pd & IO_MEM_ROMD)) { > > + /* I/O case */ > > + io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); > > + if (p) > > + addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset; > > + val = 
io_mem_read[io_index][1](io_mem_opaque[io_index], addr); > > + } else { > > + /* RAM case */ > > + ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) + > > + (addr & ~TARGET_PAGE_MASK); > > + val = lduw_p(ptr); > > + } > > + return val; > > } > > > > /* warning: addr must be aligned. The ram page is not masked as dirty > > @@ -3910,11 +3934,40 @@ void stb_phys(target_phys_addr_t addr, uint32_t val) > > cpu_physical_memory_write(addr, &v, 1); > > } > > > > -/* XXX: optimize */ > > +/* warning: addr must be aligned */ > > void stw_phys(target_phys_addr_t addr, uint32_t val) > > { > > - uint16_t v = tswap16(val); > > - cpu_physical_memory_write(addr, (const uint8_t *)&v, 2); > > + int io_index; > > + uint8_t *ptr; > > + unsigned long pd; > > + PhysPageDesc *p; > > + > > + p = phys_page_find(addr >> TARGET_PAGE_BITS); > > + if (!p) { > > + pd = IO_MEM_UNASSIGNED; > > + } else { > > + pd = p->phys_offset; > > + } > > + > > + if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) { > > + io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); > > + if (p) > > + addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset; > > + io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val); > > + } else { > > + unsigned long addr1; > > + addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK); > > + /* RAM case */ > > + ptr = phys_ram_base + addr1; > > + stw_p(ptr, val); > > + if (!cpu_physical_memory_is_dirty(addr1)) { > > + /* invalidate code */ > > + tb_invalidate_phys_page_range(addr1, addr1 + 2, 0); > > + /* set dirty bit */ > > + phys_ram_dirty[addr1 >> TARGET_PAGE_BITS] |= > > + (0xff & ~CODE_DIRTY_FLAG); > > + } > > + } > > } > > > > /* XXX: optimize */ > > -- > > 1.7.0.2.280.gc6f05 > > > > > > > > -- > Aurelien Jarno GPG: 1024D/F1BCDB73 > aurelien@aurel32.net http://www.aurel32.net
On Tue, Apr 06, 2010 at 02:18:19PM +0300, Michael S. Tsirkin wrote: > exec.c has a comment 'XXX: optimize' for lduw_phys/stw_phys, > so let's do it, along the lines of stl_phys. > > The reason to address 16 bit accesses specifically is that virtio relies > on these accesses to be done atomically, using memset as we do now > breaks this assumption, which is reported to cause qemu with kvm > to read wrong index values under stress. > > https://bugzilla.redhat.com/show_bug.cgi?id=525323 Thanks, applied. > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > --- > exec.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ > 1 files changed, 60 insertions(+), 7 deletions(-) > > diff --git a/exec.c b/exec.c > index 33854e1..262c255 100644 > --- a/exec.c > +++ b/exec.c > @@ -3788,12 +3788,36 @@ uint32_t ldub_phys(target_phys_addr_t addr) > return val; > } > > -/* XXX: optimize */ > +/* warning: addr must be aligned */ > uint32_t lduw_phys(target_phys_addr_t addr) > { > - uint16_t val; > - cpu_physical_memory_read(addr, (uint8_t *)&val, 2); > - return tswap16(val); > + int io_index; > + uint8_t *ptr; > + uint64_t val; > + unsigned long pd; > + PhysPageDesc *p; > + > + p = phys_page_find(addr >> TARGET_PAGE_BITS); > + if (!p) { > + pd = IO_MEM_UNASSIGNED; > + } else { > + pd = p->phys_offset; > + } > + > + if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && > + !(pd & IO_MEM_ROMD)) { > + /* I/O case */ > + io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); > + if (p) > + addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset; > + val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr); > + } else { > + /* RAM case */ > + ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) + > + (addr & ~TARGET_PAGE_MASK); > + val = lduw_p(ptr); > + } > + return val; > } > > /* warning: addr must be aligned. 
The ram page is not masked as dirty > @@ -3910,11 +3934,40 @@ void stb_phys(target_phys_addr_t addr, uint32_t val) > cpu_physical_memory_write(addr, &v, 1); > } > > -/* XXX: optimize */ > +/* warning: addr must be aligned */ > void stw_phys(target_phys_addr_t addr, uint32_t val) > { > - uint16_t v = tswap16(val); > - cpu_physical_memory_write(addr, (const uint8_t *)&v, 2); > + int io_index; > + uint8_t *ptr; > + unsigned long pd; > + PhysPageDesc *p; > + > + p = phys_page_find(addr >> TARGET_PAGE_BITS); > + if (!p) { > + pd = IO_MEM_UNASSIGNED; > + } else { > + pd = p->phys_offset; > + } > + > + if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) { > + io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); > + if (p) > + addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset; > + io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val); > + } else { > + unsigned long addr1; > + addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK); > + /* RAM case */ > + ptr = phys_ram_base + addr1; > + stw_p(ptr, val); > + if (!cpu_physical_memory_is_dirty(addr1)) { > + /* invalidate code */ > + tb_invalidate_phys_page_range(addr1, addr1 + 2, 0); > + /* set dirty bit */ > + phys_ram_dirty[addr1 >> TARGET_PAGE_BITS] |= > + (0xff & ~CODE_DIRTY_FLAG); > + } > + } > } > > /* XXX: optimize */ > -- > 1.7.0.2.280.gc6f05 > > >
diff --git a/exec.c b/exec.c index 33854e1..262c255 100644 --- a/exec.c +++ b/exec.c @@ -3788,12 +3788,36 @@ uint32_t ldub_phys(target_phys_addr_t addr) return val; } -/* XXX: optimize */ +/* warning: addr must be aligned */ uint32_t lduw_phys(target_phys_addr_t addr) { - uint16_t val; - cpu_physical_memory_read(addr, (uint8_t *)&val, 2); - return tswap16(val); + int io_index; + uint8_t *ptr; + uint64_t val; + unsigned long pd; + PhysPageDesc *p; + + p = phys_page_find(addr >> TARGET_PAGE_BITS); + if (!p) { + pd = IO_MEM_UNASSIGNED; + } else { + pd = p->phys_offset; + } + + if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && + !(pd & IO_MEM_ROMD)) { + /* I/O case */ + io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); + if (p) + addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset; + val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr); + } else { + /* RAM case */ + ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) + + (addr & ~TARGET_PAGE_MASK); + val = lduw_p(ptr); + } + return val; } /* warning: addr must be aligned. 
The ram page is not masked as dirty @@ -3910,11 +3934,40 @@ void stb_phys(target_phys_addr_t addr, uint32_t val) cpu_physical_memory_write(addr, &v, 1); } -/* XXX: optimize */ +/* warning: addr must be aligned */ void stw_phys(target_phys_addr_t addr, uint32_t val) { - uint16_t v = tswap16(val); - cpu_physical_memory_write(addr, (const uint8_t *)&v, 2); + int io_index; + uint8_t *ptr; + unsigned long pd; + PhysPageDesc *p; + + p = phys_page_find(addr >> TARGET_PAGE_BITS); + if (!p) { + pd = IO_MEM_UNASSIGNED; + } else { + pd = p->phys_offset; + } + + if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) { + io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); + if (p) + addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset; + io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val); + } else { + unsigned long addr1; + addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK); + /* RAM case */ + ptr = phys_ram_base + addr1; + stw_p(ptr, val); + if (!cpu_physical_memory_is_dirty(addr1)) { + /* invalidate code */ + tb_invalidate_phys_page_range(addr1, addr1 + 2, 0); + /* set dirty bit */ + phys_ram_dirty[addr1 >> TARGET_PAGE_BITS] |= + (0xff & ~CODE_DIRTY_FLAG); + } + } } /* XXX: optimize */
exec.c has a comment 'XXX: optimize' for lduw_phys/stw_phys, so let's do it, along the lines of stl_phys. The reason to address 16 bit accesses specifically is that virtio relies on these accesses to be done atomically, using memcpy as we do now breaks this assumption, which is reported to cause qemu with kvm to read wrong index values under stress. https://bugzilla.redhat.com/show_bug.cgi?id=525323 Signed-off-by: Michael S. Tsirkin <mst@redhat.com> --- exec.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 files changed, 60 insertions(+), 7 deletions(-)