Message ID | 1487659615-15820-1-git-send-email-xyjxie@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
On Tue, 21 Feb 2017 14:46:55 +0800 Yongji Xie <xyjxie@linux.vnet.ibm.com> wrote: > At the moment ram device's memory regions are NATIVE_ENDIAN. This does > not work on PPC64 because VFIO PCI device is little endian but PPC64 > always defines static macro TARGET_WORDS_BIGENDIAN. > > This fixes endianness for ram device the same way as it is done > for VFIO region in commit 6758008e2c4e79fb6bd04fe8e7a41665fa583965. The referenced commit was to vfio code where the endianness is fixed, here you're modifying shared generic code to assume the same endianness as vfio. That seems wrong. Should memory_region_init_ram_device_ptr() instead take an endian option and ram_device_mem_ops should take the backing endianness into account? Thanks, Alex > Signed-off-by: Yongji Xie <xyjxie@linux.vnet.ibm.com> > --- > memory.c | 14 +++++++------- > 1 files changed, 7 insertions(+), 7 deletions(-) > > diff --git a/memory.c b/memory.c > index 6c58373..1ccb99f 100644 > --- a/memory.c > +++ b/memory.c > @@ -1139,13 +1139,13 @@ static uint64_t memory_region_ram_device_read(void *opaque, > data = *(uint8_t *)(mr->ram_block->host + addr); > break; > case 2: > - data = *(uint16_t *)(mr->ram_block->host + addr); > + data = le16_to_cpu(*(uint16_t *)(mr->ram_block->host + addr)); > break; > case 4: > - data = *(uint32_t *)(mr->ram_block->host + addr); > + data = le32_to_cpu(*(uint32_t *)(mr->ram_block->host + addr)); > break; > case 8: > - data = *(uint64_t *)(mr->ram_block->host + addr); > + data = le64_to_cpu(*(uint64_t *)(mr->ram_block->host + addr)); > break; > } > > @@ -1166,13 +1166,13 @@ static void memory_region_ram_device_write(void *opaque, hwaddr addr, > *(uint8_t *)(mr->ram_block->host + addr) = (uint8_t)data; > break; > case 2: > - *(uint16_t *)(mr->ram_block->host + addr) = (uint16_t)data; > + *(uint16_t *)(mr->ram_block->host + addr) = cpu_to_le16((uint16_t)data); > break; > case 4: > - *(uint32_t *)(mr->ram_block->host + addr) = (uint32_t)data; > + *(uint32_t *)(mr->ram_block->host + addr) = cpu_to_le32((uint32_t)data); > break; > case 8: > - *(uint64_t *)(mr->ram_block->host + addr) = data; > + *(uint64_t *)(mr->ram_block->host + addr) = cpu_to_le64(data); > break; > } > } > @@ -1180,7 +1180,7 @@ static void memory_region_ram_device_write(void *opaque, hwaddr addr, > static const MemoryRegionOps ram_device_mem_ops = { > .read = memory_region_ram_device_read, > .write = memory_region_ram_device_write, > - .endianness = DEVICE_NATIVE_ENDIAN, > + .endianness = DEVICE_LITTLE_ENDIAN, > .valid = { > .min_access_size = 1, > .max_access_size = 8,
On 21/02/2017 17:21, Alex Williamson wrote: > On Tue, 21 Feb 2017 14:46:55 +0800 > Yongji Xie <xyjxie@linux.vnet.ibm.com> wrote: > >> At the moment ram device's memory regions are NATIVE_ENDIAN. This does >> not work on PPC64 because VFIO PCI device is little endian but PPC64 >> always defines static macro TARGET_WORDS_BIGENDIAN. >> >> This fixes endianness for ram device the same way as it is done >> for VFIO region in commit 6758008e2c4e79fb6bd04fe8e7a41665fa583965. > > The referenced commit was to vfio code where the endianness is fixed, > here you're modifying shared generic code to assume the same > endianness as vfio. That seems wrong. Is the goal to have the same endianness as VFIO? Or is it just a trick to ensure the number of swaps is always 0 or 2, so that they cancel away? In other words, would Yongji's patch just work if it used DEVICE_BIG_ENDIAN and beNN_to_cpu/cpu_to_beNN? If so, then I think the patch is okay. Paolo > > Alex > >> Signed-off-by: Yongji Xie <xyjxie@linux.vnet.ibm.com> >> --- >> memory.c | 14 +++++++------- >> 1 files changed, 7 insertions(+), 7 deletions(-) >> >> diff --git a/memory.c b/memory.c >> index 6c58373..1ccb99f 100644 >> --- a/memory.c >> +++ b/memory.c >> @@ -1139,13 +1139,13 @@ static uint64_t memory_region_ram_device_read(void *opaque, >> data = *(uint8_t *)(mr->ram_block->host + addr); >> break; >> case 2: >> - data = *(uint16_t *)(mr->ram_block->host + addr); >> + data = le16_to_cpu(*(uint16_t *)(mr->ram_block->host + addr)); >> break; >> case 4: >> - data = *(uint32_t *)(mr->ram_block->host + addr); >> + data = le32_to_cpu(*(uint32_t *)(mr->ram_block->host + addr)); >> break; >> case 8: >> - data = *(uint64_t *)(mr->ram_block->host + addr); >> + data = le64_to_cpu(*(uint64_t *)(mr->ram_block->host + addr)); >> break; >> } >> >> @@ -1166,13 +1166,13 @@ static void memory_region_ram_device_write(void *opaque, hwaddr addr, >> *(uint8_t *)(mr->ram_block->host + addr) = (uint8_t)data; >> break; >> case 2: >> - *(uint16_t *)(mr->ram_block->host + addr) = (uint16_t)data; >> + *(uint16_t *)(mr->ram_block->host + addr) = cpu_to_le16((uint16_t)data); >> break; >> case 4: >> - *(uint32_t *)(mr->ram_block->host + addr) = (uint32_t)data; >> + *(uint32_t *)(mr->ram_block->host + addr) = cpu_to_le32((uint32_t)data); >> break; >> case 8: >> - *(uint64_t *)(mr->ram_block->host + addr) = data; >> + *(uint64_t *)(mr->ram_block->host + addr) = cpu_to_le64(data); >> break; >> } >> } >> @@ -1180,7 +1180,7 @@ static void memory_region_ram_device_write(void *opaque, hwaddr addr, >> static const MemoryRegionOps ram_device_mem_ops = { >> .read = memory_region_ram_device_read, >> .write = memory_region_ram_device_write, >> - .endianness = DEVICE_NATIVE_ENDIAN, >> + .endianness = DEVICE_LITTLE_ENDIAN, >> .valid = { >> .min_access_size = 1, >> .max_access_size = 8, >
On 21 February 2017 at 16:34, Paolo Bonzini <pbonzini@redhat.com> wrote: > > > On 21/02/2017 17:21, Alex Williamson wrote: >> On Tue, 21 Feb 2017 14:46:55 +0800 >> Yongji Xie <xyjxie@linux.vnet.ibm.com> wrote: >> >>> At the moment ram device's memory regions are NATIVE_ENDIAN. This does >>> not work on PPC64 because VFIO PCI device is little endian but PPC64 >>> always defines static macro TARGET_WORDS_BIGENDIAN. >>> >>> This fixes endianness for ram device the same way as it is done >>> for VFIO region in commit 6758008e2c4e79fb6bd04fe8e7a41665fa583965. >> >> The referenced commit was to vfio code where the endianness is fixed, >> here you're modifying shared generic code to assume the same >> endianness as vfio. That seems wrong. > > Is the goal to have the same endianness as VFIO? Or is it just a trick > to ensure the number of swaps is always 0 or 2, so that they cancel away? > > In other words, would Yongji's patch just work if it used > DEVICE_BIG_ENDIAN and beNN_to_cpu/cpu_to_beNN? If so, then I think the > patch is okay. I think any patch that proposes adding or removing one or more endianness related swaps should come with a commit message that states very clearly why this exact point in the stack is the correct place to do these swaps, from a design point of view. (This is so we can avoid the pitfall of putting in enough swaps to cancel each other out but at the wrong point in the design.) In this instance I don't understand the patch. The ram_device mem-ops are there to deal with memory regions backed by a lump of RAM, right? Lumps of memory are always the endianness of the host CPU by definition, so DEVICE_NATIVE_ENDIAN and no swapping in the accessors seems like it ought to be the right thing... thanks -- PMM
On Tue, 21 Feb 2017 18:09:04 +0000 Peter Maydell <peter.maydell@linaro.org> wrote: > On 21 February 2017 at 16:34, Paolo Bonzini <pbonzini@redhat.com> wrote: > > > > > > On 21/02/2017 17:21, Alex Williamson wrote: > >> On Tue, 21 Feb 2017 14:46:55 +0800 > >> Yongji Xie <xyjxie@linux.vnet.ibm.com> wrote: > >> > >>> At the moment ram device's memory regions are NATIVE_ENDIAN. This does > >>> not work on PPC64 because VFIO PCI device is little endian but PPC64 > >>> always defines static macro TARGET_WORDS_BIGENDIAN. > >>> > >>> This fixes endianness for ram device the same way as it is done > >>> for VFIO region in commit 6758008e2c4e79fb6bd04fe8e7a41665fa583965. > >> > >> The referenced commit was to vfio code where the endianness is fixed, > >> here you're modifying shared generic code to assume the same > >> endianness as vfio. That seems wrong. > > > > Is the goal to have the same endianness as VFIO? Or is it just a trick > > to ensure the number of swaps is always 0 or 2, so that they cancel away? > > > > In other words, would Yongji's patch just work if it used > > DEVICE_BIG_ENDIAN and beNN_to_cpu/cpu_to_beNN? If so, then I think the > > patch is okay. > > I think any patch that proposes adding or removing one or more > endianness related swaps should come with a commit message that > states very clearly why this exact point in the stack is the > correct place to do these swaps, from a design point of view. > (This is so we can avoid the pitfall of putting in enough swaps > to cancel each other out but at the wrong point in the design.) > > In this instance I don't understand the patch. The ram_device > mem-ops are there to deal with memory regions backed by a > lump of RAM, right? Lumps of memory are always the endianness > of the host CPU by definition, so DEVICE_NATIVE_ENDIAN and > no swapping in the accessors seems like it ought to be the right > thing... I'm glad I'm not the only one confused. The clarity I can add is that for vfio, we define that any read/write to the vfio file descriptor is little endian. The kernel does cpu_to_leXX/leXX_to_cpu around the ioreadXX/iowriteXX calls. For better or worse, this gets us around the implicit CPU endianness of the data there and gives us a standard endianness. mmaps are of course device native endian, but the point of the ram device was that some devices (realtek NICs) don't respond well to using memcpy through the mmap and we're better off using explicit reads and writes. The part where I get lost is that if PPC64 always sets the target to big endian, then adjust_endianness() we will always bswap after a read and before a write. I don't follow how that bswap necessarily gets us to QEMU CPU endian such that the cpu_to_leXX/leXX_to_cpu in the access functions always do the right thing. I know we do this elsewhere in vfio, perhaps I've deferred to someone convincing me it was the right thing at the time, but I'm not able to derive it myself currently. To answer Paolo's question, if PPC64 sets TARGET_WORDS_BIGENDIAN, which is a compile time setting, then using DEVICE_BIG_ENDIAN would deterministically remove the bswap from adjust_endianness(), but I don't know how anything is deterministic about CPU-relative byte swaps since (I think) PPC64 could actually be running in either. +1 to being very explicit about which swaps are occurring where. Thanks, Alex
On 21/02/2017 19:09, Peter Maydell wrote: > In this instance I don't understand the patch. The ram_device > mem-ops are there to deal with memory regions backed by a > lump of RAM, right? Lumps of memory are always the endianness > of the host CPU by definition, so DEVICE_NATIVE_ENDIAN and > no swapping in the accessors seems like it ought to be the right > thing... DEVICE_NATIVE_ENDIAN is the endianness of the target CPU however. So perhaps we need to add DEVICE_HOST_ENDIAN instead? Paolo
On 21 February 2017 at 18:53, Paolo Bonzini <pbonzini@redhat.com> wrote: > On 21/02/2017 19:09, Peter Maydell wrote: >> In this instance I don't understand the patch. The ram_device >> mem-ops are there to deal with memory regions backed by a >> lump of RAM, right? Lumps of memory are always the endianness >> of the host CPU by definition, so DEVICE_NATIVE_ENDIAN and >> no swapping in the accessors seems like it ought to be the right >> thing... > > DEVICE_NATIVE_ENDIAN is the endianness of the target CPU however. ...I meant 'target CPU' there (at any rate, endianness of the bus interface between target CPU and its RAM; CPUs like ARM with runtime configurable endianness do it by swapping data before it hits the bus interface, conceptually speaking). Endianness always confuses me, though (I think I understand it and then I realize I don't; I also have trouble telling whether I understand it and am discussing it with somebody who doesn't understand it, or whether I don't understand it and they do...) thanks -- PMM
on 2017/2/22 2:44, Alex Williamson wrote: > On Tue, 21 Feb 2017 18:09:04 +0000 > Peter Maydell <peter.maydell@linaro.org> wrote: > >> On 21 February 2017 at 16:34, Paolo Bonzini <pbonzini@redhat.com> wrote: >>> >>> On 21/02/2017 17:21, Alex Williamson wrote: >>>> On Tue, 21 Feb 2017 14:46:55 +0800 >>>> Yongji Xie <xyjxie@linux.vnet.ibm.com> wrote: >>>> >>>>> At the moment ram device's memory regions are NATIVE_ENDIAN. This does >>>>> not work on PPC64 because VFIO PCI device is little endian but PPC64 >>>>> always defines static macro TARGET_WORDS_BIGENDIAN. >>>>> >>>>> This fixes endianness for ram device the same way as it is done >>>>> for VFIO region in commit 6758008e2c4e79fb6bd04fe8e7a41665fa583965. >>>> The referenced commit was to vfio code where the endianness is fixed, >>>> here you're modifying shared generic code to assume the same >>>> endianness as vfio. That seems wrong. >>> Is the goal to have the same endianness as VFIO? Or is it just a trick >>> to ensure the number of swaps is always 0 or 2, so that they cancel away? >>> >>> In other words, would Yongji's patch just work if it used >>> DEVICE_BIG_ENDIAN and beNN_to_cpu/cpu_to_beNN? If so, then I think the >>> patch is okay. >> I think any patch that proposes adding or removing one or more >> endianness related swaps should come with a commit message that >> states very clearly why this exact point in the stack is the >> correct place to do these swaps, from a design point of view. >> (This is so we can avoid the pitfall of putting in enough swaps >> to cancel each other out but at the wrong point in the design.) >> >> In this instance I don't understand the patch. The ram_device >> mem-ops are there to deal with memory regions backed by a >> lump of RAM, right? Lumps of memory are always the endianness >> of the host CPU by definition, so DEVICE_NATIVE_ENDIAN and >> no swapping in the accessors seems like it ought to be the right >> thing... > I'm glad I'm not the only one confused. The clarity I can add is that > for vfio, we define that any read/write to the vfio file descriptor is > little endian. The kernel does cpu_to_leXX/leXX_to_cpu around the > ioreadXX/iowriteXX calls. For better or worse, this gets us around the > implicit CPU endianness of the data there and gives us a standard > endianness. mmaps are of course device native endian, but the point of > the ram device was that some devices (realtek NICs) don't respond well > to using memcpy through the mmap and we're better off using explicit > reads and writes. > > The part where I get lost is that if PPC64 always sets the target > to big endian, then adjust_endianness() we will always bswap after a > read and before a write. I don't follow how that bswap necessarily > gets us to QEMU CPU endian such that the cpu_to_leXX/leXX_to_cpu in the > access functions always do the right thing. I know we do this > elsewhere in vfio, perhaps I've deferred to someone convincing me it > was the right thing at the time, but I'm not able to derive it myself > currently. > > To answer Paolo's question, if PPC64 sets TARGET_WORDS_BIGENDIAN, which > is a compile time setting, then using DEVICE_BIG_ENDIAN would > deterministically remove the bswap from adjust_endianness(), but I > don't know how anything is deterministic about CPU-relative byte swaps > since (I think) PPC64 could actually be running in either. +1 to being > very explicit about which swaps are occurring where. Thanks, > Actually I also have some confusions about the endianness during fixing this issue... I assume that kvm_run->mmio.data is stored as the endianness of device that guest try to accesss. For one mmio write, we may do the *first* bswap in address_space_write_continue(). The ldX_p() will do the bswap when target and host endianness are different. Then adjust_endianness() may do the *second* bswap when target and device endianness are different. Here are all possible cases: run->mmio.data device target host for mr->ops->write() ============================================================ little native big big little little native little little little little native big little big (this is the wrong case we found) little native little big big ============================================================== little little big big big (could be fixed by cpu_to_leXX) little little little little little little little big little little little little little big big (could be fixed by cpu_to_leXX) ============================================================== big native big big big big native little little big big native big little little big native little big little ============================================================== big big big big big big big little little little (could be fixed by cpu_to_beXX) big big big little little (could be fixed by cpu_to_beXX) big big little big big We can easily find that native_endian device doesn't work when target and host endianness are different. So I fix it like this patch does. But I'm wrong to assume the same endianness as vfio in this patch like Alex said. So maybe we should consider an endian option in memory_region_init_ram_device_ptr(). Back to my confusion, I failed to understand the objective of those two swaps. Is it possible to pass the run->mmio.data directly to mr->ops->write()? Seems like those two swaps are based on the fact that run->mmio.data is stored as host-endian rather than the endianness of device that guest try to accesss. Am I missed something important here? Thanks, Yongji
diff --git a/memory.c b/memory.c index 6c58373..1ccb99f 100644 --- a/memory.c +++ b/memory.c @@ -1139,13 +1139,13 @@ static uint64_t memory_region_ram_device_read(void *opaque, data = *(uint8_t *)(mr->ram_block->host + addr); break; case 2: - data = *(uint16_t *)(mr->ram_block->host + addr); + data = le16_to_cpu(*(uint16_t *)(mr->ram_block->host + addr)); break; case 4: - data = *(uint32_t *)(mr->ram_block->host + addr); + data = le32_to_cpu(*(uint32_t *)(mr->ram_block->host + addr)); break; case 8: - data = *(uint64_t *)(mr->ram_block->host + addr); + data = le64_to_cpu(*(uint64_t *)(mr->ram_block->host + addr)); break; } @@ -1166,13 +1166,13 @@ static void memory_region_ram_device_write(void *opaque, hwaddr addr, *(uint8_t *)(mr->ram_block->host + addr) = (uint8_t)data; break; case 2: - *(uint16_t *)(mr->ram_block->host + addr) = (uint16_t)data; + *(uint16_t *)(mr->ram_block->host + addr) = cpu_to_le16((uint16_t)data); break; case 4: - *(uint32_t *)(mr->ram_block->host + addr) = (uint32_t)data; + *(uint32_t *)(mr->ram_block->host + addr) = cpu_to_le32((uint32_t)data); break; case 8: - *(uint64_t *)(mr->ram_block->host + addr) = data; + *(uint64_t *)(mr->ram_block->host + addr) = cpu_to_le64(data); break; } } @@ -1180,7 +1180,7 @@ static void memory_region_ram_device_write(void *opaque, hwaddr addr, static const MemoryRegionOps ram_device_mem_ops = { .read = memory_region_ram_device_read, .write = memory_region_ram_device_write, - .endianness = DEVICE_NATIVE_ENDIAN, + .endianness = DEVICE_LITTLE_ENDIAN, .valid = { .min_access_size = 1, .max_access_size = 8,
At the moment ram device's memory regions are NATIVE_ENDIAN. This does not work on PPC64 because VFIO PCI device is little endian but PPC64 always defines static macro TARGET_WORDS_BIGENDIAN. This fixes endianness for ram device the same way as it is done for VFIO region in commit 6758008e2c4e79fb6bd04fe8e7a41665fa583965. Signed-off-by: Yongji Xie <xyjxie@linux.vnet.ibm.com> --- memory.c | 14 +++++++------- 1 files changed, 7 insertions(+), 7 deletions(-)