Message ID | 1339773030-1317-2-git-send-email-borntraeger@de.ibm.com |
---|---|
State | New |
Headers | show |
On 2012-06-15 17:10, Christian Borntraeger wrote: > By default qemu will use MAP_PRIVATE for guest pages. This will write > protect pages and thus break on s390 systems that dont support this feature. > Therefore qemu has a hack to always use MAP_SHARED for s390. But MAP_SHARED > has other problems (no dirty pages tracking, a lot more swap overhead etc.) > Newer systems allow the distinction via KVM_CAP_S390_COW. With this feature > qemu can use the standard qemu alloc if available, otherwise it will use > the old s390 hack. > > Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> > Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com> > --- > exec.c | 18 +++--------------- > kvm-all.c | 13 +++++++++++++ > kvm.h | 2 ++ > oslib-posix.c | 3 +++ > target-s390x/kvm.c | 35 +++++++++++++++++++++++++++++++++++ > 5 files changed, 56 insertions(+), 15 deletions(-) > > diff --git a/exec.c b/exec.c > index 5c9b762..584a484 100644 > --- a/exec.c > +++ b/exec.c > @@ -2647,26 +2647,14 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, > exit(1); > #endif > } else { > -#if defined(TARGET_S390X) && defined(CONFIG_KVM) > - /* S390 KVM requires the topmost vma of the RAM to be smaller than > - an system defined value, which is at least 256GB. Larger systems > - have larger values. We put the guest between the end of data > - segment (system break) and this value. We use 32GB as a base to > - have enough room for the system break to grow. 
*/ > - new_block->host = mmap((void*)0x800000000, size, > - PROT_EXEC|PROT_READ|PROT_WRITE, > - MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0); > - if (new_block->host == MAP_FAILED) { > - fprintf(stderr, "Allocating RAM failed\n"); > - abort(); > - } > -#else > if (xen_enabled()) { > xen_ram_alloc(new_block->offset, size, mr); > + } else if (kvm_enabled()) { > + /* some s390/kvm configurations have special constraints */ > + new_block->host = kvm_vmalloc(size); > } else { > new_block->host = qemu_vmalloc(size); > } > -#endif > qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE); > } > } > diff --git a/kvm-all.c b/kvm-all.c > index 4ea7d85..0372f7a 100644 > --- a/kvm-all.c > +++ b/kvm-all.c > @@ -1653,6 +1653,19 @@ int kvm_allows_irq0_override(void) > return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing(); > } > > +void *kvm_vmalloc(ram_addr_t size) > +{ > +#ifdef TARGET_S390X > + void *mem; > + > + mem = kvm_arch_vmalloc(size); > + if (mem) { > + return mem; > + } > +#endif > + return qemu_vmalloc(size); > +} > + > void kvm_setup_guest_memory(void *start, size_t size) > { > if (!kvm_has_sync_mmu()) { > diff --git a/kvm.h b/kvm.h > index 9c7b0ea..ddc7c53 100644 > --- a/kvm.h > +++ b/kvm.h > @@ -70,6 +70,8 @@ int kvm_init_vcpu(CPUArchState *env); > int kvm_cpu_exec(CPUArchState *env); > > #if !defined(CONFIG_USER_ONLY) > +void *kvm_vmalloc(ram_addr_t size); > +void *kvm_arch_vmalloc(ram_addr_t size); > void kvm_setup_guest_memory(void *start, size_t size); > > int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size); > diff --git a/oslib-posix.c b/oslib-posix.c > index b6a3c7f..93902ac 100644 > --- a/oslib-posix.c > +++ b/oslib-posix.c > @@ -41,6 +41,9 @@ extern int daemon(int, int); > therefore we need special code which handles running on Valgrind. */ > # define QEMU_VMALLOC_ALIGN (512 * 4096) > # define CONFIG_VALGRIND > +#elif defined(__linux__) && defined(__s390x__) > + /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. 
*/ > +# define QEMU_VMALLOC_ALIGN (256 * 4096) > #else > # define QEMU_VMALLOC_ALIGN getpagesize() > #endif > diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c > index 5800fd6..4d38820 100644 > --- a/target-s390x/kvm.c > +++ b/target-s390x/kvm.c > @@ -135,6 +135,41 @@ int kvm_arch_get_registers(CPUS390XState *env) > return 0; > } > > +/* > + * Legacy layout for s390: > + * Older S390 KVM requires the topmost vma of the RAM to be > + * smaller than an system defined value, which is at least 256GB. > + * Larger systems have larger values. We put the guest between > + * the end of data segment (system break) and this value. We > + * use 32GB as a base to have enough room for the system break > + * to grow. We also have to use MAP parameters that avoid > + * read-only mapping of guest pages. > + */ > +static void *legacy_s390_alloc(ram_addr_t size) > +{ > + void *mem; > + > + mem = mmap((void *) 0x800000000ULL, size, > + PROT_EXEC|PROT_READ|PROT_WRITE, > + MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0); > + if (mem == MAP_FAILED) { > + fprintf(stderr, "Allocating RAM failed\n"); > + abort(); > + } > + return mem; > +} > + > +void *kvm_arch_vmalloc(ram_addr_t size) > +{ > + /* Can we use the standard allocation ? */ > + if (kvm_check_extension(kvm_state, KVM_CAP_S390_GMAP) && > + kvm_check_extension(kvm_state, KVM_CAP_S390_COW)) { > + return NULL; > + } else { > + return legacy_s390_alloc(size); > + } > +} > + > int kvm_arch_insert_sw_breakpoint(CPUS390XState *env, struct kvm_sw_breakpoint *bp) > { > static const uint8_t diag_501[] = {0x83, 0x24, 0x05, 0x01}; Looks good to me. Jan
On 15.06.2012, at 17:10, Christian Borntraeger wrote: > By default qemu will use MAP_PRIVATE for guest pages. This will write > protect pages and thus break on s390 systems that don't support this feature. > Therefore qemu has a hack to always use MAP_SHARED for s390. But MAP_SHARED > has other problems (no dirty page tracking, a lot more swap overhead, etc.). > Newer systems allow the distinction via KVM_CAP_S390_COW. With this feature > qemu can use the standard qemu alloc if available, otherwise it will use > the old s390 hack. Thanks, applied to s390-next. Alex
diff --git a/exec.c b/exec.c index 5c9b762..584a484 100644 --- a/exec.c +++ b/exec.c @@ -2647,26 +2647,14 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, exit(1); #endif } else { -#if defined(TARGET_S390X) && defined(CONFIG_KVM) - /* S390 KVM requires the topmost vma of the RAM to be smaller than - an system defined value, which is at least 256GB. Larger systems - have larger values. We put the guest between the end of data - segment (system break) and this value. We use 32GB as a base to - have enough room for the system break to grow. */ - new_block->host = mmap((void*)0x800000000, size, - PROT_EXEC|PROT_READ|PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0); - if (new_block->host == MAP_FAILED) { - fprintf(stderr, "Allocating RAM failed\n"); - abort(); - } -#else if (xen_enabled()) { xen_ram_alloc(new_block->offset, size, mr); + } else if (kvm_enabled()) { + /* some s390/kvm configurations have special constraints */ + new_block->host = kvm_vmalloc(size); } else { new_block->host = qemu_vmalloc(size); } -#endif qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE); } } diff --git a/kvm-all.c b/kvm-all.c index 4ea7d85..0372f7a 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1653,6 +1653,19 @@ int kvm_allows_irq0_override(void) return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing(); } +void *kvm_vmalloc(ram_addr_t size) +{ +#ifdef TARGET_S390X + void *mem; + + mem = kvm_arch_vmalloc(size); + if (mem) { + return mem; + } +#endif + return qemu_vmalloc(size); +} + void kvm_setup_guest_memory(void *start, size_t size) { if (!kvm_has_sync_mmu()) { diff --git a/kvm.h b/kvm.h index 9c7b0ea..ddc7c53 100644 --- a/kvm.h +++ b/kvm.h @@ -70,6 +70,8 @@ int kvm_init_vcpu(CPUArchState *env); int kvm_cpu_exec(CPUArchState *env); #if !defined(CONFIG_USER_ONLY) +void *kvm_vmalloc(ram_addr_t size); +void *kvm_arch_vmalloc(ram_addr_t size); void kvm_setup_guest_memory(void *start, size_t size); int kvm_coalesce_mmio_region(target_phys_addr_t start, 
ram_addr_t size); diff --git a/oslib-posix.c b/oslib-posix.c index b6a3c7f..93902ac 100644 --- a/oslib-posix.c +++ b/oslib-posix.c @@ -41,6 +41,9 @@ extern int daemon(int, int); therefore we need special code which handles running on Valgrind. */ # define QEMU_VMALLOC_ALIGN (512 * 4096) # define CONFIG_VALGRIND +#elif defined(__linux__) && defined(__s390x__) + /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */ +# define QEMU_VMALLOC_ALIGN (256 * 4096) #else # define QEMU_VMALLOC_ALIGN getpagesize() #endif diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index 5800fd6..4d38820 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -135,6 +135,41 @@ int kvm_arch_get_registers(CPUS390XState *env) return 0; } +/* + * Legacy layout for s390: + * Older S390 KVM requires the topmost vma of the RAM to be + * smaller than an system defined value, which is at least 256GB. + * Larger systems have larger values. We put the guest between + * the end of data segment (system break) and this value. We + * use 32GB as a base to have enough room for the system break + * to grow. We also have to use MAP parameters that avoid + * read-only mapping of guest pages. + */ +static void *legacy_s390_alloc(ram_addr_t size) +{ + void *mem; + + mem = mmap((void *) 0x800000000ULL, size, + PROT_EXEC|PROT_READ|PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (mem == MAP_FAILED) { + fprintf(stderr, "Allocating RAM failed\n"); + abort(); + } + return mem; +} + +void *kvm_arch_vmalloc(ram_addr_t size) +{ + /* Can we use the standard allocation ? */ + if (kvm_check_extension(kvm_state, KVM_CAP_S390_GMAP) && + kvm_check_extension(kvm_state, KVM_CAP_S390_COW)) { + return NULL; + } else { + return legacy_s390_alloc(size); + } +} + int kvm_arch_insert_sw_breakpoint(CPUS390XState *env, struct kvm_sw_breakpoint *bp) { static const uint8_t diag_501[] = {0x83, 0x24, 0x05, 0x01};