diff mbox

[v3] s390: autodetect map private

Message ID 1339773030-1317-2-git-send-email-borntraeger@de.ibm.com
State New
Headers show

Commit Message

Christian Borntraeger June 15, 2012, 3:10 p.m. UTC
By default qemu will use MAP_PRIVATE for guest pages. This will write
protect pages and thus break on s390 systems that don't support this feature.
Therefore qemu has a hack to always use MAP_SHARED for s390. But MAP_SHARED
has other problems (no dirty page tracking, a lot more swap overhead, etc.).
Newer systems allow the distinction via KVM_CAP_S390_COW. If this capability
is available, qemu uses the standard qemu allocation; otherwise it falls back
to the old s390 hack.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
---
 exec.c             |   18 +++---------------
 kvm-all.c          |   13 +++++++++++++
 kvm.h              |    2 ++
 oslib-posix.c      |    3 +++
 target-s390x/kvm.c |   35 +++++++++++++++++++++++++++++++++++
 5 files changed, 56 insertions(+), 15 deletions(-)

Comments

Jan Kiszka June 15, 2012, 5:01 p.m. UTC | #1
On 2012-06-15 17:10, Christian Borntraeger wrote:
> By default qemu will use MAP_PRIVATE for guest pages. This will write
> protect pages and thus break on s390 systems that dont support this feature.
> Therefore qemu has a hack to always use MAP_SHARED for s390. But MAP_SHARED
> has other problems (no dirty pages tracking, a lot more swap overhead etc.)
> Newer systems allow the distinction via KVM_CAP_S390_COW. With this feature
> qemu can use the standard qemu alloc if available, otherwise it will use
> the old s390 hack.
> 
> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
> Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
> ---
>  exec.c             |   18 +++---------------
>  kvm-all.c          |   13 +++++++++++++
>  kvm.h              |    2 ++
>  oslib-posix.c      |    3 +++
>  target-s390x/kvm.c |   35 +++++++++++++++++++++++++++++++++++
>  5 files changed, 56 insertions(+), 15 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 5c9b762..584a484 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -2647,26 +2647,14 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
>              exit(1);
>  #endif
>          } else {
> -#if defined(TARGET_S390X) && defined(CONFIG_KVM)
> -            /* S390 KVM requires the topmost vma of the RAM to be smaller than
> -               an system defined value, which is at least 256GB. Larger systems
> -               have larger values. We put the guest between the end of data
> -               segment (system break) and this value. We use 32GB as a base to
> -               have enough room for the system break to grow. */
> -            new_block->host = mmap((void*)0x800000000, size,
> -                                   PROT_EXEC|PROT_READ|PROT_WRITE,
> -                                   MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
> -            if (new_block->host == MAP_FAILED) {
> -                fprintf(stderr, "Allocating RAM failed\n");
> -                abort();
> -            }
> -#else
>              if (xen_enabled()) {
>                  xen_ram_alloc(new_block->offset, size, mr);
> +            } else if (kvm_enabled()) {
> +                /* some s390/kvm configurations have special constraints */
> +                new_block->host = kvm_vmalloc(size);
>              } else {
>                  new_block->host = qemu_vmalloc(size);
>              }
> -#endif
>              qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
>          }
>      }
> diff --git a/kvm-all.c b/kvm-all.c
> index 4ea7d85..0372f7a 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -1653,6 +1653,19 @@ int kvm_allows_irq0_override(void)
>      return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
>  }
>  
> +void *kvm_vmalloc(ram_addr_t size)
> +{
> +#ifdef TARGET_S390X
> +    void *mem;
> +
> +    mem = kvm_arch_vmalloc(size);
> +    if (mem) {
> +        return mem;
> +    }
> +#endif
> +    return qemu_vmalloc(size);
> +}
> +
>  void kvm_setup_guest_memory(void *start, size_t size)
>  {
>      if (!kvm_has_sync_mmu()) {
> diff --git a/kvm.h b/kvm.h
> index 9c7b0ea..ddc7c53 100644
> --- a/kvm.h
> +++ b/kvm.h
> @@ -70,6 +70,8 @@ int kvm_init_vcpu(CPUArchState *env);
>  int kvm_cpu_exec(CPUArchState *env);
>  
>  #if !defined(CONFIG_USER_ONLY)
> +void *kvm_vmalloc(ram_addr_t size);
> +void *kvm_arch_vmalloc(ram_addr_t size);
>  void kvm_setup_guest_memory(void *start, size_t size);
>  
>  int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size);
> diff --git a/oslib-posix.c b/oslib-posix.c
> index b6a3c7f..93902ac 100644
> --- a/oslib-posix.c
> +++ b/oslib-posix.c
> @@ -41,6 +41,9 @@ extern int daemon(int, int);
>        therefore we need special code which handles running on Valgrind. */
>  #  define QEMU_VMALLOC_ALIGN (512 * 4096)
>  #  define CONFIG_VALGRIND
> +#elif defined(__linux__) && defined(__s390x__)
> +   /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
> +#  define QEMU_VMALLOC_ALIGN (256 * 4096)
>  #else
>  #  define QEMU_VMALLOC_ALIGN getpagesize()
>  #endif
> diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
> index 5800fd6..4d38820 100644
> --- a/target-s390x/kvm.c
> +++ b/target-s390x/kvm.c
> @@ -135,6 +135,41 @@ int kvm_arch_get_registers(CPUS390XState *env)
>      return 0;
>  }
>  
> +/*
> + * Legacy layout for s390:
> + * Older S390 KVM requires the topmost vma of the RAM to be
> + * smaller than an system defined value, which is at least 256GB.
> + * Larger systems have larger values. We put the guest between
> + * the end of data segment (system break) and this value. We
> + * use 32GB as a base to have enough room for the system break
> + * to grow. We also have to use MAP parameters that avoid
> + * read-only mapping of guest pages.
> + */
> +static void *legacy_s390_alloc(ram_addr_t size)
> +{
> +    void *mem;
> +
> +    mem = mmap((void *) 0x800000000ULL, size,
> +               PROT_EXEC|PROT_READ|PROT_WRITE,
> +               MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
> +    if (mem == MAP_FAILED) {
> +        fprintf(stderr, "Allocating RAM failed\n");
> +        abort();
> +    }
> +    return mem;
> +}
> +
> +void *kvm_arch_vmalloc(ram_addr_t size)
> +{
> +    /* Can we use the standard allocation ? */
> +    if (kvm_check_extension(kvm_state, KVM_CAP_S390_GMAP) &&
> +        kvm_check_extension(kvm_state, KVM_CAP_S390_COW)) {
> +        return NULL;
> +    } else {
> +        return legacy_s390_alloc(size);
> +    }
> +}
> +
>  int kvm_arch_insert_sw_breakpoint(CPUS390XState *env, struct kvm_sw_breakpoint *bp)
>  {
>      static const uint8_t diag_501[] = {0x83, 0x24, 0x05, 0x01};

Looks good to me.

Jan
Alexander Graf June 18, 2012, 1:44 p.m. UTC | #2
On 15.06.2012, at 17:10, Christian Borntraeger wrote:

> By default qemu will use MAP_PRIVATE for guest pages. This will write
> protect pages and thus break on s390 systems that dont support this feature.
> Therefore qemu has a hack to always use MAP_SHARED for s390. But MAP_SHARED
> has other problems (no dirty pages tracking, a lot more swap overhead etc.)
> Newer systems allow the distinction via KVM_CAP_S390_COW. With this feature
> qemu can use the standard qemu alloc if available, otherwise it will use
> the old s390 hack.

Thanks, applied to s390-next.


Alex
diff mbox

Patch

diff --git a/exec.c b/exec.c
index 5c9b762..584a484 100644
--- a/exec.c
+++ b/exec.c
@@ -2647,26 +2647,14 @@  ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
             exit(1);
 #endif
         } else {
-#if defined(TARGET_S390X) && defined(CONFIG_KVM)
-            /* S390 KVM requires the topmost vma of the RAM to be smaller than
-               an system defined value, which is at least 256GB. Larger systems
-               have larger values. We put the guest between the end of data
-               segment (system break) and this value. We use 32GB as a base to
-               have enough room for the system break to grow. */
-            new_block->host = mmap((void*)0x800000000, size,
-                                   PROT_EXEC|PROT_READ|PROT_WRITE,
-                                   MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
-            if (new_block->host == MAP_FAILED) {
-                fprintf(stderr, "Allocating RAM failed\n");
-                abort();
-            }
-#else
             if (xen_enabled()) {
                 xen_ram_alloc(new_block->offset, size, mr);
+            } else if (kvm_enabled()) {
+                /* some s390/kvm configurations have special constraints */
+                new_block->host = kvm_vmalloc(size);
             } else {
                 new_block->host = qemu_vmalloc(size);
             }
-#endif
             qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
         }
     }
diff --git a/kvm-all.c b/kvm-all.c
index 4ea7d85..0372f7a 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1653,6 +1653,19 @@  int kvm_allows_irq0_override(void)
     return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
 }
 
+void *kvm_vmalloc(ram_addr_t size)
+{
+#ifdef TARGET_S390X
+    void *mem;
+
+    mem = kvm_arch_vmalloc(size);
+    if (mem) {
+        return mem;
+    }
+#endif
+    return qemu_vmalloc(size);
+}
+
 void kvm_setup_guest_memory(void *start, size_t size)
 {
     if (!kvm_has_sync_mmu()) {
diff --git a/kvm.h b/kvm.h
index 9c7b0ea..ddc7c53 100644
--- a/kvm.h
+++ b/kvm.h
@@ -70,6 +70,8 @@  int kvm_init_vcpu(CPUArchState *env);
 int kvm_cpu_exec(CPUArchState *env);
 
 #if !defined(CONFIG_USER_ONLY)
+void *kvm_vmalloc(ram_addr_t size);
+void *kvm_arch_vmalloc(ram_addr_t size);
 void kvm_setup_guest_memory(void *start, size_t size);
 
 int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size);
diff --git a/oslib-posix.c b/oslib-posix.c
index b6a3c7f..93902ac 100644
--- a/oslib-posix.c
+++ b/oslib-posix.c
@@ -41,6 +41,9 @@  extern int daemon(int, int);
       therefore we need special code which handles running on Valgrind. */
 #  define QEMU_VMALLOC_ALIGN (512 * 4096)
 #  define CONFIG_VALGRIND
+#elif defined(__linux__) && defined(__s390x__)
+   /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
+#  define QEMU_VMALLOC_ALIGN (256 * 4096)
 #else
 #  define QEMU_VMALLOC_ALIGN getpagesize()
 #endif
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index 5800fd6..4d38820 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -135,6 +135,41 @@  int kvm_arch_get_registers(CPUS390XState *env)
     return 0;
 }
 
+/*
+ * Legacy layout for s390:
+ * Older S390 KVM requires the topmost vma of the RAM to be
+ * smaller than a system-defined value, which is at least 256GB.
+ * Larger systems have larger values. We put the guest between
+ * the end of data segment (system break) and this value. We
+ * use 32GB as a base to have enough room for the system break
+ * to grow. We also have to use MAP parameters that avoid
+ * read-only mapping of guest pages.
+ */
+static void *legacy_s390_alloc(ram_addr_t size)
+{
+    void *mem;
+
+    mem = mmap((void *) 0x800000000ULL, size,
+               PROT_EXEC|PROT_READ|PROT_WRITE,
+               MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+    if (mem == MAP_FAILED) {
+        fprintf(stderr, "Allocating RAM failed\n");
+        abort();
+    }
+    return mem;
+}
+
+void *kvm_arch_vmalloc(ram_addr_t size)
+{
+    /* Can we use the standard allocation ? */
+    if (kvm_check_extension(kvm_state, KVM_CAP_S390_GMAP) &&
+        kvm_check_extension(kvm_state, KVM_CAP_S390_COW)) {
+        return NULL;
+    } else {
+        return legacy_s390_alloc(size);
+    }
+}
+
 int kvm_arch_insert_sw_breakpoint(CPUS390XState *env, struct kvm_sw_breakpoint *bp)
 {
     static const uint8_t diag_501[] = {0x83, 0x24, 0x05, 0x01};