diff mbox

[1/1] s390: autodetect map private

Message ID 1337086184-1535-2-git-send-email-borntraeger@de.ibm.com
State New
Headers show

Commit Message

Christian Borntraeger May 15, 2012, 12:49 p.m. UTC
From: Christian Borntraeger <borntraeger@de.ibm.com>

KVM on certain s390 systems must not use MAP_PRIVATE, since
host read-only page faults are delivered to the guest. Newer
systems allow the distinction via KVM_CAP_S390_COW.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
CC: Avi Kivity <avi@redhat.com>
---
 exec.c                    |   54 ++++++++++++++++++++++++++++++++-------------
 kvm-all.c                 |    6 +++++
 kvm.h                     |    1 +
 linux-headers/linux/kvm.h |    1 +
 oslib-posix.c             |    3 +++
 5 files changed, 50 insertions(+), 15 deletions(-)

Comments

Alexander Graf June 5, 2012, 12:50 p.m. UTC | #1
On 15.05.2012, at 14:49, Christian Borntraeger wrote:

> From: Christian Borntraeger <borntraeger@de.ibm.com>
> 
> kvm on specific s390 systems must not use MAP_PRIVATE since
> host read-only page faults are delivered to the guest. Newer
> systems allow the distinction via KVM_CAP_S390_COW
> 
> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
> CC: Avi Kivity <avi@redhat.com>
> ---
> exec.c                    |   54 ++++++++++++++++++++++++++++++++-------------
> kvm-all.c                 |    6 +++++
> kvm.h                     |    1 +
> linux-headers/linux/kvm.h |    1 +
> oslib-posix.c             |    3 +++
> 5 files changed, 50 insertions(+), 15 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 0607c9b..68c2940 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -2601,6 +2601,43 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
>     }
> }
> 
> +/*
> + * lets make sure that we dont have the old s390x limitations regarding
> + * guest mappings
> + */
> +static int legacy_s390x_mem_layout(void)
> +{
> +#if defined(TARGET_S390X) && defined(CONFIG_KVM)

No need for defined(CONFIG_KVM). Please see below for details.

> +    return kvm_has_legacy_s390x_memlayout();
> +#else
> +    return 0;
> +#endif
> +}
> +
> +/*
> + * Legacy layout for s390:
> + * Older S390 KVM requires the topmost vma of the RAM to be
> + * smaller than an system defined value, which is at least 256GB.
> + * Larger systems have larger values. We put the guest between
> + * the end of data segment (system break) and this value. We
> + * use 32GB as a base to have enough room for the system break
> + * to grow. We also have to use MAP parameters that avoid
> + * read-only mapping of guest pages.
> + */
> +static void *legacy_s390_alloc(ram_addr_t size)
> +{
> +    void *mem;
> +
> +    mem = mmap((void *) 0x800000000ULL, size,
> +               PROT_EXEC|PROT_READ|PROT_WRITE,
> +               MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
> +    if (mem == MAP_FAILED) {
> +        fprintf(stderr, "Allocating RAM failed\n");
> +        abort();
> +    }
> +    return mem;
> +}
> +
> ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
>                                    MemoryRegion *mr)
> {
> @@ -2627,26 +2664,13 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
>             exit(1);
> #endif
>         } else {
> -#if defined(TARGET_S390X) && defined(CONFIG_KVM)
> -            /* S390 KVM requires the topmost vma of the RAM to be smaller than
> -               an system defined value, which is at least 256GB. Larger systems
> -               have larger values. We put the guest between the end of data
> -               segment (system break) and this value. We use 32GB as a base to
> -               have enough room for the system break to grow. */
> -            new_block->host = mmap((void*)0x800000000, size,
> -                                   PROT_EXEC|PROT_READ|PROT_WRITE,
> -                                   MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
> -            if (new_block->host == MAP_FAILED) {
> -                fprintf(stderr, "Allocating RAM failed\n");
> -                abort();
> -            }
> -#else
>             if (xen_enabled()) {
>                 xen_ram_alloc(new_block->offset, size, mr);
> +            } else if (legacy_s390x_mem_layout()) {
> +                new_block->host = legacy_s390_alloc(size);
>             } else {
>                 new_block->host = qemu_vmalloc(size);
>             }
> -#endif
>             qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
>         }
>     }
> diff --git a/kvm-all.c b/kvm-all.c
> index 9b73ccf..f794546 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -1417,6 +1417,12 @@ int kvm_has_many_ioeventfds(void)
>     return kvm_state->many_ioeventfds;
> }
> 
> +int kvm_has_legacy_s390x_memlayout(void)

Since this one is only ever called from #ifdef TARGET_S390X code, please move it to target-s390x/. Check out target-ppc/kvm_ppc.h to see how we ensure compatibility with the non-kvm case.

> +{
> +    return !kvm_check_extension(kvm_state, KVM_CAP_S390_GMAP) ||
> +           !kvm_check_extension(kvm_state, KVM_CAP_S390_COW);
> +}
> +
> int kvm_has_gsi_routing(void)
> {
> #ifdef KVM_CAP_IRQ_ROUTING
> diff --git a/kvm.h b/kvm.h

As mentioned in the other mail, please extract the header update into its own patch :).

> index 4ccae8c..37f40ee 100644
> --- a/kvm.h
> +++ b/kvm.h
> @@ -56,6 +56,7 @@ int kvm_has_xsave(void);
> int kvm_has_xcrs(void);
> int kvm_has_pit_state2(void);
> int kvm_has_many_ioeventfds(void);
> +int kvm_has_legacy_s390x_memlayout(void);
> int kvm_has_gsi_routing(void);
> 
> int kvm_allows_irq0_override(void);
> diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
> index ee7bd9c..a7a2d6f 100644
> --- a/linux-headers/linux/kvm.h
> +++ b/linux-headers/linux/kvm.h
> @@ -590,6 +590,7 @@ struct kvm_ppc_pvinfo {
> #define KVM_CAP_SYNC_REGS 74
> #define KVM_CAP_PCI_2_3 75
> #define KVM_CAP_KVMCLOCK_CTRL 76
> +#define KVM_CAP_S390_COW 79
> 
> #ifdef KVM_CAP_IRQ_ROUTING
> 
> diff --git a/oslib-posix.c b/oslib-posix.c
> index b6a3c7f..93902ac 100644
> --- a/oslib-posix.c
> +++ b/oslib-posix.c
> @@ -41,6 +41,9 @@ extern int daemon(int, int);
>       therefore we need special code which handles running on Valgrind. */
> #  define QEMU_VMALLOC_ALIGN (512 * 4096)
> #  define CONFIG_VALGRIND
> +#elif defined(__linux__) && defined(__s390x__)
> +   /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
> +#  define QEMU_VMALLOC_ALIGN (256 * 4096)

I certainly wouldn't be opposed to always forcing the vmalloc align to 1MB. But for now this should be ok.


Alex

> #else
> #  define QEMU_VMALLOC_ALIGN getpagesize()
> #endif
> -- 
> 1.7.10.2
>
diff mbox

Patch

diff --git a/exec.c b/exec.c
index 0607c9b..68c2940 100644
--- a/exec.c
+++ b/exec.c
@@ -2601,6 +2601,43 @@  void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
     }
 }
 
+/*
+ * Let's make sure that we don't have the old s390x limitations regarding
+ * guest mappings
+ */
+static int legacy_s390x_mem_layout(void)
+{
+#if defined(TARGET_S390X) && defined(CONFIG_KVM)
+    return kvm_has_legacy_s390x_memlayout();
+#else
+    return 0;
+#endif
+}
+
+/*
+ * Legacy layout for s390:
+ * Older S390 KVM requires the topmost vma of the RAM to be
+ * smaller than a system-defined value, which is at least 256GB.
+ * Larger systems have larger values. We put the guest between
+ * the end of data segment (system break) and this value. We
+ * use 32GB as a base to have enough room for the system break
+ * to grow. We also have to use MAP parameters that avoid
+ * read-only mapping of guest pages.
+ */
+static void *legacy_s390_alloc(ram_addr_t size)
+{
+    void *mem;
+
+    mem = mmap((void *) 0x800000000ULL, size,
+               PROT_EXEC|PROT_READ|PROT_WRITE,
+               MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+    if (mem == MAP_FAILED) {
+        fprintf(stderr, "Allocating RAM failed\n");
+        abort();
+    }
+    return mem;
+}
+
 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                    MemoryRegion *mr)
 {
@@ -2627,26 +2664,13 @@  ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
             exit(1);
 #endif
         } else {
-#if defined(TARGET_S390X) && defined(CONFIG_KVM)
-            /* S390 KVM requires the topmost vma of the RAM to be smaller than
-               an system defined value, which is at least 256GB. Larger systems
-               have larger values. We put the guest between the end of data
-               segment (system break) and this value. We use 32GB as a base to
-               have enough room for the system break to grow. */
-            new_block->host = mmap((void*)0x800000000, size,
-                                   PROT_EXEC|PROT_READ|PROT_WRITE,
-                                   MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
-            if (new_block->host == MAP_FAILED) {
-                fprintf(stderr, "Allocating RAM failed\n");
-                abort();
-            }
-#else
             if (xen_enabled()) {
                 xen_ram_alloc(new_block->offset, size, mr);
+            } else if (legacy_s390x_mem_layout()) {
+                new_block->host = legacy_s390_alloc(size);
             } else {
                 new_block->host = qemu_vmalloc(size);
             }
-#endif
             qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
         }
     }
diff --git a/kvm-all.c b/kvm-all.c
index 9b73ccf..f794546 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1417,6 +1417,12 @@  int kvm_has_many_ioeventfds(void)
     return kvm_state->many_ioeventfds;
 }
 
+int kvm_has_legacy_s390x_memlayout(void)
+{
+    return !kvm_check_extension(kvm_state, KVM_CAP_S390_GMAP) ||
+           !kvm_check_extension(kvm_state, KVM_CAP_S390_COW);
+}
+
 int kvm_has_gsi_routing(void)
 {
 #ifdef KVM_CAP_IRQ_ROUTING
diff --git a/kvm.h b/kvm.h
index 4ccae8c..37f40ee 100644
--- a/kvm.h
+++ b/kvm.h
@@ -56,6 +56,7 @@  int kvm_has_xsave(void);
 int kvm_has_xcrs(void);
 int kvm_has_pit_state2(void);
 int kvm_has_many_ioeventfds(void);
+int kvm_has_legacy_s390x_memlayout(void);
 int kvm_has_gsi_routing(void);
 
 int kvm_allows_irq0_override(void);
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index ee7bd9c..a7a2d6f 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -590,6 +590,7 @@  struct kvm_ppc_pvinfo {
 #define KVM_CAP_SYNC_REGS 74
 #define KVM_CAP_PCI_2_3 75
 #define KVM_CAP_KVMCLOCK_CTRL 76
+#define KVM_CAP_S390_COW 79
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/oslib-posix.c b/oslib-posix.c
index b6a3c7f..93902ac 100644
--- a/oslib-posix.c
+++ b/oslib-posix.c
@@ -41,6 +41,9 @@  extern int daemon(int, int);
       therefore we need special code which handles running on Valgrind. */
 #  define QEMU_VMALLOC_ALIGN (512 * 4096)
 #  define CONFIG_VALGRIND
+#elif defined(__linux__) && defined(__s390x__)
+   /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
+#  define QEMU_VMALLOC_ALIGN (256 * 4096)
 #else
 #  define QEMU_VMALLOC_ALIGN getpagesize()
 #endif