@@ -471,6 +471,11 @@ static inline void qemu_cleanup_generic_vfree(void *p)
#else
#define QEMU_MADV_REMOVE QEMU_MADV_DONTNEED
#endif
+#ifdef MADV_POPULATE_WRITE
+#define QEMU_MADV_POPULATE_WRITE MADV_POPULATE_WRITE
+#else
+#define QEMU_MADV_POPULATE_WRITE QEMU_MADV_INVALID
+#endif
#elif defined(CONFIG_POSIX_MADVISE)
@@ -484,6 +489,7 @@ static inline void qemu_cleanup_generic_vfree(void *p)
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_REMOVE QEMU_MADV_DONTNEED
+#define QEMU_MADV_POPULATE_WRITE QEMU_MADV_INVALID
#else /* no-op */
@@ -497,6 +503,7 @@ static inline void qemu_cleanup_generic_vfree(void *p)
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
#define QEMU_MADV_REMOVE QEMU_MADV_INVALID
+#define QEMU_MADV_POPULATE_WRITE QEMU_MADV_INVALID
#endif
@@ -484,10 +484,6 @@ static void *do_touch_pages(void *arg)
*
* 'volatile' to stop compiler optimizing this away
* to a no-op
- *
- * TODO: get a better solution from kernel so we
- * don't need to write at all so we don't cause
- * wear on the storage backing the region...
*/
*(volatile char *)addr = *addr;
addr += hpagesize;
@@ -497,6 +493,26 @@ static void *do_touch_pages(void *arg)
return (void *)(uintptr_t)ret;
}
+static void *do_madv_populate_write_pages(void *arg)
+{ /* Thread entry point: one QEMU_MADV_POPULATE_WRITE call over this thread's range. */
+ MemsetThread *memset_args = (MemsetThread *)arg; /* per-thread slot handed over at thread creation */
+ const size_t size = memset_args->numpages * memset_args->hpagesize; /* bytes in this thread's share */
+ char * const addr = memset_args->addr;
+ int ret = 0; /* stays 0 on success, otherwise becomes -errno of the failed call */
+
+ /*
+ * See do_touch_pages(): hold off until threads_created_flag is set,
+ * re-checking it under page_mutex each time page_cond wakes us up.
+ */
+ qemu_mutex_lock(&page_mutex);
+ while (!threads_created_flag) {
+ qemu_cond_wait(&page_cond, &page_mutex);
+ }
+ qemu_mutex_unlock(&page_mutex);
+
+ if (size && qemu_madvise(addr, size, QEMU_MADV_POPULATE_WRITE)) { /* size == 0: nothing to advise */
+ ret = -errno;
+ }
+ return (void *)(uintptr_t)ret; /* status travels back in the thread's return pointer */
+}
+
static inline int get_memset_num_threads(int smp_cpus)
{
long host_procs = sysconf(_SC_NPROCESSORS_ONLN);
@@ -510,10 +526,11 @@ static inline int get_memset_num_threads(int smp_cpus)
}
static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
- int smp_cpus)
+ int smp_cpus, bool use_madv_populate_write)
{
static gsize initialized = 0;
size_t numpages_per_thread, leftover;
+ void *(*touch_fn)(void *);
int ret = 0, i = 0;
char *addr = area;
@@ -523,6 +540,12 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
g_once_init_leave(&initialized, 1);
}
+ if (use_madv_populate_write) {
+ touch_fn = do_madv_populate_write_pages;
+ } else {
+ touch_fn = do_touch_pages;
+ }
+
threads_created_flag = false;
memset_num_threads = get_memset_num_threads(smp_cpus);
memset_thread = g_new0(MemsetThread, memset_num_threads);
@@ -533,7 +556,7 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
memset_thread[i].numpages = numpages_per_thread + (i < leftover);
memset_thread[i].hpagesize = hpagesize;
qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
- do_touch_pages, &memset_thread[i],
+ touch_fn, &memset_thread[i],
QEMU_THREAD_JOINABLE);
addr += memset_thread[i].numpages * hpagesize;
}
@@ -556,6 +579,12 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
return ret;
}
+static bool madv_populate_write_possible(char *area, size_t pagesize)
+{ /* Probe: apply QEMU_MADV_POPULATE_WRITE to the first pagesize bytes of area. */
+ return !qemu_madvise(area, pagesize, QEMU_MADV_POPULATE_WRITE) || /* zero return: advice accepted */
+ errno != EINVAL; /* only a failure with EINVAL yields false */
+}
+
void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
Error **errp)
{
@@ -563,30 +592,42 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
struct sigaction act, oldact;
size_t hpagesize = qemu_fd_getpagesize(fd);
size_t numpages = DIV_ROUND_UP(memory, hpagesize);
+ bool use_madv_populate_write;
- memset(&act, 0, sizeof(act));
- act.sa_handler = &sigbus_handler;
- act.sa_flags = 0;
-
- ret = sigaction(SIGBUS, &act, &oldact);
- if (ret) {
- error_setg_errno(errp, errno,
- "os_mem_prealloc: failed to install signal handler");
- return;
+ /*
+ * Probe on every invocation, as MADV_POPULATE_WRITE cannot be used for
+ * some special mappings, such as mappings of /dev/mem.
+ */
+ use_madv_populate_write = madv_populate_write_possible(area, hpagesize);
+
+ if (!use_madv_populate_write) {
+ memset(&act, 0, sizeof(act));
+ act.sa_handler = &sigbus_handler;
+ act.sa_flags = 0;
+
+ ret = sigaction(SIGBUS, &act, &oldact);
+ if (ret) {
+ error_setg_errno(errp, errno,
+ "os_mem_prealloc: failed to install signal handler");
+ return;
+ }
}
/* touch pages simultaneously */
- ret = touch_all_pages(area, hpagesize, numpages, smp_cpus);
+ ret = touch_all_pages(area, hpagesize, numpages, smp_cpus,
+ use_madv_populate_write);
if (ret) {
error_setg_errno(errp, -ret,
"os_mem_prealloc: preallocating memory failed");
}
- ret = sigaction(SIGBUS, &oldact, NULL);
- if (ret) {
- /* Terminate QEMU since it can't recover from error */
- perror("os_mem_prealloc: failed to reinstall signal handler");
- exit(1);
+ if (!use_madv_populate_write) {
+ ret = sigaction(SIGBUS, &oldact, NULL);
+ if (ret) {
+ /* Terminate QEMU since it can't recover from error */
+ perror("os_mem_prealloc: failed to reinstall signal handler");
+ exit(1);
+ }
}
}