@@ -735,17 +735,27 @@ static int htab_save_setup(QEMUFile *f, void *opaque)
{
sPAPREnvironment *spapr = opaque;
- spapr->htab_save_index = 0;
- spapr->htab_first_pass = true;
-
/* "Iteration" header */
qemu_put_be32(f, spapr->htab_shift);
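+ /* When QEMU manages the hash table itself (spapr->htab is set), just
+ * reset the incremental-save state; otherwise the table lives inside
+ * KVM and has to be streamed through a kernel-provided fd. */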
+ if (spapr->htab) {
+ spapr->htab_save_index = 0;
+ spapr->htab_first_pass = true;
+ } else {
+ assert(kvm_enabled());
+
+ spapr->htab_fd = kvmppc_get_htab_fd(false);
+ if (spapr->htab_fd < 0) {
+ fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n",
+ strerror(errno));
+ return -1;
+ }
+ }
+
return 0;
}
-#define MAX_ITERATION_NS 5000000 /* 5 ms */
-
static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
int64_t max_ns)
{
@@ -796,8 +806,8 @@ static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
spapr->htab_save_index = index;
}
-static bool htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
- int64_t max_ns)
+static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
+ int64_t max_ns)
{
bool final = max_ns < 0;
int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
@@ -870,21 +880,32 @@ static bool htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
spapr->htab_save_index = index;
- return (examined >= htabslots) && (sent == 0);
+ return (examined >= htabslots) && (sent == 0) ? 1 : 0;
}
+#define MAX_ITERATION_NS 5000000 /* 5 ms */
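+/* Size of the scratch buffer used for each read() from the KVM HTAB fd */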
+#define MAX_KVM_BUF_SIZE 2048
+
static int htab_save_iterate(QEMUFile *f, void *opaque)
{
sPAPREnvironment *spapr = opaque;
- bool nothingleft = false;;
+ int rc = 0;
/* Iteration header */
qemu_put_be32(f, 0);
- if (spapr->htab_first_pass) {
+ if (!spapr->htab) {
+ assert(kvm_enabled());
+
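+ /* The kernel owns the table: copy out as much of it as fits in this
+ * iteration's time budget. */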
+ rc = kvmppc_save_htab(f, spapr->htab_fd,
+ MAX_KVM_BUF_SIZE, MAX_ITERATION_NS);
+ if (rc < 0) {
+ return rc;
+ }
+ } else if (spapr->htab_first_pass) {
htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
} else {
- nothingleft = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
+ rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
}
/* End marker */
@@ -892,7 +913,7 @@ static int htab_save_iterate(QEMUFile *f, void *opaque)
qemu_put_be16(f, 0);
qemu_put_be16(f, 0);
- return nothingleft ? 1 : 0;
+ return rc;
}
static int htab_save_complete(QEMUFile *f, void *opaque)
@@ -902,7 +923,20 @@ static int htab_save_complete(QEMUFile *f, void *opaque)
/* Iteration header */
qemu_put_be32(f, 0);
- htab_save_later_pass(f, spapr, -1);
+ if (!spapr->htab) {
+ int rc;
+
+ assert(kvm_enabled());
+
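+ /* A negative max_ns disables the time limit so the rest of the
+ * table is drained in one go. */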
+ rc = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, -1);
+ if (rc < 0) {
+ return rc;
+ }
+ close(spapr->htab_fd);
+ spapr->htab_fd = -1;
+ } else {
+ htab_save_later_pass(f, spapr, -1);
+ }
/* End marker */
qemu_put_be32(f, 0);
@@ -916,6 +950,7 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id)
{
sPAPREnvironment *spapr = opaque;
uint32_t section_hdr;
+ int fd = -1;
if (version_id < 1 || version_id > 1) {
fprintf(stderr, "htab_load() bad version\n");
@@ -932,6 +967,16 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id)
return 0;
}
+ if (!spapr->htab) {
+ assert(kvm_enabled());
+
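+ /* The table is inside KVM, so open a write-capable HTAB fd to push
+ * the restored entries back into the kernel. */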
+ fd = kvmppc_get_htab_fd(true);
+ if (fd < 0) {
+ fprintf(stderr, "Unable to open fd to restore KVM hash table: %s\n",
+ strerror(errno));
+ return -1;
+ }
+ }
+
while (true) {
uint32_t index;
uint16_t n_valid, n_invalid;
@@ -945,24 +990,41 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id)
break;
}
- if ((index + n_valid + n_invalid) >=
+ if ((index + n_valid + n_invalid) >
(HTAB_SIZE(spapr) / HASH_PTE_SIZE_64)) {
/* Bad index in stream */
fprintf(stderr, "htab_load() bad index %d (%hd+%hd entries) "
- "in htab stream\n", index, n_valid, n_invalid);
+ "in htab stream (htab_shift=%d)\n", index, n_valid, n_invalid,
+ spapr->htab_shift);
return -EINVAL;
}
- if (n_valid) {
- qemu_get_buffer(f, HPTE(spapr->htab, index),
- HASH_PTE_SIZE_64 * n_valid);
- }
- if (n_invalid) {
- memset(HPTE(spapr->htab, index + n_valid), 0,
- HASH_PTE_SIZE_64 * n_invalid);
+ if (spapr->htab) {
+ if (n_valid) {
+ qemu_get_buffer(f, HPTE(spapr->htab, index),
+ HASH_PTE_SIZE_64 * n_valid);
+ }
+ if (n_invalid) {
+ memset(HPTE(spapr->htab, index + n_valid), 0,
+ HASH_PTE_SIZE_64 * n_invalid);
+ }
+ } else {
+ int rc;
+
+ assert(fd >= 0);
+
+ rc = kvmppc_load_htab_chunk(f, fd, index, n_valid, n_invalid);
+ if (rc < 0) {
+ return rc;
+ }
}
}
+ if (!spapr->htab) {
+ assert(fd >= 0);
+ close(fd);
+ }
+
return 0;
}
@@ -37,6 +37,7 @@ typedef struct sPAPREnvironment {
/* Migration state */
int htab_save_index;
bool htab_first_pass;
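+ /* fd used to stream the hash table out of KVM during save */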
+ int htab_fd;
} sPAPREnvironment;
#define H_SUCCESS 0
@@ -65,6 +65,7 @@ static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
+static int cap_htab_fd;
/* XXX We have a race condition where we actually have a level triggered
* interrupt, but the infrastructure can't expose that yet, so the guest
@@ -101,6 +102,7 @@ int kvm_arch_init(KVMState *s)
cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
/* Note: we don't set cap_papr here, because this capability is
* only activated after this by kvmppc_set_papr() */
+ cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
if (!cap_interrupt_level) {
fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
@@ -1788,6 +1790,73 @@ static int kvm_ppc_register_host_cpu_type(void)
}
+int kvmppc_get_htab_fd(bool write)
+{
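+ /* KVM_GET_HTAB_WRITE asks for a fd that restores entries into the
+ * kernel's table; without it the fd streams the current contents out. */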
+ struct kvm_get_htab_fd s = {
+ .flags = write ? KVM_GET_HTAB_WRITE : 0,
+ .start_index = 0,
+ };
+
+ if (!cap_htab_fd) {
+ fprintf(stderr, "KVM version doesn't support saving the hash table\n");
+ return -1;
+ }
+
+ return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
+}
+
+int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
+{
+ int64_t starttime = qemu_get_clock_ns(rt_clock);
+ uint8_t buf[bufsize];
+ ssize_t rc;
+
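+ /* Pull HPTE chunks from the kernel until it reports the end of the
+ * table (read() returns 0) or the time budget for this call runs out. */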
+ do {
+ rc = read(fd, buf, bufsize);
+ if (rc < 0) {
+ fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
+ strerror(errno));
+ return rc;
+ } else if (rc) {
+ /* Kernel already returns data in BE format for the file */
+ qemu_put_buffer(f, buf, rc);
+ }
+ } while ((rc != 0)
+ && ((max_ns < 0)
+ || ((qemu_get_clock_ns(rt_clock) - starttime) < max_ns)));
+
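+ /* 1 tells the caller the whole table has been transferred,
+ * 0 that more data remains for a later iteration. */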
+ return (rc == 0) ? 1 : 0;
+}
+
+int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
+ uint16_t n_valid, uint16_t n_invalid)
+{
+ struct kvm_get_htab_header *buf;
+ size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
+ ssize_t rc;
+
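+ /* Reassemble one chunk in the layout the write side of the HTAB fd
+ * expects: a kvm_get_htab_header followed by n_valid HPTEs taken
+ * from the migration stream. */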
+ buf = alloca(chunksize);
+ /* This is KVM on ppc, so this is all big-endian */
+ buf->index = index;
+ buf->n_valid = n_valid;
+ buf->n_invalid = n_invalid;
+
+ qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
+
+ rc = write(fd, buf, chunksize);
+ if (rc < 0) {
+ fprintf(stderr, "Error writing KVM hash table: %s\n",
+ strerror(errno));
+ return rc;
+ }
+ if (rc != chunksize) {
+ /* We should never get a short write on a single chunk */
+ fprintf(stderr, "Short write, restoring KVM hash table\n");
+ return -1;
+ }
+ return 0;
+}
+
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
return true;
@@ -38,6 +38,10 @@ uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
#endif /* !CONFIG_USER_ONLY */
int kvmppc_fixup_cpu(PowerPCCPU *cpu);
bool kvmppc_has_cap_epr(void);
+int kvmppc_get_htab_fd(bool write);
+int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns);
+int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
+ uint16_t n_valid, uint16_t n_invalid);
#else
@@ -159,6 +163,24 @@ static inline bool kvmppc_has_cap_epr(void)
{
return false;
}
+
+static inline int kvmppc_get_htab_fd(bool write)
+{
+ return -1;
+}
+
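+/* The callers assert kvm_enabled() before taking the HTAB fd path, so
+ * these stubs should never be reached when KVM support is compiled out. */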
+static inline int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize,
+ int64_t max_ns)
+{
+ abort();
+}
+
+static inline int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
+ uint16_t n_valid, uint16_t n_invalid)
+{
+ abort();
+}
+
#endif
#ifndef CONFIG_KVM