Message ID: 20111212222821.GE18868@bloggs.ozlabs.ibm.com
State: New, archived
On 12.12.2011, at 23:28, Paul Mackerras wrote:

> This allocates an array for each memory slot that is added to store
> the physical addresses of the pages in the slot. This array is
> vmalloc'd and accessed in kvmppc_h_enter using real_vmalloc_addr().
> This allows us to remove the ram_pginfo field from the kvm_arch
> struct, and removes the 64GB guest RAM limit that we had.
>
> We use the low-order bits of the array entries to store a flag
> indicating that we have done get_page on the corresponding page,
> and therefore need to call put_page when we are finished with the
> page. Currently this is set for all pages except those in our
> special RMO regions.
>
> Signed-off-by: Paul Mackerras <paulus@samba.org>

[...]

> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> index 6148493..84dae82 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -20,6 +20,25 @@
>  #include <asm/synch.h>
>  #include <asm/ppc-opcode.h>
>
> +/*
> + * Since this file is built in even if KVM is a module, we need
> + * a local copy of this function for the case where kvm_main.c is
> + * modular.
> + */
> +static struct kvm_memory_slot *builtin_gfn_to_memslot(struct kvm *kvm,
> +						gfn_t gfn)
> +{

Shouldn't this rather be in a header file then? I'd rather not have this
code duplicated. Please follow up with a patch to merge this copy and the
real one into something in a header file.


Alex
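[Editor's illustration: a minimal sketch of the merge Alex asks for — hoist the
linear slot walk into a shared header as a static inline, so the modular
kvm_main.c and the always-built-in book3s_hv_rm_mmu.c share one definition.
The helper name and header location are assumptions, not something fixed in
this thread.]

/*
 * Hypothetical sketch only: placed in a shared header such as
 * include/linux/kvm_host.h (location assumed).  Same linear scan
 * as builtin_gfn_to_memslot() in the patch below.
 */
static inline struct kvm_memory_slot *
search_memslots(struct kvm_memslots *slots, gfn_t gfn)
{
	struct kvm_memory_slot *memslot;

	kvm_for_each_memslot(memslot, slots)
		if (gfn >= memslot->base_gfn &&
		    gfn < memslot->base_gfn + memslot->npages)
			return memslot;
	return NULL;
}

/* callers would then do: memslot = search_memslots(kvm_memslots(kvm), gfn); */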
This allocates an array for each memory slot that is added to store
the physical addresses of the pages in the slot. This array is
vmalloc'd and accessed in kvmppc_h_enter using real_vmalloc_addr().
This allows us to remove the ram_pginfo field from the kvm_arch
struct, and removes the 64GB guest RAM limit that we had.

We use the low-order bits of the array entries to store a flag
indicating that we have done get_page on the corresponding page,
and therefore need to call put_page when we are finished with the
page. Currently this is set for all pages except those in our
special RMO regions.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/include/asm/kvm_host.h |    9 ++-
 arch/powerpc/kvm/book3s_64_mmu_hv.c |   18 +++---
 arch/powerpc/kvm/book3s_hv.c        |  114 +++++++++++++++++------------------
 arch/powerpc/kvm/book3s_hv_rm_mmu.c |   41 +++++++++++-
 4 files changed, 107 insertions(+), 75 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 629df2e..7a17ab5 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -38,6 +38,7 @@
 #define KVM_MEMORY_SLOTS 32
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 
 #ifdef CONFIG_KVM_MMIO
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -175,25 +176,27 @@ struct revmap_entry {
 	unsigned long guest_rpte;
 };
 
+/* Low-order bits in kvm->arch.slot_phys[][] */
+#define KVMPPC_GOT_PAGE		0x80
+
 struct kvm_arch {
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	unsigned long hpt_virt;
 	struct revmap_entry *revmap;
-	unsigned long ram_npages;
 	unsigned long ram_psize;
 	unsigned long ram_porder;
-	struct kvmppc_pginfo *ram_pginfo;
 	unsigned int lpid;
 	unsigned int host_lpid;
 	unsigned long host_lpcr;
 	unsigned long sdr1;
 	unsigned long host_sdr1;
 	int tlbie_lock;
-	int n_rma_pages;
 	unsigned long lpcr;
 	unsigned long rmor;
 	struct kvmppc_rma_info *rma;
 	struct list_head spapr_tce_tables;
+	unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
+	int slot_npages[KVM_MEM_SLOTS_NUM];
 	unsigned short last_vcpu[NR_CPUS];
 	struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 80ece8d..e4c6069 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -98,16 +98,16 @@ void kvmppc_free_hpt(struct kvm *kvm)
 void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
 {
 	unsigned long i;
-	unsigned long npages = kvm->arch.ram_npages;
-	unsigned long pfn;
+	unsigned long npages;
+	unsigned long pa;
 	unsigned long *hpte;
 	unsigned long hash;
 	unsigned long porder = kvm->arch.ram_porder;
 	struct revmap_entry *rev;
-	struct kvmppc_pginfo *pginfo = kvm->arch.ram_pginfo;
+	unsigned long *physp;
 
-	if (!pginfo)
-		return;
+	physp = kvm->arch.slot_phys[mem->slot];
+	npages = kvm->arch.slot_npages[mem->slot];
 
 	/* VRMA can't be > 1TB */
 	if (npages > 1ul << (40 - porder))
@@ -117,9 +117,10 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
 		npages = HPT_NPTEG;
 
 	for (i = 0; i < npages; ++i) {
-		pfn = pginfo[i].pfn;
-		if (!pfn)
+		pa = physp[i];
+		if (!pa)
 			break;
+		pa &= PAGE_MASK;
 		/* can't use hpt_hash since va > 64 bits */
 		hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
 		/*
@@ -131,8 +132,7 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
 		hash = (hash << 3) + 7;
 		hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 4));
 		/* HPTE low word - RPN, protection, etc. */
-		hpte[1] = (pfn << PAGE_SHIFT) | HPTE_R_R | HPTE_R_C |
-			HPTE_R_M | PP_RWXX;
+		hpte[1] = pa | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
 		smp_wmb();
 		hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
 			(i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index da7db14..86d3e4b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -50,14 +50,6 @@
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 
-/*
- * For now, limit memory to 64GB and require it to be large pages.
- * This value is chosen because it makes the ram_pginfo array be
- * 64kB in size, which is about as large as we want to be trying
- * to allocate with kmalloc.
- */
-#define MAX_MEM_ORDER		36
-
 #define LARGE_PAGE_ORDER	24	/* 16MB pages */
 
 /* #define EXIT_DEBUG */
@@ -147,10 +139,12 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
 				       unsigned long vcpuid, unsigned long vpa)
 {
 	struct kvm *kvm = vcpu->kvm;
-	unsigned long pg_index, ra, len;
+	unsigned long gfn, pg_index, ra, len;
 	unsigned long pg_offset;
 	void *va;
 	struct kvm_vcpu *tvcpu;
+	struct kvm_memory_slot *memslot;
+	unsigned long *physp;
 
 	tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
 	if (!tvcpu)
@@ -164,14 +158,20 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
 		if (vpa & 0x7f)
 			return H_PARAMETER;
 		/* registering new area; convert logical addr to real */
-		pg_index = vpa >> kvm->arch.ram_porder;
-		pg_offset = vpa & (kvm->arch.ram_psize - 1);
-		if (pg_index >= kvm->arch.ram_npages)
+		gfn = vpa >> PAGE_SHIFT;
+		memslot = gfn_to_memslot(kvm, gfn);
+		if (!memslot || !(memslot->flags & KVM_MEMSLOT_INVALID))
+			return H_PARAMETER;
+		physp = kvm->arch.slot_phys[memslot->id];
+		if (!physp)
 			return H_PARAMETER;
-		if (kvm->arch.ram_pginfo[pg_index].pfn == 0)
+		pg_index = (gfn - memslot->base_gfn) >>
+			(kvm->arch.ram_porder - PAGE_SHIFT);
+		pg_offset = vpa & (kvm->arch.ram_psize - 1);
+		ra = physp[pg_index];
+		if (!ra)
 			return H_PARAMETER;
-		ra = kvm->arch.ram_pginfo[pg_index].pfn << PAGE_SHIFT;
-		ra |= pg_offset;
+		ra = (ra & PAGE_MASK) | pg_offset;
 		va = __va(ra);
 		if (flags <= 1)
 			len = *(unsigned short *)(va + 4);
@@ -1108,12 +1108,11 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem)
 {
 	unsigned long psize, porder;
-	unsigned long i, npages, totalpages;
-	unsigned long pg_ix;
-	struct kvmppc_pginfo *pginfo;
+	unsigned long i, npages;
 	unsigned long hva;
 	struct kvmppc_rma_info *ri = NULL;
 	struct page *page;
+	unsigned long *phys;
 
 	/* For now, only allow 16MB pages */
 	porder = LARGE_PAGE_ORDER;
@@ -1125,20 +1124,21 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 		return -EINVAL;
 	}
 
+	/* Allocate a slot_phys array */
 	npages = mem->memory_size >> porder;
-	totalpages = (mem->guest_phys_addr + mem->memory_size) >> porder;
-
-	/* More memory than we have space to track? */
-	if (totalpages > (1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER)))
-		return -EINVAL;
+	phys = kvm->arch.slot_phys[mem->slot];
+	if (!phys) {
+		phys = vzalloc(npages * sizeof(unsigned long));
+		if (!phys)
+			return -ENOMEM;
+		kvm->arch.slot_phys[mem->slot] = phys;
+		kvm->arch.slot_npages[mem->slot] = npages;
+	}
 
 	/* Do we already have an RMA registered? */
 	if (mem->guest_phys_addr == 0 && kvm->arch.rma)
 		return -EINVAL;
 
-	if (totalpages > kvm->arch.ram_npages)
-		kvm->arch.ram_npages = totalpages;
-
 	/* Is this one of our preallocated RMAs? */
 	if (mem->guest_phys_addr == 0) {
 		struct vm_area_struct *vma;
@@ -1171,7 +1171,6 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 		}
 		atomic_inc(&ri->use_count);
 		kvm->arch.rma = ri;
-		kvm->arch.n_rma_pages = rma_size >> porder;
 
 		/* Update LPCR and RMOR */
 		lpcr = kvm->arch.lpcr;
@@ -1195,12 +1194,9 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
 	}
 
-	pg_ix = mem->guest_phys_addr >> porder;
-	pginfo = kvm->arch.ram_pginfo + pg_ix;
-	for (i = 0; i < npages; ++i, ++pg_ix) {
-		if (ri && pg_ix < kvm->arch.n_rma_pages) {
-			pginfo[i].pfn = ri->base_pfn +
-				(pg_ix << (porder - PAGE_SHIFT));
+	for (i = 0; i < npages; ++i) {
+		if (ri && i < ri->npages) {
+			phys[i] = (ri->base_pfn << PAGE_SHIFT) + (i << porder);
 			continue;
 		}
 		hva = mem->userspace_addr + (i << porder);
@@ -1216,7 +1212,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 				hva, compound_order(page));
 			goto err;
 		}
-		pginfo[i].pfn = page_to_pfn(page);
+		phys[i] = (page_to_pfn(page) << PAGE_SHIFT) | KVMPPC_GOT_PAGE;
 	}
 
 	return 0;
@@ -1225,6 +1221,28 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 	return -EINVAL;
 }
 
+static void unpin_slot(struct kvm *kvm, int slot_id)
+{
+	unsigned long *physp;
+	unsigned long j, npages, pfn;
+	struct page *page;
+
+	physp = kvm->arch.slot_phys[slot_id];
+	npages = kvm->arch.slot_npages[slot_id];
+	if (physp) {
+		for (j = 0; j < npages; j++) {
+			if (!(physp[j] & KVMPPC_GOT_PAGE))
+				continue;
+			pfn = physp[j] >> PAGE_SHIFT;
+			page = pfn_to_page(pfn);
+			SetPageDirty(page);
+			put_page(page);
+		}
+		vfree(physp);
+		kvm->arch.slot_phys[slot_id] = NULL;
+	}
+}
+
 void kvmppc_core_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem)
 {
@@ -1236,8 +1254,6 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
 int kvmppc_core_init_vm(struct kvm *kvm)
 {
 	long r;
-	unsigned long npages = 1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER);
-	long err = -ENOMEM;
 	unsigned long lpcr;
 
 	/* Allocate hashed page table */
@@ -1247,19 +1263,9 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
 
-	kvm->arch.ram_pginfo = kzalloc(npages * sizeof(struct kvmppc_pginfo),
-				       GFP_KERNEL);
-	if (!kvm->arch.ram_pginfo) {
-		pr_err("kvmppc_core_init_vm: couldn't alloc %lu bytes\n",
-		       npages * sizeof(struct kvmppc_pginfo));
-		goto out_free;
-	}
-
-	kvm->arch.ram_npages = 0;
 	kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER;
 	kvm->arch.ram_porder = LARGE_PAGE_ORDER;
 	kvm->arch.rma = NULL;
-	kvm->arch.n_rma_pages = 0;
 
 	kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
 
@@ -1282,25 +1288,15 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 	kvm->arch.lpcr = lpcr;
 
 	return 0;
-
- out_free:
-	kvmppc_free_hpt(kvm);
-	return err;
 }
 
 void kvmppc_core_destroy_vm(struct kvm *kvm)
 {
-	struct kvmppc_pginfo *pginfo;
 	unsigned long i;
 
-	if (kvm->arch.ram_pginfo) {
-		pginfo = kvm->arch.ram_pginfo;
-		kvm->arch.ram_pginfo = NULL;
-		for (i = kvm->arch.n_rma_pages; i < kvm->arch.ram_npages; ++i)
-			if (pginfo[i].pfn)
-				put_page(pfn_to_page(pginfo[i].pfn));
-		kfree(pginfo);
-	}
+	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
+		unpin_slot(kvm, i);
+
 	if (kvm->arch.rma) {
 		kvm_release_rma(kvm->arch.rma);
 		kvm->arch.rma = NULL;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 6148493..84dae82 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -20,6 +20,25 @@
 #include <asm/synch.h>
 #include <asm/ppc-opcode.h>
 
+/*
+ * Since this file is built in even if KVM is a module, we need
+ * a local copy of this function for the case where kvm_main.c is
+ * modular.
+ */
+static struct kvm_memory_slot *builtin_gfn_to_memslot(struct kvm *kvm,
+						gfn_t gfn)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots)
+		if (gfn >= memslot->base_gfn &&
+		    gfn < memslot->base_gfn + memslot->npages)
+			return memslot;
+	return NULL;
+}
+
 /* Translate address of a vmalloc'd thing to a linear map address */
 static void *real_vmalloc_addr(void *x)
 {
@@ -59,10 +78,12 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 {
 	unsigned long porder;
 	struct kvm *kvm = vcpu->kvm;
-	unsigned long i, lpn, pa;
+	unsigned long i, gfn, lpn, pa;
 	unsigned long *hpte;
 	struct revmap_entry *rev;
 	unsigned long g_ptel = ptel;
+	struct kvm_memory_slot *memslot;
+	unsigned long *physp;
 
 	/* only handle 4k, 64k and 16M pages for now */
 	porder = 12;
@@ -80,12 +101,24 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 		} else
 			return H_PARAMETER;
 	}
-	lpn = (ptel & HPTE_R_RPN) >> kvm->arch.ram_porder;
-	if (lpn >= kvm->arch.ram_npages || porder > kvm->arch.ram_porder)
+	if (porder > kvm->arch.ram_porder)
 		return H_PARAMETER;
-	pa = kvm->arch.ram_pginfo[lpn].pfn << PAGE_SHIFT;
+
+	gfn = ((ptel & HPTE_R_RPN) & ~((1ul << porder) - 1)) >> PAGE_SHIFT;
+	memslot = builtin_gfn_to_memslot(kvm, gfn);
+	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)))
+		return H_PARAMETER;
+	physp = kvm->arch.slot_phys[memslot->id];
+	if (!physp)
+		return H_PARAMETER;
+
+	lpn = (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT);
+	physp = real_vmalloc_addr(physp + lpn);
+	pa = *physp;
 	if (!pa)
 		return H_PARAMETER;
+	pa &= PAGE_MASK;
+
 	/* Check WIMG */
 	if ((ptel & HPTE_R_WIMG) != HPTE_R_M &&
 	    (ptel & HPTE_R_WIMG) != (HPTE_R_W | HPTE_R_I | HPTE_R_M))
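[Editor's illustration of the flag scheme described in the commit message: a
slot_phys[][] entry holds a page-aligned physical address, so every bit below
PAGE_SHIFT is free to carry flags such as KVMPPC_GOT_PAGE, and PAGE_MASK
recovers the address on use. A small sketch follows; the helper names are
mine, not from the patch. Note that RMO-region entries are stored without the
flag, since those pages were never get_page'd.]

/* Sketch only: helper names are illustrative, not part of the patch. */

#define KVMPPC_GOT_PAGE	0x80	/* low-order flag: we hold a get_page() ref */

/* Pin time: pack the pfn and the flag (RMO entries pass got_page = false). */
static unsigned long slot_phys_encode(unsigned long pfn, bool got_page)
{
	return (pfn << PAGE_SHIFT) | (got_page ? KVMPPC_GOT_PAGE : 0);
}

/* Use (e.g. kvmppc_h_enter): flags sit below PAGE_SHIFT, so mask them off. */
static unsigned long slot_phys_to_pa(unsigned long entry)
{
	return entry & PAGE_MASK;
}

/* Teardown, as unpin_slot() does per entry: only release pages we pinned. */
static void slot_phys_release(unsigned long entry)
{
	if (entry & KVMPPC_GOT_PAGE) {
		struct page *page = pfn_to_page(entry >> PAGE_SHIFT);

		SetPageDirty(page);
		put_page(page);
	}
}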