@@ -713,7 +713,7 @@ struct kvm_vcpu_arch {
struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
struct kvm_mmu_memory_cache mmu_shadow_page_cache;
- struct kvm_mmu_memory_cache mmu_gfn_array_cache;
+ struct kvm_mmu_memory_cache mmu_shadowed_info_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
/*
@@ -656,7 +656,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
if (r)
return r;
if (maybe_indirect) {
- r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_gfn_array_cache,
+ r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_shadowed_info_cache,
PT64_ROOT_MAX_LEVEL);
if (r)
return r;
@@ -669,7 +669,7 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
- kvm_mmu_free_memory_cache(&vcpu->arch.mmu_gfn_array_cache);
+ kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadowed_info_cache);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
}
@@ -678,34 +678,68 @@ static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc)
kmem_cache_free(pte_list_desc_cache, pte_list_desc);
}
+static bool sp_has_gptes(struct kvm_mmu_page *sp);
+
static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
{
if (sp->role.passthrough)
return sp->gfn;
if (!sp->role.direct)
- return sp->gfns[index];
+ return sp->shadowed_translation[index] >> PAGE_SHIFT;
return sp->gfn + (index << ((sp->role.level - 1) * SPTE_LEVEL_BITS));
}
-static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
+/*
+ * For leaf SPTEs, fetch the *guest* access permissions being shadowed. Note
+ * that the SPTE itself may have more constrained access permissions than
+ * what the guest enforces. For example, a guest may create an executable
+ * huge PTE but KVM may disallow execution to mitigate iTLB multihit.
+ */
+static u32 kvm_mmu_page_get_access(struct kvm_mmu_page *sp, int index)
{
- if (sp->role.passthrough) {
- WARN_ON_ONCE(gfn != sp->gfn);
- return;
- }
+ if (sp_has_gptes(sp))
+ return sp->shadowed_translation[index] & ACC_ALL;
- if (!sp->role.direct) {
- sp->gfns[index] = gfn;
+ /*
+ * For direct MMUs (e.g. TDP or non-paging guests) or passthrough SPs,
+ * KVM is not shadowing any guest page tables, so the "guest access
+ * permissions" are just ACC_ALL.
+ *
+ * For direct SPs in indirect MMUs (shadow paging), i.e. when KVM
+ * is shadowing a guest huge page with small pages, the guest access
+ * permissions being shadowed are the access permissions of the huge
+ * page.
+ *
+ * In both cases, sp->role.access contains the correct access bits.
+ */
+ return sp->role.access;
+}
+
+static void kvm_mmu_page_set_translation(struct kvm_mmu_page *sp, int index,
+					 gfn_t gfn, u32 access)
+{
+ if (sp_has_gptes(sp)) {
+ sp->shadowed_translation[index] = (gfn << PAGE_SHIFT) | access;
return;
}
- if (WARN_ON(gfn != kvm_mmu_page_get_gfn(sp, index)))
- pr_err_ratelimited("gfn mismatch under direct page %llx "
- "(expected %llx, got %llx)\n",
- sp->gfn,
- kvm_mmu_page_get_gfn(sp, index), gfn);
+ WARN_ONCE(access != kvm_mmu_page_get_access(sp, index),
+ "access mismatch under %s page %llx (expected %u, got %u)\n",
+ sp->role.passthrough ? "passthrough" : "direct",
+ sp->gfn, kvm_mmu_page_get_access(sp, index), access);
+
+ WARN_ONCE(gfn != kvm_mmu_page_get_gfn(sp, index),
+ "gfn mismatch under %s page %llx (expected %llx, got %llx)\n",
+ sp->role.passthrough ? "passthrough" : "direct",
+ sp->gfn, kvm_mmu_page_get_gfn(sp, index), gfn);
+}
+
+static void kvm_mmu_page_set_access(struct kvm_mmu_page *sp, int index, u32 access)
+{
+ gfn_t gfn = kvm_mmu_page_get_gfn(sp, index);
+
+ kvm_mmu_page_set_translation(sp, index, gfn, access);
}
/*
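
The accessors above pack each shadowed translation into a single u64: the GFN
lives in the bits at and above PAGE_SHIFT, the KVM-format access bits below it.
A standalone sketch of that round trip, illustrative only and not part of the
patch (the ACC_* and PAGE_SHIFT values mirror KVM's definitions):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative constants mirroring KVM's mmu.h / page-size definitions. */
#define PAGE_SHIFT	12
#define ACC_EXEC_MASK	0x1u
#define ACC_WRITE_MASK	0x2u
#define ACC_USER_MASK	0x4u
#define ACC_ALL		(ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)

/* Same packing as kvm_mmu_page_set_translation(). */
static uint64_t pack_translation(uint64_t gfn, uint32_t access)
{
	return (gfn << PAGE_SHIFT) | access;
}

int main(void)
{
	uint64_t entry = pack_translation(0x12345, ACC_USER_MASK | ACC_WRITE_MASK);

	/* Same unpacking as kvm_mmu_page_get_gfn() / _get_access(). */
	assert((entry >> PAGE_SHIFT) == 0x12345);
	assert((entry & ACC_ALL) == (ACC_USER_MASK | ACC_WRITE_MASK));

	printf("gfn=%#llx access=%#x\n",
	       (unsigned long long)(entry >> PAGE_SHIFT),
	       (unsigned int)(entry & ACC_ALL));
	return 0;
}
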
@@ -1554,14 +1588,14 @@ static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
static void __rmap_add(struct kvm *kvm,
struct kvm_mmu_memory_cache *cache,
const struct kvm_memory_slot *slot,
- u64 *spte, gfn_t gfn)
+ u64 *spte, gfn_t gfn, u32 access)
{
struct kvm_mmu_page *sp;
struct kvm_rmap_head *rmap_head;
int rmap_count;
sp = sptep_to_sp(spte);
- kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
+ kvm_mmu_page_set_translation(sp, spte - sp->spt, gfn, access);
kvm_update_page_stats(kvm, sp->role.level, 1);
rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
@@ -1575,11 +1609,11 @@ static void __rmap_add(struct kvm *kvm,
}
static void rmap_add(struct kvm_vcpu *vcpu, const struct kvm_memory_slot *slot,
- u64 *spte, gfn_t gfn)
+ u64 *spte, gfn_t gfn, u32 access)
{
struct kvm_mmu_memory_cache *cache = &vcpu->arch.mmu_pte_list_desc_cache;
- __rmap_add(vcpu->kvm, cache, slot, spte, gfn);
+ __rmap_add(vcpu->kvm, cache, slot, spte, gfn, access);
}
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
@@ -1643,7 +1677,7 @@ static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp)
list_del(&sp->link);
free_page((unsigned long)sp->spt);
if (!sp->role.direct)
- free_page((unsigned long)sp->gfns);
+ free_page((unsigned long)sp->shadowed_translation);
kmem_cache_free(mmu_page_header_cache, sp);
}
@@ -2070,7 +2104,7 @@ static struct kvm_mmu_page *kvm_mmu_find_shadow_page(struct kvm *kvm,
struct shadow_page_caches {
struct kvm_mmu_memory_cache *page_header_cache;
struct kvm_mmu_memory_cache *shadow_page_cache;
- struct kvm_mmu_memory_cache *gfn_array_cache;
+ struct kvm_mmu_memory_cache *shadowed_info_cache;
};
static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm,
@@ -2084,7 +2118,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm,
sp = kvm_mmu_memory_cache_alloc(caches->page_header_cache);
sp->spt = kvm_mmu_memory_cache_alloc(caches->shadow_page_cache);
if (!role.direct)
- sp->gfns = kvm_mmu_memory_cache_alloc(caches->gfn_array_cache);
+ sp->shadowed_translation = kvm_mmu_memory_cache_alloc(caches->shadowed_info_cache);
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
@@ -2136,7 +2170,7 @@ static struct kvm_mmu_page *kvm_mmu_get_shadow_page(struct kvm_vcpu *vcpu,
struct shadow_page_caches caches = {
.page_header_cache = &vcpu->arch.mmu_page_header_cache,
.shadow_page_cache = &vcpu->arch.mmu_shadow_page_cache,
- .gfn_array_cache = &vcpu->arch.mmu_gfn_array_cache,
+ .shadowed_info_cache = &vcpu->arch.mmu_shadowed_info_cache,
};
return __kvm_mmu_get_shadow_page(vcpu->kvm, vcpu, &caches, gfn, role);
@@ -2785,7 +2819,10 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
if (!was_rmapped) {
WARN_ON_ONCE(ret == RET_PF_SPURIOUS);
- rmap_add(vcpu, slot, sptep, gfn);
+ rmap_add(vcpu, slot, sptep, gfn, pte_access);
+ } else {
+ /* Already rmapped but the pte_access bits may have changed. */
+ kvm_mmu_page_set_access(sp, sptep - sp->spt, pte_access);
}
return ret;
@@ -67,8 +67,21 @@ struct kvm_mmu_page {
gfn_t gfn;
u64 *spt;
- /* hold the gfn of each spte inside spt */
- gfn_t *gfns;
+
+ /*
+ * Stores the result of the guest translation being shadowed by each
+ * SPTE. KVM shadows two types of guest translations: nGPA -> GPA
+ * (shadow EPT/NPT) and GVA -> GPA (traditional shadow paging). In both
+ * cases the result of the translation is a GPA and a set of access
+ * constraints.
+ *
+ * The GFN is stored in bits PAGE_SHIFT and above, and the shadowed
+ * access permissions are stored in the lower bits. Note, for
+ * convenience and uniformity across guests, the access permissions are
+ * stored in KVM format (e.g. ACC_EXEC_MASK), not the raw guest format.
+ */
+ u64 *shadowed_translation;
+
/* Currently serving as active root */
union {
int root_count;
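
The layout documented above also relies on an implicit invariant: every
KVM-format access bit must sit below bit PAGE_SHIFT, or it would collide with
the packed GFN. A hypothetical compile-time check (not something the patch
adds) would look like:

/*
 * Hypothetical sanity check, not part of the patch: ACC_ALL is the union of
 * ACC_EXEC_MASK, ACC_WRITE_MASK and ACC_USER_MASK (bits 0-2), so it fits
 * well below bit PAGE_SHIFT (12), where the packed GFN begins.
 */
static_assert(ACC_ALL < (1ULL << PAGE_SHIFT),
	      "shadowed access bits would overlap the packed GFN");
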
@@ -985,7 +985,8 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
}
/*
- * Using the cached information from sp->gfns is safe because:
+ * Using the information in sp->shadowed_translation (kvm_mmu_page_get_gfn()) is
+ * safe because:
* - The spte has a reference to the struct page, so the pfn for a given gfn
* can't change unless all sptes pointing to it are nuked first.
*
@@ -1067,12 +1068,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
* "present" bit, as all other paging modes will create a
* read-only SPTE if pte_access is zero.
*/
- if ((!pte_access && !shadow_present_mask) || gfn != sp->gfns[i]) {
+ if ((!pte_access && !shadow_present_mask) ||
+ gfn != kvm_mmu_page_get_gfn(sp, i)) {
drop_spte(vcpu->kvm, &sp->spt[i]);
flush = true;
continue;
}
+ /* Update the shadowed access bits in case they changed. */
+ kvm_mmu_page_set_access(sp, i, pte_access);
+
sptep = &sp->spt[i];
spte = *sptep;
host_writable = spte & shadow_host_writable_mask;
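
Taken together, the sync path above makes a two-way decision per cached entry:
drop the SPTE when the guest changed the target GFN, and otherwise refresh the
cached access bits in place (the zero-access special case handled above is
ignored here). A toy, standalone model of that decision, not kernel code and
with invented names:

#include <stdbool.h>
#include <stdint.h>

/* Toy stand-in for one shadowed_translation[] slot plus its SPTE. */
struct toy_entry {
	uint64_t gfn;
	uint32_t access;
	bool present;
};

/*
 * Mirrors the shape of the decision in FNAME(sync_page): a GFN change drops
 * the entry, an access-only change just updates the cached access bits.
 */
static void toy_sync_one(struct toy_entry *e, uint64_t guest_gfn,
			 uint32_t guest_access)
{
	if (!e->present)
		return;

	if (e->gfn != guest_gfn) {
		e->present = false;	/* drop_spte() analogue */
		return;
	}

	e->access = guest_access;	/* kvm_mmu_page_set_access() analogue */
}

int main(void)
{
	struct toy_entry e = { .gfn = 0x1000, .access = 0x7, .present = true };

	toy_sync_one(&e, 0x1000, 0x5);	/* permissions changed: refresh in place */
	toy_sync_one(&e, 0x2000, 0x5);	/* target GFN changed: drop */
	return e.present ? 1 : 0;
}
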