diff mbox series

[RFC,06/37] KVM: MMU: Move struct kvm_mmu_page to common code

Message ID 20221208193857.4090582-7-dmatlack@google.com
State RFC
Headers show
Series KVM: Refactor the KVM/x86 TDP MMU into common code | expand

Commit Message

David Matlack Dec. 8, 2022, 7:38 p.m. UTC
Move struct kvm_mmu_page to common code and all x86-specific fields into
kvm_mmu_page_arch.

This commit increases the size of struct kvm_mmu_page by 64 bytes on
x86_64 (184 bytes -> 248 bytes). The size of this struct can be reduced
in future commits by moving TDP MMU root fields into a separate struct
and by dynamically allocating fields only used by the Shadow MMU.

No functional change intended.

Signed-off-by: David Matlack <dmatlack@google.com>
---
 arch/x86/include/asm/kvm/mmu_types.h |  62 ++++++++++++++
 arch/x86/include/asm/kvm_host.h      |   4 -
 arch/x86/kvm/mmu/mmu.c               | 122 ++++++++++++++-------------
 arch/x86/kvm/mmu/mmu_internal.h      |  83 ------------------
 arch/x86/kvm/mmu/mmutrace.h          |   4 +-
 arch/x86/kvm/mmu/paging_tmpl.h       |  10 +--
 arch/x86/kvm/mmu/tdp_mmu.c           |   8 +-
 arch/x86/kvm/mmu/tdp_mmu.h           |   2 +-
 include/kvm/mmu_types.h              |  32 ++++++-
 9 files changed, 167 insertions(+), 160 deletions(-)

Comments

Ben Gardon Dec. 12, 2022, 6:07 p.m. UTC | #1
On Thu, Dec 8, 2022 at 11:39 AM David Matlack <dmatlack@google.com> wrote:
>
> Move struct kvm_mmu_page to common code and all x86-specific fields into
> kvm_mmu_page_arch.
>
> This commit increases the size of struct kvm_mmu_page by 64 bytes on
> x86_64 (184 bytes -> 248 bytes). The size of this struct can be reduced
> in future commits by moving TDP MMU root fields into a separate struct
> and by dynamically allocating fields only used by the Shadow MMU.
>
> No functional change intended.
>
> Signed-off-by: David Matlack <dmatlack@google.com>

I haven't reviewed every line of the mechanical refactor of adding
"arch." all over the place, but at a high level, this looks like a
good split on struct kvm_mmu_page.

> ---
>  arch/x86/include/asm/kvm/mmu_types.h |  62 ++++++++++++++
>  arch/x86/include/asm/kvm_host.h      |   4 -
>  arch/x86/kvm/mmu/mmu.c               | 122 ++++++++++++++-------------
>  arch/x86/kvm/mmu/mmu_internal.h      |  83 ------------------
>  arch/x86/kvm/mmu/mmutrace.h          |   4 +-
>  arch/x86/kvm/mmu/paging_tmpl.h       |  10 +--
>  arch/x86/kvm/mmu/tdp_mmu.c           |   8 +-
>  arch/x86/kvm/mmu/tdp_mmu.h           |   2 +-
>  include/kvm/mmu_types.h              |  32 ++++++-
>  9 files changed, 167 insertions(+), 160 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm/mmu_types.h b/arch/x86/include/asm/kvm/mmu_types.h
> index 35f893ebab5a..affcb520b482 100644
> --- a/arch/x86/include/asm/kvm/mmu_types.h
> +++ b/arch/x86/include/asm/kvm/mmu_types.h
> @@ -2,6 +2,8 @@
>  #ifndef __ASM_KVM_MMU_TYPES_H
>  #define __ASM_KVM_MMU_TYPES_H
>
> +#include <linux/bitmap.h>
> +#include <linux/list.h>
>  #include <linux/types.h>
>
>  /*
> @@ -53,4 +55,64 @@ struct kvm_mmu_page_role_arch {
>         u16 passthrough:1;
>  };
>
> +struct kvm_rmap_head {
> +       unsigned long val;
> +};
> +
> +struct kvm_mmu_page_arch {
> +       struct hlist_node hash_link;
> +
> +       bool shadow_mmu_page;
> +       bool unsync;
> +       u8 mmu_valid_gen;
> +
> +        /*
> +         * The shadow page can't be replaced by an equivalent huge page
> +         * because it is being used to map an executable page in the guest
> +         * and the NX huge page mitigation is enabled.
> +         */
> +       bool nx_huge_page_disallowed;
> +
> +       /*
> +        * Stores the result of the guest translation being shadowed by each
> +        * SPTE.  KVM shadows two types of guest translations: nGPA -> GPA
> +        * (shadow EPT/NPT) and GVA -> GPA (traditional shadow paging). In both
> +        * cases the result of the translation is a GPA and a set of access
> +        * constraints.
> +        *
> +        * The GFN is stored in the upper bits (PAGE_SHIFT) and the shadowed
> +        * access permissions are stored in the lower bits. Note, for
> +        * convenience and uniformity across guests, the access permissions are
> +        * stored in KVM format (e.g.  ACC_EXEC_MASK) not the raw guest format.
> +        */
> +       u64 *shadowed_translation;
> +
> +       unsigned int unsync_children;
> +
> +       /* Rmap pointers to all parent sptes. */
> +       struct kvm_rmap_head parent_ptes;
> +
> +       DECLARE_BITMAP(unsync_child_bitmap, 512);
> +
> +       /*
> +        * Tracks shadow pages that, if zapped, would allow KVM to create an NX
> +        * huge page.  A shadow page will have nx_huge_page_disallowed set but
> +        * not be on the list if a huge page is disallowed for other reasons,
> +        * e.g. because KVM is shadowing a PTE at the same gfn, the memslot
> +        * isn't properly aligned, etc...
> +        */
> +       struct list_head possible_nx_huge_page_link;
> +
> +#ifdef CONFIG_X86_32
> +       /*
> +        * Used out of the mmu-lock to avoid reading spte values while an
> +        * update is in progress; see the comments in __get_spte_lockless().
> +        */
> +       int clear_spte_count;
> +#endif
> +
> +       /* Number of writes since the last time traversal visited this page.  */
> +       atomic_t write_flooding_count;
> +};
> +
>  #endif /* !__ASM_KVM_MMU_TYPES_H */
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index ebcd7a0dabef..f5743a652e10 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -329,10 +329,6 @@ union kvm_cpu_role {
>         };
>  };
>
> -struct kvm_rmap_head {
> -       unsigned long val;
> -};
> -
>  struct kvm_pio_request {
>         unsigned long linear_rip;
>         unsigned long count;
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 11cef930d5ed..e47f35878ab5 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -350,7 +350,7 @@ static void count_spte_clear(u64 *sptep, u64 spte)
>
>         /* Ensure the spte is completely set before we increase the count */
>         smp_wmb();
> -       sp->clear_spte_count++;
> +       sp->arch.clear_spte_count++;
>  }
>
>  static void __set_spte(u64 *sptep, u64 spte)
> @@ -432,7 +432,7 @@ static u64 __get_spte_lockless(u64 *sptep)
>         int count;
>
>  retry:
> -       count = sp->clear_spte_count;
> +       count = sp->arch.clear_spte_count;
>         smp_rmb();
>
>         spte.spte_low = orig->spte_low;
> @@ -442,7 +442,7 @@ static u64 __get_spte_lockless(u64 *sptep)
>         smp_rmb();
>
>         if (unlikely(spte.spte_low != orig->spte_low ||
> -             count != sp->clear_spte_count))
> +             count != sp->arch.clear_spte_count))
>                 goto retry;
>
>         return spte.spte;
> @@ -699,7 +699,7 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
>                 return sp->gfn;
>
>         if (!sp->role.arch.direct)
> -               return sp->shadowed_translation[index] >> PAGE_SHIFT;
> +               return sp->arch.shadowed_translation[index] >> PAGE_SHIFT;
>
>         return sp->gfn + (index << ((sp->role.level - 1) * SPTE_LEVEL_BITS));
>  }
> @@ -713,7 +713,7 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
>  static u32 kvm_mmu_page_get_access(struct kvm_mmu_page *sp, int index)
>  {
>         if (sp_has_gptes(sp))
> -               return sp->shadowed_translation[index] & ACC_ALL;
> +               return sp->arch.shadowed_translation[index] & ACC_ALL;
>
>         /*
>          * For direct MMUs (e.g. TDP or non-paging guests) or passthrough SPs,
> @@ -734,7 +734,7 @@ static void kvm_mmu_page_set_translation(struct kvm_mmu_page *sp, int index,
>                                          gfn_t gfn, unsigned int access)
>  {
>         if (sp_has_gptes(sp)) {
> -               sp->shadowed_translation[index] = (gfn << PAGE_SHIFT) | access;
> +               sp->arch.shadowed_translation[index] = (gfn << PAGE_SHIFT) | access;
>                 return;
>         }
>
> @@ -825,18 +825,18 @@ void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>          * on the list if KVM is reusing an existing shadow page, i.e. if KVM
>          * links a shadow page at multiple points.
>          */
> -       if (!list_empty(&sp->possible_nx_huge_page_link))
> +       if (!list_empty(&sp->arch.possible_nx_huge_page_link))
>                 return;
>
>         ++kvm->stat.nx_lpage_splits;
> -       list_add_tail(&sp->possible_nx_huge_page_link,
> +       list_add_tail(&sp->arch.possible_nx_huge_page_link,
>                       &kvm->arch.possible_nx_huge_pages);
>  }
>
>  static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
>                                  bool nx_huge_page_possible)
>  {
> -       sp->nx_huge_page_disallowed = true;
> +       sp->arch.nx_huge_page_disallowed = true;
>
>         if (nx_huge_page_possible)
>                 track_possible_nx_huge_page(kvm, sp);
> @@ -861,16 +861,16 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
>
>  void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>  {
> -       if (list_empty(&sp->possible_nx_huge_page_link))
> +       if (list_empty(&sp->arch.possible_nx_huge_page_link))
>                 return;
>
>         --kvm->stat.nx_lpage_splits;
> -       list_del_init(&sp->possible_nx_huge_page_link);
> +       list_del_init(&sp->arch.possible_nx_huge_page_link);
>  }
>
>  static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>  {
> -       sp->nx_huge_page_disallowed = false;
> +       sp->arch.nx_huge_page_disallowed = false;
>
>         untrack_possible_nx_huge_page(kvm, sp);
>  }
> @@ -1720,11 +1720,11 @@ static void kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>  static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp)
>  {
>         MMU_WARN_ON(!is_empty_shadow_page(sp->spt));
> -       hlist_del(&sp->hash_link);
> +       hlist_del(&sp->arch.hash_link);
>         list_del(&sp->link);
>         free_page((unsigned long)sp->spt);
>         if (!sp->role.arch.direct)
> -               free_page((unsigned long)sp->shadowed_translation);
> +               free_page((unsigned long)sp->arch.shadowed_translation);
>         kmem_cache_free(mmu_page_header_cache, sp);
>  }
>
> @@ -1739,13 +1739,13 @@ static void mmu_page_add_parent_pte(struct kvm_mmu_memory_cache *cache,
>         if (!parent_pte)
>                 return;
>
> -       pte_list_add(cache, parent_pte, &sp->parent_ptes);
> +       pte_list_add(cache, parent_pte, &sp->arch.parent_ptes);
>  }
>
>  static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
>                                        u64 *parent_pte)
>  {
> -       pte_list_remove(parent_pte, &sp->parent_ptes);
> +       pte_list_remove(parent_pte, &sp->arch.parent_ptes);
>  }
>
>  static void drop_parent_pte(struct kvm_mmu_page *sp,
> @@ -1761,7 +1761,7 @@ static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
>         u64 *sptep;
>         struct rmap_iterator iter;
>
> -       for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) {
> +       for_each_rmap_spte(&sp->arch.parent_ptes, &iter, sptep) {
>                 mark_unsync(sptep);
>         }
>  }
> @@ -1771,9 +1771,9 @@ static void mark_unsync(u64 *spte)
>         struct kvm_mmu_page *sp;
>
>         sp = sptep_to_sp(spte);
> -       if (__test_and_set_bit(spte_index(spte), sp->unsync_child_bitmap))
> +       if (__test_and_set_bit(spte_index(spte), sp->arch.unsync_child_bitmap))
>                 return;
> -       if (sp->unsync_children++)
> +       if (sp->arch.unsync_children++)
>                 return;
>         kvm_mmu_mark_parents_unsync(sp);
>  }
> @@ -1799,7 +1799,7 @@ static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
>  {
>         int i;
>
> -       if (sp->unsync)
> +       if (sp->arch.unsync)
>                 for (i=0; i < pvec->nr; i++)
>                         if (pvec->page[i].sp == sp)
>                                 return 0;
> @@ -1812,9 +1812,9 @@ static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
>
>  static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx)
>  {
> -       --sp->unsync_children;
> -       WARN_ON((int)sp->unsync_children < 0);
> -       __clear_bit(idx, sp->unsync_child_bitmap);
> +       --sp->arch.unsync_children;
> +       WARN_ON((int)sp->arch.unsync_children < 0);
> +       __clear_bit(idx, sp->arch.unsync_child_bitmap);
>  }
>
>  static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
> @@ -1822,7 +1822,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
>  {
>         int i, ret, nr_unsync_leaf = 0;
>
> -       for_each_set_bit(i, sp->unsync_child_bitmap, 512) {
> +       for_each_set_bit(i, sp->arch.unsync_child_bitmap, 512) {
>                 struct kvm_mmu_page *child;
>                 u64 ent = sp->spt[i];
>
> @@ -1833,7 +1833,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
>
>                 child = spte_to_child_sp(ent);
>
> -               if (child->unsync_children) {
> +               if (child->arch.unsync_children) {
>                         if (mmu_pages_add(pvec, child, i))
>                                 return -ENOSPC;
>
> @@ -1845,7 +1845,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
>                                 nr_unsync_leaf += ret;
>                         } else
>                                 return ret;
> -               } else if (child->unsync) {
> +               } else if (child->arch.unsync) {
>                         nr_unsync_leaf++;
>                         if (mmu_pages_add(pvec, child, i))
>                                 return -ENOSPC;
> @@ -1862,7 +1862,7 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
>                            struct kvm_mmu_pages *pvec)
>  {
>         pvec->nr = 0;
> -       if (!sp->unsync_children)
> +       if (!sp->arch.unsync_children)
>                 return 0;
>
>         mmu_pages_add(pvec, sp, INVALID_INDEX);
> @@ -1871,9 +1871,9 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
>
>  static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>  {
> -       WARN_ON(!sp->unsync);
> +       WARN_ON(!sp->arch.unsync);
>         trace_kvm_mmu_sync_page(sp);
> -       sp->unsync = 0;
> +       sp->arch.unsync = 0;
>         --kvm->stat.mmu_unsync;
>  }
>
> @@ -1894,7 +1894,7 @@ static bool sp_has_gptes(struct kvm_mmu_page *sp)
>  }
>
>  #define for_each_valid_sp(_kvm, _sp, _list)                            \
> -       hlist_for_each_entry(_sp, _list, hash_link)                     \
> +       hlist_for_each_entry(_sp, _list, arch.hash_link)                        \
>                 if (is_obsolete_sp((_kvm), (_sp))) {                    \
>                 } else
>
> @@ -1934,7 +1934,7 @@ static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
>
>         /* TDP MMU pages do not use the MMU generation. */
>         return !is_tdp_mmu_page(sp) &&
> -              unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
> +              unlikely(sp->arch.mmu_valid_gen != kvm->arch.mmu_valid_gen);
>  }
>
>  struct mmu_page_path {
> @@ -2006,7 +2006,7 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
>                 WARN_ON(idx == INVALID_INDEX);
>                 clear_unsync_child_bit(sp, idx);
>                 level++;
> -       } while (!sp->unsync_children);
> +       } while (!sp->arch.unsync_children);
>  }
>
>  static int mmu_sync_children(struct kvm_vcpu *vcpu,
> @@ -2053,7 +2053,7 @@ static int mmu_sync_children(struct kvm_vcpu *vcpu,
>
>  static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
>  {
> -       atomic_set(&sp->write_flooding_count,  0);
> +       atomic_set(&sp->arch.write_flooding_count,  0);
>  }
>
>  static void clear_sp_write_flooding_count(u64 *spte)
> @@ -2094,7 +2094,7 @@ static struct kvm_mmu_page *kvm_mmu_find_shadow_page(struct kvm *kvm,
>                          * Unsync pages must not be left as is, because the new
>                          * upper-level page will be write-protected.
>                          */
> -                       if (role.level > PG_LEVEL_4K && sp->unsync)
> +                       if (role.level > PG_LEVEL_4K && sp->arch.unsync)
>                                 kvm_mmu_prepare_zap_page(kvm, sp,
>                                                          &invalid_list);
>                         continue;
> @@ -2104,7 +2104,7 @@ static struct kvm_mmu_page *kvm_mmu_find_shadow_page(struct kvm *kvm,
>                 if (sp->role.arch.direct)
>                         goto out;
>
> -               if (sp->unsync) {
> +               if (sp->arch.unsync) {
>                         if (KVM_BUG_ON(!vcpu, kvm))
>                                 break;
>
> @@ -2163,25 +2163,26 @@ static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm,
>         sp = kvm_mmu_memory_cache_alloc(caches->page_header_cache);
>         sp->spt = kvm_mmu_memory_cache_alloc(caches->shadow_page_cache);
>         if (!role.arch.direct)
> -               sp->shadowed_translation = kvm_mmu_memory_cache_alloc(caches->shadowed_info_cache);
> +               sp->arch.shadowed_translation =
> +                       kvm_mmu_memory_cache_alloc(caches->shadowed_info_cache);
>
>         set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
>
> -       INIT_LIST_HEAD(&sp->possible_nx_huge_page_link);
> +       INIT_LIST_HEAD(&sp->arch.possible_nx_huge_page_link);
>
>         /*
>          * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages()
>          * depends on valid pages being added to the head of the list.  See
>          * comments in kvm_zap_obsolete_pages().
>          */
> -       sp->mmu_valid_gen = kvm->arch.mmu_valid_gen;
> +       sp->arch.mmu_valid_gen = kvm->arch.mmu_valid_gen;
>         list_add(&sp->link, &kvm->arch.active_mmu_pages);
>         kvm_account_mmu_page(kvm, sp);
>
>         sp->gfn = gfn;
>         sp->role = role;
> -       sp->shadow_mmu_page = true;
> -       hlist_add_head(&sp->hash_link, sp_list);
> +       sp->arch.shadow_mmu_page = true;
> +       hlist_add_head(&sp->arch.hash_link, sp_list);
>         if (sp_has_gptes(sp))
>                 account_shadowed(kvm, sp);
>
> @@ -2368,7 +2369,7 @@ static void __link_shadow_page(struct kvm *kvm,
>
>         mmu_page_add_parent_pte(cache, sp, sptep);
>
> -       if (sp->unsync_children || sp->unsync)
> +       if (sp->arch.unsync_children || sp->arch.unsync)
>                 mark_unsync(sptep);
>  }
>
> @@ -2421,7 +2422,8 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
>                          * avoids retaining a large number of stale nested SPs.
>                          */
>                         if (tdp_enabled && invalid_list &&
> -                           child->role.arch.guest_mode && !child->parent_ptes.val)
> +                           child->role.arch.guest_mode &&
> +                           !child->arch.parent_ptes.val)
>                                 return kvm_mmu_prepare_zap_page(kvm, child,
>                                                                 invalid_list);
>                 }
> @@ -2449,7 +2451,7 @@ static void kvm_mmu_unlink_parents(struct kvm_mmu_page *sp)
>         u64 *sptep;
>         struct rmap_iterator iter;
>
> -       while ((sptep = rmap_get_first(&sp->parent_ptes, &iter)))
> +       while ((sptep = rmap_get_first(&sp->arch.parent_ptes, &iter)))
>                 drop_parent_pte(sp, sptep);
>  }
>
> @@ -2496,7 +2498,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
>         if (!sp->role.invalid && sp_has_gptes(sp))
>                 unaccount_shadowed(kvm, sp);
>
> -       if (sp->unsync)
> +       if (sp->arch.unsync)
>                 kvm_unlink_unsync_page(kvm, sp);
>         if (!refcount_read(&sp->root_refcount)) {
>                 /* Count self */
> @@ -2527,7 +2529,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
>                 zapped_root = !is_obsolete_sp(kvm, sp);
>         }
>
> -       if (sp->nx_huge_page_disallowed)
> +       if (sp->arch.nx_huge_page_disallowed)
>                 unaccount_nx_huge_page(kvm, sp);
>
>         sp->role.invalid = 1;
> @@ -2704,7 +2706,7 @@ static void kvm_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>  {
>         trace_kvm_mmu_unsync_page(sp);
>         ++kvm->stat.mmu_unsync;
> -       sp->unsync = 1;
> +       sp->arch.unsync = 1;
>
>         kvm_mmu_mark_parents_unsync(sp);
>  }
> @@ -2739,7 +2741,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
>                 if (!can_unsync)
>                         return -EPERM;
>
> -               if (sp->unsync)
> +               if (sp->arch.unsync)
>                         continue;
>
>                 if (prefetch)
> @@ -2764,7 +2766,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
>                          * for write, i.e. unsync cannot transition from 0->1
>                          * while this CPU holds mmu_lock for read (or write).
>                          */
> -                       if (READ_ONCE(sp->unsync))
> +                       if (READ_ONCE(sp->arch.unsync))
>                                 continue;
>                 }
>
> @@ -2796,8 +2798,8 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
>          *                      2.2 Guest issues TLB flush.
>          *                          That causes a VM Exit.
>          *
> -        *                      2.3 Walking of unsync pages sees sp->unsync is
> -        *                          false and skips the page.
> +        *                      2.3 Walking of unsync pages sees sp->arch.unsync
> +        *                          is false and skips the page.
>          *
>          *                      2.4 Guest accesses GVA X.
>          *                          Since the mapping in the SP was not updated,
> @@ -2805,7 +2807,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
>          *                          gets used.
>          * 1.1 Host marks SP
>          *     as unsync
> -        *     (sp->unsync = true)
> +        *     (sp->arch.unsync = true)
>          *
>          * The write barrier below ensures that 1.1 happens before 1.2 and thus
>          * the situation in 2.4 does not arise.  It pairs with the read barrier
> @@ -3126,7 +3128,7 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_
>             cur_level == fault->goal_level &&
>             is_shadow_present_pte(spte) &&
>             !is_large_pte(spte) &&
> -           spte_to_child_sp(spte)->nx_huge_page_disallowed) {
> +           spte_to_child_sp(spte)->arch.nx_huge_page_disallowed) {
>                 /*
>                  * A small SPTE exists for this pfn, but FNAME(fetch),
>                  * direct_map(), or kvm_tdp_mmu_map() would like to create a
> @@ -3902,7 +3904,7 @@ static bool is_unsync_root(hpa_t root)
>
>         /*
>          * The read barrier orders the CPU's read of SPTE.W during the page table
> -        * walk before the reads of sp->unsync/sp->unsync_children here.
> +        * walk before the reads of sp->arch.{unsync,unsync_children} here.
>          *
>          * Even if another CPU was marking the SP as unsync-ed simultaneously,
>          * any guest page table changes are not guaranteed to be visible anyway
> @@ -3922,7 +3924,7 @@ static bool is_unsync_root(hpa_t root)
>         if (WARN_ON_ONCE(!sp))
>                 return false;
>
> -       if (sp->unsync || sp->unsync_children)
> +       if (sp->arch.unsync || sp->arch.unsync_children)
>                 return true;
>
>         return false;
> @@ -5510,8 +5512,8 @@ static bool detect_write_flooding(struct kvm_mmu_page *sp)
>         if (sp->role.level == PG_LEVEL_4K)
>                 return false;
>
> -       atomic_inc(&sp->write_flooding_count);
> -       return atomic_read(&sp->write_flooding_count) >= 3;
> +       atomic_inc(&sp->arch.write_flooding_count);
> +       return atomic_read(&sp->arch.write_flooding_count) >= 3;
>  }
>
>  /*
> @@ -6389,7 +6391,7 @@ static bool shadow_mmu_try_split_huge_pages(struct kvm *kvm,
>                         continue;
>
>                 /* SPs with level >PG_LEVEL_4K should never by unsync. */
> -               if (WARN_ON_ONCE(sp->unsync))
> +               if (WARN_ON_ONCE(sp->arch.unsync))
>                         continue;
>
>                 /* Don't bother splitting huge pages on invalid SPs. */
> @@ -6941,8 +6943,8 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
>                  */
>                 sp = list_first_entry(&kvm->arch.possible_nx_huge_pages,
>                                       struct kvm_mmu_page,
> -                                     possible_nx_huge_page_link);
> -               WARN_ON_ONCE(!sp->nx_huge_page_disallowed);
> +                                     arch.possible_nx_huge_page_link);
> +               WARN_ON_ONCE(!sp->arch.nx_huge_page_disallowed);
>                 WARN_ON_ONCE(!sp->role.arch.direct);
>
>                 /*
> @@ -6977,7 +6979,7 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
>                         flush |= kvm_tdp_mmu_zap_sp(kvm, sp);
>                 else
>                         kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
> -               WARN_ON_ONCE(sp->nx_huge_page_disallowed);
> +               WARN_ON_ONCE(sp->arch.nx_huge_page_disallowed);
>
>                 if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
>                         kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index fd4990c8b0e9..af2ae4887e35 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -44,89 +44,6 @@ extern bool dbg;
>  #define INVALID_PAE_ROOT       0
>  #define IS_VALID_PAE_ROOT(x)   (!!(x))
>
> -struct kvm_mmu_page {
> -       /*
> -        * Note, "link" through "spt" fit in a single 64 byte cache line on
> -        * 64-bit kernels, keep it that way unless there's a reason not to.
> -        */
> -       struct list_head link;
> -       struct hlist_node hash_link;
> -
> -       bool shadow_mmu_page;
> -       bool unsync;
> -       u8 mmu_valid_gen;
> -
> -        /*
> -         * The shadow page can't be replaced by an equivalent huge page
> -         * because it is being used to map an executable page in the guest
> -         * and the NX huge page mitigation is enabled.
> -         */
> -       bool nx_huge_page_disallowed;
> -
> -       /*
> -        * The following two entries are used to key the shadow page in the
> -        * hash table.
> -        */
> -       union kvm_mmu_page_role role;
> -       gfn_t gfn;
> -
> -       u64 *spt;
> -
> -       /*
> -        * Stores the result of the guest translation being shadowed by each
> -        * SPTE.  KVM shadows two types of guest translations: nGPA -> GPA
> -        * (shadow EPT/NPT) and GVA -> GPA (traditional shadow paging). In both
> -        * cases the result of the translation is a GPA and a set of access
> -        * constraints.
> -        *
> -        * The GFN is stored in the upper bits (PAGE_SHIFT) and the shadowed
> -        * access permissions are stored in the lower bits. Note, for
> -        * convenience and uniformity across guests, the access permissions are
> -        * stored in KVM format (e.g.  ACC_EXEC_MASK) not the raw guest format.
> -        */
> -       u64 *shadowed_translation;
> -
> -       /* Currently serving as active root */
> -       refcount_t root_refcount;
> -
> -       unsigned int unsync_children;
> -       union {
> -               struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
> -               tdp_ptep_t ptep;
> -       };
> -       union {
> -               DECLARE_BITMAP(unsync_child_bitmap, 512);
> -               struct {
> -                       struct work_struct tdp_mmu_async_work;
> -                       void *tdp_mmu_async_data;
> -               };
> -       };
> -
> -       /*
> -        * Tracks shadow pages that, if zapped, would allow KVM to create an NX
> -        * huge page.  A shadow page will have nx_huge_page_disallowed set but
> -        * not be on the list if a huge page is disallowed for other reasons,
> -        * e.g. because KVM is shadowing a PTE at the same gfn, the memslot
> -        * isn't properly aligned, etc...
> -        */
> -       struct list_head possible_nx_huge_page_link;
> -#ifdef CONFIG_X86_32
> -       /*
> -        * Used out of the mmu-lock to avoid reading spte values while an
> -        * update is in progress; see the comments in __get_spte_lockless().
> -        */
> -       int clear_spte_count;
> -#endif
> -
> -       /* Number of writes since the last time traversal visited this page.  */
> -       atomic_t write_flooding_count;
> -
> -#ifdef CONFIG_X86_64
> -       /* Used for freeing the page asynchronously if it is a TDP MMU page. */
> -       struct rcu_head rcu_head;
> -#endif
> -};
> -
>  extern struct kmem_cache *mmu_page_header_cache;
>
>  static inline bool kvm_mmu_page_ad_need_write_protect(struct kvm_mmu_page *sp)
> diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
> index ffd10ce3eae3..335f26dabdf3 100644
> --- a/arch/x86/kvm/mmu/mmutrace.h
> +++ b/arch/x86/kvm/mmu/mmutrace.h
> @@ -16,11 +16,11 @@
>         __field(bool, unsync)
>
>  #define KVM_MMU_PAGE_ASSIGN(sp)                                \
> -       __entry->mmu_valid_gen = sp->mmu_valid_gen;     \
> +       __entry->mmu_valid_gen = sp->arch.mmu_valid_gen;        \
>         __entry->gfn = sp->gfn;                         \
>         __entry->role = sp->role.word;                  \
>         __entry->root_count = refcount_read(&sp->root_refcount);        \
> -       __entry->unsync = sp->unsync;
> +       __entry->unsync = sp->arch.unsync;
>
>  #define KVM_MMU_PAGE_PRINTK() ({                                       \
>         const char *saved_ptr = trace_seq_buffer_ptr(p);                \
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index e15ec1c473da..18bb92b70a01 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -671,7 +671,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
>                          * KVM_REQ_MMU_SYNC is not necessary but it
>                          * expedites the process.
>                          */
> -                       if (sp->unsync_children &&
> +                       if (sp->arch.unsync_children &&
>                             mmu_sync_children(vcpu, sp, false))
>                                 return RET_PF_RETRY;
>                 }
> @@ -921,7 +921,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
>                         pt_element_t gpte;
>                         gpa_t pte_gpa;
>
> -                       if (!sp->unsync)
> +                       if (!sp->arch.unsync)
>                                 break;
>
>                         pte_gpa = FNAME(get_level1_sp_gpa)(sp);
> @@ -942,7 +942,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
>                         FNAME(prefetch_gpte)(vcpu, sp, sptep, gpte, false);
>                 }
>
> -               if (!sp->unsync_children)
> +               if (!sp->arch.unsync_children)
>                         break;
>         }
>         write_unlock(&vcpu->kvm->mmu_lock);
> @@ -974,8 +974,8 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
>  }
>
>  /*
> - * Using the information in sp->shadowed_translation (kvm_mmu_page_get_gfn()) is
> - * safe because:
> + * Using the information in sp->arch.shadowed_translation
> + * (kvm_mmu_page_get_gfn()) is safe because:
>   * - The spte has a reference to the struct page, so the pfn for a given gfn
>   *   can't change unless all sptes pointing to it are nuked first.
>   *
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 34d674080170..66231c7ed31e 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -270,7 +270,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu)
>  static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep,
>                             gfn_t gfn, union kvm_mmu_page_role role)
>  {
> -       INIT_LIST_HEAD(&sp->possible_nx_huge_page_link);
> +       INIT_LIST_HEAD(&sp->arch.possible_nx_huge_page_link);
>
>         set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
>
> @@ -385,7 +385,7 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
>  {
>         tdp_unaccount_mmu_page(kvm, sp);
>
> -       if (!sp->nx_huge_page_disallowed)
> +       if (!sp->arch.nx_huge_page_disallowed)
>                 return;
>
>         if (shared)
> @@ -393,7 +393,7 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
>         else
>                 lockdep_assert_held_write(&kvm->mmu_lock);
>
> -       sp->nx_huge_page_disallowed = false;
> +       sp->arch.nx_huge_page_disallowed = false;
>         untrack_possible_nx_huge_page(kvm, sp);
>
>         if (shared)
> @@ -1181,7 +1181,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>                 sp = tdp_mmu_alloc_sp(vcpu);
>                 tdp_mmu_init_child_sp(sp, &iter);
>
> -               sp->nx_huge_page_disallowed = fault->huge_page_disallowed;
> +               sp->arch.nx_huge_page_disallowed = fault->huge_page_disallowed;
>
>                 if (is_shadow_present_pte(iter.old_spte))
>                         r = tdp_mmu_split_huge_page(kvm, &iter, sp, true);
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
> index 19d3153051a3..e6a929089715 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.h
> +++ b/arch/x86/kvm/mmu/tdp_mmu.h
> @@ -73,7 +73,7 @@ u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr,
>  #ifdef CONFIG_X86_64
>  static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp)
>  {
> -       return !sp->shadow_mmu_page;
> +       return !sp->arch.shadow_mmu_page;
>  }
>  #else
>  static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; }
> diff --git a/include/kvm/mmu_types.h b/include/kvm/mmu_types.h
> index 14099956fdac..a9da33d4baa8 100644
> --- a/include/kvm/mmu_types.h
> +++ b/include/kvm/mmu_types.h
> @@ -3,8 +3,11 @@
>  #define __KVM_MMU_TYPES_H
>
>  #include <linux/bug.h>
> -#include <linux/types.h>
> +#include <linux/kvm_types.h>
> +#include <linux/refcount.h>
>  #include <linux/stddef.h>
> +#include <linux/types.h>
> +#include <linux/workqueue.h>
>
>  #include <asm/kvm/mmu_types.h>
>
> @@ -36,4 +39,31 @@ static_assert(sizeof(union kvm_mmu_page_role) == sizeof_field(union kvm_mmu_page
>
>  typedef u64 __rcu *tdp_ptep_t;
>
> +struct kvm_mmu_page {
> +       struct list_head link;
> +
> +       union kvm_mmu_page_role role;
> +
> +       /* The start of the GFN region mapped by this shadow page. */
> +       gfn_t gfn;

I'd like to put in a vote for getting rid of the "shadow page" /
"shadow page table entry" terminology through this refactor.
I can't count the number of times folks trying to understand the MMU
have gotten confused by the overloaded terminology.

> +
> +       /* The page table page. */
> +       u64 *spt;
> +
> +       /* The PTE that points to this shadow page. */
> +       tdp_ptep_t ptep;

Totally fine to not change this, but something like parent_ptep would
be clearer. The Shadow MMU uses parent_pteps, which I think is much
clearer.
Could also change the comment to just:
/* The PTE that points to this kvm_mmu_page. */

> +
> +       /* Used for freeing TDP MMU pages asynchronously. */
> +       struct rcu_head rcu_head;
> +
> +       /* The number of references to this shadow page as a root. */
> +       refcount_t root_refcount;
> +
> +       /* Used for tearing down an entire page table tree. */
> +       struct work_struct tdp_mmu_async_work;
> +       void *tdp_mmu_async_data;
> +
> +       struct kvm_mmu_page_arch arch;
> +};
> +
>  #endif /* !__KVM_MMU_TYPES_H */
> --
> 2.39.0.rc1.256.g54fd8350bd-goog
>
Paolo Bonzini Dec. 12, 2022, 10:32 p.m. UTC | #2
On 12/8/22 20:38, David Matlack wrote:
> This commit increases the size of struct kvm_mmu_page by 64 bytes on
> x86_64 (184 bytes -> 248 bytes). The size of this struct can be reduced
> in future commits by moving TDP MMU root fields into a separate struct
> and by dynamically allocating fields only used by the Shadow MMU.

I think it's already possible to use a union like

	union {
		struct kvm_mmu_page_arch arch;
		struct {
			struct work_struct work;
			void *data;
		};
	};

Paolo
David Matlack Dec. 12, 2022, 10:49 p.m. UTC | #3
On Mon, Dec 12, 2022 at 2:32 PM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 12/8/22 20:38, David Matlack wrote:
> > This commit increases the size of struct kvm_mmu_page by 64 bytes on
> > x86_64 (184 bytes -> 248 bytes). The size of this struct can be reduced
> > in future commits by moving TDP MMU root fields into a separate struct
> > and by dynamically allocating fields only used by the Shadow MMU.
>
> I think it's already possible to use a union like
>
>         union {
>                 struct kvm_mmu_page_arch arch;
>                 struct {
>                         struct work_struct work;
>                         void *data;
>                 };
>         };

That would potentially corrupt
kvm_mmu_page_arch.nx_huge_page_disallowed and
possible_nx_huge_page_link, both of which still need to be used by the
TDP MMU on x86.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm/mmu_types.h b/arch/x86/include/asm/kvm/mmu_types.h
index 35f893ebab5a..affcb520b482 100644
--- a/arch/x86/include/asm/kvm/mmu_types.h
+++ b/arch/x86/include/asm/kvm/mmu_types.h
@@ -2,6 +2,8 @@ 
 #ifndef __ASM_KVM_MMU_TYPES_H
 #define __ASM_KVM_MMU_TYPES_H
 
+#include <linux/bitmap.h>
+#include <linux/list.h>
 #include <linux/types.h>
 
 /*
@@ -53,4 +55,64 @@  struct kvm_mmu_page_role_arch {
 	u16 passthrough:1;
 };
 
+struct kvm_rmap_head {
+	unsigned long val;
+};
+
+struct kvm_mmu_page_arch {
+	struct hlist_node hash_link;
+
+	bool shadow_mmu_page;
+	bool unsync;
+	u8 mmu_valid_gen;
+
+	 /*
+	  * The shadow page can't be replaced by an equivalent huge page
+	  * because it is being used to map an executable page in the guest
+	  * and the NX huge page mitigation is enabled.
+	  */
+	bool nx_huge_page_disallowed;
+
+	/*
+	 * Stores the result of the guest translation being shadowed by each
+	 * SPTE.  KVM shadows two types of guest translations: nGPA -> GPA
+	 * (shadow EPT/NPT) and GVA -> GPA (traditional shadow paging). In both
+	 * cases the result of the translation is a GPA and a set of access
+	 * constraints.
+	 *
+	 * The GFN is stored in the upper bits (PAGE_SHIFT) and the shadowed
+	 * access permissions are stored in the lower bits. Note, for
+	 * convenience and uniformity across guests, the access permissions are
+	 * stored in KVM format (e.g.  ACC_EXEC_MASK) not the raw guest format.
+	 */
+	u64 *shadowed_translation;
+
+	unsigned int unsync_children;
+
+	/* Rmap pointers to all parent sptes. */
+	struct kvm_rmap_head parent_ptes;
+
+	DECLARE_BITMAP(unsync_child_bitmap, 512);
+
+	/*
+	 * Tracks shadow pages that, if zapped, would allow KVM to create an NX
+	 * huge page.  A shadow page will have nx_huge_page_disallowed set but
+	 * not be on the list if a huge page is disallowed for other reasons,
+	 * e.g. because KVM is shadowing a PTE at the same gfn, the memslot
+	 * isn't properly aligned, etc...
+	 */
+	struct list_head possible_nx_huge_page_link;
+
+#ifdef CONFIG_X86_32
+	/*
+	 * Used out of the mmu-lock to avoid reading spte values while an
+	 * update is in progress; see the comments in __get_spte_lockless().
+	 */
+	int clear_spte_count;
+#endif
+
+	/* Number of writes since the last time traversal visited this page.  */
+	atomic_t write_flooding_count;
+};
+
 #endif /* !__ASM_KVM_MMU_TYPES_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ebcd7a0dabef..f5743a652e10 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -329,10 +329,6 @@  union kvm_cpu_role {
 	};
 };
 
-struct kvm_rmap_head {
-	unsigned long val;
-};
-
 struct kvm_pio_request {
 	unsigned long linear_rip;
 	unsigned long count;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 11cef930d5ed..e47f35878ab5 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -350,7 +350,7 @@  static void count_spte_clear(u64 *sptep, u64 spte)
 
 	/* Ensure the spte is completely set before we increase the count */
 	smp_wmb();
-	sp->clear_spte_count++;
+	sp->arch.clear_spte_count++;
 }
 
 static void __set_spte(u64 *sptep, u64 spte)
@@ -432,7 +432,7 @@  static u64 __get_spte_lockless(u64 *sptep)
 	int count;
 
 retry:
-	count = sp->clear_spte_count;
+	count = sp->arch.clear_spte_count;
 	smp_rmb();
 
 	spte.spte_low = orig->spte_low;
@@ -442,7 +442,7 @@  static u64 __get_spte_lockless(u64 *sptep)
 	smp_rmb();
 
 	if (unlikely(spte.spte_low != orig->spte_low ||
-	      count != sp->clear_spte_count))
+	      count != sp->arch.clear_spte_count))
 		goto retry;
 
 	return spte.spte;
@@ -699,7 +699,7 @@  static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
 		return sp->gfn;
 
 	if (!sp->role.arch.direct)
-		return sp->shadowed_translation[index] >> PAGE_SHIFT;
+		return sp->arch.shadowed_translation[index] >> PAGE_SHIFT;
 
 	return sp->gfn + (index << ((sp->role.level - 1) * SPTE_LEVEL_BITS));
 }
@@ -713,7 +713,7 @@  static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
 static u32 kvm_mmu_page_get_access(struct kvm_mmu_page *sp, int index)
 {
 	if (sp_has_gptes(sp))
-		return sp->shadowed_translation[index] & ACC_ALL;
+		return sp->arch.shadowed_translation[index] & ACC_ALL;
 
 	/*
 	 * For direct MMUs (e.g. TDP or non-paging guests) or passthrough SPs,
@@ -734,7 +734,7 @@  static void kvm_mmu_page_set_translation(struct kvm_mmu_page *sp, int index,
 					 gfn_t gfn, unsigned int access)
 {
 	if (sp_has_gptes(sp)) {
-		sp->shadowed_translation[index] = (gfn << PAGE_SHIFT) | access;
+		sp->arch.shadowed_translation[index] = (gfn << PAGE_SHIFT) | access;
 		return;
 	}
 
@@ -825,18 +825,18 @@  void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 	 * on the list if KVM is reusing an existing shadow page, i.e. if KVM
 	 * links a shadow page at multiple points.
 	 */
-	if (!list_empty(&sp->possible_nx_huge_page_link))
+	if (!list_empty(&sp->arch.possible_nx_huge_page_link))
 		return;
 
 	++kvm->stat.nx_lpage_splits;
-	list_add_tail(&sp->possible_nx_huge_page_link,
+	list_add_tail(&sp->arch.possible_nx_huge_page_link,
 		      &kvm->arch.possible_nx_huge_pages);
 }
 
 static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 				 bool nx_huge_page_possible)
 {
-	sp->nx_huge_page_disallowed = true;
+	sp->arch.nx_huge_page_disallowed = true;
 
 	if (nx_huge_page_possible)
 		track_possible_nx_huge_page(kvm, sp);
@@ -861,16 +861,16 @@  static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-	if (list_empty(&sp->possible_nx_huge_page_link))
+	if (list_empty(&sp->arch.possible_nx_huge_page_link))
 		return;
 
 	--kvm->stat.nx_lpage_splits;
-	list_del_init(&sp->possible_nx_huge_page_link);
+	list_del_init(&sp->arch.possible_nx_huge_page_link);
 }
 
 static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-	sp->nx_huge_page_disallowed = false;
+	sp->arch.nx_huge_page_disallowed = false;
 
 	untrack_possible_nx_huge_page(kvm, sp);
 }
@@ -1720,11 +1720,11 @@  static void kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp)
 {
 	MMU_WARN_ON(!is_empty_shadow_page(sp->spt));
-	hlist_del(&sp->hash_link);
+	hlist_del(&sp->arch.hash_link);
 	list_del(&sp->link);
 	free_page((unsigned long)sp->spt);
 	if (!sp->role.arch.direct)
-		free_page((unsigned long)sp->shadowed_translation);
+		free_page((unsigned long)sp->arch.shadowed_translation);
 	kmem_cache_free(mmu_page_header_cache, sp);
 }
 
@@ -1739,13 +1739,13 @@  static void mmu_page_add_parent_pte(struct kvm_mmu_memory_cache *cache,
 	if (!parent_pte)
 		return;
 
-	pte_list_add(cache, parent_pte, &sp->parent_ptes);
+	pte_list_add(cache, parent_pte, &sp->arch.parent_ptes);
 }
 
 static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
 				       u64 *parent_pte)
 {
-	pte_list_remove(parent_pte, &sp->parent_ptes);
+	pte_list_remove(parent_pte, &sp->arch.parent_ptes);
 }
 
 static void drop_parent_pte(struct kvm_mmu_page *sp,
@@ -1761,7 +1761,7 @@  static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
 	u64 *sptep;
 	struct rmap_iterator iter;
 
-	for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) {
+	for_each_rmap_spte(&sp->arch.parent_ptes, &iter, sptep) {
 		mark_unsync(sptep);
 	}
 }
@@ -1771,9 +1771,9 @@  static void mark_unsync(u64 *spte)
 	struct kvm_mmu_page *sp;
 
 	sp = sptep_to_sp(spte);
-	if (__test_and_set_bit(spte_index(spte), sp->unsync_child_bitmap))
+	if (__test_and_set_bit(spte_index(spte), sp->arch.unsync_child_bitmap))
 		return;
-	if (sp->unsync_children++)
+	if (sp->arch.unsync_children++)
 		return;
 	kvm_mmu_mark_parents_unsync(sp);
 }
@@ -1799,7 +1799,7 @@  static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
 {
 	int i;
 
-	if (sp->unsync)
+	if (sp->arch.unsync)
 		for (i=0; i < pvec->nr; i++)
 			if (pvec->page[i].sp == sp)
 				return 0;
@@ -1812,9 +1812,9 @@  static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
 
 static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx)
 {
-	--sp->unsync_children;
-	WARN_ON((int)sp->unsync_children < 0);
-	__clear_bit(idx, sp->unsync_child_bitmap);
+	--sp->arch.unsync_children;
+	WARN_ON((int)sp->arch.unsync_children < 0);
+	__clear_bit(idx, sp->arch.unsync_child_bitmap);
 }
 
 static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
@@ -1822,7 +1822,7 @@  static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
 {
 	int i, ret, nr_unsync_leaf = 0;
 
-	for_each_set_bit(i, sp->unsync_child_bitmap, 512) {
+	for_each_set_bit(i, sp->arch.unsync_child_bitmap, 512) {
 		struct kvm_mmu_page *child;
 		u64 ent = sp->spt[i];
 
@@ -1833,7 +1833,7 @@  static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
 
 		child = spte_to_child_sp(ent);
 
-		if (child->unsync_children) {
+		if (child->arch.unsync_children) {
 			if (mmu_pages_add(pvec, child, i))
 				return -ENOSPC;
 
@@ -1845,7 +1845,7 @@  static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
 				nr_unsync_leaf += ret;
 			} else
 				return ret;
-		} else if (child->unsync) {
+		} else if (child->arch.unsync) {
 			nr_unsync_leaf++;
 			if (mmu_pages_add(pvec, child, i))
 				return -ENOSPC;
@@ -1862,7 +1862,7 @@  static int mmu_unsync_walk(struct kvm_mmu_page *sp,
 			   struct kvm_mmu_pages *pvec)
 {
 	pvec->nr = 0;
-	if (!sp->unsync_children)
+	if (!sp->arch.unsync_children)
 		return 0;
 
 	mmu_pages_add(pvec, sp, INVALID_INDEX);
@@ -1871,9 +1871,9 @@  static int mmu_unsync_walk(struct kvm_mmu_page *sp,
 
 static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-	WARN_ON(!sp->unsync);
+	WARN_ON(!sp->arch.unsync);
 	trace_kvm_mmu_sync_page(sp);
-	sp->unsync = 0;
+	sp->arch.unsync = 0;
 	--kvm->stat.mmu_unsync;
 }
 
@@ -1894,7 +1894,7 @@  static bool sp_has_gptes(struct kvm_mmu_page *sp)
 }
 
 #define for_each_valid_sp(_kvm, _sp, _list)				\
-	hlist_for_each_entry(_sp, _list, hash_link)			\
+	hlist_for_each_entry(_sp, _list, arch.hash_link)			\
 		if (is_obsolete_sp((_kvm), (_sp))) {			\
 		} else
 
@@ -1934,7 +1934,7 @@  static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 	/* TDP MMU pages do not use the MMU generation. */
 	return !is_tdp_mmu_page(sp) &&
-	       unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
+	       unlikely(sp->arch.mmu_valid_gen != kvm->arch.mmu_valid_gen);
 }
 
 struct mmu_page_path {
@@ -2006,7 +2006,7 @@  static void mmu_pages_clear_parents(struct mmu_page_path *parents)
 		WARN_ON(idx == INVALID_INDEX);
 		clear_unsync_child_bit(sp, idx);
 		level++;
-	} while (!sp->unsync_children);
+	} while (!sp->arch.unsync_children);
 }
 
 static int mmu_sync_children(struct kvm_vcpu *vcpu,
@@ -2053,7 +2053,7 @@  static int mmu_sync_children(struct kvm_vcpu *vcpu,
 
 static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
 {
-	atomic_set(&sp->write_flooding_count,  0);
+	atomic_set(&sp->arch.write_flooding_count,  0);
 }
 
 static void clear_sp_write_flooding_count(u64 *spte)
@@ -2094,7 +2094,7 @@  static struct kvm_mmu_page *kvm_mmu_find_shadow_page(struct kvm *kvm,
 			 * Unsync pages must not be left as is, because the new
 			 * upper-level page will be write-protected.
 			 */
-			if (role.level > PG_LEVEL_4K && sp->unsync)
+			if (role.level > PG_LEVEL_4K && sp->arch.unsync)
 				kvm_mmu_prepare_zap_page(kvm, sp,
 							 &invalid_list);
 			continue;
@@ -2104,7 +2104,7 @@  static struct kvm_mmu_page *kvm_mmu_find_shadow_page(struct kvm *kvm,
 		if (sp->role.arch.direct)
 			goto out;
 
-		if (sp->unsync) {
+		if (sp->arch.unsync) {
 			if (KVM_BUG_ON(!vcpu, kvm))
 				break;
 
@@ -2163,25 +2163,26 @@  static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm,
 	sp = kvm_mmu_memory_cache_alloc(caches->page_header_cache);
 	sp->spt = kvm_mmu_memory_cache_alloc(caches->shadow_page_cache);
 	if (!role.arch.direct)
-		sp->shadowed_translation = kvm_mmu_memory_cache_alloc(caches->shadowed_info_cache);
+		sp->arch.shadowed_translation =
+			kvm_mmu_memory_cache_alloc(caches->shadowed_info_cache);
 
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 
-	INIT_LIST_HEAD(&sp->possible_nx_huge_page_link);
+	INIT_LIST_HEAD(&sp->arch.possible_nx_huge_page_link);
 
 	/*
 	 * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages()
 	 * depends on valid pages being added to the head of the list.  See
 	 * comments in kvm_zap_obsolete_pages().
 	 */
-	sp->mmu_valid_gen = kvm->arch.mmu_valid_gen;
+	sp->arch.mmu_valid_gen = kvm->arch.mmu_valid_gen;
 	list_add(&sp->link, &kvm->arch.active_mmu_pages);
 	kvm_account_mmu_page(kvm, sp);
 
 	sp->gfn = gfn;
 	sp->role = role;
-	sp->shadow_mmu_page = true;
-	hlist_add_head(&sp->hash_link, sp_list);
+	sp->arch.shadow_mmu_page = true;
+	hlist_add_head(&sp->arch.hash_link, sp_list);
 	if (sp_has_gptes(sp))
 		account_shadowed(kvm, sp);
 
@@ -2368,7 +2369,7 @@  static void __link_shadow_page(struct kvm *kvm,
 
 	mmu_page_add_parent_pte(cache, sp, sptep);
 
-	if (sp->unsync_children || sp->unsync)
+	if (sp->arch.unsync_children || sp->arch.unsync)
 		mark_unsync(sptep);
 }
 
@@ -2421,7 +2422,8 @@  static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
 			 * avoids retaining a large number of stale nested SPs.
 			 */
 			if (tdp_enabled && invalid_list &&
-			    child->role.arch.guest_mode && !child->parent_ptes.val)
+			    child->role.arch.guest_mode &&
+			    !child->arch.parent_ptes.val)
 				return kvm_mmu_prepare_zap_page(kvm, child,
 								invalid_list);
 		}
@@ -2449,7 +2451,7 @@  static void kvm_mmu_unlink_parents(struct kvm_mmu_page *sp)
 	u64 *sptep;
 	struct rmap_iterator iter;
 
-	while ((sptep = rmap_get_first(&sp->parent_ptes, &iter)))
+	while ((sptep = rmap_get_first(&sp->arch.parent_ptes, &iter)))
 		drop_parent_pte(sp, sptep);
 }
 
@@ -2496,7 +2498,7 @@  static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
 	if (!sp->role.invalid && sp_has_gptes(sp))
 		unaccount_shadowed(kvm, sp);
 
-	if (sp->unsync)
+	if (sp->arch.unsync)
 		kvm_unlink_unsync_page(kvm, sp);
 	if (!refcount_read(&sp->root_refcount)) {
 		/* Count self */
@@ -2527,7 +2529,7 @@  static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
 		zapped_root = !is_obsolete_sp(kvm, sp);
 	}
 
-	if (sp->nx_huge_page_disallowed)
+	if (sp->arch.nx_huge_page_disallowed)
 		unaccount_nx_huge_page(kvm, sp);
 
 	sp->role.invalid = 1;
@@ -2704,7 +2706,7 @@  static void kvm_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	trace_kvm_mmu_unsync_page(sp);
 	++kvm->stat.mmu_unsync;
-	sp->unsync = 1;
+	sp->arch.unsync = 1;
 
 	kvm_mmu_mark_parents_unsync(sp);
 }
@@ -2739,7 +2741,7 @@  int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
 		if (!can_unsync)
 			return -EPERM;
 
-		if (sp->unsync)
+		if (sp->arch.unsync)
 			continue;
 
 		if (prefetch)
@@ -2764,7 +2766,7 @@  int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
 			 * for write, i.e. unsync cannot transition from 0->1
 			 * while this CPU holds mmu_lock for read (or write).
 			 */
-			if (READ_ONCE(sp->unsync))
+			if (READ_ONCE(sp->arch.unsync))
 				continue;
 		}
 
@@ -2796,8 +2798,8 @@  int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
 	 *                      2.2 Guest issues TLB flush.
 	 *                          That causes a VM Exit.
 	 *
-	 *                      2.3 Walking of unsync pages sees sp->unsync is
-	 *                          false and skips the page.
+	 *                      2.3 Walking of unsync pages sees sp->arch.unsync
+	 *                          is false and skips the page.
 	 *
 	 *                      2.4 Guest accesses GVA X.
 	 *                          Since the mapping in the SP was not updated,
@@ -2805,7 +2807,7 @@  int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
 	 *                          gets used.
 	 * 1.1 Host marks SP
 	 *     as unsync
-	 *     (sp->unsync = true)
+	 *     (sp->arch.unsync = true)
 	 *
 	 * The write barrier below ensures that 1.1 happens before 1.2 and thus
 	 * the situation in 2.4 does not arise.  It pairs with the read barrier
@@ -3126,7 +3128,7 @@  void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_
 	    cur_level == fault->goal_level &&
 	    is_shadow_present_pte(spte) &&
 	    !is_large_pte(spte) &&
-	    spte_to_child_sp(spte)->nx_huge_page_disallowed) {
+	    spte_to_child_sp(spte)->arch.nx_huge_page_disallowed) {
 		/*
 		 * A small SPTE exists for this pfn, but FNAME(fetch),
 		 * direct_map(), or kvm_tdp_mmu_map() would like to create a
@@ -3902,7 +3904,7 @@  static bool is_unsync_root(hpa_t root)
 
 	/*
 	 * The read barrier orders the CPU's read of SPTE.W during the page table
-	 * walk before the reads of sp->unsync/sp->unsync_children here.
+	 * walk before the reads of sp->arch.{unsync,unsync_children} here.
 	 *
 	 * Even if another CPU was marking the SP as unsync-ed simultaneously,
 	 * any guest page table changes are not guaranteed to be visible anyway
@@ -3922,7 +3924,7 @@  static bool is_unsync_root(hpa_t root)
 	if (WARN_ON_ONCE(!sp))
 		return false;
 
-	if (sp->unsync || sp->unsync_children)
+	if (sp->arch.unsync || sp->arch.unsync_children)
 		return true;
 
 	return false;
@@ -5510,8 +5512,8 @@  static bool detect_write_flooding(struct kvm_mmu_page *sp)
 	if (sp->role.level == PG_LEVEL_4K)
 		return false;
 
-	atomic_inc(&sp->write_flooding_count);
-	return atomic_read(&sp->write_flooding_count) >= 3;
+	atomic_inc(&sp->arch.write_flooding_count);
+	return atomic_read(&sp->arch.write_flooding_count) >= 3;
 }
 
 /*
@@ -6389,7 +6391,7 @@  static bool shadow_mmu_try_split_huge_pages(struct kvm *kvm,
 			continue;
 
 		/* SPs with level >PG_LEVEL_4K should never by unsync. */
-		if (WARN_ON_ONCE(sp->unsync))
+		if (WARN_ON_ONCE(sp->arch.unsync))
 			continue;
 
 		/* Don't bother splitting huge pages on invalid SPs. */
@@ -6941,8 +6943,8 @@  static void kvm_recover_nx_huge_pages(struct kvm *kvm)
 		 */
 		sp = list_first_entry(&kvm->arch.possible_nx_huge_pages,
 				      struct kvm_mmu_page,
-				      possible_nx_huge_page_link);
-		WARN_ON_ONCE(!sp->nx_huge_page_disallowed);
+				      arch.possible_nx_huge_page_link);
+		WARN_ON_ONCE(!sp->arch.nx_huge_page_disallowed);
 		WARN_ON_ONCE(!sp->role.arch.direct);
 
 		/*
@@ -6977,7 +6979,7 @@  static void kvm_recover_nx_huge_pages(struct kvm *kvm)
 			flush |= kvm_tdp_mmu_zap_sp(kvm, sp);
 		else
 			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
-		WARN_ON_ONCE(sp->nx_huge_page_disallowed);
+		WARN_ON_ONCE(sp->arch.nx_huge_page_disallowed);
 
 		if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
 			kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index fd4990c8b0e9..af2ae4887e35 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -44,89 +44,6 @@  extern bool dbg;
 #define INVALID_PAE_ROOT	0
 #define IS_VALID_PAE_ROOT(x)	(!!(x))
 
-struct kvm_mmu_page {
-	/*
-	 * Note, "link" through "spt" fit in a single 64 byte cache line on
-	 * 64-bit kernels, keep it that way unless there's a reason not to.
-	 */
-	struct list_head link;
-	struct hlist_node hash_link;
-
-	bool shadow_mmu_page;
-	bool unsync;
-	u8 mmu_valid_gen;
-
-	 /*
-	  * The shadow page can't be replaced by an equivalent huge page
-	  * because it is being used to map an executable page in the guest
-	  * and the NX huge page mitigation is enabled.
-	  */
-	bool nx_huge_page_disallowed;
-
-	/*
-	 * The following two entries are used to key the shadow page in the
-	 * hash table.
-	 */
-	union kvm_mmu_page_role role;
-	gfn_t gfn;
-
-	u64 *spt;
-
-	/*
-	 * Stores the result of the guest translation being shadowed by each
-	 * SPTE.  KVM shadows two types of guest translations: nGPA -> GPA
-	 * (shadow EPT/NPT) and GVA -> GPA (traditional shadow paging). In both
-	 * cases the result of the translation is a GPA and a set of access
-	 * constraints.
-	 *
-	 * The GFN is stored in the upper bits (PAGE_SHIFT) and the shadowed
-	 * access permissions are stored in the lower bits. Note, for
-	 * convenience and uniformity across guests, the access permissions are
-	 * stored in KVM format (e.g.  ACC_EXEC_MASK) not the raw guest format.
-	 */
-	u64 *shadowed_translation;
-
-	/* Currently serving as active root */
-	refcount_t root_refcount;
-
-	unsigned int unsync_children;
-	union {
-		struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
-		tdp_ptep_t ptep;
-	};
-	union {
-		DECLARE_BITMAP(unsync_child_bitmap, 512);
-		struct {
-			struct work_struct tdp_mmu_async_work;
-			void *tdp_mmu_async_data;
-		};
-	};
-
-	/*
-	 * Tracks shadow pages that, if zapped, would allow KVM to create an NX
-	 * huge page.  A shadow page will have nx_huge_page_disallowed set but
-	 * not be on the list if a huge page is disallowed for other reasons,
-	 * e.g. because KVM is shadowing a PTE at the same gfn, the memslot
-	 * isn't properly aligned, etc...
-	 */
-	struct list_head possible_nx_huge_page_link;
-#ifdef CONFIG_X86_32
-	/*
-	 * Used out of the mmu-lock to avoid reading spte values while an
-	 * update is in progress; see the comments in __get_spte_lockless().
-	 */
-	int clear_spte_count;
-#endif
-
-	/* Number of writes since the last time traversal visited this page.  */
-	atomic_t write_flooding_count;
-
-#ifdef CONFIG_X86_64
-	/* Used for freeing the page asynchronously if it is a TDP MMU page. */
-	struct rcu_head rcu_head;
-#endif
-};
-
 extern struct kmem_cache *mmu_page_header_cache;
 
 static inline bool kvm_mmu_page_ad_need_write_protect(struct kvm_mmu_page *sp)
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index ffd10ce3eae3..335f26dabdf3 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -16,11 +16,11 @@ 
 	__field(bool, unsync)
 
 #define KVM_MMU_PAGE_ASSIGN(sp)				\
-	__entry->mmu_valid_gen = sp->mmu_valid_gen;	\
+	__entry->mmu_valid_gen = sp->arch.mmu_valid_gen;	\
 	__entry->gfn = sp->gfn;				\
 	__entry->role = sp->role.word;			\
 	__entry->root_count = refcount_read(&sp->root_refcount);	\
-	__entry->unsync = sp->unsync;
+	__entry->unsync = sp->arch.unsync;
 
 #define KVM_MMU_PAGE_PRINTK() ({				        \
 	const char *saved_ptr = trace_seq_buffer_ptr(p);		\
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index e15ec1c473da..18bb92b70a01 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -671,7 +671,7 @@  static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 			 * KVM_REQ_MMU_SYNC is not necessary but it
 			 * expedites the process.
 			 */
-			if (sp->unsync_children &&
+			if (sp->arch.unsync_children &&
 			    mmu_sync_children(vcpu, sp, false))
 				return RET_PF_RETRY;
 		}
@@ -921,7 +921,7 @@  static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
 			pt_element_t gpte;
 			gpa_t pte_gpa;
 
-			if (!sp->unsync)
+			if (!sp->arch.unsync)
 				break;
 
 			pte_gpa = FNAME(get_level1_sp_gpa)(sp);
@@ -942,7 +942,7 @@  static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
 			FNAME(prefetch_gpte)(vcpu, sp, sptep, gpte, false);
 		}
 
-		if (!sp->unsync_children)
+		if (!sp->arch.unsync_children)
 			break;
 	}
 	write_unlock(&vcpu->kvm->mmu_lock);
@@ -974,8 +974,8 @@  static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 }
 
 /*
- * Using the information in sp->shadowed_translation (kvm_mmu_page_get_gfn()) is
- * safe because:
+ * Using the information in sp->arch.shadowed_translation
+ * (kvm_mmu_page_get_gfn()) is safe because:
  * - The spte has a reference to the struct page, so the pfn for a given gfn
  *   can't change unless all sptes pointing to it are nuked first.
  *
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 34d674080170..66231c7ed31e 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -270,7 +270,7 @@  static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu)
 static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep,
 			    gfn_t gfn, union kvm_mmu_page_role role)
 {
-	INIT_LIST_HEAD(&sp->possible_nx_huge_page_link);
+	INIT_LIST_HEAD(&sp->arch.possible_nx_huge_page_link);
 
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 
@@ -385,7 +385,7 @@  static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
 {
 	tdp_unaccount_mmu_page(kvm, sp);
 
-	if (!sp->nx_huge_page_disallowed)
+	if (!sp->arch.nx_huge_page_disallowed)
 		return;
 
 	if (shared)
@@ -393,7 +393,7 @@  static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
 	else
 		lockdep_assert_held_write(&kvm->mmu_lock);
 
-	sp->nx_huge_page_disallowed = false;
+	sp->arch.nx_huge_page_disallowed = false;
 	untrack_possible_nx_huge_page(kvm, sp);
 
 	if (shared)
@@ -1181,7 +1181,7 @@  int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		sp = tdp_mmu_alloc_sp(vcpu);
 		tdp_mmu_init_child_sp(sp, &iter);
 
-		sp->nx_huge_page_disallowed = fault->huge_page_disallowed;
+		sp->arch.nx_huge_page_disallowed = fault->huge_page_disallowed;
 
 		if (is_shadow_present_pte(iter.old_spte))
 			r = tdp_mmu_split_huge_page(kvm, &iter, sp, true);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 19d3153051a3..e6a929089715 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -73,7 +73,7 @@  u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr,
 #ifdef CONFIG_X86_64
 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp)
 {
-	return !sp->shadow_mmu_page;
+	return !sp->arch.shadow_mmu_page;
 }
 #else
 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; }
diff --git a/include/kvm/mmu_types.h b/include/kvm/mmu_types.h
index 14099956fdac..a9da33d4baa8 100644
--- a/include/kvm/mmu_types.h
+++ b/include/kvm/mmu_types.h
@@ -3,8 +3,11 @@ 
 #define __KVM_MMU_TYPES_H
 
 #include <linux/bug.h>
-#include <linux/types.h>
+#include <linux/kvm_types.h>
+#include <linux/refcount.h>
 #include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
 
 #include <asm/kvm/mmu_types.h>
 
@@ -36,4 +39,31 @@  static_assert(sizeof(union kvm_mmu_page_role) == sizeof_field(union kvm_mmu_page
 
 typedef u64 __rcu *tdp_ptep_t;
 
+struct kvm_mmu_page {
+	struct list_head link;
+
+	union kvm_mmu_page_role role;
+
+	/* The start of the GFN region mapped by this shadow page. */
+	gfn_t gfn;
+
+	/* The page table page. */
+	u64 *spt;
+
+	/* The PTE that points to this shadow page. */
+	tdp_ptep_t ptep;
+
+	/* Used for freeing TDP MMU pages asynchronously. */
+	struct rcu_head rcu_head;
+
+	/* The number of references to this shadow page as a root. */
+	refcount_t root_refcount;
+
+	/* Used for tearing down an entire page table tree. */
+	struct work_struct tdp_mmu_async_work;
+	void *tdp_mmu_async_data;
+
+	struct kvm_mmu_page_arch arch;
+};
+
 #endif /* !__KVM_MMU_TYPES_H */