[2/2] UBUNTU: SAUCE: Refresh the TDX support and support DDA for a TDX VM with paravisor

Message ID	20231031131909.99632-3-tim.gardner@canonical.com
State	New
Headers	show Return-Path: <kernel-team-bounces@lists.ubuntu.com> From: Tim Gardner <tim.gardner@canonical.com> To: kernel-team@lists.ubuntu.com Subject: [PATCH 2/2] UBUNTU: SAUCE: Refresh the TDX support and support DDA for a TDX VM with paravisor Date: Tue, 31 Oct 2023 07:19:09 -0600 Message-Id: <20231031131909.99632-3-tim.gardner@canonical.com> In-Reply-To: <20231031131909.99632-1-tim.gardner@canonical.com> References: <20231031131909.99632-1-tim.gardner@canonical.com> MIME-Version: 1.0 Precedence: list Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: kernel-team-bounces@lists.ubuntu.com Sender: "kernel-team" <kernel-team-bounces@lists.ubuntu.com>
Series	Azure: Update TDX with HCL support \| expand [0/2,l,linux-azure] Azure: Update TDX with HCL support [1/2] UBUNTU: SAUCE: x86/mm: Print the encryption features correctly when a paravisor is present [2/2] UBUNTU: SAUCE: Refresh the TDX support and support DDA for a TDX VM with paravisor

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index a6ccc041539d..af3653caefd3 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -51,7 +51,7 @@ static int hyperv_init_ghcb(void) void *ghcb_va; void **ghcb_base; - if (!hv_isolation_type_snp()) + if (!ms_hyperv.paravisor_present || !hv_isolation_type_snp()) return 0; if (!hv_ghcb_pg) @@ -457,7 +457,7 @@ void __init hyperv_init(void) goto common_free; } - if (hv_isolation_type_snp()) { + if (ms_hyperv.paravisor_present && hv_isolation_type_snp()) { /* Negotiate GHCB Version. */ if (!hv_ghcb_negotiate_protocol()) hv_ghcb_terminate(SEV_TERM_SET_GEN, @@ -468,36 +468,39 @@ void __init hyperv_init(void) goto free_vp_assist_page; } + cpuhp = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "x86/hyperv_init:online", + hv_cpu_init, hv_cpu_die); + if (cpuhp < 0) + goto free_ghcb_page; + /* * Setup the hypercall page and enable hypercalls. * 1. Register the guest ID * 2. Enable the hypercall and register the hypercall page * - * A TDX VM with no paravisor uses GHCI rather than hv_hypercall_pg. - * When the VM needs to pass an input page to Hyper-V, the page must - * be a shared page, e.g. hv_post_message() uses the per-CPU shared - * page hyperv_pcpu_input_arg. + * A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg: + * when the hypercall input is a page, such a VM must pass a decrypted + * page to Hyper-V, e.g. hv_post_message() uses the per-CPU page + * hyperv_pcpu_input_arg, which is decrypted if no paravisor is present. * * A TDX VM with the paravisor uses hv_hypercall_pg for most hypercalls, - * which are handled by the paravisor and a private input page must be - * used, e.g. see hv_mark_gpa_visibility(). The VM uses GHCI for - * two hypercalls: HVCALL_SIGNAL_EVENT (see vmbus_set_event()) and - * HVCALL_POST_MESSAGE (the input page must be a shared page, i.e. - * hv_post_message() uses the per-CPU shared hyperv_pcpu_input_arg.) - * NOTE: we must initialize hv_hypercall_pg before hv_cpu_init(), - * because hv_cpu_init() -> hv_common_cpu_init() -> set_memory_decrypted() - * -> ... -> hv_vtom_set_host_visibility() -> ... -> hv_do_hypercall() - * needs to call the hv_hypercall_pg. - */ - - /* - * In the case of TDX with the paravisor, we should write the MSR - * before hv_cpu_init(), which needs to call the paravisor-handled - * HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY. + * which are handled by the paravisor and the VM must use an encrypted + * input page: in such a VM, the hyperv_pcpu_input_arg is encrypted and + * used in the hypercalls, e.g. see hv_mark_gpa_visibility() and + * hv_arch_irq_unmask(). Such a VM uses TDX GHCI for two hypercalls: + * 1. HVCALL_SIGNAL_EVENT: see vmbus_set_event() and _hv_do_fast_hypercall8(). + * 2. HVCALL_POST_MESSAGE: the input page must be a decrypted page, i.e. + * hv_post_message() in such a VM can't use the encrypted hyperv_pcpu_input_arg; + * instead, hv_post_message() uses the post_msg_page, which is decrypted + * in such a VM and is only used in such a VM. */ guest_id = hv_generate_guest_id(LINUX_VERSION_CODE); wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); + /* With the paravisor, the VM must also write the ID via GHCB/GHCI */ + hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id); + + /* A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg */ if (hv_isolation_type_tdx() && !hyperv_paravisor_present) goto skip_hypercall_pg_init; @@ -506,7 +509,7 @@ void __init hyperv_init(void) VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, __builtin_return_address(0)); if (hv_hypercall_pg == NULL) - goto free_ghcb_page; + goto clean_guest_os_id; rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); hypercall_msr.enable = 1; @@ -541,18 +544,6 @@ void __init hyperv_init(void) } skip_hypercall_pg_init: - cpuhp = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "x86/hyperv_init:online", - hv_cpu_init, hv_cpu_die); - if (cpuhp < 0) - goto clean_guest_os_id; - - /* - * In the case of SNP with the paravisor, we must write the MSR to - * the hypervisor after hv_cpu_init(), which maps the hv_ghcb_pg first. - */ - if (hyperv_paravisor_present) - hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id); - /* * hyperv_init() is called before LAPIC is initialized: see * apic_intr_mode_init() -> x86_platform.apic_post_init() and @@ -592,8 +583,8 @@ void __init hyperv_init(void) clean_guest_os_id: wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); - if (hyperv_paravisor_present) - hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); + hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); + cpuhp_remove_state(cpuhp); free_ghcb_page: free_percpu(hv_ghcb_pg); free_vp_assist_page: @@ -613,8 +604,7 @@ void hyperv_cleanup(void) /* Reset our OS id */ wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); - if (hyperv_paravisor_present) - hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); + hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); /* * Reset hypercall page reference before reset the page, diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index a5d388f3706c..8c5fa0807456 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -93,7 +93,17 @@ int hv_post_message(union hv_connection_id connection_id, local_irq_save(flags); - aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg); + /* + * A TDX VM with the paravisor must use the decrypted post_msg_page: see + * the comment in struct hv_per_cpu_context. A SNP VM with the paravisor + * can use the encrypted hyperv_pcpu_input_arg because it copies the + * input into the GHCB page, which has been decrypted by the paravisor. + */ + if (hv_isolation_type_tdx() && ms_hyperv.paravisor_present) + aligned_msg = this_cpu_ptr(hv_context.cpu_context)->post_msg_page; + else + aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg); + aligned_msg->connectionid = connection_id; aligned_msg->reserved = 0; aligned_msg->message_type = message_type; @@ -142,6 +152,24 @@ int hv_synic_alloc(void) tasklet_init(&hv_cpu->msg_dpc, vmbus_on_msg_dpc, (unsigned long) hv_cpu); + if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) { + hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC); + if (hv_cpu->post_msg_page == NULL) { + pr_err("Unable to allocate post msg page\n"); + goto err; + } + + ret = set_memory_decrypted((unsigned long)hv_cpu->post_msg_page, 1); + if (ret) { + pr_err("Failed to decrypt post msg page: %d\n", ret); + /* Just leak the page, as it's unsafe to free the page. */ + hv_cpu->post_msg_page = NULL; + goto err; + } + + memset(hv_cpu->post_msg_page, 0, PAGE_SIZE); + } + /* * Synic message and event pages are allocated by paravisor. * Skip these pages allocation here. @@ -158,6 +186,9 @@ int hv_synic_alloc(void) (void *)get_zeroed_page(GFP_ATOMIC); if (hv_cpu->synic_event_page == NULL) { pr_err("Unable to allocate SYNIC event page\n"); + + free_page((unsigned long)hv_cpu->synic_message_page); + hv_cpu->synic_message_page = NULL; goto err; } } @@ -168,6 +199,14 @@ int hv_synic_alloc(void) (unsigned long)hv_cpu->synic_message_page, 1); if (ret) { pr_err("Failed to decrypt SYNIC msg page\n"); + hv_cpu->synic_message_page = NULL; + + /* + * Free the event page here so that hv_synic_free() + * won't later try to re-encrypt it. + */ + free_page((unsigned long)hv_cpu->synic_event_page); + hv_cpu->synic_event_page = NULL; goto err; } @@ -175,8 +214,12 @@ int hv_synic_alloc(void) (unsigned long)hv_cpu->synic_event_page, 1); if (ret) { pr_err("Failed to decrypt SYNIC event page\n"); + hv_cpu->synic_event_page = NULL; goto err; } + + memset(hv_cpu->synic_message_page, 0, PAGE_SIZE); + memset(hv_cpu->synic_event_page, 0, PAGE_SIZE); } } @@ -200,6 +243,17 @@ void hv_synic_free(void) = per_cpu_ptr(hv_context.cpu_context, cpu); /* It's better to leak the page if the encryption fails. */ + if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) { + if (hv_cpu->post_msg_page) { + ret = set_memory_encrypted((unsigned long) + hv_cpu->post_msg_page, 1); + if (ret) { + pr_err("Failed to encrypt post msg page: %d\n", ret); + hv_cpu->post_msg_page = NULL; + } + } + } + if (hv_isolation_type_tdx() && !hyperv_paravisor_present) { if (hv_cpu->synic_message_page) { ret = set_memory_encrypted((unsigned long) @@ -210,14 +264,17 @@ void hv_synic_free(void) } } - ret = set_memory_encrypted( - (unsigned long)hv_cpu->synic_event_page, 1); - if (ret) { - pr_err("Failed to encrypt SYNIC event page\n"); - continue; + if (hv_cpu->synic_event_page) { + ret = set_memory_encrypted( + (unsigned long)hv_cpu->synic_event_page, 1); + if (ret) { + pr_err("Failed to encrypt SYNIC event page\n"); + hv_cpu->synic_event_page = NULL; + } } } + free_page((unsigned long)hv_cpu->post_msg_page); free_page((unsigned long)hv_cpu->synic_event_page); free_page((unsigned long)hv_cpu->synic_message_page); } diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 308d7d485803..20033df9031d 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -149,7 +149,8 @@ int hv_common_cpu_init(unsigned int cpu) if (!mem) return -ENOMEM; - if (hv_isolation_type_tdx()) { + if (!ms_hyperv.paravisor_present && + (hv_isolation_type_snp() || hv_isolation_type_tdx())) { ret = set_memory_decrypted((unsigned long)mem, pgcount); /* It may be unsafe to free mem upon error. */ diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 55f2086841ae..f6b1e710f805 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -123,6 +123,17 @@ struct hv_per_cpu_context { void *synic_message_page; void *synic_event_page; + /* + * The page is only used in hv_post_message() for a TDX VM (with the + * paravisor) to post a messages to Hyper-V: when such a VM calls + * HVCALL_POST_MESSAGE, it can't use the hyperv_pcpu_input_arg (which + * is encrypted in such a VM) as the hypercall input page, because + * the input page for HVCALL_POST_MESSAGE must be decrypted in such a + * VM, so post_msg_page (which is decrypted in hv_synic_alloc()) is + * introduced for this purpose. See hyperv_init() for more comments. + */ + void *post_msg_page; + /* * Starting with win8, we can take channel interrupts on any CPU; * we will manage the tasklet that handles events messages on a per CPU

[2/2] UBUNTU: SAUCE: Refresh the TDX support and support DDA for a TDX VM with paravisor

Commit Message

Patch