@@ -51,7 +51,7 @@ static int hyperv_init_ghcb(void)
void *ghcb_va;
void **ghcb_base;
- if (!hv_isolation_type_snp())
+ if (!ms_hyperv.paravisor_present || !hv_isolation_type_snp())
return 0;
if (!hv_ghcb_pg)
@@ -457,7 +457,7 @@ void __init hyperv_init(void)
goto common_free;
}
- if (hv_isolation_type_snp()) {
+ if (ms_hyperv.paravisor_present && hv_isolation_type_snp()) {
/* Negotiate GHCB Version. */
if (!hv_ghcb_negotiate_protocol())
hv_ghcb_terminate(SEV_TERM_SET_GEN,
@@ -468,36 +468,39 @@ void __init hyperv_init(void)
goto free_vp_assist_page;
}
+ cpuhp = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "x86/hyperv_init:online",
+ hv_cpu_init, hv_cpu_die);
+ if (cpuhp < 0)
+ goto free_ghcb_page;
+
/*
* Setup the hypercall page and enable hypercalls.
* 1. Register the guest ID
* 2. Enable the hypercall and register the hypercall page
*
- * A TDX VM with no paravisor uses GHCI rather than hv_hypercall_pg.
- * When the VM needs to pass an input page to Hyper-V, the page must
- * be a shared page, e.g. hv_post_message() uses the per-CPU shared
- * page hyperv_pcpu_input_arg.
+ * A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg:
+ * when the hypercall input is a page, such a VM must pass a decrypted
+ * page to Hyper-V, e.g. hv_post_message() uses the per-CPU page
+ * hyperv_pcpu_input_arg, which is decrypted if no paravisor is present.
*
* A TDX VM with the paravisor uses hv_hypercall_pg for most hypercalls,
- * which are handled by the paravisor and a private input page must be
- * used, e.g. see hv_mark_gpa_visibility(). The VM uses GHCI for
- * two hypercalls: HVCALL_SIGNAL_EVENT (see vmbus_set_event()) and
- * HVCALL_POST_MESSAGE (the input page must be a shared page, i.e.
- * hv_post_message() uses the per-CPU shared hyperv_pcpu_input_arg.)
- * NOTE: we must initialize hv_hypercall_pg before hv_cpu_init(),
- * because hv_cpu_init() -> hv_common_cpu_init() -> set_memory_decrypted()
- * -> ... -> hv_vtom_set_host_visibility() -> ... -> hv_do_hypercall()
- * needs to call the hv_hypercall_pg.
- */
-
- /*
- * In the case of TDX with the paravisor, we should write the MSR
- * before hv_cpu_init(), which needs to call the paravisor-handled
- * HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY.
+ * which are handled by the paravisor and the VM must use an encrypted
+ * input page: in such a VM, the hyperv_pcpu_input_arg is encrypted and
+ * used in the hypercalls, e.g. see hv_mark_gpa_visibility() and
+ * hv_arch_irq_unmask(). Such a VM uses TDX GHCI for two hypercalls:
+ * 1. HVCALL_SIGNAL_EVENT: see vmbus_set_event() and _hv_do_fast_hypercall8().
+ * 2. HVCALL_POST_MESSAGE: the input page must be a decrypted page, i.e.
+ * hv_post_message() in such a VM can't use the encrypted hyperv_pcpu_input_arg;
+ * instead, hv_post_message() uses the post_msg_page, which is decrypted
+ * in such a VM and is only used in such a VM.
*/
guest_id = hv_generate_guest_id(LINUX_VERSION_CODE);
wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
+ /* With the paravisor, the VM must also write the ID via GHCB/GHCI */
+ hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
+
+ /* A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg */
if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
goto skip_hypercall_pg_init;
@@ -506,7 +509,7 @@ void __init hyperv_init(void)
VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
__builtin_return_address(0));
if (hv_hypercall_pg == NULL)
- goto free_ghcb_page;
+ goto clean_guest_os_id;
rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
hypercall_msr.enable = 1;
@@ -541,18 +544,6 @@ void __init hyperv_init(void)
}
skip_hypercall_pg_init:
- cpuhp = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "x86/hyperv_init:online",
- hv_cpu_init, hv_cpu_die);
- if (cpuhp < 0)
- goto clean_guest_os_id;
-
- /*
- * In the case of SNP with the paravisor, we must write the MSR to
- * the hypervisor after hv_cpu_init(), which maps the hv_ghcb_pg first.
- */
- if (hyperv_paravisor_present)
- hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
-
/*
* hyperv_init() is called before LAPIC is initialized: see
* apic_intr_mode_init() -> x86_platform.apic_post_init() and
@@ -592,8 +583,8 @@ void __init hyperv_init(void)
clean_guest_os_id:
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
- if (hyperv_paravisor_present)
- hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
+ hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
+ cpuhp_remove_state(cpuhp);
free_ghcb_page:
free_percpu(hv_ghcb_pg);
free_vp_assist_page:
@@ -613,8 +604,7 @@ void hyperv_cleanup(void)
/* Reset our OS id */
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
- if (hyperv_paravisor_present)
- hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
+ hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
/*
* Reset hypercall page reference before reset the page,
@@ -93,7 +93,17 @@ int hv_post_message(union hv_connection_id connection_id,
local_irq_save(flags);
- aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg);
+ /*
+ * A TDX VM with the paravisor must use the decrypted post_msg_page: see
+ * the comment in struct hv_per_cpu_context. A SNP VM with the paravisor
+ * can use the encrypted hyperv_pcpu_input_arg because it copies the
+ * input into the GHCB page, which has been decrypted by the paravisor.
+ */
+ if (hv_isolation_type_tdx() && ms_hyperv.paravisor_present)
+ aligned_msg = this_cpu_ptr(hv_context.cpu_context)->post_msg_page;
+ else
+ aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg);
+
aligned_msg->connectionid = connection_id;
aligned_msg->reserved = 0;
aligned_msg->message_type = message_type;
@@ -142,6 +152,24 @@ int hv_synic_alloc(void)
tasklet_init(&hv_cpu->msg_dpc,
vmbus_on_msg_dpc, (unsigned long) hv_cpu);
+ if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) {
+ hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
+ if (hv_cpu->post_msg_page == NULL) {
+ pr_err("Unable to allocate post msg page\n");
+ goto err;
+ }
+
+ ret = set_memory_decrypted((unsigned long)hv_cpu->post_msg_page, 1);
+ if (ret) {
+ pr_err("Failed to decrypt post msg page: %d\n", ret);
+ /* Just leak the page, as it's unsafe to free the page. */
+ hv_cpu->post_msg_page = NULL;
+ goto err;
+ }
+
+ memset(hv_cpu->post_msg_page, 0, PAGE_SIZE);
+ }
+
/*
* Synic message and event pages are allocated by paravisor.
* Skip these pages allocation here.
@@ -158,6 +186,9 @@ int hv_synic_alloc(void)
(void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->synic_event_page == NULL) {
pr_err("Unable to allocate SYNIC event page\n");
+
+ free_page((unsigned long)hv_cpu->synic_message_page);
+ hv_cpu->synic_message_page = NULL;
goto err;
}
}
@@ -168,6 +199,14 @@ int hv_synic_alloc(void)
(unsigned long)hv_cpu->synic_message_page, 1);
if (ret) {
pr_err("Failed to decrypt SYNIC msg page\n");
+ hv_cpu->synic_message_page = NULL;
+
+ /*
+ * Free the event page here so that hv_synic_free()
+ * won't later try to re-encrypt it.
+ */
+ free_page((unsigned long)hv_cpu->synic_event_page);
+ hv_cpu->synic_event_page = NULL;
goto err;
}
@@ -175,8 +214,12 @@ int hv_synic_alloc(void)
(unsigned long)hv_cpu->synic_event_page, 1);
if (ret) {
pr_err("Failed to decrypt SYNIC event page\n");
+ hv_cpu->synic_event_page = NULL;
goto err;
}
+
+ memset(hv_cpu->synic_message_page, 0, PAGE_SIZE);
+ memset(hv_cpu->synic_event_page, 0, PAGE_SIZE);
}
}
@@ -200,6 +243,17 @@ void hv_synic_free(void)
= per_cpu_ptr(hv_context.cpu_context, cpu);
/* It's better to leak the page if the encryption fails. */
+ if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) {
+ if (hv_cpu->post_msg_page) {
+ ret = set_memory_encrypted((unsigned long)
+ hv_cpu->post_msg_page, 1);
+ if (ret) {
+ pr_err("Failed to encrypt post msg page: %d\n", ret);
+ hv_cpu->post_msg_page = NULL;
+ }
+ }
+ }
+
if (hv_isolation_type_tdx() && !hyperv_paravisor_present) {
if (hv_cpu->synic_message_page) {
ret = set_memory_encrypted((unsigned long)
@@ -210,14 +264,17 @@ void hv_synic_free(void)
}
}
- ret = set_memory_encrypted(
- (unsigned long)hv_cpu->synic_event_page, 1);
- if (ret) {
- pr_err("Failed to encrypt SYNIC event page\n");
- continue;
+ if (hv_cpu->synic_event_page) {
+ ret = set_memory_encrypted(
+ (unsigned long)hv_cpu->synic_event_page, 1);
+ if (ret) {
+ pr_err("Failed to encrypt SYNIC event page\n");
+ hv_cpu->synic_event_page = NULL;
+ }
}
}
+ free_page((unsigned long)hv_cpu->post_msg_page);
free_page((unsigned long)hv_cpu->synic_event_page);
free_page((unsigned long)hv_cpu->synic_message_page);
}
@@ -149,7 +149,8 @@ int hv_common_cpu_init(unsigned int cpu)
if (!mem)
return -ENOMEM;
- if (hv_isolation_type_tdx()) {
+ if (!ms_hyperv.paravisor_present &&
+ (hv_isolation_type_snp() || hv_isolation_type_tdx())) {
ret = set_memory_decrypted((unsigned long)mem, pgcount);
/* It may be unsafe to free mem upon error. */
@@ -123,6 +123,17 @@ struct hv_per_cpu_context {
void *synic_message_page;
void *synic_event_page;
+ /*
+ * The page is only used in hv_post_message() for a TDX VM (with the
+ * paravisor) to post a messages to Hyper-V: when such a VM calls
+ * HVCALL_POST_MESSAGE, it can't use the hyperv_pcpu_input_arg (which
+ * is encrypted in such a VM) as the hypercall input page, because
+ * the input page for HVCALL_POST_MESSAGE must be decrypted in such a
+ * VM, so post_msg_page (which is decrypted in hv_synic_alloc()) is
+ * introduced for this purpose. See hyperv_init() for more comments.
+ */
+ void *post_msg_page;
+
/*
* Starting with win8, we can take channel interrupts on any CPU;
* we will manage the tasklet that handles events messages on a per CPU