diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index bfde0a3498b90..c9808a51fa371 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -53,6 +54,11 @@ static void hv_apic_icr_write(u32 low, u32 id) wrmsrq(HV_X64_MSR_ICR, reg_val); } +void hv_enable_coco_interrupt(unsigned int cpu, unsigned int vector, bool set) +{ + apic_update_vector(cpu, vector, set); +} + static u32 hv_apic_read(u32 reg) { u32 reg_val, hi; @@ -293,6 +299,9 @@ static void hv_send_ipi_self(int vector) void __init hv_apic_init(void) { + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return; + if (ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) { pr_info("Hyper-V: Using IPI hypercalls\n"); /* diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 13792fca4489a..eb000e11871f5 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -76,6 +76,7 @@ static inline void hv_set_hypercall_pg(void *ptr) EXPORT_SYMBOL_GPL(hv_hypercall_pg); #endif +void *hv_vp_early_input_arg; union hv_ghcb * __percpu *hv_ghcb_pg; /* Storage to save the hypercall page temporarily for hibernation */ @@ -120,6 +121,10 @@ static int hv_cpu_init(unsigned int cpu) if (ret) return ret; + /* Allow Hyper-V stimer vector to be injected from Hypervisor. */ + if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) + apic_update_vector(cpu, HYPERV_STIMER0_VECTOR, true); + return hyperv_init_ghcb(); } @@ -227,6 +232,9 @@ static int hv_cpu_die(unsigned int cpu) *ghcb_va = NULL; } + if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) + apic_update_vector(cpu, HYPERV_STIMER0_VECTOR, false); + hv_common_cpu_die(cpu); if (hv_reenlightenment_cb == NULL) @@ -375,6 +383,7 @@ void __init hyperv_init(void) u64 guest_id; union hv_x64_msr_hypercall_contents hypercall_msr; int cpuhp; + int ret; if (x86_hyper_type != X86_HYPER_MS_HYPERV) return; @@ -382,6 +391,24 @@ void __init hyperv_init(void) if (hv_common_init()) return; + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) { + hv_vp_early_input_arg = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, + get_order(num_possible_cpus() * PAGE_SIZE)); + if (hv_vp_early_input_arg) { + ret = set_memory_decrypted((u64)hv_vp_early_input_arg, + num_possible_cpus()); + if (ret) { + free_pages((unsigned long)hv_vp_early_input_arg, + get_order(num_possible_cpus() * PAGE_SIZE)); + hv_vp_early_input_arg = NULL; + goto common_free; + } + } else { + goto common_free; + } + } + if (ms_hyperv.paravisor_present && hv_isolation_type_snp()) { /* Negotiate GHCB Version. 
*/ if (!hv_ghcb_negotiate_protocol()) @@ -519,6 +546,16 @@ void __init hyperv_init(void) free_vp_assist_page: kfree(hv_vp_assist_page); hv_vp_assist_page = NULL; +free_vp_early_input_arg: + if (hv_vp_early_input_arg) { + set_memory_encrypted((u64)hv_vp_early_input_arg, + num_possible_cpus()); + free_pages((unsigned long)hv_vp_early_input_arg, + get_order(num_possible_cpus() * PAGE_SIZE)); + hv_vp_early_input_arg = NULL; + } +common_free: + hv_common_free(); } /* diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 59c8d623b6dac..bcda1116394c4 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -291,6 +291,45 @@ static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) free_page((unsigned long)vmsa); } +enum es_result hv_set_savic_backing_page(u64 gfn) +{ + u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_SET_VP_REGISTERS; + struct hv_set_vp_registers_input *input = + (struct hv_set_vp_registers_input *) + ((u8 *)hv_vp_early_input_arg + smp_processor_id() * PAGE_SIZE); + union hv_x64_register_sev_gpa_page value; + unsigned long flags; + int retry = 5; + u64 ret; + + local_irq_save(flags); + + value.enabled = 1; + value.reserved = 0; + value.pagenumber = gfn; + + memset(input, 0, struct_size(input, element, 1)); + input->header.partitionid = HV_PARTITION_ID_SELF; + input->header.vpindex = HV_VP_INDEX_SELF; + input->header.inputvtl = ms_hyperv.vtl; + input->element[0].name = HV_X64_REGISTER_SEV_AVIC_GPA; + input->element[0].value.reg64 = value.u64; + + do { + ret = hv_do_hypercall(control, input, NULL); + } while (ret == HV_STATUS_TIME_OUT && retry--); + + if (!hv_result_success(ret)) + pr_err("Failed to set secure AVIC backing page %llx.\n", ret); + + local_irq_restore(flags); + + if (hv_result_success(ret)) + return ES_OK; + else + return ES_VMM_ERROR; +} + int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu) { struct sev_es_save_area *vmsa = (struct sev_es_save_area *) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a26e66d66444a..973e3581ebef7 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -241,6 +241,12 @@ static inline u64 native_x2apic_icr_read(void) return val; } +#if defined(CONFIG_AMD_SECURE_AVIC) +extern void x2apic_savic_init_backing_page(void *backing_page); +#else +static inline void x2apic_savic_init_backing_page(void *backing_page) {} +#endif + extern int x2apic_mode; extern int x2apic_phys; extern void __init x2apic_set_max_apicid(u32 apicid); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 8992f5a0f47ac..3a5fcf09b24e9 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -44,6 +44,7 @@ extern u64 hv_std_hypercall(u64 control, u64 param1, u64 param2); #if IS_ENABLED(CONFIG_HYPERV) extern void *hv_hypercall_pg; +extern void *hv_vp_early_input_arg; extern union hv_ghcb * __percpu *hv_ghcb_pg; @@ -197,6 +198,7 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); bool hv_ghcb_negotiate_protocol(void); void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu); +enum es_result hv_set_savic_backing_page(u64 gfn); #else static inline bool hv_ghcb_negotiate_protocol(void) { return false; } static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} @@ -300,6 +302,20 @@ static inline void hv_vtl_idle(void) native_safe_halt(); } +/* + * Registers are only accessible via 
HVCALL_GET_VP_REGISTERS hvcall and + * there is not associated MSR address. + */ +#ifndef HV_X64_REGISTER_VSM_VP_STATUS +#define HV_X64_REGISTER_VSM_VP_STATUS 0x000D0003 +#endif +#ifndef HV_X64_VTL_MASK +#define HV_X64_VTL_MASK GENMASK(3, 0) +#endif +#ifndef HV_X64_REGISTER_SEV_AVIC_GPA +#define HV_X64_REGISTER_SEV_AVIC_GPA 0x00090043 +#endif + #ifdef CONFIG_HYPERV_VTL_MODE void __init hv_vtl_init_platform(void); int __init hv_vtl_early_init(void); diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index e5aeaf25a1136..1af502cb50d66 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -141,7 +141,12 @@ struct rmp_state { u32 asid; } __packed; -#define RMPADJUST_VMSA_PAGE_BIT BIT(16) +/* Target VMPL takes the first byte */ +#define RMPADJUST_ENABLE_READ BIT(8) +#define RMPADJUST_ENABLE_WRITE BIT(9) +#define RMPADJUST_USER_EXECUTE BIT(10) +#define RMPADJUST_KERNEL_EXECUTE BIT(11) +#define RMPADJUST_VMSA_PAGE_BIT BIT(16) /* SNP Guest message request */ struct snp_req_data { diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 17f6c3fedeee7..3844529021763 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -190,6 +190,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_GIF_SHIFT 9 #define V_GIF_MASK (1 << V_GIF_SHIFT) +#define V_INT_SHADOW_SHIFT 10 +#define V_INT_SHADOW_MASK (1 << V_INT_SHADOW_SHIFT) + #define V_NMI_PENDING_SHIFT 11 #define V_NMI_PENDING_MASK (1 << V_NMI_PENDING_SHIFT) @@ -202,6 +205,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_IGN_TPR_SHIFT 20 #define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) +#define V_GUEST_BUSY_SHIFT 63 +#define V_GUEST_BUSY_MASK (1ULL << V_GUEST_BUSY_SHIFT) + #define V_IRQ_INJECTION_BITS_MASK (V_IRQ_MASK | V_INTR_PRIO_MASK | V_IGN_TPR_MASK) #define V_INTR_MASKING_SHIFT 24 diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index dbc5678bc3b68..d4afb99c42e25 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -14,6 +14,7 @@ #include #include +#include #include "local.h" @@ -330,6 +331,18 @@ static void savic_eoi(void) } } +void x2apic_savic_init_backing_page(void *ap) +{ + u32 apic_id; + + /* + * Before Secure AVIC is enabled, APIC msr reads are intercepted. + * APIC_ID msr read returns the value from the Hypervisor. + */ + apic_id = native_apic_msr_read(APIC_ID); + apic_set_reg(ap, APIC_ID, apic_id); +} + static void savic_teardown(void) { /* Disable Secure AVIC */ @@ -342,15 +355,16 @@ static void savic_setup(void) void *ap = this_cpu_ptr(savic_page); enum es_result res; unsigned long gpa; + unsigned long gfn; - /* - * Before Secure AVIC is enabled, APIC MSR reads are intercepted. - * APIC_ID MSR read returns the value from the hypervisor. - */ - apic_set_reg(ap, APIC_ID, native_apic_msr_read(APIC_ID)); + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + return; + x2apic_savic_init_backing_page(ap); gpa = __pa(ap); + gfn = gpa >> PAGE_SHIFT; + /* * The NPT entry for a vCPU's APIC backing page must always be * present when the vCPU is running in order for Secure AVIC to @@ -361,7 +375,11 @@ static void savic_setup(void) * VMRUN, the hypervisor makes use of this information to make sure * the APIC backing page is mapped in NPT. 
*/ - res = savic_register_gpa(gpa); + if (hv_isolation_type_snp()) + res = hv_set_savic_backing_page(gfn); + else + res = savic_register_gpa(gpa); + if (res != ES_OK) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 8a698808a15dc..e86bbebba46e6 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -470,6 +470,9 @@ static void __init ms_hyperv_init_platform(void) hv_identify_partition_type(); + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + ms_hyperv.hints |= HV_DEPRECATING_AEOI_RECOMMENDED; + if (ms_hyperv.hints & HV_X64_HYPERV_NESTED) { hv_nested = true; pr_info("Hyper-V: running on a nested hypervisor\n"); diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 08b6af9b1b3a4..9da2a382138cb 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -310,6 +310,7 @@ void hv_synic_enable_regs(unsigned int cpu) } hv_set_msr(HV_MSR_SIEFP, siefp.as_uint64); + hv_enable_coco_interrupt(cpu, vmbus_interrupt, true); /* Setup the shared SINT. */ if (vmbus_irq != -1) @@ -353,6 +354,7 @@ void hv_synic_disable_regs(unsigned int cpu) /* Need to correctly cleanup in the case of SMP!!! */ /* Disable the interrupt */ hv_set_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); + hv_enable_coco_interrupt(cpu, vmbus_interrupt, false); simp.as_uint64 = hv_get_msr(HV_MSR_SIMP); /* diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index a6c3ce941a898..9f260503abb1f 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -816,6 +816,11 @@ u64 __weak hv_tdx_hypercall(u64 control, u64 param1, u64 param2) } EXPORT_SYMBOL_GPL(hv_tdx_hypercall); +void __weak hv_enable_coco_interrupt(unsigned int cpu, unsigned int vector, bool set) +{ +} +EXPORT_SYMBOL_GPL(hv_enable_coco_interrupt); + void hv_identify_partition_type(void) { /* Assume guest role */ diff --git a/drivers/hv/mshv_vtl_main.c b/drivers/hv/mshv_vtl_main.c index 6d5ea05588d5a..9e639fb40ae8f 100644 --- a/drivers/hv/mshv_vtl_main.c +++ b/drivers/hv/mshv_vtl_main.c @@ -42,6 +42,7 @@ #include #include +#include #include #include #include @@ -192,6 +193,9 @@ struct mshv_vtl_per_cpu { bool msrs_are_guest; struct user_return_notifier mshv_urn; #endif +#if defined(CONFIG_X86_64) && defined(CONFIG_SEV_GUEST) + struct page *snp_secure_avic_page; +#endif }; /* SYNIC_OVERLAY_PAGE_MSR - internal, identical to hv_synic_simp */ @@ -354,20 +358,56 @@ static struct page *mshv_vtl_cpu_reg_page(int cpu) return *per_cpu_ptr(&mshv_vtl_per_cpu.reg_page, cpu); } -#if defined(CONFIG_X86_64) && defined(CONFIG_INTEL_TDX_GUEST) +#if defined(CONFIG_X86_64) + +#if defined(CONFIG_INTEL_TDX_GUEST) + +static struct page *tdx_this_apic_page(void) +{ + return *this_cpu_ptr(&mshv_vtl_per_cpu.tdx_apic_page); +} + +static u32 *mshv_tdx_vapic_irr(void) +{ + return (u32 *)((char *)page_address(tdx_this_apic_page()) + APIC_IRR); +} + +#endif /* defined(CONFIG_INTEL_TDX_GUEST) */ static struct page *tdx_apic_page(int cpu) { +#if defined(CONFIG_INTEL_TDX_GUEST) return *per_cpu_ptr(&mshv_vtl_per_cpu.tdx_apic_page, cpu); +#else + (void)cpu; + return NULL; +#endif } -static struct page *tdx_this_apic_page(void) +static struct page *snp_secure_avic_page(int cpu) { - return *this_cpu_ptr(&mshv_vtl_per_cpu.tdx_apic_page); +#if defined(CONFIG_SEV_GUEST) + return *per_cpu_ptr(&mshv_vtl_per_cpu.snp_secure_avic_page, cpu); +#else + (void)cpu; + return NULL; +#endif } +static struct page *mshv_apic_page(int cpu) +{ + if (hv_isolation_type_tdx()) + return 
tdx_apic_page(cpu); + else if (hv_isolation_type_snp()) + return snp_secure_avic_page(cpu); + + return NULL; +} + +#if defined(CONFIG_SEV_GUEST) || defined(CONFIG_INTEL_TDX_GUEST) /* - * For ICR emulation on TDX, we need a fast way to map APICIDs to CPUIDs. + * For ICR emulation when running a hardware isolated guest, we need a fast way to map + * APICIDs to CPUIDs. * Instead of iterating through all CPUs for each target in the ICR destination field * precompute a mapping. APICIDs can be sparse so we have to use a hash table. * Note: CPU hotplug is not supported (both by this code and by the paravisor in general) @@ -395,7 +435,7 @@ static int get_cpuid(int apicid) * Sets the cpu described by apicid in cpu_mask. * Returns 0 on success, -EINVAL if no cpu matches the apicid. */ -static int mshv_tdx_set_cpumask_from_apicid(int apicid, struct cpumask *cpu_mask) +static int mshv_set_cpumask_from_apicid(int apicid, struct cpumask *cpu_mask) { int cpu = get_cpuid(apicid); @@ -407,7 +447,221 @@ static int mshv_tdx_set_cpumask_from_apicid(int apicid, struct cpumask *cpu_mask return -EINVAL; } -#endif + +/* + * Returns the cpumask described by dest, where dest is a logical destination. + * cpu_mask should have no CPUs set. + * Returns 0 on success + */ +static int mshv_get_logical_cpumask(u32 dest, struct cpumask *cpu_mask) +{ + int ret = 0; + + while ((u16)dest) { + const u16 i = fls((u16)dest) - 1; + const u32 physical_id = (dest >> 16 << 4) | i; + + ret = mshv_set_cpumask_from_apicid(physical_id, cpu_mask); + dest &= ~BIT(i); + if (ret) + break; + } + + return ret; +} + +/* + * Interrupt handling (particularly sending (via ICR writes) and receiving interrupts), + * is a hot path on hardware-isolated VMs. By performing some of the common functionality + * entirely in-kernel we eliminate costly user<->kernel transitions. + */ +static void mshv_free_apicid_to_cpuid_mapping(void) +{ + int bkt; + struct apicid_to_cpuid_entry *entry; + struct hlist_node *tmp; + + hash_for_each_safe(apicid_to_cpuid, bkt, tmp, entry, node) { + hash_del(&entry->node); + kfree(entry); + } +} + +/* + * Creates and populates the apicid_to_cpuid hash table. + * This mapping is used for fast ICR emulation on hardware-isolated VMs. + * Returns 0 on success. + */ +static int mshv_create_apicid_to_cpuid_mapping(struct device *dev) +{ + int cpu, ret = 0; + + for_each_online_cpu(cpu) { + struct apicid_to_cpuid_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); + + if (!entry) { + ret = -ENOMEM; + break; + } + + entry->apicid = cpuid_to_apicid[cpu]; + entry->cpuid = cpu; + + if (entry->apicid == BAD_APICID) { + dev_emerg(dev, "Bad APICID: %d !!\n", entry->apicid); + kfree(entry); + ret = -ENODEV; + break; + } + + hash_add(apicid_to_cpuid, &entry->node, entry->apicid); + } + + if (ret) + mshv_free_apicid_to_cpuid_mapping(); + + return ret; +} + +/* + * Attempts to handle an ICR write. Returns 0 if successful, other values + * indicate user-space should be invoked to gracefully handle the error. 
+ */ +static int mshv_cpu_mask_for_icr_write(u32 icr_lo, u32 dest, struct cpumask *local_mask) +{ + const u8 shorthand = (icr_lo >> 18) & 0b11; + const u32 self = smp_processor_id(); + int ret = 0; + + cpumask_clear(local_mask); + if (shorthand == 0b10 || dest == (u32)-1) { /* shorthand all or destination id == all */ + cpumask_copy(local_mask, cpu_online_mask); + } else if (shorthand == 0b11) { /* shorthand all but self */ + cpumask_copy(local_mask, cpu_online_mask); + cpumask_clear_cpu(self, local_mask); + } else if (shorthand == 0b01) { /* shorthand self */ + cpumask_set_cpu(self, local_mask); + } else if (icr_lo & BIT(11)) { /* logical */ + ret = mshv_get_logical_cpumask(dest, local_mask); + } else { /* physical */ + ret = mshv_set_cpumask_from_apicid(dest, local_mask); + } + + return ret; +} + +/* + * Attempts to handle an ICR write. Returns 0 if successful, other values + * indicate user-space should be invoked to gracefully handle the error. + */ +static int mshv_update_proxy_irr_for_icr_write(u32 icr_lo, struct cpumask *local_mask) +{ + const u8 vector = icr_lo; + const u64 bank = vector / 32; + const u32 mask = BIT(vector % 32); + const u32 self = smp_processor_id(); + + unsigned int cpu; + bool send_ipi; + + send_ipi = false; + for_each_cpu(cpu, local_mask) { + /* + * The kernel doesn't provide an atomic_or which operates on u32, + * so cast to atomic_t, which should have the same layout + */ + static_assert(sizeof(atomic_t) == sizeof(u32)); + atomic_or(mask, (atomic_t *) + (&(mshv_vtl_cpu_run(cpu)->proxy_irr[bank]))); + /* Make update visible to other CPUs */ + smp_store_release(&mshv_vtl_cpu_run(cpu)->scan_proxy_irr, 1); + send_ipi |= cpu != self; + } + + if (send_ipi) { + cpumask_clear_cpu(self, local_mask); + __apic_send_IPI_mask(local_mask, RESCHEDULE_VECTOR); + } + + return 0; +} + +/* + * Attempts to handle an ICR write. Returns 0 if successful, other values + * indicate user-space should be invoked to gracefully handle the error. + * Secure AVIC accelerates self-IPI only. + */ +static int mshv_snp_handle_simple_icr_write(u32 icr_lo, u32 dest) +{ + struct cpumask local_mask; + int ret; + + ret = mshv_cpu_mask_for_icr_write(icr_lo, dest, &local_mask); + if (ret) + return ret; + ret = mshv_update_proxy_irr_for_icr_write(icr_lo, &local_mask); + if (ret) + return ret; + + return 0; +} + +/* + * Pull the interrupts in the `proxy_irr` field into the VAPIC page + * Returns true if an exit to user-space is required (sync tmr state) + */ +static bool __mshv_pull_proxy_irr(struct mshv_vtl_run *run, struct page *apic_page) +{ + u32 *apic_page_irr; + + if (!apic_page) + return false; + + apic_page_irr = (u32 *)((char *)page_address(apic_page) + APIC_IRR); + + if (!xchg(&run->scan_proxy_irr, 0)) + return false; + + for (int i = 0; i < 8; i++) { + const u32 val = xchg(&run->proxy_irr[i], 0); + + if (!val) + continue; + + if (run->proxy_irr_exit_mask[i] & val) { + /* + * This vector was previously used for a level-triggered interrupt. + * An edge-triggered interrupt has now arrived, so we need to involve + * user-space to clear its copy of the tmr. + * Put the interrupt(s) back on the run page so it can do so. + * nb atomic_t cast: See comment in mshv_tdx_handle_simple_icr_write + */ + atomic_or(val, (atomic_t *)(&run->proxy_irr[i])); + WRITE_ONCE(run->scan_proxy_irr, 1); + return true; + } + + /* + * IRR is non-contiguous. + * Each bank is 4 bytes with 12 bytes of padding between banks. 
+ */ + apic_page_irr[i * 4] |= val; + } + + return false; +} + +#else + +static int mshv_create_apicid_to_cpuid_mapping(struct device *dev) { return 0; } +static void mshv_free_apicid_to_cpuid_mapping(void) {} + +#endif /* defined(CONFIG_SEV_GUEST) || defined(CONFIG_INTEL_TDX_GUEST) */ + +#else +static int mshv_create_apicid_to_cpuid_mapping(struct device *dev) { return 0; } +static void mshv_free_apicid_to_cpuid_mapping(void) {} +#endif /* defined(CONFIG_X86_64) */ static void mshv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu) { @@ -472,7 +726,7 @@ static void mshv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu) } #ifdef CONFIG_X86_64 -static int mshv_configure_vmsa_page(u8 target_vtl, struct page** vmsa_page) +static int mshv_snp_configure_vmsa_page(u8 target_vtl, struct page **vmsa_page) { struct page *page; struct hv_register_assoc reg_assoc = {}; @@ -509,6 +763,8 @@ static int mshv_configure_vmsa_page(u8 target_vtl, struct page** vmsa_page) RMP_PG_SIZE_4K, 1 | RMPADJUST_VMSA_PAGE_BIT); if (ret) { pr_emerg("failed to set VMSA page bit: %d\n", ret); + if (target_vtl != 0) + __free_page(page); return ret; } @@ -803,12 +1059,36 @@ static int mshv_vtl_alloc_context(unsigned int cpu) mshv_tdx_init_halt_timer(); #endif } else if (hv_isolation_type_snp()) { -#ifdef CONFIG_X86_64 +#if defined(CONFIG_X86_64) && defined(CONFIG_SEV_GUEST) + struct page *snp_secure_avic_page; int ret; - ret = mshv_configure_vmsa_page(0, &per_cpu->vmsa_page); + ret = mshv_snp_configure_vmsa_page(0, &per_cpu->vmsa_page); if (ret < 0) return ret; + + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) + goto synic; + + snp_secure_avic_page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!snp_secure_avic_page) + return -ENOMEM; + + /* VMPL 2 for the VTL0 */ + ret = rmpadjust((unsigned long)page_address(snp_secure_avic_page), + RMP_PG_SIZE_4K, + 2 | RMPADJUST_ENABLE_READ | RMPADJUST_ENABLE_WRITE); + if (ret) { + pr_err("failed to adjust RMP for the secure AVIC page: %d\n", ret); + __free_page(snp_secure_avic_page); + return -EINVAL; + } + + x2apic_savic_init_backing_page(page_address(snp_secure_avic_page)); + + per_cpu->snp_secure_avic_page = snp_secure_avic_page; + +synic: #endif } else if (mshv_vsm_capabilities.intercept_page_available) { mshv_vtl_configure_reg_page(per_cpu); @@ -1499,58 +1779,7 @@ static void mshv_vtl_idle(void) * is a hot path on TDX. By performing some of the common functionality entirely in-kernel * we eliminate costly user<->kernel transitions. */ -#ifndef CONFIG_INTEL_TDX_GUEST -static void mshv_tdx_free_apicid_to_cpuid_mapping(void) {} -static int mshv_tdx_create_apicid_to_cpuid_mapping(struct device *dev) { return 0; } -static bool mshv_tdx_try_handle_exit(struct mshv_vtl_run *run) { return false; } -#else -static void mshv_tdx_free_apicid_to_cpuid_mapping(void) -{ - int bkt; - struct apicid_to_cpuid_entry *entry; - struct hlist_node *tmp; - - hash_for_each_safe(apicid_to_cpuid, bkt, tmp, entry, node) { - hash_del(&entry->node); - kfree(entry); - } -} - -/* - * Creates and populates the apicid_to_cpuid hash table. - * This mapping is used for fast ICR emulation on TDX. - * Returns 0 on success. 
- */ -static int mshv_tdx_create_apicid_to_cpuid_mapping(struct device *dev) -{ - int cpu, ret = 0; - - for_each_online_cpu(cpu) { - struct apicid_to_cpuid_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); - - if (!entry) { - ret = -ENOMEM; - break; - } - - entry->apicid = cpuid_to_apicid[cpu]; - entry->cpuid = cpu; - - if (entry->apicid == BAD_APICID) { - dev_emerg(dev, "Bad APICID: %d !!\n", entry->apicid); - ret = -ENODEV; - break; - } - - hash_add(apicid_to_cpuid, &entry->node, entry->apicid); - } - - if (ret) - mshv_tdx_free_apicid_to_cpuid_mapping(); - - return ret; -} - +#ifdef CONFIG_INTEL_TDX_GUEST static void mshv_tdx_advance_to_next_instruction(struct tdx_vp_context *context) { const u32 instr_length = context->exit_info.r11 >> 32ULL; @@ -1587,28 +1816,6 @@ static bool mshv_tdx_is_simple_icr_write(const struct tdx_vp_context *context) return fixed && edge; } -/* - * Returns the cpumask described by dest, where dest is a logical destination. - * cpu_mask should have no CPUs set. - * Returns 0 on success - */ -static int mshv_tdx_get_logical_cpumask(u32 dest, struct cpumask *cpu_mask) -{ - int ret = 0; - - while ((u16)dest) { - const u16 i = fls((u16)dest) - 1; - const u32 physical_id = (dest >> 16 << 4) | i; - - ret = mshv_tdx_set_cpumask_from_apicid(physical_id, cpu_mask); - dest &= ~BIT(i); - if (ret) - break; - } - - return ret; -} - /* * Attempts to handle an ICR write. Returns 0 if successful, other values * indicate user-space should be invoked to gracefully handle the error. @@ -1617,101 +1824,21 @@ static int mshv_tdx_handle_simple_icr_write(struct tdx_vp_context *context) { const u32 icr_lo = context->l2_enter_guest_state.rax; const u32 dest = context->l2_enter_guest_state.rdx; - const u8 shorthand = (icr_lo >> 18) & 0b11; - const u8 vector = icr_lo; - const u64 bank = vector >> 5; /* Each bank is 32 bits. Divide by 32 to find the bank. */ - const u32 mask = BIT(vector & 0x1f); /* Bit in the bank is the remainder of the division. 
*/ - const u32 self = smp_processor_id(); - - bool send_ipi = false; struct cpumask local_mask = {}; - unsigned int cpu = 0; int ret = 0; - if (shorthand == 0b10 || dest == (u32)-1) { /* shorthand all or destination id == all */ - cpumask_copy(&local_mask, cpu_online_mask); - } else if (shorthand == 0b11) { /* shorthand all but self */ - cpumask_copy(&local_mask, cpu_online_mask); - cpumask_clear_cpu(self, &local_mask); - } else if (shorthand == 0b01) { /* shorthand self */ - cpumask_set_cpu(self, &local_mask); - } else if (icr_lo & BIT(11)) { /* logical */ - ret = mshv_tdx_get_logical_cpumask(dest, &local_mask); - } else { /* physical */ - ret = mshv_tdx_set_cpumask_from_apicid(dest, &local_mask); - } - + ret = mshv_cpu_mask_for_icr_write(icr_lo, dest, &local_mask); + if (ret) + return ret; + ret = mshv_update_proxy_irr_for_icr_write(icr_lo, &local_mask); if (ret) return ret; - - for_each_cpu(cpu, &local_mask) { - /* - * The kernel doesn't provide an atomic_or which operates on u32, - * so cast to atomic_t, which should have the same layout - */ - static_assert(sizeof(atomic_t) == sizeof(u32)); - atomic_or(mask, (atomic_t *) - (&(mshv_vtl_cpu_run(cpu)->proxy_irr[bank]))); - smp_store_release(&mshv_vtl_cpu_run(cpu)->scan_proxy_irr, 1); - send_ipi |= cpu != self; - } - - if (send_ipi) { - cpumask_clear_cpu(self, &local_mask); - __apic_send_IPI_mask(&local_mask, RESCHEDULE_VECTOR); - } - mshv_tdx_advance_to_next_instruction(context); mshv_tdx_clear_exit_reason(context); return 0; } -static u32 *mshv_tdx_vapic_irr(void) -{ - return (u32 *)((char *)page_address(tdx_this_apic_page()) + APIC_IRR); -} - -/* - * Pull the interrupts in the `proxy_irr` field into the VAPIC page - * Returns true if an exit to user-space is required (sync tmr state) - */ -static bool mshv_tdx_pull_proxy_irr(struct mshv_vtl_run *run) -{ - u32 *apic_page_irr = mshv_tdx_vapic_irr(); - - if (!xchg(&run->scan_proxy_irr, 0)) - return false; - - for (int i = 0; i < 8; i++) { - const u32 val = xchg(&run->proxy_irr[i], 0); - - if (!val) - continue; - - if (run->proxy_irr_exit_mask[i] & val) { - /* - * This vector was previously used for a level-triggered interrupt. - * An edge-triggered interrupt has now arrived, so we need to involve - * user-space to clear its copy of the tmr. - * Put the interrupt(s) back on the run page so it can do so. - * nb atomic_t cast: See comment in mshv_tdx_handle_simple_icr_write - */ - atomic_or(val, (atomic_t *)(&run->proxy_irr[i])); - WRITE_ONCE(run->scan_proxy_irr, 1); - return true; - } - - /* - * IRR is non-contiguous. - * Each bank is 4 bytes with 12 bytes of padding between banks. - */ - apic_page_irr[i * 4] |= val; - } - - return false; -} - /* * Checks if exit reason is due: * - An interrupt for the L1 @@ -1839,8 +1966,188 @@ static bool mshv_tdx_try_handle_exit(struct mshv_vtl_run *run) return !ret_to_user; } + #endif /* CONFIG_INTEL_TDX_GUEST */ +#if defined(CONFIG_SEV_GUEST) + +static struct page *snp_this_savic_page(void) +{ + return *this_cpu_ptr(&mshv_vtl_per_cpu.snp_secure_avic_page); +} + +static struct sev_es_save_area *snp_this_vmsa(void) +{ + struct page *vmsa_page = *this_cpu_ptr(&mshv_vtl_per_cpu.vmsa_page); + + return page_address(vmsa_page); +} + +/* + * Sets a benign guest error code so that there won't be another + * #VMEXIT for the just processed one and marks the VMSA as + * runnable. 
+ */ +static void mshv_snp_clear_exit_code(struct sev_es_save_area *vmsa, bool int_shadow) +{ + if (int_shadow) + vmsa->vintr_ctrl |= V_INT_SHADOW_MASK; + else + vmsa->vintr_ctrl &= ~V_INT_SHADOW_MASK; + vmsa->guest_exit_code = SVM_EXIT_INTR; + vmsa->vintr_ctrl &= ~V_GUEST_BUSY_MASK; +} + +/* + * Try to handle the incomplete IPI SEV-SNP exit. + * + * Returns true if the exit was handled entirely in kernel, and the VMPL should be re-entered. + * Returns false if the exit must be handled by user-space. + */ +static bool mshv_snp_try_handle_incomplete_ipi(struct mshv_vtl_run *run, + struct sev_es_save_area *vmsa) +{ + u32 icr_lo = vmsa->guest_exit_info_1; + u32 dest = vmsa->guest_exit_info_1 >> 32; + + /* Route the INIT, SIPI, NMI to the user mode for now. */ + if ((icr_lo & APIC_DM_FIXED_MASK) != APIC_DM_FIXED) + return false; + /* Can handle only edge-triggered interrupts. */ + if (icr_lo & APIC_INT_LEVELTRIG) + return false; + + if (mshv_snp_handle_simple_icr_write(icr_lo, dest)) + return false; + + return true; +} + +/* + * Try to handle an SEV-SNP exit entirely in kernel, to avoid the overhead of a + * user<->kernel transition. + * + * Returns true if the exit was handled entirely in kernel, and the VMPL should be re-entered. + * Returns false if the exit must be handled by user-space. + */ +static bool mshv_snp_try_handle_exit(struct mshv_vtl_run *run) +{ + const bool intr_inject = MSHV_VTL_OFFLOAD_FLAG_INTR_INJECT & run->offload_flags; + const bool x2apic = MSHV_VTL_OFFLOAD_FLAG_X2APIC & run->offload_flags; + struct sev_es_save_area *vmsa; + u8 *offload_flags; + + if (!intr_inject) + return false; + + vmsa = snp_this_vmsa(); + + switch (vmsa->guest_exit_code) { + case SVM_EXIT_AVIC_INCOMPLETE_IPI: + if (x2apic && mshv_snp_try_handle_incomplete_ipi(run, vmsa)) + goto handled; + break; + case SVM_EXIT_HLT: + run->flags |= MSHV_VTL_RUN_FLAG_HALTED; + run->offload_flags |= MSHV_VTL_OFFLOAD_FLAG_HALT_HLT; + goto handled; + case SVM_EXIT_IDLE_HLT: + run->flags |= MSHV_VTL_RUN_FLAG_HALTED; + run->offload_flags |= MSHV_VTL_OFFLOAD_FLAG_HALT_IDLE; + goto handled; + case SVM_EXIT_MSR: + if (vmsa->rcx == HV_X64_MSR_GUEST_IDLE && !(vmsa->guest_exit_info_1 & 1)) { + /* The guest indicates it's idle by reading this synthetic MSR. 
*/ + vmsa->rax = 0; + vmsa->rdx = 0; + vmsa->rip = vmsa->guest_nrip; + + run->offload_flags |= MSHV_VTL_OFFLOAD_FLAG_HALT_IDLE; + run->flags |= MSHV_VTL_RUN_FLAG_HALTED; + + goto handled; + } + break; + default: + break; + } + + offload_flags = &run->offload_flags; + (*offload_flags) &= ~MSHV_VTL_OFFLOAD_FLAG_HALT_HLT; + (*offload_flags) &= ~MSHV_VTL_OFFLOAD_FLAG_HALT_IDLE; + if (!(*offload_flags & MSHV_VTL_OFFLOAD_FLAG_HALT_OTHER)) + run->flags &= ~MSHV_VTL_RUN_FLAG_HALTED; + + return false; + +handled: + + mshv_snp_clear_exit_code(vmsa, false); + return true; +} + +static bool mshv_snp_try_handle_intercept(struct mshv_vtl_run *run) +{ + struct hv_vp_assist_page *hvp = hv_vp_assist_page[smp_processor_id()]; + u32 msg_type = HVMSG_NONE; + struct hv_message *msg = NULL; + bool is_interrupt = false; + + switch (hvp->vtl_entry_reason) { + case MSHV_ENTRY_REASON_INTERRUPT: + if (!mshv_vsm_capabilities.intercept_page_available) { + struct hv_per_cpu_context *mshv_cpu = this_cpu_ptr(hv_context.cpu_context); + void *synic_message_page = mshv_cpu->synic_message_page; + + if (likely(synic_message_page)) { + msg = (struct hv_message *)synic_message_page + + HV_SYNIC_INTERCEPTION_SINT_INDEX; + is_interrupt = true; + } + } + break; + + case MSHV_ENTRY_REASON_INTERCEPT: + WARN_ON(!mshv_vsm_capabilities.intercept_page_available); + msg = (struct hv_message *)hvp->intercept_message; + break; + + default: + panic("unknown entry reason: %d", hvp->vtl_entry_reason); + } + + if (!msg) + return true; + msg_type = READ_ONCE(msg->header.message_type); + + switch (msg_type) { + case HVMSG_NONE: + break; + case HVMSG_X64_EXCEPTION_INTERCEPT: + { + struct hv_x64_exception_intercept_message *expt_msg = + (struct hv_x64_exception_intercept_message *)msg->u.payload; + if (expt_msg->exception_vector != X86_TRAP_VC) + return false; + } + break; + case HVMSG_SYNIC_SINT_DELIVERABLE: + return false; + case HVMSG_X64_HALT: + run->flags |= MSHV_VTL_RUN_FLAG_HALTED; + run->offload_flags |= MSHV_VTL_OFFLOAD_FLAG_HALT_HLT; + break; + default: + return false; + } + + if (is_interrupt) + vmbus_signal_eom(msg, msg_type); + + return true; +} +#endif /* CONFIG_SEV_GUEST */ + /* * Attempts to directly inject the interrupts in the proxy_irr field. * Returns true if an exit to user-space is required. 
@@ -1849,14 +2156,20 @@ static bool mshv_pull_proxy_irr(struct mshv_vtl_run *run) { bool ret = READ_ONCE(run->scan_proxy_irr); - if (!hv_isolation_type_tdx() || - !(run->offload_flags & MSHV_VTL_OFFLOAD_FLAG_INTR_INJECT)) + if (!(run->offload_flags & MSHV_VTL_OFFLOAD_FLAG_INTR_INJECT)) return ret; + if (hv_isolation_type_tdx()) { #ifdef CONFIG_INTEL_TDX_GUEST - ret = mshv_tdx_pull_proxy_irr(run); - mshv_tdx_update_rvi_halt(run); + ret = __mshv_pull_proxy_irr(run, tdx_this_apic_page()); + mshv_tdx_update_rvi_halt(run); #endif + } else if (hv_isolation_type_snp()) { +#ifdef CONFIG_SEV_GUEST + ret = __mshv_pull_proxy_irr(run, snp_this_savic_page()); +#endif + } + return ret; } @@ -1922,10 +2235,18 @@ static int mshv_vtl_ioctl_return_to_lower_vtl(void) } if (hv_isolation_type_tdx()) { +#ifdef CONFIG_INTEL_TDX_GUEST if (mshv_tdx_try_handle_exit(mshv_vtl_this_run())) continue; /* Exit handled entirely in kernel */ else goto done; +#endif + } else if (hv_isolation_type_snp()) { +#ifdef CONFIG_SEV_GUEST + if (mshv_snp_try_handle_intercept(mshv_vtl_this_run()) && + mshv_snp_try_handle_exit(mshv_vtl_this_run())) + continue; /* Exit handled entirely in kernel */ +#endif } hvp = hv_vp_assist_page[smp_processor_id()]; @@ -2734,7 +3055,9 @@ static void guest_vsm_vmsa_pfn_this_cpu(void *arg) cpu = get_cpu(); vmsa_guest_vsm_page = *this_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page); if (!vmsa_guest_vsm_page) { - if (mshv_configure_vmsa_page(1, per_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page, cpu))) + if (mshv_snp_configure_vmsa_page( + 1, + per_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page, cpu))) *pfn = -ENOMEM; else vmsa_guest_vsm_page = *this_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page); @@ -2765,6 +3088,38 @@ static long mshv_vtl_ioctl_guest_vsm_vmsa_pfn(void __user *user_arg) return ret; } + +static void secure_avic_vtl0_this_cpu(void *arg) +{ + struct page *snp_secure_avic_page; + u64 *pfn = arg; + + snp_secure_avic_page = *this_cpu_ptr(&mshv_vtl_per_cpu.snp_secure_avic_page); + + *pfn = snp_secure_avic_page ? page_to_pfn(snp_secure_avic_page) : -EOPNOTSUPP; +} + +static long mshv_vtl_ioctl_secure_avic_vtl0_pfn(void __user *user_arg) +{ + u64 pfn; + u32 cpu_id; + long ret; + + ret = copy_from_user(&cpu_id, user_arg, sizeof(cpu_id)) ? -EFAULT : 0; + if (ret) + return ret; + + ret = smp_call_function_single(cpu_id, secure_avic_vtl0_this_cpu, &pfn, true); + if (ret) + return ret; + ret = (long)pfn; + if (ret < 0) + return ret; + + ret = copy_to_user(user_arg, &pfn, sizeof(pfn)) ? 
-EFAULT : 0; + + return ret; +} #endif static long @@ -2830,6 +3185,9 @@ mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) case MSHV_VTL_GUEST_VSM_VMSA_PFN: ret = mshv_vtl_ioctl_guest_vsm_vmsa_pfn((void __user *)arg); break; + case MSHV_VTL_SECURE_AVIC_VTL0_PFN: + ret = mshv_vtl_ioctl_secure_avic_vtl0_pfn((void __user *)arg); + break; #endif default: @@ -2842,7 +3200,7 @@ mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf) { - struct page *page; + struct page *page = NULL; int cpu = vmf->pgoff & MSHV_PG_OFF_CPU_MASK; int real_off = vmf->pgoff >> MSHV_REAL_OFF_SHIFT; @@ -2870,26 +3228,20 @@ static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS; page_ptr_ptr = per_cpu_ptr(&mshv_vtl_per_cpu.vmsa_guest_vsm_page, cpu); if (!*page_ptr_ptr) { - if (mshv_configure_vmsa_page(1, page_ptr_ptr) < 0) + if (mshv_snp_configure_vmsa_page(1, page_ptr_ptr) < 0) return VM_FAULT_SIGBUS; } page = *page_ptr_ptr; - } else if (real_off == MSHV_VMSA_PAGE_OFFSET) { - if (!hv_isolation_type_snp()) - return VM_FAULT_SIGBUS; - page = *per_cpu_ptr(&mshv_vtl_per_cpu.vmsa_page, cpu); -#ifdef CONFIG_INTEL_TDX_GUEST } else if (real_off == MSHV_APIC_PAGE_OFFSET) { - if (!hv_isolation_type_tdx()) - return VM_FAULT_SIGBUS; - - page = tdx_apic_page(cpu); -#endif + page = mshv_apic_page(cpu); #endif } else { return VM_FAULT_NOPAGE; } + if (!page) + return VM_FAULT_SIGBUS; + get_page(page); vmf->page = page; @@ -3479,9 +3831,11 @@ static int __init mshv_vtl_init(void) goto free_dev; } - ret = mshv_tdx_create_apicid_to_cpuid_mapping(dev); +#ifdef CONFIG_X86_64 + ret = mshv_create_apicid_to_cpuid_mapping(dev); if (ret) goto free_dev; +#endif mshv_vtl_return_call_init(mshv_vsm_page_offsets.vtl_return_offset); ret = hv_vtl_setup_synic(); @@ -3570,7 +3924,7 @@ static int __init mshv_vtl_init(void) static void __exit mshv_vtl_exit(void) { ms_hyperv_free_redirected_intr(); - mshv_tdx_free_apicid_to_cpuid_mapping(); + mshv_free_apicid_to_cpuid_mapping(); misc_deregister(&mshv_vtl_sint_dev); misc_deregister(&mshv_vtl_low); device_del(mem_dev); diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 15a9c54a263c6..6c450d7aec48e 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -342,6 +342,7 @@ bool hv_is_isolation_supported(void); bool hv_isolation_type_snp(void); u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size); u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2); +void hv_enable_coco_interrupt(unsigned int cpu, unsigned int vector, bool set); void hyperv_cleanup(void); bool hv_query_ext_cap(u64 cap_query); void hv_setup_dma_ops(struct device *dev, bool coherent); diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index ea7f5585fb7b7..e6a5158b7ccf8 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h @@ -1141,11 +1141,15 @@ enum hv_register_name { /* AMD SEV SNP configuration register */ HV_X64_REGISTER_SEV_CONTROL = 0x00090040, + HV_X64_REGISTER_SEV_AVIC_GPA = 0x00090043, #endif - HV_REGISTER_REG_PAGE = 0x0009001C, + HV_REGISTER_REG_PAGE = 0x0009001C, }; +#define HV_X64_REGISTER_VSM_VP_STATUS 0x000D0003 +#define HV_X64_VTL_MASK GENMASK(3, 0) + /* * Arch compatibility regs for use with hv_set/get_register */ @@ -1371,6 +1375,36 @@ struct hv_input_get_vp_registers { u32 names[]; } __packed; +union hv_x64_register_sev_gpa_page { + u64 u64; + struct { + u64 enabled:1; + u64 
reserved:11; + u64 pagenumber:52; + }; +} __packed; + +struct hv_set_vp_registers_input { + struct { + u64 partitionid; + u32 vpindex; + u8 inputvtl; + u8 padding[3]; + } header; + struct { + u32 name; + u32 padding1; + u64 padding2; + union { + union hv_register_value value; + struct { + u64 valuelow; + u64 valuehigh; + }; + }; + } element[]; +} __packed; + struct hv_input_set_vp_registers { u64 partition_id; u32 vp_index; diff --git a/include/hyperv/hvhdk.h b/include/hyperv/hvhdk.h index 775b5846a92f6..ae8c96799c01b 100644 --- a/include/hyperv/hvhdk.h +++ b/include/hyperv/hvhdk.h @@ -742,6 +742,15 @@ union hv_x64_memory_access_info { } __packed; }; +union hv_x64_exception_info { + __u8 as_uint8; + struct { + __u8 error_code_valid:1; + __u8 software_exception:1; + __u8 reserved:6; + } __packed; +}; + struct hv_x64_memory_intercept_message { struct hv_x64_intercept_message_header header; u32 cache_type; /* enum hv_cache_type */ @@ -754,6 +763,35 @@ struct hv_x64_memory_intercept_message { u8 instruction_bytes[16]; } __packed; +struct hv_x64_exception_intercept_message { + struct hv_x64_intercept_message_header header; + __u16 exception_vector; + union hv_x64_exception_info exception_info; + __u8 instruction_byte_count; + __u32 error_code; + __u64 exception_parameter; + __u64 reserved; + __u8 instruction_bytes[16]; + struct hv_x64_segment_register ds_segment; + struct hv_x64_segment_register ss_segment; + __u64 rax; + __u64 rcx; + __u64 rdx; + __u64 rbx; + __u64 rsp; + __u64 rbp; + __u64 rsi; + __u64 rdi; + __u64 r8; + __u64 r9; + __u64 r10; + __u64 r11; + __u64 r12; + __u64 r13; + __u64 r14; + __u64 r15; +} __packed; + /* * Dispatch state for the VP communicated by the hypervisor to the * VP-dispatching thread in the root on return from HVCALL_DISPATCH_VP. diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h index 540d325d9b70c..6f34cacb4049a 100644 --- a/include/uapi/linux/mshv.h +++ b/include/uapi/linux/mshv.h @@ -460,6 +460,7 @@ struct mshv_partition_time { #define MSHV_VTL_RMPQUERY _IOW(MSHV_IOCTL, 0x35, struct mshv_rmpquery) #define MSHV_VTL_INVLPGB _IOW(MSHV_IOCTL, 0x36, struct mshv_invlpgb) #define MSHV_VTL_TLBSYNC _IO(MSHV_IOCTL, 0x37) +#define MSHV_VTL_SECURE_AVIC_VTL0_PFN _IOWR(MSHV_IOCTL, 0x40, __u64) /* VMBus device IOCTLs */ #define MSHV_SINT_SIGNAL_EVENT _IOW(MSHV_IOCTL, 0x22, struct mshv_vtl_signal_event)
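
A note on the register encoding used by hv_set_savic_backing_page() above: the value written to HV_X64_REGISTER_SEV_AVIC_GPA packs the backing-page GFN into bits 63:12, leaves bits 11:1 reserved, and uses bit 0 as the enable flag, matching the union hv_x64_register_sev_gpa_page added to hvgdk_mini.h. A minimal stand-alone sketch of that encoding (the helper name and the example GPA are illustrative, not part of the patch):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative encoder mirroring union hv_x64_register_sev_gpa_page:
 *   bit 0      - enabled
 *   bits 11:1  - reserved (zero)
 *   bits 63:12 - pagenumber (GFN of the APIC backing page)
 */
static uint64_t sev_avic_gpa_reg(uint64_t gfn, int enabled)
{
	return ((gfn & ((1ULL << 52) - 1)) << 12) | (enabled ? 1ULL : 0ULL);
}

int main(void)
{
	uint64_t gpa = 0x12345000ULL;	/* example backing-page GPA (4K aligned) */
	uint64_t gfn = gpa >> 12;	/* PAGE_SHIFT for 4K pages */
	uint64_t reg = sev_avic_gpa_reg(gfn, 1);

	/* For an aligned GPA the result is simply the GPA with the enable bit set. */
	assert(reg == (gpa | 1));
	printf("HV_X64_REGISTER_SEV_AVIC_GPA value: %#llx\n", (unsigned long long)reg);
	return 0;
}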
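
The logical-destination walk in mshv_get_logical_cpumask() assumes the x2APIC logical ID format: the upper 16 bits of the destination select a cluster and the lower 16 bits are a bitmask of up to 16 CPUs within it, so each set bit maps to physical APIC ID (cluster << 4) | bit. A small stand-alone sketch of that decode (function name and example value are illustrative):

#include <stdint.h>
#include <stdio.h>

/*
 * Decode an x2APIC logical destination into physical APIC IDs using the
 * same cluster/bitmask split as mshv_get_logical_cpumask():
 * physical_id = (dest >> 16 << 4) | bit.
 */
static void decode_logical_dest(uint32_t dest)
{
	uint32_t cluster = dest >> 16;
	uint16_t mask = (uint16_t)dest;

	while (mask) {
		unsigned int bit = 31 - __builtin_clz(mask);	/* fls(mask) - 1 */
		printf("logical dest %#x -> physical APIC ID %#x\n",
		       dest, (cluster << 4) | bit);
		mask &= ~(1U << bit);
	}
}

int main(void)
{
	/* cluster 2, bits 1 and 3 set -> physical APIC IDs 0x23 and 0x21 */
	decode_logical_dest(0x0002000a);
	return 0;
}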
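
The apic_page_irr[i * 4] indexing in __mshv_pull_proxy_irr() follows from the APIC register layout: the eight 32-bit IRR banks start at offset 0x200 and are spaced 16 bytes (four u32 slots) apart. A short sketch of the offset arithmetic (the helper is illustrative; 0x200 is the architectural APIC_IRR offset):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define APIC_IRR	0x200	/* architectural offset of IRR bank 0 */

/* Byte offset of IRR bank 'i': the eight banks are 16 bytes apart. */
static unsigned int irr_bank_offset(unsigned int i)
{
	return APIC_IRR + i * 0x10;
}

int main(void)
{
	for (unsigned int i = 0; i < 8; i++) {
		/* Same slot __mshv_pull_proxy_irr() writes via apic_page_irr[i * 4]. */
		assert(irr_bank_offset(i) == APIC_IRR + (i * 4) * sizeof(uint32_t));
		printf("IRR bank %u at APIC page offset %#x\n", i, irr_bank_offset(i));
	}
	return 0;
}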