diff --git a/openhcl/hcl/src/ioctl.rs b/openhcl/hcl/src/ioctl.rs
index bb90fa4f74..29ec231b35 100644
--- a/openhcl/hcl/src/ioctl.rs
+++ b/openhcl/hcl/src/ioctl.rs
@@ -2113,6 +2113,7 @@ impl<'a, T: Backing<'a>> ProcessorRunner<'a, T> {
                 | HvX64RegisterName::VsmVpWaitForTlbLock
                 | HvX64RegisterName::VsmVpSecureConfigVtl0
                 | HvX64RegisterName::VsmVpSecureConfigVtl1
+                | HvX64RegisterName::CrInterceptControl
             )
         ));
         self.set_vp_registers_hvcall_inner(vtl, &registers)
diff --git a/openhcl/virt_mshv_vtl/src/lib.rs b/openhcl/virt_mshv_vtl/src/lib.rs
index 3e3e6fb036..e5fb25fced 100644
--- a/openhcl/virt_mshv_vtl/src/lib.rs
+++ b/openhcl/virt_mshv_vtl/src/lib.rs
@@ -350,6 +350,17 @@ struct GuestVsmVpState {
     /// next exit to VTL 0.
     #[inspect(with = "|x| x.as_ref().map(inspect::AsDebug)")]
     vtl0_exit_pending_event: Option<hvdef::HvX64PendingExceptionEvent>,
+    reg_intercept: SecureRegisterInterceptState,
+}
+
+#[cfg(guest_arch = "x86_64")]
+impl GuestVsmVpState {
+    fn new() -> Self {
+        GuestVsmVpState {
+            vtl0_exit_pending_event: None,
+            reg_intercept: Default::default(),
+        }
+    }
 }
 
 #[cfg(guest_arch = "x86_64")]
@@ -386,7 +397,10 @@ impl UhCvmVpState {
         let apic_base = virt::vp::Apic::at_reset(&inner.caps, vp_info).apic_base;
         let lapics = VtlArray::from_fn(|vtl| {
             let apic_set = &cvm_partition.lapic[vtl];
-            let mut lapic = apic_set.add_apic(vp_info);
+
+            // The APIC is software-enabled after reset for secure VTLs to
+            // maintain compatibility with released versions of the secure kernel.
+            let mut lapic = apic_set.add_apic(vp_info, vtl == Vtl::Vtl1);
             // Initialize APIC base to match the reset VM state.
             lapic.set_apic_base(apic_base).unwrap();
             // Only the VTL 0 non-BSP LAPICs should be in the WaitForSipi state.
@@ -410,6 +424,19 @@ impl UhCvmVpState {
         }
     }
 }
+
+#[cfg(guest_arch = "x86_64")]
+#[derive(Inspect, Default)]
+/// Configuration of VTL 1 registration for intercepts on certain registers
+pub struct SecureRegisterInterceptState {
+    #[inspect(with = "|x| inspect::AsHex(u64::from(*x))")]
+    intercept_control: hvdef::HvRegisterCrInterceptControl,
+    cr0_mask: u64,
+    cr4_mask: u64,
+    // Writes to X86X_IA32_MSR_MISC_ENABLE are dropped, so this mask is kept
+    // only so that get_vp_register returns the value last set with set_vp_register.
+    ia32_misc_enable_mask: u64,
+}
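Note on the mask semantics: cr0_mask and cr4_mask guard individual bits rather than whole registers, so a VTL 0 write is only intercepted when it would change a guarded bit. That is the `(current ^ new) & mask != 0` predicate used by cvm_is_protected_register_write later in this change. A minimal self-contained sketch of the check (helper name and values are illustrative, not part of this patch):

    /// Returns true when `new` differs from `current` in a bit guarded by `mask`.
    fn write_needs_intercept(current: u64, new: u64, mask: u64) -> bool {
        (current ^ new) & mask != 0
    }

    #[test]
    fn mask_semantics() {
        const CR0_PG: u64 = 1 << 31;
        let cr0: u64 = 0x8005_0033; // PG | AM | WP | NE | ET | MP | PE
        // Clearing the guarded PG bit must trap.
        assert!(write_needs_intercept(cr0, cr0 & !CR0_PG, CR0_PG));
        // Rewriting the same value must not.
        assert!(!write_needs_intercept(cr0, cr0, CR0_PG));
    }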
 
 #[derive(Inspect)]
 /// Partition-wide state for CVMs.
 struct UhCvmPartitionState {
diff --git a/openhcl/virt_mshv_vtl/src/processor/hardware_cvm/mod.rs b/openhcl/virt_mshv_vtl/src/processor/hardware_cvm/mod.rs
index cdadf68e70..b72ba0dfc7 100644
--- a/openhcl/virt_mshv_vtl/src/processor/hardware_cvm/mod.rs
+++ b/openhcl/virt_mshv_vtl/src/processor/hardware_cvm/mod.rs
@@ -129,6 +129,15 @@ impl UhHypercallHandler<'_, '_, T, B> {
                 }
                 Ok(())
             }
+            HvX64RegisterName::CrInterceptControl
+            | HvX64RegisterName::CrInterceptCr0Mask
+            | HvX64RegisterName::CrInterceptCr4Mask
+            | HvX64RegisterName::CrInterceptIa32MiscEnableMask => {
+                if vtl != GuestVtl::Vtl1 {
+                    return Err(HvError::AccessDenied);
+                }
+                Ok(())
+            }
             _ => Ok(()),
         }
     }
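Note: this gate makes the CrIntercept* registers writable only from VTL 1; VTL 0 gets HvError::AccessDenied. For orientation, a hypothetical VTL 1 call sequence (the `set_vp_register` closure stands in for the real SET_VP_REGISTERS hypercall path; it is not an API added by this change) would program the mask before relying on it:

    // Hypothetical sketch of a VTL 1 client guarding CR0.PG writes in VTL 0.
    fn guard_cr0_pg(mut set_vp_register: impl FnMut(HvX64RegisterName, u64)) {
        const CR0_PG: u64 = 1 << 31;
        // Program the bit mask first...
        set_vp_register(HvX64RegisterName::CrInterceptCr0Mask, CR0_PG);
        // ...then the control word. No cr0_write bit is needed here, since
        // mask-based interception is evaluated independently of the control bits.
        let control = hvdef::HvRegisterCrInterceptControl::new().with_msr_efer_write(true);
        set_vp_register(HvX64RegisterName::CrInterceptControl, u64::from(control));
    }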
@@ -369,6 +378,30 @@ impl UhHypercallHandler<'_, '_, T, B> {
                 .lapic
                 .apic_base()
                 .into()),
+            control_reg @ (HvX64RegisterName::CrInterceptControl
+            | HvX64RegisterName::CrInterceptCr0Mask
+            | HvX64RegisterName::CrInterceptCr4Mask
+            | HvX64RegisterName::CrInterceptIa32MiscEnableMask) => {
+                let vtl1 = self
+                    .vp
+                    .backing
+                    .cvm_state_mut()
+                    .vtl1
+                    .as_ref()
+                    .ok_or(HvError::InvalidVtlState)?;
+                Ok(match control_reg {
+                    HvX64RegisterName::CrInterceptControl => {
+                        u64::from(vtl1.reg_intercept.intercept_control)
+                    }
+                    HvX64RegisterName::CrInterceptCr0Mask => vtl1.reg_intercept.cr0_mask,
+                    HvX64RegisterName::CrInterceptCr4Mask => vtl1.reg_intercept.cr4_mask,
+                    HvX64RegisterName::CrInterceptIa32MiscEnableMask => {
+                        vtl1.reg_intercept.ia32_misc_enable_mask
+                    }
+                    _ => unreachable!(),
+                }
+                .into())
+            }
             _ => {
                 tracing::error!(
                     ?name,
@@ -412,6 +445,9 @@ impl UhHypercallHandler<'_, '_, T, B> {
             | HvX64RegisterName::SysenterEip
             | HvX64RegisterName::SysenterEsp
             | HvX64RegisterName::Sfmask) => {
+                // Checked that the intercepted VTL is strictly higher than the
+                // target VTL, so no need to check for registered intercepts.
+
                 let mut msrs = self
                     .vp
                     .access_state(vtl.into())
@@ -432,13 +468,16 @@ impl UhHypercallHandler<'_, '_, T, B> {
                     .set_virtual_msrs(&msrs)
                     .map_err(Self::reg_access_error_to_hv_err)
             }
-            HvX64RegisterName::TscAux => self
-                .vp
-                .access_state(vtl.into())
-                .set_tsc_aux(&virt::vp::TscAux {
-                    value: reg.value.as_u64(),
-                })
-                .map_err(Self::reg_access_error_to_hv_err),
+            HvX64RegisterName::TscAux => {
+                // Checked that the intercepted VTL is strictly higher than the
+                // target VTL, so no need to check for registered intercepts.
+                self.vp
+                    .access_state(vtl.into())
+                    .set_tsc_aux(&virt::vp::TscAux {
+                        value: reg.value.as_u64(),
+                    })
+                    .map_err(Self::reg_access_error_to_hv_err)
+            }
             debug_reg @ (HvX64RegisterName::Dr3
             | HvX64RegisterName::Dr7) => {
                 let mut debug_registers = self
@@ -474,6 +513,8 @@ impl UhHypercallHandler<'_, '_, T, B> {
             | HvX64RegisterName::Rip
             | HvX64RegisterName::Rflags
            | HvX64RegisterName::Rsp) => {
+                // Checked that the intercepted VTL is strictly higher than the
+                // target VTL, so no need to check for registered intercepts.
                 let mut registers = self
                     .vp
                     .access_state(vtl.into())
@@ -554,6 +595,39 @@ impl UhHypercallHandler<'_, '_, T, B> {
                 self.set_vtl0_pending_event(HvX64PendingExceptionEvent::from(reg.value.as_u128()))
             }
+            HvX64RegisterName::CrInterceptControl => {
+                if vtl != GuestVtl::Vtl1 {
+                    return Err(HvError::AccessDenied);
+                }
+
+                self.set_vtl1_cr_intercept_control(hvdef::HvRegisterCrInterceptControl::from(
+                    reg.value.as_u64(),
+                ))
+            }
+            mask_reg @ (HvX64RegisterName::CrInterceptCr0Mask
+            | HvX64RegisterName::CrInterceptCr4Mask
+            | HvX64RegisterName::CrInterceptIa32MiscEnableMask) => {
+                let vtl1 = self
+                    .vp
+                    .backing
+                    .cvm_state_mut()
+                    .vtl1
+                    .as_mut()
+                    .ok_or(HvError::InvalidVtlState)?;
+                match mask_reg {
+                    HvX64RegisterName::CrInterceptCr0Mask => {
+                        vtl1.reg_intercept.cr0_mask = reg.value.as_u64();
+                    }
+                    HvX64RegisterName::CrInterceptCr4Mask => {
+                        vtl1.reg_intercept.cr4_mask = reg.value.as_u64();
+                    }
+                    HvX64RegisterName::CrInterceptIa32MiscEnableMask => {
+                        vtl1.reg_intercept.ia32_misc_enable_mask = reg.value.as_u64();
+                    }
+                    _ => unreachable!(),
+                }
+                Ok(())
+            }
             _ => {
                 tracing::error!(
                     ?reg,
@@ -639,6 +713,54 @@ impl UhHypercallHandler<'_, '_, T, B> {
         Ok(())
     }
+
+    fn set_vtl1_cr_intercept_control(
+        &mut self,
+        intercept_control: hvdef::HvRegisterCrInterceptControl,
+    ) -> HvResult<()> {
+        let supported_controls = hvdef::HvRegisterCrInterceptControl::new()
+            .with_cr0_write(true)
+            .with_cr4_write(true)
+            .with_xcr0_write(true)
+            .with_ia32_misc_enable_write(true)
+            .with_msr_lstar_write(true)
+            .with_msr_star_write(true)
+            .with_msr_cstar_write(true)
+            .with_apic_base_msr_write(true)
+            .with_msr_efer_write(true)
+            .with_gdtr_write(true)
+            .with_idtr_write(true)
+            .with_ldtr_write(true)
+            .with_tr_write(true)
+            .with_msr_sysenter_cs_write(true)
+            .with_msr_sysenter_eip_write(true)
+            .with_msr_sysenter_esp_write(true)
+            .with_msr_sfmask_write(true)
+            .with_msr_tsc_aux_write(true)
+            .with_msr_xss_write(true)
+            .with_msr_scet_write(true)
+            .with_msr_pls_ssp_write(true)
+            .with_msr_interrupt_ssp_table_addr_write(true);
+
+        if u64::from(intercept_control) & !u64::from(supported_controls) != 0 {
+            return Err(HvError::InvalidRegisterValue);
+        }
+
+        // TODO TDX GUEST VSM
+        if let virt::IsolationType::Snp = self.vp.partition.isolation {
+            B::cr_intercept_registration(self.vp, intercept_control);
+        }
+
+        self.vp
+            .backing
+            .cvm_state_mut()
+            .vtl1
+            .as_mut()
+            .unwrap()
+            .reg_intercept
+            .intercept_control = intercept_control;
+        Ok(())
+    }
 }
 
 impl hv1_hypercall::ModifySparseGpaPageHostVisibility
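Note: set_vtl1_cr_intercept_control builds an allow-list value and rejects any request containing a bit outside it, which is how the read intercepts (e.g. ia32_misc_enable_read) and the SGX launch-control bit stay unsupported without being listed. The pattern in isolation (toy constants, illustrative only):

    // Reject-unknown-bits validation, as a standalone sketch.
    const SUPPORTED: u64 = 0b0111; // three supported controls in this toy
    fn validate(requested: u64) -> Result<u64, &'static str> {
        if requested & !SUPPORTED != 0 {
            return Err("unsupported control bits set");
        }
        Ok(requested)
    }

    #[test]
    fn rejects_reserved_bits() {
        assert!(validate(0b0101).is_ok());
        assert!(validate(0b1000).is_err());
    }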
@@ -1431,7 +1553,7 @@ impl UhProcessor<'_, B> {
         // may cause the partition to be constructed improperly.
         match CpuidFunction(leaf) {
             CpuidFunction::VersionAndFeatures => {
-                let cr4 = B::cr4_for_cpuid(self, vtl);
+                let cr4 = B::cr4(self, vtl);
                 ecx = cpuid::VersionAndFeaturesEcx::from(ecx)
                     .with_os_xsave(cr4 & x86defs::X64_CR4_OSXSAVE != 0)
                     .into();
@@ -1665,9 +1787,7 @@ impl UhProcessor<'_, B> {
         if vtl == GuestVtl::Vtl1 {
             assert!(*self.cvm_vp_inner().vtl1_enable_called.lock());
             if let InitialVpContextOperation::EnableVpVtl = start_enable_vtl_state.operation {
-                self.backing.cvm_state_mut().vtl1 = Some(crate::GuestVsmVpState {
-                    vtl0_exit_pending_event: None,
-                });
+                self.backing.cvm_state_mut().vtl1 = Some(crate::GuestVsmVpState::new());
             }
         }
 
@@ -1678,6 +1798,58 @@ impl UhProcessor<'_, B> {
             "setting up vp with initial registers"
         );
 
+        if vtl == GuestVtl::Vtl0 {
+            let is_protected_register = |reg, value| -> Result<(), UhRunVpError> {
+                if self.cvm_is_protected_register_write(vtl, reg, value) {
+                    // In this case, it doesn't matter what VTL the calling
+                    // VP was in; just fail the startup. No need to send an
+                    // intercept message.
+                    return Err(UhRunVpError::StateAccessDenied);
+                }
+                Ok(())
+            };
+
+            let hvdef::hypercall::InitialVpContextX64 {
+                rip: _,
+                rsp: _,
+                rflags: _,
+                cs: _,
+                ds: _,
+                es: _,
+                fs: _,
+                gs: _,
+                ss: _,
+                tr,
+                ldtr,
+                idtr,
+                gdtr,
+                efer: _,
+                cr0,
+                cr3: _,
+                cr4,
+                msr_cr_pat: _,
+            } = start_enable_vtl_state.context;
+
+            is_protected_register(HvX64RegisterName::Cr0, cr0)?;
+            is_protected_register(HvX64RegisterName::Cr4, cr4)?;
+            is_protected_register(
+                HvX64RegisterName::Gdtr,
+                hvdef::HvRegisterValue::from(gdtr).as_u64(),
+            )?;
+            is_protected_register(
+                HvX64RegisterName::Idtr,
+                hvdef::HvRegisterValue::from(idtr).as_u64(),
+            )?;
+            is_protected_register(
+                HvX64RegisterName::Ldtr,
+                hvdef::HvRegisterValue::from(ldtr).as_u64(),
+            )?;
+            is_protected_register(
+                HvX64RegisterName::Tr,
+                hvdef::HvRegisterValue::from(tr).as_u64(),
+            )?;
+        }
+
         hv1_emulator::hypercall::set_x86_vp_context(
             &mut self.access_state(vtl.into()),
             &(start_enable_vtl_state.context),
@@ -1820,6 +1992,137 @@ impl UhProcessor<'_, B> {
         self.backing.cvm_state().vtl1.is_some()
     }
 
+    /// Returns whether a higher VTL has registered for write intercepts on the
+    /// register.
+    pub(crate) fn cvm_is_protected_register_write(
+        &self,
+        vtl: GuestVtl,
+        reg: HvX64RegisterName,
+        value: u64,
+    ) -> bool {
+        if vtl == GuestVtl::Vtl0 && self.backing.cvm_state().vtl1.is_some() {
+            let configured_intercepts = &self
+                .backing
+                .cvm_state()
+                .vtl1
+                .as_ref()
+                .unwrap()
+                .reg_intercept;
+            let intercept_control = configured_intercepts.intercept_control;
+            return match reg {
+                HvX64RegisterName::Cr0 => {
+                    if intercept_control.cr0_write() {
+                        true
+                    } else {
+                        (B::cr0(self, vtl) ^ value) & configured_intercepts.cr0_mask != 0
+                    }
+                }
+                HvX64RegisterName::Cr4 => {
+                    if intercept_control.cr4_write() {
+                        true
+                    } else {
+                        (B::cr4(self, vtl) ^ value) & configured_intercepts.cr4_mask != 0
+                    }
+                }
+                HvX64RegisterName::Xfem => intercept_control.xcr0_write(),
+                HvX64RegisterName::Gdtr => intercept_control.gdtr_write(),
+                HvX64RegisterName::Idtr => intercept_control.idtr_write(),
+                HvX64RegisterName::Ldtr => intercept_control.ldtr_write(),
+                HvX64RegisterName::Tr => intercept_control.tr_write(),
+                _ => unreachable!("unexpected secure register"),
+            };
+        }
+        false
+    }
+
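Note on the InitialVpContextX64 destructuring above: naming every field (binding the unchecked ones to `_`) instead of using a `..` rest pattern means that adding a field to the struct stops compiling until someone decides whether the new field needs a protected-register check. A toy version of the idiom (struct and fields are illustrative):

    struct Context {
        rip: u64,
        cr0: u64,
        cr4: u64,
    }

    fn checked_fields(ctx: Context) -> [u64; 2] {
        // No `..` here: a new Context field is a compile error until it is
        // explicitly listed, so it cannot silently skip the audit.
        let Context { rip: _, cr0, cr4 } = ctx;
        [cr0, cr4]
    }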
+    /// Checks if a higher VTL registered for write intercepts on the register,
+    /// and sends the intercept as required.
+    pub(crate) fn cvm_protect_secure_register_write(
+        &mut self,
+        vtl: GuestVtl,
+        reg: HvX64RegisterName,
+        value: u64,
+    ) -> bool {
+        let send_intercept = self.cvm_is_protected_register_write(vtl, reg, value);
+        if send_intercept {
+            let message_state = B::intercept_message_state(self, vtl);
+
+            tracing::debug!(
+                ?reg,
+                ?value,
+                "sending intercept to vtl 1 for secure register write"
+            );
+
+            self.inner.post_message(
+                GuestVtl::Vtl1,
+                hvdef::HV_SYNIC_INTERCEPTION_SINT_INDEX,
+                &crate::processor::InterceptMessageType::Register { reg, value }
+                    .generate_hv_message(self.vp_index(), vtl, message_state),
+            );
+        }
+
+        send_intercept
+    }
+
+    /// Checks if a higher VTL registered for write intercepts on the MSR, and
+    /// sends the intercept as required.
+    pub(crate) fn cvm_protect_msr_write(&self, vtl: GuestVtl, msr: u32) -> bool {
+        if vtl == GuestVtl::Vtl0 && self.backing.cvm_state().vtl1.is_some() {
+            let configured_intercepts = self
+                .backing
+                .cvm_state()
+                .vtl1
+                .as_ref()
+                .unwrap()
+                .reg_intercept
+                .intercept_control;
+
+            // Note: writes to X86X_IA32_MSR_MISC_ENABLE are dropped, so there
+            // is no need to check the mask.
+
+            let generate_intercept = match msr {
+                x86defs::X86X_MSR_LSTAR => configured_intercepts.msr_lstar_write(),
+                x86defs::X86X_MSR_STAR => configured_intercepts.msr_star_write(),
+                x86defs::X86X_MSR_CSTAR => configured_intercepts.msr_cstar_write(),
+                x86defs::X86X_MSR_APIC_BASE => configured_intercepts.apic_base_msr_write(),
+                x86defs::X86X_MSR_EFER => configured_intercepts.msr_efer_write(),
+                x86defs::X86X_MSR_SYSENTER_CS => configured_intercepts.msr_sysenter_cs_write(),
+                x86defs::X86X_MSR_SYSENTER_EIP => configured_intercepts.msr_sysenter_eip_write(),
+                x86defs::X86X_MSR_SYSENTER_ESP => configured_intercepts.msr_sysenter_esp_write(),
+                x86defs::X86X_MSR_SFMASK => configured_intercepts.msr_sfmask_write(),
+                x86defs::X86X_MSR_TSC_AUX => configured_intercepts.msr_tsc_aux_write(),
+                x86defs::X86X_MSR_XSS => configured_intercepts.msr_xss_write(),
+                x86defs::X86X_MSR_S_CET => configured_intercepts.msr_scet_write(),
+                x86defs::X86X_MSR_PL0_SSP
+                | x86defs::X86X_MSR_PL1_SSP
+                | x86defs::X86X_MSR_PL2_SSP => configured_intercepts.msr_pls_ssp_write(),
+                x86defs::X86X_MSR_INTERRUPT_SSP_TABLE_ADDR => {
+                    configured_intercepts.msr_interrupt_ssp_table_addr_write()
+                }
+                _ => false,
+            };
+
+            if generate_intercept {
+                let message_state = B::intercept_message_state(self, vtl);
+
+                tracing::debug!(?msr, "sending intercept to vtl 1 for secure msr write");
+
+                self.inner.post_message(
+                    GuestVtl::Vtl1,
+                    hvdef::HV_SYNIC_INTERCEPTION_SINT_INDEX,
+                    &crate::processor::InterceptMessageType::Msr { msr }.generate_hv_message(
+                        self.vp_index(),
+                        vtl,
+                        message_state,
+                    ),
+                );
+
+                return true;
+            }
+        }
+        false
+    }
+
     fn get_vsm_vp_secure_config_vtl(
         &mut self,
         requesting_vtl: GuestVtl,
diff --git a/openhcl/virt_mshv_vtl/src/processor/mod.rs b/openhcl/virt_mshv_vtl/src/processor/mod.rs
index 970d9da4bf..c986a7252c 100644
--- a/openhcl/virt_mshv_vtl/src/processor/mod.rs
+++ b/openhcl/virt_mshv_vtl/src/processor/mod.rs
@@ -24,6 +24,8 @@ cfg_if::cfg_if! {
         use virt_support_apic::LocalApic;
         use virt_support_x86emu::translate::TranslationRegisters;
         use virt::vp::AccessVpState;
+        use zerocopy::IntoBytes;
+        use hvdef::HvRegisterCrInterceptControl;
     } else if #[cfg(guest_arch = "aarch64")] {
         use hv1_hypercall::Arm64RegisterState;
         use hvdef::HvArm64RegisterName;
     }
@@ -281,6 +283,89 @@ pub(crate) struct BackingSharedParams<'a> {
     pub _phantom: PhantomData<&'a ()>,
 }
 
+/// Supported intercept message types.
+#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
+enum InterceptMessageType {
+    #[cfg(guest_arch = "x86_64")]
+    Register {
+        reg: HvX64RegisterName,
+        value: u64,
+    },
+    Msr {
+        msr: u32,
+    },
+}
+
+/// Per-arch state required to generate an intercept message.
+#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
+struct InterceptMessageState {
+    instruction_length_and_cr8: u8,
+    cpl: u8,
+    efer_lma: bool,
+    cs_segment: hvdef::HvX64SegmentRegister,
+    rip: u64,
+    rflags: u64,
+    rax: u64,
+    rdx: u64,
+}
+
+impl InterceptMessageType {
+    #[cfg(guest_arch = "x86_64")]
+    fn generate_hv_message(
+        &self,
+        vp_index: VpIndex,
+        vtl: GuestVtl,
+        state: InterceptMessageState,
+    ) -> HvMessage {
+        let write_header = hvdef::HvX64InterceptMessageHeader {
+            vp_index: vp_index.index(),
+            instruction_length_and_cr8: state.instruction_length_and_cr8,
+            intercept_access_type: hvdef::HvInterceptAccessType::WRITE,
+            execution_state: hvdef::HvX64VpExecutionState::new()
+                .with_cpl(state.cpl)
+                .with_vtl(vtl.into())
+                .with_efer_lma(state.efer_lma),
+            cs_segment: state.cs_segment,
+            rip: state.rip,
+            rflags: state.rflags,
+        };
+        match self {
+            InterceptMessageType::Register { reg, value } => {
+                let intercept_message = hvdef::HvX64RegisterInterceptMessage {
+                    header: write_header,
+                    flags: hvdef::HvX64RegisterInterceptMessageFlags::new(),
+                    rsvd: 0,
+                    rsvd2: 0,
+                    register_name: *reg,
+                    access_info: hvdef::HvX64RegisterAccessInfo::new_source_value(
+                        hvdef::HvRegisterValue::from(*value),
+                    ),
+                };
+                HvMessage::new(
+                    hvdef::HvMessageType::HvMessageTypeRegisterIntercept,
+                    0,
+                    intercept_message.as_bytes(),
+                )
+            }
+            InterceptMessageType::Msr { msr } => {
+                let intercept_message = hvdef::HvX64MsrInterceptMessage {
+                    header: write_header,
+                    msr_number: *msr,
+                    rax: state.rax,
+                    rdx: state.rdx,
+                    reserved: 0,
+                };
+
+                HvMessage::new(
+                    hvdef::HvMessageType::HvMessageTypeMsrIntercept,
+                    0,
+                    intercept_message.as_bytes(),
+                )
+            }
+        }
+    }
+}
+
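Note: generate_hv_message leans on zerocopy's IntoBytes to turn the repr(C) message structs into the HvMessage payload without hand-written serialization; the derive refuses structs with padding, so the wire layout stays explicit. A minimal sketch with a toy message (assumes zerocopy 0.8 derive names, as imported in the hunk above):

    use zerocopy::{Immutable, IntoBytes};

    // 16 bytes, no padding: 4 + 4 + 8.
    #[repr(C)]
    #[derive(IntoBytes, Immutable)]
    struct ToyMessage {
        kind: u32,
        len: u32,
        payload: u64,
    }

    #[test]
    fn serializes_in_place() {
        let msg = ToyMessage { kind: 1, len: 8, payload: 0xdead_beef };
        let bytes: &[u8] = msg.as_bytes(); // native-endian field bytes
        assert_eq!(bytes.len(), 16);
        assert_eq!(bytes[0], 1); // little-endian hosts (x86)
    }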
 /// Trait for processor backings that have hardware isolation support.
 #[cfg(guest_arch = "x86_64")]
 trait HardwareIsolatedBacking: Backing {
@@ -318,9 +403,21 @@ trait HardwareIsolatedBacking: Backing {
         vtl: GuestVtl,
         event: hvdef::HvX64PendingExceptionEvent,
     );
-    /// Individual register for CPUID, since AccessVpState::registers is
-    /// relatively slow on TDX.
-    fn cr4_for_cpuid(this: &mut UhProcessor<'_, Self>, vtl: GuestVtl) -> u64;
+
+    fn intercept_message_state(
+        this: &UhProcessor<'_, Self>,
+        vtl: GuestVtl,
+    ) -> InterceptMessageState;
+
+    /// Individual register for CPUID and crx intercept handling, since
+    /// AccessVpState::registers is relatively slow on TDX.
+    fn cr0(this: &UhProcessor<'_, Self>, vtl: GuestVtl) -> u64;
+    fn cr4(this: &UhProcessor<'_, Self>, vtl: GuestVtl) -> u64;
+
+    fn cr_intercept_registration(
+        this: &mut UhProcessor<'_, Self>,
+        intercept_control: HvRegisterCrInterceptControl,
+    );
 }
 
 #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
@@ -457,6 +554,8 @@ pub enum UhRunVpError {
     /// Handling an intercept on behalf of an invalid Lower VTL
     #[error("invalid intercepted vtl {0:?}")]
     InvalidInterceptedVtl(u8),
+    #[error("access to state blocked by another vtl")]
+    StateAccessDenied,
 }
 
 /// Underhill processor run error
diff --git a/openhcl/virt_mshv_vtl/src/processor/snp/mod.rs b/openhcl/virt_mshv_vtl/src/processor/snp/mod.rs
index b268ce9d37..bd9a4325b9 100644
--- a/openhcl/virt_mshv_vtl/src/processor/snp/mod.rs
+++ b/openhcl/virt_mshv_vtl/src/processor/snp/mod.rs
@@ -7,6 +7,7 @@ use super::BackingParams;
 use super::BackingPrivate;
 use super::BackingSharedParams;
 use super::HardwareIsolatedBacking;
+use super::InterceptMessageState;
 use super::UhEmulationState;
 use super::UhRunVpError;
 use super::hardware_cvm;
@@ -114,6 +115,7 @@ struct ExitStats {
     vmmcall: Counter,
     xsetbv: Counter,
     excp_db: Counter,
+    secure_reg_write: Counter,
 }
 
 enum UhDirectOverlay {
@@ -259,9 +261,50 @@ impl HardwareIsolatedBacking for SnpBacked {
         this.runner.vmsa_mut(vtl).set_event_inject(inject_info);
     }
 
-    fn cr4_for_cpuid(this: &mut UhProcessor<'_, Self>, vtl: GuestVtl) -> u64 {
+    fn cr0(this: &UhProcessor<'_, Self>, vtl: GuestVtl) -> u64 {
+        this.runner.vmsa(vtl).cr0()
+    }
+
+    fn cr4(this: &UhProcessor<'_, Self>, vtl: GuestVtl) -> u64 {
         this.runner.vmsa(vtl).cr4()
     }
+
+    fn intercept_message_state(
+        this: &UhProcessor<'_, Self>,
+        vtl: GuestVtl,
+    ) -> InterceptMessageState {
+        let vmsa = this.runner.vmsa(vtl);
+
+        InterceptMessageState {
+            instruction_length_and_cr8: (vmsa.next_rip() - vmsa.rip()) as u8,
+            cpl: vmsa.cpl(),
+            efer_lma: vmsa.efer() & x86defs::X64_EFER_LMA != 0,
+            cs_segment: virt_seg_from_snp(vmsa.cs()).into(),
+            rip: vmsa.rip(),
+            rflags: vmsa.rflags(),
+            rax: vmsa.rax(),
+            rdx: vmsa.rdx(),
+        }
+    }
+
+    fn cr_intercept_registration(
+        this: &mut UhProcessor<'_, Self>,
+        intercept_control: hvdef::HvRegisterCrInterceptControl,
+    ) {
+        // Intercept control is always managed by the hypervisor, so any
+        // request here is only opportunistic. Make the request directly with
+        // the hypervisor. Since intercept control always applies to VTL 1
+        // control of VTL 0 state, the VTL 1 intercept control register is set
+        // here.
+        this.runner
+            .set_vp_registers_hvcall(
+                Vtl::Vtl1,
+                [(
+                    HvX64RegisterName::CrInterceptControl,
+                    u64::from(intercept_control),
+                )],
+            )
+            .expect("setting intercept control succeeds");
+    }
 }
 
 /// Partition-wide shared data for SNP VPs.
@@ -929,6 +972,193 @@ impl UhProcessor<'_, SnpBacked> {
         Ok(())
     }
 
+    fn handle_msr_access(
+        &mut self,
+        dev: &impl CpuIo,
+        entered_from_vtl: GuestVtl,
+        msr: u32,
+        is_write: bool,
+    ) {
+        if is_write && self.cvm_protect_msr_write(entered_from_vtl, msr) {
+            // An intercept message has been posted; no further processing is
+            // required. Return without advancing the instruction pointer, as
+            // it must continue to point to the instruction that generated the
+            // intercept.
+ return; + } + + let vmsa = self.runner.vmsa_mut(entered_from_vtl); + let gp = if is_write { + let value = (vmsa.rax() as u32 as u64) | ((vmsa.rdx() as u32 as u64) << 32); + + let r = self.backing.cvm.lapics[entered_from_vtl] + .lapic + .access(&mut SnpApicClient { + partition: self.partition, + vmsa, + dev, + vmtime: &self.vmtime, + vtl: entered_from_vtl, + }) + .msr_write(msr, value) + .or_else_if_unknown(|| self.write_msr_cvm(msr, value, entered_from_vtl)) + .or_else_if_unknown(|| self.write_msr(msr, value, entered_from_vtl)) + .or_else_if_unknown(|| self.write_msr_snp(dev, msr, value, entered_from_vtl)); + + match r { + Ok(()) => false, + Err(MsrError::Unknown) => { + tracing::debug!(msr, value, "unknown cvm msr write"); + false + } + Err(MsrError::InvalidAccess) => true, + } + } else { + let r = self.backing.cvm.lapics[entered_from_vtl] + .lapic + .access(&mut SnpApicClient { + partition: self.partition, + vmsa, + dev, + vmtime: &self.vmtime, + vtl: entered_from_vtl, + }) + .msr_read(msr) + .or_else_if_unknown(|| self.read_msr(msr, entered_from_vtl)) + .or_else_if_unknown(|| self.read_msr_cvm(dev, msr, entered_from_vtl)) + .or_else_if_unknown(|| match msr { + hvdef::HV_X64_MSR_GUEST_IDLE => { + self.backing.cvm.lapics[entered_from_vtl].activity = MpState::Idle; + let mut vmsa = self.runner.vmsa_mut(entered_from_vtl); + vmsa.v_intr_cntrl_mut().set_intr_shadow(false); + Ok(0) + } + _ => Err(MsrError::Unknown), + }); + + let value = match r { + Ok(v) => Some(v), + Err(MsrError::Unknown) => { + tracing::debug!(msr, "unknown cvm msr read"); + Some(0) + } + Err(MsrError::InvalidAccess) => None, + }; + + if let Some(value) = value { + let mut vmsa = self.runner.vmsa_mut(entered_from_vtl); + vmsa.set_rax((value as u32).into()); + vmsa.set_rdx(((value >> 32) as u32).into()); + false + } else { + true + } + }; + + let mut vmsa = self.runner.vmsa_mut(entered_from_vtl); + if gp { + vmsa.set_event_inject( + SevEventInjectInfo::new() + .with_interruption_type(x86defs::snp::SEV_INTR_TYPE_EXCEPT) + .with_vector(x86defs::Exception::GENERAL_PROTECTION_FAULT.0) + .with_deliver_error_code(true) + .with_valid(true), + ); + } else { + advance_to_next_instruction(&mut vmsa); + } + } + + fn handle_xsetbv(&mut self, entered_from_vtl: GuestVtl) { + let vmsa = self.runner.vmsa(entered_from_vtl); + if let Some(value) = hardware_cvm::validate_xsetbv_exit(hardware_cvm::XsetbvExitInput { + rax: vmsa.rax(), + rcx: vmsa.rcx(), + rdx: vmsa.rdx(), + cr4: vmsa.cr4(), + cpl: vmsa.cpl(), + }) { + if self.cvm_protect_secure_register_write( + entered_from_vtl, + HvX64RegisterName::Xfem, + value, + ) { + // Once the intercept message has been posted, no further + // processing is required. Do not advance the instruction + // pointer here, since the instruction pointer must continue to + // point to the instruction that generated the intercept. 
+ } else { + let mut vmsa = self.runner.vmsa_mut(entered_from_vtl); + vmsa.set_xcr0(value); + advance_to_next_instruction(&mut vmsa); + } + } else { + let mut vmsa = self.runner.vmsa_mut(entered_from_vtl); + vmsa.set_event_inject( + SevEventInjectInfo::new() + .with_interruption_type(x86defs::snp::SEV_INTR_TYPE_EXCEPT) + .with_vector(x86defs::Exception::GENERAL_PROTECTION_FAULT.0) + .with_deliver_error_code(true) + .with_valid(true), + ); + } + } + + fn handle_crx_intercept(&mut self, entered_from_vtl: GuestVtl, reg: HvX64RegisterName) { + let vmsa = self.runner.vmsa(entered_from_vtl); + let mov_crx_drx = x86defs::snp::MovCrxDrxInfo::from(vmsa.exit_info1()); + let reg_value = { + let gpr_name = + HvX64RegisterName(HvX64RegisterName::Rax.0 + mov_crx_drx.gpr_number() as u32); + + match gpr_name { + HvX64RegisterName::Rax => vmsa.rax(), + HvX64RegisterName::Rbx => vmsa.rbx(), + HvX64RegisterName::Rcx => vmsa.rcx(), + HvX64RegisterName::Rdx => vmsa.rdx(), + HvX64RegisterName::Rsp => vmsa.rsp(), + HvX64RegisterName::Rbp => vmsa.rbp(), + HvX64RegisterName::Rsi => vmsa.rsi(), + HvX64RegisterName::Rdi => vmsa.rdi(), + HvX64RegisterName::R8 => vmsa.r8(), + HvX64RegisterName::R9 => vmsa.r9(), + HvX64RegisterName::R10 => vmsa.r10(), + HvX64RegisterName::R11 => vmsa.r11(), + HvX64RegisterName::R12 => vmsa.r12(), + HvX64RegisterName::R13 => vmsa.r13(), + HvX64RegisterName::R14 => vmsa.r14(), + HvX64RegisterName::R15 => vmsa.r15(), + _ => unreachable!("unexpected register"), + } + }; + + // Special case: LMSW/CLTS/SMSW intercepts do not provide decode assist + // information. No support to emulate these instructions yet, but the + // access by the guest might be allowed by the higher VTL and therefore + // crashing is not necessarily the correct behavior. + // + // TODO SNP: consider emulating the instruction. + if !mov_crx_drx.mov_crx() { + tracelimit::warn_ratelimited!("Intercepted crx access, instruction is not mov crx"); + return; + } + + if self.cvm_protect_secure_register_write(entered_from_vtl, reg, reg_value) { + // Once the intercept message has been posted, no further + // processing is required. Do not advance the instruction + // pointer here, since the instruction pointer must continue to + // point to the instruction that generated the intercept. 
     #[must_use]
     fn sync_lazy_eoi(&mut self, vtl: GuestVtl) -> bool {
         if self.backing.cvm.lapics[vtl].lapic.is_lazy_eoi_pending() {
@@ -1046,86 +1276,7 @@ impl UhProcessor<'_, SnpBacked> {
                 let is_write = vmsa.exit_info1() & 1 != 0;
                 let msr = vmsa.rcx() as u32;
 
-                let gp = if is_write {
-                    let value = (vmsa.rax() as u32 as u64) | ((vmsa.rdx() as u32 as u64) << 32);
-
-                    let r = self.backing.cvm.lapics[entered_from_vtl]
-                        .lapic
-                        .access(&mut SnpApicClient {
-                            partition: self.partition,
-                            vmsa,
-                            dev,
-                            vmtime: &self.vmtime,
-                            vtl: entered_from_vtl,
-                        })
-                        .msr_write(msr, value)
-                        .or_else_if_unknown(|| self.write_msr_cvm(msr, value, entered_from_vtl))
-                        .or_else_if_unknown(|| self.write_msr(msr, value, entered_from_vtl))
-                        .or_else_if_unknown(|| {
-                            self.write_msr_snp(dev, msr, value, entered_from_vtl)
-                        });
-
-                    match r {
-                        Ok(()) => false,
-                        Err(MsrError::Unknown) => {
-                            tracing::debug!(msr, value, "unknown cvm msr write");
-                            false
-                        }
-                        Err(MsrError::InvalidAccess) => true,
-                    }
-                } else {
-                    let r = self.backing.cvm.lapics[entered_from_vtl]
-                        .lapic
-                        .access(&mut SnpApicClient {
-                            partition: self.partition,
-                            vmsa,
-                            dev,
-                            vmtime: &self.vmtime,
-                            vtl: entered_from_vtl,
-                        })
-                        .msr_read(msr)
-                        .or_else_if_unknown(|| self.read_msr(msr, entered_from_vtl))
-                        .or_else_if_unknown(|| self.read_msr_cvm(dev, msr, entered_from_vtl))
-                        .or_else_if_unknown(|| match msr {
-                            hvdef::HV_X64_MSR_GUEST_IDLE => {
-                                self.backing.cvm.lapics[entered_from_vtl].activity = MpState::Idle;
-                                let mut vmsa = self.runner.vmsa_mut(entered_from_vtl);
-                                vmsa.v_intr_cntrl_mut().set_intr_shadow(false);
-                                Ok(0)
-                            }
-                            _ => Err(MsrError::Unknown),
-                        });
-
-                    let value = match r {
-                        Ok(v) => Some(v),
-                        Err(MsrError::Unknown) => {
-                            tracing::debug!(msr, "unknown cvm msr read");
-                            Some(0)
-                        }
-                        Err(MsrError::InvalidAccess) => None,
-                    };
-
-                    if let Some(value) = value {
-                        let mut vmsa = self.runner.vmsa_mut(entered_from_vtl);
-                        vmsa.set_rax((value as u32).into());
-                        vmsa.set_rdx(((value >> 32) as u32).into());
-                        false
-                    } else {
-                        true
-                    }
-                };
-
-                let mut vmsa = self.runner.vmsa_mut(entered_from_vtl);
-                if gp {
-                    vmsa.set_event_inject(
-                        SevEventInjectInfo::new()
-                            .with_interruption_type(x86defs::snp::SEV_INTR_TYPE_EXCEPT)
-                            .with_vector(x86defs::Exception::GENERAL_PROTECTION_FAULT.0)
-                            .with_deliver_error_code(true)
-                            .with_valid(true),
-                    );
-                } else {
-                    advance_to_next_instruction(&mut vmsa);
-                }
+                self.handle_msr_access(dev, entered_from_vtl, msr, is_write);
 
                 if is_write {
                     &mut self.backing.exit_stats[entered_from_vtl].msr_write
@@ -1324,31 +1475,46 @@ impl UhProcessor<'_, SnpBacked> {
             }
 
             SevExitCode::XSETBV => {
-                if let Some(value) =
-                    hardware_cvm::validate_xsetbv_exit(hardware_cvm::XsetbvExitInput {
-                        rax: vmsa.rax(),
-                        rcx: vmsa.rcx(),
-                        rdx: vmsa.rdx(),
-                        cr4: vmsa.cr4(),
-                        cpl: vmsa.cpl(),
-                    })
-                {
-                    vmsa.set_xcr0(value);
-                    advance_to_next_instruction(&mut vmsa);
-                } else {
-                    vmsa.set_event_inject(
-                        SevEventInjectInfo::new()
-                            .with_interruption_type(x86defs::snp::SEV_INTR_TYPE_EXCEPT)
-                            .with_vector(x86defs::Exception::GENERAL_PROTECTION_FAULT.0)
-                            .with_deliver_error_code(true)
-                            .with_valid(true),
-                    );
-                }
+                self.handle_xsetbv(entered_from_vtl);
                 &mut self.backing.exit_stats[entered_from_vtl].xsetbv
             }
 
            SevExitCode::EXCP_DB => &mut self.backing.exit_stats[entered_from_vtl].excp_db,
+            SevExitCode::CR0_WRITE => {
+                self.handle_crx_intercept(entered_from_vtl, HvX64RegisterName::Cr0);
+                &mut self.backing.exit_stats[entered_from_vtl].secure_reg_write
+            }
+            SevExitCode::CR4_WRITE => {
+                self.handle_crx_intercept(entered_from_vtl, HvX64RegisterName::Cr4);
+                &mut self.backing.exit_stats[entered_from_vtl].secure_reg_write
+            }
+
+            tr_exit_code @ (SevExitCode::GDTR_WRITE
+            | SevExitCode::IDTR_WRITE
+            | SevExitCode::LDTR_WRITE
+            | SevExitCode::TR_WRITE) => {
+                let reg = match tr_exit_code {
+                    SevExitCode::GDTR_WRITE => HvX64RegisterName::Gdtr,
+                    SevExitCode::IDTR_WRITE => HvX64RegisterName::Idtr,
+                    SevExitCode::LDTR_WRITE => HvX64RegisterName::Ldtr,
+                    SevExitCode::TR_WRITE => HvX64RegisterName::Tr,
+                    _ => unreachable!(),
+                };
+
+                if !self.cvm_protect_secure_register_write(entered_from_vtl, reg, 0) {
+                    // This is an unexpected intercept: an intercept for these
+                    // registers should only arrive if a higher VTL (i.e. VTL 1)
+                    // requested it. If an unexpected intercept has been
+                    // received, then the host must have enabled an intercept
+                    // that was not desired. Since the intercept cannot
+                    // correctly be emulated, this must be treated as a fatal
+                    // error.
+                    panic!("unexpected secure register");
+                }
+
+                &mut self.backing.exit_stats[entered_from_vtl].secure_reg_write
+            }
+
             _ => {
                 debug_assert!(
                     false,
diff --git a/openhcl/virt_mshv_vtl/src/processor/tdx/mod.rs b/openhcl/virt_mshv_vtl/src/processor/tdx/mod.rs
index 32db5fa02d..bfba159a46 100644
--- a/openhcl/virt_mshv_vtl/src/processor/tdx/mod.rs
+++ b/openhcl/virt_mshv_vtl/src/processor/tdx/mod.rs
@@ -559,9 +559,27 @@ impl HardwareIsolatedBacking for TdxBacked {
         this.backing.vtls[vtl].exception_error_code = event.error_code();
     }
 
-    fn cr4_for_cpuid(this: &mut UhProcessor<'_, Self>, vtl: GuestVtl) -> u64 {
+    fn cr0(this: &UhProcessor<'_, Self>, vtl: GuestVtl) -> u64 {
+        this.backing.vtls[vtl].cr0.read(&this.runner)
+    }
+
+    fn cr4(this: &UhProcessor<'_, Self>, vtl: GuestVtl) -> u64 {
         this.backing.vtls[vtl].cr4.read(&this.runner)
     }
+
+    fn intercept_message_state(
+        _this: &UhProcessor<'_, Self>,
+        _vtl: GuestVtl,
+    ) -> super::InterceptMessageState {
+        todo!()
+    }
+
+    fn cr_intercept_registration(
+        _this: &mut UhProcessor<'_, Self>,
+        _intercept_control: hvdef::HvRegisterCrInterceptControl,
+    ) {
+        todo!()
+    }
 }
 
 /// Partition-wide shared data for TDX VPs.
diff --git a/vm/hv1/hvdef/src/lib.rs b/vm/hv1/hvdef/src/lib.rs
index aa661af116..f2bbeb9f22 100644
--- a/vm/hv1/hvdef/src/lib.rs
+++ b/vm/hv1/hvdef/src/lib.rs
@@ -755,6 +755,8 @@ impl Default for HvMessageType {
     }
 }
 
+pub const HV_SYNIC_INTERCEPTION_SINT_INDEX: u8 = 0;
+
 pub const NUM_SINTS: usize = 16;
 pub const NUM_TIMERS: usize = 4;
 
@@ -2262,6 +2264,11 @@ registers! {
 
         // AMD SEV configuration MSRs
         SevControl = 0x00090040,
+
+        CrInterceptControl = 0x000E0000,
+        CrInterceptCr0Mask = 0x000E0001,
+        CrInterceptCr4Mask = 0x000E0002,
+        CrInterceptIa32MiscEnableMask = 0x000E0003,
     }
 }
 
@@ -2963,6 +2970,35 @@ open_enum! {
     }
 }
 
+#[bitfield(u8)]
+#[derive(IntoBytes, Immutable, FromBytes)]
+pub struct HvX64RegisterInterceptMessageFlags {
+    pub is_memory_op: bool,
+    #[bits(7)]
+    _rsvd: u8,
+}
+
+#[repr(C)]
+#[derive(IntoBytes, Immutable, FromBytes)]
+pub struct HvX64RegisterInterceptMessage {
+    pub header: HvX64InterceptMessageHeader,
+    pub flags: HvX64RegisterInterceptMessageFlags,
+    pub rsvd: u8,
+    pub rsvd2: u16,
+    pub register_name: HvX64RegisterName,
+    pub access_info: HvX64RegisterAccessInfo,
+}
+
+#[repr(transparent)]
+#[derive(IntoBytes, Immutable, FromBytes)]
+pub struct HvX64RegisterAccessInfo(u128);
+
+impl HvX64RegisterAccessInfo {
+    pub fn new_source_value(source_value: HvRegisterValue) -> Self {
+        Self(source_value.as_u128())
+    }
+}
+
 open_enum! {
     #[derive(IntoBytes, Immutable, KnownLayout, FromBytes)]
     pub enum HvInterruptType : u32 {
@@ -3672,3 +3708,38 @@ pub struct HvRegisterVsmVpSecureVtlConfig {
     #[bits(60)]
     _reserved: u64,
 }
+
+#[bitfield(u64)]
+pub struct HvRegisterCrInterceptControl {
+    pub cr0_write: bool,
+    pub cr4_write: bool,
+    pub xcr0_write: bool,
+    pub ia32_misc_enable_read: bool,
+    pub ia32_misc_enable_write: bool,
+    pub msr_lstar_read: bool,
+    pub msr_lstar_write: bool,
+    pub msr_star_read: bool,
+    pub msr_star_write: bool,
+    pub msr_cstar_read: bool,
+    pub msr_cstar_write: bool,
+    pub apic_base_msr_read: bool,
+    pub apic_base_msr_write: bool,
+    pub msr_efer_read: bool,
+    pub msr_efer_write: bool,
+    pub gdtr_write: bool,
+    pub idtr_write: bool,
+    pub ldtr_write: bool,
+    pub tr_write: bool,
+    pub msr_sysenter_cs_write: bool,
+    pub msr_sysenter_eip_write: bool,
+    pub msr_sysenter_esp_write: bool,
+    pub msr_sfmask_write: bool,
+    pub msr_tsc_aux_write: bool,
+    pub msr_sgx_launch_control_write: bool,
+    pub msr_xss_write: bool,
+    pub msr_scet_write: bool,
+    pub msr_pls_ssp_write: bool,
+    pub msr_interrupt_ssp_table_addr_write: bool,
+    #[bits(35)]
+    _rsvd_z: u64,
+}
diff --git a/vm/x86/x86defs/src/snp.rs b/vm/x86/x86defs/src/snp.rs
index 4ae83ce8ac..f486624b9a 100644
--- a/vm/x86/x86defs/src/snp.rs
+++ b/vm/x86/x86defs/src/snp.rs
@@ -786,3 +786,12 @@ pub struct SevInvlpgbEcx {
     reserved: u64,
     pub large_page: bool,
 }
+
+#[bitfield(u64)]
+pub struct MovCrxDrxInfo {
+    #[bits(4)]
+    pub gpr_number: u64,
+    #[bits(59)]
+    pub reserved: u64,
+    pub mov_crx: bool,
+}
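Note: MovCrxDrxInfo decodes the SEV decode-assist EXITINFO1 for MOV CRx/DRx exits: bits 3:0 carry the source GPR number and bit 63 distinguishes MOV from the LMSW/CLTS/SMSW forms, matching the handle_crx_intercept logic earlier in this change. A small decode check (the raw value is fabricated for illustration):

    #[test]
    fn decode_mov_cr_from_r9() {
        let raw: u64 = (1 << 63) | 0x9; // mov_crx set, GPR number 9
        let info = MovCrxDrxInfo::from(raw);
        assert!(info.mov_crx());
        assert_eq!(info.gpr_number(), 9);
    }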
diff --git a/vmm_core/virt_support_apic/src/lib.rs b/vmm_core/virt_support_apic/src/lib.rs
index 050c9a0cab..83e5a82c0c 100644
--- a/vmm_core/virt_support_apic/src/lib.rs
+++ b/vmm_core/virt_support_apic/src/lib.rs
@@ -226,6 +226,7 @@ struct SharedState {
     )]
     auto_eoi: [AtomicU32; 8],
     work: AtomicU32,
+    software_enabled_on_reset: bool,
 }
 
 #[bitfield(u32)]
@@ -336,13 +337,14 @@ impl LocalApicSet {
     }
 
     /// Adds an APIC for the specified VP to the set.
-    pub fn add_apic(&self, vp: &X86VpInfo) -> LocalApic {
+    pub fn add_apic(&self, vp: &X86VpInfo, software_enabled_on_reset: bool) -> LocalApic {
         let shared = Arc::new(SharedState {
             vp_index: vp.base.vp_index,
             tmr: Default::default(),
             new_irr: Default::default(),
             auto_eoi: Default::default(),
             work: 0.into(),
+            software_enabled_on_reset,
         });
 
         {
@@ -1713,7 +1715,7 @@ impl LocalApic {
 
     fn reset_registers(&mut self) {
         let Self {
-            shared: _,
+            shared,
             global: _,
             apic_base: _,
             base_address: _,
@@ -1748,7 +1750,7 @@ impl LocalApic {
         *ldr = 0;
         *cluster_mode = false;
-        *svr = 0xff;
+        *svr = u32::from(Svr::from(0xff).with_enable(shared.software_enabled_on_reset));
         isr.clear();
         *esr = 0;
         *icr = 0;
diff --git a/vmm_core/virt_whp/src/apic.rs b/vmm_core/virt_whp/src/apic.rs
index 616fbe60c1..7da2776274 100644
--- a/vmm_core/virt_whp/src/apic.rs
+++ b/vmm_core/virt_whp/src/apic.rs
@@ -741,7 +741,7 @@ pub(crate) struct ApicState {
 impl ApicState {
     pub fn new(table: &LocalApicSet, vp_info: &vm_topology::processor::x86::X86VpInfo) -> Self {
         Self {
-            apic: table.add_apic(vp_info),
+            apic: table.add_apic(vp_info, false),
             startup_suspend: !vp_info.base.is_bsp(),
             nmi_pending: false,
         }
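Closing note on the SVR change: the x86 local APIC architecturally resets SVR to 0xff, which leaves bit 8 (the APIC software-enable bit) clear, so an APIC comes out of reset software-disabled; the new flag simply pre-sets that bit for secure VTLs. The two reset values, concretely (constant name is illustrative):

    const SVR_SOFTWARE_ENABLE: u32 = 1 << 8;

    #[test]
    fn svr_reset_values() {
        let disabled: u32 = 0xff; // architectural reset value
        let enabled = disabled | SVR_SOFTWARE_ENABLE;
        // Matches Svr::from(0xff).with_enable(true) in reset_registers.
        assert_eq!(enabled, 0x1ff);
    }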