diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml
new file mode 100644
index 00000000000..5245e19d08d
--- /dev/null
+++ b/.github/workflows/pr_tests.yml
@@ -0,0 +1,24 @@
+name: PR Tests
+
+on:
+  pull_request:
+
+env:
+  CARGO_TERM_COLOR: always
+
+jobs:
+  tests:
+    name: Tests
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Run tests with devtool
+        env:
+          PYTEST_ADDOPTS: "-n 2"
+        run: |
+          ./tools/devtool -y test -- integration_tests/functional/
diff --git a/.gitignore b/.gitignore
index 155e4cbd8a8..f56db437d09 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,4 @@ test_results/*
 /resources/linux
 /resources/x86_64
 /resources/aarch64
+.env
\ No newline at end of file
diff --git a/.tool-versions b/.tool-versions
new file mode 100644
index 00000000000..3e828d8e972
--- /dev/null
+++ b/.tool-versions
@@ -0,0 +1,2 @@
+gcloud 534.0.0
+rust 1.85.0
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000000..1fda2f26881
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,16 @@
+# Optional local settings such as GCP_PROJECT_ID; the leading '-' ignores a missing .env.
+-include .env
+
+# Build the release Firecracker binary via scripts/build.sh.
+.PHONY: build
+build:
+	./scripts/build.sh
+
+# Upload the build output via scripts/upload.sh for the project in GCP_PROJECT_ID.
+.PHONY: upload
+upload:
+	./scripts/upload.sh $(GCP_PROJECT_ID)
+
+# Convenience target: build, then upload. The target name must not be prefixed with "make".
+.PHONY: build-and-upload
+build-and-upload: build upload
diff --git a/resources/seccomp/aarch64-unknown-linux-musl.json b/resources/seccomp/aarch64-unknown-linux-musl.json
index db3abe1eced..6a95eca603d 100644
--- a/resources/seccomp/aarch64-unknown-linux-musl.json
+++ b/resources/seccomp/aarch64-unknown-linux-musl.json
@@ -212,6 +212,10 @@
                 "syscall": "madvise",
                 "comment": "Used by the VirtIO balloon device and by musl for some customer workloads. It is also used by aws-lc during random number generation. They setup a memory page that mark with MADV_WIPEONFORK to be able to detect forks. They also call it with -1 to see if madvise is supported in certain platforms."
}, + { + "syscall": "mincore", + "comment": "Used by get_memory_dirty_bitmap to check if memory pages are resident" + }, { "syscall": "mmap", "comment": "Used by the VirtIO balloon device", diff --git a/resources/seccomp/x86_64-unknown-linux-musl.json b/resources/seccomp/x86_64-unknown-linux-musl.json index 95ceca1b7ef..9c22667c20e 100644 --- a/resources/seccomp/x86_64-unknown-linux-musl.json +++ b/resources/seccomp/x86_64-unknown-linux-musl.json @@ -212,6 +212,10 @@ "syscall": "madvise", "comment": "Used by the VirtIO balloon device and by musl for some customer workloads. It is also used by aws-lc during random number generation. They setup a memory page that mark with MADV_WIPEONFORK to be able to detect forks. They also call it with -1 to see if madvise is supported in certain platforms." }, + { + "syscall": "mincore", + "comment": "Used by get_memory_dirty_bitmap to check if memory pages are resident" + }, { "syscall": "mmap", "comment": "Used by the VirtIO balloon device", @@ -524,8 +528,8 @@ "comment": "sigaltstack is used by Rust stdlib to remove alternative signal stack during thread teardown." }, { - "syscall": "getrandom", - "comment": "getrandom is used by `HttpServer` to reinialize `HashMap` after moving to the API thread" + "syscall": "getrandom", + "comment": "getrandom is used by `HttpServer` to reinialize `HashMap` after moving to the API thread" }, { "syscall": "accept4", @@ -1152,4 +1156,4 @@ } ] } -} +} \ No newline at end of file diff --git a/scripts/build.sh b/scripts/build.sh new file mode 100755 index 00000000000..6f459f63e07 --- /dev/null +++ b/scripts/build.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -euo pipefail + +# The format will be: v.._g — e.g. 
v1.7.2_g8bb88311 +# Extract full version from src/firecracker/swagger/firecracker.yaml +FC_VERSION=$(awk '/^info:/{flag=1} flag && /^ version:/{print $2; exit}' src/firecracker/swagger/firecracker.yaml) +commit_hash=$(git rev-parse --short HEAD) +version_name="v${FC_VERSION}_g${commit_hash}" +echo "Version name: $version_name" + +echo "Starting to build Firecracker version: $version_name" +tools/devtool -y build --release + +mkdir -p "./build/fc/${version_name}" +cp ./build/cargo_target/x86_64-unknown-linux-musl/release/firecracker "./build/fc/${version_name}/firecracker" +echo "Finished building Firecracker version: $version_name and copied to ./build/fc/${version_name}/firecracker" diff --git a/scripts/upload.sh b/scripts/upload.sh new file mode 100755 index 00000000000..4227c642593 --- /dev/null +++ b/scripts/upload.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +set -euo pipefail + +GCP_PROJECT_ID=$1 + +gsutil -h "Cache-Control:no-cache, max-age=0" cp -r "build/fc/*" "gs://${GCP_PROJECT_ID}-fc-versions" +if [ "$GCP_PROJECT_ID" == "e2b-prod" ]; then + # Upload kernel to GCP public builds bucket + gsutil -h "Cache-Control:no-cache, max-age=0" cp -r "build/fc/*" "gs://${GCP_PROJECT_ID}-public-builds/firecrackers/" +fi + +rm -rf build/fc/* diff --git a/src/cpu-template-helper/src/utils/mod.rs b/src/cpu-template-helper/src/utils/mod.rs index f23871df1a9..41d355594e7 100644 --- a/src/cpu-template-helper/src/utils/mod.rs +++ b/src/cpu-template-helper/src/utils/mod.rs @@ -125,6 +125,7 @@ pub fn build_microvm_from_config( state: VmState::NotStarted, vmm_version: CPU_TEMPLATE_HELPER_VERSION.to_string(), app_name: "cpu-template-helper".to_string(), + memory_regions: None, }; let mut vm_resources = VmResources::from_json(&config, &instance_info, HTTP_MAX_PAYLOAD_SIZE, None) diff --git a/src/firecracker/src/api_server/mod.rs b/src/firecracker/src/api_server/mod.rs index 71d1856b0d5..d7351f1f1bf 100644 --- a/src/firecracker/src/api_server/mod.rs +++ 
b/src/firecracker/src/api_server/mod.rs @@ -275,7 +275,7 @@ mod tests { Box::new(VmmAction::CreateSnapshot(CreateSnapshotParams { snapshot_type: SnapshotType::Diff, snapshot_path: PathBuf::new(), - mem_file_path: PathBuf::new(), + mem_file_path: Some(PathBuf::new()), })), start_time_us, ); @@ -288,7 +288,7 @@ mod tests { Box::new(VmmAction::CreateSnapshot(CreateSnapshotParams { snapshot_type: SnapshotType::Diff, snapshot_path: PathBuf::new(), - mem_file_path: PathBuf::new(), + mem_file_path: Some(PathBuf::new()), })), start_time_us, ); diff --git a/src/firecracker/src/api_server/parsed_request.rs b/src/firecracker/src/api_server/parsed_request.rs index 10d5c3d97ea..ccfbfc34695 100644 --- a/src/firecracker/src/api_server/parsed_request.rs +++ b/src/firecracker/src/api_server/parsed_request.rs @@ -21,6 +21,7 @@ use super::request::logger::parse_put_logger; use super::request::machine_configuration::{ parse_get_machine_config, parse_patch_machine_config, parse_put_machine_config, }; +use super::request::memory::{parse_get_memory, parse_get_memory_mappings}; use super::request::metrics::parse_put_metrics; use super::request::mmds::{parse_get_mmds, parse_patch_mmds, parse_put_mmds}; use super::request::net::{parse_patch_net, parse_put_net}; @@ -82,6 +83,14 @@ impl TryFrom<&Request> for ParsedRequest { Ok(ParsedRequest::new_sync(VmmAction::GetFullVmConfig)) } (Method::Get, "machine-config", None) => parse_get_machine_config(), + (Method::Get, "memory", None) => match path_tokens.next() { + Some("mappings") => parse_get_memory_mappings(), + None => parse_get_memory(), + _ => Err(RequestError::InvalidPathMethod( + request_uri.to_string(), + Method::Get, + )), + }, (Method::Get, "mmds", None) => parse_get_mmds(), (Method::Get, _, Some(_)) => method_to_error(Method::Get), (Method::Put, "actions", Some(body)) => parse_put_actions(body), @@ -172,6 +181,8 @@ impl ParsedRequest { } VmmData::BalloonStats(stats) => Self::success_response_with_data(stats), 
VmmData::InstanceInformation(info) => Self::success_response_with_data(info), + VmmData::MemoryMappings(mappings) => Self::success_response_with_data(mappings), + VmmData::Memory(memory) => Self::success_response_with_data(memory), VmmData::VmmVersion(version) => Self::success_response_with_data( &serde_json::json!({ "firecracker_version": version.as_str() }), ), @@ -568,6 +579,12 @@ pub mod tests { VmmData::InstanceInformation(info) => { http_response(&serde_json::to_string(info).unwrap(), 200) } + VmmData::MemoryMappings(mappings) => { + http_response(&serde_json::to_string(mappings).unwrap(), 200) + } + VmmData::Memory(memory) => { + http_response(&serde_json::to_string(memory).unwrap(), 200) + } VmmData::VmmVersion(version) => http_response( &serde_json::json!({ "firecracker_version": version.as_str() }).to_string(), 200, @@ -589,6 +606,15 @@ pub mod tests { verify_ok_response_with(VmmData::MachineConfiguration(MachineConfig::default())); verify_ok_response_with(VmmData::MmdsValue(serde_json::from_str("{}").unwrap())); verify_ok_response_with(VmmData::InstanceInformation(InstanceInfo::default())); + verify_ok_response_with(VmmData::MemoryMappings( + vmm::vmm_config::instance_info::MemoryMappingsResponse { mappings: vec![] }, + )); + verify_ok_response_with(VmmData::Memory( + vmm::vmm_config::instance_info::MemoryResponse { + resident: vec![], + empty: vec![], + }, + )); verify_ok_response_with(VmmData::VmmVersion(String::default())); // Error. 
@@ -662,6 +688,30 @@ pub mod tests { ParsedRequest::try_from(&req).unwrap(); } + #[test] + fn test_try_from_get_memory_mappings() { + let (mut sender, receiver) = UnixStream::pair().unwrap(); + let mut connection = HttpConnection::new(receiver); + sender + .write_all(http_request("GET", "/memory/mappings", None).as_bytes()) + .unwrap(); + connection.try_read().unwrap(); + let req = connection.pop_parsed_request().unwrap(); + ParsedRequest::try_from(&req).unwrap(); + } + + #[test] + fn test_try_from_get_memory() { + let (mut sender, receiver) = UnixStream::pair().unwrap(); + let mut connection = HttpConnection::new(receiver); + sender + .write_all(http_request("GET", "/memory", None).as_bytes()) + .unwrap(); + connection.try_read().unwrap(); + let req = connection.pop_parsed_request().unwrap(); + ParsedRequest::try_from(&req).unwrap(); + } + #[test] fn test_try_from_get_version() { let (mut sender, receiver) = UnixStream::pair().unwrap(); diff --git a/src/firecracker/src/api_server/request/memory.rs b/src/firecracker/src/api_server/request/memory.rs new file mode 100644 index 00000000000..e879d6b3b02 --- /dev/null +++ b/src/firecracker/src/api_server/request/memory.rs @@ -0,0 +1,39 @@ +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +use vmm::logger::{IncMetric, METRICS}; +use vmm::rpc_interface::VmmAction; + +use super::super::parsed_request::{ParsedRequest, RequestError}; + +pub(crate) fn parse_get_memory_mappings() -> Result<ParsedRequest, RequestError> { + METRICS.get_api_requests.instance_info_count.inc(); + Ok(ParsedRequest::new_sync(VmmAction::GetMemoryMappings)) +} + +pub(crate) fn parse_get_memory() -> Result<ParsedRequest, RequestError> { + METRICS.get_api_requests.instance_info_count.inc(); + Ok(ParsedRequest::new_sync(VmmAction::GetMemory)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::api_server::parsed_request::RequestAction; + + #[test] + fn test_parse_get_memory_mappings_request() { + match parse_get_memory_mappings().unwrap().into_parts() { + (RequestAction::Sync(action), _) if *action == VmmAction::GetMemoryMappings => {} + _ => panic!("Test failed."), + } + } + + #[test] + fn test_parse_get_memory_request() { + match parse_get_memory().unwrap().into_parts() { + (RequestAction::Sync(action), _) if *action == VmmAction::GetMemory => {} + _ => panic!("Test failed."), + } + } +} diff --git a/src/firecracker/src/api_server/request/mod.rs b/src/firecracker/src/api_server/request/mod.rs index 0c1622798f4..4442436986c 100644 --- a/src/firecracker/src/api_server/request/mod.rs +++ b/src/firecracker/src/api_server/request/mod.rs @@ -10,6 +10,7 @@ pub mod entropy; pub mod instance_info; pub mod logger; pub mod machine_configuration; +pub mod memory; pub mod metrics; pub mod mmds; pub mod net; diff --git a/src/firecracker/src/api_server/request/snapshot.rs b/src/firecracker/src/api_server/request/snapshot.rs index 4a96292d11d..07ddc40c23c 100644 --- a/src/firecracker/src/api_server/request/snapshot.rs +++ b/src/firecracker/src/api_server/request/snapshot.rs @@ -140,7 +140,7 @@ mod tests { let expected_config = CreateSnapshotParams { snapshot_type: SnapshotType::Diff, snapshot_path: PathBuf::from("foo"), - mem_file_path: PathBuf::from("bar"), + mem_file_path: Some(PathBuf::from("bar")),
}; assert_eq!( vmm_action_from_request(parse_put_snapshot(&Body::new(body), Some("create")).unwrap()), @@ -154,7 +154,7 @@ mod tests { let expected_config = CreateSnapshotParams { snapshot_type: SnapshotType::Full, snapshot_path: PathBuf::from("foo"), - mem_file_path: PathBuf::from("bar"), + mem_file_path: Some(PathBuf::from("bar")), }; assert_eq!( vmm_action_from_request(parse_put_snapshot(&Body::new(body), Some("create")).unwrap()), diff --git a/src/firecracker/src/main.rs b/src/firecracker/src/main.rs index 6b01f776729..1f3790f0f94 100644 --- a/src/firecracker/src/main.rs +++ b/src/firecracker/src/main.rs @@ -342,6 +342,7 @@ fn main_exec() -> Result<(), MainError> { state: VmState::NotStarted, vmm_version: FIRECRACKER_VERSION.to_string(), app_name: "Firecracker".to_string(), + memory_regions: None, }; if let Some(metrics_path) = arguments.single_value("metrics-path") { diff --git a/src/firecracker/swagger/firecracker.yaml b/src/firecracker/swagger/firecracker.yaml index 9c335290157..2cd2d92f208 100644 --- a/src/firecracker/swagger/firecracker.yaml +++ b/src/firecracker/swagger/firecracker.yaml @@ -5,7 +5,7 @@ info: The API is accessible through HTTP calls on specific URLs carrying JSON modeled data. The transport medium is a Unix Domain Socket. - version: 1.12.1 + version: 1.12.2 termsOfService: "" contact: email: "firecracker-maintainers@amazon.com" @@ -618,6 +618,35 @@ paths: schema: $ref: "#/definitions/Error" + /memory/mappings: + get: + summary: Gets the memory mappings with skippable pages bitmap. + operationId: getMemoryMappings + responses: + 200: + description: OK + schema: + $ref: "#/definitions/MemoryMappingsResponse" + default: + description: Internal server error + schema: + $ref: "#/definitions/Error" + + /memory: + get: + summary: Gets the memory info (resident and empty pages). + description: Returns an object with resident and empty bitmaps. The resident bitmap marks all pages that are resident. 
The empty bitmap marks zero pages (subset of resident pages). This is checked at the pageSize of each region. All regions must have the same page size. + operationId: getMemory + responses: + 200: + description: OK + schema: + $ref: "#/definitions/MemoryResponse" + default: + description: Internal server error + schema: + $ref: "#/definitions/Error" + /version: get: summary: Gets the Firecracker version. @@ -997,6 +1026,59 @@ definitions: description: MicroVM hypervisor build version. type: string + GuestMemoryRegionMapping: + type: object + description: Describes the region of guest memory that can be used for creating the memfile. + required: + - base_host_virt_addr + - size + - offset + - page_size + properties: + base_host_virt_addr: + type: integer + size: + description: The size of the region in bytes. + type: integer + offset: + description: The offset of the region in bytes. + type: integer + page_size: + description: The page size in bytes. + type: integer + + MemoryMappingsResponse: + type: object + description: Response containing memory region mappings. + required: + - mappings + properties: + mappings: + type: array + description: The memory region mappings. + items: + $ref: "#/definitions/GuestMemoryRegionMapping" + + MemoryResponse: + type: object + description: Response containing the memory info (resident and empty pages). + required: + - resident + - empty + properties: + resident: + type: array + description: The resident bitmap as a vector of u64 values. Each bit represents if the page is resident. + items: + type: integer + format: uint64 + empty: + type: array + description: The empty bitmap as a vector of u64 values. Each bit represents if the page is zero (empty). This is a subset of the resident pages. 
+ items: + type: integer + format: uint64 + Logger: type: object description: @@ -1198,7 +1280,6 @@ definitions: SnapshotCreateParams: type: object required: - - mem_file_path - snapshot_path properties: mem_file_path: diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index aeacadeb66e..b84f518919c 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -162,8 +162,11 @@ pub fn create_snapshot( snapshot_state_to_file(&microvm_state, &params.snapshot_path)?; - vmm.vm - .snapshot_memory_to_file(&params.mem_file_path, params.snapshot_type)?; + // Dump memory to file only if mem_file_path is specified + if let Some(ref mem_file_path) = params.mem_file_path { + vmm.vm + .snapshot_memory_to_file(mem_file_path, params.snapshot_type)?; + } // We need to mark queues as dirty again for all activated devices. The reason we // do it here is because we don't mark pages as dirty during runtime diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index 127b75e594e..be0d74b0d3b 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -26,7 +26,7 @@ use crate::vmm_config::balloon::{ use crate::vmm_config::boot_source::{BootSourceConfig, BootSourceConfigError}; use crate::vmm_config::drive::{BlockDeviceConfig, BlockDeviceUpdateConfig, DriveError}; use crate::vmm_config::entropy::{EntropyDeviceConfig, EntropyDeviceError}; -use crate::vmm_config::instance_info::InstanceInfo; +use crate::vmm_config::instance_info::{InstanceInfo, MemoryMappingsResponse, MemoryResponse}; use crate::vmm_config::machine_config::{MachineConfig, MachineConfigError, MachineConfigUpdate}; use crate::vmm_config::metrics::{MetricsConfig, MetricsConfigError}; use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError}; @@ -65,6 +65,10 @@ pub enum VmmAction { GetVmMachineConfig, /// Get microVM instance information. GetVmInstanceInfo, + /// Get memory mappings with skippable pages bitmap. + GetMemoryMappings, + /// Get memory info (resident and empty pages).
+ GetMemory, /// Get microVM version. GetVmmVersion, /// Flush the metrics. This action can only be called after the logger has been configured. @@ -189,6 +193,10 @@ pub enum VmmData { MmdsValue(serde_json::Value), /// The microVM instance information. InstanceInformation(InstanceInfo), + /// Memory mappings with skippable pages bitmap. + MemoryMappings(MemoryMappingsResponse), + /// Memory info (resident and empty pages). + Memory(MemoryResponse), /// The microVM version. VmmVersion(String), } @@ -419,6 +427,7 @@ impl<'a> PrebootApiController<'a> { self.vm_resources.machine_config.clone(), )), GetVmInstanceInfo => Ok(VmmData::InstanceInformation(self.instance_info.clone())), + GetMemoryMappings | GetMemory => Err(VmmActionError::OperationNotSupportedPreBoot), GetVmmVersion => Ok(VmmData::VmmVersion(self.instance_info.vmm_version.clone())), InsertBlockDevice(config) => self.insert_block_device(config), InsertNetworkDevice(config) => self.insert_net_device(config), @@ -647,9 +656,30 @@ impl RuntimeApiController { GetVmMachineConfig => Ok(VmmData::MachineConfiguration( self.vm_resources.machine_config.clone(), )), - GetVmInstanceInfo => Ok(VmmData::InstanceInformation( - self.vmm.lock().expect("Poisoned lock").instance_info(), - )), + GetVmInstanceInfo => { + let locked_vmm = self.vmm.lock().expect("Poisoned lock"); + let instance_info = locked_vmm.instance_info(); + Ok(VmmData::InstanceInformation(instance_info)) + } + GetMemoryMappings => { + let locked_vmm = self.vmm.lock().expect("Poisoned lock"); + let mappings = locked_vmm + .vm + .guest_memory_mappings(&VmInfo::from(&self.vm_resources)); + + Ok(VmmData::MemoryMappings(MemoryMappingsResponse { mappings })) + } + GetMemory => { + let locked_vmm = self.vmm.lock().expect("Poisoned lock"); + let (resident_bitmap, empty_bitmap) = locked_vmm + .vm + .get_memory_info(&VmInfo::from(&self.vm_resources)) + .map_err(|e| VmmActionError::InternalVmm(VmmError::Vm(e)))?; + Ok(VmmData::Memory(MemoryResponse { + resident: 
resident_bitmap, + empty: empty_bitmap, + })) + } GetVmmVersion => Ok(VmmData::VmmVersion( self.vmm.lock().expect("Poisoned lock").version(), )), @@ -1147,7 +1177,7 @@ mod tests { CreateSnapshotParams { snapshot_type: SnapshotType::Full, snapshot_path: PathBuf::new(), - mem_file_path: PathBuf::new(), + mem_file_path: Some(PathBuf::new()), }, ))); #[cfg(target_arch = "x86_64")] diff --git a/src/vmm/src/vmm_config/instance_info.rs b/src/vmm/src/vmm_config/instance_info.rs index cd5b44f30ba..1d8f4fb57b1 100644 --- a/src/vmm/src/vmm_config/instance_info.rs +++ b/src/vmm/src/vmm_config/instance_info.rs @@ -2,7 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 use std::fmt::{self, Display, Formatter}; -use serde::{Serialize, ser}; +use crate::vstate::vm::GuestMemoryRegionMapping; +use serde::{ser, Serialize}; /// Enumerates microVM runtime states. #[derive(Clone, Debug, Default, PartialEq, Eq)] @@ -46,4 +47,23 @@ pub struct InstanceInfo { pub vmm_version: String, /// The name of the application that runs the microVM. pub app_name: String, + /// The regions of the guest memory. + pub memory_regions: Option<Vec<GuestMemoryRegionMapping>>, +} + +/// Response structure for the memory mappings endpoint. +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] +pub struct MemoryMappingsResponse { + /// The memory region mappings. + pub mappings: Vec<GuestMemoryRegionMapping>, +} + +/// Response structure for the memory endpoint. +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] +pub struct MemoryResponse { + /// The resident bitmap as a vector of u64 values. Each bit represents if the page is resident. + pub resident: Vec<u64>, + /// The empty bitmap as a vector of u64 values. Each bit represents if the page is zero (empty). + /// This is a subset of the resident pages.
+ pub empty: Vec<u64>, } diff --git a/src/vmm/src/vmm_config/snapshot.rs b/src/vmm/src/vmm_config/snapshot.rs index 27a7841d5a4..657dc653f6f 100644 --- a/src/vmm/src/vmm_config/snapshot.rs +++ b/src/vmm/src/vmm_config/snapshot.rs @@ -44,7 +44,9 @@ pub struct CreateSnapshotParams { /// Path to the file that will contain the microVM state. pub snapshot_path: PathBuf, /// Path to the file that will contain the guest memory. - pub mem_file_path: PathBuf, + /// If not specified, the memory is not dumped to a file. + #[serde(skip_serializing_if = "Option::is_none")] + pub mem_file_path: Option<PathBuf>, } /// Allows for changing the mapping between tap devices and host devices diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index 7a8965a4b9a..f6f99ef95c0 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -13,11 +13,13 @@ use std::sync::Arc; use kvm_bindings::{KVM_MEM_LOG_DIRTY_PAGES, kvm_userspace_memory_region}; use kvm_ioctls::VmFd; +use serde::{Deserialize, Serialize}; use vmm_sys_util::eventfd::EventFd; +use crate::arch::host_page_size; pub use crate::arch::{ArchVm as Vm, ArchVmError, VmState}; use crate::logger::info; -use crate::persist::CreateSnapshotError; +use crate::persist::{CreateSnapshotError, VmInfo}; use crate::utils::u64_to_usize; use crate::vmm_config::snapshot::SnapshotType; use crate::vstate::memory::{ @@ -36,6 +38,20 @@ pub struct VmCommon { pub guest_memory: GuestMemoryMmap, } +/// Describes the region of guest memory that can be used for creating the memfile. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Deserialize, Serialize)] +pub struct GuestMemoryRegionMapping { + /// Base host virtual address where the guest memory contents for this region + /// should be copied/populated. + pub base_host_virt_addr: u64, + /// Region size. + pub size: usize, + /// Offset in the backend file/buffer where the region contents are. + pub offset: u64, + /// The configured page size for this memory region.
+ pub page_size: usize, +} + /// Errors associated with the wrappers over KVM ioctls. /// Needs `rustfmt::skip` to make multiline comments work #[rustfmt::skip] @@ -55,6 +71,8 @@ pub enum VmError { NotEnoughMemorySlots, /// Memory Error: {0} VmMemory(#[from] vm_memory::Error), + /// Invalid memory configuration: {0} + InvalidMemoryConfiguration(String), } /// Contains Vm functions that are usable across CPU architectures @@ -185,6 +203,129 @@ impl Vm { &self.common.guest_memory } + /// Gets the mappings for the guest memory. + pub fn guest_memory_mappings(&self, vm_info: &VmInfo) -> Vec<GuestMemoryRegionMapping> { + let mut offset = 0; + let mut mappings = Vec::new(); + for mem_region in self.guest_memory().iter() { + mappings.push(GuestMemoryRegionMapping { + base_host_virt_addr: mem_region.as_ptr() as u64, + size: mem_region.size(), + offset, + page_size: vm_info.huge_pages.page_size(), + }); + offset += mem_region.size() as u64; + } + mappings + } + + /// Gets the memory info (resident and empty pages) for all memory regions. + /// Returns two bitmaps: resident (all resident pages) and empty (zero pages, subset of resident). + /// This checks at the pageSize of each region and requires all regions to have the same page size.
+    ///
+    /// Pages are numbered consecutively across regions, in region iteration order; page `i`
+    /// is bit `i % 64` of word `i / 64` in both bitmaps. If `mincore` fails for a region, all
+    /// of its pages are reported as resident.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`VmError::InvalidMemoryConfiguration`] if the page size is zero or differs
+    /// between regions.
+    pub fn get_memory_info(&self, vm_info: &VmInfo) -> Result<(Vec<u64>, Vec<u64>), VmError> {
+        let mappings = self.guest_memory_mappings(vm_info);
+        let Some(first) = mappings.first() else {
+            return Ok((Vec::new(), Vec::new()));
+        };
+
+        // A zero page size would divide by zero below; mixed sizes cannot share one bitmap.
+        let page_size = first.page_size;
+        if page_size == 0 {
+            return Err(VmError::InvalidMemoryConfiguration(
+                "Memory region page size must be non-zero".to_string(),
+            ));
+        }
+        if mappings.iter().any(|m| m.page_size != page_size) {
+            return Err(VmError::InvalidMemoryConfiguration(
+                "All memory regions must have the same page size".to_string(),
+            ));
+        }
+
+        // Calculate total number of pages across all regions
+        let total_pages: usize = mappings.iter().map(|m| m.size / page_size).sum();
+        let bitmap_size = total_pages.div_ceil(64);
+        let mut resident_bitmap = vec![0u64; bitmap_size];
+        let mut empty_bitmap = vec![0u64; bitmap_size];
+
+        // mincore works in host pages, so one guest page may span several mincore entries.
+        let sys_page_size = host_page_size();
+        let sub_pages = page_size.div_ceil(sys_page_size);
+        // Reference page of zeroes, allocated once and compared against every resident page.
+        let zero_buf = vec![0u8; page_size];
+        // Index of the current page, counted across all regions.
+        let mut global_page_idx = 0;
+
+        // Walk the regions directly, in the order `guest_memory_mappings` used, so no lookup
+        // by host address (and no panic when it fails) is needed.
+        for mem_region in self.guest_memory().iter() {
+            let region_ptr = mem_region.as_ptr();
+            let region_size = mem_region.size();
+            // Integer division: a trailing partial page (if any) is not reported.
+            let num_pages = region_size / page_size;
+
+            // One status byte per host page; bit 0 set means that page is resident.
+            let mut mincore_vec = vec![0u8; region_size.div_ceil(sys_page_size)];
+            // SAFETY: `region_ptr..region_ptr + region_size` is a mapping owned by the guest
+            // memory borrowed from `self`, and `mincore_vec` has one byte per host page of it.
+            let mincore_ok = unsafe {
+                libc::mincore(
+                    region_ptr.cast::<libc::c_void>(),
+                    region_size,
+                    mincore_vec.as_mut_ptr(),
+                )
+            } == 0;
+
+            for page_idx in 0..num_pages {
+                let page_offset = page_idx * page_size;
+                let first_sub_page = page_offset / sys_page_size;
+
+                // If mincore failed, assume resident (conservative approach). Otherwise the
+                // page is resident if any host-sized sub-page is (check LSB only).
+                let is_resident = !mincore_ok
+                    || mincore_vec
+                        .get(first_sub_page..first_sub_page + sub_pages)
+                        .is_some_and(|bytes| bytes.iter().any(|&v| (v & 0x1) != 0));
+
+                if is_resident {
+                    let bitmap_idx = global_page_idx / 64;
+                    let bit = 1u64 << (global_page_idx % 64);
+                    if let Some(word) = resident_bitmap.get_mut(bitmap_idx) {
+                        *word |= bit;
+                    }
+
+                    // SAFETY: `page_idx < num_pages`, so the `page_size` bytes starting at
+                    // `page_offset` lie inside the region; `zero_buf` is `page_size` long.
+                    let is_zero = unsafe {
+                        libc::memcmp(
+                            region_ptr.add(page_offset).cast::<libc::c_void>(),
+                            zero_buf.as_ptr().cast::<libc::c_void>(),
+                            page_size,
+                        )
+                    } == 0;
+                    // Empty is only ever set together with resident, keeping it a subset.
+                    if is_zero {
+                        if let Some(word) = empty_bitmap.get_mut(bitmap_idx) {
+                            *word |= bit;
+                        }
+                    }
+                }
+
+                global_page_idx += 1;
+            }
+        }
+
+        Ok((resident_bitmap, empty_bitmap))
+    }
+
     /// Resets the KVM dirty bitmap for each of the guest's memory regions.
pub fn reset_dirty_bitmap(&self) { self.guest_memory() diff --git a/src/vmm/tests/integration_tests.rs b/src/vmm/tests/integration_tests.rs index 55fb07c1aae..b8656ec967e 100644 --- a/src/vmm/tests/integration_tests.rs +++ b/src/vmm/tests/integration_tests.rs @@ -215,7 +215,7 @@ fn verify_create_snapshot(is_diff: bool) -> (TempFile, TempFile) { let snapshot_params = CreateSnapshotParams { snapshot_type, snapshot_path: snapshot_file.as_path().to_path_buf(), - mem_file_path: memory_file.as_path().to_path_buf(), + mem_file_path: Some(memory_file.as_path().to_path_buf()), }; controller diff --git a/tests/framework/http_api.py b/tests/framework/http_api.py index ea8efd3df4f..97a70a44a03 100644 --- a/tests/framework/http_api.py +++ b/tests/framework/http_api.py @@ -121,3 +121,5 @@ def __init__(self, api_usocket_full_name): self.snapshot_load = Resource(self, "/snapshot/load") self.cpu_config = Resource(self, "/cpu-config") self.entropy = Resource(self, "/entropy") + self.memory_mappings = Resource(self, "/memory/mappings") + self.memory = Resource(self, "/memory") diff --git a/tests/integration_tests/functional/test_api.py b/tests/integration_tests/functional/test_api.py index 864c6d5eda9..ed377bf12bf 100644 --- a/tests/integration_tests/functional/test_api.py +++ b/tests/integration_tests/functional/test_api.py @@ -17,6 +17,7 @@ import host_tools.drive as drive_tools import host_tools.network as net_tools from framework import utils, utils_cpuid +from framework.microvm import HugePagesConfig from framework.utils import get_firecracker_version_from_toml from framework.utils_cpu_templates import SUPPORTED_CPU_TEMPLATES @@ -1355,3 +1356,239 @@ def test_negative_snapshot_load_api(microvm_factory): # The snapshot/memory files above don't exist, but the request is otherwise syntactically valid. # In this case, Firecracker exits. 
vm.mark_killed()
# (The statement above closes a definition that begins earlier in the file;
# it is carried over unchanged.)


def _assert_rejected_before_boot(microvm, path):
    """Spawn and configure `microvm` without starting it, then GET `path`.

    Asserts that the API answers with HTTP 400 and that the returned
    `fault_message` contains NOT_SUPPORTED_BEFORE_START.
    """
    microvm.spawn()
    microvm.basic_config()

    # Use session directly since get() asserts on 200
    res = microvm.api.session.get(microvm.api.endpoint + path)
    assert res.status_code == 400
    assert NOT_SUPPORTED_BEFORE_START in res.json()["fault_message"]


def _count_set_bits(bitmap):
    """Return the total number of 1 bits across all integer words in `bitmap`."""
    return sum(bin(word).count("1") for word in bitmap)


def test_memory_mappings_pre_boot(uvm_plain):
    """Test that memory mappings endpoint is not available before boot."""
    _assert_rejected_before_boot(uvm_plain, "/memory/mappings")


def test_memory_pre_boot(uvm_plain):
    """Test that memory endpoint is not available before boot."""
    _assert_rejected_before_boot(uvm_plain, "/memory")


def test_memory_mappings_post_boot(uvm_plain):
    """Test that memory mappings endpoint works after boot with hugepages."""
    test_microvm = uvm_plain
    test_microvm.spawn()
    test_microvm.basic_config(huge_pages=HugePagesConfig.HUGETLBFS_2MB)
    test_microvm.start()

    response = test_microvm.api.memory_mappings.get()
    assert response.status_code == 200

    data = response.json()
    assert isinstance(data, dict)
    assert "mappings" in data
    mappings = data["mappings"]
    assert isinstance(mappings, list)
    assert len(mappings) > 0

    # Verify structure of each mapping: every field is present and an integer.
    for mapping in mappings:
        for key in ("base_host_virt_addr", "size", "offset", "page_size"):
            assert key in mapping
            assert isinstance(mapping[key], int)
        assert mapping["size"] > 0
        # Verify page size is 2MB (2097152 bytes) for hugepages
        assert mapping["page_size"] == 2 * 1024 * 1024


def test_memory_post_boot(uvm_plain):
    """Test that memory endpoint works after boot with hugepages."""
    test_microvm = uvm_plain
    test_microvm.spawn()
    test_microvm.basic_config(huge_pages=HugePagesConfig.HUGETLBFS_2MB)
    test_microvm.start()

    # Get memory mappings to determine page size and total memory
    mappings_response = test_microvm.api.memory_mappings.get()
    assert mappings_response.status_code == 200
    mappings_data = mappings_response.json()
    assert isinstance(mappings_data, dict)
    assert "mappings" in mappings_data
    mappings = mappings_data["mappings"]
    assert len(mappings) > 0

    # All regions should have the same page size (2MB for hugepages)
    page_size = mappings[0]["page_size"]
    assert page_size == 2 * 1024 * 1024, "Expected 2MB page size for hugepages"
    for mapping in mappings:
        assert (
            mapping["page_size"] == page_size
        ), "All regions must have the same page size"

    # The test expects one bit per page, packed into 64-bit words.
    total_memory_size = sum(mapping["size"] for mapping in mappings)
    total_pages = total_memory_size // page_size
    expected_bitmap_size = (total_pages + 63) // 64  # ceil(total_pages / 64)

    # Get memory info
    response = test_microvm.api.memory.get()
    assert response.status_code == 200

    data = response.json()
    assert isinstance(data, dict)
    assert "resident" in data
    assert "empty" in data
    resident_bitmap = data["resident"]
    empty_bitmap = data["empty"]
    assert isinstance(resident_bitmap, list)
    assert isinstance(empty_bitmap, list)
    assert len(resident_bitmap) == expected_bitmap_size
    assert len(empty_bitmap) == expected_bitmap_size

    # Verify all values are valid u64 integers
    for bitmap in (resident_bitmap, empty_bitmap):
        for value in bitmap:
            assert isinstance(value, int)
            assert 0 <= value <= 0xFFFFFFFFFFFFFFFF  # Max u64 value

    # After boot, there should be at least one resident page
    assert any(
        value != 0 for value in resident_bitmap
    ), "Expected at least one resident page after VM boot"

    # Empty pages should be a subset of resident pages:
    # (empty_bitmap & resident_bitmap) == empty_bitmap, word by word.
    # Both lists were asserted to have the same length above, so zip() does
    # not drop any word.
    for empty_word, resident_word in zip(empty_bitmap, resident_bitmap):
        assert (
            empty_word & resident_word
        ) == empty_word, "Empty pages must be a subset of resident pages"


@pytest.mark.nonci
def test_memory_benchmark(microvm_factory, guest_kernel_linux_6_1, rootfs):
    """Benchmark the memory endpoint performance (resident + zero page checking).

    Boots a hugepage-backed microVM, writes zeros into guest tmpfs to fault
    pages in, times a single GET on the memory endpoint and prints a report
    of resident/empty page counts and throughput.
    """
    test_microvm = microvm_factory.build(guest_kernel_linux_6_1, rootfs)
    test_microvm.spawn()

    # Size the guest from the host's free 2MB hugepages: reserve 32 pages
    # (64MB) for the system, use at least 128MB and at most 1GB.
    # Fall back to 256MB if the hugepage counter cannot be read or parsed.
    # NOTE(review): the 128MB floor is applied even when fewer hugepages are
    # free than that; the VM may then fail to start -- confirm this is intended.
    try:
        with open(
            "/sys/kernel/mm/hugepages/hugepages-2048kB/free_hugepages",
            "r",
            encoding="utf-8",
        ) as counter_file:
            free_hugepages = int(counter_file.read().strip())
        available_mib = max(128, (free_hugepages - 32) * 2)
        mem_size_mib = min(1024, available_mib)
    except (OSError, ValueError):
        # FileNotFoundError is a subclass of OSError, so it is covered here.
        mem_size_mib = 256

    test_microvm.basic_config(
        mem_size_mib=mem_size_mib, huge_pages=HugePagesConfig.HUGETLBFS_2MB
    )
    # Add network interface for SSH access
    test_microvm.add_net_iface()
    test_microvm.start()

    # Get memory mappings to determine actual memory size
    mappings_response = test_microvm.api.memory_mappings.get()
    assert mappings_response.status_code == 200
    mappings = mappings_response.json()["mappings"]
    # Fail with a clear assertion instead of an IndexError on mappings[0].
    assert len(mappings) > 0

    total_memory_bytes = sum(mapping["size"] for mapping in mappings)
    total_memory_mib = total_memory_bytes / (1024 * 1024)
    page_size = mappings[0]["page_size"]
    total_pages = total_memory_bytes // page_size
    expected_bitmap_size = (total_pages + 63) // 64

    # Ensure memory is resident by writing zeros to it via guest.
    # This will fault in the pages and make them resident; tmpfs (/dev/shm)
    # keeps the written data in memory.
    # Only a portion is written (25% of guest memory, max 256MB) to avoid
    # freezing the sandbox. The command is best-effort (`|| true`).
    fault_memory_mib = min(256, int(total_memory_mib * 0.25))
    test_microvm.ssh.run(
        "dd if=/dev/zero of=/dev/shm/zero_mem bs=1M "
        f"count={fault_memory_mib} 2>/dev/null || true"
    )

    # Give the system a moment to fault in pages
    time.sleep(0.1)

    # Benchmark the /memory endpoint call (only the GET itself is timed)
    start_time = time.perf_counter()
    response = test_microvm.api.memory.get()
    elapsed_seconds = time.perf_counter() - start_time

    # Verify the response is valid
    assert response.status_code == 200
    data = response.json()
    assert "resident" in data
    assert "empty" in data
    resident_bitmap = data["resident"]
    empty_bitmap = data["empty"]
    assert len(resident_bitmap) == expected_bitmap_size
    assert len(empty_bitmap) == expected_bitmap_size

    # Each set bit is one page; resident pages are the faulted-in memory.
    resident_page_count = _count_set_bits(resident_bitmap)
    empty_page_count = _count_set_bits(empty_bitmap)

    # Throughput is computed over resident memory only
    resident_memory_bytes = resident_page_count * page_size
    resident_memory_mib = resident_memory_bytes / (1024 * 1024)
    if resident_memory_bytes > 0:
        throughput_mib_per_sec = resident_memory_mib / elapsed_seconds
        time_per_mb_ms = (elapsed_seconds * 1000) / resident_memory_mib
    else:
        throughput_mib_per_sec = 0
        time_per_mb_ms = 0

    resident_pct = resident_page_count * 100 / total_pages
    empty_pct = (
        empty_page_count * 100 / resident_page_count if resident_page_count > 0 else 0
    )

    # Print benchmark results
    rule = "=" * 60
    print(f"\n{rule}")
    print("Memory Benchmark Results")
    print(rule)
    print(
        f"Total Memory: {total_memory_mib:.2f} MiB "
        f"({total_memory_bytes / (1024**3):.3f} GB)"
    )
    print(
        f"Resident Pages: {resident_page_count} / {total_pages} "
        f"({resident_pct:.1f}%)"
    )
    print(
        f"Resident Memory: {resident_memory_mib:.2f} MiB "
        f"({resident_memory_bytes / (1024**3):.3f} GB)"
    )
    print(
        f"Empty Pages: {empty_page_count} / {resident_page_count} "
        f"({empty_pct:.1f}% of resident)"
    )
    print(f"Elapsed Time: {elapsed_seconds*1000:.2f} ms")
    print(f"Throughput (resident): {throughput_mib_per_sec:.2f} MiB/s")
    print(f"Time per MB (resident): {time_per_mb_ms:.3f} ms/MB")
    print(f"{rule}\n")

    # Verify at least some pages are resident (checked last so the report is
    # still printed when this fails)
    assert resident_page_count > 0, "Expected at least one resident page"