diff --git a/qemu/Dockerfile b/qemu/Dockerfile
index 1bf59439..5783af62 100755
--- a/qemu/Dockerfile
+++ b/qemu/Dockerfile
@@ -1,5 +1,8 @@
 FROM ubuntu:22.04
 
+ARG NUMA=false
+ARG VIRTIOFS=false
+
 RUN apt update \
     && \
     \
@@ -44,14 +47,15 @@ RUN echo CONFIG_EDID=y >> /qemu/configs/devices/i386-softmmu/default.mak
 RUN echo CONFIG_VGA=y >> /qemu/configs/devices/i386-softmmu/default.mak
 RUN echo CONFIG_VGA_PCI=y >> /qemu/configs/devices/i386-softmmu/default.mak
 RUN echo CONFIG_PCIE_PORT=y >> /qemu/configs/devices/i386-softmmu/default.mak
+RUN if [ "$VIRTIOFS" = "true" ]; then \
+    echo CONFIG_VHOST_USER_FS=y >> /qemu/configs/devices/i386-softmmu/default.mak; \
+fi
 
-# --without-default-devices
-RUN mkdir build && \
-    cd build && \
-    /qemu/configure \
+RUN mkdir build && cd build && \
+    CONFIGURE_OPTS=" \
     --target-list=x86_64-softmmu \
     --static \
-    --audio-drv-list="" \
+    --audio-drv-list= \
     --disable-slirp \
     --disable-tcg-interpreter \
     --disable-containers \
@@ -70,14 +74,18 @@ RUN mkdir build && \
     --disable-bochs \
     --disable-bzip2 \
     --disable-guest-agent \
-    --disable-numa \
-    --disable-tcg \
     --disable-vnc \
     --disable-live-block-migration \
     --disable-gio \
     --enable-vhost-kernel \
     --enable-virtfs \
-    --without-default-devices
+    --without-default-devices" && \
+    if [ "$NUMA" = "true" ]; then \
+        CONFIGURE_OPTS="$CONFIGURE_OPTS --enable-memfd --enable-mem-backend"; \
+    else \
+        CONFIGURE_OPTS="$CONFIGURE_OPTS --disable-numa --disable-tcg"; \
+    fi && \
+    /qemu/configure $CONFIGURE_OPTS
 
 RUN cd build && make V=1 CFLAGS+="-Os -flto" -j4
 
diff --git a/qemu/Makefile b/qemu/Makefile
index 3bedd5c8..78ade930 100644
--- a/qemu/Makefile
+++ b/qemu/Makefile
@@ -1,7 +1,13 @@
 all: vmrt
 
+VIRTIOFS ?= false
+NUMA ?= $(if $(filter true,$(VIRTIOFS)),true,false)
+
 vmrt: Dockerfile
-	docker build -t build-qemu .
+	docker build \
+		--build-arg VIRTIOFS=$(VIRTIOFS) \
+		--build-arg NUMA=$(NUMA) \
+		-t build-qemu .
 	$(SHELL) copy_img build-qemu vmrt .
 	$(SHELL) copy_img build-qemu /qemu/pc-bios/vgabios-stdvga.bin .
 
diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml
index 13c17c67..da9d40a7 100644
--- a/runtime/Cargo.toml
+++ b/runtime/Cargo.toml
@@ -118,3 +118,8 @@ path = "src/lib.rs"
 [[bin]]
 name = "ya-runtime-vm"
 path = "src/main.rs"
+
+[features]
+default = []
+virtiofs = ["numa"] # vhost-user-fs needs the shared memory backend added by "numa"
+numa = []
diff --git a/runtime/init-container/Makefile b/runtime/init-container/Makefile
index a1707b86..c9db5da5 100644
--- a/runtime/init-container/Makefile
+++ b/runtime/init-container/Makefile
@@ -5,6 +5,9 @@ NEW_ROOT := newroot
 # -MMD to create dependency files (*.d) on first compilation
 CFLAGS := -MMD -std=c11 -O2 -Wall -Wextra -Werror -fPIE -pie -Iinclude/ -Wmaybe-uninitialized -Iunpacked_headers/usr/include -I$(CURDIR)/$(LIBSECCOMP_SUBMODULE)/include '-DNEW_ROOT="$(NEW_ROOT)"'
 
+VIRTIOFS ?= false
+CFLAGS += -DVIRTIOFS=$(if $(filter true,$(VIRTIOFS)),1,0)
+
 ifneq ($(DEBUG), "")
 CFLAGS += -DNDEBUG
 endif
@@ -135,7 +138,10 @@ initramfs.cpio.gz: init mkfs $(UNPACKED_KERNEL)
 	cp $(UNPACKED_KERNEL)/lib/modules/$(KERNEL_VER)/kernel/net/core/failover.ko initramfs
 	cp $(UNPACKED_KERNEL)/lib/modules/$(KERNEL_VER)/kernel/net/ipv6/ipv6.ko initramfs
 	cp $(UNPACKED_KERNEL)/lib/modules/$(KERNEL_VER)/kernel/net/packet/af_packet.ko initramfs
-	cp $(UNPACKED_KERNEL)/lib/modules/5.10.29-0-virt/kernel/fs/fuse/fuse.ko initramfs
+	cp $(UNPACKED_KERNEL)/lib/modules/$(KERNEL_VER)/kernel/fs/fuse/fuse.ko initramfs
+ifeq ($(VIRTIOFS),true) # VIRTIOFS is always defined (defaults to false), so test its value
+	cp $(UNPACKED_KERNEL)/lib/modules/$(KERNEL_VER)/kernel/fs/fuse/virtiofs.ko initramfs
+endif
 	cp $(BUSYBOX)/$(MKFS_NAME) initramfs
 	mkdir initramfs/$(NEW_ROOT)
 
diff --git a/runtime/init-container/src/init.c b/runtime/init-container/src/init.c
index dc72f09c..86ffa017 100644
--- a/runtime/init-container/src/init.c
+++ b/runtime/init-container/src/init.c
@@ -39,6 +39,10 @@
 #include "proto.h"
 #include "init-seccomp.h"
 
+#ifndef VIRTIOFS
+#define VIRTIOFS 0 // numeric so that "#if VIRTIOFS" works without <stdbool.h>
+#endif
+
 #define SYSROOT "/mnt/newroot"
 
 #define CONTAINER_OF(ptr, type, member) (type *)((char *)(ptr) - offsetof(type, member))
@@ -2604,6 +2608,12 @@ static void scan_storage(struct storage_node_t **list)
         free(data);
     }
 
+#if VIRTIOFS // value test: the macro is always defined, so #ifdef would always be true
+    // This assumes that a virtiofs user drive with the tag "rootfs-0" is attached to QEMU (vmrt).
+    // A more robust and extensible approach could be to look up the list of tags in /sys/class/virtio-ports/
+    storage_append(list, "/mnt/image-0", "rootfs-0", "virtiofs", "", MS_RDONLY | MS_NODEV);
+#endif
+
     fflush(stderr);
 
     for (char **p = environ; *p; ++p)
@@ -2678,7 +2688,7 @@ int main(int argc, char **argv)
     if (access("/netfs.ko", R_OK) == 0) {
         load_module("/netfs.ko");
     }
-    
+
     load_module("/fscache.ko");
     load_module("/af_packet.ko");
     load_module("/ipv6.ko");
diff --git a/runtime/src/vmrt.rs b/runtime/src/vmrt.rs
index 093a574a..9d67d594 100755
--- a/runtime/src/vmrt.rs
+++ b/runtime/src/vmrt.rs
@@ -8,7 +8,10 @@ use std::sync::Arc;
 use futures::lock::Mutex;
 use futures::FutureExt;
 use tokio::io::AsyncBufReadExt;
-use tokio::{io, process, spawn};
+use tokio::{
+    io, process, spawn,
+    time::{sleep, Duration},
+};
 use ya_client_model::activity::exe_script_command::VolumeMount;
 use ya_runtime_sdk::runtime_api::server;
 
@@ -105,27 +108,43 @@ pub async fn start_vmrt(
         "virtserialport,chardev=manager_cdev,name=manager_port",
     ];
 
-    let rootfs_devices: Vec<(String, String)> = deployment
-        .task_packages
-        .iter()
-        .enumerate()
-        .map(|(i, path)| {
-            let drive = format!(
-                "file={},cache=unsafe,readonly=on,format=raw,id=rootfs-{},if=none",
-                path.display(),
-                i
-            );
-            let device = format!("virtio-blk-pci,drive=rootfs-{},serial=rootfs-{}", i, i);
-            (drive, device)
-        })
-        .collect();
-
-    for (drive, device) in rootfs_devices.iter() {
-        args.push("-drive");
-        args.push(drive);
-        args.push("-device");
-        args.push(device);
+    let mut additional_args = Vec::new();
+    if cfg!(feature = "virtiofs") {
+        // Reading the VIRTIOFS_SOCK_PATH environment variable isn't a robust or extensible solution.
+        // A better approach would be to update the deployment logic to retrieve this information
+        // from the image parameters. This would allow the image creator to specify whether a host
+        // user virtiofs drive should be used, and if so, ensure the drive is set up and mounted
+        // on the host before attaching it to the guest VM.
+        let socket_path = std::env::var("VIRTIOFS_SOCK_PATH")
+            .expect("Environment variable VIRTIOFS_SOCK_PATH is not set");
+        additional_args.extend([
+            "-chardev".to_string(),
+            format!("socket,id=char-0,path={socket_path}"),
+            "-device".to_string(),
+            "vhost-user-fs-pci,queue-size=1024,chardev=char-0,tag=rootfs-0".to_string(),
+        ]);
+    } else {
+        for (i, path) in deployment.task_packages.iter().enumerate() {
+            additional_args.extend([
+                "-drive".to_string(),
+                format!(
+                    "file={},cache=unsafe,readonly=on,format=raw,id=rootfs-{i},if=none",
+                    path.display()
+                ),
+                "-device".to_string(),
+                format!("virtio-blk-pci,drive=rootfs-{i},serial=rootfs-{i}"),
+            ]);
+        }
+    }
+    if cfg!(feature = "numa") {
+        additional_args.extend([
+            "-object".to_string(),
+            format!("memory-backend-file,id=mem,size={memory_size},mem-path=/dev/shm,share=on"),
+            "-numa".to_string(),
+            "node,memdev=mem".to_string(),
+        ]);
     }
+    args.extend(additional_args.iter().map(String::as_str));
 
     cmd.args(args);
 