Skip to content

feat: enable vmrt to boot with host virtiofs drive as root fs #200

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions qemu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
FROM ubuntu:22.04

ARG NUMA=false
ARG VIRTIOFS=false

RUN apt update \
&& \
\
Expand Down Expand Up @@ -44,14 +47,15 @@ RUN echo CONFIG_EDID=y >> /qemu/configs/devices/i386-softmmu/default.mak
RUN echo CONFIG_VGA=y >> /qemu/configs/devices/i386-softmmu/default.mak
RUN echo CONFIG_VGA_PCI=y >> /qemu/configs/devices/i386-softmmu/default.mak
RUN echo CONFIG_PCIE_PORT=y >> /qemu/configs/devices/i386-softmmu/default.mak
RUN if [ "$VIRTIOFS" = "true" ]; then \
echo CONFIG_VHOST_USER_FS=y >> /qemu/configs/devices/i386-softmmu/default.mak; \
fi

# --without-default-devices
RUN mkdir build && \
cd build && \
/qemu/configure \
RUN mkdir build && cd build && \
CONFIGURE_OPTS=" \
--target-list=x86_64-softmmu \
--static \
--audio-drv-list="" \
--audio-drv-list='' \
--disable-slirp \
--disable-tcg-interpreter \
--disable-containers \
Expand All @@ -70,14 +74,18 @@ RUN mkdir build && \
--disable-bochs \
--disable-bzip2 \
--disable-guest-agent \
--disable-numa \
--disable-tcg \
--disable-vnc \
--disable-live-block-migration \
--disable-gio \
--enable-vhost-kernel \
--enable-virtfs \
--without-default-devices
--without-default-devices" && \
if [ "$NUMA" = "true" ]; then \
CONFIGURE_OPTS="$CONFIGURE_OPTS --enable-memfd --enable-mem-backend"; \
else \
CONFIGURE_OPTS="$CONFIGURE_OPTS --disable-numa --disable-tcg"; \
fi && \
/qemu/configure $CONFIGURE_OPTS

RUN cd build && make V=1 CFLAGS+="-Os -flto" -j4

Expand Down
8 changes: 7 additions & 1 deletion qemu/Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
all: vmrt

VIRTIOFS ?= false
NUMA ?= $(if $(filter true,$(VIRTIOFS)),true,false)

vmrt: Dockerfile
docker build -t build-qemu .
docker build \
--build-arg VIRTIOFS=$(VIRTIOFS) \
--build-arg NUMA=$(NUMA) \
-t build-qemu .
$(SHELL) copy_img build-qemu vmrt .
$(SHELL) copy_img build-qemu /qemu/pc-bios/vgabios-stdvga.bin .

Expand Down
5 changes: 5 additions & 0 deletions runtime/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,8 @@ path = "src/lib.rs"
[[bin]]
name = "ya-runtime-vm"
path = "src/main.rs"

[features]
default = []
# Attach the root filesystem as a vhost-user virtiofs device (tag "rootfs-0")
# instead of one virtio-blk drive per task package.
virtiofs = []
# Back guest memory with a shared memory-backend-file NUMA node; enabling this
# also enables `virtiofs`.
# NOTE(review): the dependency direction looks inverted. The shared memory
# backend is only added under `numa`, while qemu/Makefile derives NUMA's
# default from VIRTIOFS. Presumably `virtiofs` should imply `numa` so that
# `--features virtiofs` alone yields a working VM; confirm before merging.
numa = ["virtiofs"]
8 changes: 7 additions & 1 deletion runtime/init-container/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ NEW_ROOT := newroot
# -MMD to create dependency files (*.d) on first compilation
CFLAGS := -MMD -std=c11 -O2 -Wall -Wextra -Werror -fPIE -pie -Iinclude/ -Wmaybe-uninitialized -Iunpacked_headers/usr/include -I$(CURDIR)/$(LIBSECCOMP_SUBMODULE)/include '-DNEW_ROOT="$(NEW_ROOT)"'

VIRTIOFS ?= false
CFLAGS += '-DVIRTIOFS=$(VIRTIOFS)'

ifneq ($(DEBUG), "")
CFLAGS += -DNDEBUG
endif
Expand Down Expand Up @@ -135,7 +138,10 @@ initramfs.cpio.gz: init mkfs $(UNPACKED_KERNEL)
cp $(UNPACKED_KERNEL)/lib/modules/$(KERNEL_VER)/kernel/net/core/failover.ko initramfs
cp $(UNPACKED_KERNEL)/lib/modules/$(KERNEL_VER)/kernel/net/ipv6/ipv6.ko initramfs
cp $(UNPACKED_KERNEL)/lib/modules/$(KERNEL_VER)/kernel/net/packet/af_packet.ko initramfs
cp $(UNPACKED_KERNEL)/lib/modules/5.10.29-0-virt/kernel/fs/fuse/fuse.ko initramfs
cp $(UNPACKED_KERNEL)/lib/modules/$(KERNEL_VER)/kernel/fs/fuse/fuse.ko initramfs
# VIRTIOFS always has a non-empty value here (it defaults to "false" via `?=`),
# so `ifdef` would be true unconditionally. Compare against "true" so that the
# virtiofs module is only bundled when the feature was actually requested.
ifeq ($(VIRTIOFS),true)
	cp $(UNPACKED_KERNEL)/lib/modules/$(KERNEL_VER)/kernel/fs/fuse/virtiofs.ko initramfs
endif

cp $(BUSYBOX)/$(MKFS_NAME) initramfs
mkdir initramfs/$(NEW_ROOT)
Expand Down
12 changes: 11 additions & 1 deletion runtime/init-container/src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@
#include "proto.h"
#include "init-seccomp.h"

// Fallback for builds that do not pass -DVIRTIOFS=<value>: virtiofs support is off.
// After this point VIRTIOFS is always defined, so `#ifdef VIRTIOFS` is always
// true; the macro has to be tested by value (`#if VIRTIOFS`) to be meaningful.
#ifndef VIRTIOFS
#define VIRTIOFS false
#endif

#define SYSROOT "/mnt/newroot"

#define CONTAINER_OF(ptr, type, member) (type *)((char *)(ptr) - offsetof(type, member))
Expand Down Expand Up @@ -2604,6 +2608,12 @@ static void scan_storage(struct storage_node_t **list)
free(data);
}

// VIRTIOFS is always defined (the build passes -DVIRTIOFS=<true|false> and this
// file falls back to `false`), so it must be tested by value: `#ifdef` would
// register the virtiofs root even in builds where the feature is disabled.
// NOTE(review): `#if VIRTIOFS` relies on `true`/`false` being macros expanding to
// 1/0 (i.e. <stdbool.h> is included before this point) - confirm, or switch the
// build to pass -DVIRTIOFS=1 / -DVIRTIOFS=0.
#if VIRTIOFS
    // This assumes that a vhost-user virtiofs device with the tag "rootfs-0" is attached to QEMU (vmrt).
    // A more robust and extensible approach would be to discover the available tags at runtime
    // instead of hard-coding a single one.
    storage_append(list, "/mnt/image-0", "rootfs-0", "virtiofs", "", MS_RDONLY | MS_NODEV);
#endif

fflush(stderr);

for (char **p = environ; *p; ++p)
Expand Down Expand Up @@ -2678,7 +2688,7 @@ int main(int argc, char **argv)
if (access("/netfs.ko", R_OK) == 0) {
load_module("/netfs.ko");
}

load_module("/fscache.ko");
load_module("/af_packet.ko");
load_module("/ipv6.ko");
Expand Down
61 changes: 40 additions & 21 deletions runtime/src/vmrt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ use std::sync::Arc;
use futures::lock::Mutex;
use futures::FutureExt;
use tokio::io::AsyncBufReadExt;
use tokio::{io, process, spawn};
use tokio::{
io, process, spawn,
time::{sleep, Duration},
};

use ya_client_model::activity::exe_script_command::VolumeMount;
use ya_runtime_sdk::runtime_api::server;
Expand Down Expand Up @@ -105,27 +108,43 @@ pub async fn start_vmrt(
"virtserialport,chardev=manager_cdev,name=manager_port",
];

let rootfs_devices: Vec<(String, String)> = deployment
.task_packages
.iter()
.enumerate()
.map(|(i, path)| {
let drive = format!(
"file={},cache=unsafe,readonly=on,format=raw,id=rootfs-{},if=none",
path.display(),
i
);
let device = format!("virtio-blk-pci,drive=rootfs-{},serial=rootfs-{}", i, i);
(drive, device)
})
.collect();

for (drive, device) in rootfs_devices.iter() {
args.push("-drive");
args.push(drive);
args.push("-device");
args.push(device);
// Root-filesystem and memory arguments are built as owned `String`s first; `args`
// only receives `&str` views of them, so `additional_args` has to stay alive for
// as long as `args` is in use.
let mut additional_args: Vec<String> = Vec::new();
if cfg!(feature = "virtiofs") {
    // Reading the VIRTIOFS_SOCK_PATH environment variable isn't a robust or extensible solution.
    // A better approach would be to update the deployment logic to retrieve this information
    // from the image parameters. This would allow the image creator to specify whether a host
    // user virtiofs drive should be used, and if so, ensure the drive is set up and mounted
    // on the host before attaching it to the guest VM.
    //
    // NOTE(review): this panics when the variable is unset or not valid Unicode. It
    // should probably be turned into a regular error once the return type of the
    // enclosing function is taken into account.
    // NOTE(review): in this mode `deployment.task_packages` is not consulted at all.
    let socket_path = std::env::var("VIRTIOFS_SOCK_PATH")
        .expect("Environment variable VIRTIOFS_SOCK_PATH is not set");
    additional_args.extend([
        "-chardev".to_string(),
        format!("socket,id=char-0,path={socket_path}"),
        "-device".to_string(),
        // Nothing is interpolated here, so a plain conversion is used instead of
        // `format!`. The tag "rootfs-0" is the one the guest init mounts.
        "vhost-user-fs-pci,queue-size=1024,chardev=char-0,tag=rootfs-0".to_string(),
    ]);
} else {
    // One read-only raw virtio-blk drive per task package, identified as `rootfs-<index>`.
    for (i, path) in deployment.task_packages.iter().enumerate() {
        additional_args.extend([
            "-drive".to_string(),
            format!(
                "file={},cache=unsafe,readonly=on,format=raw,id=rootfs-{i},if=none",
                path.display()
            ),
            "-device".to_string(),
            format!("virtio-blk-pci,drive=rootfs-{i},serial=rootfs-{i}"),
        ]);
    }
}
if cfg!(feature = "numa") {
    // Back guest memory with a shared file under /dev/shm and expose it as a NUMA node.
    additional_args.extend([
        "-object".to_string(),
        format!("memory-backend-file,id=mem,size={memory_size},mem-path=/dev/shm,share=on"),
        "-numa".to_string(),
        "node,memdev=mem".to_string(),
    ]);
}
args.extend(additional_args.iter().map(String::as_str));

cmd.args(args);

Expand Down