From c5f28995b8fe52be02f400765694203bc6b94904 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov
Date: Thu, 12 Dec 2024 14:45:03 +0100
Subject: [PATCH 01/25] vpc: Split off vpc_ignore_current_size() helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation for making changes to the logic deciding whether CHS or
'current_size' needs to be used in determining the image size, split off
a vpc_ignore_current_size() helper. No functional change intended.

Signed-off-by: Vitaly Kuznetsov
Message-ID: <20241212134504.1983757-2-vkuznets@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé
Reviewed-by: Kevin Wolf
Signed-off-by: Kevin Wolf
---
 block/vpc.c | 67 +++++++++++++++++++++++++++++------------------------
 1 file changed, 37 insertions(+), 30 deletions(-)

diff --git a/block/vpc.c b/block/vpc.c
index 6489ee756abe..cb0773914c72 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -216,6 +216,41 @@ static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
     }
 }
 
+/*
+ * Microsoft Virtual PC and Microsoft Hyper-V produce and read
+ * VHD image sizes differently. VPC will rely on CHS geometry,
+ * while Hyper-V and disk2vhd use the size specified in the footer.
+ *
+ * We use a couple of approaches to try and determine the correct method:
+ * look at the Creator App field, and look for images that have CHS
+ * geometry that is the maximum value.
+ *
+ * If the CHS geometry is the maximum CHS geometry, then we assume that
+ * the size is the footer->current_size to avoid truncation. Otherwise,
+ * we follow the table based on footer->creator_app:
+ *
+ * Known creator apps:
+ *  'vpc ' :  CHS          Virtual PC (uses disk geometry)
+ *  'qemu' :  CHS          QEMU (uses disk geometry)
+ *  'qem2' :  current_size QEMU (uses current_size)
+ *  'win ' :  current_size Hyper-V
+ *  'd2v ' :  current_size Disk2vhd
+ *  'tap\0':  current_size XenServer
+ *  'CTXS' :  current_size XenConverter
+ *
+ * The user can override the table values via drive options, however
+ * even with an override we will still use current_size for images
+ * that have CHS geometry of the maximum size.
+ */
+static bool vpc_ignore_current_size(VHDFooter *footer)
+{
+    return !!strncmp(footer->creator_app, "win ", 4) &&
+           !!strncmp(footer->creator_app, "qem2", 4) &&
+           !!strncmp(footer->creator_app, "d2v ", 4) &&
+           !!strncmp(footer->creator_app, "CTXS", 4) &&
+           !!memcmp(footer->creator_app, "tap", 4);
+}
+
 static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
@@ -304,36 +339,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     bs->total_sectors = (int64_t) be16_to_cpu(footer->cyls) *
         footer->heads * footer->secs_per_cyl;
 
-    /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
-     * VHD image sizes differently. VPC will rely on CHS geometry,
-     * while Hyper-V and disk2vhd use the size specified in the footer.
-     *
-     * We use a couple of approaches to try and determine the correct method:
-     * look at the Creator App field, and look for images that have CHS
-     * geometry that is the maximum value.
-     *
-     * If the CHS geometry is the maximum CHS geometry, then we assume that
-     * the size is the footer->current_size to avoid truncation. Otherwise,
-     * we follow the table based on footer->creator_app:
-     *
-     * Known creator apps:
-     *  'vpc ' :  CHS          Virtual PC (uses disk geometry)
-     *  'qemu' :  CHS          QEMU (uses disk geometry)
-     *  'qem2' :  current_size QEMU (uses current_size)
-     *  'win ' :  current_size Hyper-V
-     *  'd2v ' :  current_size Disk2vhd
-     *  'tap\0':  current_size XenServer
-     *  'CTXS' :  current_size XenConverter
-     *
-     * The user can override the table values via drive options, however
-     * even with an override we will still use current_size for images
-     * that have CHS geometry of the maximum size.
-     */
-    use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
-               !!strncmp(footer->creator_app, "qem2", 4) &&
-               !!strncmp(footer->creator_app, "d2v ", 4) &&
-               !!strncmp(footer->creator_app, "CTXS", 4) &&
-               !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
+    /* Use CHS or current_size to determine the image size. */
+    use_chs = vpc_ignore_current_size(footer) || s->force_use_chs;
 
     if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
         bs->total_sectors = be64_to_cpu(footer->current_size) /

From 558d8eb7f32dda8634e0d3e82ac36e00bf61c9ee Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov
Date: Thu, 12 Dec 2024 14:45:04 +0100
Subject: [PATCH 02/25] vpc: Read images exported from Azure correctly

It was found that 'qemu-nbd' is not able to work with some disk images
exported from Azure. Looking at the 512-byte footer (which contains VPC
metadata):

00000000  63 6f 6e 65 63 74 69 78 00 00 00 02 00 01 00 00  |conectix........|
00000010  ff ff ff ff ff ff ff ff 2e c7 9b 96 77 61 00 00  |............wa..|
00000020  00 07 00 00 57 69 32 6b 00 00 00 01 40 00 00 00  |....Wi2k....@...|
00000030  00 00 00 01 40 00 00 00 28 a2 10 3f 00 00 00 02  |....@...(..?....|
00000040  ff ff e7 47 8c 54 df 94 bd 35 71 4c 94 5f e5 44  |...G.T...5qL._.D|
00000050  44 53 92 1a 00 00 00 00 00 00 00 00 00 00 00 00  |DS..............|
00000060  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  |................|

we can see that Azure uses a different 'Creator application' --
'wa\0\0' (offset 0x1c, likely reads as 'Windows Azure') and QEMU uses
this field to determine how it can get the image size. Apparently,
Azure uses the 'new' method, just like Hyper-V.

Overall, it seems that only VPC and old QEMUs need to be ignored, as all
new creator apps seem to have a reliable current_size. Invert the logic
and make the 'current_size' method the default to avoid adding every new
creator app to the list.
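As a minimal illustration of the footer layout discussed above (not part
of the patch; the helper name is made up, and the max-CHS special case
is ignored), the two fields QEMU consults can be read like this:

    import struct

    def vhd_footer_fields(path):
        # The VHD footer is the last 512 bytes of the image
        with open(path, 'rb') as f:
            f.seek(-512, 2)
            footer = f.read(512)
        assert footer[0:8] == b'conectix'
        creator_app = footer[0x1c:0x20]   # b'wa\x00\x00' in the dump above
        # current_size is a big-endian 64-bit value at offset 0x30
        current_size = struct.unpack('>Q', footer[0x30:0x38])[0]
        # After this patch, only these two creator apps still use CHS
        use_chs = creator_app in (b'vpc ', b'qemu')
        return creator_app, current_size, use_chs

For the footer dumped above, this yields creator_app b'wa\x00\x00' and
current_size 0x140000000 (5 GiB).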
Signed-off-by: Vitaly Kuznetsov Message-ID: <20241212134504.1983757-3-vkuznets@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/vpc.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/block/vpc.c b/block/vpc.c index cb0773914c72..fb64ea604051 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -237,6 +237,7 @@ static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts, * 'd2v ' : current_size Disk2vhd * 'tap\0' : current_size XenServer * 'CTXS' : current_size XenConverter + * 'wa\0\0': current_size Azure * * The user can override the table values via drive options, however * even with an override we will still use current_size for images @@ -244,11 +245,8 @@ static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts, */ static bool vpc_ignore_current_size(VHDFooter *footer) { - return !!strncmp(footer->creator_app, "win ", 4) && - !!strncmp(footer->creator_app, "qem2", 4) && - !!strncmp(footer->creator_app, "d2v ", 4) && - !!strncmp(footer->creator_app, "CTXS", 4) && - !!memcmp(footer->creator_app, "tap", 4)); + return !strncmp(footer->creator_app, "vpc ", 4) || + !strncmp(footer->creator_app, "qemu", 4); } static int vpc_open(BlockDriverState *bs, QDict *options, int flags, From 84d388c22b8e34981539fd3c69f84099f1824f1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 11 Nov 2024 18:03:32 +0100 Subject: [PATCH 03/25] block: Improve blk_get_attached_dev_id() docstring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose the method docstring in the header, and mention returned value must be free'd by caller. Reported-by: Fabiano Rosas Signed-off-by: Philippe Mathieu-Daudé Message-ID: <20241111170333.43833-2-philmd@linaro.org> Signed-off-by: Kevin Wolf --- block/block-backend.c | 12 ++++++++---- include/system/block-backend-io.h | 7 +++++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index c93a7525ad0c..789fc6d4ea96 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1019,6 +1019,10 @@ DeviceState *blk_get_attached_dev(BlockBackend *blk) return blk->dev; } +/* + * The caller is responsible for releasing the value returned + * with g_free() after use. + */ static char *blk_get_attached_dev_id_or_path(BlockBackend *blk, bool want_id) { DeviceState *dev = blk->dev; @@ -1033,15 +1037,15 @@ static char *blk_get_attached_dev_id_or_path(BlockBackend *blk, bool want_id) return object_get_canonical_path(OBJECT(dev)) ?: g_strdup(""); } -/* - * Return the qdev ID, or if no ID is assigned the QOM path, of the block - * device attached to the BlockBackend. - */ char *blk_get_attached_dev_id(BlockBackend *blk) { return blk_get_attached_dev_id_or_path(blk, true); } +/* + * The caller is responsible for releasing the value returned + * with g_free() after use. 
+ */
 static char *blk_get_attached_dev_path(BlockBackend *blk)
 {
     return blk_get_attached_dev_id_or_path(blk, false);
diff --git a/include/system/block-backend-io.h b/include/system/block-backend-io.h
index d174275a5cb6..ba8dfcc7d048 100644
--- a/include/system/block-backend-io.h
+++ b/include/system/block-backend-io.h
@@ -32,6 +32,13 @@ void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow);
 void blk_set_disable_request_queuing(BlockBackend *blk, bool disable);
 bool blk_iostatus_is_enabled(const BlockBackend *blk);
 
+/*
+ * Return the qdev ID, or if no ID is assigned the QOM path,
+ * of the block device attached to the BlockBackend.
+ *
+ * The caller is responsible for releasing the value returned
+ * with g_free() after use.
+ */
 char *blk_get_attached_dev_id(BlockBackend *blk);
 
 BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,

From 23ea425c14d3b89a002e0127b17456eee3102ab7 Mon Sep 17 00:00:00 2001
From: Fabiano Rosas
Date: Mon, 11 Nov 2024 18:03:33 +0100
Subject: [PATCH 04/25] block: Fix leak in send_qmp_error_event
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ASAN detected a leak when running the ahci-test
/ahci/io/dma/lba28/retry:

Direct leak of 35 byte(s) in 1 object(s) allocated from:
    #0 in malloc
    #1 in __vasprintf_internal
    #2 in vasprintf
    #3 in g_vasprintf
    #4 in g_strdup_vprintf
    #5 in g_strdup_printf
    #6 in object_get_canonical_path ../qom/object.c:2096:19
    #7 in blk_get_attached_dev_id_or_path ../block/block-backend.c:1033:12
    #8 in blk_get_attached_dev_path ../block/block-backend.c:1047:12
    #9 in send_qmp_error_event ../block/block-backend.c:2140:36
    #10 in blk_error_action ../block/block-backend.c:2172:9
    #11 in ide_handle_rw_error ../hw/ide/core.c:875:5
    #12 in ide_dma_cb ../hw/ide/core.c:894:13
    #13 in dma_complete ../system/dma-helpers.c:107:9
    #14 in dma_blk_cb ../system/dma-helpers.c:129:9
    #15 in blk_aio_complete ../block/block-backend.c:1552:9
    #16 in blk_aio_write_entry ../block/block-backend.c:1619:5
    #17 in coroutine_trampoline ../util/coroutine-ucontext.c:175:9

Plug the leak by freeing the device path string.

Signed-off-by: Fabiano Rosas
Reviewed-by: Philippe Mathieu-Daudé
Message-ID: <20241111145214.8261-1-farosas@suse.de>
[PMD: Use g_autofree]
Signed-off-by: Philippe Mathieu-Daudé
Message-ID: <20241111170333.43833-3-philmd@linaro.org>
Signed-off-by: Kevin Wolf
---
 block/block-backend.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 789fc6d4ea96..b61058264466 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2138,10 +2138,10 @@ static void send_qmp_error_event(BlockBackend *blk,
 {
     IoOperationType optype;
     BlockDriverState *bs = blk_bs(blk);
+    g_autofree char *path = blk_get_attached_dev_path(blk);
 
     optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
-    qapi_event_send_block_io_error(blk_name(blk),
-                                   blk_get_attached_dev_path(blk),
+    qapi_event_send_block_io_error(blk_name(blk), path,
                                    bs ? bdrv_get_node_name(bs) : NULL, optype,
                                    action, blk_iostatus_is_enabled(blk),
                                    error == ENOSPC, strerror(error));

From 5bf10468b68816377264e557e91186a2ee129c95 Mon Sep 17 00:00:00 2001
From: Peter Xu
Date: Thu, 12 Dec 2024 15:47:59 -0500
Subject: [PATCH 05/25] scripts/qemu-gdb: Always do full stack dump for python errors

This makes it easier to debug plugin errors and to report issues.
Signed-off-by: Peter Xu
Message-ID: <20241212204801.1420528-2-peterx@redhat.com>
Reviewed-by: Kevin Wolf
Signed-off-by: Kevin Wolf
---
 scripts/qemu-gdb.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/qemu-gdb.py b/scripts/qemu-gdb.py
index 4d2a9f6c430a..cfae94a2e906 100644
--- a/scripts/qemu-gdb.py
+++ b/scripts/qemu-gdb.py
@@ -45,3 +45,5 @@ def __init__(self):
 # Default to silently passing through SIGUSR1, because QEMU sends it
 # to itself a lot.
 gdb.execute('handle SIGUSR1 pass noprint nostop')
+# Always print full stack for python errors, easier to debug and report issues
+gdb.execute('set python print-stack full')

From f4e343b6559eda19efe972b9dcd52e479320e388 Mon Sep 17 00:00:00 2001
From: Peter Xu
Date: Thu, 12 Dec 2024 15:48:00 -0500
Subject: [PATCH 06/25] scripts/qemu-gdb: Simplify fs_base fetching for coroutines

There's a bunch of code trying to fetch fs_base in different ways. IIUC
the simplest way is "$fs_base" instead. It also has the benefit that it
works for both live gdb sessions and coredumps.

Signed-off-by: Peter Xu
Message-ID: <20241212204801.1420528-3-peterx@redhat.com>
Reviewed-by: Kevin Wolf
Signed-off-by: Kevin Wolf
---
 scripts/qemugdb/coroutine.py | 23 ++---------------------
 1 file changed, 2 insertions(+), 21 deletions(-)

diff --git a/scripts/qemugdb/coroutine.py b/scripts/qemugdb/coroutine.py
index 7db46d4b6841..20f76ed37b42 100644
--- a/scripts/qemugdb/coroutine.py
+++ b/scripts/qemugdb/coroutine.py
@@ -13,28 +13,9 @@
 
 VOID_PTR = gdb.lookup_type('void').pointer()
 
-def get_fs_base():
-    '''Fetch %fs base value using arch_prctl(ARCH_GET_FS). This is
-    pthread_self().'''
-    # %rsp - 120 is scratch space according to the SystemV ABI
-    old = gdb.parse_and_eval('*(uint64_t*)($rsp - 120)')
-    gdb.execute('call (int)arch_prctl(0x1003, $rsp - 120)', False, True)
-    fs_base = gdb.parse_and_eval('*(uint64_t*)($rsp - 120)')
-    gdb.execute('set *(uint64_t*)($rsp - 120) = %s' % old, False, True)
-    return fs_base
-
 def pthread_self():
-    '''Fetch pthread_self() from the glibc start_thread function.'''
-    f = gdb.newest_frame()
-    while f.name() != 'start_thread':
-        f = f.older()
-        if f is None:
-            return get_fs_base()
-
-    try:
-        return f.read_var("arg")
-    except ValueError:
-        return get_fs_base()
+    '''Fetch the base address of TLS.'''
+    return gdb.parse_and_eval("$fs_base")
 
 def get_glibc_pointer_guard():
     '''Fetch glibc pointer guard value'''

From 772f86839f777148089e0b248a78986406ee3660 Mon Sep 17 00:00:00 2001
From: Peter Xu
Date: Thu, 12 Dec 2024 15:48:01 -0500
Subject: [PATCH 07/25] scripts/qemu-gdb: Support coroutine dumps in coredumps

Dumping coroutines doesn't yet work with coredumps. Let's make it work.

We still keep most of the old code because it can be either more
flexible or prettier; the fallbacks are only added where it stops
working.

Currently the raw unwind is pretty ugly, but it works, like this:

(gdb) qemu bt
#0  process_incoming_migration_co (opaque=0x0) at ../migration/migration.c:788
#1  0x000055ae6c0dc4d9 in coroutine_trampoline (i0=-1711718576, i1=21934) at ../util/coroutine-ucontext.c:175
#2  0x00007f9f59d72f40 in ??? () at /lib64/libc.so.6
#3  0x00007ffd549214a0 in ??? ()
#4  0x0000000000000000 in ??? ()

Coroutine at 0x7f9f4c57c748:
#0  0x55ae6c0dc9a8 in qemu_coroutine_switch<+120> () at ../util/coroutine-ucontext.c:321
#1  0x55ae6c0da2f8 in qemu_aio_coroutine_enter<+356> () at ../util/qemu-coroutine.c:293
#2  0x55ae6c0da3f1 in qemu_coroutine_enter<+34> () at ../util/qemu-coroutine.c:316
#3  0x55ae6baf775e in migration_incoming_process<+43> () at ../migration/migration.c:876
#4  0x55ae6baf7ab4 in migration_ioc_process_incoming<+490> () at ../migration/migration.c:1008
#5  0x55ae6bae9ae7 in migration_channel_process_incoming<+145> () at ../migration/channel.c:45
#6  0x55ae6bb18e35 in socket_accept_incoming_migration<+118> () at ../migration/socket.c:132
#7  0x55ae6be939ef in qio_net_listener_channel_func<+131> () at ../io/net-listener.c:54
#8  0x55ae6be8ce1a in qio_channel_fd_source_dispatch<+78> () at ../io/channel-watch.c:84
#9  0x7f9f5b26728c in g_main_context_dispatch_unlocked.lto_priv<+315> ()
#10 0x7f9f5b267555 in g_main_context_dispatch<+36> ()
#11 0x55ae6c0d91a7 in glib_pollfds_poll<+90> () at ../util/main-loop.c:287
#12 0x55ae6c0d9235 in os_host_main_loop_wait<+128> () at ../util/main-loop.c:310
#13 0x55ae6c0d9364 in main_loop_wait<+203> () at ../util/main-loop.c:589
#14 0x55ae6bac212a in qemu_main_loop<+41> () at ../system/runstate.c:835
#15 0x55ae6bfdf522 in qemu_default_main<+19> () at ../system/main.c:37
#16 0x55ae6bfdf55f in main<+40> () at ../system/main.c:48
#17 0x7f9f59d42248 in __libc_start_call_main<+119> ()
#18 0x7f9f59d4230b in __libc_start_main_impl<+138> ()

Signed-off-by: Peter Xu
Message-ID: <20241212204801.1420528-4-peterx@redhat.com>
Reviewed-by: Kevin Wolf
Signed-off-by: Kevin Wolf
---
 scripts/qemugdb/coroutine.py | 79 +++++++++++++++++++++++++++++++++---
 1 file changed, 73 insertions(+), 6 deletions(-)

diff --git a/scripts/qemugdb/coroutine.py b/scripts/qemugdb/coroutine.py
index 20f76ed37b42..e98fc48a4b2b 100644
--- a/scripts/qemugdb/coroutine.py
+++ b/scripts/qemugdb/coroutine.py
@@ -46,9 +46,60 @@ def get_jmpbuf_regs(jmpbuf):
             'r15': jmpbuf[JB_R15],
             'rip': glibc_ptr_demangle(jmpbuf[JB_PC], pointer_guard) }
 
-def bt_jmpbuf(jmpbuf):
-    '''Backtrace a jmpbuf'''
-    regs = get_jmpbuf_regs(jmpbuf)
+def symbol_lookup(addr):
+    # Example: "__clone3 + 44 in section .text of /lib64/libc.so.6"
+    result = gdb.execute(f"info symbol {hex(addr)}", to_string=True).strip()
+    try:
+        if "+" in result:
+            (func, result) = result.split(" + ")
+            (offset, result) = result.split(" in ")
+        else:
+            offset = "0"
+            (func, result) = result.split(" in ")
+        func_str = f"{func}<+{offset}> ()"
+    except:
+        return f"??? ({result})"
+
+    # Example: Line 321 of "../util/coroutine-ucontext.c" starts at address
+    #          0x55cf3894d993 and ends at 0x55cf3894d9ab.
+    result = gdb.execute(f"info line *{hex(addr)}", to_string=True).strip()
+    if not result.startswith("Line "):
+        return func_str
+    result = result[5:]
+
+    try:
+        result = result.split(" starts ")[0]
+        (line, path) = result.split(" of ")
+        path = path.replace("\"", "")
+    except:
+        return func_str
+
+    return f"{func_str} at {path}:{line}"
+
+def dump_backtrace(regs):
+    '''
+    Backtrace dump with raw registers, mimicking the GDB 'bt' command.
+    '''
+    # Only rbp and rip matter here
+    rbp = regs['rbp']
+    rip = regs['rip']
+    i = 0
+
+    while rbp:
+        # For all return addresses on the stack, we want to look up the
+        # symbol/line of the CALL instruction, because the return address
+        # is the next instruction instead of the CALL. Here -1 would work
+        # for any sized CALL instruction.
+        print(f"#{i}  {hex(rip)} in {symbol_lookup(rip if i == 0 else rip-1)}")
+        rip = gdb.parse_and_eval(f"*(uint64_t *)(uint64_t)({hex(rbp)} + 8)")
+        rbp = gdb.parse_and_eval(f"*(uint64_t *)(uint64_t)({hex(rbp)})")
+        i += 1
+
+def dump_backtrace_live(regs):
+    '''
+    Backtrace dump with gdb's 'bt' command, only usable in a live session.
+    '''
     old = dict()
 
     # remember current stack frame and select the topmost
@@ -69,6 +120,17 @@ def bt_jmpbuf(jmpbuf):
 
     selected_frame.select()
 
+def bt_jmpbuf(jmpbuf):
+    '''Backtrace a jmpbuf'''
+    regs = get_jmpbuf_regs(jmpbuf)
+    try:
+        # This reuses gdb's "bt" command, which can be slightly prettier
+        # but only works with live sessions.
+        dump_backtrace_live(regs)
+    except:
+        # If the above doesn't work, fall back to the poor man's unwind
+        dump_backtrace(regs)
+
 def co_cast(co):
     return co.cast(gdb.lookup_type('CoroutineUContext').pointer())
 
@@ -101,10 +163,15 @@ def invoke(self, arg, from_tty):
 
         gdb.execute("bt")
 
-        if gdb.parse_and_eval("qemu_in_coroutine()") == False:
-            return
+        try:
+            # This only works with a live session
+            co_ptr = gdb.parse_and_eval("qemu_coroutine_self()")
+        except:
+            # Fall back to the hard-coded ucontext vars if it's a coredump
+            co_ptr = gdb.parse_and_eval("co_tls_current")
 
-        co_ptr = gdb.parse_and_eval("qemu_coroutine_self()")
+        if co_ptr == False:
+            return
 
         while True:
             co = co_cast(co_ptr)

From 107c551de0d7bc3aa8e926c557b66b9549616f42 Mon Sep 17 00:00:00 2001
From: Peter Krempa
Date: Mon, 27 Jan 2025 11:29:24 +0100
Subject: [PATCH 08/25] block-backend: Fix argument order when calling
 'qapi_event_send_block_io_error()'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 7452162adec25c10 introduced the 'qom-path' argument to the
BLOCK_IO_ERROR event, but when the event is instantiated in
'send_qmp_error_event()' the arguments for 'device' and 'qom_path' in
qapi_event_send_block_io_error() were reversed:

Generated code for sending the event:

 void qapi_event_send_block_io_error(const char *qom_path,
                                     const char *device,
                                     const char *node_name,
                                     IoOperationType operation,
 [...]

Call inside send_qmp_error_event():

 qapi_event_send_block_io_error(blk_name(blk),
                                blk_get_attached_dev_path(blk),
                                bs ? bdrv_get_node_name(bs) : NULL, optype,
 [...]

This results in reporting the QOM path as the device alias and vice
versa, which in turn breaks libvirt, which expects the device alias to
be either a valid alias or empty (which would make libvirt do the
lookup by node-name instead).

Cc: qemu-stable@nongnu.org
Fixes: 7452162adec2 ("qapi: add qom-path to BLOCK_IO_ERROR event")
Signed-off-by: Peter Krempa
Message-ID: <09728d784888b38d7a8f09ee5e9e9c542c875e1e.1737973614.git.pkrempa@redhat.com>
Reviewed-by: Daniel P. Berrangé
Reviewed-by: Kevin Wolf
Signed-off-by: Kevin Wolf
---
 block/block-backend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index b61058264466..d093f01f897f 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2141,7 +2141,7 @@ static void send_qmp_error_event(BlockBackend *blk,
     g_autofree char *path = blk_get_attached_dev_path(blk);
 
     optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
-    qapi_event_send_block_io_error(blk_name(blk), path,
+    qapi_event_send_block_io_error(path, blk_name(blk),
                                    bs ?
bdrv_get_node_name(bs) : NULL, optype, action, blk_iostatus_is_enabled(blk), error == ENOSPC, strerror(error)); From aec81049c2daa8a97b89e59f03733b21ae0f8c2d Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 4 Feb 2025 22:13:52 +0100 Subject: [PATCH 09/25] block: Add 'active' field to BlockDeviceInfo This allows querying from QMP (and also HMP) whether an image is currently active or inactive (in the sense of BDRV_O_INACTIVE). Signed-off-by: Kevin Wolf Acked-by: Fabiano Rosas Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-ID: <20250204211407.381505-2-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 4 ++++ block/monitor/block-hmp-cmds.c | 5 +++-- block/qapi.c | 1 + include/block/block-global-state.h | 3 +++ qapi/block-core.json | 6 +++++- tests/qemu-iotests/184.out | 2 ++ tests/qemu-iotests/191.out | 16 ++++++++++++++++ tests/qemu-iotests/273.out | 5 +++++ 8 files changed, 39 insertions(+), 3 deletions(-) diff --git a/block.c b/block.c index f60606f24288..9aad958269f3 100644 --- a/block.c +++ b/block.c @@ -6824,6 +6824,10 @@ void bdrv_init_with_whitelist(void) bdrv_init(); } +bool bdrv_is_inactive(BlockDriverState *bs) { + return bs->open_flags & BDRV_O_INACTIVE; +} + int bdrv_activate(BlockDriverState *bs, Error **errp) { BdrvChild *child, *parent; diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c index 1d312513fc44..e84ff6ab1688 100644 --- a/block/monitor/block-hmp-cmds.c +++ b/block/monitor/block-hmp-cmds.c @@ -630,11 +630,12 @@ static void print_block_info(Monitor *mon, BlockInfo *info, } if (inserted) { - monitor_printf(mon, ": %s (%s%s%s)\n", + monitor_printf(mon, ": %s (%s%s%s%s)\n", inserted->file, inserted->drv, inserted->ro ? ", read-only" : "", - inserted->encrypted ? ", encrypted" : ""); + inserted->encrypted ? ", encrypted" : "", + inserted->active ? "" : ", inactive"); } else { monitor_printf(mon, ": [not inserted]\n"); } diff --git a/block/qapi.c b/block/qapi.c index 902ecb08e067..63604dc6d35d 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -63,6 +63,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, info->file = g_strdup(bs->filename); info->ro = bdrv_is_read_only(bs); info->drv = g_strdup(bs->drv->format_name); + info->active = !bdrv_is_inactive(bs); info->encrypted = bs->encrypted; info->cache = g_new(BlockdevCacheInfo, 1); diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index bd7cecd1cf44..a826bf5f78ae 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -175,6 +175,9 @@ BlockDriverState * GRAPH_RDLOCK check_to_replace_node(BlockDriverState *parent_bs, const char *node_name, Error **errp); + +bool GRAPH_RDLOCK bdrv_is_inactive(BlockDriverState *bs); + int no_coroutine_fn GRAPH_RDLOCK bdrv_activate(BlockDriverState *bs, Error **errp); diff --git a/qapi/block-core.json b/qapi/block-core.json index fd3bcc1c1751..1296ca8ae2b3 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -486,6 +486,10 @@ # @backing_file_depth: number of files in the backing file chain # (since: 1.2) # +# @active: true if the backend is active; typical cases for inactive backends +# are on the migration source instance after migration completes and on the +# destination before it completes. 
(since: 10.0) +# # @encrypted: true if the backing device is encrypted # # @detect_zeroes: detect and optimize zero writes (Since 2.1) @@ -556,7 +560,7 @@ { 'struct': 'BlockDeviceInfo', 'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str', '*backing_file': 'str', 'backing_file_depth': 'int', - 'encrypted': 'bool', + 'active': 'bool', 'encrypted': 'bool', 'detect_zeroes': 'BlockdevDetectZeroesOptions', 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int', 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int', diff --git a/tests/qemu-iotests/184.out b/tests/qemu-iotests/184.out index e8f631f85327..52692b6b3b9b 100644 --- a/tests/qemu-iotests/184.out +++ b/tests/qemu-iotests/184.out @@ -26,6 +26,7 @@ Testing: { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "backing-image": { "virtual-size": 1073741824, @@ -59,6 +60,7 @@ Testing: { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "virtual-size": 1073741824, "filename": "null-co://", diff --git a/tests/qemu-iotests/191.out b/tests/qemu-iotests/191.out index c3309e4bc69e..2a72ca7106e8 100644 --- a/tests/qemu-iotests/191.out +++ b/tests/qemu-iotests/191.out @@ -114,6 +114,7 @@ wrote 65536/65536 bytes at offset 1048576 { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "backing-image": { "virtual-size": 67108864, @@ -155,6 +156,7 @@ wrote 65536/65536 bytes at offset 1048576 { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "virtual-size": 197120, "filename": "TEST_DIR/t.IMGFMT.ovl2", @@ -183,6 +185,7 @@ wrote 65536/65536 bytes at offset 1048576 { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "backing-image": { "virtual-size": 67108864, @@ -224,6 +227,7 @@ wrote 65536/65536 bytes at offset 1048576 { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "virtual-size": 197120, "filename": "TEST_DIR/t.IMGFMT", @@ -252,6 +256,7 @@ wrote 65536/65536 bytes at offset 1048576 { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "backing-image": { "virtual-size": 67108864, @@ -293,6 +298,7 @@ wrote 65536/65536 bytes at offset 1048576 { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "virtual-size": 393216, "filename": "TEST_DIR/t.IMGFMT.mid", @@ -321,6 +327,7 @@ wrote 65536/65536 bytes at offset 1048576 { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "virtual-size": 67108864, "filename": "TEST_DIR/t.IMGFMT.base", @@ -350,6 +357,7 @@ wrote 65536/65536 bytes at offset 1048576 { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "virtual-size": 393216, "filename": "TEST_DIR/t.IMGFMT.base", @@ -521,6 +529,7 @@ wrote 65536/65536 bytes at offset 1048576 { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "backing-image": { "virtual-size": 67108864, @@ -562,6 +571,7 @@ wrote 65536/65536 bytes at offset 1048576 { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "virtual-size": 197120, "filename": "TEST_DIR/t.IMGFMT.ovl2", @@ -590,6 +600,7 @@ wrote 65536/65536 bytes at offset 1048576 { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "backing-image": { "backing-image": { @@ -642,6 +653,7 @@ wrote 65536/65536 bytes at offset 1048576 { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "virtual-size": 197120, "filename": "TEST_DIR/t.IMGFMT.ovl3", @@ -670,6 +682,7 @@ wrote 65536/65536 bytes at offset 1048576 { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "virtual-size": 67108864, 
"filename": "TEST_DIR/t.IMGFMT.base", @@ -699,6 +712,7 @@ wrote 65536/65536 bytes at offset 1048576 { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "virtual-size": 393216, "filename": "TEST_DIR/t.IMGFMT.base", @@ -727,6 +741,7 @@ wrote 65536/65536 bytes at offset 1048576 { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "backing-image": { "virtual-size": 67108864, @@ -768,6 +783,7 @@ wrote 65536/65536 bytes at offset 1048576 { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "virtual-size": 197120, "filename": "TEST_DIR/t.IMGFMT", diff --git a/tests/qemu-iotests/273.out b/tests/qemu-iotests/273.out index 71843f02de6f..c19753c685a4 100644 --- a/tests/qemu-iotests/273.out +++ b/tests/qemu-iotests/273.out @@ -23,6 +23,7 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "backing-image": { "backing-image": { @@ -74,6 +75,7 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "virtual-size": 197120, "filename": "TEST_DIR/t.IMGFMT", @@ -102,6 +104,7 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "backing-image": { "virtual-size": 197120, @@ -142,6 +145,7 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "virtual-size": 197120, "filename": "TEST_DIR/t.IMGFMT.mid", @@ -170,6 +174,7 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev { "iops_rd": 0, "detect_zeroes": "off", + "active": true, "image": { "virtual-size": 197120, "filename": "TEST_DIR/t.IMGFMT.base", From a6490ec9d56b9e95a13918813585a3a9891710bc Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 4 Feb 2025 22:13:53 +0100 Subject: [PATCH 10/25] block: Allow inactivating already inactive nodes What we wanted to catch with the assertion is cases where the recursion finds that a child was inactive before its parent. This should never happen. But if the user tries to inactivate an image that is already inactive, that's harmless and we don't want to fail the assertion. Signed-off-by: Kevin Wolf Acked-by: Fabiano Rosas Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-ID: <20250204211407.381505-3-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/block.c b/block.c index 9aad958269f3..9458c5e01318 100644 --- a/block.c +++ b/block.c @@ -6959,7 +6959,8 @@ bdrv_has_bds_parent(BlockDriverState *bs, bool only_active) return false; } -static int GRAPH_RDLOCK bdrv_inactivate_recurse(BlockDriverState *bs) +static int GRAPH_RDLOCK +bdrv_inactivate_recurse(BlockDriverState *bs, bool top_level) { BdrvChild *child, *parent; int ret; @@ -6977,7 +6978,14 @@ static int GRAPH_RDLOCK bdrv_inactivate_recurse(BlockDriverState *bs) return 0; } - assert(!(bs->open_flags & BDRV_O_INACTIVE)); + /* + * Inactivating an already inactive node on user request is harmless, but if + * a child is already inactive before its parent, that's bad. 
+     */
+    if (bs->open_flags & BDRV_O_INACTIVE) {
+        assert(top_level);
+        return 0;
+    }
 
     /* Inactivate this node */
     if (bs->drv->bdrv_inactivate) {
@@ -7014,7 +7022,7 @@ static int GRAPH_RDLOCK bdrv_inactivate_recurse(BlockDriverState *bs)
 
     /* Recursively inactivate children */
     QLIST_FOREACH(child, &bs->children, next) {
-        ret = bdrv_inactivate_recurse(child->bs);
+        ret = bdrv_inactivate_recurse(child->bs, false);
         if (ret < 0) {
             return ret;
         }
@@ -7039,7 +7047,7 @@ int bdrv_inactivate_all(void)
         if (bdrv_has_bds_parent(bs, false)) {
             continue;
         }
-        ret = bdrv_inactivate_recurse(bs);
+        ret = bdrv_inactivate_recurse(bs, true);
         if (ret < 0) {
             bdrv_next_cleanup(&it);
             break;

From e80210ffb24c4e47650344ba77ce3ed354af596c Mon Sep 17 00:00:00 2001
From: Kevin Wolf
Date: Tue, 4 Feb 2025 22:13:54 +0100
Subject: [PATCH 11/25] block: Inactivate external snapshot overlays when
 necessary

Putting an active block node on top of an inactive one is strictly
speaking an invalid configuration, and the next patch will turn it into
a hard error.

However, taking a snapshot while disk images are inactive after
completing migration has an important use case: After migrating to a
file, taking an external snapshot is what is needed to take a full VM
snapshot.

In order for this to keep working after the later patches, change
snapshot creation so that it automatically inactivates an overlay that
is added on top of an already inactive node.

Signed-off-by: Kevin Wolf
Acked-by: Fabiano Rosas
Reviewed-by: Eric Blake
Reviewed-by: Stefan Hajnoczi
Message-ID: <20250204211407.381505-4-kwolf@redhat.com>
Signed-off-by: Kevin Wolf
---
 blockdev.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/blockdev.c b/blockdev.c
index 218024497b1e..eb2517f1dde6 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1497,6 +1497,22 @@ static void external_snapshot_action(TransactionAction *action,
         return;
     }
 
+    /*
+     * Older QEMU versions have allowed adding an active parent node to an
+     * inactive child node. This is unsafe in the general case, but there is an
+     * important use case, which is taking a VM snapshot with migration to file
+     * and then adding an external snapshot while the VM is still stopped and
+     * images are inactive. Requiring the user to explicitly create the overlay
+     * as inactive would break compatibility, so just do it automatically here
+     * to keep this working.
+     */
+    if (bdrv_is_inactive(state->old_bs) && !bdrv_is_inactive(state->new_bs)) {
+        ret = bdrv_inactivate(state->new_bs, errp);
+        if (ret < 0) {
+            return;
+        }
+    }
+
     ret = bdrv_append(state->new_bs, state->old_bs, errp);
     if (ret < 0) {
         return;

From c2a189976e211c9ff782538d5a5ed5e5cffeccd6 Mon Sep 17 00:00:00 2001
From: Kevin Wolf
Date: Tue, 4 Feb 2025 22:13:55 +0100
Subject: [PATCH 12/25] migration/block-active: Remove global active flag

Block devices have an individual active state, so a single global flag
can't cover this correctly. This becomes more important as we allow
users to manually manage which nodes are active or inactive.

Now that it's allowed to call bdrv_inactivate_all() even when some
nodes are already inactive, we can remove the flag and just
unconditionally call bdrv_inactivate_all() and, more importantly,
bdrv_activate_all() before we make use of the nodes.
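Since each node now tracks its own activation state, the 'active' field
added to BlockDeviceInfo earlier in this series is the natural way to
observe it from the outside. A minimal iotests-style sketch (the vm
handle and the printed nodes are hypothetical, not part of this patch):

    # Check the per-node active state over QMP instead of relying on a
    # migration-global flag
    result = vm.qmp('query-named-block-nodes')
    for node in result['return']:
        print(node.get('node-name'), node['active'])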
Signed-off-by: Kevin Wolf Acked-by: Fabiano Rosas Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-ID: <20250204211407.381505-5-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- migration/block-active.c | 46 ---------------------------------------- migration/migration.c | 8 ------- migration/migration.h | 3 --- 3 files changed, 57 deletions(-) diff --git a/migration/block-active.c b/migration/block-active.c index d477cf81828c..40e986aadea4 100644 --- a/migration/block-active.c +++ b/migration/block-active.c @@ -12,51 +12,12 @@ #include "qemu/error-report.h" #include "trace.h" -/* - * Migration-only cache to remember the block layer activation status. - * Protected by BQL. - * - * We need this because.. - * - * - Migration can fail after block devices are invalidated (during - * switchover phase). When that happens, we need to be able to recover - * the block drive status by re-activating them. - * - * - Currently bdrv_inactivate_all() is not safe to be invoked on top of - * invalidated drives (even if bdrv_activate_all() is actually safe to be - * called any time!). It means remembering this could help migration to - * make sure it won't invalidate twice in a row, crashing QEMU. It can - * happen when we migrate a PAUSED VM from host1 to host2, then migrate - * again to host3 without starting it. TODO: a cleaner solution is to - * allow safe invoke of bdrv_inactivate_all() at anytime, like - * bdrv_activate_all(). - * - * For freshly started QEMU, the flag is initialized to TRUE reflecting the - * scenario where QEMU owns block device ownerships. - * - * For incoming QEMU taking a migration stream, the flag is initialized to - * FALSE reflecting that the incoming side doesn't own the block devices, - * not until switchover happens. - */ -static bool migration_block_active; - -/* Setup the disk activation status */ -void migration_block_active_setup(bool active) -{ - migration_block_active = active; -} - bool migration_block_activate(Error **errp) { ERRP_GUARD(); assert(bql_locked()); - if (migration_block_active) { - trace_migration_block_activation("active-skipped"); - return true; - } - trace_migration_block_activation("active"); bdrv_activate_all(errp); @@ -65,7 +26,6 @@ bool migration_block_activate(Error **errp) return false; } - migration_block_active = true; return true; } @@ -75,11 +35,6 @@ bool migration_block_inactivate(void) assert(bql_locked()); - if (!migration_block_active) { - trace_migration_block_activation("inactive-skipped"); - return true; - } - trace_migration_block_activation("inactive"); ret = bdrv_inactivate_all(); @@ -89,6 +44,5 @@ bool migration_block_inactivate(void) return false; } - migration_block_active = false; return true; } diff --git a/migration/migration.c b/migration/migration.c index 74c50cc72ca1..95e05bbc3baf 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1895,12 +1895,6 @@ void qmp_migrate_incoming(const char *uri, bool has_channels, return; } - /* - * Newly setup incoming QEMU. Mark the block active state to reflect - * that the src currently owns the disks. 
- */ - migration_block_active_setup(false); - once = false; } @@ -3992,8 +3986,6 @@ static void migration_instance_init(Object *obj) ms->state = MIGRATION_STATUS_NONE; ms->mbps = -1; ms->pages_per_second = -1; - /* Freshly started QEMU owns all the block devices */ - migration_block_active_setup(true); qemu_sem_init(&ms->pause_sem, 0); qemu_mutex_init(&ms->error_mutex); diff --git a/migration/migration.h b/migration/migration.h index 4c1fafc2b5a3..8b24cbedd130 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -554,7 +554,4 @@ void migration_bitmap_sync_precopy(bool last_stage); void dirty_bitmap_mig_init(void); bool should_send_vmdesc(void); -/* migration/block-active.c */ -void migration_block_active_setup(bool active); - #endif From 9b81361aedcc47905de5e91f68221de89c6f5467 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 4 Feb 2025 22:13:56 +0100 Subject: [PATCH 13/25] block: Don't attach inactive child to active node An active node makes unrestricted use of its children and would possibly run into assertion failures when it operates on an inactive child node. Signed-off-by: Kevin Wolf Acked-by: Fabiano Rosas Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-ID: <20250204211407.381505-6-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/block.c b/block.c index 9458c5e01318..66a99e87c5f5 100644 --- a/block.c +++ b/block.c @@ -3183,6 +3183,11 @@ bdrv_attach_child_noperm(BlockDriverState *parent_bs, child_bs->node_name, child_name, parent_bs->node_name); return NULL; } + if (bdrv_is_inactive(child_bs) && !bdrv_is_inactive(parent_bs)) { + error_setg(errp, "Inactive '%s' can't be a %s child of active '%s'", + child_bs->node_name, child_name, parent_bs->node_name); + return NULL; + } bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, From 8c2c72a33581987af8d8c484d03af3cd69b9e10a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 4 Feb 2025 22:13:57 +0100 Subject: [PATCH 14/25] block: Fix crash on block_resize on inactive node In order for block_resize to fail gracefully on an inactive node instead of crashing with an assertion failure in bdrv_co_write_req_prepare() (called from bdrv_co_truncate()), we need to check for inactive nodes also when they are attached as a root node and make sure that BLK_PERM_RESIZE isn't among the permissions allowed for inactive nodes. To this effect, don't enumerate the permissions that are incompatible with inactive nodes any more, but allow only BLK_PERM_CONSISTENT_READ for them. 
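From the outside, the fixed behavior can be illustrated with an
iotests-style sketch (vm and the node name 'disk0' are hypothetical;
blockdev-set-active is only added later in this series):

    # Inactivate the node, then try to resize it. With this patch the
    # command fails cleanly instead of crashing QEMU with an assertion
    # failure in bdrv_co_write_req_prepare().
    vm.qmp('blockdev-set-active', node_name='disk0', active=False)
    result = vm.qmp('block_resize', node_name='disk0',
                    size=64 * 1024 * 1024)
    # result is expected to carry an error like
    # "Permission 'resize' unavailable on inactive node"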
Signed-off-by: Kevin Wolf Acked-by: Fabiano Rosas Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-ID: <20250204211407.381505-7-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 7 +++++++ block/block-backend.c | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 66a99e87c5f5..73d0de12cfe5 100644 --- a/block.c +++ b/block.c @@ -3077,6 +3077,13 @@ bdrv_attach_child_common(BlockDriverState *child_bs, assert(child_class->get_parent_desc); GLOBAL_STATE_CODE(); + if (bdrv_is_inactive(child_bs) && (perm & ~BLK_PERM_CONSISTENT_READ)) { + g_autofree char *perm_names = bdrv_perm_names(perm); + error_setg(errp, "Permission '%s' unavailable on inactive node", + perm_names); + return NULL; + } + new_child = g_new(BdrvChild, 1); *new_child = (BdrvChild) { .bs = NULL, diff --git a/block/block-backend.c b/block/block-backend.c index d093f01f897f..cc6f58ae78db 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -253,7 +253,7 @@ static bool blk_can_inactivate(BlockBackend *blk) * guest. For block job BBs that satisfy this, we can just allow * it. This is the case for mirror job source, which is required * by libvirt non-shared block migration. */ - if (!(blk->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED))) { + if (!(blk->perm & ~BLK_PERM_CONSISTENT_READ)) { return true; } From faecd16fe5c65a25b5b55b5edbe4322cec5a9d96 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 4 Feb 2025 22:13:58 +0100 Subject: [PATCH 15/25] block: Add option to create inactive nodes In QEMU, nodes are automatically created inactive while expecting an incoming migration (i.e. RUN_STATE_INMIGRATE). In qemu-storage-daemon, the notion of runstates doesn't exist. It also wouldn't necessarily make sense to introduce it because a single daemon can serve multiple VMs that can be in different states. Therefore, allow the user to explicitly open images as inactive with a new option. The default is as before: Nodes are usually active, except when created during RUN_STATE_INMIGRATE. 
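A hypothetical use of the new option in the iotests style (node and file
names are made up): with 'active' set to false, the image can be opened
on the destination while the source still holds its file locks, and
activated later:

    # Open the image as inactive; it can be activated later, e.g. with
    # the blockdev-set-active command introduced in the next patch
    vm.qmp('blockdev-add',
           driver='qcow2', node_name='disk0', active=False,
           file={'driver': 'file', 'filename': 'test.qcow2'})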
Signed-off-by: Kevin Wolf Acked-by: Fabiano Rosas Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-ID: <20250204211407.381505-8-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 9 +++++++++ include/block/block-common.h | 1 + qapi/block-core.json | 6 ++++++ 3 files changed, 16 insertions(+) diff --git a/block.c b/block.c index 73d0de12cfe5..7f6eca392f70 100644 --- a/block.c +++ b/block.c @@ -1573,6 +1573,10 @@ static void update_flags_from_options(int *flags, QemuOpts *opts) if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) { *flags |= BDRV_O_AUTO_RDONLY; } + + if (!qemu_opt_get_bool_del(opts, BDRV_OPT_ACTIVE, true)) { + *flags |= BDRV_O_INACTIVE; + } } static void update_options_from_flags(QDict *options, int flags) @@ -1799,6 +1803,11 @@ QemuOptsList bdrv_runtime_opts = { .type = QEMU_OPT_BOOL, .help = "Ignore flush requests", }, + { + .name = BDRV_OPT_ACTIVE, + .type = QEMU_OPT_BOOL, + .help = "Node is activated", + }, { .name = BDRV_OPT_READ_ONLY, .type = QEMU_OPT_BOOL, diff --git a/include/block/block-common.h b/include/block/block-common.h index 338fe5ff7a4e..7030669f0403 100644 --- a/include/block/block-common.h +++ b/include/block/block-common.h @@ -257,6 +257,7 @@ typedef enum { #define BDRV_OPT_AUTO_READ_ONLY "auto-read-only" #define BDRV_OPT_DISCARD "discard" #define BDRV_OPT_FORCE_SHARE "force-share" +#define BDRV_OPT_ACTIVE "active" #define BDRV_SECTOR_BITS 9 diff --git a/qapi/block-core.json b/qapi/block-core.json index 1296ca8ae2b3..6029e5488915 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -4683,6 +4683,11 @@ # # @cache: cache-related options # +# @active: whether the block node should be activated (default: true). +# Having inactive block nodes is useful primarily for migration because it +# allows opening an image on the destination while the source is still +# holding locks for it. (Since 10.0) +# # @read-only: whether the block device should be read-only (default: # false). Note that some block drivers support only read-only # access, either generally or in certain configurations. In this @@ -4709,6 +4714,7 @@ '*node-name': 'str', '*discard': 'BlockdevDiscardOptions', '*cache': 'BlockdevCacheOptions', + '*active': 'bool', '*read-only': 'bool', '*auto-read-only': 'bool', '*force-share': 'bool', From 8cd37207f8a90c5f995283ecf95f1cb5f7518a77 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 4 Feb 2025 22:13:59 +0100 Subject: [PATCH 16/25] block: Add blockdev-set-active QMP command The system emulator tries to automatically activate and inactivate block nodes at the right point during migration. However, there are still cases where it's necessary that the user can do this manually. Images are only activated on the destination VM of a migration when the VM is actually resumed. If the VM was paused, this doesn't happen automatically. The user may want to perform some operation on a block device (e.g. taking a snapshot or starting a block job) without also resuming the VM yet. This is an example where a manual command is necessary. Another example is VM migration when the image files are opened by an external qemu-storage-daemon instance on each side. In this case, the process that needs to hand over the images isn't even part of the migration and can't know when the migration completes. Management tools need a way to explicitly inactivate images on the source and activate them on the destination. 
This adds a new blockdev-set-active QMP command that lets the user
change the status of individual nodes (this is necessary in
qemu-storage-daemon because it could be serving multiple VMs and only
one of them migrates at a time). For convenience, operating on all
devices (like QEMU does automatically during migration) is offered as
an option, too, and can be used in the context of a single VM.

Signed-off-by: Kevin Wolf
Acked-by: Fabiano Rosas
Reviewed-by: Eric Blake
Reviewed-by: Stefan Hajnoczi
Message-ID: <20250204211407.381505-9-kwolf@redhat.com>
Signed-off-by: Kevin Wolf
---
 block.c                            | 21 ++++++++++++++++++++
 blockdev.c                         | 32 ++++++++++++++++++++++++++++++
 include/block/block-global-state.h |  3 +++
 qapi/block-core.json               | 32 ++++++++++++++++++++++++++++++
 4 files changed, 88 insertions(+)

diff --git a/block.c b/block.c
index 7f6eca392f70..7eeb8d076e42 100644
--- a/block.c
+++ b/block.c
@@ -7052,6 +7052,27 @@ bdrv_inactivate_recurse(BlockDriverState *bs, bool top_level)
     return 0;
 }
 
+int bdrv_inactivate(BlockDriverState *bs, Error **errp)
+{
+    int ret;
+
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    if (bdrv_has_bds_parent(bs, true)) {
+        error_setg(errp, "Node has active parent node");
+        return -EPERM;
+    }
+
+    ret = bdrv_inactivate_recurse(bs, true);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to inactivate node");
+        return ret;
+    }
+
+    return 0;
+}
+
 int bdrv_inactivate_all(void)
 {
     BlockDriverState *bs = NULL;
diff --git a/blockdev.c b/blockdev.c
index eb2517f1dde6..7e0d433712f4 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3471,6 +3471,38 @@ void qmp_blockdev_del(const char *node_name, Error **errp)
     bdrv_unref(bs);
 }
 
+void qmp_blockdev_set_active(const char *node_name, bool active, Error **errp)
+{
+    int ret;
+
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    if (!node_name) {
+        if (active) {
+            bdrv_activate_all(errp);
+        } else {
+            ret = bdrv_inactivate_all();
+            if (ret < 0) {
+                error_setg_errno(errp, -ret, "Failed to inactivate all nodes");
+            }
+        }
+    } else {
+        BlockDriverState *bs = bdrv_find_node(node_name);
+        if (!bs) {
+            error_setg(errp, "Failed to find node with node-name='%s'",
+                       node_name);
+            return;
+        }
+
+        if (active) {
+            bdrv_activate(bs, errp);
+        } else {
+            bdrv_inactivate(bs, errp);
+        }
+    }
+}
+
 static BdrvChild * GRAPH_RDLOCK
 bdrv_find_child(BlockDriverState *parent_bs, const char *child_name)
 {
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
index a826bf5f78ae..9be34b3c990a 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -184,6 +184,9 @@ bdrv_activate(BlockDriverState *bs, Error **errp);
 int coroutine_fn no_co_wrapper_bdrv_rdlock
 bdrv_co_activate(BlockDriverState *bs, Error **errp);
 
+int no_coroutine_fn
+bdrv_inactivate(BlockDriverState *bs, Error **errp);
+
 void bdrv_activate_all(Error **errp);
 int bdrv_inactivate_all(void);
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 6029e5488915..ee6eccc68c81 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4945,6 +4945,38 @@
 { 'command': 'blockdev-del', 'data': { 'node-name': 'str' },
   'allow-preconfig': true }
 
+##
+# @blockdev-set-active:
+#
+# Activate or inactivate a block device. Use this to manage the handover of
+# block devices on migration with qemu-storage-daemon.
+#
+# Activating a node automatically activates all of its child nodes first.
+# Inactivating a node automatically inactivates any of its child nodes that are
+# not in use by a still active node.
+# +# @node-name: Name of the graph node to activate or inactivate. By default, all +# nodes are affected by the operation. +# +# @active: true if the nodes should be active when the command returns success, +# false if they should be inactive. +# +# Since: 10.0 +# +# .. qmp-example:: +# +# -> { "execute": "blockdev-set-active", +# "arguments": { +# "node-name": "node0", +# "active": false +# } +# } +# <- { "return": {} } +## +{ 'command': 'blockdev-set-active', + 'data': { '*node-name': 'str', 'active': 'bool' }, + 'allow-preconfig': true } + ## # @BlockdevCreateOptionsFile: # From c1c5c7cc4ef6c45ca769c640566fd40d2cb7d5c1 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 4 Feb 2025 22:14:00 +0100 Subject: [PATCH 17/25] block: Support inactive nodes in blk_insert_bs() Device models have a relatively complex way to set up their block backends, in which blk_attach_dev() sets blk->disable_perm = true. We want to support inactive images in exports, too, so that qemu-storage-daemon can be used with migration. Because they don't use blk_attach_dev(), they need another way to set this flag. The most convenient is to do this automatically when an inactive node is attached to a BlockBackend that can be inactivated. Signed-off-by: Kevin Wolf Acked-by: Fabiano Rosas Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-ID: <20250204211407.381505-10-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/block-backend.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index cc6f58ae78db..9288f7e1c69e 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -900,14 +900,24 @@ void blk_remove_bs(BlockBackend *blk) int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) { ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + uint64_t perm, shared_perm; GLOBAL_STATE_CODE(); bdrv_ref(bs); bdrv_graph_wrlock(); + + if ((bs->open_flags & BDRV_O_INACTIVE) && blk_can_inactivate(blk)) { + blk->disable_perm = true; + perm = 0; + shared_perm = BLK_PERM_ALL; + } else { + perm = blk->perm; + shared_perm = blk->shared_perm; + } + blk->root = bdrv_root_attach_child(bs, "root", &child_root, BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - blk->perm, blk->shared_perm, - blk, errp); + perm, shared_perm, blk, errp); bdrv_graph_wrunlock(); if (blk->root == NULL) { return -EPERM; From 69f28176ca0af850db23a1c6364f0c8525b20801 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 4 Feb 2025 22:14:01 +0100 Subject: [PATCH 18/25] block/export: Don't ignore image activation error in blk_exp_add() Currently, block exports can't handle inactive images correctly. Incoming write requests would run into assertion failures. Make sure that we return an error when creating an export can't activate the image. Signed-off-by: Kevin Wolf Acked-by: Fabiano Rosas Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-ID: <20250204211407.381505-11-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/export/export.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/block/export/export.c b/block/export/export.c index 79c71ee24566..bac42b86082d 100644 --- a/block/export/export.c +++ b/block/export/export.c @@ -145,7 +145,11 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) * ctx was acquired in the caller. 
     */
     bdrv_graph_rdlock_main_loop();
-    bdrv_activate(bs, NULL);
+    ret = bdrv_activate(bs, errp);
+    if (ret < 0) {
+        bdrv_graph_rdunlock_main_loop();
+        goto fail;
+    }
     bdrv_graph_rdunlock_main_loop();
 
     perm = BLK_PERM_CONSISTENT_READ;

From 2849092a0024405e74c96f0a5ec41bb182ec8538 Mon Sep 17 00:00:00 2001
From: Kevin Wolf
Date: Tue, 4 Feb 2025 22:14:02 +0100
Subject: [PATCH 19/25] block: Drain nodes before inactivating them

So far the assumption has always been that if we try to inactivate a
node, it is already idle. This doesn't hold true any more if we allow
inactivating exported nodes because we can't know when new external
requests come in.

Drain the node around setting BDRV_O_INACTIVE so that requests can't
start operating on a node that is still active and then suddenly have
it become inactive in the middle.

With this change, it's enough for exports to check that new requests
operate on an active node (or, like reads, are allowed even on an
inactive node).

Signed-off-by: Kevin Wolf
Acked-by: Fabiano Rosas
Message-ID: <20250204211407.381505-12-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi
Reviewed-by: Eric Blake
Signed-off-by: Kevin Wolf
---
 block.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/block.c b/block.c
index 7eeb8d076e42..1601b25f66d2 100644
--- a/block.c
+++ b/block.c
@@ -7032,7 +7032,9 @@ bdrv_inactivate_recurse(BlockDriverState *bs, bool top_level)
         return -EPERM;
     }
 
+    bdrv_drained_begin(bs);
     bs->open_flags |= BDRV_O_INACTIVE;
+    bdrv_drained_end(bs);
 
     /*
      * Update permissions, they may differ for inactive nodes.

From 1600ef01ab1296ca8230daa6bc41ba983751f646 Mon Sep 17 00:00:00 2001
From: Kevin Wolf
Date: Tue, 4 Feb 2025 22:14:03 +0100
Subject: [PATCH 20/25] block/export: Add option to allow export of inactive
 nodes

Add an option in BlockExportOptions to allow creating an export on an
inactive node without activating the node. This mode needs to be
explicitly supported by the export type (so that it doesn't perform any
operations that are forbidden for inactive nodes), so this patch alone
doesn't allow this option to be successfully used yet.

Signed-off-by: Kevin Wolf
Acked-by: Fabiano Rosas
Reviewed-by: Eric Blake
Reviewed-by: Stefan Hajnoczi
Message-ID: <20250204211407.381505-13-kwolf@redhat.com>
Signed-off-by: Kevin Wolf
---
 block/export/export.c  | 31 +++++++++++++++++++++----------
 include/block/export.h |  3 +++
 qapi/block-export.json | 10 +++++++++-
 3 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/block/export/export.c b/block/export/export.c
index bac42b86082d..f3bbf11070d5 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -75,6 +75,7 @@ static const BlockExportDriver *blk_exp_find_driver(BlockExportType type)
 BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
 {
     bool fixed_iothread = export->has_fixed_iothread && export->fixed_iothread;
+    bool allow_inactive = export->has_allow_inactive && export->allow_inactive;
     const BlockExportDriver *drv;
     BlockExport *exp = NULL;
     BlockDriverState *bs;
@@ -138,17 +139,24 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
         }
     }
 
-    /*
-     * Block exports are used for non-shared storage migration. Make sure
-     * that BDRV_O_INACTIVE is cleared and the image is ready for write
-     * access since the export could be available before migration handover.
-     * ctx was acquired in the caller.
- */ bdrv_graph_rdlock_main_loop(); - ret = bdrv_activate(bs, errp); - if (ret < 0) { - bdrv_graph_rdunlock_main_loop(); - goto fail; + if (allow_inactive) { + if (!drv->supports_inactive) { + error_setg(errp, "Export type does not support inactive exports"); + bdrv_graph_rdunlock_main_loop(); + goto fail; + } + } else { + /* + * Block exports are used for non-shared storage migration. Make sure + * that BDRV_O_INACTIVE is cleared and the image is ready for write + * access since the export could be available before migration handover. + */ + ret = bdrv_activate(bs, errp); + if (ret < 0) { + bdrv_graph_rdunlock_main_loop(); + goto fail; + } } bdrv_graph_rdunlock_main_loop(); @@ -162,6 +170,9 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) if (!fixed_iothread) { blk_set_allow_aio_context_change(blk, true); } + if (allow_inactive) { + blk_set_force_allow_inactivate(blk); + } ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { diff --git a/include/block/export.h b/include/block/export.h index f2fe0f8078f3..4bd9531d4d9c 100644 --- a/include/block/export.h +++ b/include/block/export.h @@ -29,6 +29,9 @@ typedef struct BlockExportDriver { */ size_t instance_size; + /* True if the export type supports running on an inactive node */ + bool supports_inactive; + /* Creates and starts a new block export */ int (*create)(BlockExport *, BlockExportOptions *, Error **); diff --git a/qapi/block-export.json b/qapi/block-export.json index ce33fe378df9..117b05d13cbe 100644 --- a/qapi/block-export.json +++ b/qapi/block-export.json @@ -372,6 +372,13 @@ # cannot be moved to the iothread. The default is false. # (since: 5.2) # +# @allow-inactive: If true, the export allows the exported node to be inactive. +# If it is created for an inactive block node, the node remains inactive. If +# the export type doesn't support running on an inactive node, an error is +# returned. If false, inactive block nodes are automatically activated before +# creating the export and trying to inactivate them later fails. +# (since: 10.0; default: false) +# # Since: 4.2 ## { 'union': 'BlockExportOptions', @@ -381,7 +388,8 @@ '*iothread': 'str', 'node-name': 'str', '*writable': 'bool', - '*writethrough': 'bool' }, + '*writethrough': 'bool', + '*allow-inactive': 'bool' }, 'discriminator': 'type', 'data': { 'nbd': 'BlockExportOptionsNbd', From 2e73a17c68f4d80023dc616e596e8c1f3ea8dd75 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 4 Feb 2025 22:14:04 +0100 Subject: [PATCH 21/25] nbd/server: Support inactive nodes In order to support running an NBD export on inactive nodes, we must make sure to return errors for any operations that aren't allowed on inactive nodes. Reads are the only operation we know we need for inactive images, so to err on the side of caution, return errors for everything else, even if some operations could possibly be okay. 
Signed-off-by: Kevin Wolf
Acked-by: Fabiano Rosas
Message-ID: <20250204211407.381505-14-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi
Reviewed-by: Eric Blake
Signed-off-by: Kevin Wolf
---
 nbd/server.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/nbd/server.c b/nbd/server.c
index f64e47270c0b..2076fb2666bc 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -2026,6 +2026,7 @@ static void nbd_export_delete(BlockExport *blk_exp)
 const BlockExportDriver blk_exp_nbd = {
     .type = BLOCK_EXPORT_TYPE_NBD,
     .instance_size = sizeof(NBDExport),
+    .supports_inactive = true,
     .create = nbd_export_create,
     .delete = nbd_export_delete,
     .request_shutdown = nbd_export_request_shutdown,
@@ -2920,6 +2921,22 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
     NBDExport *exp = client->exp;
     char *msg;
     size_t i;
+    bool inactive;
+
+    WITH_GRAPH_RDLOCK_GUARD() {
+        inactive = bdrv_is_inactive(blk_bs(exp->common.blk));
+        if (inactive) {
+            switch (request->type) {
+            case NBD_CMD_READ:
+                /* These commands are allowed on inactive nodes */
+                break;
+            default:
+                /* Return an error for the rest */
+                return nbd_send_generic_reply(client, request, -EPERM,
+                                              "export is inactive", errp);
+            }
+        }
+    }
 
     switch (request->type) {
     case NBD_CMD_CACHE:

From ed26db83673f4a190332d2a378e2f6e342b8904d Mon Sep 17 00:00:00 2001
From: Kevin Wolf
Date: Tue, 4 Feb 2025 22:14:05 +0100
Subject: [PATCH 22/25] iotests: Add filter_qtest()

The open-coded form of this filter has been copied into enough tests
that it's better to move it into iotests.py.

Signed-off-by: Kevin Wolf
Acked-by: Fabiano Rosas
Reviewed-by: Eric Blake
Message-ID: <20250204211407.381505-15-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi
Signed-off-by: Kevin Wolf
---
 tests/qemu-iotests/041                        | 4 +---
 tests/qemu-iotests/165                        | 4 +---
 tests/qemu-iotests/iotests.py                 | 4 ++++
 tests/qemu-iotests/tests/copy-before-write    | 3 +--
 tests/qemu-iotests/tests/migrate-bitmaps-test | 7 +++----
 5 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index 98d17b138835..8452845f448b 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -1100,10 +1100,8 @@ class TestRepairQuorum(iotests.QMPTestCase):
 
         # Check the full error message now
         self.vm.shutdown()
-        log = self.vm.get_log()
-        log = re.sub(r'^\[I \d+\.\d+\] OPENED\n', '', log)
+        log = iotests.filter_qtest(self.vm.get_log())
         log = re.sub(r'^Formatting.*\n', '', log)
-        log = re.sub(r'\n\[I \+\d+\.\d+\] CLOSED\n?$', '', log)
         log = re.sub(r'^%s: ' % os.path.basename(iotests.qemu_prog), '', log)
 
         self.assertEqual(log,
diff --git a/tests/qemu-iotests/165 b/tests/qemu-iotests/165
index b24907a62f37..b3b1709d71fa 100755
--- a/tests/qemu-iotests/165
+++ b/tests/qemu-iotests/165
@@ -82,9 +82,7 @@ class TestPersistentDirtyBitmap(iotests.QMPTestCase):
         self.vm.shutdown()
 
         #catch 'Persistent bitmaps are lost' possible error
-        log = self.vm.get_log()
-        log = re.sub(r'^\[I \d+\.\d+\] OPENED\n', '', log)
-        log = re.sub(r'\[I \+\d+\.\d+\] CLOSED\n?$', '', log)
+        log = iotests.filter_qtest(self.vm.get_log())
         if log:
             print(log)
 
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 19817c735305..9c9c908983c7 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -701,6 +701,10 @@ def _filter(_key, value):
 def filter_nbd_exports(output: str) -> str:
     return re.sub(r'((min|opt|max) block): [0-9]+', r'\1: XXX', output)
 
+def filter_qtest(output: str) -> str:
+    output = re.sub(r'^\[I \d+\.\d+\] OPENED\n', '', output)
+    output = re.sub(r'\n?\[I \+\d+\.\d+\] CLOSED\n?$', '', output)
+    return output
 
 Msg = TypeVar('Msg', Dict[str, Any], List[Any], str)
 
diff --git a/tests/qemu-iotests/tests/copy-before-write b/tests/qemu-iotests/tests/copy-before-write
index d33bea577db1..498c55800853 100755
--- a/tests/qemu-iotests/tests/copy-before-write
+++ b/tests/qemu-iotests/tests/copy-before-write
@@ -95,8 +95,7 @@ class TestCbwError(iotests.QMPTestCase):
         self.vm.shutdown()
 
         log = self.vm.get_log()
-        log = re.sub(r'^\[I \d+\.\d+\] OPENED\n', '', log)
-        log = re.sub(r'\[I \+\d+\.\d+\] CLOSED\n?$', '', log)
+        log = iotests.filter_qtest(log)
         log = iotests.filter_qemu_io(log)
         return log
 
diff --git a/tests/qemu-iotests/tests/migrate-bitmaps-test b/tests/qemu-iotests/tests/migrate-bitmaps-test
index f98e721e97db..8fb4099201de 100755
--- a/tests/qemu-iotests/tests/migrate-bitmaps-test
+++ b/tests/qemu-iotests/tests/migrate-bitmaps-test
@@ -122,11 +122,10 @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
 
         # catch 'Could not reopen qcow2 layer: Bitmap already exists'
         # possible error
-        log = self.vm_a.get_log()
-        log = re.sub(r'^\[I \d+\.\d+\] OPENED\n', '', log)
-        log = re.sub(r'^(wrote .* bytes at offset .*\n.*KiB.*ops.*sec.*\n){3}',
+        log = iotests.filter_qtest(self.vm_a.get_log())
+        log = re.sub(r'^(wrote .* bytes at offset .*\n'
+                     r'.*KiB.*ops.*sec.*\n?){3}',
                      '', log)
-        log = re.sub(r'\[I \+\d+\.\d+\] CLOSED\n?$', '', log)
         self.assertEqual(log, '')
 
         # test that bitmap is still persistent

From 3ea437ab3d561ca79b95a34c5128e370de4738e3 Mon Sep 17 00:00:00 2001
From: Kevin Wolf
Date: Tue, 4 Feb 2025 22:14:06 +0100
Subject: [PATCH 23/25] iotests: Add qsd-migrate case

Test that it's possible to migrate a VM that uses an image on shared
storage through qemu-storage-daemon.

Signed-off-by: Kevin Wolf
Acked-by: Fabiano Rosas
Reviewed-by: Eric Blake
Message-ID: <20250204211407.381505-16-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi
Signed-off-by: Kevin Wolf
---
 tests/qemu-iotests/tests/qsd-migrate     | 140 +++++++++++++++++++++++
 tests/qemu-iotests/tests/qsd-migrate.out |  59 ++++++++++
 2 files changed, 199 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/qsd-migrate
 create mode 100644 tests/qemu-iotests/tests/qsd-migrate.out

diff --git a/tests/qemu-iotests/tests/qsd-migrate b/tests/qemu-iotests/tests/qsd-migrate
new file mode 100755
index 000000000000..de17562cb06a
--- /dev/null
+++ b/tests/qemu-iotests/tests/qsd-migrate
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3
+# group: rw quick
+#
+# Copyright (C) Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Creator/Owner: Kevin Wolf
+
+import iotests
+
+from iotests import filter_qemu_io, filter_qtest
+
+iotests.script_initialize(supported_fmts=['generic'],
+                          supported_protocols=['file'],
+                          supported_platforms=['linux'])
+
+with iotests.FilePath('disk.img') as path, \
+     iotests.FilePath('nbd-src.sock', base_dir=iotests.sock_dir) as nbd_src, \
+     iotests.FilePath('nbd-dst.sock', base_dir=iotests.sock_dir) as nbd_dst, \
+     iotests.FilePath('migrate.sock', base_dir=iotests.sock_dir) as mig_sock, \
+     iotests.VM(path_suffix="-src") as vm_src, \
+     iotests.VM(path_suffix="-dst") as vm_dst:
+
+    img_size = '10M'
+
+    iotests.log('Preparing disk...')
+    iotests.qemu_img_create('-f', iotests.imgfmt, path, img_size)
+
+    iotests.log('Launching source QSD...')
+    qsd_src = iotests.QemuStorageDaemon(
+        '--blockdev', f'file,node-name=disk-file,filename={path}',
+        '--blockdev', f'{iotests.imgfmt},file=disk-file,node-name=disk-fmt',
+        '--nbd-server', f'addr.type=unix,addr.path={nbd_src}',
+        '--export', 'nbd,id=exp0,node-name=disk-fmt,writable=true,'
+                    'allow-inactive=true',
+        qmp=True,
+    )
+
+    iotests.log('Launching source VM...')
+    vm_src.add_args('-blockdev', f'nbd,node-name=disk,server.type=unix,'
+                                 f'server.path={nbd_src},export=disk-fmt')
+    vm_src.add_args('-device', 'virtio-blk,drive=disk,id=virtio0')
+    vm_src.launch()
+
+    iotests.log('Launching destination QSD...')
+    qsd_dst = iotests.QemuStorageDaemon(
+        '--blockdev', f'file,node-name=disk-file,filename={path},active=off',
+        '--blockdev', f'{iotests.imgfmt},file=disk-file,node-name=disk-fmt,'
+                      f'active=off',
+        '--nbd-server', f'addr.type=unix,addr.path={nbd_dst}',
+        '--export', 'nbd,id=exp0,node-name=disk-fmt,writable=true,'
+                    'allow-inactive=true',
+        qmp=True,
+        instance_id='b',
+    )
+
+    iotests.log('Launching destination VM...')
+    vm_dst.add_args('-blockdev', f'nbd,node-name=disk,server.type=unix,'
+                                 f'server.path={nbd_dst},export=disk-fmt')
+    vm_dst.add_args('-device', 'virtio-blk,drive=disk,id=virtio0')
+    vm_dst.add_args('-incoming', f'unix:{mig_sock}')
+    vm_dst.launch()
+
+    iotests.log('\nTest I/O on the source')
+    vm_src.hmp_qemu_io('virtio0/virtio-backend', 'write -P 0x11 0 4k',
+                       use_log=True, qdev=True)
+    vm_src.hmp_qemu_io('virtio0/virtio-backend', 'read -P 0x11 0 4k',
+                       use_log=True, qdev=True)
+
+    iotests.log('\nStarting migration...')
+
+    mig_caps = [
+        {'capability': 'events', 'state': True},
+        {'capability': 'pause-before-switchover', 'state': True},
+    ]
+    vm_src.qmp_log('migrate-set-capabilities', capabilities=mig_caps)
+    vm_dst.qmp_log('migrate-set-capabilities', capabilities=mig_caps)
+    vm_src.qmp_log('migrate', uri=f'unix:{mig_sock}',
+                   filters=[iotests.filter_qmp_testfiles])
+
+    vm_src.event_wait('MIGRATION',
+                      match={'data': {'status': 'pre-switchover'}})
+
+    iotests.log('\nPre-switchover: Reconfigure QSD instances')
+
+    iotests.log(qsd_src.qmp('blockdev-set-active', {'active': False}))
+
+    # Reading is okay from both sides while the image is inactive. Note that
+    # the destination may have stale data until it activates the image, though.
+    vm_src.hmp_qemu_io('virtio0/virtio-backend', 'read -P 0x11 0 4k',
+                       use_log=True, qdev=True)
+    vm_dst.hmp_qemu_io('virtio0/virtio-backend', 'read 0 4k',
+                       use_log=True, qdev=True)
+
+    iotests.log(qsd_dst.qmp('blockdev-set-active', {'active': True}))
+
+    iotests.log('\nCompleting migration...')
+
+    vm_src.qmp_log('migrate-continue', state='pre-switchover')
+    vm_dst.event_wait('MIGRATION', match={'data': {'status': 'completed'}})
+
+    iotests.log('\nTest I/O on the destination')
+
+    # Now the destination must see what the source wrote
+    vm_dst.hmp_qemu_io('virtio0/virtio-backend', 'read -P 0x11 0 4k',
+                       use_log=True, qdev=True)
+
+    # And be able to overwrite it
+    vm_dst.hmp_qemu_io('virtio0/virtio-backend', 'write -P 0x22 0 4k',
+                       use_log=True, qdev=True)
+    vm_dst.hmp_qemu_io('virtio0/virtio-backend', 'read -P 0x22 0 4k',
+                       use_log=True, qdev=True)
+
+    iotests.log('\nDone')
+
+    vm_src.shutdown()
+    iotests.log('\n--- vm_src log ---')
+    log = vm_src.get_log()
+    if log:
+        iotests.log(log, [filter_qtest, filter_qemu_io])
+    qsd_src.stop()
+
+    vm_dst.shutdown()
+    iotests.log('\n--- vm_dst log ---')
+    log = vm_dst.get_log()
+    if log:
+        iotests.log(log, [filter_qtest, filter_qemu_io])
+    qsd_dst.stop()
diff --git a/tests/qemu-iotests/tests/qsd-migrate.out b/tests/qemu-iotests/tests/qsd-migrate.out
new file mode 100644
index 000000000000..4a5241e5d40f
--- /dev/null
+++ b/tests/qemu-iotests/tests/qsd-migrate.out
@@ -0,0 +1,59 @@
+Preparing disk...
+Launching source QSD...
+Launching source VM...
+Launching destination QSD...
+Launching destination VM...
+
+Test I/O on the source
+{"execute": "human-monitor-command", "arguments": {"command-line": "qemu-io -d virtio0/virtio-backend \"write -P 0x11 0 4k\""}}
+{"return": ""}
+{"execute": "human-monitor-command", "arguments": {"command-line": "qemu-io -d virtio0/virtio-backend \"read -P 0x11 0 4k\""}}
+{"return": ""}
+
+Starting migration...
+{"execute": "migrate-set-capabilities", "arguments": {"capabilities": [{"capability": "events", "state": true}, {"capability": "pause-before-switchover", "state": true}]}}
+{"return": {}}
+{"execute": "migrate-set-capabilities", "arguments": {"capabilities": [{"capability": "events", "state": true}, {"capability": "pause-before-switchover", "state": true}]}}
+{"return": {}}
+{"execute": "migrate", "arguments": {"uri": "unix:SOCK_DIR/PID-migrate.sock"}}
+{"return": {}}
+
+Pre-switchover: Reconfigure QSD instances
+{"return": {}}
+{"execute": "human-monitor-command", "arguments": {"command-line": "qemu-io -d virtio0/virtio-backend \"read -P 0x11 0 4k\""}}
+{"return": ""}
+{"execute": "human-monitor-command", "arguments": {"command-line": "qemu-io -d virtio0/virtio-backend \"read 0 4k\""}}
+{"return": ""}
+{"return": {}}
+
+Completing migration...
+{"execute": "migrate-continue", "arguments": {"state": "pre-switchover"}}
+{"return": {}}
+
+Test I/O on the destination
+{"execute": "human-monitor-command", "arguments": {"command-line": "qemu-io -d virtio0/virtio-backend \"read -P 0x11 0 4k\""}}
+{"return": ""}
+{"execute": "human-monitor-command", "arguments": {"command-line": "qemu-io -d virtio0/virtio-backend \"write -P 0x22 0 4k\""}}
+{"return": ""}
+{"execute": "human-monitor-command", "arguments": {"command-line": "qemu-io -d virtio0/virtio-backend \"read -P 0x22 0 4k\""}}
+{"return": ""}
+
+Done
+
+--- vm_src log ---
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+--- vm_dst log ---
+read 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)

From bbf105ef3cc48fff282789e9bf56b7a81e1407bd Mon Sep 17 00:00:00 2001
From: Kevin Wolf
Date: Tue, 4 Feb 2025 22:14:07 +0100
Subject: [PATCH 24/25] iotests: Add (NBD-based) tests for inactive nodes

This tests different types of operations on inactive block nodes
(including graph changes, block jobs and NBD exports) to make sure that
users manually activating and inactivating nodes doesn't break things.

Support for inactive nodes in other export types will have to come with
separate test cases because they have different dependencies like blkio
or root permissions and we don't want to disable this basic test when
they are not fulfilled.

Signed-off-by: Kevin Wolf
Acked-by: Fabiano Rosas
Message-ID: <20250204211407.381505-17-kwolf@redhat.com>
Reviewed-by: Eric Blake
Reviewed-by: Stefan Hajnoczi
Signed-off-by: Kevin Wolf
---
 tests/qemu-iotests/iotests.py                 |   4 +
 tests/qemu-iotests/tests/inactive-node-nbd    | 303 ++++++++++++++++++
 .../qemu-iotests/tests/inactive-node-nbd.out  | 239 ++++++++++++++
 3 files changed, 546 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/inactive-node-nbd
 create mode 100644 tests/qemu-iotests/tests/inactive-node-nbd.out

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 9c9c908983c7..7292c8b342ad 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -913,6 +913,10 @@ def add_incoming(self, addr):
         self._args.append(addr)
         return self
 
+    def add_paused(self):
+        self._args.append('-S')
+        return self
+
     def hmp(self, command_line: str, use_log: bool = False) -> QMPMessage:
         cmd = 'human-monitor-command'
         kwargs: Dict[str, Any] = {'command-line': command_line}
diff --git a/tests/qemu-iotests/tests/inactive-node-nbd b/tests/qemu-iotests/tests/inactive-node-nbd
new file mode 100755
index 000000000000..a95b37e79625
--- /dev/null
+++ b/tests/qemu-iotests/tests/inactive-node-nbd
@@ -0,0 +1,303 @@
+#!/usr/bin/env python3
+# group: rw quick
+#
+# Copyright (C) Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Creator/Owner: Kevin Wolf
+
+import iotests
+
+from iotests import QemuIoInteractive
+from iotests import filter_qemu_io, filter_qtest, filter_qmp_testfiles
+
+iotests.script_initialize(supported_fmts=['generic'],
+                          supported_protocols=['file'],
+                          supported_platforms=['linux'])
+
+def get_export(node_name='disk-fmt', allow_inactive=None):
+    exp = {
+        'id': 'exp0',
+        'type': 'nbd',
+        'node-name': node_name,
+        'writable': True,
+    }
+
+    if allow_inactive is not None:
+        exp['allow-inactive'] = allow_inactive
+
+    return exp
+
+def node_is_active(_vm, node_name):
+    nodes = _vm.cmd('query-named-block-nodes', flat=True)
+    node = next(n for n in nodes if n['node-name'] == node_name)
+    return node['active']
+
+with iotests.FilePath('disk.img') as path, \
+     iotests.FilePath('snap.qcow2') as snap_path, \
+     iotests.FilePath('snap2.qcow2') as snap2_path, \
+     iotests.FilePath('target.img') as target_path, \
+     iotests.FilePath('nbd.sock', base_dir=iotests.sock_dir) as nbd_sock, \
+     iotests.VM() as vm:
+
+    img_size = '10M'
+
+    iotests.log('Preparing disk...')
+    iotests.qemu_img_create('-f', iotests.imgfmt, path, img_size)
+    iotests.qemu_img_create('-f', iotests.imgfmt, target_path, img_size)
+
+    iotests.qemu_img_create('-f', 'qcow2', '-b', path, '-F', iotests.imgfmt,
+                            snap_path)
+    iotests.qemu_img_create('-f', 'qcow2', '-b', snap_path, '-F', 'qcow2',
+                            snap2_path)
+
+    iotests.log('Launching VM...')
+    vm.add_blockdev(f'file,node-name=disk-file,filename={path}')
+    vm.add_blockdev(f'{iotests.imgfmt},file=disk-file,node-name=disk-fmt,'
+                    'active=off')
+    vm.add_blockdev(f'file,node-name=target-file,filename={target_path}')
+    vm.add_blockdev(f'{iotests.imgfmt},file=target-file,node-name=target-fmt')
+    vm.add_blockdev(f'file,node-name=snap-file,filename={snap_path}')
+    vm.add_blockdev(f'file,node-name=snap2-file,filename={snap2_path}')
+
+    # Actually running the VM activates all images
+    vm.add_paused()
+
+    vm.launch()
+    vm.qmp_log('nbd-server-start',
+               addr={'type': 'unix', 'data': {'path': nbd_sock}},
+               filters=[filter_qmp_testfiles])
+
+    iotests.log('\n=== Creating export of inactive node ===')
+
+    iotests.log('\nExports activate nodes without allow-inactive')
+    iotests.log('disk-fmt active: %s' % node_is_active(vm, 'disk-fmt'))
+    vm.qmp_log('block-export-add', **get_export())
+    iotests.log('disk-fmt active: %s' % node_is_active(vm, 'disk-fmt'))
+    vm.qmp_log('query-block-exports')
+    vm.qmp_log('block-export-del', id='exp0')
+    vm.event_wait('BLOCK_EXPORT_DELETED')
+    vm.qmp_log('query-block-exports')
+
+    iotests.log('\nExports activate nodes with allow-inactive=false')
+    vm.qmp_log('blockdev-set-active', node_name='disk-fmt', active=False)
+    iotests.log('disk-fmt active: %s' % node_is_active(vm, 'disk-fmt'))
+    vm.qmp_log('block-export-add', **get_export(allow_inactive=False))
+    iotests.log('disk-fmt active: %s' % node_is_active(vm, 'disk-fmt'))
+    vm.qmp_log('query-block-exports')
+    vm.qmp_log('block-export-del', id='exp0')
+    vm.event_wait('BLOCK_EXPORT_DELETED')
+    vm.qmp_log('query-block-exports')
+
+    iotests.log('\nExport leaves nodes inactive with allow-inactive=true')
+    vm.qmp_log('blockdev-set-active', node_name='disk-fmt', active=False)
+    iotests.log('disk-fmt active: %s' % node_is_active(vm, 'disk-fmt'))
+    vm.qmp_log('block-export-add', **get_export(allow_inactive=True))
+    iotests.log('disk-fmt active: %s' % node_is_active(vm, 'disk-fmt'))
+    vm.qmp_log('query-block-exports')
+    vm.qmp_log('block-export-del', id='exp0')
+    vm.event_wait('BLOCK_EXPORT_DELETED')
+    vm.qmp_log('query-block-exports')
+
+    iotests.log('\n=== Inactivating node with existing export ===')
+
+    iotests.log('\nInactivating nodes with an export fails without '
+                'allow-inactive')
+    vm.qmp_log('blockdev-set-active', node_name='disk-fmt', active=True)
+    vm.qmp_log('block-export-add', **get_export(node_name='disk-fmt'))
+    vm.qmp_log('blockdev-set-active', node_name='disk-fmt', active=False)
+    iotests.log('disk-fmt active: %s' % node_is_active(vm, 'disk-fmt'))
+    vm.qmp_log('query-block-exports')
+    vm.qmp_log('block-export-del', id='exp0')
+    vm.event_wait('BLOCK_EXPORT_DELETED')
+    vm.qmp_log('query-block-exports')
+
+    iotests.log('\nInactivating nodes with an export fails with '
+                'allow-inactive=false')
+    vm.qmp_log('blockdev-set-active', node_name='disk-fmt', active=True)
+    vm.qmp_log('block-export-add',
+               **get_export(node_name='disk-fmt', allow_inactive=False))
+    vm.qmp_log('blockdev-set-active', node_name='disk-fmt', active=False)
+    iotests.log('disk-fmt active: %s' % node_is_active(vm, 'disk-fmt'))
+    vm.qmp_log('query-block-exports')
+    vm.qmp_log('block-export-del', id='exp0')
+    vm.event_wait('BLOCK_EXPORT_DELETED')
+    vm.qmp_log('query-block-exports')
+
+    iotests.log('\nInactivating nodes with an export works with '
+                'allow-inactive=true')
+    vm.qmp_log('blockdev-set-active', node_name='disk-fmt', active=True)
+    vm.qmp_log('block-export-add',
+               **get_export(node_name='disk-fmt', allow_inactive=True))
+    vm.qmp_log('blockdev-set-active', node_name='disk-fmt', active=False)
+    iotests.log('disk-fmt active: %s' % node_is_active(vm, 'disk-fmt'))
+    vm.qmp_log('query-block-exports')
+    vm.qmp_log('block-export-del', id='exp0')
+    vm.event_wait('BLOCK_EXPORT_DELETED')
+    vm.qmp_log('query-block-exports')
+
+    iotests.log('\n=== Inactive nodes with parent ===')
+
+    iotests.log('\nInactivating nodes with an active parent fails')
+    vm.qmp_log('blockdev-set-active', node_name='disk-fmt', active=True)
+    vm.qmp_log('blockdev-set-active', node_name='disk-file', active=False)
+    iotests.log('disk-file active: %s' % node_is_active(vm, 'disk-file'))
+    iotests.log('disk-fmt active: %s' % node_is_active(vm, 'disk-fmt'))
+
+    iotests.log('\nInactivating nodes with an inactive parent works')
+    vm.qmp_log('blockdev-set-active', node_name='disk-fmt', active=False)
+    vm.qmp_log('blockdev-set-active', node_name='disk-file', active=False)
+    iotests.log('disk-file active: %s' % node_is_active(vm, 'disk-file'))
+    iotests.log('disk-fmt active: %s' % node_is_active(vm, 'disk-fmt'))
+
+    iotests.log('\nCreating active parent node with an inactive child fails')
+    vm.qmp_log('blockdev-add', driver='raw', file='disk-fmt',
+               node_name='disk-filter')
+    vm.qmp_log('blockdev-add', driver='raw', file='disk-fmt',
+               node_name='disk-filter', active=True)
+
+    iotests.log('\nCreating inactive parent node with an inactive child works')
+    vm.qmp_log('blockdev-add', driver='raw', file='disk-fmt',
+               node_name='disk-filter', active=False)
+    vm.qmp_log('blockdev-del', node_name='disk-filter')
+
+    iotests.log('\n=== Resizing an inactive node ===')
+    vm.qmp_log('block_resize', node_name='disk-fmt', size=16*1024*1024)
+
+    iotests.log('\n=== Taking a snapshot of an inactive node ===')
+
+    iotests.log('\nActive overlay over inactive backing file automatically '
+                'makes both inactive for compatibility')
+    vm.qmp_log('blockdev-add', driver='qcow2', node_name='snap-fmt',
+               file='snap-file', backing=None)
+    iotests.log('disk-fmt active: %s' % node_is_active(vm, 'disk-fmt'))
+    iotests.log('snap-fmt active: %s' % node_is_active(vm, 'snap-fmt'))
+    vm.qmp_log('blockdev-snapshot', node='disk-fmt', overlay='snap-fmt')
+    iotests.log('disk-fmt active: %s' % node_is_active(vm, 'disk-fmt'))
+    iotests.log('snap-fmt active: %s' % node_is_active(vm, 'snap-fmt'))
+    vm.qmp_log('blockdev-del', node_name='snap-fmt')
+
+    iotests.log('\nInactive overlay over inactive backing file just works')
+    vm.qmp_log('blockdev-add', driver='qcow2', node_name='snap-fmt',
+               file='snap-file', backing=None, active=False)
+    vm.qmp_log('blockdev-snapshot', node='disk-fmt', overlay='snap-fmt')
+
+    iotests.log('\n=== Block jobs with inactive nodes ===')
+
+    iotests.log('\nStreaming into an inactive node')
+    vm.qmp_log('block-stream', device='snap-fmt',
+               filters=[iotests.filter_qmp_generated_node_ids])
+
+    iotests.log('\nCommitting an inactive root node (active commit)')
+    vm.qmp_log('block-commit', job_id='job0', device='snap-fmt',
+               filters=[iotests.filter_qmp_generated_node_ids])
+
+    iotests.log('\nCommitting an inactive intermediate node to inactive base')
+    vm.qmp_log('blockdev-add', driver='qcow2', node_name='snap2-fmt',
+               file='snap2-file', backing='snap-fmt', active=False)
+
+    iotests.log('disk-fmt active: %s' % node_is_active(vm, 'disk-fmt'))
+    iotests.log('snap-fmt active: %s' % node_is_active(vm, 'snap-fmt'))
+    iotests.log('snap2-fmt active: %s' % node_is_active(vm, 'snap2-fmt'))
+
+    vm.qmp_log('block-commit', job_id='job0', device='snap2-fmt',
+               top_node='snap-fmt',
+               filters=[iotests.filter_qmp_generated_node_ids])
+
+    iotests.log('\nCommitting an inactive intermediate node to active base')
+    vm.qmp_log('blockdev-set-active', node_name='disk-fmt', active=True)
+    vm.qmp_log('block-commit', job_id='job0', device='snap2-fmt',
+               top_node='snap-fmt',
+               filters=[iotests.filter_qmp_generated_node_ids])
+
+    iotests.log('\nMirror from inactive source to active target')
+    vm.qmp_log('blockdev-mirror', job_id='job0', device='snap2-fmt',
+               target='target-fmt', sync='full',
+               filters=[iotests.filter_qmp_generated_node_ids])
+
+    iotests.log('\nMirror from active source to inactive target')
+
+    iotests.log('disk-fmt active: %s' % node_is_active(vm, 'disk-fmt'))
+    iotests.log('snap-fmt active: %s' % node_is_active(vm, 'snap-fmt'))
+    iotests.log('snap2-fmt active: %s' % node_is_active(vm, 'snap2-fmt'))
+    iotests.log('target-fmt active: %s' % node_is_active(vm, 'target-fmt'))
+
+    # Activating snap2-fmt recursively activates the whole backing chain
+    vm.qmp_log('blockdev-set-active', node_name='snap2-fmt', active=True)
+    vm.qmp_log('blockdev-set-active', node_name='target-fmt', active=False)
+
+    iotests.log('disk-fmt active: %s' % node_is_active(vm, 'disk-fmt'))
+    iotests.log('snap-fmt active: %s' % node_is_active(vm, 'snap-fmt'))
+    iotests.log('snap2-fmt active: %s' % node_is_active(vm, 'snap2-fmt'))
+    iotests.log('target-fmt active: %s' % node_is_active(vm, 'target-fmt'))
+
+    vm.qmp_log('blockdev-mirror', job_id='job0', device='snap2-fmt',
+               target='target-fmt', sync='full',
+               filters=[iotests.filter_qmp_generated_node_ids])
+
+    iotests.log('\nBackup from active source to inactive target')
+
+    vm.qmp_log('blockdev-backup', job_id='job0', device='snap2-fmt',
+               target='target-fmt', sync='full',
+               filters=[iotests.filter_qmp_generated_node_ids])
+
+    iotests.log('\nBackup from inactive source to active target')
+
+    # Inactivating snap2-fmt recursively inactivates the whole backing chain
+    vm.qmp_log('blockdev-set-active', node_name='snap2-fmt', active=False)
+    vm.qmp_log('blockdev-set-active', node_name='target-fmt', active=True)
+
+    iotests.log('disk-fmt active: %s' % node_is_active(vm, 'disk-fmt'))
+    iotests.log('snap-fmt active: %s' % node_is_active(vm, 'snap-fmt'))
+    iotests.log('snap2-fmt active: %s' % node_is_active(vm, 'snap2-fmt'))
+    iotests.log('target-fmt active: %s' % node_is_active(vm, 'target-fmt'))
+
+    vm.qmp_log('blockdev-backup', job_id='job0', device='snap2-fmt',
+               target='target-fmt', sync='full',
+               filters=[iotests.filter_qmp_generated_node_ids])
+
+    iotests.log('\n=== Accessing export on inactive node ===')
+
+    # Use the target node because it has the right image format and isn't the
+    # (read-only) backing file of a qcow2 node
+    vm.qmp_log('blockdev-set-active', node_name='target-fmt', active=False)
+    vm.qmp_log('block-export-add',
+               **get_export(node_name='target-fmt', allow_inactive=True))
+
+    # The read should succeed, everything else should fail gracefully
+    qemu_io = QemuIoInteractive('-f', 'raw',
+                                f'nbd+unix:///target-fmt?socket={nbd_sock}')
+    iotests.log(qemu_io.cmd('read 0 64k'), filters=[filter_qemu_io])
+    iotests.log(qemu_io.cmd('write 0 64k'), filters=[filter_qemu_io])
+    iotests.log(qemu_io.cmd('write -z 0 64k'), filters=[filter_qemu_io])
+    iotests.log(qemu_io.cmd('write -zu 0 64k'), filters=[filter_qemu_io])
+    iotests.log(qemu_io.cmd('discard 0 64k'), filters=[filter_qemu_io])
+    iotests.log(qemu_io.cmd('flush'), filters=[filter_qemu_io])
+    iotests.log(qemu_io.cmd('map'), filters=[filter_qemu_io])
+    qemu_io.close()
+
+    iotests.log('\n=== Resuming VM activates all images ===')
+    vm.qmp_log('cont')
+
+    iotests.log('disk-fmt active: %s' % node_is_active(vm, 'disk-fmt'))
+    iotests.log('snap-fmt active: %s' % node_is_active(vm, 'snap-fmt'))
+    iotests.log('snap2-fmt active: %s' % node_is_active(vm, 'snap2-fmt'))
+    iotests.log('target-fmt active: %s' % node_is_active(vm, 'target-fmt'))
+
+    iotests.log('\nShutting down...')
+    vm.shutdown()
+    log = vm.get_log()
+    if log:
+        iotests.log(log, [filter_qtest, filter_qemu_io])
diff --git a/tests/qemu-iotests/tests/inactive-node-nbd.out b/tests/qemu-iotests/tests/inactive-node-nbd.out
new file mode 100644
index 000000000000..a458b4fc0552
--- /dev/null
+++ b/tests/qemu-iotests/tests/inactive-node-nbd.out
@@ -0,0 +1,239 @@
+Preparing disk...
+Launching VM...
+{"execute": "nbd-server-start", "arguments": {"addr": {"data": {"path": "SOCK_DIR/PID-nbd.sock"}, "type": "unix"}}}
+{"return": {}}
+
+=== Creating export of inactive node ===
+
+Exports activate nodes without allow-inactive
+disk-fmt active: False
+{"execute": "block-export-add", "arguments": {"id": "exp0", "node-name": "disk-fmt", "type": "nbd", "writable": true}}
+{"return": {}}
+disk-fmt active: True
+{"execute": "query-block-exports", "arguments": {}}
+{"return": [{"id": "exp0", "node-name": "disk-fmt", "shutting-down": false, "type": "nbd"}]}
+{"execute": "block-export-del", "arguments": {"id": "exp0"}}
+{"return": {}}
+{"execute": "query-block-exports", "arguments": {}}
+{"return": []}
+
+Exports activate nodes with allow-inactive=false
+{"execute": "blockdev-set-active", "arguments": {"active": false, "node-name": "disk-fmt"}}
+{"return": {}}
+disk-fmt active: False
+{"execute": "block-export-add", "arguments": {"allow-inactive": false, "id": "exp0", "node-name": "disk-fmt", "type": "nbd", "writable": true}}
+{"return": {}}
+disk-fmt active: True
+{"execute": "query-block-exports", "arguments": {}}
+{"return": [{"id": "exp0", "node-name": "disk-fmt", "shutting-down": false, "type": "nbd"}]}
+{"execute": "block-export-del", "arguments": {"id": "exp0"}}
+{"return": {}}
+{"execute": "query-block-exports", "arguments": {}}
+{"return": []}
+
+Export leaves nodes inactive with allow-inactive=true
+{"execute": "blockdev-set-active", "arguments": {"active": false, "node-name": "disk-fmt"}}
+{"return": {}}
+disk-fmt active: False
+{"execute": "block-export-add", "arguments": {"allow-inactive": true, "id": "exp0", "node-name": "disk-fmt", "type": "nbd", "writable": true}}
+{"return": {}}
+disk-fmt active: False
+{"execute": "query-block-exports", "arguments": {}}
+{"return": [{"id": "exp0", "node-name": "disk-fmt", "shutting-down": false, "type": "nbd"}]}
+{"execute": "block-export-del", "arguments": {"id": "exp0"}}
+{"return": {}}
+{"execute": "query-block-exports", "arguments": {}}
+{"return": []}
+
+=== Inactivating node with existing export ===
+
+Inactivating nodes with an export fails without allow-inactive
+{"execute": "blockdev-set-active", "arguments": {"active": true, "node-name": "disk-fmt"}}
+{"return": {}}
+{"execute": "block-export-add", "arguments": {"id": "exp0", "node-name": "disk-fmt", "type": "nbd", "writable": true}}
+{"return": {}}
+{"execute": "blockdev-set-active", "arguments": {"active": false, "node-name": "disk-fmt"}}
+{"error": {"class": "GenericError", "desc": "Failed to inactivate node: Operation not permitted"}}
+disk-fmt active: True
+{"execute": "query-block-exports", "arguments": {}}
+{"return": [{"id": "exp0", "node-name": "disk-fmt", "shutting-down": false, "type": "nbd"}]}
+{"execute": "block-export-del", "arguments": {"id": "exp0"}}
+{"return": {}}
+{"execute": "query-block-exports", "arguments": {}}
+{"return": []}
+
+Inactivating nodes with an export fails with allow-inactive=false
+{"execute": "blockdev-set-active", "arguments": {"active": true, "node-name": "disk-fmt"}}
+{"return": {}}
+{"execute": "block-export-add", "arguments": {"allow-inactive": false, "id": "exp0", "node-name": "disk-fmt", "type": "nbd", "writable": true}}
+{"return": {}}
+{"execute": "blockdev-set-active", "arguments": {"active": false, "node-name": "disk-fmt"}}
+{"error": {"class": "GenericError", "desc": "Failed to inactivate node: Operation not permitted"}}
+disk-fmt active: True
+{"execute": "query-block-exports", "arguments": {}}
+{"return": [{"id": "exp0", "node-name": "disk-fmt", "shutting-down": false, "type": "nbd"}]}
+{"execute": "block-export-del", "arguments": {"id": "exp0"}}
+{"return": {}}
+{"execute": "query-block-exports", "arguments": {}}
+{"return": []}
+
+Inactivating nodes with an export works with allow-inactive=true
+{"execute": "blockdev-set-active", "arguments": {"active": true, "node-name": "disk-fmt"}}
+{"return": {}}
+{"execute": "block-export-add", "arguments": {"allow-inactive": true, "id": "exp0", "node-name": "disk-fmt", "type": "nbd", "writable": true}}
+{"return": {}}
+{"execute": "blockdev-set-active", "arguments": {"active": false, "node-name": "disk-fmt"}}
+{"return": {}}
+disk-fmt active: False
+{"execute": "query-block-exports", "arguments": {}}
+{"return": [{"id": "exp0", "node-name": "disk-fmt", "shutting-down": false, "type": "nbd"}]}
+{"execute": "block-export-del", "arguments": {"id": "exp0"}}
+{"return": {}}
+{"execute": "query-block-exports", "arguments": {}}
+{"return": []}
+
+=== Inactive nodes with parent ===
+
+Inactivating nodes with an active parent fails
+{"execute": "blockdev-set-active", "arguments": {"active": true, "node-name": "disk-fmt"}}
+{"return": {}}
+{"execute": "blockdev-set-active", "arguments": {"active": false, "node-name": "disk-file"}}
+{"error": {"class": "GenericError", "desc": "Node has active parent node"}}
+disk-file active: True
+disk-fmt active: True
+
+Inactivating nodes with an inactive parent works
+{"execute": "blockdev-set-active", "arguments": {"active": false, "node-name": "disk-fmt"}}
+{"return": {}}
+{"execute": "blockdev-set-active", "arguments": {"active": false, "node-name": "disk-file"}}
+{"return": {}}
+disk-file active: False
+disk-fmt active: False
+
+Creating active parent node with an inactive child fails
+{"execute": "blockdev-add", "arguments": {"driver": "raw", "file": "disk-fmt", "node-name": "disk-filter"}}
+{"error": {"class": "GenericError", "desc": "Inactive 'disk-fmt' can't be a file child of active 'disk-filter'"}}
+{"execute": "blockdev-add", "arguments": {"active": true, "driver": "raw", "file": "disk-fmt", "node-name": "disk-filter"}}
+{"error": {"class": "GenericError", "desc": "Inactive 'disk-fmt' can't be a file child of active 'disk-filter'"}}
+
+Creating inactive parent node with an inactive child works
+{"execute": "blockdev-add", "arguments": {"active": false, "driver": "raw", "file": "disk-fmt", "node-name": "disk-filter"}}
+{"return": {}}
+{"execute": "blockdev-del", "arguments": {"node-name": "disk-filter"}}
+{"return": {}}
+
+=== Resizing an inactive node ===
+{"execute": "block_resize", "arguments": {"node-name": "disk-fmt", "size": 16777216}}
+{"error": {"class": "GenericError", "desc": "Permission 'resize' unavailable on inactive node"}}
+
+=== Taking a snapshot of an inactive node ===
+
+Active overlay over inactive backing file automatically makes both inactive for compatibility
+{"execute": "blockdev-add", "arguments": {"backing": null, "driver": "qcow2", "file": "snap-file", "node-name": "snap-fmt"}}
+{"return": {}}
+disk-fmt active: False
+snap-fmt active: True
+{"execute": "blockdev-snapshot", "arguments": {"node": "disk-fmt", "overlay": "snap-fmt"}}
+{"return": {}}
+disk-fmt active: False
+snap-fmt active: False
+{"execute": "blockdev-del", "arguments": {"node-name": "snap-fmt"}}
+{"return": {}}
+
+Inactive overlay over inactive backing file just works
+{"execute": "blockdev-add", "arguments": {"active": false, "backing": null, "driver": "qcow2", "file": "snap-file", "node-name": "snap-fmt"}}
+{"return": {}}
+{"execute": "blockdev-snapshot", "arguments": {"node": "disk-fmt", "overlay": "snap-fmt"}}
+{"return": {}}
+
+=== Block jobs with inactive nodes ===
+
+Streaming into an inactive node
+{"execute": "block-stream", "arguments": {"device": "snap-fmt"}}
+{"error": {"class": "GenericError", "desc": "Could not create node: Inactive 'snap-fmt' can't be a file child of active 'NODE_NAME'"}}
+
+Committing an inactive root node (active commit)
+{"execute": "block-commit", "arguments": {"device": "snap-fmt", "job-id": "job0"}}
+{"error": {"class": "GenericError", "desc": "Inactive 'snap-fmt' can't be a backing child of active 'NODE_NAME'"}}
+
+Committing an inactive intermediate node to inactive base
+{"execute": "blockdev-add", "arguments": {"active": false, "backing": "snap-fmt", "driver": "qcow2", "file": "snap2-file", "node-name": "snap2-fmt"}}
+{"return": {}}
+disk-fmt active: False
+snap-fmt active: False
+snap2-fmt active: False
+{"execute": "block-commit", "arguments": {"device": "snap2-fmt", "job-id": "job0", "top-node": "snap-fmt"}}
+{"error": {"class": "GenericError", "desc": "Inactive 'snap-fmt' can't be a backing child of active 'NODE_NAME'"}}
+
+Committing an inactive intermediate node to active base
+{"execute": "blockdev-set-active", "arguments": {"active": true, "node-name": "disk-fmt"}}
+{"return": {}}
+{"execute": "block-commit", "arguments": {"device": "snap2-fmt", "job-id": "job0", "top-node": "snap-fmt"}}
+{"error": {"class": "GenericError", "desc": "Inactive 'snap-fmt' can't be a backing child of active 'NODE_NAME'"}}
+
+Mirror from inactive source to active target
+{"execute": "blockdev-mirror", "arguments": {"device": "snap2-fmt", "job-id": "job0", "sync": "full", "target": "target-fmt"}}
+{"error": {"class": "GenericError", "desc": "Inactive 'snap2-fmt' can't be a backing child of active 'NODE_NAME'"}}
+
+Mirror from active source to inactive target
+disk-fmt active: True
+snap-fmt active: False
+snap2-fmt active: False
+target-fmt active: True
+{"execute": "blockdev-set-active", "arguments": {"active": true, "node-name": "snap2-fmt"}}
+{"return": {}}
+{"execute": "blockdev-set-active", "arguments": {"active": false, "node-name": "target-fmt"}}
+{"return": {}}
+disk-fmt active: True
+snap-fmt active: True
+snap2-fmt active: True
+target-fmt active: False
+{"execute": "blockdev-mirror", "arguments": {"device": "snap2-fmt", "job-id": "job0", "sync": "full", "target": "target-fmt"}}
+{"error": {"class": "GenericError", "desc": "Permission 'write' unavailable on inactive node"}}
+
+Backup from active source to inactive target
+{"execute": "blockdev-backup", "arguments": {"device": "snap2-fmt", "job-id": "job0", "sync": "full", "target": "target-fmt"}}
+{"error": {"class": "GenericError", "desc": "Could not create node: Inactive 'target-fmt' can't be a target child of active 'NODE_NAME'"}}
+
+Backup from inactive source to active target
+{"execute": "blockdev-set-active", "arguments": {"active": false, "node-name": "snap2-fmt"}}
+{"return": {}}
+{"execute": "blockdev-set-active", "arguments": {"active": true, "node-name": "target-fmt"}}
+{"return": {}}
+disk-fmt active: False
+snap-fmt active: False
+snap2-fmt active: False
+target-fmt active: True
+{"execute": "blockdev-backup", "arguments": {"device": "snap2-fmt", "job-id": "job0", "sync": "full", "target": "target-fmt"}}
+{"error": {"class": "GenericError", "desc": "Could not create node: Inactive 'snap2-fmt' can't be a file child of active 'NODE_NAME'"}}
+
+=== Accessing export on inactive node ===
+{"execute": "blockdev-set-active", "arguments": {"active": false, "node-name": "target-fmt"}}
+{"return": {}}
+{"execute": "block-export-add", "arguments": {"allow-inactive": true, "id": "exp0", "node-name": "target-fmt", "type": "nbd", "writable": true}}
+{"return": {}}
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+write failed: Operation not permitted
+
+write failed: Operation not permitted
+
+write failed: Operation not permitted
+
+discard failed: Operation not permitted
+
+
+qemu-io: Failed to get allocation status: Operation not permitted
+
+
+=== Resuming VM activates all images ===
+{"execute": "cont", "arguments": {}}
+{"return": {}}
+disk-fmt active: True
+snap-fmt active: True
+snap2-fmt active: True
+target-fmt active: True
+
+Shutting down...

From fc4e394b2887e15d5f83994e4fc7b26c895c627a Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi
Date: Mon, 3 Feb 2025 13:25:29 -0500
Subject: [PATCH 25/25] block: remove unused BLOCK_OP_TYPE_DATAPLANE

BLOCK_OP_TYPE_DATAPLANE prevents BlockDriverState from being used by
virtio-blk/virtio-scsi with IOThread. Commit b112a65c52aa ("block:
declare blockjobs and dataplane friends!") eliminated the main reason
for this blocker in 2014.

Nowadays the block layer supports I/O from multiple AioContexts, so
there is even less reason to block IOThread users. Any legitimate
reasons related to interference would probably also apply to
non-IOThread users.

The only remaining users are bdrv_op_unblock(BLOCK_OP_TYPE_DATAPLANE)
calls after bdrv_op_block_all(). If we remove BLOCK_OP_TYPE_DATAPLANE
their behavior doesn't change. Existing bdrv_op_block_all() callers
that don't explicitly unblock BLOCK_OP_TYPE_DATAPLANE seem to do so
simply because no one bothered to rather than because it is necessary
to keep BLOCK_OP_TYPE_DATAPLANE blocked.

Signed-off-by: Stefan Hajnoczi
Message-ID: <20250203182529.269066-1-stefanha@redhat.com>
Reviewed-by: Eric Blake
Reviewed-by: Kevin Wolf
Signed-off-by: Kevin Wolf
---
 block/replication.c          | 1 -
 blockjob.c                   | 2 --
 hw/block/virtio-blk.c        | 9 ---------
 hw/scsi/virtio-scsi.c        | 3 ---
 include/block/block-common.h | 1 -
 5 files changed, 16 deletions(-)

diff --git a/block/replication.c b/block/replication.c
index 2ce16f058985..d4d677a902fd 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -576,7 +576,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
         return;
     }
     bdrv_op_block_all(top_bs, s->blocker);
-    bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker);
 
     bdrv_graph_wrunlock();
 
diff --git a/blockjob.c b/blockjob.c
index e94a840d7f92..32007f31a9d9 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -539,8 +539,6 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
         goto fail;
     }
 
-    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
-
     if (!block_job_set_speed(job, speed, errp)) {
         goto fail;
     }
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index e0acce89e1e0..a1829e3abdf7 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1562,15 +1562,6 @@ static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp)
             error_setg(errp, "ioeventfd is required for iothread");
             return false;
         }
-
-        /*
-         * If ioeventfd is (re-)enabled while the guest is running there could
-         * be block jobs that can conflict.
- */ - if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { - error_prepend(errp, "cannot start virtio-blk ioeventfd: "); - return false; - } } s->vq_aio_context = g_new(AioContext *, conf->num_queues); diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 23516995dc9f..7d094e188103 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -1065,9 +1065,6 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, int ret; if (s->ctx && !s->dataplane_fenced) { - if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { - return; - } ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp); if (ret < 0) { return; diff --git a/include/block/block-common.h b/include/block/block-common.h index 7030669f0403..0b831ef87b1b 100644 --- a/include/block/block-common.h +++ b/include/block/block-common.h @@ -356,7 +356,6 @@ typedef enum BlockOpType { BLOCK_OP_TYPE_CHANGE, BLOCK_OP_TYPE_COMMIT_SOURCE, BLOCK_OP_TYPE_COMMIT_TARGET, - BLOCK_OP_TYPE_DATAPLANE, BLOCK_OP_TYPE_DRIVE_DEL, BLOCK_OP_TYPE_EJECT, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,