diff --git a/ocaml/libs/vhd/vhd_format/f.ml b/ocaml/libs/vhd/vhd_format/f.ml index 4285d2fabd4..c3bf4dce4b6 100644 --- a/ocaml/libs/vhd/vhd_format/f.ml +++ b/ocaml/libs/vhd/vhd_format/f.ml @@ -2903,24 +2903,10 @@ functor let raw ?from (vhd : fd Vhd.t) = raw_common ?from vhd - let vhd_blocks_to_json (t : fd Vhd.t) = + let vhd_blocks_to_json_aux (t : fd Vhd.t) blocks = let block_size_sectors_shift = t.Vhd.header.Header.block_size_sectors_shift in - let max_table_entries = Vhd.used_max_table_entries t in - - let include_block = include_block None t in - - let blocks = - Seq.init max_table_entries Fun.id - |> Seq.filter_map (fun i -> - if include_block i then - Some (`Int i) - else - None - ) - |> List.of_seq - in let json = `Assoc [ @@ -2934,6 +2920,52 @@ functor let json_string = Yojson.to_string json in print_string json_string ; return () + let vhd_blocks_to_json (t : fd Vhd.t) = + let max_table_entries = Vhd.used_max_table_entries t in + let blocks = + Seq.init max_table_entries Fun.id + |> Seq.filter_map (fun i -> + if include_block None t i then + Some (`Int i) + else + None + ) + |> List.of_seq + in + vhd_blocks_to_json_aux t blocks + + let vhd_blocks_to_json_interval (t : fd Vhd.t) = + let max_table_entries = Vhd.used_max_table_entries t in + let blocks, last_block = + Seq.init max_table_entries Fun.id + |> Seq.fold_left + (fun (acc, left_block) i -> + if include_block None t i then + match left_block with + | Some _ -> + (acc, left_block) + | None -> + (acc, Some i) + else + match left_block with + | Some x -> + (`List [`Int x; `Int (i - 1)] :: acc, None) + | None -> + (acc, None) + ) + ([], None) + in + (* Close off the interval we were tracking we ran off the end of the seq *) + let blocks = + match last_block with + | Some x -> + `List [`Int x; `Int (max_table_entries - 1)] :: blocks + | None -> + blocks + in + let blocks = List.rev blocks in + vhd_blocks_to_json_aux t blocks + let vhd_common ?from ?raw ?(emit_batmap = false) (t : fd Vhd.t) = let block_size_sectors_shift = t.Vhd.header.Header.block_size_sectors_shift @@ -3173,6 +3205,8 @@ functor Vhd_input.vhd_common ?from ~raw vhd let blocks_json = Vhd_input.vhd_blocks_to_json + + let blocks_json_interval = Vhd_input.vhd_blocks_to_json_interval end (* Create a VHD stream from data on t, using `include_block` guide us which blocks have data *) diff --git a/ocaml/libs/vhd/vhd_format/f.mli b/ocaml/libs/vhd/vhd_format/f.mli index a4b4e976183..fdeca3ad15e 100644 --- a/ocaml/libs/vhd/vhd_format/f.mli +++ b/ocaml/libs/vhd/vhd_format/f.mli @@ -470,6 +470,8 @@ module From_file : functor (F : S.FILE) -> sig [from] into [t] *) val blocks_json : fd Vhd.t -> unit t + + val blocks_json_interval : fd Vhd.t -> unit t end module Raw_input : sig diff --git a/ocaml/qcow-stream-tool/qcow_stream_tool.ml b/ocaml/qcow-stream-tool/qcow_stream_tool.ml index b8605c2f44e..6228ec46e88 100644 --- a/ocaml/qcow-stream-tool/qcow_stream_tool.ml +++ b/ocaml/qcow-stream-tool/qcow_stream_tool.ml @@ -4,41 +4,6 @@ module Impl = struct let stream_decode output = Qcow_stream.stream_decode Unix.stdin output ; `Ok () - - let read_headers qcow_path = - let open Lwt.Syntax in - let t = - let* fd = Lwt_unix.openfile qcow_path [Unix.O_RDONLY] 0 in - let* virtual_size, cluster_bits, _, data_cluster_map = - Qcow_stream.start_stream_decode fd - in - (* TODO: List.map becomes tail-recursive in OCaml 5.1, and could be used here instead *) - let clusters = - data_cluster_map - |> Qcow_types.Cluster.Map.to_seq - |> Seq.map (fun (_, virt_address) -> - let ( >> ) = Int64.shift_right_logical in - let address = - Int64.to_int (virt_address >> Int32.to_int cluster_bits) - in - `Int address - ) - |> List.of_seq - in - let json = - `Assoc - [ - ("virtual_size", `Int (Int64.to_int virtual_size)) - ; ("cluster_bits", `Int (Int32.to_int cluster_bits)) - ; ("data_clusters", `List clusters) - ] - in - let json_string = Yojson.to_string json in - let* () = Lwt_io.print json_string in - let* () = Lwt_io.flush Lwt_io.stdout in - Lwt.return_unit - in - Lwt_main.run t ; `Ok () end module Cli = struct @@ -46,10 +11,6 @@ module Cli = struct let doc = Printf.sprintf "Path to the output file." in Arg.(value & pos 0 string default & info [] ~doc) - let input = - let doc = Printf.sprintf "Path to the input file." in - Arg.(required & pos 0 (some string) None & info [] ~doc) - let stream_decode_cmd = let doc = "decode qcow2 formatted data from stdin and write a raw image" in let man = @@ -62,18 +23,7 @@ module Cli = struct (Cmd.info "stream_decode" ~doc ~man) Term.(ret (const Impl.stream_decode $ output "test.raw")) - let read_headers_cmd = - let doc = - "Determine allocated clusters by parsing qcow2 file at the provided \ - path. Returns JSON like the following: {'virtual_size': X, \ - 'cluster_bits': Y, 'data_clusters': [1,2,3]}" - in - let man = [`S "DESCRIPTION"; `P doc] in - Cmd.v - (Cmd.info "read_headers" ~doc ~man) - Term.(ret (const Impl.read_headers $ input)) - - let cmds = [stream_decode_cmd; read_headers_cmd] + let cmds = [stream_decode_cmd] end let info = diff --git a/ocaml/vhd-tool/cli/main.ml b/ocaml/vhd-tool/cli/main.ml index a4043b52b3c..e131879c4ed 100644 --- a/ocaml/vhd-tool/cli/main.ml +++ b/ocaml/vhd-tool/cli/main.ml @@ -385,19 +385,34 @@ let stream_cmd = , Cmd.info "stream" ~sdocs:_common_options ~doc ~man ) +let vhd_source = + let doc = Printf.sprintf "Path to the VHD file" in + Arg.(required & pos 0 (some file) None & info [] ~doc) + let read_headers_cmd = let doc = {|Parse VHD headers and output allocated blocks information in JSON format \ like: {"virtual_size": X, "cluster_bits": X, "data_clusters": [1,2,3]}|} in - let source = - let doc = Printf.sprintf "Path to the VHD file" in - Arg.(required & pos 0 (some file) None & info [] ~doc) - in - ( Term.(ret (const Impl.read_headers $ common_options_t $ source)) + ( Term.( + ret + (const (Impl.read_headers ~legacy:true) $ common_options_t $ vhd_source) + ) , Cmd.info "read_headers" ~sdocs:_common_options ~doc ) +let read_headers_interval_cmd = + let doc = + {|Parse VHD headers and output allocated blocks intervals information in JSON format \ + like: {"virtual_size": X, "cluster_bits": X, "data_clusters": [[1,13],[17,17],[19,272]]|} + in + ( Term.( + ret + (const (Impl.read_headers ~legacy:false) $ common_options_t $ vhd_source) + ) + , Cmd.info "read_headers_interval" ~sdocs:_common_options ~doc + ) + let cmds = [ info_cmd @@ -408,6 +423,7 @@ let cmds = ; serve_cmd ; stream_cmd ; read_headers_cmd + ; read_headers_interval_cmd ] |> List.map (fun (t, i) -> Cmd.v i t) diff --git a/ocaml/vhd-tool/src/impl.ml b/ocaml/vhd-tool/src/impl.ml index 530c915b8e3..6c759535176 100644 --- a/ocaml/vhd-tool/src/impl.ml +++ b/ocaml/vhd-tool/src/impl.ml @@ -1168,11 +1168,15 @@ let stream_t common args ?(progress = no_progress_bar) () = args.StreamCommon.tar_filename_prefix args.StreamCommon.good_ciphersuites args.StreamCommon.verify_cert -let read_headers common source = +let read_headers common source ~legacy = let path = [Filename.dirname source] in let thread = retry common 3 (fun () -> Vhd_IO.openchain ~path source false) >>= fun t -> - Vhd_IO.close t >>= fun () -> Hybrid_input.blocks_json t + Vhd_IO.close t >>= fun () -> + if legacy then + Hybrid_input.blocks_json t + else + Hybrid_input.blocks_json_interval t in Lwt_main.run thread ; `Ok () diff --git a/ocaml/vhd-tool/src/impl.mli b/ocaml/vhd-tool/src/impl.mli index 13fe7ba6853..d2adae5a9dc 100644 --- a/ocaml/vhd-tool/src/impl.mli +++ b/ocaml/vhd-tool/src/impl.mli @@ -36,7 +36,7 @@ val stream : Common.t -> StreamCommon.t -> [> `Error of bool * string | `Ok of unit] val read_headers : - Common.t -> string -> [> `Error of bool * string | `Ok of unit] + Common.t -> string -> legacy:bool -> [> `Error of bool * string | `Ok of unit] val serve : Common.t diff --git a/ocaml/xapi/qcow_tool_wrapper.ml b/ocaml/xapi/qcow_tool_wrapper.ml index c04617f4fa6..652db754ac7 100644 --- a/ocaml/xapi/qcow_tool_wrapper.ml +++ b/ocaml/xapi/qcow_tool_wrapper.ml @@ -22,28 +22,43 @@ let receive (progress_cb : int -> unit) (unix_fd : Unix.file_descr) Vhd_qcow_parsing.run_tool qcow_tool progress_cb args ~input_fd:unix_fd let read_header qcow_path = - let args = ["read_headers"; qcow_path] in - let qcow_tool = !Xapi_globs.qcow_stream_tool in - let pipe_reader, pipe_writer = Unix.pipe ~cloexec:true () in - let progress_cb _ = () in - let (_ : Thread.t) = + let run_in_thread tool args pipe_writer replace_fds = Thread.create (fun () -> Xapi_stdext_pervasives.Pervasiveext.finally (fun () -> - Vhd_qcow_parsing.run_tool qcow_tool progress_cb args - ~output_fd:pipe_writer + Vhd_qcow_parsing.run_tool tool progress_cb args + ~output_fd:pipe_writer ~replace_fds ) (fun () -> Unix.close pipe_writer) ) () in - pipe_reader + + let map_pipe_reader, map_pipe_writer = Unix.pipe ~cloexec:true () in + let (_ : Thread.t) = + run_in_thread !Xapi_globs.qemu_img + ["map"; qcow_path; "--output=json"] + map_pipe_writer [] + in + + let info_pipe_reader, info_pipe_writer = Unix.pipe ~cloexec:true () in + let (_ : Thread.t) = + run_in_thread !Xapi_globs.qemu_img + ["info"; qcow_path; "--output=json"] + info_pipe_writer [] + in + + (map_pipe_reader, info_pipe_reader) let parse_header qcow_path = - let pipe_reader = read_header qcow_path in - Vhd_qcow_parsing.parse_header pipe_reader + let pipe, _ = read_header qcow_path in + Vhd_qcow_parsing.parse_header pipe + +let parse_header_interval qcow_path = + let pipes = read_header qcow_path in + Vhd_qcow_parsing.parse_header_qemu_img pipes let send ?relative_to (progress_cb : int -> unit) (unix_fd : Unix.file_descr) (path : string) (_size : Int64.t) = @@ -54,7 +69,12 @@ let send ?relative_to (progress_cb : int -> unit) (unix_fd : Unix.file_descr) (* If VDI is backed by QCOW, parse the header to determine nonzero clusters to avoid reading all of the raw disk *) - let input_fd = Result.map read_header qcow_path |> Result.to_option in + let input_fds = Result.map read_header qcow_path |> Result.to_option in + + (* TODO: If VHD headers are to be consulted as well, qcow2-to-stdout + needs to properly account for cluster_bits. Currently QCOW2 export + from VHD-backed VDIs will just revert to raw, without any + allocation accounting. *) (* Parse the header of the VDI we are diffing against as well *) let relative_to_qcow_path = @@ -64,9 +84,13 @@ let send ?relative_to (progress_cb : int -> unit) (unix_fd : Unix.file_descr) | None -> None in - let diff_fd = Option.map read_header relative_to_qcow_path in + let diff_fds = Option.map read_header relative_to_qcow_path in + + let map_fd_string = Uuidx.(to_string (make ())) in + let info_fd_string = Uuidx.(to_string (make ())) in + let diff_map_fd_string = Uuidx.(to_string (make ())) in + let diff_info_fd_string = Uuidx.(to_string (make ())) in - let unique_string = Uuidx.(to_string (make ())) in let args = [path] @ (match relative_to with None -> [] | Some vdi -> ["--diff"; vdi]) @@ -74,18 +98,46 @@ let send ?relative_to (progress_cb : int -> unit) (unix_fd : Unix.file_descr) | None -> [] | Some _ -> - ["--json-header-diff"; unique_string] + [ + "--json-header-diff-map" + ; diff_map_fd_string + ; "--json-header-diff-info" + ; diff_info_fd_string + ] ) - @ match qcow_path with Error _ -> [] | Ok _ -> ["--json-header"] + @ + match qcow_path with + | Error _ -> + [] + | Ok _ -> + [ + "--json-header-map" + ; map_fd_string + ; "--json-header-info" + ; info_fd_string + ] in let qcow_tool = !Xapi_globs.qcow_to_stdout in - let replace_fds = Option.map (fun fd -> [(unique_string, fd)]) diff_fd in + let replace_fds = + Option.map + (fun (map_fd, info_fd) -> + let rfds = [(map_fd_string, map_fd); (info_fd_string, info_fd)] in + match diff_fds with + | Some (diff_map_fd, diff_info_fd) -> + (diff_map_fd_string, diff_map_fd) + :: (diff_info_fd_string, diff_info_fd) + :: rfds + | None -> + rfds + ) + input_fds + in Xapi_stdext_pervasives.Pervasiveext.finally (fun () -> - Vhd_qcow_parsing.run_tool qcow_tool progress_cb args ?input_fd - ~output_fd:unix_fd ?replace_fds + Vhd_qcow_parsing.run_tool qcow_tool progress_cb args ~output_fd:unix_fd + ?replace_fds ) (fun () -> - Option.iter Unix.close input_fd ; - Option.iter Unix.close diff_fd + Option.iter (fun (x, y) -> Unix.close x ; Unix.close y) input_fds ; + Option.iter (fun (x, y) -> Unix.close x ; Unix.close y) diff_fds ) diff --git a/ocaml/xapi/qcow_tool_wrapper.mli b/ocaml/xapi/qcow_tool_wrapper.mli index c1c4a6426af..16cede3bbcd 100644 --- a/ocaml/xapi/qcow_tool_wrapper.mli +++ b/ocaml/xapi/qcow_tool_wrapper.mli @@ -25,3 +25,5 @@ val send : -> unit val parse_header : string -> int * int list + +val parse_header_interval : string -> int * (int * int) list diff --git a/ocaml/xapi/stream_vdi.ml b/ocaml/xapi/stream_vdi.ml index 9b4e5bebd7b..4481cb767de 100644 --- a/ocaml/xapi/stream_vdi.ml +++ b/ocaml/xapi/stream_vdi.ml @@ -306,37 +306,99 @@ let send_one ofd (__context : Context.t) rpc session_id progress refresh_session | Ok (Some (driver, path)) when driver = "vhd" || driver = "qcow2" -> ( try - (* Read backing file headers, then only read and write + let last_chunk = Int64.((to_int size - 1) / to_int chunk_size) in + if !Xapi_globs.vhd_legacy_blocks_format then + (* Read backing file headers, then only read and write allocated clusters from the bitmap *) - let cluster_size, cluster_list = - match driver with - | "vhd" -> - Vhd_tool_wrapper.parse_header path - | "qcow2" -> - Qcow_tool_wrapper.parse_header path - | _ -> - failwith (Printf.sprintf "%s: unreachable" __FUNCTION__) - in - let set = - get_allocated_chunks_from_clusters cluster_size cluster_list - in - (* First and last chunks are always written - it's a limitation + let cluster_size, cluster_list = + match driver with + | "vhd" -> + Vhd_tool_wrapper.parse_header path + | "qcow2" -> + Qcow_tool_wrapper.parse_header path + | _ -> + failwith (Printf.sprintf "%s: unreachable" __FUNCTION__) + in + let set = + get_allocated_chunks_from_clusters cluster_size cluster_list + in + (* First and last chunks are always written - it's a limitation of the XVA format *) - let last_chunk = - Int64.((to_int size - to_int chunk_size + 1) / to_int chunk_size) - in - let set = set |> ChunkSet.add 0 |> ChunkSet.add last_chunk in - ChunkSet.iter - (fun this_chunk_no -> - let offset = Int64.(mul (of_int this_chunk_no) chunk_size) in - let _ = - write_chunk this_chunk_no offset - ~write_check:(fun _ _ -> true) - ~seek:true ~timeout_workaround:false - in - () - ) - set + let set = set |> ChunkSet.add 0 |> ChunkSet.add last_chunk in + ChunkSet.iter + (fun this_chunk_no -> + let offset = + Int64.(mul (of_int this_chunk_no) chunk_size) + in + let _ = + write_chunk this_chunk_no offset + ~write_check:(fun _ _ -> true) + ~seek:true ~timeout_workaround:false + in + () + ) + set + else + let cluster_size, cluster_list = + match driver with + | "vhd" -> + Vhd_tool_wrapper.parse_header_interval path + | "qcow2" -> + Qcow_tool_wrapper.parse_header_interval path + | _ -> + failwith (Printf.sprintf "%s: unreachable" __FUNCTION__) + in + let process_chunk chunk_no ~force = + if force || (chunk_no <> 0 && chunk_no <> last_chunk) then + let offset = Int64.(mul (of_int chunk_no) chunk_size) in + let _ = + write_chunk chunk_no offset + ~write_check:(fun _ _ -> true) + ~seek:true ~timeout_workaround:false + in + () + in + + process_chunk 0 ~force:true ; + + let chunk_size = Int64.to_int chunk_size in + let chunks_in_cluster = + (cluster_size + chunk_size - 1) / chunk_size + in + (* Iterate over allocated intervals, copying every cluster inside *) + let _ = + List.fold_left + (fun prev_chunk (cluster_no_left, cluster_no_right) -> + let calc_chunk cluster = + let cluster_offset = cluster * cluster_size in + let chunk_no = cluster_offset / chunk_size in + chunk_no + in + let left_chunk_no = calc_chunk cluster_no_left in + let right_chunk_no = + calc_chunk cluster_no_right + chunks_in_cluster - 1 + in + + (* If a chunk contains multiple clusters, we could have + already copied it. In that case, start with the + following chunk. *) + let left_chunk_no = + if left_chunk_no = prev_chunk then + left_chunk_no + 1 + else + left_chunk_no + in + + for i = left_chunk_no to right_chunk_no do + process_chunk i ~force:false + done ; + + right_chunk_no + ) + (-1) cluster_list + in + + process_chunk last_chunk ~force:true with e -> debug "%s: Falling back to reading the whole raw disk after %s" __FUNCTION__ (Printexc.to_string e) ; diff --git a/ocaml/xapi/vhd_qcow_parsing.ml b/ocaml/xapi/vhd_qcow_parsing.ml index 627f16bb049..90ed8a7b288 100644 --- a/ocaml/xapi/vhd_qcow_parsing.ml +++ b/ocaml/xapi/vhd_qcow_parsing.ml @@ -44,15 +44,64 @@ let run_tool tool ?(replace_fds = []) ?input_fd ?output_fd error "%s output: %s" tool out ; raise (Api_errors.Server_error (Api_errors.vdi_io_error, [out])) -let parse_header pipe_reader = +let read_json pipe_reader = let ic = Unix.in_channel_of_descr pipe_reader in let buf = Buffer.create 4096 in let json = Yojson.Basic.from_channel ~buf ~fname:"header.json" ic in - In_channel.close ic ; + In_channel.close ic ; json + +let parse_header_aux pipe_reader = + let json = read_json pipe_reader in let cluster_size = 1 lsl Yojson.Basic.Util.(member "cluster_bits" json |> to_int) in + (cluster_size, json) + +let parse_header pipe_reader = + let cluster_size, json = parse_header_aux pipe_reader in let cluster_list = Yojson.Basic.Util.(member "data_clusters" json |> to_list |> List.map to_int) in (cluster_size, cluster_list) + +let parse_header_qemu_img (map_pipe_reader, info_pipe_reader) = + let info_json = read_json info_pipe_reader in + let cluster_size = + Yojson.Basic.Util.(member "cluster-size" info_json |> to_int) + in + let map_json = read_json map_pipe_reader in + let cluster_list = + Yojson.Basic.Util.( + map_json + |> to_list + |> List.filter_map (fun i -> + let present = member "data" i |> to_bool in + if present then + let start_cluster = (member "start" i |> to_int) / cluster_size in + let end_cluster = + start_cluster + ((member "length" i |> to_int) / cluster_size) - 1 + in + Some (start_cluster, end_cluster) + else + None + ) + ) + in + (cluster_size, cluster_list) + +let parse_header_interval pipe_reader = + let cluster_size, json = parse_header_aux pipe_reader in + let cluster_list = + Yojson.Basic.Util.( + member "data_clusters" json + |> to_list + |> List.map (fun x -> + match to_list x with + | x :: y :: _ -> + (to_int x, to_int y) + | _ -> + raise (Invalid_argument "Invalid JSON") + ) + ) + in + (cluster_size, cluster_list) diff --git a/ocaml/xapi/vhd_qcow_parsing.mli b/ocaml/xapi/vhd_qcow_parsing.mli index 25417c0b91c..f43b56dab51 100644 --- a/ocaml/xapi/vhd_qcow_parsing.mli +++ b/ocaml/xapi/vhd_qcow_parsing.mli @@ -22,3 +22,8 @@ val run_tool : -> unit val parse_header : Unix.file_descr -> int * int list + +val parse_header_interval : Unix.file_descr -> int * (int * int) list + +val parse_header_qemu_img : + Unix.file_descr * Unix.file_descr -> int * (int * int) list diff --git a/ocaml/xapi/vhd_tool_wrapper.ml b/ocaml/xapi/vhd_tool_wrapper.ml index 64afa6b4522..a2faa5acc09 100644 --- a/ocaml/xapi/vhd_tool_wrapper.ml +++ b/ocaml/xapi/vhd_tool_wrapper.ml @@ -116,9 +116,14 @@ let receive progress_cb format protocol (s : Unix.file_descr) in run_vhd_tool progress_cb args s s' path -let read_vhd_header path = +let read_vhd_header path ~legacy = let vhd_tool = !Xapi_globs.vhd_tool in - let args = ["read_headers"; path] in + let args = + if legacy then + ["read_headers"; path] + else + ["read_headers_interval"; path] + in let pipe_reader, pipe_writer = Unix.pipe ~cloexec:true () in let progress_cb _ = () in @@ -137,9 +142,13 @@ let read_vhd_header path = pipe_reader let parse_header vhd_path = - let pipe_reader = read_vhd_header vhd_path in + let pipe_reader = read_vhd_header vhd_path ~legacy:true in Vhd_qcow_parsing.parse_header pipe_reader +let parse_header_interval vhd_path = + let pipe_reader = read_vhd_header vhd_path ~legacy:false in + Vhd_qcow_parsing.parse_header_interval pipe_reader + let send progress_cb ?relative_to (protocol : string) (dest_format : string) (s : Unix.file_descr) (path : string) (size : Int64.t) (prefix : string) = let __FUN = __FUNCTION__ in diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 4d90de5c686..25c76a64a39 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -841,6 +841,8 @@ let qcow_to_stdout = ref "/opt/xensource/libexec/qcow2-to-stdout.py" let qcow_stream_tool = ref "qcow-stream-tool" +let qemu_img = ref "/usr/lib64/xen/bin/qemu-img" + let fence = ref "fence" let host_bugreport_upload = ref "host-bugreport-upload" @@ -1146,6 +1148,10 @@ let validate_reusable_pool_session = ref false let vm_sysprep_enabled = ref true (* enable VM.sysprep API *) +let vhd_legacy_blocks_format = ref true +(* If false, uses an interval-based JSON blocks format for VHD instead of the + legacy format which includes all the allocated clusters *) + let vm_sysprep_wait = ref 5.0 (* seconds *) let test_open = ref 0 @@ -1870,6 +1876,12 @@ let other_options = , (fun () -> string_of_float !vm_sysprep_wait) , "Time in seconds to wait for VM to recognise inserted CD" ) + ; ( "vhd-legacy-blocks-format" + , Arg.Set vhd_legacy_blocks_format + , (fun () -> string_of_bool !vhd_legacy_blocks_format) + , "Choose whether legacy/sparse block format will be used for determining \ + allocated VHD clusters" + ) ; ( "proxy_poll_period_timeout" , Arg.Set_float proxy_poll_period_timeout , (fun () -> string_of_float !proxy_poll_period_timeout) @@ -2004,6 +2016,7 @@ module Resources = struct ; ("vhd-tool", vhd_tool, "Path to vhd-tool") ; ("qcow_to_stdout", qcow_to_stdout, "Path to qcow-to-stdout script") ; ("qcow_stream_tool", qcow_stream_tool, "Path to qcow-stream-tool") + ; ("qemu-img", qemu_img, "Path to qemu-img") ; ("fence", fence, "Path to fence binary, used for HA host fencing") ; ( "host-bugreport-upload" , host_bugreport_upload diff --git a/python3/libexec/qcow2-to-stdout.py b/python3/libexec/qcow2-to-stdout.py index 4ce1cc72b56..00fee7d2f40 100755 --- a/python3/libexec/qcow2-to-stdout.py +++ b/python3/libexec/qcow2-to-stdout.py @@ -91,6 +91,65 @@ def write_features(cluster, offset, data_file_name): offset += 48 +class Interval: + """ + Represents the allocated virtual cluster intervals in a sparse file + """ + def __init__(self, lst): + self.intervals = lst + self.intervals.sort(key=lambda x: x[0]) + + + def __contains__(self, cluster): + """ + Checks if cluster is in one of the intervals, removes it from the + interval if true + """ + # Check if cluster is within [min, max] + if (len(self.intervals) == 0 or + (self.intervals[-1][1] < cluster or self.intervals[0][0] > cluster)): + return False + + # Binary search for the interval that could contain the cluster + l = 0 + h = len(self.intervals) - 1 + while l <= h: + mid = (l + h) // 2 + current = self.intervals[mid] + + if cluster >= current[0] and cluster <= current[1]: + if cluster == current[0] and cluster == current[1]: + # Remove the cluster from the interval + del self.intervals[mid] + return True + + if cluster == current[0]: + # Shrink interval from the left + left = current[0] + 1 + right = current[1] + elif cluster == current[1]: + # Shrink interval from the right + left = current[0] + right = current[1] - 1 + else: + # Split the original interval into two + left = current[0] + right = cluster + self.intervals.insert(mid+1, [cluster+1, current[1]]) + + self.intervals[mid] = [left, right] + return True + elif cluster < current[0]: + h = mid - 1 + elif cluster > current[1]: + l = mid + 1 + + return False + + def __iter__(self): + return self.intervals.__iter__() + + def write_qcow2_content(input_file, cluster_size, refcount_bits, data_file_name, data_file_raw, diff_file_name, virtual_size, nonzero_clusters, @@ -166,26 +225,29 @@ def check_cluster_allocate(idx, cluster, cluster_to_compare_with): # In case input_file is bigger than diff_file_name, first check # if clusters from diff_file_name differ, and then check if the # rest contain data - diff_nonzero_clusters_set = set(diff_nonzero_clusters) - for cluster in nonzero_clusters: - if cluster >= last_diff_cluster: - allocate_cluster(cluster) - elif cluster in diff_nonzero_clusters_set: - # If a cluster has different data from the original_cluster - # then it must be allocated - cluster_data = os.pread(fd, cluster_size, cluster_size * cluster) - original_cluster = os.pread(diff_fd, cluster_size, cluster_size * cluster) - check_cluster_allocate(cluster, cluster_data, original_cluster) - diff_nonzero_clusters_set.remove(cluster) - else: - allocate_cluster(cluster) + diff_nonzero_clusters_set = Interval(diff_nonzero_clusters) + + for (cluster_left, cluster_right) in nonzero_clusters: + for cluster in range(cluster_left, cluster_right+1): + if cluster >= last_diff_cluster: + allocate_cluster(cluster) + elif cluster in diff_nonzero_clusters_set: + # If a cluster has different data from the original_cluster + # then it must be allocated + cluster_data = os.pread(fd, cluster_size, cluster_size * cluster) + original_cluster = os.pread(diff_fd, cluster_size, cluster_size * cluster) + check_cluster_allocate(cluster, cluster_data, original_cluster) + else: + allocate_cluster(cluster) # These are not present in the original file - for cluster in diff_nonzero_clusters_set: - allocate_cluster(cluster) + for (cluster_left, cluster_right) in diff_nonzero_clusters_set: + for cluster in range(cluster_left, cluster_right+1): + allocate_cluster(cluster) else: - for cluster in nonzero_clusters: - allocate_cluster(cluster) + for (cluster_left, cluster_right) in nonzero_clusters: + for cluster in range(cluster_left, cluster_right+1): + allocate_cluster(cluster) else: zero_cluster = bytes(cluster_size) @@ -427,18 +489,36 @@ def main(): action="store_true", ) parser.add_argument( - "--json-header", - dest="json_header", - help="stdin contains a JSON of pre-parsed QCOW2 information" - "(virtual_size, data_clusters, cluster_bits)", - action="store_true", + "--json-header-map", + dest="json_header_map", + help="File descriptor that contains a JSON of pre-parsed QCOW2" + "data clusters information for input_file", + type=int, + default=None, ) parser.add_argument( - "--json-header-diff", - dest="json_header_diff", - metavar="json_header_diff", - help="File descriptor that contains a JSON of pre-parsed QCOW2 " - "information for the diff_file_name", + "--json-header-info", + dest="json_header_info", + help="File descriptor that contains a JSON of pre-parsed QCOW2" + "virtual size, cluster size information for input_file", + type=int, + default=None, + ) + parser.add_argument( + "--json-header-diff-map", + dest="json_header_diff_map", + metavar="json_header_diff_map", + help="File descriptor that contains a JSON of pre-parsed QCOW2" + "data clusters for diff_file_name", + type=int, + default=None, + ) + parser.add_argument( + "--json-header-diff-info", + dest="json_header_diff_info", + metavar="json_header_diff_info", + help="File descriptor that contains a JSON of pre-parsed QCOW2" + "virtual size, cluster size information for diff_file_name", type=int, default=None, ) @@ -451,29 +531,31 @@ def main(): nonzero_clusters = None diff_virtual_size = None diff_nonzero_clusters = None - if args.json_header: - json_header = json.load(sys.stdin) - try: - virtual_size = json_header['virtual_size'] - source_cluster_size = 2 ** json_header['cluster_bits'] - if source_cluster_size != args.cluster_size: - args.cluster_size = source_cluster_size - nonzero_clusters = json_header['data_clusters'] - except KeyError as e: - raise RuntimeError(f'Incomplete JSON - missing value for {str(e)}') from e - if args.json_header_diff: - f = os.fdopen(args.json_header_diff) - json_header = json.load(f) + + def parse_json_files(info_fd, map_fd): + map_f = os.fdopen(map_fd) + info_f = os.fdopen(info_fd) + map_json = json.load(map_f) + info_json = json.load(info_f) + try: - diff_virtual_size = json_header['virtual_size'] - if 2 ** json_header['cluster_bits'] == args.cluster_size: - diff_nonzero_clusters = json_header['data_clusters'] + virt_size = info_json['virtual-size'] + cluster_size = info_json['cluster-size'] + if cluster_size == args.cluster_size: + clusters = [ [int(el["start"] / cluster_size), int((el["start"] + el["length"]) / cluster_size) - 1] for el in map_json if el["data"] ] else: sys.exit(f"[Error] Cluster size in the files being compared are " - f"different: {2**json_header['cluster_bits']} vs. {args.cluster_size}") + f"different: {info_json['cluster-size']} vs. {args.cluster_size}") + return virt_size, clusters except KeyError as e: raise RuntimeError(f'Incomplete JSON for the diff - missing value for {str(e)}') from e + + if args.json_header_info and args.json_header_map: + virtual_size, nonzero_clusters = parse_json_files(args.json_header_info, args.json_header_map) + if args.json_header_diff_info and args.json_header_diff_map: + diff_virtual_size, diff_nonzero_clusters = parse_json_files(args.json_header_diff_info, args.json_header_diff_map) + if not os.path.exists(args.input_file): sys.exit(f"[Error] {args.input_file} does not exist.")