From e8c7e4b60df55b6efcd288e0d362e9c87d4a313c Mon Sep 17 00:00:00 2001 From: Patrick Ferris Date: Wed, 12 Jul 2023 21:10:03 +0100 Subject: [PATCH 1/2] Add eio backend for tar --- dune-project | 15 ++++ eio/dune | 4 ++ eio/tar_eio.ml | 184 ++++++++++++++++++++++++++++++++++++++++++++++++ eio/tar_eio.mli | 69 ++++++++++++++++++ tar-eio.opam | 38 ++++++++++ 5 files changed, 310 insertions(+) create mode 100644 eio/dune create mode 100644 eio/tar_eio.ml create mode 100644 eio/tar_eio.mli create mode 100644 tar-eio.opam diff --git a/dune-project b/dune-project index 2286445..a3f6da8 100644 --- a/dune-project +++ b/dune-project @@ -80,3 +80,18 @@ (tar-unix (and :with-test (= :version))) ) ) + +(package + (name tar-eio) + (synopsis "Decode and encode tar format files using Eio") + (description +"\| tar is a library to read and write tar files with an emphasis on +"\| streaming. This library uses Eio to provide a portable tar library. + ) + (tags ("org:xapi-project" "org:mirage")) + (depends + (ocaml (>= 4.08.0)) + (eio (>= 0.10.0)) + (tar (= :version)) + ) +) diff --git a/eio/dune b/eio/dune new file mode 100644 index 0000000..0ec6d2e --- /dev/null +++ b/eio/dune @@ -0,0 +1,4 @@ +(library + (name tar_eio) + (public_name tar-eio) + (libraries tar eio)) diff --git a/eio/tar_eio.ml b/eio/tar_eio.ml new file mode 100644 index 0000000..ecf391b --- /dev/null +++ b/eio/tar_eio.ml @@ -0,0 +1,184 @@ +(* + * Copyright (C) 2006-2013 Citrix Systems Inc. + * Copyright (C) 2012 Thomas Gazagnaire + * Copyright (C) 2023 Patrick Ferris + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *) + +open Eio + +module Monad = struct + type 'a t = 'a + let (>>=) a f = f a + let return = Fun.id + let return_unit = () +end + +module Reader = struct + type in_channel = Flow.source + type 'a t = 'a Monad.t + let really_read f b = Flow.read_exact f b |> Monad.return + let skip f (n: int) = + let open Monad in + let buffer_size = 32768 in + let buffer = Cstruct.create buffer_size in + let rec loop (n: int) = + if n <= 0 then Monad.return () + else + let amount = min n buffer_size in + let block = Cstruct.sub buffer 0 amount in + really_read f block >>= fun () -> + loop (n - amount) in + loop n +end +let really_read = Reader.really_read + +module Writer = struct + type out_channel = Flow.sink + type 'a t = 'a Monad.t + let really_write f b = Flow.write f [ b ] |> Monad.return +end +let really_write = Writer.really_write + +let copy_n ifd ofd n = + let open Monad in + let block_size = 32768 in + let buffer = Cstruct.create block_size in + let rec loop remaining = + if remaining = 0L then Monad.return_unit else begin + let this = Int64.(to_int (min (of_int block_size) remaining)) in + let block = Cstruct.sub buffer 0 this in + really_read ifd block >>= fun () -> + really_write ofd block >>= fun () -> + loop (Int64.(sub remaining (of_int this))) + end in + loop n + +module HR = Tar.HeaderReader(Monad)(Reader) +module HW = Tar.HeaderWriter(Monad)(Writer) + +let get_next_header ?level ~global ic = + match HR.read ?level ~global (ic :> Flow.source) with + | Error `Eof -> Monad.return None + | Ok hdrs -> Monad.return (Some hdrs) + +(* Eio needs a non-file-opening stat. *) +let stat path = + Eio.Path.with_open_in path @@ fun f -> + Eio.File.stat f + +(** Return the header needed for a particular file on disk *) +let header_of_file ?level ?getpwuid ?getgrgid filepath : Tar.Header.t = + let level = match level with None -> Tar.Header.V7 | Some level -> level in + let stat = stat filepath in + let pwent = Option.map (fun f -> f stat.uid) getpwuid in + let grent = Option.map (fun f -> f stat.gid) getgrgid in + let uname = if level = V7 then Some "" else pwent in + let gname = if level = V7 then Some "" else grent in + let file_mode = stat.perm in + let user_id = stat.uid |> Int64.to_int in + let group_id = stat.gid |> Int64.to_int in + let file_size = stat.size |> Optint.Int63.to_int64 in + let mod_time = Int64.of_float stat.mtime in + let link_indicator = Tar.Header.Link.Normal in + let link_name = "" in + let devmajor = if level = Ustar then stat.dev |> Int64.to_int else 0 in + let devminor = if level = Ustar then stat.rdev |> Int64.to_int else 0 in + Monad.return (Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name + ?uname ?gname ~devmajor ~devminor (snd filepath) file_size) + +let write_block ?level ?global (header: Tar.Header.t) (body: #Flow.sink -> unit) sink = + HW.write ?level ?global header (sink :> Flow.sink); + body sink; + really_write sink (Tar.Header.zero_padding header) + +let write_end sink = + really_write sink Tar.Header.zero_block; + really_write sink Tar.Header.zero_block + +(** Utility functions for operating over whole tar archives *) +module Archive = struct + + (** Read the next header, apply the function 'f' to the fd and the header. The function + should leave the fd positioned immediately after the datablock. Finally the function + skips past the zero padding to the next header *) + let with_next_file src ~(global: Tar.Header.Extended.t option) + (f: Eio.Flow.source -> Tar.Header.Extended.t option -> Tar.Header.t -> 'a) = + match get_next_header ~global src with + | Some (hdr, global) -> + let result = f src global hdr in + Reader.skip src (Tar.Header.compute_zero_padding_length hdr); + Some result + | None -> + None + + (** List the contents of a tar *) + let list ?level fd = + let rec loop global acc = + match get_next_header ?level ~global (fd :> Flow.source) with + | None -> Monad.return (List.rev acc) + | Some (hdr, global) -> + Reader.skip fd (Int64.to_int hdr.Tar.Header.file_size); + Reader.skip fd (Tar.Header.compute_zero_padding_length hdr); + loop global (hdr :: acc) in + loop None [] + + (** Extract the contents of a tar to directory 'dest' *) + let extract dest ifd = + let rec loop global () = + match get_next_header ~global ifd with + | None -> Monad.return_unit + | Some (hdr, global) -> + let filename = dest hdr.Tar.Header.file_name in + Eio.Path.(with_open_out ~create:(`Exclusive 0) filename) @@ fun ofd -> + copy_n ifd ofd hdr.Tar.Header.file_size; + Reader.skip ifd (Tar.Header.compute_zero_padding_length hdr); + loop global () + in + loop None () + + let transform ?level f (ifd : #Flow.source) (ofd : #Flow.sink) = + let rec loop global () = + match get_next_header ~global ifd with + | None -> Monad.return_unit + | Some (header', global') -> + let header = f header' in + let body = fun _ -> copy_n ifd ofd header.Tar.Header.file_size in + write_block ?level ?global:(if global <> global' then global' else None) header body ofd; + Reader.skip ifd (Tar.Header.compute_zero_padding_length header'); + loop global' () + in + loop None (); + write_end ofd + + (** Create a tar on file descriptor fd from the filename list + 'files' *) + let create ?getpwuid ?getgrgid files ofd = + let file filename = + let stat = stat filename in + if stat.kind <> `Regular_file then + (* Skipping, not a regular file. *) + Monad.return_unit + else begin + let hdr = header_of_file ?getpwuid ?getgrgid filename in + write_block hdr (fun ofd -> + Eio.Path.with_open_in filename @@ fun ifd -> + copy_n ifd ofd hdr.Tar.Header.file_size + ) ofd + end in + List.iter file files; + (* Add two empty blocks *) + write_end ofd + +end diff --git a/eio/tar_eio.mli b/eio/tar_eio.mli new file mode 100644 index 0000000..c604b7d --- /dev/null +++ b/eio/tar_eio.mli @@ -0,0 +1,69 @@ +(* + * Copyright (C) 2006-2009 Citrix Systems Inc. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + *) + +(** I/O for tar-formatted data *) + +(** Returns the next header block or None if two consecutive + zero-filled blocks are discovered. Assumes stream is positioned at the + possible start of a header block. + @raise End_of_file if the stream unexpectedly fails. *) +val get_next_header : ?level:Tar.Header.compatibility -> global:Tar.Header.Extended.t option -> Eio.Flow.source -> + (Tar.Header.t * Tar.Header.Extended.t option) option + +(** Return the header needed for a particular file on disk. [getpwuid] and [getgrgid] are optional + functions that should take the uid and gid respectively and return the passwd and group entry + names for each. These will be added to the header. *) +val header_of_file : + ?level:Tar.Header.compatibility -> + ?getpwuid:(int64 -> string) -> + ?getgrgid:(int64 -> string) -> + Eio.Fs.dir Eio.Path.t -> + Tar.Header.t + +module Archive : sig + (** Utility functions for operating over whole tar archives *) + + (** Read the next header, apply the function 'f' to the source and the header. The function + should leave the source positioned immediately after the datablock. Finally the function + skips past the zero padding to the next header. *) + val with_next_file : Eio.Flow.source -> global:Tar.Header.Extended.t option -> + (Eio.Flow.source -> Tar.Header.Extended.t option -> Tar.Header.t -> 'a) -> 'a option + + (** List the contents of a tar to stdout. *) + val list : ?level:Tar.Header.compatibility -> #Eio.Flow.source -> Tar.Header.t list + + (** [extract dest] extract the contents of a tar. + Apply [dest] on each source filename to change the destination + filename. It only supports extracting regular files from the + top-level of the archive. *) + val extract : (string -> Eio.Fs.dir Eio.Path.t) -> Eio.Flow.source -> unit + + (** [transform f src sink] applies [f] to the header of each + file in the tar inputted in [src], and writes the resulting + headers to [sink] preserving the content and structure of the + archive. *) + val transform : ?level:Tar.Header.compatibility -> (Tar.Header.t -> Tar.Header.t) -> #Eio.Flow.source -> #Eio.Flow.sink -> unit + + (** Create a tar in the sink from a list of file paths. It only supports regular files. + + See {! header_of_file} for the meaning of [getpwuid] and [getgrgid]. *) + val create : + ?getpwuid:(int64 -> string) -> + ?getgrgid:(int64 -> string) -> + Eio.Fs.dir Eio.Path.t list -> + #Eio.Flow.sink -> + unit +end diff --git a/tar-eio.opam b/tar-eio.opam new file mode 100644 index 0000000..259ce87 --- /dev/null +++ b/tar-eio.opam @@ -0,0 +1,38 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "Decode and encode tar format files using Eio" +description: """ +tar is a library to read and write tar files with an emphasis on +streaming. This library uses Eio to provide a portable tar library. +""" +maintainer: ["Reynir Björnsson " "dave@recoil.org"] +authors: ["Dave Scott" "Thomas Gazagnaire" "David Allsopp" "Antonin Décimo"] +license: "ISC" +tags: ["org:xapi-project" "org:mirage"] +homepage: "https://github.com/mirage/ocaml-tar" +doc: "https://mirage.github.io/ocaml-tar/" +bug-reports: "https://github.com/mirage/ocaml-tar/issues" +depends: [ + "dune" {>= "2.9"} + "ocaml" {>= "4.08.0"} + "eio" {>= "0.10.0"} + "tar" {= version} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "--promote-install-files=false" + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] + ["dune" "install" "-p" name "--create-install-files" name] +] +dev-repo: "git+https://github.com/mirage/ocaml-tar.git" From 2b7c64aa2ffd5d2bb37e990398ed46c5106f1587 Mon Sep 17 00:00:00 2001 From: Patrick Ferris Date: Mon, 17 Jul 2023 19:09:50 +0100 Subject: [PATCH 2/2] Remove unnecessary identity monad calls --- eio/tar_eio.ml | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/eio/tar_eio.ml b/eio/tar_eio.ml index ecf391b..b1d2c7f 100644 --- a/eio/tar_eio.ml +++ b/eio/tar_eio.ml @@ -22,23 +22,21 @@ module Monad = struct type 'a t = 'a let (>>=) a f = f a let return = Fun.id - let return_unit = () end module Reader = struct type in_channel = Flow.source - type 'a t = 'a Monad.t - let really_read f b = Flow.read_exact f b |> Monad.return + type 'a t = 'a + let really_read f b = Flow.read_exact f b let skip f (n: int) = - let open Monad in let buffer_size = 32768 in let buffer = Cstruct.create buffer_size in let rec loop (n: int) = - if n <= 0 then Monad.return () + if n <= 0 then () else let amount = min n buffer_size in let block = Cstruct.sub buffer 0 amount in - really_read f block >>= fun () -> + really_read f block; loop (n - amount) in loop n end @@ -46,21 +44,20 @@ let really_read = Reader.really_read module Writer = struct type out_channel = Flow.sink - type 'a t = 'a Monad.t - let really_write f b = Flow.write f [ b ] |> Monad.return + type 'a t = 'a + let really_write f b = Flow.write f [ b ] end let really_write = Writer.really_write let copy_n ifd ofd n = - let open Monad in let block_size = 32768 in let buffer = Cstruct.create block_size in let rec loop remaining = - if remaining = 0L then Monad.return_unit else begin + if remaining = 0L then () else begin let this = Int64.(to_int (min (of_int block_size) remaining)) in let block = Cstruct.sub buffer 0 this in - really_read ifd block >>= fun () -> - really_write ofd block >>= fun () -> + really_read ifd block; + really_write ofd block; loop (Int64.(sub remaining (of_int this))) end in loop n @@ -70,8 +67,8 @@ module HW = Tar.HeaderWriter(Monad)(Writer) let get_next_header ?level ~global ic = match HR.read ?level ~global (ic :> Flow.source) with - | Error `Eof -> Monad.return None - | Ok hdrs -> Monad.return (Some hdrs) + | Error `Eof -> None + | Ok hdrs -> Some hdrs (* Eio needs a non-file-opening stat. *) let stat path = @@ -95,8 +92,8 @@ let header_of_file ?level ?getpwuid ?getgrgid filepath : Tar.Header.t = let link_name = "" in let devmajor = if level = Ustar then stat.dev |> Int64.to_int else 0 in let devminor = if level = Ustar then stat.rdev |> Int64.to_int else 0 in - Monad.return (Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name - ?uname ?gname ~devmajor ~devminor (snd filepath) file_size) + Tar.Header.make ~file_mode ~user_id ~group_id ~mod_time ~link_indicator ~link_name + ?uname ?gname ~devmajor ~devminor (snd filepath) file_size let write_block ?level ?global (header: Tar.Header.t) (body: #Flow.sink -> unit) sink = HW.write ?level ?global header (sink :> Flow.sink); @@ -127,7 +124,7 @@ module Archive = struct let list ?level fd = let rec loop global acc = match get_next_header ?level ~global (fd :> Flow.source) with - | None -> Monad.return (List.rev acc) + | None -> List.rev acc | Some (hdr, global) -> Reader.skip fd (Int64.to_int hdr.Tar.Header.file_size); Reader.skip fd (Tar.Header.compute_zero_padding_length hdr); @@ -138,7 +135,7 @@ module Archive = struct let extract dest ifd = let rec loop global () = match get_next_header ~global ifd with - | None -> Monad.return_unit + | None -> () | Some (hdr, global) -> let filename = dest hdr.Tar.Header.file_name in Eio.Path.(with_open_out ~create:(`Exclusive 0) filename) @@ fun ofd -> @@ -151,7 +148,7 @@ module Archive = struct let transform ?level f (ifd : #Flow.source) (ofd : #Flow.sink) = let rec loop global () = match get_next_header ~global ifd with - | None -> Monad.return_unit + | None -> () | Some (header', global') -> let header = f header' in let body = fun _ -> copy_n ifd ofd header.Tar.Header.file_size in @@ -169,7 +166,7 @@ module Archive = struct let stat = stat filename in if stat.kind <> `Regular_file then (* Skipping, not a regular file. *) - Monad.return_unit + () else begin let hdr = header_of_file ?getpwuid ?getgrgid filename in write_block hdr (fun ofd ->