diff --git a/archive/tar.ts b/archive/tar.ts
index c1eb188bb4dd..757a00ec2f3f 100644
--- a/archive/tar.ts
+++ b/archive/tar.ts
@@ -28,149 +28,37 @@
  * THE SOFTWARE.
  */

-import {
-  FileTypes,
-  type TarInfo,
-  type TarMeta,
-  USTAR_STRUCTURE,
-} from "./_common.ts";
-import type { Reader } from "../io/types.ts";
-import { MultiReader } from "../io/multi_reader.ts";
-import { Buffer } from "../io/buffer.ts";
-import { assert } from "../assert/assert.ts";
-import { HEADER_LENGTH } from "./_common.ts";
-
-export type { TarInfo, TarMeta };
-
-/** Options for {@linkcode Tar.append}. */
-export interface TarOptions extends TarInfo {
-  /**
-   * Filepath of the file to append to the archive
-   */
-  filePath?: string;
-
-  /**
-   * A Reader of any arbitrary content to append to the archive
-   */
-  reader?: Reader;
-
-  /**
-   * Size of the content to be appended. This is only required
-   * when passing a reader to the archive.
-   */
-  contentSize?: number;
-}
-
-const USTAR_MAGIC_HEADER = "ustar\u000000" as const;
-
 /**
- * Simple file reader
+ * @param pathname The pathname of the file or directory inside the archive.
+ * @param iterable The source of the file for the archive.
+ * @param size The size of the file for the archive.
+ * @param [sizeExtension=false] Whether to increase the size limit for this file from the default 8 GiB to 64 GiB.
+ * @param options Optional settings you can specify for the file.
  */
-class FileReader implements Reader {
-  #file?: Deno.FsFile;
-
-  constructor(private filePath: string) {}
-
-  public async read(p: Uint8Array): Promise<number | null> {
-    if (!this.#file) {
-      this.#file = await Deno.open(this.filePath, { read: true });
-    }
-    const res = await this.#file.read(p);
-    if (res === null) {
-      this.#file.close();
-      this.#file = undefined;
-    }
-    return res;
-  }
-}
+export type TarEntry = {
+  pathname: string;
+  size: number;
+  sizeExtension?: boolean;
+  iterable: Iterable<Uint8Array> | AsyncIterable<Uint8Array>;
+  options?: Partial<TarOptions>;
+} | {
+  pathname: string;
+  options?: Partial<TarOptions>;
+};

 /**
- * Pads a number with leading zeros to a specified number of bytes.
- *
- * @param num The number to pad.
- * @param bytes The number of bytes to pad the number to.
- * @returns The padded number as a string.
+ * Metadata options for a {@linkcode TarEntry}, corresponding to the
+ * ustar header fields.
  */
-function pad(num: number, bytes: number): string {
-  return num.toString(8).padStart(bytes, "0");
-}
-
-/**
- * Formats the header data for a tar file entry.
- *
- * @param data The data object containing the values for the tar header fields.
- * @returns The formatted header data as a Uint8Array.
- */
-function formatHeader(data: TarData): Uint8Array {
-  const encoder = new TextEncoder();
-  const buffer = new Uint8Array(HEADER_LENGTH);
-  let offset = 0;
-  for (const { field, length } of USTAR_STRUCTURE) {
-    const entry = encoder.encode(data[field as keyof TarData] || "");
-    buffer.set(entry, offset);
-    offset += length;
-  }
-  return buffer;
-}
-
-/** Base interface for {@linkcode TarDataWithSource}. */
-export interface TarData {
-  /** Name of the file, excluding directory names (if any). */
-  fileName?: string;
-  /** Directory names preceding the file name (if any). */
-  fileNamePrefix?: string;
-  /**
-   * The underlying raw `st_mode` bits that contain the standard Unix
-   * permissions for this file/directory.
-   */
-  fileMode?: string;
-  /**
-   * Numeric user ID of the file owner. This is ignored if the operating system
-   * does not support numeric user IDs.
-   */
-  uid?: string;
-  /**
-   * Numeric group ID of the file owner. This is ignored if the operating
-   * system does not support numeric group IDs.
-   */
-  gid?: string;
-  /**
-   * The size of the file in bytes; for archive members that are symbolic or
-   * hard links to another file, this field is specified as zero.
-   */
-  fileSize?: string;
-  /**
-   * Data modification time of the file at the time it was archived. It
-   * represents the integer number of seconds since January 1, 1970, 00:00 UTC.
-   */
-  mtime?: string;
-  /** The simple sum of all bytes in the header block */
-  checksum?: string;
-  /**
-   * The type of file archived.
-   *
-   * @see {@linkcode FileTypes}
-   */
-  type?: string;
-  /** Ustar magic header */
-  ustar?: string;
-  /** The name of the file owner. */
-  owner?: string;
-  /** The group that the file owner belongs to. */
-  group?: string;
-}
-
-/** Tar data interface for {@linkcode Tar.data}. */
-export interface TarDataWithSource extends TarData {
-  /**
-   * Path of the file to read.
-   */
-  filePath?: string;
-  /**
-   * Buffer reader.
-   */
-  reader?: Reader;
-}
+export type TarOptions = {
+  mode: string;
+  uid: string;
+  gid: string;
+  mtime: number;
+  uname: string;
+  gname: string;
+  devmajor: string;
+  devminor: string;
+};

 /**
  * ### Overview
@@ -178,214 +66,332 @@ export interface TarDataWithSource extends TarData {
  * single file (called an archive, or sometimes a tarball). These archives typically
  * have the '.tar' extension.
  *
  * ### Usage
  * The workflow is to create a Tar instance, append files to it, and then write the
- * tar archive to the filesystem (or other output stream). See the worked example
- * below for details.
- *
- * ### Compression
- * Tar archives are not compressed by default. If you want to compress the archive,
- * you may compress the tar archive after creation, but this capability is not provided
- * here.
+ * tar archive to the filesystem (or other output stream). See the worked example below for details.
  *
  * ### File format and limitations
- *
  * The ustar file format is used for creating the archive file.
  * While this format is compatible with most tar readers,
  * the format has several limitations, including:
- * * Files must be smaller than 8GiB
- * * Filenames (including path) must be shorter than 256 characters
- * * Filenames (including path) cannot contain non-ASCII characters
- * * Sparse files are not supported
+ * * File sizes can be at most 8 GiB.
+ * * Filenames (including path) must be at most 256 characters.
+ * * Sparse files are not supported.
+ *
+ * This implementation does support decoding tarballs with files up to 64 GiB, and can
+ * create them by setting `sizeExtension` to true in `TarEntry` for the `append` method,
+ * but doing so may limit its compatibility with older tar implementations.
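+ *
+ * @example Archiving a file larger than 8 GiB (a minimal sketch; `large.bin`
+ * is illustrative). Setting `sizeExtension` enables the extended 64 GiB size
+ * field described above, at the cost of compatibility with older tar
+ * implementations:
+ * ```ts
+ * import { Tar } from '@std/archive'
+ *
+ * const tar = new Tar();
+ * tar.append({
+ *   pathname: 'large.bin',
+ *   size: (await Deno.stat('large.bin')).size,
+ *   sizeExtension: true,
+ *   iterable: (await Deno.open('large.bin')).readable
+ * });
+ * tar.close();
+ *
+ * await tar.readable.pipeTo((await Deno.create('./out.tar')).writable);
+ * ```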
 *
 * @example
 * ```ts
- * import { Tar } from "https://deno.land/std@$STD_VERSION/archive/tar.ts";
- * import { Buffer } from "https://deno.land/std@$STD_VERSION/io/buffer.ts";
- * import { copy } from "https://deno.land/std@$STD_VERSION/io/copy.ts";
+ * import { Tar } from '@std/archive'
 *
 * const tar = new Tar();
+ * tar.append({
+ *   pathname: 'deno.txt',
+ *   size: (await Deno.stat('deno.txt')).size,
+ *   iterable: (await Deno.open('deno.txt')).readable
+ * });
+ * tar.append({
+ *   pathname: 'filename_in_archive.txt',
+ *   size: (await Deno.stat('filename_in_archive.txt')).size,
+ *   iterable: (await Deno.open('filename_in_archive.txt')).readable
+ * });
+ * tar.close();
 *
- * // Now that we've created our tar, let's add some files to it:
+ * await tar.readable.pipeTo((await Deno.create('./out.tar')).writable);
+ * ```
 *
- * const content = new TextEncoder().encode("Some arbitrary content");
- * await tar.append("deno.txt", {
- *   reader: new Buffer(content),
- *   contentSize: content.byteLength,
- * });
+ * ### Compression
+ * Tar archives are not compressed by default, but if you want to compress the archive,
+ * you may pipe it through a compression stream like `gzip` before writing it to disk.
 *
- * // This file is sourced from the filesystem (and renamed in the archive)
- * await tar.append("filename_in_archive.txt", {
- *   filePath: "./filename_on_filesystem.txt",
- * });
+ * @example
+ * ```ts
+ * import { Tar } from '@std/archive'
 *
- * // Now let's write the tar (with it's two files) to the filesystem
- * // use tar.getReader() to read the contents.
+ * const tar = new Tar();
+ * tar.append({
+ *   pathname: 'deno.txt',
+ *   size: (await Deno.stat('deno.txt')).size,
+ *   iterable: (await Deno.open('deno.txt')).readable
+ * });
+ * tar.append({
+ *   pathname: 'filename_in_archive.txt',
+ *   size: (await Deno.stat('filename_in_archive.txt')).size,
+ *   iterable: (await Deno.open('filename_in_archive.txt')).readable
+ * });
+ * tar.close();
 *
- * const writer = await Deno.open("./out.tar", { write: true, create: true });
- * await copy(tar.getReader(), writer);
- * writer.close();
+ * await tar
+ *   .readable
+ *   .pipeThrough(new CompressionStream('gzip'))
+ *   .pipeTo((await Deno.create('./out.tar.gz')).writable);
 * ```
 */
 export class Tar {
-  /** Tar data. */
-  data: TarDataWithSource[];
-
-  /** Constructs a new instance. */
-  constructor() {
-    this.data = [];
-  }
-
+  #paths: string[] = [];
+  #entries: ({
+    prefix: Uint8Array;
+    name: Uint8Array;
+    typeflag: string;
+    options: Partial<TarOptions>;
+    iterable: Iterable<Uint8Array> | AsyncIterable<Uint8Array>;
+    size: number;
+    sizeExtension: boolean;
+  } | {
+    prefix: Uint8Array;
+    name: Uint8Array;
+    typeflag: string;
+    options: Partial<TarOptions>;
+    sizeExtension: boolean;
+  })[] = [];
+  #readable: ReadableStream<Uint8Array>;
+  #finishedAppending = false;
   /**
-   * Append a file or reader of arbitrary content to this tar archive. Directories
-   * appended to the archive append only the directory itself to the archive, not
-   * its contents. To add a directory and its contents, recursively append the
-   * directory's contents. Directories and subdirectories will be created automatically
-   * in the archive as required.
-   *
-   * @param filenameInArchive File name of the content in the archive. E.g.
-   * `test.txt`. Use slash for directory separators.
-   * @param source Details of the source of the content including the
-   * reference to the content itself and potentially any related metadata.
+   * Constructs a new instance.
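+   *
+   * Note: the header blocks and file contents are produced lazily; nothing
+   * is encoded until {@linkcode Tar.readable} is pulled from.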
    */
-  async append(filenameInArchive: string, source: TarOptions) {
-    if (typeof filenameInArchive !== "string") {
-      throw new Error("file name not specified");
-    }
-    let fileName = filenameInArchive;
-
-    /**
-     * Ustar format has a limitation of file name length. Specifically:
-     * 1. File names can contain at most 255 bytes.
-     * 2. File names longer than 100 bytes must be split at a directory separator in two parts,
-     * the first being at most 155 bytes long. So, in most cases file names must be a bit shorter
-     * than 255 bytes.
-     */
-    // separate file name into two parts if needed
-    let fileNamePrefix: string | undefined;
-    if (fileName.length > 100) {
-      let i = fileName.length;
-      while (i >= 0) {
-        i = fileName.lastIndexOf("/", i);
-        if (i <= 155) {
-          fileNamePrefix = fileName.slice(0, i);
-          fileName = fileName.slice(i + 1);
-          break;
-        }
-        i--;
-      }
-      const errMsg =
-        "ustar format does not allow a long file name (length of [file name" +
-        "prefix] + / + [file name] must be shorter than 256 bytes)";
-      if (i < 0 || fileName.length > 100) {
-        throw new Error(errMsg);
-      } else {
-        assert(fileNamePrefix !== undefined);
-        if (fileNamePrefix.length > 155) {
-          throw new Error(errMsg);
-        }
-      }
-    }
+  constructor() {
+    const gen = (async function* (tar) {
+      while (
+        (
+          !tar.#finishedAppending ||
+          tar.#entries.length
+        ) &&
+        // Yield to the event loop so append() and close() calls can land.
+        await new Promise((a) => setTimeout(() => a(true), 0))
+      ) {
+        if (!tar.#entries.length) {
+          continue;
+        }
+
+        const entry = tar.#entries.shift()!;
+        const encoder = new TextEncoder();
+        const header = new Uint8Array(512);
+
+        header.set(entry.name); // name
+        header.set(
+          encoder.encode(
+            (entry.options.mode ?? (entry.typeflag === "5" ? "755" : "644"))
+              .padStart(6, "0") + " \0" + // mode
+              (entry.options.uid ?? "").padStart(6, "0") + " \0" + // uid
+              (entry.options.gid ?? "").padStart(6, "0") + " \0" + // gid
+              ("size" in entry ? entry.size.toString(8) : "").padStart(
+                entry.sizeExtension ? 12 : 11,
+                "0",
+              ) + (entry.sizeExtension ? "" : " ") + // size
+              (entry.options.mtime?.toString(8) ?? "").padStart(11, "0") +
+              " " + // mtime
+              " ".repeat(8) + // checksum | Needs to be updated
+              entry.typeflag + // typeflag
+              "\0".repeat(100) + // linkname
+              "ustar\0" + // magic
+              "00" + // version
+              (entry.options.uname ?? "").padEnd(32, "\0") + // uname
+              (entry.options.gname ?? "").padEnd(32, "\0") + // gname
+              (entry.options.devmajor ?? "").padEnd(8, "\0") + // devmajor
+              (entry.options.devminor ?? "").padEnd(8, "\0"), // devminor
+          ),
+          100,
+        );
+        header.set(entry.prefix, 345); // prefix
+
+        header.set(
+          encoder.encode(
+            header.reduce((x, y) => x + y).toString(8).padStart(6, "0") + "\0",
+          ),
+          148,
+        ); // update checksum
+        yield header;
+
+        if ("size" in entry) {
+          let size = 0;
+          for await (const x of entry.iterable) {
+            size += x.length;
+            yield x;
+          }
+          if (entry.size !== size) {
+            throw new Error(
+              "Invalid Tarball! Provided size did not match bytes read from iterable.",
+            );
+          }
+          // Pad the file out to a 512-byte boundary. When the size is already
+          // a multiple of 512, no padding block is needed.
+          if (entry.size % 512) {
+            yield new Uint8Array(512 - entry.size % 512);
+          }
+        }
+      }
+      // An archive ends with two zero-filled 512-byte blocks.
+      yield new Uint8Array(1024);
+    })(this);
+    this.#readable = new ReadableStream({
+      async pull(controller) {
+        const { done, value } = await gen.next();
+        if (done) {
+          controller.close();
+        } else {
+          controller.enqueue(value);
+        }
+      },
+    });
+  }
+
+  /**
+   * Append a file or directory to the archive.
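+   *
+   * @example Appending in-memory bytes and a directory entry (a minimal
+   * sketch; the pathnames and contents are illustrative). Any
+   * `Iterable<Uint8Array>`, such as a plain array of chunks, works as the
+   * `iterable`:
+   * ```ts
+   * import { Tar } from '@std/archive'
+   *
+   * const tar = new Tar();
+   * const data = new TextEncoder().encode('Hello World!');
+   * tar.append({
+   *   pathname: 'hello.txt',
+   *   size: data.length,
+   *   iterable: [data],
+   * });
+   * // A directory entry has no size or iterable; a trailing slash is
+   * // appended to its pathname automatically.
+   * tar.append({ pathname: 'docs' });
+   * tar.close();
+   * ```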
+   */
+  append(entry: TarEntry): void {
+    if (this.#finishedAppending) {
+      throw new Error("This Tar instance has already been closed.");
+    }
-    source = source || {};
-
-    // set meta data
-    let info: Deno.FileInfo | undefined;
-    if (source.filePath) {
-      info = await Deno.stat(source.filePath);
-      if (info.isDirectory) {
-        info.size = 0;
-        source.reader = new Buffer();
-      }
-    }
-
-    const mode = source.fileMode || (info && info.mode) ||
-      parseInt("777", 8) & 0xfff /* 511 */;
-    const mtime = Math.floor(
-      source.mtime ?? (info?.mtime ?? new Date()).valueOf() / 1000,
-    );
-    const uid = source.uid || 0;
-    const gid = source.gid || 0;
-
-    if (typeof source.owner === "string" && source.owner.length >= 32) {
-      throw new Error(
-        "ustar format does not allow owner name length >= 32 bytes",
-      );
-    }
-    if (typeof source.group === "string" && source.group.length >= 32) {
-      throw new Error(
-        "ustar format does not allow group name length >= 32 bytes",
-      );
-    }
-
-    const fileSize = info?.size ?? source.contentSize;
-    assert(fileSize !== undefined, "fileSize must be set");
-
-    const type = source.type
-      ? FileTypes[source.type as keyof typeof FileTypes]
-      : (info?.isDirectory ? FileTypes.directory : FileTypes.file);
-    const tarData: TarDataWithSource = {
-      fileName,
-      fileNamePrefix,
-      fileMode: pad(mode, 7),
-      uid: pad(uid, 7),
-      gid: pad(gid, 7),
-      fileSize: pad(fileSize, 11),
-      mtime: pad(mtime, 11),
-      checksum: "        ",
-      type: type.toString(),
-      ustar: USTAR_MAGIC_HEADER,
-      owner: source.owner || "",
-      group: source.group || "",
-      filePath: source.filePath,
-      reader: source.reader,
-    };
-
-    // calculate the checksum
-    let checksum = 0;
-    const encoder = new TextEncoder();
-    Object.keys(tarData)
-      .filter((key): boolean => ["filePath", "reader"].indexOf(key) < 0)
-      .forEach(function (key) {
-        checksum += encoder
-          .encode(tarData[key as keyof TarData])
-          .reduce((p, c): number => p + c, 0);
-      });
+
+    if (
+      "size" in entry &&
+      (
+        entry.size < 0 ||
+        Math.pow(8, entry.sizeExtension ? 12 : 11) < entry.size ||
+        Number.isNaN(entry.size)
+      )
+    ) {
+      throw new Error(
+        "Invalid Size Provided! Size cannot exceed 8 GiB by default or 64 GiB with sizeExtension set to true.",
+      );
+    }
+
+    entry.pathname = entry.pathname.split("/").filter((x) => x).join("/");
+    if (entry.pathname.startsWith("./")) {
+      entry.pathname = entry.pathname.slice(2);
+    }
+    if (!("size" in entry)) {
+      entry.pathname += "/";
+    }
+
+    const pathname = new TextEncoder().encode(entry.pathname);
+    if (pathname.length > 256) {
+      throw new Error("Invalid Pathname! Pathname cannot exceed 256 bytes.");
+    }
+
+    let i = Math.max(0, pathname.lastIndexOf(47));
+    if (pathname.slice(i).length > 100) {
+      throw new Error("Invalid Filename! Filename cannot exceed 100 bytes.");
+    }
+
+    if (pathname.length <= 100) {
+      i = 0;
+    } else {
+      for (; i > 0; --i) {
+        i = pathname.lastIndexOf(47, i);
+        if (pathname.slice(i).length > 100) {
+          i = Math.max(0, pathname.indexOf(47, i + 1));
+          break;
+        }
+      }
+    }
+
+    const prefix = pathname.slice(0, i++);
+    if (prefix.length > 155) {
+      throw new Error(
+        "Invalid Pathname! Pathname needs to be splittable on a forward slash separator into a prefix of at most 155 bytes and a name of at most 100 bytes.",
+      );
+    }
+    const name = prefix.length ? pathname.slice(i) : pathname;
+
+    if (this.#paths.includes(entry.pathname)) {
+      return;
+    }
+    this.#paths.push(entry.pathname);
+
+    if ("size" in entry) { // File
+      this.#entries.push({
+        prefix,
+        name,
+        typeflag: "0",
+        options: entry.options ?? {},
+        iterable: entry.iterable,
+        size: entry.size,
+        sizeExtension: entry.sizeExtension ?? false,
+      });
+    } else { // Directory
+      this.#entries.push({
+        prefix,
+        name,
+        typeflag: "5",
+        options: entry.options ?? {},
+        sizeExtension: false,
+      });
+    }
+  }
-
-    tarData.checksum = pad(checksum, 6) + "\u0000 ";
-    this.data.push(tarData);
-  }

+  /**
+   * Close the archive once you're done appending.
+   */
+  close(): void {
+    this.#finishedAppending = true;
+  }
+
   /**
-   * Get a Reader instance for this tar archive.
+   * Read the archive via a `ReadableStream`.
    */
-  getReader(): Reader {
-    const readers: Reader[] = [];
-    this.data.forEach((tarData) => {
-      let { reader } = tarData;
-      const { filePath } = tarData;
-      const headerArr = formatHeader(tarData);
-      readers.push(new Buffer(headerArr));
-      if (!reader) {
-        assert(filePath !== undefined);
-        reader = new FileReader(filePath);
-      }
-      readers.push(reader);
-
-      // to the nearest multiple of recordSize
-      assert(tarData.fileSize !== undefined, "fileSize must be set");
-      readers.push(
-        new Buffer(
-          new Uint8Array(
-            HEADER_LENGTH -
-              (parseInt(tarData.fileSize, 8) % HEADER_LENGTH || HEADER_LENGTH),
-          ),
-        ),
-      );
-    });
-
-    // append 2 empty records
-    readers.push(new Buffer(new Uint8Array(HEADER_LENGTH * 2)));
-    return new MultiReader(readers);
+  get readable(): ReadableStream<Uint8Array> {
+    return this.#readable;
   }
 }

+/**
+ * Like the Tar class, but takes in a ReadableStream of TarEntry and outputs a
+ * ReadableStream of Uint8Array.
+ *
+ * @example
+ * ```ts
+ * import { TarStream } from '@std/archive'
+ *
+ * ReadableStream.from([
+ *   {
+ *     pathname: 'deno.txt',
+ *     size: (await Deno.stat('deno.txt')).size,
+ *     iterable: (await Deno.open('deno.txt')).readable
+ *   },
+ *   {
+ *     pathname: 'filename_in_archive.txt',
+ *     size: (await Deno.stat('filename_in_archive.txt')).size,
+ *     iterable: (await Deno.open('filename_in_archive.txt')).readable
+ *   }
+ * ])
+ *   .pipeThrough(new TarStream())
+ *   .pipeThrough(new CompressionStream('gzip'))
+ *   .pipeTo((await Deno.create('./out.tar.gz')).writable)
+ * ```
+ */
+export class TarStream {
+  #readable: ReadableStream<Uint8Array>;
+  #writable: WritableStream<TarEntry>;
+  /**
+   * Constructs a new instance.
+   */
+  constructor() {
+    const { readable, writable } = new TransformStream<TarEntry, TarEntry>();
+    const tar = new Tar();
+    this.#readable = tar.readable;
+    this.#writable = writable;
+    readable.pipeTo(
+      new WritableStream({
+        write(chunk) {
+          tar.append(chunk);
+        },
+        close() {
+          tar.close();
+        },
+        abort() {
+          tar.close();
+        },
+      }),
+    );
+  }
+
+  /**
+   * Read the archive via a ReadableStream
+   */
+  get readable(): ReadableStream<Uint8Array> {
+    return this.#readable;
+  }
+
+  /**
+   * Write to the archive via a WritableStream
+   */
+  get writable(): WritableStream<TarEntry> {
+    return this.#writable;
+  }
+}
diff --git a/archive/untar.ts b/archive/untar.ts
index d90c27b654bd..09069be68870 100644
--- a/archive/untar.ts
+++ b/archive/untar.ts
@@ -29,152 +29,51 @@
  * THE SOFTWARE.
  */

-import {
-  FileTypes,
-  HEADER_LENGTH,
-  readBlock,
-  type TarMeta,
-  USTAR_STRUCTURE,
-  type UstarFields,
-} from "./_common.ts";
-import { readAll } from "../io/read_all.ts";
-import type { Reader } from "../io/types.ts";
-
 /**
- * Extend TarMeta with the `linkName` property so that readers can access
- * symbolic link values without polluting the world of archive writers.
+ * @param pathname The pathname of the item inside the archive.
+ * @param header The header of the item.
+ * @param readable The contents of the item, if the item is a file.
  */
-export interface TarMetaWithLinkName extends TarMeta {
-  /** File name of the symbolic link. */
-  linkName?: string;
-}
+export type TarItem = {
+  pathname: string;
+  header: TarHeader;
+  readable?: ReadableStream<Uint8Array>;
+};

-/** Tar header with raw, unprocessed bytes as values. */
-export type TarHeader = {
-  [key in UstarFields]: Uint8Array;
-};
-
-// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
-// eight checksum bytes taken to be ascii spaces (decimal value 32)
-const initialChecksum = 8 * 32;
-
-/**
- * Trims a Uint8Array by removing any trailing zero bytes.
- *
- * @param buffer The Uint8Array to trim.
- * @returns A new Uint8Array with trailing zero bytes removed, or the original
- * buffer if no trailing zero bytes are found.
- */
-function trim(buffer: Uint8Array): Uint8Array {
-  const index = buffer.indexOf(0);
-  return index === -1 ? buffer : buffer.subarray(0, index);
-}
-
 /**
- * Parse file header in a tar archive
- * @param length
+ * The header of a file decoded into an object, where `pad` is the remaining
+ * bytes of the header. The `pad` will be larger if the optional properties
+ * are missing.
  */
-function parseHeader(buffer: Uint8Array): TarHeader {
-  const data = {} as TarHeader;
-  let offset = 0;
-  USTAR_STRUCTURE.forEach(function (value) {
-    const arr = buffer.subarray(offset, offset + value.length);
-    data[value.field] = arr;
-    offset += value.length;
-  });
-  return data;
-}
-
-/** Tar entry */
-export interface TarEntry extends TarMetaWithLinkName {}
-
-/** Contains tar header metadata and a reader to the entry's body. */
-export class TarEntry implements Reader {
-  #header: TarHeader;
-  #reader: Reader | (Reader & Deno.Seeker);
-  #size: number;
-  #read = 0;
-  #consumed = false;
-  #entrySize: number;
-
-  /** Constructs a new instance. */
-  constructor(
-    meta: TarMetaWithLinkName,
-    header: TarHeader,
-    reader: Reader | (Reader & Deno.Seeker),
-  ) {
-    Object.assign(this, meta);
-    this.#header = header;
-    this.#reader = reader;
-
-    // File Size
-    this.#size = this.fileSize || 0;
-    // Entry Size
-    const blocks = Math.ceil(this.#size / HEADER_LENGTH);
-    this.#entrySize = blocks * HEADER_LENGTH;
-  }
-
-  /** Returns whether the entry has already been consumed. */
-  get consumed(): boolean {
-    return this.#consumed;
-  }
-
-  /**
-   * Reads up to `p.byteLength` bytes of the tar entry into `p`. It resolves to
-   * the number of bytes read (`0 < n <= p.byteLength`) and rejects if any
-   * error encountered. Even if read() resolves to n < p.byteLength, it may use
-   * all of `p` as scratch space during the call. If some data is available but
-   * not `p.byteLength` bytes, read() conventionally resolves to what is available
-   * instead of waiting for more.
-   */
-  async read(p: Uint8Array): Promise<number | null> {
-    // Bytes left for entry
-    const entryBytesLeft = this.#entrySize - this.#read;
-    const bufSize = Math.min(
-      // bufSize can't be greater than p.length nor bytes left in the entry
-      p.length,
-      entryBytesLeft,
-    );
-
-    if (entryBytesLeft <= 0) {
-      this.#consumed = true;
-      return null;
-    }
-
-    const block = new Uint8Array(bufSize);
-    const n = await readBlock(this.#reader, block);
-    const bytesLeft = this.#size - this.#read;
-
-    this.#read += n || 0;
-    if (n === null || bytesLeft <= 0) {
-      if (n === null) this.#consumed = true;
-      return null;
-    }
-
-    // Remove zero filled
-    const offset = bytesLeft < n ? bytesLeft : n;
-    p.set(block.subarray(0, offset), 0);
-
-    return offset < 0 ? n - Math.abs(offset) : offset;
-  }
-
-  /** Discards the current entry. */
-  async discard() {
-    // Discard current entry
-    if (this.#consumed) return;
-    this.#consumed = true;
-
-    if (typeof (this.#reader as Deno.Seeker).seek === "function") {
-      await (this.#reader as Deno.Seeker).seek(
-        this.#entrySize - this.#read,
-        Deno.SeekMode.Current,
-      );
-      this.#read = this.#entrySize;
-    } else {
-      await readAll(this);
-    }
-  }
-}
+export type TarHeader = {
+  name: string;
+  mode: string;
+  uid: string;
+  gid: string;
+  size: number;
+  mtime: number;
+  checksum: string;
+  typeflag: string;
+  linkname: string;
+  pad: Uint8Array;
+} | {
+  name: string;
+  mode: string;
+  uid: string;
+  gid: string;
+  size: number;
+  mtime: number;
+  checksum: string;
+  typeflag: string;
+  linkname: string;
+  magic: string;
+  version: string;
+  uname: string;
+  gname: string;
+  devmajor: string;
+  devminor: string;
+  prefix: string;
+  pad: Uint8Array;
+};

 /**
  * ### Overview
@@ -183,167 +82,310 @@ export class TarEntry implements Reader {
  * archives typically have the '.tar' extension.
  *
  * ### Supported file formats
- * Only the ustar file format is supported. This is the most common format. The
- * pax file format may also be read, but additional features, such as longer
- * filenames may be ignored.
+ * Only the ustar file format is supported. This is the most common format.
+ * The numeric size extension, which allows file sizes up to 64 GiB, is also supported.
 *
 * ### Usage
- * The workflow is to create a Untar instance referencing the source of the tar file.
- * You can then use the untar reference to extract files one at a time. See the worked
- * example below for details.
- *
- * ### Understanding compression
- * A tar archive may be compressed, often identified by the `.tar.gz` extension.
- * This utility does not support decompression which must be done before extracting
- * the files.
+ * The workflow is to create an UnTar instance, passing in an Iterable or AsyncIterable
+ * of the archive's bytes. You can then iterate over the instance to pull out the entries
+ * one by one and decide if you want to read or skip each of them. Each entry's readable
+ * stream must either be consumed or have its `cancel` method called. The next entry
+ * won't resolve **until** either action is taken on the ReadableStream.
 *
 * @example
 * ```ts
- * import { Untar } from "https://deno.land/std@$STD_VERSION/archive/untar.ts";
- * import { ensureFile } from "https://deno.land/std@$STD_VERSION/fs/ensure_file.ts";
- * import { ensureDir } from "https://deno.land/std@$STD_VERSION/fs/ensure_dir.ts";
- * import { copy } from "https://deno.land/std@$STD_VERSION/io/copy.ts";
+ * import { UnTar } from '@std/archive'
 *
- * using reader = await Deno.open("./out.tar", { read: true });
- * const untar = new Untar(reader);
+ * for await (
+ *   const entry of new UnTar((await Deno.open('./out.tar')).readable)
+ * ) {
+ *   console.log(entry.pathname);
+ *   await entry.readable?.pipeTo((await Deno.create(entry.pathname)).writable);
+ * }
+ * ```
 *
- * for await (const entry of untar) {
- *   console.log(entry); // metadata
+ * ### Decompression
+ * UnTar does not handle decompression itself. One must first run the archive through
+ * the required decompression stream before passing the ReadableStream to UnTar.
 *
- *   if (entry.type === "directory") {
- *     await ensureDir(entry.fileName);
- *     continue;
- *   }
+ * @example
+ * ```ts
+ * import { UnTar } from '@std/archive'
 *
+ * for await (
+ *   const entry of new UnTar(
+ *     (await Deno.open('./out.tar.gz'))
+ *       .readable
+ *       .pipeThrough(new DecompressionStream('gzip'))
+ *   )
+ * ) {
+ *   console.log(entry.pathname);
+ *   await entry.readable?.pipeTo((await Deno.create(entry.pathname)).writable);
+ * }
- *   await ensureFile(entry.fileName);
- *   using file = await Deno.open(entry.fileName, { write: true });
- *   // is a reader.
- *   await copy(entry, file);
- * }
 * ```
 */
-export class Untar {
-  /** Internal reader. */
-  reader: Reader;
-  /** Internal block. */
-  block: Uint8Array;
-  #entry: TarEntry | undefined;
-
-  /** Constructs a new instance. */
-  constructor(reader: Reader) {
-    this.reader = reader;
-    this.block = new Uint8Array(HEADER_LENGTH);
-  }
-
-  #checksum(header: Uint8Array): number {
-    let sum = initialChecksum;
-    for (let i = 0; i < HEADER_LENGTH; i++) {
-      if (i >= 148 && i < 156) {
-        // Ignore checksum header
-        continue;
-      }
-      sum += header[i]!;
-    }
-    return sum;
-  }
-
-  async #getAndValidateHeader(): Promise<TarHeader | null> {
-    await readBlock(this.reader, this.block);
-    const header = parseHeader(this.block);
-
-    // calculate the checksum
-    const decoder = new TextDecoder();
-    const checksum = this.#checksum(this.block);
-
-    if (parseInt(decoder.decode(header.checksum), 8) !== checksum) {
-      if (checksum === initialChecksum) {
-        // EOF
-        return null;
-      }
-      throw new Error("checksum error");
-    }
-
-    const magic = decoder.decode(header.ustar);
-
-    if (magic.indexOf("ustar")) {
-      throw new Error(`unsupported archive format: ${magic}`);
-    }
-
-    return header;
+export class UnTar extends ReadableStream<TarItem> {
+  /**
+   * Constructs a new instance.
+   */
+  constructor(iterable: Iterable<Uint8Array> | AsyncIterable<Uint8Array>) {
+    const reader = new ReadableStream<Uint8Array>(
+      { // Converts iterable into ReadableStream.
+        iter: Symbol.iterator in iterable
+          ? iterable[Symbol.iterator]()
+          : iterable[Symbol.asyncIterator](),
+        async pull(controller) {
+          const { done, value } = await this.iter.next();
+          if (done) {
+            controller.close();
+          } else {
+            controller.enqueue(value);
+          }
+        },
+      } as UnderlyingSource<Uint8Array> & {
+        iter: Iterator<Uint8Array> | AsyncIterator<Uint8Array>;
+      },
+    )
+      .pipeThrough(
+        new TransformStream(
+          { // Slices ReadableStream's Uint8Array into 512 byte chunks.
+            push: new Uint8Array(0),
+            transform(chunk, controller) {
+              const x = new Uint8Array(this.push.length + chunk.length);
+              x.set(this.push);
+              x.set(chunk, this.push.length);
+              for (let i = 512; i <= x.length; i += 512) {
+                controller.enqueue(x.slice(i - 512, i));
+              }
+              this.push = x.length % 512
+                ? x.slice(-x.length % 512)
+                : new Uint8Array(0);
+            },
+            flush(controller) {
+              if (this.push.length) {
+                controller.error("Tarball has an unexpected number of bytes.");
+              }
+            },
+          } as Transformer<Uint8Array, Uint8Array> & { push: Uint8Array },
+        ),
+      )
+      .pipeThrough(
+        new TransformStream(
+          { // Trims the last two Uint8Array chunks off.
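+            // A valid archive ends with two zero-filled 512-byte blocks. By
+            // always holding back the two most recently seen chunks, the
+            // flush step can verify that the archive ends with exactly those
+            // blocks and drop them instead of emitting them as data.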
+            array: [],
+            transform(chunk, controller) {
+              this.array.push(chunk);
+              if (this.array.length === 3) {
+                controller.enqueue(this.array.shift()!);
+              }
+            },
+            flush(controller) {
+              if (this.array.length < 2) {
+                controller.error("Tarball was too small to be valid.");
+              } else if (
+                !this.array.every((array) => array.every((byte) => byte === 0))
+              ) {
+                controller.error("Tarball has invalid ending.");
+              }
+            },
+          } as Transformer<Uint8Array, Uint8Array> & { array: Uint8Array[] },
+        ),
+      )
+      .getReader();
+
+    let header: TarHeader | undefined;
+    super(
+      {
+        cancelled: false,
+        async pull(controller) {
+          while (header != undefined) {
+            await new Promise((a) => setTimeout(a, 0));
+          }
+
+          const { done, value } = await reader.read();
+          if (done) {
+            return controller.close();
+          }
+
+          const decoder = new TextDecoder();
+          { // Validate checksum
+            const checksum = value.slice();
+            checksum.set(new Uint8Array(8).fill(32), 148);
+            if (
+              checksum.reduce((x, y) => x + y) !==
+                parseInt(decoder.decode(value.slice(148, 156 - 2)), 8)
+            ) {
+              return controller.error(
+                "Invalid Tarball. Header failed to pass checksum.",
+              );
+            }
+          }
+          header = {
+            name: decoder.decode(value.slice(0, 100)).replaceAll("\0", ""),
+            mode: decoder.decode(value.slice(100, 108 - 2)),
+            uid: decoder.decode(value.slice(108, 116 - 2)),
+            gid: decoder.decode(value.slice(116, 124 - 2)),
+            size: parseInt(decoder.decode(value.slice(124, 136)).trimEnd(), 8),
+            mtime: parseInt(decoder.decode(value.slice(136, 148 - 1)), 8),
+            checksum: decoder.decode(value.slice(148, 156 - 2)),
+            typeflag: decoder.decode(value.slice(156, 157)),
+            linkname: decoder.decode(value.slice(157, 257)).replaceAll(
+              "\0",
+              "",
+            ),
+            pad: value.slice(257),
+          };
+          if (header.typeflag === "\0") {
+            header.typeflag = "0";
+          }
+          // Check if header is POSIX ustar | new TextEncoder().encode('ustar\0' + '00')
+          if (
+            [117, 115, 116, 97, 114, 0, 48, 48].every((byte, i) =>
+              value[i + 257] === byte
+            )
+          ) {
+            header = {
+              ...header,
+              magic: decoder.decode(value.slice(257, 263)),
+              version: decoder.decode(value.slice(263, 265)),
+              uname: decoder.decode(value.slice(265, 297)).replaceAll("\0", ""),
+              gname: decoder.decode(value.slice(297, 329)).replaceAll("\0", ""),
+              devmajor: decoder.decode(value.slice(329, 337)).replaceAll(
+                "\0",
+                "",
+              ),
+              devminor: decoder.decode(value.slice(337, 345)).replaceAll(
+                "\0",
+                "",
+              ),
+              prefix: decoder.decode(value.slice(345, 500)).replaceAll(
+                "\0",
+                "",
+              ),
+              pad: value.slice(500),
+            };
+          }
+
+          if (header.typeflag === "0") {
+            const size = header.size;
+            let i = Math.ceil(size / 512);
+            const isCancelled = () => this.cancelled;
+            let lock = false;
+            controller.enqueue(
+              {
+                pathname: ("prefix" in header && header.prefix.length
+                  ? header.prefix + "/"
+                  : "") + header.name,
+                header,
+                readable: new ReadableStream({
+                  async pull(controller) {
+                    if (i > 0) {
+                      lock = true;
+                      const { done, value } = await reader.read();
+                      if (done) {
+                        header = undefined;
+                        controller.error("Tarball ended unexpectedly");
+                      } else {
+                        // Pull is unlocked before enqueue is called because if
+                        // pull is in the middle of processing a chunk when
+                        // cancel is called, nothing after enqueue will run.
+                        lock = false;
+                        controller.enqueue(
+                          // The final block may be partially padding; slice
+                          // to the file's remaining bytes. When size is an
+                          // exact multiple of 512, the whole block is data.
+                          i-- === 1 ? value.slice(0, size % 512 || 512) : value,
+                        );
+                      }
+                    } else {
+                      header = undefined;
+                      if (isCancelled()) {
+                        reader.cancel();
+                      }
+                      controller.close();
+                    }
+                  },
+                  async cancel() {
+                    while (lock) {
+                      await new Promise((a) => setTimeout(a, 0));
+                    }
+                    try {
+                      while (i-- > 0) {
+                        if ((await reader.read()).done) {
+                          throw new Error("Tarball ended unexpectedly");
+                        }
+                      }
+                    } finally {
+                      header = undefined;
+                    }
+                  },
+                }),
+              } satisfies TarItem,
+            );
+          } else {
+            controller.enqueue(
+              {
+                pathname: ("prefix" in header && header.prefix.length
+                  ? header.prefix + "/"
+                  : "") + header.name,
+                header,
+              } satisfies TarItem,
+            );
+            header = undefined;
+          }
+        },
+        cancel() {
+          this.cancelled = true;
+        },
+      } as UnderlyingSource<TarItem> & { cancelled: boolean },
+    );
   }
+}

-  #getMetadata(header: TarHeader): TarMetaWithLinkName {
-    const decoder = new TextDecoder();
-    // get meta data
-    const meta: TarMetaWithLinkName = {
-      fileName: decoder.decode(trim(header.fileName)),
-    };
-    const fileNamePrefix = trim(header.fileNamePrefix);
-    if (fileNamePrefix.byteLength > 0) {
-      meta.fileName = decoder.decode(fileNamePrefix) + "/" + meta.fileName;
-    }
-    (["fileMode", "mtime", "uid", "gid"] as const)
-      .forEach((key) => {
-        const arr = trim(header[key]);
-        if (arr.byteLength > 0) {
-          meta[key] = parseInt(decoder.decode(arr), 8);
-        }
-      });
-    (["owner", "group", "type"] as const)
-      .forEach((key) => {
-        const arr = trim(header[key]);
-        if (arr.byteLength > 0) {
-          meta[key] = decoder.decode(arr);
-        }
-      });
-
-    meta.fileSize = parseInt(decoder.decode(header.fileSize), 8);
-    meta.type = FileTypes[parseInt(meta.type!)] ?? meta.type;
-
-    // Only create the `linkName` property for symbolic links to minimize
-    // the effect on existing code that only deals with non-links.
-    if (meta.type === "symlink") {
-      meta.linkName = decoder.decode(trim(header.linkName));
-    }
-
-    return meta;
-  }

+/**
+ * Like the UnTar class, but takes in a ReadableStream of Uint8Array and
+ * outputs a ReadableStream of TarItem.
+ *
+ * @example
+ * ```ts
+ * import { UnTarStream } from '@std/archive'
+ *
+ * await Deno.mkdir('out/')
+ * for await (
+ *   const entry of (await Deno.open('./out.tar.gz'))
+ *     .readable
+ *     .pipeThrough(new DecompressionStream('gzip'))
+ *     .pipeThrough(new UnTarStream())
+ * ) {
+ *   await entry.readable?.pipeTo((await Deno.create('out/' + entry.pathname)).writable);
+ * }
+ * ```
+ */
+export class UnTarStream {
+  #readable: ReadableStream<TarItem>;
+  #writable: WritableStream<Uint8Array>;
+  /**
+   * Constructs a new instance.
+   */
+  constructor() {
+    const { readable, writable } = new TransformStream<Uint8Array, Uint8Array>();
+    const unTar = new UnTar(readable);
+    this.#readable = unTar;
+    this.#writable = writable;
+  }

   /**
-   * Extract the next entry of the tar archive.
-   *
-   * @returns A TarEntry with header metadata and a reader to the entry's
-   * body, or null if there are no more entries to extract.
+   * Read the contents of the archive via a ReadableStream
    */
-  async extract(): Promise<TarEntry | null> {
-    if (this.#entry && !this.#entry.consumed) {
-      // If entry body was not read, discard the body
-      // so we can read the next entry.
-      await this.#entry.discard();
-    }
-
-    const header = await this.#getAndValidateHeader();
-    if (header === null) return null;
-
-    const meta = this.#getMetadata(header);
-
-    this.#entry = new TarEntry(meta, header, this.reader);
-
-    return this.#entry;
+  get readable(): ReadableStream<TarItem> {
+    return this.#readable;
   }

   /**
-   * Iterate over all entries of the tar archive.
-   *
-   * @yields A TarEntry with tar header metadata and a reader to the entry's body.
+   * Write the archive via a WritableStream
    */
-  async *[Symbol.asyncIterator](): AsyncIterableIterator<TarEntry> {
-    while (true) {
-      const entry = await this.extract();
-
-      if (entry === null) return;
-
-      yield entry;
-    }
+  get writable(): WritableStream<Uint8Array> {
+    return this.#writable;
   }
 }