From cb977bd1693a47ee1a81b5a996664767a1a0bc52 Mon Sep 17 00:00:00 2001 From: BlackAsLight <44320105+BlackAsLight@users.noreply.github.com> Date: Sun, 31 Mar 2024 18:06:48 +1100 Subject: [PATCH 1/6] refactor(archive): A clean implementation of Tar --- archive/tar.ts | 466 +++++++++++++++-------------------------------- archive/untar.ts | 452 ++++++++++++++++----------------------------- 2 files changed, 308 insertions(+), 610 deletions(-) diff --git a/archive/tar.ts b/archive/tar.ts index c1eb188bb4dd..37868d916e27 100644 --- a/archive/tar.ts +++ b/archive/tar.ts @@ -28,148 +28,15 @@ * THE SOFTWARE. */ -import { - FileTypes, - type TarInfo, - type TarMeta, - USTAR_STRUCTURE, -} from "./_common.ts"; -import type { Reader } from "../io/types.ts"; -import { MultiReader } from "../io/multi_reader.ts"; -import { Buffer } from "../io/buffer.ts"; -import { assert } from "../assert/assert.ts"; -import { HEADER_LENGTH } from "./_common.ts"; - -export type { TarInfo, TarMeta }; - -/** Options for {@linkcode Tar.append}. */ -export interface TarOptions extends TarInfo { - /** - * Filepath of the file to append to the archive - */ - filePath?: string; - - /** - * A Reader of any arbitrary content to append to the archive - */ - reader?: Reader; - - /** - * Size of the content to be appended. This is only required - * when passing a reader to the archive. - */ - contentSize?: number; -} - -const USTAR_MAGIC_HEADER = "ustar\u000000" as const; - -/** - * Simple file reader - */ -class FileReader implements Reader { - #file?: Deno.FsFile; - - constructor(private filePath: string) {} - - public async read(p: Uint8Array): Promise { - if (!this.#file) { - this.#file = await Deno.open(this.filePath, { read: true }); - } - const res = await this.#file.read(p); - if (res === null) { - this.#file.close(); - this.#file = undefined; - } - return res; - } -} - /** - * Pads a number with leading zeros to a specified number of bytes. - * - * @param num The number to pad. - * @param bytes The number of bytes to pad the number to. - * @returns The padded number as a string. + * @param pathname is what you want the file to be called inside the archive. + * @param iterable is the source of the file in Uint8Array form. + * @param size is the size of the source in bytes. Providing the wrong size can lead to corrupt data. */ -function pad(num: number, bytes: number): string { - return num.toString(8).padStart(bytes, "0"); -} - -/** - * Formats the header data for a tar file entry. - * - * @param data The data object containing the values for the tar header fields. - * @returns The formatted header data as a Uint8Array. - */ -function formatHeader(data: TarData): Uint8Array { - const encoder = new TextEncoder(); - const buffer = new Uint8Array(HEADER_LENGTH); - let offset = 0; - for (const { field, length } of USTAR_STRUCTURE) { - const entry = encoder.encode(data[field as keyof TarData] || ""); - buffer.set(entry, offset); - offset += length; - } - return buffer; -} - -/** Base interface for {@linkcode TarDataWithSource}. */ -export interface TarData { - /** Name of the file, excluding directory names (if any). */ - fileName?: string; - /** Directory names preceding the file name (if any). */ - fileNamePrefix?: string; - /** - * The underlying raw `st_mode` bits that contain the standard Unix - * permissions for this file/directory. - */ - fileMode?: string; - /** - * Numeric user ID of the file owner. This is ignored if the operating system - * does not support numeric user IDs. 
- */ - uid?: string; - /** - * Numeric group ID of the file owner. This is ignored if the operating - * system does not support numeric group IDs. - */ - gid?: string; - /** - * The size of the file in bytes; for archive members that are symbolic or - * hard links to another file, this field is specified as zero. - */ - fileSize?: string; - /** - * Data modification time of the file at the time it was archived. It - * represents the integer number of seconds since January 1, 1970, 00:00 UTC. - */ - mtime?: string; - /** The simple sum of all bytes in the header block */ - checksum?: string; - /** - * The type of file archived. - * - * @see {@linkcode FileTypes} - */ - type?: string; - /** Ustar magic header */ - ustar?: string; - /** The name of the file owner. */ - owner?: string; - /** The group that the file owner belongs to. */ - group?: string; -} - -/** Tar data interface for {@linkcode Tar.data}. */ -export interface TarDataWithSource extends TarData { - /** - * Path of the file to read. - */ - filePath?: string; - /** - * Buffer reader. - */ - reader?: Reader; +export type TarFile = { + pathname: string, + iterable: Iterable | AsyncIterable, + size: number } /** @@ -178,214 +45,177 @@ export interface TarDataWithSource extends TarData { * single file (called an archive, or sometimes a tarball). These archives typically * have the '.tar' extension. * - * ### Usage + * # Usage * The workflow is to create a Tar instance, append files to it, and then write the - * tar archive to the filesystem (or other output stream). See the worked example - * below for details. - * - * ### Compression - * Tar archives are not compressed by default. If you want to compress the archive, - * you may compress the tar archive after creation, but this capability is not provided - * here. + * tar archive to the filesystem (or other output stream). See the worked example below for details. * * ### File format and limitations - * - * The ustar file format is used for creating the archive file. + * The ustar file format used for creating the archive file. * While this format is compatible with most tar readers, * the format has several limitations, including: - * * Files must be smaller than 8GiB - * * Filenames (including path) must be shorter than 256 characters - * * Filenames (including path) cannot contain non-ASCII characters - * * Sparse files are not supported + * * File sizes can be at most 8 GiBs. + * * Filenames (including path) must be shorter than 256 characters. + * * Sparse files are not supported. + * This implementation does support decoding tarballs with files up to 64 GiBs, and can create them + * via setting `sizeExtension` to true in the `append` method, but doing so may limit its compatibility + * with older tar implementations. 
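 * (The 8 GiB limit stems from the header's 11-digit octal size field; the
 * 12-digit variant written when `sizeExtension` is enabled raises this to
 * 8^12 bytes, i.e. 64 GiB.)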
* * @example * ```ts - * import { Tar } from "https://deno.land/std@$STD_VERSION/archive/tar.ts"; - * import { Buffer } from "https://deno.land/std@$STD_VERSION/io/buffer.ts"; - * import { copy } from "https://deno.land/std@$STD_VERSION/io/copy.ts"; + * import { Tar } from '@std/archive' * * const tar = new Tar(); + * tar.append({ + * pathname: 'deno.txt', + * size: (await Deno.stat('deno.txt')).size, + * iterable: (await Deno.open('deno.txt')).readable + * }); + * tar.append({ + * pathname: 'filename_in_archive.txt', + * size: (await Deno.stat('filename_in_archive.txt')).size, + * iterable: (await Deno.open('filename_in_archive.txt')).readable + * }); + * tar.close(); * - * // Now that we've created our tar, let's add some files to it: + * await tar.pipeTo((await Deno.create('./out.tar')).writable); + * ``` * - * const content = new TextEncoder().encode("Some arbitrary content"); - * await tar.append("deno.txt", { - * reader: new Buffer(content), - * contentSize: content.byteLength, - * }); + * ### Compression + * Tar archives are not compressed by default, but if you want to compress the archive, + * you may pipe the archive through a compression stream like `gzip` before writing it to disk. * - * // This file is sourced from the filesystem (and renamed in the archive) - * await tar.append("filename_in_archive.txt", { - * filePath: "./filename_on_filesystem.txt", - * }); + * @example + * ```ts + * import { Tar } from '@std/archive' * - * // Now let's write the tar (with it's two files) to the filesystem - * // use tar.getReader() to read the contents. + * const tar = new Tar(); + * tar.append({ + * pathname: 'deno.txt', + * size: (await Deno.stat('deno.txt')).size, + * iterable: (await Deno.open('deno.txt')).readable + * }); + * tar.append({ + * pathname: 'filename_in_archive.txt', + * size: (await Deno.stat('filename_in_archive.txt')).size, + * iterable: (await Deno.open('filename_in_archive.txt')).readable + * }); + * tar.close(); * - * const writer = await Deno.open("./out.tar", { write: true, create: true }); - * await copy(tar.getReader(), writer); - * writer.close(); + * await tar + * .pipeThrough(new CompressionStream('gzip')) + * .pipeTo((await Deno.create('./out.tar.gz')).writable); * ``` */ export class Tar { - /** Tar data. */ - data: TarDataWithSource[]; - - /** Constructs a new instance. */ - constructor() { - this.data = []; - } - + #files: { prefix: Uint8Array, name: Uint8Array, iterable: Iterable | AsyncIterable, size: number, sizeExtension: boolean }[] = [] + #readable: ReadableStream + #finishedAppending: boolean = false /** - * Append a file or reader of arbitrary content to this tar archive. Directories - * appended to the archive append only the directory itself to the archive, not - * its contents. To add a directory and its contents, recursively append the - * directory's contents. Directories and subdirectories will be created automatically - * in the archive as required. - * - * @param filenameInArchive File name of the content in the archive. E.g. - * `test.txt`. Use slash for directory separators. - * @param source Details of the source of the content including the - * reference to the content itself and potentially any related metadata. + * Constructs a new instance. */ - async append(filenameInArchive: string, source: TarOptions) { - if (typeof filenameInArchive !== "string") { - throw new Error("file name not specified"); - } - let fileName = filenameInArchive; - - /** - * Ustar format has a limitation of file name length. Specifically: - * 1. 
File names can contain at most 255 bytes. - * 2. File names longer than 100 bytes must be split at a directory separator in two parts, - * the first being at most 155 bytes long. So, in most cases file names must be a bit shorter - * than 255 bytes. - */ - // separate file name into two parts if needed - let fileNamePrefix: string | undefined; - if (fileName.length > 100) { - let i = fileName.length; - while (i >= 0) { - i = fileName.lastIndexOf("/", i); - if (i <= 155) { - fileNamePrefix = fileName.slice(0, i); - fileName = fileName.slice(i + 1); - break; + constructor() { + const gen = (async function* (tar) { + while ( + (!tar.#finishedAppending || tar.#files.length) + && await new Promise(a => setTimeout(() => a(true), 0)) + ) { + if (tar.#files.length) { + const file = tar.#files.shift()! + const encoder = new TextEncoder() + const header = new Uint8Array(512) + + header.set(file.name) // name + header.set(encoder.encode( + '000644 \0' // mode + + '000000 \0' // uid + + '000000 \0' // gid + + file.size.toString(8).padStart(file.sizeExtension ? 12 : 11) + (file.sizeExtension ? '' : ' ') // size + + '00000000000 ' // mtime + + ' ' // checksum | Needs to be updated + + '0' // typeflag + + '\0'.repeat(100) // linkname + + 'ustar\0' // magic + + '00' // version + + '\0'.repeat(32 + 32 + 8 + 8) // uname, gname, devmajor, devminor + ), 100) + header.set(file.prefix, 345) // prefix + + header.set(encoder.encode(header.reduce((x, y) => x + y).toString(8).padStart(6, '0') + '\0'), 148) + yield header + + for await (const x of file.iterable) + yield x + yield encoder.encode('\0'.repeat(512 - file.size % 512)) } - i--; } - const errMsg = - "ustar format does not allow a long file name (length of [file name" + - "prefix] + / + [file name] must be shorter than 256 bytes)"; - if (i < 0 || fileName.length > 100) { - throw new Error(errMsg); - } else { - assert(fileNamePrefix !== undefined); - if (fileNamePrefix.length > 155) { - throw new Error(errMsg); - } + yield new TextEncoder().encode('\0'.repeat(1024)) + })(this) + this.#readable = new ReadableStream({ + async pull(controller) { + const { done, value } = await gen.next() + if (done) + controller.close() + else + controller.enqueue(value) } - } - - source = source || {}; + }) + } - // set meta data - let info: Deno.FileInfo | undefined; - if (source.filePath) { - info = await Deno.stat(source.filePath); - if (info.isDirectory) { - info.size = 0; - source.reader = new Buffer(); + /** + * Append a file to the archive. This method will throw if you provide an incompatible + * size or pathname, or have already called the `close` method. + * @param file Details of the TarFile being appended to the archive. + * @param [sizeExtension=false] Enable up to 64 GiB files in the archive instead of 8 GiBs. + */ + append(file: TarFile, sizeExtension = false): void { + if (this.#finishedAppending) + throw new Error('This Tar instance has already be closed.') + + // Validate size provided. + if (file.size < 0 || Math.pow(8, sizeExtension ? 12 : 11) < file.size) + throw new Error('Invalid File Size: Up to 8 GiBs allowed or 64 GiBs if `sizeExtension` is enabled.') + + file.pathname = file.pathname.split('/').filter(x => x).join('/') + if (file.pathname.startsWith('./')) + file.pathname = file.pathname.slice(2) + + // Validating the path provided. + const pathname = new TextEncoder().encode(file.pathname) + if (pathname.length > 256) + throw new Error('Provided pathname is too long. 
Max 256 bytes.')

    let i = Math.max(0, pathname.lastIndexOf(47))
    if (pathname.slice(i).length > 100)
      throw new Error('Filename in pathname is too long. Filename can be at most 100 bytes.')

    if (pathname.length <= 100)
      i = 0
    else
      // Walk back through the separators to find a split that keeps the final component within 100 bytes.
      for (; i > 0; --i) {
        i = pathname.lastIndexOf(47, i)
        if (pathname.slice(i).length > 100) {
          i = Math.max(0, pathname.indexOf(47, ++i))
          break
        }
      }

    const prefix = pathname.slice(0, i++)
    if (prefix.length > 155)
      throw new Error('Provided pathname cannot be split into [155, 100] segments along a forward slash separator.')
    this.#files.push({ name: prefix.length ? pathname.slice(i) : pathname, prefix, iterable: file.iterable, size: file.size, sizeExtension })
  }

  /**
   * Closes the tar archive from accepting more files. Must be called for the tar archive to be properly created.
   */
  close(): void {
    this.#finishedAppending = true
  }

  /**
   * A Readable Stream of the archive.
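   * It emits each entry's 512-byte header, then the entry's bytes padded to a
   * 512-byte boundary, and finally two zero-filled records that close the archive.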
+ */ + get readable(): ReadableStream { + return this.#readable } } diff --git a/archive/untar.ts b/archive/untar.ts index d90c27b654bd..7c7d78c4e0c9 100644 --- a/archive/untar.ts +++ b/archive/untar.ts @@ -29,151 +29,39 @@ * THE SOFTWARE. */ -import { - FileTypes, - HEADER_LENGTH, - readBlock, - type TarMeta, - USTAR_STRUCTURE, - type UstarFields, -} from "./_common.ts"; -import { readAll } from "../io/read_all.ts"; -import type { Reader } from "../io/types.ts"; - /** - * Extend TarMeta with the `linkName` property so that readers can access - * symbolic link values without polluting the world of archive writers. + * @param pathname is what the file is called. + * @param header is the header of the file. + * @param readable is the contents of the file. */ -export interface TarMetaWithLinkName extends TarMeta { - /** File name of the symbolic link. */ - linkName?: string; +export type TarEntry = { + pathname: string, + header: TarHeader, + readable: ReadableStream } -/** Tar header with raw, unprocessed bytes as values. */ -export type TarHeader = { - [key in UstarFields]: Uint8Array; -}; - -// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 -// eight checksum bytes taken to be ascii spaces (decimal value 32) -const initialChecksum = 8 * 32; - /** - * Trims a Uint8Array by removing any trailing zero bytes. - * - * @param buffer The Uint8Array to trim. - * @returns A new Uint8Array with trailing zero bytes removed, or the original - * buffer if no trailing zero bytes are found. + * The header of a file decoded into an object, where `pad` is the remaining bytes of the header. + * The `pad` will be larger if the optional properties are missing. */ -function trim(buffer: Uint8Array): Uint8Array { - const index = buffer.indexOf(0); - return index === -1 ? buffer : buffer.subarray(0, index); -} - -/** - * Parse file header in a tar archive - * @param length - */ -function parseHeader(buffer: Uint8Array): TarHeader { - const data = {} as TarHeader; - let offset = 0; - USTAR_STRUCTURE.forEach(function (value) { - const arr = buffer.subarray(offset, offset + value.length); - data[value.field] = arr; - offset += value.length; - }); - return data; -} - -/** Tar entry */ -export interface TarEntry extends TarMetaWithLinkName {} - -/** Contains tar header metadata and a reader to the entry's body. */ -export class TarEntry implements Reader { - #header: TarHeader; - #reader: Reader | (Reader & Deno.Seeker); - #size: number; - #read = 0; - #consumed = false; - #entrySize: number; - - /** Constructs a new instance. */ - constructor( - meta: TarMetaWithLinkName, - header: TarHeader, - reader: Reader | (Reader & Deno.Seeker), - ) { - Object.assign(this, meta); - this.#header = header; - this.#reader = reader; - - // File Size - this.#size = this.fileSize || 0; - // Entry Size - const blocks = Math.ceil(this.#size / HEADER_LENGTH); - this.#entrySize = blocks * HEADER_LENGTH; - } - - /** Returns whether the entry has already been consumed. */ - get consumed(): boolean { - return this.#consumed; - } - - /** - * Reads up to `p.byteLength` bytes of the tar entry into `p`. It resolves to - * the number of bytes read (`0 < n <= p.byteLength`) and rejects if any - * error encountered. Even if read() resolves to n < p.byteLength, it may use - * all of `p` as scratch space during the call. If some data is available but - * not `p.byteLength bytes`, read() conventionally resolves to what is available - * instead of waiting for more. 
- */ - async read(p: Uint8Array): Promise { - // Bytes left for entry - const entryBytesLeft = this.#entrySize - this.#read; - const bufSize = Math.min( - // bufSize can't be greater than p.length nor bytes left in the entry - p.length, - entryBytesLeft, - ); - - if (entryBytesLeft <= 0) { - this.#consumed = true; - return null; - } - - const block = new Uint8Array(bufSize); - const n = await readBlock(this.#reader, block); - const bytesLeft = this.#size - this.#read; - - this.#read += n || 0; - if (n === null || bytesLeft <= 0) { - if (n === null) this.#consumed = true; - return null; - } - - // Remove zero filled - const offset = bytesLeft < n ? bytesLeft : n; - p.set(block.subarray(0, offset), 0); - - return offset < 0 ? n - Math.abs(offset) : offset; - } - - /** Discords the current entry. */ - async discard() { - // Discard current entry - if (this.#consumed) return; - this.#consumed = true; - - if (typeof (this.#reader as Deno.Seeker).seek === "function") { - await (this.#reader as Deno.Seeker).seek( - this.#entrySize - this.#read, - Deno.SeekMode.Current, - ); - this.#read = this.#entrySize; - } else { - await readAll(this); - } - } +export type TarHeader = { + name: string + mode: string + uid: string + gid: string + size: number + mtime: string + checksum: string + typeflag: string + linkname: string + magic?: string + version?: string + uname?: string + gname?: string + devmajor?: number + devminor?: number + prefix?: string + pad: Uint8Array } /** @@ -183,167 +71,147 @@ export class TarEntry implements Reader { * archives typically have the '.tar' extension. * * ### Supported file formats - * Only the ustar file format is supported. This is the most common format. The - * pax file format may also be read, but additional features, such as longer - * filenames may be ignored. + * Only the ustar file format is supported. This is the most common format. + * The numeric extension feature of the size to allow up to 64 GiBs is also supported. * * ### Usage - * The workflow is to create a Untar instance referencing the source of the tar file. - * You can then use the untar reference to extract files one at a time. See the worked - * example below for details. - * - * ### Understanding compression - * A tar archive may be compressed, often identified by the `.tar.gz` extension. - * This utility does not support decompression which must be done before extracting - * the files. + * The workflow is to create a UnTar instance passing in a ReadableStream of the archive. + * You can then iterate over the instance to pull out the entries one by one and decide + * if you want to read it or skip over it. Each entry's readable stream must either be + * consumed or the `cancel` method must be called on it. The next entry won't resolve until + * either action is done on the ReadableStream. 
* * @example * ```ts - * import { Untar } from "https://deno.land/std@$STD_VERSION/archive/untar.ts"; - * import { ensureFile } from "https://deno.land/std@$STD_VERSION/fs/ensure_file.ts"; - * import { ensureDir } from "https://deno.land/std@$STD_VERSION/fs/ensure_dir.ts"; - * import { copy } from "https://deno.land/std@$STD_VERSION/io/copy.ts"; - * - * using reader = await Deno.open("./out.tar", { read: true }); - * const untar = new Untar(reader); - * - * for await (const entry of untar) { - * console.log(entry); // metadata + * for await ( + * const entry of new UnTar((await Deno.open('./out.tar.gz')).readable) + * ) { + * console.log(entry.pathname); + * entry.readable.pipeTo((await Deno.create(file.pathname)).writable); + * } + * ``` * - * if (entry.type === "directory") { - * await ensureDir(entry.fileName); - * continue; - * } + * ### Decompression + * UnTar does not handle decompression itself. One must first run it through the required + * decompression stream before passing the ReadableStream to UnTar. * - * await ensureFile(entry.fileName); - * using file = await Deno.open(entry.fileName, { write: true }); - * // is a reader. - * await copy(entry, file); + * @example + * ```ts + * for await ( + * const entry of new UnTar( + * (await Deno.open('./out.tar.gz')) + * .readable + * .pipeThrough(new DecompressionStream('gzip')) + * ) + * ) { + * console.log(entry.pathname); + * entry.readable.pipeTo((await Deno.create(file.pathname)).writable); * } * ``` */ -export class Untar { - /** Internal reader. */ - reader: Reader; - /** Internal block. */ - block: Uint8Array; - #entry: TarEntry | undefined; - - /** Constructs a new instance. */ - constructor(reader: Reader) { - this.reader = reader; - this.block = new Uint8Array(HEADER_LENGTH); - } - - #checksum(header: Uint8Array): number { - let sum = initialChecksum; - for (let i = 0; i < HEADER_LENGTH; i++) { - if (i >= 148 && i < 156) { - // Ignore checksum header - continue; - } - sum += header[i]!; - } - return sum; - } - - async #getAndValidateHeader(): Promise { - await readBlock(this.reader, this.block); - const header = parseHeader(this.block); - - // calculate the checksum - const decoder = new TextDecoder(); - const checksum = this.#checksum(this.block); - - if (parseInt(decoder.decode(header.checksum), 8) !== checksum) { - if (checksum === initialChecksum) { - // EOF - return null; - } - throw new Error("checksum error"); - } - - const magic = decoder.decode(header.ustar); - - if (magic.indexOf("ustar")) { - throw new Error(`unsupported archive format: ${magic}`); - } - - return header; - } - - #getMetadata(header: TarHeader): TarMetaWithLinkName { - const decoder = new TextDecoder(); - // get meta data - const meta: TarMetaWithLinkName = { - fileName: decoder.decode(trim(header.fileName)), - }; - const fileNamePrefix = trim(header.fileNamePrefix); - if (fileNamePrefix.byteLength > 0) { - meta.fileName = decoder.decode(fileNamePrefix) + "/" + meta.fileName; - } - (["fileMode", "mtime", "uid", "gid"] as const) - .forEach((key) => { - const arr = trim(header[key]); - if (arr.byteLength > 0) { - meta[key] = parseInt(decoder.decode(arr), 8); - } - }); - (["owner", "group", "type"] as const) - .forEach((key) => { - const arr = trim(header[key]); - if (arr.byteLength > 0) { - meta[key] = decoder.decode(arr); - } - }); - - meta.fileSize = parseInt(decoder.decode(header.fileSize), 8); - meta.type = FileTypes[parseInt(meta.type!)] ?? 
meta.type; - - // Only create the `linkName` property for symbolic links to minimize - // the effect on existing code that only deals with non-links. - if (meta.type === "symlink") { - meta.linkName = decoder.decode(trim(header.linkName)); - } - - return meta; - } - - /** - * Extract the next entry of the tar archive. - * - * @returns A TarEntry with header metadata and a reader to the entry's - * body, or null if there are no more entries to extract. - */ - async extract(): Promise { - if (this.#entry && !this.#entry.consumed) { - // If entry body was not read, discard the body - // so we can read the next entry. - await this.#entry.discard(); - } - - const header = await this.#getAndValidateHeader(); - if (header === null) return null; - - const meta = this.#getMetadata(header); - - this.#entry = new TarEntry(meta, header, this.reader); - - return this.#entry; - } - - /** - * Iterate over all entries of the tar archive. - * - * @yields A TarEntry with tar header metadata and a reader to the entry's body. - */ - async *[Symbol.asyncIterator](): AsyncIterableIterator { - while (true) { - const entry = await this.extract(); - - if (entry === null) return; - - yield entry; - } - } +export class UnTar extends ReadableStream { + /** + * Constructs a new instance. + */ + constructor(readable: ReadableStream) { + const reader = readable.pipeThrough(new TransformStream({ + push: new Uint8Array(0), + transform(chunk, controller) { + const x = new Uint8Array(this.push.length + chunk.length) + x.set(this.push) + x.set(chunk, this.push.length) + for (let i = 0; i < x.length; i += 512) + controller.enqueue(x.slice(i, i + 512)) + this.push = x.slice(x.length % 512) + }, + flush(controller) { + if (this.push.length) // This should always be zero! + controller.enqueue(this.push) + } + } as Transformer & { push: Uint8Array })).getReader() + + let header: TarHeader | undefined + super({ + cancelled: false, + async pull(controller) { + while (header !== undefined) + await new Promise(a => setTimeout(a, 0)) + + while (true) { + const { done, value } = await reader.read() + if (done || value.reduce((x, y) => x + y) === 0) + return controller.close() + + const decoder = new TextDecoder() + header = { + name: decoder.decode(value.slice(0, 100)).replaceAll('\0', ''), + mode: decoder.decode(value.slice(100, 108 - 2)), + uid: decoder.decode(value.slice(108, 116 - 2)), + gid: decoder.decode(value.slice(116, 124 - 2)), + size: parseInt(decoder.decode(value.slice(124, 136)).trimEnd(), 8), // Support tarballs with files up to 64 GiBs. 
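            // The size field is 12 bytes: trimming the tail lets both the classic
            // 11-digit-plus-space layout and the extended 12-digit octal layout parse.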
+ mtime: decoder.decode(value.slice(136, 148 - 1)), + checksum: decoder.decode(value.slice(148, 156 - 2)), + typeflag: decoder.decode(value.slice(156, 157)), + linkname: decoder.decode(value.slice(157, 257)).replaceAll('\0', ''), + pad: value.slice(257) + } + if ([117, 115, 116, 97, 114, 0, 48, 48].every((byte, i) => value[i + 257] === byte)) + header = { + ...header, + magic: decoder.decode(value.slice(257, 263)), + version: decoder.decode(value.slice(263, 265)), + uname: decoder.decode(value.slice(265, 297)).replaceAll('\0', ''), + gname: decoder.decode(value.slice(297, 329)).replaceAll('\0', ''), + devmajor: value.slice(329, 337).reduce((x, y) => x + y), + devminor: value.slice(337, 345).reduce((x, y) => x + y), + prefix: decoder.decode(value.slice(345, 500)).replaceAll('\0', ''), + pad: value.slice(500) + } + if (header.typeflag !== '0' && header.typeflag !== '\0') + continue + + const size = header.size + let i = Math.ceil(size / 512) + const isCancelled = () => this.cancelled + + controller.enqueue({ + pathname: (header.prefix ? header.prefix + '/' : '') + header.name, + header, + readable: new ReadableStream({ + async pull(controller) { + if (i > 0) { + const { done, value } = await reader.read() + if (done) { + header = undefined + return controller.close() + } + controller.enqueue(i === 1 ? value.slice(0, size % 512) : value) + --i + } + else { + header = undefined + if (isCancelled()) + reader.cancel() + controller.close() + } + }, + async cancel() { + while (i-- > 0) { + const { done } = await reader.read() + if (done) + break + } + header = undefined + } + }) + }) + break + } + }, + cancel() { + this.cancelled = true + } + } as UnderlyingSource & { cancelled: boolean } + ) + } } From 16dc111b5537522c5588c4beb4ea315075072545 Mon Sep 17 00:00:00 2001 From: BlackAsLight <44320105+BlackAsLight@users.noreply.github.com> Date: Mon, 1 Apr 2024 08:03:58 +1100 Subject: [PATCH 2/6] fmt(archive): Update indentation to match --- archive/untar.ts | 238 +++++++++++++++++++++++------------------------ 1 file changed, 119 insertions(+), 119 deletions(-) diff --git a/archive/untar.ts b/archive/untar.ts index 7c7d78c4e0c9..a6e4291ae9dc 100644 --- a/archive/untar.ts +++ b/archive/untar.ts @@ -35,9 +35,9 @@ * @param readable is the contents of the file. */ export type TarEntry = { - pathname: string, - header: TarHeader, - readable: ReadableStream + pathname: string, + header: TarHeader, + readable: ReadableStream } /** @@ -45,23 +45,23 @@ export type TarEntry = { * The `pad` will be larger if the optional properties are missing. */ export type TarHeader = { - name: string - mode: string - uid: string - gid: string - size: number - mtime: string - checksum: string - typeflag: string - linkname: string - magic?: string - version?: string - uname?: string - gname?: string - devmajor?: number - devminor?: number - prefix?: string - pad: Uint8Array + name: string + mode: string + uid: string + gid: string + size: number + mtime: string + checksum: string + typeflag: string + linkname: string + magic?: string + version?: string + uname?: string + gname?: string + devmajor?: number + devminor?: number + prefix?: string + pad: Uint8Array } /** @@ -110,108 +110,108 @@ export type TarHeader = { * ``` */ export class UnTar extends ReadableStream { - /** - * Constructs a new instance. 
- */ - constructor(readable: ReadableStream) { - const reader = readable.pipeThrough(new TransformStream({ - push: new Uint8Array(0), - transform(chunk, controller) { - const x = new Uint8Array(this.push.length + chunk.length) - x.set(this.push) - x.set(chunk, this.push.length) - for (let i = 0; i < x.length; i += 512) - controller.enqueue(x.slice(i, i + 512)) - this.push = x.slice(x.length % 512) - }, - flush(controller) { - if (this.push.length) // This should always be zero! - controller.enqueue(this.push) - } - } as Transformer & { push: Uint8Array })).getReader() + /** + * Constructs a new instance. + */ + constructor(readable: ReadableStream) { + const reader = readable.pipeThrough(new TransformStream({ + push: new Uint8Array(0), + transform(chunk, controller) { + const x = new Uint8Array(this.push.length + chunk.length) + x.set(this.push) + x.set(chunk, this.push.length) + for (let i = 0; i < x.length; i += 512) + controller.enqueue(x.slice(i, i + 512)) + this.push = x.slice(x.length % 512) + }, + flush(controller) { + if (this.push.length) // This should always be zero! + controller.enqueue(this.push) + } + } as Transformer & { push: Uint8Array })).getReader() - let header: TarHeader | undefined - super({ - cancelled: false, - async pull(controller) { - while (header !== undefined) - await new Promise(a => setTimeout(a, 0)) + let header: TarHeader | undefined + super({ + cancelled: false, + async pull(controller) { + while (header !== undefined) + await new Promise(a => setTimeout(a, 0)) - while (true) { - const { done, value } = await reader.read() - if (done || value.reduce((x, y) => x + y) === 0) - return controller.close() + while (true) { + const { done, value } = await reader.read() + if (done || value.reduce((x, y) => x + y) === 0) + return controller.close() - const decoder = new TextDecoder() - header = { - name: decoder.decode(value.slice(0, 100)).replaceAll('\0', ''), - mode: decoder.decode(value.slice(100, 108 - 2)), - uid: decoder.decode(value.slice(108, 116 - 2)), - gid: decoder.decode(value.slice(116, 124 - 2)), - size: parseInt(decoder.decode(value.slice(124, 136)).trimEnd(), 8), // Support tarballs with files up to 64 GiBs. - mtime: decoder.decode(value.slice(136, 148 - 1)), - checksum: decoder.decode(value.slice(148, 156 - 2)), - typeflag: decoder.decode(value.slice(156, 157)), - linkname: decoder.decode(value.slice(157, 257)).replaceAll('\0', ''), - pad: value.slice(257) - } - if ([117, 115, 116, 97, 114, 0, 48, 48].every((byte, i) => value[i + 257] === byte)) - header = { - ...header, - magic: decoder.decode(value.slice(257, 263)), - version: decoder.decode(value.slice(263, 265)), - uname: decoder.decode(value.slice(265, 297)).replaceAll('\0', ''), - gname: decoder.decode(value.slice(297, 329)).replaceAll('\0', ''), - devmajor: value.slice(329, 337).reduce((x, y) => x + y), - devminor: value.slice(337, 345).reduce((x, y) => x + y), - prefix: decoder.decode(value.slice(345, 500)).replaceAll('\0', ''), - pad: value.slice(500) - } - if (header.typeflag !== '0' && header.typeflag !== '\0') - continue + const decoder = new TextDecoder() + header = { + name: decoder.decode(value.slice(0, 100)).replaceAll('\0', ''), + mode: decoder.decode(value.slice(100, 108 - 2)), + uid: decoder.decode(value.slice(108, 116 - 2)), + gid: decoder.decode(value.slice(116, 124 - 2)), + size: parseInt(decoder.decode(value.slice(124, 136)).trimEnd(), 8), // Support tarballs with files up to 64 GiBs. 
+ mtime: decoder.decode(value.slice(136, 148 - 1)), + checksum: decoder.decode(value.slice(148, 156 - 2)), + typeflag: decoder.decode(value.slice(156, 157)), + linkname: decoder.decode(value.slice(157, 257)).replaceAll('\0', ''), + pad: value.slice(257) + } + if ([117, 115, 116, 97, 114, 0, 48, 48].every((byte, i) => value[i + 257] === byte)) + header = { + ...header, + magic: decoder.decode(value.slice(257, 263)), + version: decoder.decode(value.slice(263, 265)), + uname: decoder.decode(value.slice(265, 297)).replaceAll('\0', ''), + gname: decoder.decode(value.slice(297, 329)).replaceAll('\0', ''), + devmajor: value.slice(329, 337).reduce((x, y) => x + y), + devminor: value.slice(337, 345).reduce((x, y) => x + y), + prefix: decoder.decode(value.slice(345, 500)).replaceAll('\0', ''), + pad: value.slice(500) + } + if (header.typeflag !== '0' && header.typeflag !== '\0') + continue - const size = header.size - let i = Math.ceil(size / 512) - const isCancelled = () => this.cancelled + const size = header.size + let i = Math.ceil(size / 512) + const isCancelled = () => this.cancelled - controller.enqueue({ - pathname: (header.prefix ? header.prefix + '/' : '') + header.name, - header, - readable: new ReadableStream({ - async pull(controller) { - if (i > 0) { - const { done, value } = await reader.read() - if (done) { - header = undefined - return controller.close() - } - controller.enqueue(i === 1 ? value.slice(0, size % 512) : value) - --i - } - else { - header = undefined - if (isCancelled()) - reader.cancel() - controller.close() - } - }, - async cancel() { - while (i-- > 0) { - const { done } = await reader.read() - if (done) - break - } - header = undefined - } - }) - }) - break - } - }, - cancel() { - this.cancelled = true - } - } as UnderlyingSource & { cancelled: boolean } - ) - } + controller.enqueue({ + pathname: (header.prefix ? header.prefix + '/' : '') + header.name, + header, + readable: new ReadableStream({ + async pull(controller) { + if (i > 0) { + const { done, value } = await reader.read() + if (done) { + header = undefined + return controller.close() + } + controller.enqueue(i === 1 ? 
value.slice(0, size % 512) : value) + --i + } + else { + header = undefined + if (isCancelled()) + reader.cancel() + controller.close() + } + }, + async cancel() { + while (i-- > 0) { + const { done } = await reader.read() + if (done) + break + } + header = undefined + } + }) + }) + break + } + }, + cancel() { + this.cancelled = true + } + } as UnderlyingSource & { cancelled: boolean } + ) + } } From 4aa85a6b28b380227d2f9c32e17df1ada068311f Mon Sep 17 00:00:00 2001 From: BlackAsLight <44320105+BlackAsLight@users.noreply.github.com> Date: Mon, 1 Apr 2024 08:14:02 +1100 Subject: [PATCH 3/6] feat(archive): add TarStream & UnTarStream --- archive/tar.ts | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ archive/untar.ts | 44 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) diff --git a/archive/tar.ts b/archive/tar.ts index 37868d916e27..475c4e54a277 100644 --- a/archive/tar.ts +++ b/archive/tar.ts @@ -219,3 +219,58 @@ export class Tar { return this.#readable } } + +/** + * Like the Tar class, but takes in a ReadableStream and outputs a ReadableStream + * + * @example + * ```ts + * ReadableStream.from([ + * { + * pathname: 'deno.txt', + * size: (await Deno.stat('deno.txt')).size, + * iterable: (await Deno.open('deno.txt')).readable + * }, + * { + * pathname: 'filename_in_archive.txt', + * size: (await Deno.stat('filename_in_archive.txt')).size, + * iterable: (await Deno.open('filename_in_archive.txt')).readable + * } + * ]) + * .pipeThrough(new TarStream()) + * .pipeThrough(new CompressionStream('gzip')) + * .pipeTo((await Deno.create('./out.tar.gz'))) + * ``` + */ +export class TarStream { + #readable: ReadableStream + #writable: WritableStream + /** + * Creates an instance. + */ + constructor() { + const { readable, writable } = new TransformStream() + const tar = new Tar() + this.#readable = tar.readable + this.#writable = writable; + (async () => { + for await (const tarFile of readable) + tar.append(tarFile) + tar.close() + })() + } + + /** + * Returns a ReadableStream of the archive. + */ + get readable(): ReadableStream { + return this.#readable + } + + /** + * Returns a WritableStream for the files to be archived. + */ + get writable(): WritableStream { + return this.#writable + } +} diff --git a/archive/untar.ts b/archive/untar.ts index a6e4291ae9dc..c1c7676a0d99 100644 --- a/archive/untar.ts +++ b/archive/untar.ts @@ -215,3 +215,47 @@ export class UnTar extends ReadableStream { ) } } + +/** + * Like the UnTar class, taking in a ReadableStream and outputs a ReadableStream + * + * @example + * ```ts + * await Deno.mkdir('out/') + * for await ( + * const entry of (await Deno.open('./out.tar.gz')) + * .readable + * .pipeThrough(new DecompressionStream('gzip')) + * .pipeThrough(new UnTarStream()) + * ) { + * await entry.readable.pipeTo((await Deno.create('out/' + entry.pathname)).writable); + * } + * ``` + */ +export class UnTarStream { + #readable: ReadableStream + #writable: WritableStream + /** + * Creates an instance. + */ + constructor() { + const { readable, writable } = new TransformStream() + const unTar = new UnTar(readable) + this.#readable = unTar + this.#writable = writable + } + + /** + * Returns a ReadableStream of the files in the archive. + */ + get readable(): ReadableStream { + return this.#readable + } + + /** + * Returns a WritableStream for the archive to be expanded. 
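   * The bytes written must be raw tar data; pipe compressed archives through a
   * decompression stream first.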
+ */ + get writable(): WritableStream { + return this.#writable + } +} From 021e7ae351dae55318629e258cf64c969b817714 Mon Sep 17 00:00:00 2001 From: BlackAsLight <44320105+BlackAsLight@users.noreply.github.com> Date: Mon, 1 Apr 2024 09:36:00 +1100 Subject: [PATCH 4/6] fix(archive): UnTar failed to slice correctly - Incoming chunks of size less than 512 were causing the header to not be aligned properly. - ReadableStreams seem to call the `pull` upon initiation so if the cancel method is called, an extra record was being discarded. - I honestly don't understand how that push property wasn't causing issues before. I guess the chunks I got from Deno.open was always divisible by 512. --- archive/untar.ts | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/archive/untar.ts b/archive/untar.ts index c1c7676a0d99..aa425d0bb48f 100644 --- a/archive/untar.ts +++ b/archive/untar.ts @@ -120,9 +120,9 @@ export class UnTar extends ReadableStream { const x = new Uint8Array(this.push.length + chunk.length) x.set(this.push) x.set(chunk, this.push.length) - for (let i = 0; i < x.length; i += 512) - controller.enqueue(x.slice(i, i + 512)) - this.push = x.slice(x.length % 512) + for (let i = 512; i <= x.length; i += 512) + controller.enqueue(x.slice(i - 512, i)) + this.push = x.length % 512 ? x.slice(-x.length % 512) : new Uint8Array(0) }, flush(controller) { if (this.push.length) // This should always be zero! @@ -185,8 +185,7 @@ export class UnTar extends ReadableStream { header = undefined return controller.close() } - controller.enqueue(i === 1 ? value.slice(0, size % 512) : value) - --i + controller.enqueue(i-- === 1 ? value.slice(0, size % 512) : value) } else { header = undefined @@ -196,11 +195,12 @@ export class UnTar extends ReadableStream { } }, async cancel() { - while (i-- > 0) { - const { done } = await reader.read() - if (done) - break - } + if (i !== 1) + while (i-- > 0) { + const { done } = await reader.read() + if (done) + break + } header = undefined } }) From cf5292ce3ecc437b14c63cd3ed1986c66e4c54a3 Mon Sep 17 00:00:00 2001 From: BlackAsLight <44320105+BlackAsLight@users.noreply.github.com> Date: Wed, 3 Apr 2024 15:54:38 +1100 Subject: [PATCH 5/6] fmt(archive): Ran `deno fmt` & fixed `@example`s --- archive/tar.ts | 197 +++++++++++++++++++------------- archive/untar.ts | 286 +++++++++++++++++++++++++++-------------------- 2 files changed, 285 insertions(+), 198 deletions(-) diff --git a/archive/tar.ts b/archive/tar.ts index 475c4e54a277..75dc02b6329c 100644 --- a/archive/tar.ts +++ b/archive/tar.ts @@ -34,10 +34,10 @@ * @param size is the size of the source in bytes. Providing the wrong size can lead to corrupt data. 
*/ export type TarFile = { - pathname: string, - iterable: Iterable | AsyncIterable, - size: number -} + pathname: string; + iterable: Iterable | AsyncIterable; + size: number; +}; /** * ### Overview @@ -77,7 +77,7 @@ export type TarFile = { * }); * tar.close(); * - * await tar.pipeTo((await Deno.create('./out.tar')).writable); + * await tar.readable.pipeTo((await Deno.create('./out.tar')).writable); * ``` * * ### Compression @@ -102,63 +102,82 @@ export type TarFile = { * tar.close(); * * await tar + * .readable * .pipeThrough(new CompressionStream('gzip')) * .pipeTo((await Deno.create('./out.tar.gz')).writable); * ``` */ export class Tar { - #files: { prefix: Uint8Array, name: Uint8Array, iterable: Iterable | AsyncIterable, size: number, sizeExtension: boolean }[] = [] - #readable: ReadableStream - #finishedAppending: boolean = false + #files: { + prefix: Uint8Array; + name: Uint8Array; + iterable: Iterable | AsyncIterable; + size: number; + sizeExtension: boolean; + }[] = []; + #readable: ReadableStream; + #finishedAppending: boolean = false; /** * Constructs a new instance. */ constructor() { const gen = (async function* (tar) { while ( - (!tar.#finishedAppending || tar.#files.length) - && await new Promise(a => setTimeout(() => a(true), 0)) + (!tar.#finishedAppending || tar.#files.length) && + await new Promise((a) => setTimeout(() => a(true), 0)) ) { if (tar.#files.length) { - const file = tar.#files.shift()! - const encoder = new TextEncoder() - const header = new Uint8Array(512) + const file = tar.#files.shift()!; + const encoder = new TextEncoder(); + const header = new Uint8Array(512); - header.set(file.name) // name - header.set(encoder.encode( - '000644 \0' // mode - + '000000 \0' // uid - + '000000 \0' // gid - + file.size.toString(8).padStart(file.sizeExtension ? 12 : 11) + (file.sizeExtension ? '' : ' ') // size - + '00000000000 ' // mtime - + ' ' // checksum | Needs to be updated - + '0' // typeflag - + '\0'.repeat(100) // linkname - + 'ustar\0' // magic - + '00' // version - + '\0'.repeat(32 + 32 + 8 + 8) // uname, gname, devmajor, devminor - ), 100) - header.set(file.prefix, 345) // prefix + header.set(file.name); // name + header.set( + encoder.encode( + "000644 \0" + // mode + "000000 \0" + // uid + "000000 \0" + // gid + file.size.toString(8).padStart(file.sizeExtension ? 12 : 11) + + (file.sizeExtension ? 
"" : " ") + // size + "00000000000 " + // mtime + " " + // checksum | Needs to be updated + "0" + // typeflag + "\0".repeat(100) + // linkname + "ustar\0" + // magic + "00" + // version + "\0".repeat(32 + 32 + 8 + 8), // uname, gname, devmajor, devminor + ), + 100, + ); + header.set(file.prefix, 345); // prefix - header.set(encoder.encode(header.reduce((x, y) => x + y).toString(8).padStart(6, '0') + '\0'), 148) - yield header + header.set( + encoder.encode( + header.reduce((x, y) => x + y).toString(8).padStart(6, "0") + + "\0", + ), + 148, + ); + yield header; - for await (const x of file.iterable) - yield x - yield encoder.encode('\0'.repeat(512 - file.size % 512)) + for await (const x of file.iterable) { + yield x; + } + yield encoder.encode("\0".repeat(512 - file.size % 512)); } } - yield new TextEncoder().encode('\0'.repeat(1024)) - })(this) + yield new TextEncoder().encode("\0".repeat(1024)); + })(this); this.#readable = new ReadableStream({ async pull(controller) { - const { done, value } = await gen.next() - if (done) - controller.close() - else - controller.enqueue(value) - } - }) + const { done, value } = await gen.next(); + if (done) { + controller.close(); + } else { + controller.enqueue(value); + } + }, + }); } /** @@ -168,55 +187,74 @@ export class Tar { * @param [sizeExtension=false] Enable up to 64 GiB files in the archive instead of 8 GiBs. */ append(file: TarFile, sizeExtension = false): void { - if (this.#finishedAppending) - throw new Error('This Tar instance has already be closed.') + if (this.#finishedAppending) { + throw new Error("This Tar instance has already be closed."); + } // Validate size provided. - if (file.size < 0 || Math.pow(8, sizeExtension ? 12 : 11) < file.size) - throw new Error('Invalid File Size: Up to 8 GiBs allowed or 64 GiBs if `sizeExtension` is enabled.') + if (file.size < 0 || Math.pow(8, sizeExtension ? 12 : 11) < file.size) { + throw new Error( + "Invalid File Size: Up to 8 GiBs allowed or 64 GiBs if `sizeExtension` is enabled.", + ); + } - file.pathname = file.pathname.split('/').filter(x => x).join('/') - if (file.pathname.startsWith('./')) - file.pathname = file.pathname.slice(2) + file.pathname = file.pathname.split("/").filter((x) => x).join("/"); + if (file.pathname.startsWith("./")) { + file.pathname = file.pathname.slice(2); + } // Validating the path provided. - const pathname = new TextEncoder().encode(file.pathname) - if (pathname.length > 256) - throw new Error('Provided pathname is too long. Max 256 bytes.') + const pathname = new TextEncoder().encode(file.pathname); + if (pathname.length > 256) { + throw new Error("Provided pathname is too long. Max 256 bytes."); + } - let i = Math.max(0, pathname.lastIndexOf(47)) - if (pathname.slice(i).length > 100) - throw new Error('Filename in pathname is too long. Filename can be at most 100 bytes.') + let i = Math.max(0, pathname.lastIndexOf(47)); + if (pathname.slice(i).length > 100) { + throw new Error( + "Filename in pathname is too long. 
Filename can be at most 100 bytes.", + ); + } - if (pathname.length <= 100) - i = 0 - else + if (pathname.length <= 100) { + i = 0; + } else { for (; i > 0; --i) { - i = pathname.lastIndexOf(47, i) + i = pathname.lastIndexOf(47, i); if (pathname.slice(i).length > 100) { - i = Math.max(0, pathname.indexOf(47, ++i)) - break + i = Math.max(0, pathname.indexOf(47, ++i)); + break; } } + } - const prefix = pathname.slice(0, i++) - if (prefix.length > 155) - throw new Error('Provided pathname cannot be split into [155, 100] segments along a forward slash separator.') - this.#files.push({ name: prefix.length ? pathname.slice(i) : pathname, prefix, iterable: file.iterable, size: file.size, sizeExtension }) + const prefix = pathname.slice(0, i++); + if (prefix.length > 155) { + throw new Error( + "Provided pathname cannot be split into [155, 100] segments along a forward slash separator.", + ); + } + this.#files.push({ + name: prefix.length ? pathname.slice(i) : pathname, + prefix, + iterable: file.iterable, + size: file.size, + sizeExtension, + }); } /** * Closes the tar archive from accepting more files. Must be called for tar archive to be properly created. */ close(): void { - this.#finishedAppending = true + this.#finishedAppending = true; } /** * A Readable Stream of the archive. */ get readable(): ReadableStream { - return this.#readable + return this.#readable; } } @@ -225,6 +263,8 @@ export class Tar { * * @example * ```ts + * import { TarStream } from '@std/archive' + * * ReadableStream.from([ * { * pathname: 'deno.txt', @@ -243,34 +283,35 @@ export class Tar { * ``` */ export class TarStream { - #readable: ReadableStream - #writable: WritableStream + #readable: ReadableStream; + #writable: WritableStream; /** * Creates an instance. */ constructor() { - const { readable, writable } = new TransformStream() - const tar = new Tar() - this.#readable = tar.readable + const { readable, writable } = new TransformStream(); + const tar = new Tar(); + this.#readable = tar.readable; this.#writable = writable; (async () => { - for await (const tarFile of readable) - tar.append(tarFile) - tar.close() - })() + for await (const tarFile of readable) { + tar.append(tarFile); + } + tar.close(); + })(); } /** * Returns a ReadableStream of the archive. */ get readable(): ReadableStream { - return this.#readable + return this.#readable; } /** * Returns a WritableStream for the files to be archived. */ get writable(): WritableStream { - return this.#writable + return this.#writable; } } diff --git a/archive/untar.ts b/archive/untar.ts index aa425d0bb48f..c7c34245d711 100644 --- a/archive/untar.ts +++ b/archive/untar.ts @@ -35,34 +35,34 @@ * @param readable is the contents of the file. */ export type TarEntry = { - pathname: string, - header: TarHeader, - readable: ReadableStream -} + pathname: string; + header: TarHeader; + readable: ReadableStream; +}; /** * The header of a file decoded into an object, where `pad` is the remaining bytes of the header. * The `pad` will be larger if the optional properties are missing. 
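 * `size` is parsed from its octal string form into a number; the textual
 * fields keep their decoded string values.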
 */
 export type TarHeader = {
-  name: string
-  mode: string
-  uid: string
-  gid: string
-  size: number
-  mtime: string
-  checksum: string
-  typeflag: string
-  linkname: string
-  magic?: string
-  version?: string
-  uname?: string
-  gname?: string
-  devmajor?: number
-  devminor?: number
-  prefix?: string
-  pad: Uint8Array
-}
+  name: string;
+  mode: string;
+  uid: string;
+  gid: string;
+  size: number;
+  mtime: string;
+  checksum: string;
+  typeflag: string;
+  linkname: string;
+  magic?: string;
+  version?: string;
+  uname?: string;
+  gname?: string;
+  devmajor?: number;
+  devminor?: number;
+  prefix?: string;
+  pad: Uint8Array;
+};
 
 /**
  * ### Overview
@@ -83,6 +83,8 @@ export type TarHeader = {
  *
  * @example
  * ```ts
+ * import { UnTar } from '@std/archive'
+ *
  * for await (
  *   const entry of new UnTar((await Deno.open('./out.tar.gz')).readable)
  * ) {
@@ -97,6 +99,8 @@ export type TarHeader = {
  *
  * @example
  * ```ts
+ * import { UnTar } from '@std/archive'
+ *
  * for await (
  *   const entry of new UnTar(
  *     (await Deno.open('./out.tar.gz'))
@@ -114,105 +118,142 @@ export class UnTar extends ReadableStream<TarEntry> {
   * Constructs a new instance.
   */
  constructor(readable: ReadableStream<Uint8Array>) {
-    const reader = readable.pipeThrough(new TransformStream({
-      push: new Uint8Array(0),
-      transform(chunk, controller) {
-        const x = new Uint8Array(this.push.length + chunk.length)
-        x.set(this.push)
-        x.set(chunk, this.push.length)
-        for (let i = 512; i <= x.length; i += 512)
-          controller.enqueue(x.slice(i - 512, i))
-        this.push = x.length % 512 ? x.slice(-x.length % 512) : new Uint8Array(0)
-      },
-      flush(controller) {
-        if (this.push.length) // This should always be zero!
-          controller.enqueue(this.push)
-      }
-    } as Transformer<Uint8Array, Uint8Array> & { push: Uint8Array })).getReader()
+    const reader = readable.pipeThrough(
+      new TransformStream(
+        {
+          push: new Uint8Array(0),
+          transform(chunk, controller) {
+            const x = new Uint8Array(this.push.length + chunk.length);
+            x.set(this.push);
+            x.set(chunk, this.push.length);
+            for (let i = 512; i <= x.length; i += 512) {
+              controller.enqueue(x.slice(i - 512, i));
+            }
+            this.push = x.length % 512
+              ? x.slice(-x.length % 512)
+              : new Uint8Array(0);
+          },
+          flush(controller) {
+            if (this.push.length) { // This should always be zero!
+              controller.enqueue(this.push);
+            }
+          },
+        } as Transformer<Uint8Array, Uint8Array> & { push: Uint8Array },
+      ),
+    ).getReader();
 
-    let header: TarHeader | undefined
-    super({
-      cancelled: false,
-      async pull(controller) {
-        while (header !== undefined)
-          await new Promise(a => setTimeout(a, 0))
+    let header: TarHeader | undefined;
+    super(
+      {
+        cancelled: false,
+        async pull(controller) {
+          while (header !== undefined) {
+            await new Promise((a) => setTimeout(a, 0));
+          }
 
-        while (true) {
-          const { done, value } = await reader.read()
-          if (done || value.reduce((x, y) => x + y) === 0)
-            return controller.close()
+          while (true) {
+            const { done, value } = await reader.read();
+            if (done || value.reduce((x, y) => x + y) === 0) {
+              return controller.close();
+            }
 
-          const decoder = new TextDecoder()
-          header = {
-            name: decoder.decode(value.slice(0, 100)).replaceAll('\0', ''),
-            mode: decoder.decode(value.slice(100, 108 - 2)),
-            uid: decoder.decode(value.slice(108, 116 - 2)),
-            gid: decoder.decode(value.slice(116, 124 - 2)),
-            size: parseInt(decoder.decode(value.slice(124, 136)).trimEnd(), 8), // Support tarballs with files up to 64 GiBs.
-            mtime: decoder.decode(value.slice(136, 148 - 1)),
-            checksum: decoder.decode(value.slice(148, 156 - 2)),
-            typeflag: decoder.decode(value.slice(156, 157)),
-            linkname: decoder.decode(value.slice(157, 257)).replaceAll('\0', ''),
-            pad: value.slice(257)
-          }
-          if ([117, 115, 116, 97, 114, 0, 48, 48].every((byte, i) => value[i + 257] === byte))
+            const decoder = new TextDecoder();
             header = {
-              ...header,
-              magic: decoder.decode(value.slice(257, 263)),
-              version: decoder.decode(value.slice(263, 265)),
-              uname: decoder.decode(value.slice(265, 297)).replaceAll('\0', ''),
-              gname: decoder.decode(value.slice(297, 329)).replaceAll('\0', ''),
-              devmajor: value.slice(329, 337).reduce((x, y) => x + y),
-              devminor: value.slice(337, 345).reduce((x, y) => x + y),
-              prefix: decoder.decode(value.slice(345, 500)).replaceAll('\0', ''),
-              pad: value.slice(500)
+              name: decoder.decode(value.slice(0, 100)).replaceAll("\0", ""),
+              mode: decoder.decode(value.slice(100, 108 - 2)),
+              uid: decoder.decode(value.slice(108, 116 - 2)),
+              gid: decoder.decode(value.slice(116, 124 - 2)),
+              size: parseInt(
+                decoder.decode(value.slice(124, 136)).trimEnd(),
+                8,
+              ), // Support tarballs with files up to 64 GiBs.
+              mtime: decoder.decode(value.slice(136, 148 - 1)),
+              checksum: decoder.decode(value.slice(148, 156 - 2)),
+              typeflag: decoder.decode(value.slice(156, 157)),
+              linkname: decoder.decode(value.slice(157, 257)).replaceAll(
+                "\0",
+                "",
+              ),
+              pad: value.slice(257),
+            };
+            if (
+              [117, 115, 116, 97, 114, 0, 48, 48].every((byte, i) =>
+                value[i + 257] === byte
+              )
+            ) {
+              header = {
+                ...header,
+                magic: decoder.decode(value.slice(257, 263)),
+                version: decoder.decode(value.slice(263, 265)),
+                uname: decoder.decode(value.slice(265, 297)).replaceAll(
+                  "\0",
+                  "",
+                ),
+                gname: decoder.decode(value.slice(297, 329)).replaceAll(
+                  "\0",
+                  "",
+                ),
+                devmajor: value.slice(329, 337).reduce((x, y) => x + y),
+                devminor: value.slice(337, 345).reduce((x, y) => x + y),
+                prefix: decoder.decode(value.slice(345, 500)).replaceAll(
+                  "\0",
+                  "",
+                ),
+                pad: value.slice(500),
+              };
             }
-          if (header.typeflag !== '0' && header.typeflag !== '\0')
-            continue
+            if (header.typeflag !== "0" && header.typeflag !== "\0") {
+              continue;
+            }
 
-          const size = header.size
-          let i = Math.ceil(size / 512)
-          const isCancelled = () => this.cancelled
+            const size = header.size;
+            let i = Math.ceil(size / 512);
+            const isCancelled = () => this.cancelled;
 
-          controller.enqueue({
-            pathname: (header.prefix ? header.prefix + '/' : '') + header.name,
-            header,
-            readable: new ReadableStream({
-              async pull(controller) {
-                if (i > 0) {
-                  const { done, value } = await reader.read()
-                  if (done) {
-                    header = undefined
-                    return controller.close()
+            controller.enqueue({
+              pathname: (header.prefix ? header.prefix + "/" : "") +
+                header.name,
+              header,
+              readable: new ReadableStream({
+                async pull(controller) {
+                  if (i > 0) {
+                    const { done, value } = await reader.read();
+                    if (done) {
+                      header = undefined;
+                      return controller.close();
+                    }
+                    controller.enqueue(
+                      i-- === 1 ? value.slice(0, size % 512) : value,
+                    );
+                  } else {
+                    header = undefined;
+                    if (isCancelled()) {
+                      reader.cancel();
+                    }
+                    controller.close();
                   }
-                  controller.enqueue(i-- === 1 ? value.slice(0, size % 512) : value)
-                }
-                else {
-                  header = undefined
-                  if (isCancelled())
-                    reader.cancel()
-                  controller.close()
-                }
-              },
-              async cancel() {
-                if (i !== 1)
-                  while (i-- > 0) {
-                    const { done } = await reader.read()
-                    if (done)
-                      break
+                },
+                async cancel() {
+                  if (i !== 1) {
+                    while (i-- > 0) {
+                      const { done } = await reader.read();
+                      if (done) {
+                        break;
+                      }
+                    }
                   }
-                header = undefined
-              }
-            })
-          })
-          break
-        }
-      },
-      cancel() {
-        this.cancelled = true
-      }
-    } as UnderlyingSource<TarEntry> & { cancelled: boolean }
-    )
+                  header = undefined;
+                },
+              }),
+            });
+            break;
+          }
+        },
+        cancel() {
+          this.cancelled = true;
+        },
+      } as UnderlyingSource<TarEntry> & { cancelled: boolean },
+    );
   }
 }
 
@@ -221,6 +262,8 @@ export class UnTar extends ReadableStream<TarEntry> {
  *
  * @example
  * ```ts
+ * import { UnTarStream } from '@std/archive'
+ *
  * await Deno.mkdir('out/')
  * for await (
  *   const entry of (await Deno.open('./out.tar.gz'))
@@ -233,29 +276,32 @@ export class UnTar extends ReadableStream<TarEntry> {
  * ```
  */
 export class UnTarStream {
-  #readable: ReadableStream<TarEntry>
-  #writable: WritableStream<Uint8Array>
+  #readable: ReadableStream<TarEntry>;
+  #writable: WritableStream<Uint8Array>;
 
   /**
    * Creates an instance.
    */
   constructor() {
-    const { readable, writable } = new TransformStream()
-    const unTar = new UnTar(readable)
-    this.#readable = unTar
-    this.#writable = writable
+    const { readable, writable } = new TransformStream<
+      Uint8Array,
+      Uint8Array
+    >();
+    const unTar = new UnTar(readable);
+    this.#readable = unTar;
+    this.#writable = writable;
   }
 
   /**
    * Returns a ReadableStream of the files in the archive.
    */
   get readable(): ReadableStream<TarEntry> {
-    return this.#readable
+    return this.#readable;
   }
 
   /**
    * Returns a WritableStream for the archive to be expanded.
   */
  get writable(): WritableStream<Uint8Array> {
-    return this.#writable
+    return this.#writable;
  }
 }

From 6a3ad13e9b14be781a383f92d7de6107c37c881e Mon Sep 17 00:00:00 2001
From: BlackAsLight <44320105+BlackAsLight@users.noreply.github.com>
Date: Wed, 3 Apr 2024 19:43:18 +1100
Subject: [PATCH 6/6] refactor(archive): add support as stated in pull request
 comment
 - https://github.com/denoland/deno_std/pull/4538#issuecomment-2030918786

---
 archive/tar.ts   | 256 +++++++++++++++++++++++------------
 archive/untar.ts | 338 +++++++++++++++++++++++++++++------------------
 2 files changed, 379 insertions(+), 215 deletions(-)

diff --git a/archive/tar.ts b/archive/tar.ts
index 75dc02b6329c..757a00ec2f3f 100644
--- a/archive/tar.ts
+++ b/archive/tar.ts
@@ -29,14 +29,35 @@
  */
 
 /**
- * @param pathname is what you want the file to be called inside the archive.
- * @param iterable is the source of the file in Uint8Array form.
- * @param size is the size of the source in bytes. Providing the wrong size can lead to corrupt data.
+ * @param pathname The pathname of the file or directory inside the archive.
+ * @param iterable The source of the file's contents, yielded as `Uint8Array` chunks.
+ * @param size The size of the file in bytes.
+ * @param [sizeExtension=false] Whether to increase the size limit for this file from the default 8 GiB to 64 GiB.
+ * @param options Optional metadata to set on the file or directory.
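+ *
+ * Below is a minimal sketch of appending entries of this shape to a `Tar`
+ * instance. The pathnames and file contents are hypothetical:
+ * @example
+ * ```ts
+ * import { Tar } from '@std/archive'
+ *
+ * const tar = new Tar()
+ * const data = new TextEncoder().encode('Hello World!')
+ * // A file entry: `size` must match the total bytes yielded by `iterable`.
+ * tar.append({
+ *   pathname: 'deno.txt',
+ *   size: data.length,
+ *   iterable: [data],
+ *   options: { mtime: Math.floor(Date.now() / 1000) }, // Seconds since epoch.
+ * })
+ * // A directory entry: omitting `size` and `iterable` appends a directory.
+ * tar.append({ pathname: 'docs' })
+ * tar.close()
+ * ```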
 */
-export type TarFile = {
+export type TarEntry = {
   pathname: string;
-  iterable: Iterable<Uint8Array> | AsyncIterable<Uint8Array>;
   size: number;
+  sizeExtension?: boolean;
+  iterable: Iterable<Uint8Array> | AsyncIterable<Uint8Array>;
+  options?: Partial<TarOptions>;
+} | {
+  pathname: string;
+  options?: Partial<TarOptions>;
+};
+
+/**
+ * The options that can be attached to a file or directory in the archive.
+ */
+export type TarOptions = {
+  mode: string;
+  uid: string;
+  gid: string;
+  mtime: number;
+  uname: string;
+  gname: string;
+  devmajor: string;
+  devminor: string;
 };
 
 /**
@@ -54,10 +75,10 @@ export type TarFile = {
  * While this format is compatible with most tar readers,
  * the format has several limitations, including:
  * * File sizes can be at most 8 GiBs.
- * * Filenames (including path) must be shorter than 256 characters.
+ * * Filenames (including path) must be at most 256 bytes.
  * * Sparse files are not supported.
  * This implementation does support decoding tarballs with files up to 64 GiBs, and can create them
- * via setting `sizeExtension` to true in the `append` method, but doing so may limit its compatibility
+ * via setting `sizeExtension` to true in `TarEntry` for the `append` method, but doing so may limit its compatibility
  * with older tar implementations.
  *
  * @example
@@ -108,65 +129,95 @@ export type TarFile = {
  * ```
  */
 export class Tar {
-  #files: {
+  #paths: string[] = [];
+  #entries: ({
     prefix: Uint8Array;
     name: Uint8Array;
+    typeflag: string;
+    options: Partial<TarOptions>;
     iterable: Iterable<Uint8Array> | AsyncIterable<Uint8Array>;
     size: number;
     sizeExtension: boolean;
-  }[] = [];
+  } | {
+    prefix: Uint8Array;
+    name: Uint8Array;
+    typeflag: string;
+    options: Partial<TarOptions>;
+    sizeExtension: boolean;
+  })[] = [];
   #readable: ReadableStream<Uint8Array>;
-  #finishedAppending: boolean = false;
 
+  #finishedAppending = false;
   /**
    * Constructs a new instance.
    */
   constructor() {
     const gen = (async function* (tar) {
       while (
-        (!tar.#finishedAppending || tar.#files.length) &&
+        (
+          !tar.#finishedAppending ||
+          tar.#entries.length
+        ) &&
         await new Promise((a) => setTimeout(() => a(true), 0))
       ) {
-        if (tar.#files.length) {
-          const file = tar.#files.shift()!;
-          const encoder = new TextEncoder();
-          const header = new Uint8Array(512);
+        if (!tar.#entries.length) {
+          continue;
+        }
+
+        const entry = tar.#entries.shift()!;
+        const encoder = new TextEncoder();
+        const header = new Uint8Array(512);
 
-          header.set(file.name); // name
-          header.set(
-            encoder.encode(
-              "000644 \0" + // mode
-                "000000 \0" + // uid
-                "000000 \0" + // gid
-                file.size.toString(8).padStart(file.sizeExtension ? 12 : 11) +
-                (file.sizeExtension ? "" : " ") + // size
-                "00000000000 " + // mtime
-                "        " + // checksum | Needs to be updated
-                "0" + // typeflag
-                "\0".repeat(100) + // linkname
-                "ustar\0" + // magic
-                "00" + // version
-                "\0".repeat(32 + 32 + 8 + 8), // uname, gname, devmajor, devminor
-            ),
-            100,
-          );
-          header.set(file.prefix, 345); // prefix
+        header.set(entry.name); // name
+        header.set(
+          encoder.encode(
+            (entry.options.mode ?? (entry.typeflag === "5" ? "755" : "644"))
+              .padStart(6, "0") +
+              " \0" + // mode
+              (entry.options.uid ?? "").padStart(6, "0") + " \0" + // uid
+              (entry.options.gid ?? "").padStart(6, "0") + " \0" + // gid
+              ("size" in entry ? entry.size.toString(8) : "").padStart(
+                entry.sizeExtension ? 12 : 11,
+                "0",
+              ) + (entry.sizeExtension ? "" : " ") + // size
+              (entry.options.mtime?.toString(8) ?? "").padStart(11, "0") +
+              " " + // mtime
+              " ".repeat(8) + // checksum | Needs to be updated
+              entry.typeflag + // typeflag
+              "\0".repeat(100) + // linkname
+              "ustar\0" + // magic
+              "00" + // version
+              (entry.options.uname ?? "").padEnd(32, "\0") + // uname
+              (entry.options.gname ?? "").padEnd(32, "\0") + // gname
+              (entry.options.devmajor ?? "").padEnd(8, "\0") + // devmajor
+              (entry.options.devminor ?? "").padEnd(8, "\0"), // devminor
+          ),
+          100,
+        );
+        header.set(entry.prefix, 345); // prefix
 
-          header.set(
-            encoder.encode(
-              header.reduce((x, y) => x + y).toString(8).padStart(6, "0") +
-                "\0",
-            ),
-            148,
-          );
-          yield header;
+        header.set(
+          encoder.encode(
+            header.reduce((x, y) => x + y).toString(8).padStart(6, "0") + "\0",
+          ),
+          148,
+        ); // update checksum
+        yield header;
 
-          for await (const x of file.iterable) {
+        if ("size" in entry) {
+          let size = 0;
+          for await (const x of entry.iterable) {
+            size += x.length;
             yield x;
           }
-          yield encoder.encode("\0".repeat(512 - file.size % 512));
+          if (entry.size !== size) {
+            throw new Error(
+              "Invalid Tarball! Provided size did not match bytes read from iterable.",
+            );
+          }
+          // Pad to the next 512-byte boundary (no padding if already aligned).
+          yield new Uint8Array((512 - entry.size % 512) % 512);
         }
       }
-      yield new TextEncoder().encode("\0".repeat(1024));
+      yield new Uint8Array(1024); // Tarballs end with two zero-filled 512-byte blocks.
     })(this);
     this.#readable = new ReadableStream({
       async pull(controller) {
@@ -181,39 +232,41 @@ export class Tar {
   }
 
   /**
-   * Append a file to the archive. This method will throw if you provide an incompatible
-   * size or pathname, or have already called the `close` method.
-   * @param file Details of the TarFile being appended to the archive.
-   * @param [sizeExtension=false] Enable up to 64 GiB files in the archive instead of 8 GiBs.
+   * Append a file or directory to the archive. Throws if the size or pathname
+   * provided is invalid, or if `close` has already been called.
    */
-  append(file: TarFile, sizeExtension = false): void {
+  append(entry: TarEntry): void {
     if (this.#finishedAppending) {
-      throw new Error("This Tar instance has already be closed.");
+      throw new Error("This Tar instance has already been closed.");
     }
 
-    // Validate size provided.
-    if (file.size < 0 || Math.pow(8, sizeExtension ? 12 : 11) < file.size) {
+    if (
+      "size" in entry &&
+      (
+        entry.size < 0 ||
+        Math.pow(8, entry.sizeExtension ? 12 : 11) < entry.size ||
+        Number.isNaN(entry.size)
+      )
+    ) {
       throw new Error(
-        "Invalid File Size: Up to 8 GiBs allowed or 64 GiBs if `sizeExtension` is enabled.",
+        "Invalid Size Provided! Size cannot exceed 8 GiB by default or 64 GiB with sizeExtension set to true.",
       );
     }
-
-    file.pathname = file.pathname.split("/").filter((x) => x).join("/");
-    if (file.pathname.startsWith("./")) {
-      file.pathname = file.pathname.slice(2);
+    entry.pathname = entry.pathname.split("/").filter((x) => x).join("/");
+    if (entry.pathname.startsWith("./")) {
+      entry.pathname = entry.pathname.slice(2);
+    }
+    if (!("size" in entry)) {
+      entry.pathname += "/";
     }
 
-    // Validating the path provided.
-    const pathname = new TextEncoder().encode(file.pathname);
+    const pathname = new TextEncoder().encode(entry.pathname);
     if (pathname.length > 256) {
-      throw new Error("Provided pathname is too long. Max 256 bytes.");
+      throw new Error("Invalid Pathname! Pathname cannot exceed 256 bytes.");
     }
 
     let i = Math.max(0, pathname.lastIndexOf(47));
     if (pathname.slice(i).length > 100) {
-      throw new Error(
-        "Filename in pathname is too long. Filename can be at most 100 bytes.",
-      );
+      throw new Error("Invalid Filename! Filename cannot exceed 100 bytes.");
     }
 
     if (pathname.length <= 100) {
@@ -222,7 +275,7 @@ export class Tar {
       for (; i > 0; --i) {
         i = pathname.lastIndexOf(47, i);
         if (pathname.slice(i).length > 100) {
-          i = Math.max(0, pathname.indexOf(47, ++i));
+          i = Math.max(0, pathname.indexOf(47, i + 1));
           break;
         }
       }
@@ -231,27 +284,47 @@ export class Tar {
     const prefix = pathname.slice(0, i++);
     if (prefix.length > 155) {
       throw new Error(
-        "Provided pathname cannot be split into [155, 100] segments along a forward slash separator.",
+        "Invalid Pathname! Pathname needs to be splittable on a forward slash into at most 155 bytes (prefix) and 100 bytes (name).",
      );
    }
-    this.#files.push({
-      name: prefix.length ? pathname.slice(i) : pathname,
-      prefix,
-      iterable: file.iterable,
-      size: file.size,
-      sizeExtension,
-    });
+    const name = prefix.length ? pathname.slice(i) : pathname;
+
+    if (this.#paths.includes(entry.pathname)) { // Ignore duplicate pathnames.
+      return;
+    }
+    this.#paths.push(entry.pathname);
+
+    if ("size" in entry) { // File
+      this.#entries.push({
+        prefix,
+        name,
+        typeflag: "0",
+        options: entry.options ?? {},
+        iterable: entry.iterable,
+        size: entry.size,
+        sizeExtension: entry.sizeExtension ?? false,
+      });
+    } else { // Directory
+      this.#entries.push({
+        prefix,
+        name,
+        typeflag: "5",
+        options: entry.options ?? {},
+        sizeExtension: false,
+      });
+    }
   }
 
   /**
-   * Closes the tar archive from accepting more files. Must be called for tar archive to be properly created.
+   * Close the archive once you're done appending. Must be called for the
+   * archive to be properly created.
    */
   close(): void {
     this.#finishedAppending = true;
   }
 
   /**
-   * A Readable Stream of the archive.
+   * Read the archive via a `ReadableStream`.
    */
   get readable(): ReadableStream<Uint8Array> {
     return this.#readable;
@@ -259,7 +332,7 @@ export class Tar {
 }
 
 /**
- * Like the Tar class, but takes in a ReadableStream and outputs a ReadableStream
+ * Like the Tar class, but takes in a ReadableStream and outputs a ReadableStream.
  *
  * @example
 * ```ts
@@ -284,34 +357,41 @@ export class Tar {
  */
 export class TarStream {
   #readable: ReadableStream<Uint8Array>;
-  #writable: WritableStream<TarFile>;
+  #writable: WritableStream<TarEntry>;
 
   /**
-   * Creates an instance.
+   * Constructs a new instance.
    */
   constructor() {
-    const { readable, writable } = new TransformStream<TarFile, TarFile>();
+    const { readable, writable } = new TransformStream<TarEntry, TarEntry>();
     const tar = new Tar();
     this.#readable = tar.readable;
     this.#writable = writable;
-    (async () => {
-      for await (const tarFile of readable) {
-        tar.append(tarFile);
-      }
-      tar.close();
-    })();
+    readable.pipeTo(
+      new WritableStream({
+        write(chunk) {
+          tar.append(chunk);
+        },
+        close() {
+          tar.close();
+        },
+        abort() {
+          tar.close();
+        },
+      }),
+    );
   }
 
   /**
-   * Returns a ReadableStream of the archive.
+   * Read the archive via a ReadableStream.
    */
   get readable(): ReadableStream<Uint8Array> {
     return this.#readable;
   }
 
   /**
-   * Returns a WritableStream for the files to be archived.
+   * Write to the archive via a WritableStream.
   */
-  get writable(): WritableStream<TarFile> {
+  get writable(): WritableStream<TarEntry> {
     return this.#writable;
   }
 }
diff --git a/archive/untar.ts b/archive/untar.ts
index c7c34245d711..09069be68870 100644
--- a/archive/untar.ts
+++ b/archive/untar.ts
@@ -30,14 +30,14 @@
  */
 
 /**
- * @param pathname is what the file is called.
- * @param header is the header of the file.
- * @param readable is the contents of the file.
+ * @param pathname The pathname of the item inside the archive.
+ * @param header The header of the item.
+ * @param readable The contents of the file; absent when the item is a directory.
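+ *
+ * Below is a minimal sketch of consuming items of this shape from an `UnTar`
+ * instance. The './out.tar' path is hypothetical:
+ * @example
+ * ```ts
+ * import { UnTar } from '@std/archive'
+ *
+ * for await (
+ *   const item of new UnTar((await Deno.open('./out.tar')).readable)
+ * ) {
+ *   console.log(item.pathname, item.header.size)
+ *   // A file's readable must be consumed or cancelled before the next item
+ *   // resolves. Directories have no readable.
+ *   await item.readable?.cancel()
+ * }
+ * ```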
 */
-export type TarEntry = {
+export type TarItem = {
   pathname: string;
   header: TarHeader;
-  readable: ReadableStream<Uint8Array>;
+  readable?: ReadableStream<Uint8Array>;
 };
 
 /**
@@ -50,17 +50,28 @@ export type TarHeader = {
   uid: string;
   gid: string;
   size: number;
-  mtime: string;
+  mtime: number;
   checksum: string;
   typeflag: string;
   linkname: string;
-  magic?: string;
-  version?: string;
-  uname?: string;
-  gname?: string;
-  devmajor?: number;
-  devminor?: number;
-  prefix?: string;
+  pad: Uint8Array;
+} | {
+  name: string;
+  mode: string;
+  uid: string;
+  gid: string;
+  size: number;
+  mtime: number;
+  checksum: string;
+  typeflag: string;
+  linkname: string;
+  magic: string;
+  version: string;
+  uname: string;
+  gname: string;
+  devmajor: string;
+  devminor: string;
+  prefix: string;
   pad: Uint8Array;
 };
 
@@ -75,10 +86,10 @@ export type TarHeader = {
  * The numeric extension feature of the size to allow up to 64 GiBs is also supported.
  *
  * ### Usage
- * The workflow is to create a UnTar instance passing in a ReadableStream of the archive.
+ * The workflow is to create an UnTar instance, passing in an Iterable or AsyncIterable of the archive.
  * You can then iterate over the instance to pull out the entries one by one and decide
  * if you want to read it or skip over it. Each entry's readable stream must either be
- * consumed or the `cancel` method must be called on it. The next entry won't resolve until
+ * consumed or have its `cancel` method called. The next entry won't resolve **until**
  * either action is done on the ReadableStream.
 *
 * @example
@@ -113,140 +124,213 @@ export type TarHeader = {
  * }
  * ```
  */
-export class UnTar extends ReadableStream<TarEntry> {
+export class UnTar extends ReadableStream<TarItem> {
   /**
    * Constructs a new instance.
    */
-  constructor(readable: ReadableStream<Uint8Array>) {
-    const reader = readable.pipeThrough(
-      new TransformStream(
-        {
-          push: new Uint8Array(0),
-          transform(chunk, controller) {
-            const x = new Uint8Array(this.push.length + chunk.length);
-            x.set(this.push);
-            x.set(chunk, this.push.length);
-            for (let i = 512; i <= x.length; i += 512) {
-              controller.enqueue(x.slice(i - 512, i));
-            }
-            this.push = x.length % 512
-              ? x.slice(-x.length % 512)
-              : new Uint8Array(0);
-          },
-          flush(controller) {
-            if (this.push.length) { // This should always be zero!
-              controller.enqueue(this.push);
-            }
-          },
-        } as Transformer<Uint8Array, Uint8Array> & { push: Uint8Array },
-      ),
-    ).getReader();
+  constructor(iterable: Iterable<Uint8Array> | AsyncIterable<Uint8Array>) {
+    const reader = new ReadableStream(
+      { // Converts iterable into ReadableStream.
+        iter: Symbol.iterator in iterable
+          ? iterable[Symbol.iterator]()
+          : iterable[Symbol.asyncIterator](),
+        async pull(controller) {
+          const { done, value } = await this.iter.next();
+          if (done) {
+            controller.close();
+          } else {
+            controller.enqueue(value);
+          }
+        },
+      } as UnderlyingSource<Uint8Array> & {
+        iter: Iterator<Uint8Array> | AsyncIterator<Uint8Array>;
+      },
+    )
+      .pipeThrough(
+        new TransformStream(
+          { // Slices ReadableStream's Uint8Array into 512 byte chunks.
+            push: new Uint8Array(0),
+            transform(chunk, controller) {
+              const x = new Uint8Array(this.push.length + chunk.length);
+              x.set(this.push);
+              x.set(chunk, this.push.length);
+              for (let i = 512; i <= x.length; i += 512) {
+                controller.enqueue(x.slice(i - 512, i));
+              }
+              this.push = x.length % 512
+                ? x.slice(-x.length % 512)
+                : new Uint8Array(0);
+            },
+            flush(controller) {
+              if (this.push.length) {
+                controller.error("Tarball has an unexpected number of bytes.");
+              }
+            },
+          } as Transformer<Uint8Array, Uint8Array> & { push: Uint8Array },
+        ),
+      )
+      .pipeThrough(
+        new TransformStream(
+          { // Trims the last two Uint8Array chunks off.
+            array: [],
+            transform(chunk, controller) {
+              this.array.push(chunk);
+              if (this.array.length === 3) {
+                controller.enqueue(this.array.shift()!);
+              }
+            },
+            flush(controller) {
+              if (this.array.length < 2) {
+                controller.error("Tarball was too small to be valid.");
+              } else if (
+                !this.array.every((array) => array.every((byte) => byte === 0))
+              ) {
+                controller.error("Tarball has invalid ending.");
+              }
+            },
+          } as Transformer<Uint8Array, Uint8Array> & { array: Uint8Array[] },
+        ),
+      )
+      .getReader();
 
     let header: TarHeader | undefined;
     super(
       {
         cancelled: false,
         async pull(controller) {
-          while (header !== undefined) {
+          while (header != undefined) {
             await new Promise((a) => setTimeout(a, 0));
           }
 
-          while (true) {
-            const { done, value } = await reader.read();
-            if (done || value.reduce((x, y) => x + y) === 0) {
-              return controller.close();
-            }
+          const { done, value } = await reader.read();
+          if (done) {
+            return controller.close();
+          }
 
-            const decoder = new TextDecoder();
-            header = {
-              name: decoder.decode(value.slice(0, 100)).replaceAll("\0", ""),
-              mode: decoder.decode(value.slice(100, 108 - 2)),
-              uid: decoder.decode(value.slice(108, 116 - 2)),
-              gid: decoder.decode(value.slice(116, 124 - 2)),
-              size: parseInt(
-                decoder.decode(value.slice(124, 136)).trimEnd(),
-                8,
-              ), // Support tarballs with files up to 64 GiBs.
-              mtime: decoder.decode(value.slice(136, 148 - 1)),
-              checksum: decoder.decode(value.slice(148, 156 - 2)),
-              typeflag: decoder.decode(value.slice(156, 157)),
-              linkname: decoder.decode(value.slice(157, 257)).replaceAll(
-                "\0",
-                "",
-              ),
-              pad: value.slice(257),
-            };
-            if (
-              [117, 115, 116, 97, 114, 0, 48, 48].every((byte, i) =>
-                value[i + 257] === byte
-              )
-            ) {
-              header = {
-                ...header,
-                magic: decoder.decode(value.slice(257, 263)),
-                version: decoder.decode(value.slice(263, 265)),
-                uname: decoder.decode(value.slice(265, 297)).replaceAll(
-                  "\0",
-                  "",
-                ),
-                gname: decoder.decode(value.slice(297, 329)).replaceAll(
-                  "\0",
-                  "",
-                ),
-                devmajor: value.slice(329, 337).reduce((x, y) => x + y),
-                devminor: value.slice(337, 345).reduce((x, y) => x + y),
-                prefix: decoder.decode(value.slice(345, 500)).replaceAll(
-                  "\0",
-                  "",
-                ),
-                pad: value.slice(500),
-              };
-            }
-            if (header.typeflag !== "0" && header.typeflag !== "\0") {
-              continue;
-            }
+          const decoder = new TextDecoder();
+          { // Validate checksum
+            const checksum = value.slice();
+            checksum.set(new Uint8Array(8).fill(32), 148);
+            if (
+              checksum.reduce((x, y) => x + y) !==
+                parseInt(decoder.decode(value.slice(148, 156 - 2)), 8)
+            ) {
+              return controller.error(
+                "Invalid Tarball. Header failed to pass checksum.",
+              );
+            }
+          }
+          header = {
+            name: decoder.decode(value.slice(0, 100)).replaceAll("\0", ""),
+            mode: decoder.decode(value.slice(100, 108 - 2)),
+            uid: decoder.decode(value.slice(108, 116 - 2)),
+            gid: decoder.decode(value.slice(116, 124 - 2)),
+            size: parseInt(decoder.decode(value.slice(124, 136)).trimEnd(), 8),
+            mtime: parseInt(decoder.decode(value.slice(136, 148 - 1)), 8),
+            checksum: decoder.decode(value.slice(148, 156 - 2)),
+            typeflag: decoder.decode(value.slice(156, 157)),
+            linkname: decoder.decode(value.slice(157, 257)).replaceAll(
+              "\0",
+              "",
+            ),
+            pad: value.slice(257),
+          };
+          if (header.typeflag === "\0") {
+            header.typeflag = "0";
+          }
+          // Check if header is POSIX ustar | new TextEncoder().encode('ustar\0' + '00')
+          if (
+            [117, 115, 116, 97, 114, 0, 48, 48].every((byte, i) =>
+              value[i + 257] === byte
+            )
+          ) {
+            header = {
+              ...header,
+              magic: decoder.decode(value.slice(257, 263)),
+              version: decoder.decode(value.slice(263, 265)),
+              uname: decoder.decode(value.slice(265, 297)).replaceAll("\0", ""),
+              gname: decoder.decode(value.slice(297, 329)).replaceAll("\0", ""),
+              devmajor: decoder.decode(value.slice(329, 337)).replaceAll(
+                "\0",
+                "",
+              ),
+              devminor: decoder.decode(value.slice(337, 345)).replaceAll(
+                "\0",
+                "",
+              ),
+              prefix: decoder.decode(value.slice(345, 500)).replaceAll(
+                "\0",
+                "",
+              ),
+              pad: value.slice(500),
+            };
+          }
 
+          if (header.typeflag === "0") {
             const size = header.size;
             let i = Math.ceil(size / 512);
             const isCancelled = () => this.cancelled;
-
-            controller.enqueue({
-              pathname: (header.prefix ? header.prefix + "/" : "") +
-                header.name,
-              header,
-              readable: new ReadableStream({
-                async pull(controller) {
-                  if (i > 0) {
-                    const { done, value } = await reader.read();
-                    if (done) {
-                      header = undefined;
-                      return controller.close();
-                    }
-                    controller.enqueue(
-                      i-- === 1 ? value.slice(0, size % 512) : value,
-                    );
-                  } else {
-                    header = undefined;
-                    if (isCancelled()) {
-                      reader.cancel();
-                    }
-                    controller.close();
-                  }
-                },
-                async cancel() {
-                  if (i !== 1) {
-                    while (i-- > 0) {
-                      const { done } = await reader.read();
-                      if (done) {
-                        break;
-                      }
-                    }
-                  }
-                  header = undefined;
-                },
-              }),
-            });
-            break;
-          }
+            let lock = false;
+            controller.enqueue(
+              {
+                pathname: ("prefix" in header && header.prefix.length
+                  ? header.prefix + "/"
+                  : "") + header.name,
+                header,
+                readable: new ReadableStream({
+                  async pull(controller) {
+                    if (i > 0) {
+                      lock = true;
+                      const { done, value } = await reader.read();
+                      if (done) {
+                        header = undefined;
+                        controller.error("Tarball ended unexpectedly.");
+                      } else {
+                        // Pull is unlocked before enqueue is called because if
+                        // pull is in the middle of processing a chunk when
+                        // cancel is called, nothing after enqueue will run.
+                        lock = false;
+                        controller.enqueue(
+                          i-- === 1 ? value.slice(0, size % 512) : value,
+                        );
+                      }
+                    } else {
+                      header = undefined;
+                      if (isCancelled()) {
+                        reader.cancel();
+                      }
+                      controller.close();
+                    }
+                  },
+                  async cancel() {
+                    while (lock) {
+                      await new Promise((a) => setTimeout(a, 0));
+                    }
+                    try {
+                      while (i-- > 0) {
+                        if ((await reader.read()).done) {
+                          throw new Error("Tarball ended unexpectedly.");
+                        }
+                      }
+                    } finally {
+                      header = undefined;
+                    }
+                  },
+                }),
+              } satisfies TarItem,
+            );
+          } else {
+            controller.enqueue(
+              {
+                pathname: ("prefix" in header && header.prefix.length
+                  ? header.prefix + "/"
+                  : "") + header.name,
+                header,
+              } satisfies TarItem,
+            );
+            header = undefined;
+          }
         },
         cancel() {
@@ -276,10 +360,10 @@ export class UnTar extends ReadableStream<TarEntry> {
  * ```
  */
 export class UnTarStream {
-  #readable: ReadableStream<TarEntry>;
+  #readable: ReadableStream<TarItem>;
   #writable: WritableStream<Uint8Array>;
 
   /**
-   * Creates an instance.
+   * Constructs a new instance.
    */
   constructor() {
     const { readable, writable } = new TransformStream<
@@ -292,14 +376,14 @@ export class UnTarStream {
   }
 
   /**
-   * Returns a ReadableStream of the files in the archive.
+   * Read the contents of the archive via a ReadableStream.
    */
-  get readable(): ReadableStream<TarEntry> {
+  get readable(): ReadableStream<TarItem> {
     return this.#readable;
   }
 
   /**
-   * Returns a WritableStream for the archive to be expanded.
+   * Write the archive via a WritableStream.
    */
   get writable(): WritableStream<Uint8Array> {
     return this.#writable;