// ---------------------------------------------------------------------------
// codec (originally src/codec.ts): UTF-8, Base64, Base64-VLQ and CRC-32.
// ---------------------------------------------------------------------------

/** Base64 encoding alphabet and = for padding. */
const chars64 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=';

/** Map Base64 sextet to encoded character (index 64 is the '=' padding). */
const toBase64: string[] = [];

/** Map ASCII code of encoded character to Base64 sextet. */
const fromBase64: number[] = [];

// Fill Base64 character mapping tables.
for (let i = 0; i < 65; ++i) {
  toBase64[i] = chars64.charAt(i);
  fromBase64[chars64.charCodeAt(i)] = i;
}

/** Subtract from shifted and summed UTF-16 surrogate pair code units to get
 * correct Unicode code point. Equals:
 * (0xd800 << 10) + 0xdc00 - 0x10000 */
const surrogateOffset = 0x35fdc00;

/** Any byte container the writers and codecs accept. */
export type ArrayType = number[] | Uint8Array | Buffer;

export function encodeUTF8(src: string): number[];
export function encodeUTF8(
  src: string,
  dst?: ArrayType,
  dstPos?: number,
  srcPos?: number,
  srcEnd?: number
): number;

/** UTF-8 encode a string to an array of bytes.
 * Unpaired surrogates are stored as-is (as 3-byte sequences) so the
 * transform stays reversible for any input string.
 *
 * @param src String to encode.
 * @param dst Destination array or buffer for storing the result.
 * @param dstPos Initial offset to destination, default is 0.
 * @param srcPos Initial offset to source data, default is 0.
 * @param srcEnd Source data end offset, default is its length.
 *
 * @return End offset past data stored if a destination was given,
 * otherwise a numeric array containing the encoded result.
 * Note that output length cannot exceed 3 * input length. */
export function encodeUTF8(
  src: string,
  dst?: ArrayType,
  dstPos = 0,
  srcPos = 0,
  srcEnd = src.length
) {
  let result: number[] | undefined;
  const out = dst || (result = []);

  while (srcPos < srcEnd) {
    let code = src.charCodeAt(srcPos++);

    if (code >= 0x80) {
      // Lead byte marker of a 2-byte sequence; replaced below for longer ones.
      let b = 0b11000000;

      if (code >= 0x800) {
        let a = 0b11100000;
        b = 0b10000000;

        // Note: code <= 0xffff because JavaScript exposes strings
        // only as a 16-bit, UTF-16 encoded buffer.
        if (((code - 0xd800) & 0xfc00) === 0) {
          // Surrogate pair first half. Only look ahead inside the requested
          // range, so a pair split by srcEnd is not consumed past the end.
          const next = srcPos < srcEnd ? src.charCodeAt(srcPos) || 0 : 0;

          if (((next - 0xdc00) & 0xfc00) === 0) {
            // Surrogate pair second half. Re-encode only if both halves
            // are in the valid range; otherwise store them as-is.
            a = 0b10000000;
            code = (code << 10) + next - surrogateOffset;
            out[dstPos++] = 0b11110000 | (code >> 18);
            ++srcPos;
          }
        }

        out[dstPos++] = a | ((code >> 12) & 0b00111111);
      }

      out[dstPos++] = b | ((code >> 6) & 0b00111111);
      code = 0b10000000 | (code & 0b00111111);
    }

    out[dstPos++] = code;
  }

  return result || dstPos;
}

/** Base64 encode a string or numeric array to string.
 * Input strings will be first re-encoded in UTF-8.
 *
 * @param src String or array to encode.
 * @param dst Output string prefix, default is empty.
 * @param srcPos Initial offset to source data, default is 0.
 * @param srcEnd Source data end offset, default is its length.
 * Offsets past the end of the data are clamped.
 *
 * @return Prefix followed by the encoded, '='-padded string. */
export function encode64(
  src: string | ArrayType,
  dst = '',
  srcPos = 0,
  srcEnd?: number
) {
  const bytes: ArrayType = typeof src == 'string' ? encodeUTF8(src) : src;
  const end = srcEnd === void 0 || srcEnd > bytes.length ? bytes.length : srcEnd;

  while (srcPos < end) {
    // Number of real input bytes available for this 3-byte group.
    // Bytes past `end` are never read; missing ones become padding.
    const left = end - srcPos;
    const a = bytes[srcPos];
    const b = left > 1 ? bytes[srcPos + 1] : 0;
    const c = left > 2 ? bytes[srcPos + 2] : 0;
    srcPos += 3;

    dst += (
      toBase64[a >> 2] +
      toBase64[((a & 0b11) << 4) | (b >> 4)] +
      toBase64[left > 1 ? ((b & 0b1111) << 2) | (c >> 6) : 64] +
      toBase64[left > 2 ? c & 0b111111 : 64]
    );
  }

  return dst;
}

export function decodeVLQ(src: string): number[];
export function decodeVLQ(
  src: string,
  dst?: number[],
  dstPos?: number,
  srcPos?: number,
  srcEnd?: number
): number;

/** Decode a string containing Base64 variable-length quantities,
 * as seen in source maps.
 *
 * @param src String to decode.
 * @param dst Destination array for storing the result.
 * @param dstPos Initial offset to destination, default is 0.
 * @param srcPos Initial offset to source data, default is 0.
 * @param srcEnd Source data end offset, default is its length.
 *
 * @return End offset past data stored if a destination was given,
 * otherwise a numeric array containing the decoded result. */
export function decodeVLQ(
  src: string,
  dst?: number[],
  dstPos = 0,
  srcPos = 0,
  srcEnd = src.length
) {
  let result: number[] | undefined;
  const out = dst || (result = []);
  let shift = 0;
  let num = 0;

  while (srcPos < srcEnd) {
    const code = fromBase64[src.charCodeAt(srcPos++)];
    // Low 5 bits carry payload, least significant group first.
    num += (code & 31) << shift;

    if (code & 32) {
      // Continuation bit set: more groups follow.
      shift += 5;
    } else {
      // Lowest payload bit is the sign; the rest is the magnitude.
      const sign = num & 1;
      out[dstPos++] = ((num >>> 1) ^ -sign) + sign;

      shift = 0;
      num = 0;
    }
  }

  return result || dstPos;
}

/** Encode integers as Base64 variable-length quantities, the inverse of
 * decodeVLQ. Magnitudes are expected to stay below 2^30 so the value plus
 * its sign bit fits 32-bit integer arithmetic.
 *
 * @param src Integers to encode.
 * @param dst Output string prefix, default is empty.
 * @param srcPos Initial offset to source data, default is 0.
 * @param srcEnd Source data end offset, default is its length.
 *
 * @return Prefix followed by the encoded string. */
export function encodeVLQ(
  src: number[],
  dst = '',
  srcPos = 0,
  srcEnd = src.length
) {
  while (srcPos < srcEnd) {
    const value = src[srcPos++];
    // Move the sign into the lowest bit, magnitude into the bits above it.
    let num = value < 0 ? ((-value) << 1) | 1 : value << 1;

    do {
      let digit = num & 31;
      num >>>= 5;
      // Flag every group except the last with the continuation bit.
      if (num) digit |= 32;
      dst += toBase64[digit];
    } while (num);
  }

  return dst;
}

/** Incremental 32-bit table-driven checksum state.
 * Instances are normally obtained from CRC32.create(). */
export class Hasher32 {

  /** @param tbl 256-entry lookup table for the chosen polynomial. */
  constructor(private tbl: number[]) {}

  /** Feed more data into the checksum.
   *
   * @param src String (UTF-8 encoded first) or bytes to append.
   * @param srcPos Initial offset to source data, default is 0.
   * @param srcEnd Source data end offset, default is its length.
   *
   * @return Checksum of all data appended so far, as an unsigned integer. */
  append(
    src: string | ArrayType,
    srcPos = 0,
    srcEnd?: number
  ) {
    const { tbl } = this;
    let { crc } = this;
    const bytes: ArrayType = typeof src == 'string' ? encodeUTF8(src) : src;
    const end = srcEnd === void 0 ? bytes.length : srcEnd;

    while (srcPos < end) {
      crc = (crc >>> 8) ^ tbl[(crc & 0xff) ^ bytes[srcPos++]];
    }

    this.crc = crc;

    // Final inversion; >>> 0 reinterprets the result as unsigned.
    return ~crc >>> 0;
  }

  /** Running register, stored inverted (starts as all ones). */
  crc = ~0;

}

/** 32-bit Cyclic Redundancy Check. */
export class CRC32 {

  /** @param poly Reversed generator polynomial, default edb88320 (Ethernet, GZIP, PNG).
   * Other good choices are 82f63b78 (Castagnoli) used in Btrfs and eb31d82e (Koopman). */
  constructor(public poly = 0xedb88320) {
    for (let n = 0; n < 256; ++n) {
      let crc = n;

      for (let bit = 0; bit < 8; ++bit) {
        crc = ((crc >>> 1) ^ (-(crc & 1) & poly)) >>> 0;
      }

      this.tbl[n] = crc;
    }
  }

  /** Start a new independent checksum sharing this polynomial's table. */
  create() {
    return new Hasher32(this.tbl);
  }

  /** Lookup table: checksum contribution of every possible byte. */
  tbl: number[] = [];

}

// ---------------------------------------------------------------------------
// Writer (originally src/Writer.ts): sequential byte writers.
// ---------------------------------------------------------------------------

/** Appends bytes to an array or buffer at a moving position.
 * All methods return the writer itself to allow chaining. */
export class Writer {

  /** @param data Destination storage, default is a new empty array.
   * @param pos Offset where the next byte gets written. */
  constructor(
    public data: ArrayType = [],
    public pos = 0
  ) {}

  /** Write the lowest 8 bits of a number as one byte. */
  u8(num: number) {
    this.data[this.pos++] = num & 0xff;

    return this;
  }

  /** Copy bytes from src[srcPos] up to but excluding src[srcEnd]. */
  copy(src: ArrayType, srcPos = 0, srcEnd = src.length) {
    const { data } = this;
    let { pos } = this;

    while (srcPos < srcEnd) {
      data[pos++] = src[srcPos++];
    }

    this.pos = pos;
    return this;
  }

  /** Write one element per UTF-16 code unit of the string, unencoded.
   * Intended for plain ASCII text. */
  ascii(src: string) {
    const { data } = this;
    let { pos } = this;

    for (let i = 0; i < src.length; ++i) {
      data[pos++] = src.charCodeAt(i);
    }

    this.pos = pos;
    return this;
  }

  /** Write a string encoded as UTF-8. */
  utf8(src: string) {
    this.pos = encodeUTF8(src, this.data, this.pos);

    return this;
  }

  /** Fill with the padding byte until the position reaches `end`.
   * Does nothing if the position is already there or past it. */
  padTo(end: number, padding = 0) {
    const { data } = this;
    let { pos } = this;

    while (pos < end) {
      data[pos++] = padding;
    }

    this.pos = pos;
    return this;
  }

}

/** Writer with little-endian (least significant byte first) integers. */
export class WriterLittle extends Writer {

  /** Write the lowest 16 bits of a number. */
  u16(num: number) {
    const { data } = this;
    let { pos } = this;
    this.pos = pos + 2;

    data[pos++] = num & 0xff; num >>= 8;
    data[pos] = num & 0xff;

    return this;
  }

  /** Write the lowest 32 bits of a number. */
  u32(num: number) {
    const { data } = this;
    let { pos } = this;
    this.pos = pos + 4;

    data[pos++] = num & 0xff; num >>= 8;
    data[pos++] = num & 0xff; num >>= 8;
    data[pos++] = num & 0xff; num >>= 8;
    data[pos] = num & 0xff;

    return this;
  }

}

/** Writer with big-endian (most significant byte first) integers. */
export class WriterBig extends Writer {

  /** Write the lowest 16 bits of a number. */
  u16(num: number) {
    const { data } = this;
    // Advance first, then fill the bytes backwards from the end.
    let pos = (this.pos += 2);

    data[--pos] = num & 0xff; num >>= 8;
    data[--pos] = num & 0xff;

    return this;
  }

  /** Write the lowest 32 bits of a number. */
  u32(num: number) {
    const { data } = this;
    let pos = (this.pos += 4);

    data[--pos] = num & 0xff; num >>= 8;
    data[--pos] = num & 0xff; num >>= 8;
    data[--pos] = num & 0xff; num >>= 8;
    data[--pos] = num & 0xff;

    return this;
  }

}

// ---------------------------------------------------------------------------
// TarFile (originally src/TarFile.ts)
// ---------------------------------------------------------------------------

/** File type mapped from POSIX (partial list). */
const enum TarType {
  FILE = '0',
  HARDLINK = '1',
  SYMLINK = '2',
  DIRECTORY = '5',
  FIFO = '6',
  LONGNAME = 'L'
}

/** Size of one tar record; headers and file data are padded to it. */
const tarBlockSize = 512;

/** Contents of the 8-byte header checksum field while the checksum is being
 * summed. Built with repeat() so the width cannot be lost to whitespace
 * reformatting of the source. */
const tarChecksumBlank = ' '.repeat(8);

/** Convert number to octal, left-pad with zeroes to given length
 * and append an ASCII NUL.
 *
 * @param num Non-negative integer to format.
 * @param len Total field length including the terminating NUL. */
function padOctal(num: number, len: number) {
  let digits = num.toString(8);

  while (digits.length < len - 1) {
    digits = '0' + digits;
  }

  return digits + '\0';
}

/** Generate a tape archive compatible with UStar (Unix Standard TAR),
 * also readable according to more recent POSIX.1-2001 / pax. */
export class TarFile {

  /** Add a file to the archive.
   *
   * @param path Relative path: string (to be UTF-8 encoded) or numeric buffer.
   * @param data Contents: string (to be UTF-8 encoded) or numeric buffer.
   * @param mode POSIX file permissions.
   * @param stamp JavaScript timestamp: milliseconds from 1970-01-01.
   * Current time is used if omitted, null or NaN; 0 is a valid timestamp.
   * @param type Entry type flag stored in the header. */
  add(
    path: string | ArrayType,
    data: string | ArrayType,
    mode = 0o644,
    stamp?: number | null,
    type = TarType.FILE
  ) {
    const { content } = this;

    if (this.trailerWritten) {
      // finish() was already called: continue over the end-of-archive
      // marker so the new entry does not end up hidden behind it.
      content.pos -= 2 * tarBlockSize;
      this.trailerWritten = false;
    }

    const mtime = stamp == null || isNaN(stamp) ? new Date().getTime() : stamp;
    // Whole seconds without 32-bit truncation (dates past 2038 stay valid);
    // the octal field cannot hold negative values, so clamp at the epoch.
    const seconds = Math.max(0, Math.floor(mtime / 1000));

    const name: ArrayType = typeof path == 'string' ? encodeUTF8(path + '\0') : path;
    const body: ArrayType = typeof data == 'string' ? encodeUTF8(data) : data;

    const uid = 0;
    const gid = 0;

    const nameLen = name.length;
    /** Start of the part stored in the 100-byte name field. */
    let nameStart = 0;
    /** Start of the part stored in the prefix field. */
    let prefixStart = 0;

    if (nameLen > 100) {
      let scan = nameLen - 100;
      nameStart = scan;

      // Find first slash within the last 100 bytes.
      while (name[scan] !== 47 && ++scan < nameLen) { }

      if (scan < nameLen - 1) nameStart = scan + 1;

      if (name[nameStart - 1] !== 47 || nameStart > 156) {
        // Path is unrepresentable in UStar format. Use a GNU-specific
        // kludge: store it in another file with a special name and flag.
        this.add('././@LongLink', name, mode, mtime, TarType.LONGNAME);
      }

      prefixStart = Math.max(0, nameStart - 156);
    }

    const start = content.pos;

    (content
      // Last 100 bytes of file path.
      .copy(name, nameStart)
      .padTo(start + 100)
      .ascii(
        padOctal(mode, 8) +
        padOctal(uid, 8) +
        padOctal(gid, 8) +
        padOctal(body.length, 12) +
        padOctal(seconds, 12) +
        // Checksum field (offset 148) counts as spaces until it is known.
        tarChecksumBlank +
        // Type flag lands at offset 156.
        type
      )
      // Omit link information.
      .padTo(start + 257)
      .ascii('ustar\0' + '00')
      // Omit user and group names and device numbers.
      .padTo(start + 345)
      // Previous bytes of file path, excluding the separating slash.
      .copy(name, prefixStart, nameStart - 1)
      .padTo(start + tarBlockSize)
    );

    const end = content.pos;
    let sum = 0;

    for (let i = start; i < end; ++i) {
      sum += content.data[i];
    }

    content.pos = end - (tarBlockSize - 148);
    // Six digits and a NUL; the eighth byte stays a space on purpose.
    content.ascii(padOctal(sum, 7));
    content.pos = end;

    content.copy(body);
    // Round up to the next full record.
    content.padTo(((content.pos - 1) | (tarBlockSize - 1)) + 1);
  }

  /** Terminate the archive with the two zero-filled records POSIX requires
   * as an end-of-archive marker and return its bytes. Safe to call
   * repeatedly; entries added afterwards replace the marker.
   *
   * @return The archive storage (same object on every call). */
  finish() {
    const { content } = this;

    if (!this.trailerWritten) {
      content.padTo(content.pos + 2 * tarBlockSize);
      this.trailerWritten = true;
    }

    return content.data;
  }

  /** Archive bytes written so far. */
  content = new WriterLittle();

  /** True while the end-of-archive marker is the last thing written. */
  private trailerWritten = false;

}

// ---------------------------------------------------------------------------
// ZipFile (originally src/ZipFile.ts)
// ---------------------------------------------------------------------------

/** General purpose bit flags, documented for interest. */
const enum ZipFlag {
  /** If set, file contents are encrypted. */
  ENCRYPT = 1,
  /** If set, CRC and sizes go in a descriptor section after file
   * contents, which were probably of unknown size prior to streaming
   * directly from elsewhere. */
  STREAM = 1 << 3,
  /** Language encoding flag (EFS) signals that file name and comment are
   * encoded in UTF-8. */
  UTF8 = 1 << 11
}

/** Compression methods (partial list). */
const enum ZipMethod {
  /** Contents as-is, without compression. */
  STORE = 0,
  DEFLATE = 8,
  LZMA = 14
}

/** Operating system used to generate the archive (partial list). */
const enum ZipOS {
  DOS = 0,
  UNIX = 3,
  NTFS = 11,
  VFAT = 14,
  OSX = 19
}

/** File attributes for compression software internal use. */
const enum ZipAttr {
  BINARY = 0,
  TEXT = 1
}

/** POSIX file type (partial list). */
const enum PosixType {
  FIFO = 1,
  DIRECTORY = 4,
  FILE = 8,
  SYMLINK = 10,
  SOCKET = 12
}

/** Magic numbers to identify file sections. */
const enum Magic {
  START = 0x04034b50,
  ITEM = 0x02014b50,
  END = 0x06054b50
}

/** CRC polynomial used to verify integrity of each archived file. */
const crcFactory = new CRC32();

/** Generate an uncompressed (stored) ZIP archive. */
export class ZipFile {

  /** Add a file to the archive.
   *
   * @param path Relative path: string (to be UTF-8 encoded) or numeric buffer.
   * @param data Contents: string (to be UTF-8 encoded) or numeric buffer.
   * @param mode POSIX file permissions.
   * @param stamp JavaScript timestamp: milliseconds from 1970-01-01.
   * Current time is used if omitted or null. Times before 1980 (or invalid)
   * are stored as 1980-01-01 00:00, the earliest the format can express.
   * @param comment Per-file comment stored in the central directory. */
  add(
    path: string | ArrayType,
    data: string | ArrayType,
    mode = 0o644,
    stamp?: number | null,
    comment: string | ArrayType = ''
  ) {
    const { content, directory } = this;
    const date = stamp == null ? new Date() : new Date(stamp);

    const name: ArrayType = typeof path == 'string' ? encodeUTF8(path) : path;
    const body: ArrayType = typeof data == 'string' ? encodeUTF8(data) : data;
    const note: ArrayType = typeof comment == 'string' ? encodeUTF8(comment) : comment;

    const version = 10;
    const flags = ZipFlag.UTF8;

    /** DOS internal date encoding format lives on, here, in local time.
     * Notably accurate only to 2 seconds. Defaults are 1980-01-01 00:00:00. */
    let time = 0;
    let day = (1 << 5) | 1;

    if (date.getFullYear() >= 1980) {
      time = (date.getHours() << 11) | (date.getMinutes() << 5) | (date.getSeconds() >> 1);
      day = ((date.getFullYear() - 1980) << 9) | ((date.getMonth() + 1) << 5) | date.getDate();
    }

    const crc = crcFactory.create().append(body);
    const size = body.length;
    const extra: number[] = [];
    const diskNumber = 0;
    const dosAttr = 0x00;
    // Upper half of the external attributes: POSIX file type and mode.
    const unixAttr = (PosixType.FILE << 12) | mode;
    const headerOffset = content.pos;

    content.u32(Magic.START);

    const metaStart = content.pos;

    (content
      .u16(version)
      .u16(flags).u16(ZipMethod.STORE)
      .u16(time).u16(day)
      // Stored without compression: both sizes are equal.
      .u32(crc).u32(size).u32(size)
      .u16(name.length).u16(extra.length)
    );

    const metaEnd = content.pos;

    content.copy(name).copy(extra).copy(body);

    (directory
      .u32(Magic.ITEM).u8(version).u8(ZipOS.UNIX)
      // Central directory repeats the local header fields verbatim.
      .copy(content.data, metaStart, metaEnd)
      .u16(note.length)
      .u16(diskNumber)
      .u16(ZipAttr.BINARY).u16(dosAttr).u16(unixAttr).u32(headerOffset)
      .copy(name).copy(extra).copy(note)
    );

    ++this.count;
  }

  /** Append the central directory and end record, and return the archive.
   * Call once, after all files have been added.
   *
   * @param comment Archive comment: string (to be UTF-8 encoded) or bytes.
   * @return The archive storage. */
  finish(comment: string | ArrayType = '') {
    const { content, directory, count } = this;

    const dirOffset = content.pos;
    const dirSize = directory.pos;
    const diskNumber = 0;

    const note: ArrayType = typeof comment == 'string' ? encodeUTF8(comment) : comment;

    (content
      // Copy only what was written, not the whole backing store.
      .copy(directory.data, 0, dirSize)
      .u32(Magic.END)
      .u16(diskNumber).u16(diskNumber)
      .u16(count).u16(count)
      .u32(dirSize).u32(dirOffset)
      .u16(note.length).copy(note)
    );

    return content.data;
  }

  /** Local headers and file contents written so far. */
  content = new WriterLittle();
  /** Central directory entries, appended to content by finish(). */
  directory = new WriterLittle();
  /** Number of files added. */
  count = 0;

}