From 1f0dfff02c2fa62f14ede147555a51a6965d8b50 Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Sun, 11 Feb 2024 16:10:10 +0100 Subject: [PATCH 01/14] RowBinary experiments --- packages/client-common/src/client.ts | 2 +- .../src/data_formatter/formatter.ts | 1 + .../src/data_formatter/row_binary.ts | 57 ++++++++++++ .../node_stream_row_binary.test.ts | 48 ++++++++++ packages/client-node/src/client.ts | 10 ++- packages/client-node/src/result_set.ts | 88 +++++++++++++++++++ 6 files changed, 202 insertions(+), 4 deletions(-) create mode 100644 packages/client-common/src/data_formatter/row_binary.ts create mode 100644 packages/client-node/__tests__/integration/node_stream_row_binary.test.ts diff --git a/packages/client-common/src/client.ts b/packages/client-common/src/client.ts index ecfa3f90..9bb0406f 100644 --- a/packages/client-common/src/client.ts +++ b/packages/client-common/src/client.ts @@ -24,7 +24,7 @@ export type MakeConnection = ( export type MakeResultSet = ( stream: Stream, format: DataFormat, - session_id: string + query_id: string ) => BaseResultSet export interface ValuesEncoder { diff --git a/packages/client-common/src/data_formatter/formatter.ts b/packages/client-common/src/data_formatter/formatter.ts index 259e9b00..ca96d1f5 100644 --- a/packages/client-common/src/data_formatter/formatter.ts +++ b/packages/client-common/src/data_formatter/formatter.ts @@ -32,6 +32,7 @@ const supportedRawFormats = [ 'CustomSeparatedWithNames', 'CustomSeparatedWithNamesAndTypes', 'Parquet', + 'RowBinaryWithNamesAndTypes', ] as const export type JSONDataFormat = (typeof supportedJSONFormats)[number] diff --git a/packages/client-common/src/data_formatter/row_binary.ts b/packages/client-common/src/data_formatter/row_binary.ts new file mode 100644 index 00000000..29d95603 --- /dev/null +++ b/packages/client-common/src/data_formatter/row_binary.ts @@ -0,0 +1,57 @@ +type DecodeResult = [T, number] + +export class RowBinaryDecoder { + static columns( + src: Uint8Array + ): DecodeResult<{ names: string[]; types: string[] }> { + const res = readLEB128(src, 0) + const numColumns = res[0] + let nextLoc = res[1] + console.log(`Total columns: ${numColumns}`) + const names = new Array(numColumns) + const types = new Array(numColumns) + for (let i = 0; i < numColumns; i++) { + const res = readLEB128String(src, nextLoc) + nextLoc = res[1] + names[i] = res[0] + } + for (let i = 0; i < numColumns; i++) { + const res = readLEB128String(src, nextLoc) + nextLoc = res[1] + types[i] = res[0] + } + return [{ names, types }, nextLoc] + } + static int8(src: Uint8Array, loc: number): DecodeResult { + const x = src[loc] + console.log(`Got number: ${x}`) + return x < 128 ? 
[x, loc + 1] : [x - 256, loc + 1] + } + static string(src: Uint8Array, loc: number): DecodeResult { + return readLEB128String(src, loc) + } +} + +function readLEB128(src: Uint8Array, loc: number): DecodeResult { + let result = 0 + let shift = 0 + let ix = 0 + // eslint-disable-next-line no-constant-condition + while (true) { + const byte = src[loc + ix] + ix++ + result |= (byte & 0x7f) << shift + shift += 7 + if ((0x80 & byte) === 0) { + if (shift < 32 && (byte & 0x40) !== 0) { + return [result | (~0 << shift), loc + ix] + } + return [result, loc + ix] + } + } +} + +function readLEB128String(src: Uint8Array, loc: number): DecodeResult { + const [length, nextLoc] = readLEB128(src, loc) + return [src.slice(nextLoc, nextLoc + length).toString(), nextLoc + length] +} diff --git a/packages/client-node/__tests__/integration/node_stream_row_binary.test.ts b/packages/client-node/__tests__/integration/node_stream_row_binary.test.ts new file mode 100644 index 00000000..7a7060ae --- /dev/null +++ b/packages/client-node/__tests__/integration/node_stream_row_binary.test.ts @@ -0,0 +1,48 @@ +import type { ClickHouseClient } from '@clickhouse/client-common' +import { createTestClient, guid } from '@test/utils' +import type Stream from 'stream' + +fdescribe('[Node.js] stream RowBinary', () => { + let client: ClickHouseClient + let tableName: string + + beforeEach(async () => { + tableName = `insert_stream_row_binary_${guid()}` + client = createTestClient() + await client.command({ + query: `CREATE TABLE ${tableName} (i Int8, s String) ENGINE MergeTree ORDER BY (i)`, + clickhouse_settings: { + wait_end_of_query: 1, + }, + }) + console.log(`Created table ${tableName}`) + await client.insert({ + table: tableName, + values: [ + { i: 42, s: 'foo' }, + { i: -5, s: 'bar' }, + ], + format: 'JSONEachRow', + }) + }) + afterEach(async () => { + await client.close() + }) + + it('should stream stuff', async () => { + const rs = await client.query({ + query: `SELECT * FROM ${tableName} ORDER BY i DESC`, + format: 'RowBinaryWithNamesAndTypes', + }) + const values: unknown[][] = [] + for await (const rows of rs.stream()) { + rows.forEach((row: unknown[]) => { + values.push(row) + }) + } + expect(values).toEqual([ + [42, 'foo'], + [-5, 'bar'], + ]) + }) +}) diff --git a/packages/client-node/src/client.ts b/packages/client-node/src/client.ts index 7d46112d..8ccca7b9 100644 --- a/packages/client-node/src/client.ts +++ b/packages/client-node/src/client.ts @@ -8,7 +8,7 @@ import { ClickHouseClient } from '@clickhouse/client-common' import type Stream from 'stream' import type { NodeConnectionParams, TLSParams } from './connection' import { NodeHttpConnection, NodeHttpsConnection } from './connection' -import { ResultSet } from './result_set' +import { ResultSet, RowBinaryResultSet } from './result_set' import { NodeValuesEncoder } from './utils' export type NodeClickHouseClientConfigOptions = @@ -82,8 +82,12 @@ export function createClient( make_result_set: ( stream: Stream.Readable, format: DataFormat, - session_id: string - ) => new ResultSet(stream, format, session_id), + query_id: string + ) => { + return format === 'RowBinaryWithNamesAndTypes' + ? 
new RowBinaryResultSet(stream, format, query_id) + : new ResultSet(stream, format, query_id) + }, values_encoder: new NodeValuesEncoder(), close_stream: async (stream) => { stream.destroy() diff --git a/packages/client-node/src/result_set.ts b/packages/client-node/src/result_set.ts index 01b317ef..81faab5f 100644 --- a/packages/client-node/src/result_set.ts +++ b/packages/client-node/src/result_set.ts @@ -1,5 +1,6 @@ import type { BaseResultSet, DataFormat, Row } from '@clickhouse/client-common' import { decode, validateStreamFormat } from '@clickhouse/client-common' +import { RowBinaryDecoder } from '@clickhouse/client-common/src/data_formatter/row_binary' import { Buffer } from 'buffer' import type { TransformCallback } from 'stream' import Stream, { Transform } from 'stream' @@ -108,4 +109,91 @@ export class ResultSet implements BaseResultSet { } } +export class RowBinaryResultSet implements BaseResultSet { + constructor( + private _stream: Stream.Readable, + private readonly format: DataFormat, + public readonly query_id: string + ) {} + + async text(): Promise { + throw new Error( + `Can't call 'text()' on RowBinary result set; please use 'stream' instead` + ) + } + + async json(): Promise { + throw new Error( + `Can't call 'json()' on RowBinary result set; please use 'stream' instead` + ) + } + + stream(): Stream.Readable { + // If the underlying stream has already ended by calling `text` or `json`, + // Stream.pipeline will create a new empty stream + // but without "readableEnded" flag set to true + if (this._stream.readableEnded) { + throw Error(streamAlreadyConsumedMessage) + } + if (this.format !== 'RowBinaryWithNamesAndTypes') { + throw new Error( + `Can't use RowBinaryResultSet if the format is not RowBinary` + ) + } + + const toRows = new Transform({ + transform( + chunk: Buffer, + _encoding: BufferEncoding, + callback: TransformCallback + ) { + const src = chunk.subarray() + const rows: unknown[][] = [] + let res: [unknown, number] + const colDataRes = RowBinaryDecoder.columns(src) + const { names, types } = colDataRes[0] + let loc = colDataRes[1] + console.log(colDataRes[0]) + console.log(`Next loc: ${loc}`) + while (loc < src.length) { + const values = new Array(names.length) + types.forEach((t, i) => { + switch (t) { + case 'Int8': + res = RowBinaryDecoder.int8(src, loc) + console.log(`Int8: ${res[0]}, next loc: ${res[1]}`) + values[i] = res[0] + loc = res[1] + break + case 'String': + res = RowBinaryDecoder.string(src, loc) + console.log(`String: ${res[0]}, next loc: ${res[1]}`) + values[i] = res[0] + loc = res[1] + break + default: + throw new Error(`Unknown type ${t}`) + } + }) + rows.push(values) + } + this.push(rows) + callback() + }, + autoDestroy: true, + objectMode: true, + }) + + return Stream.pipeline(this._stream, toRows, function pipelineCb(err) { + if (err) { + console.error(err) + } + }) + } + + close() { + this._stream.destroy() + } +} + const streamAlreadyConsumedMessage = 'Stream has been already consumed' From 22a1c34bd707a64dd62cdc302f1c08a78e319529 Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Sun, 3 Mar 2024 04:42:58 +0100 Subject: [PATCH 02/14] More experiments. 
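
Notes (illustrative, not part of the diff): every type decoder now
returns null when the current chunk does not contain enough bytes for
the value, so the stream Transform can stash the unconsumed tail and
resume once the next chunk arrives. RowBinaryWithNamesAndTypes encodes
the column count, the column/type name lengths, and every String length
as unsigned LEB128 varints. A minimal standalone sketch of such a
reader, using the same [value, nextLoc] result convention as the
decoders in this patch (the helper name is illustrative):

    // Decodes an unsigned LEB128 (varint) integer starting at `loc`.
    // Each byte contributes its lower 7 bits; the high bit signals
    // that another byte follows. Returns null if the chunk ends
    // before the terminating byte, mirroring the incomplete-chunk
    // handling in this patch.
    function readUnsignedLEB128(
      src: Uint8Array,
      loc: number
    ): [number, number] | null {
      let result = 0
      let shift = 0
      let ix = 0
      while (loc + ix < src.length) {
        const byte = src[loc + ix]
        ix++
        result |= (byte & 0x7f) << shift
        shift += 7
        if ((byte & 0x80) === 0) {
          return [result, loc + ix]
        }
      }
      return null // ran out of bytes mid-varint
    }

    // readUnsignedLEB128(new Uint8Array([0xe5, 0x8e, 0x26]), 0)
    // yields [624485, 3]
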
--- benchmarks/leaks/row_binary.ts | 27 ++ benchmarks/tsconfig.json | 1 + .../__tests__/unit/row_binary_decoder.test.ts | 145 ++++++++++ packages/client-common/src/client.ts | 6 +- packages/client-common/src/config.ts | 2 +- .../src/data_formatter/formatter.ts | 2 +- .../client-common/src/data_formatter/index.ts | 1 + .../src/data_formatter/row_binary.ts | 201 ++++++++++++-- .../node_stream_row_binary.test.ts | 259 ++++++++++++++++-- packages/client-node/src/config.ts | 12 +- packages/client-node/src/result_set.ts | 153 ++++++++--- 11 files changed, 725 insertions(+), 84 deletions(-) create mode 100644 benchmarks/leaks/row_binary.ts create mode 100644 packages/client-common/__tests__/unit/row_binary_decoder.test.ts diff --git a/benchmarks/leaks/row_binary.ts b/benchmarks/leaks/row_binary.ts new file mode 100644 index 00000000..f3798c03 --- /dev/null +++ b/benchmarks/leaks/row_binary.ts @@ -0,0 +1,27 @@ +import { createClient } from '@clickhouse/client' +import { RowBinaryResultSet } from '@clickhouse/client/result_set' + +void (async () => { + const client = createClient({ + url: 'http://localhost:8123', + }) + async function streamRowBinary() { + const start = +new Date() + const rs = await client.query({ + query: `SELECT * FROM random ORDER BY id ASC LIMIT 10000`, + format: 'RowBinary', + }) + const values = await (rs as RowBinaryResultSet).get() + // console.log(values) + // console.log( + // `RowBinary elapsed: ${+new Date() - start} ms, length: ${values.length}` + // ) + return values.length + } + + for (let i = 0; i < 1000; i++) { + await streamRowBinary() + } + + process.exit(0) +})() diff --git a/benchmarks/tsconfig.json b/benchmarks/tsconfig.json index cc899888..4bd870c0 100644 --- a/benchmarks/tsconfig.json +++ b/benchmarks/tsconfig.json @@ -7,6 +7,7 @@ "outDir": "dist", "baseUrl": "./", "paths": { + "@clickhouse/client-common": ["../packages/client-common/src/index.ts"], "@clickhouse/client": ["../packages/client-node/src/index.ts"], "@clickhouse/client/*": ["../packages/client-node/src/*"] } diff --git a/packages/client-common/__tests__/unit/row_binary_decoder.test.ts b/packages/client-common/__tests__/unit/row_binary_decoder.test.ts new file mode 100644 index 00000000..1f1e9577 --- /dev/null +++ b/packages/client-common/__tests__/unit/row_binary_decoder.test.ts @@ -0,0 +1,145 @@ +import { + readBytesAsInt, + readBytesAsSignedBigInt, + readBytesAsUnsignedBigInt, +} from '../../src/data_formatter' + +describe('RowBinary decoder', () => { + describe('Unsigned integers', () => { + it('should decode UInt16', async () => { + const args: [Uint8Array, number][] = [ + [new Uint8Array([0x00, 0x00]), 0], + [new Uint8Array([0x01, 0x00]), 1], + [new Uint8Array([0x02, 0x00]), 2], + [new Uint8Array([0x10, 0x00]), 16], + [new Uint8Array([0xff, 0x00]), 255], + [new Uint8Array([0xff, 0xff]), 65_535], + [new Uint8Array([0x00, 0x80]), 32_768], + ] + args.forEach(([src, expected]) => { + expect(readBytesAsInt(src, 0, 2, false)) + .withContext(ctx(src, expected)) + .toBe(expected) + }) + }) + it('should decode UInt32', async () => { + const args: [Uint8Array, number][] = [ + [new Uint8Array([0x00, 0x00, 0x00, 0x00]), 0], + [new Uint8Array([0x01, 0x00, 0x00, 0x00]), 1], + [new Uint8Array([0x02, 0x00, 0x00, 0x00]), 2], + [new Uint8Array([0x10, 0x00, 0x00, 0x00]), 16], + [new Uint8Array([0xff, 0x00, 0x00, 0x00]), 255], + [new Uint8Array([0xff, 0xff, 0x00, 0x00]), 65_535], + [new Uint8Array([0xff, 0xff, 0xff, 0x00]), 16_777_215], + [new Uint8Array([0xff, 0xff, 0xff, 0x7f]), 2_147_483_647], + [new 
Uint8Array([0xff, 0xff, 0xff, 0xff]), 4_294_967_295], + [new Uint8Array([0x00, 0x00, 0x00, 0x80]), 2_147_483_648], + ] + args.forEach(([src, expected]) => { + expect(readBytesAsInt(src, 0, 4, false)) + .withContext(ctx(src, expected)) + .toBe(expected) + }) + }) + }) + + describe('Signed integers', () => { + it('should decode Int16', async () => { + const args: [Uint8Array, number][] = [ + [new Uint8Array([0x00, 0x00]), 0], + [new Uint8Array([0x01, 0x00]), 1], + [new Uint8Array([0x02, 0x00]), 2], + [new Uint8Array([0x10, 0x00]), 16], + [new Uint8Array([0xff, 0x00]), 255], + [new Uint8Array([0xff, 0xff]), -1], + [new Uint8Array([0x00, 0x80]), -32_768], + ] + args.forEach(([src, expected]) => { + expect(readBytesAsInt(src, 0, 2, true)) + .withContext(ctx(src, expected)) + .toBe(expected) + }) + }) + it('should decode Int32', async () => { + const args: [Uint8Array, number][] = [ + [new Uint8Array([0x00, 0x00, 0x00, 0x00]), 0], + [new Uint8Array([0x01, 0x00, 0x00, 0x00]), 1], + [new Uint8Array([0x02, 0x00, 0x00, 0x00]), 2], + [new Uint8Array([0x10, 0x00, 0x00, 0x00]), 16], + [new Uint8Array([0xff, 0x00, 0x00, 0x00]), 255], + [new Uint8Array([0xff, 0xff, 0x00, 0x00]), 65_535], + [new Uint8Array([0xff, 0xff, 0xff, 0x00]), 16_777_215], + [new Uint8Array([0xff, 0xff, 0xff, 0x7f]), 2_147_483_647], + [new Uint8Array([0xff, 0xff, 0xff, 0xff]), -1], + [new Uint8Array([0x00, 0x00, 0x00, 0x80]), -2_147_483_648], + ] + args.forEach(([src, expected]) => { + expect(readBytesAsInt(src, 0, 4, true)) + .withContext(ctx(src, expected)) + .toBe(expected) + }) + }) + }) + + describe('BigInt', () => { + it('should decode UInt64', async () => { + const args: [Uint8Array, BigInt][] = [ + [new Uint8Array([0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]), 0n], + [new Uint8Array([0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]), 1n], + [new Uint8Array([0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]), 2n], + [ + new Uint8Array([0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]), + 255n, + ], + [ + new Uint8Array([0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]), + 65_535n, + ], + [ + new Uint8Array([0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00]), + 16_777_215n, + ], + [ + new Uint8Array([0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00]), + 4_294_967_295n, + ], + [ + new Uint8Array([0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00]), + 1099511627775n, + ], + [ + new Uint8Array([0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00]), + 281474976710655n, + ], + [ + new Uint8Array([0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00]), + 72057594037927935n, + ], + [ + new Uint8Array([0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]), + 18446744073709551615n, + ], + ] + + args.forEach(([src, expected]) => { + expect(readBytesAsUnsignedBigInt(src, 0, 8)) + .withContext(ctx(src, expected)) + .toBe(expected) + }) + }) + + it('should decode Int64 ', async () => { + expect( + readBytesAsSignedBigInt( + new Uint8Array([0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80]), + 0, + 8 + ) + ).toEqual(1n) + }) + }) + + function ctx(src: Uint8Array, expected: number | BigInt) { + return `Expected ${src.toString()} to be decoded as ${expected}` + } +}) diff --git a/packages/client-common/src/client.ts b/packages/client-common/src/client.ts index ea2d90a0..8b033938 100644 --- a/packages/client-common/src/client.ts +++ b/packages/client-common/src/client.ts @@ -230,7 +230,11 @@ export class ClickHouseClient { function formatQuery(query: string, format: DataFormat): string { query = query.trim() query = removeTrailingSemi(query) - return query + ' \nFORMAT 
' + format + return ( + query + + ' \nFORMAT ' + + (format === 'RowBinary' ? 'RowBinaryWithNamesAndTypes' : format) + ) } function removeTrailingSemi(query: string) { diff --git a/packages/client-common/src/config.ts b/packages/client-common/src/config.ts index cdff26f9..62dbbaec 100644 --- a/packages/client-common/src/config.ts +++ b/packages/client-common/src/config.ts @@ -91,7 +91,7 @@ export type MakeConnection< export type MakeResultSet = ( stream: Stream, format: DataFormat, - session_id: string + query_id: string ) => BaseResultSet export interface ValuesEncoder { diff --git a/packages/client-common/src/data_formatter/formatter.ts b/packages/client-common/src/data_formatter/formatter.ts index ca96d1f5..4a472065 100644 --- a/packages/client-common/src/data_formatter/formatter.ts +++ b/packages/client-common/src/data_formatter/formatter.ts @@ -32,7 +32,7 @@ const supportedRawFormats = [ 'CustomSeparatedWithNames', 'CustomSeparatedWithNamesAndTypes', 'Parquet', - 'RowBinaryWithNamesAndTypes', + 'RowBinary', ] as const export type JSONDataFormat = (typeof supportedJSONFormats)[number] diff --git a/packages/client-common/src/data_formatter/index.ts b/packages/client-common/src/data_formatter/index.ts index 8f880b33..a4f5ce41 100644 --- a/packages/client-common/src/data_formatter/index.ts +++ b/packages/client-common/src/data_formatter/index.ts @@ -1,3 +1,4 @@ export * from './formatter' export { formatQueryParams } from './format_query_params' export { formatQuerySettings } from './format_query_settings' +export * from './row_binary' diff --git a/packages/client-common/src/data_formatter/row_binary.ts b/packages/client-common/src/data_formatter/row_binary.ts index 29d95603..d5abe28c 100644 --- a/packages/client-common/src/data_formatter/row_binary.ts +++ b/packages/client-common/src/data_formatter/row_binary.ts @@ -1,38 +1,139 @@ -type DecodeResult = [T, number] +export type ColumnType = + | 'Bool' + | 'UInt8' + | 'Int8' + | 'UInt16' + | 'Int16' + | 'UInt32' + | 'Int32' + // | 'UInt64' + // | 'Int64' + // | 'UInt128' + // | 'Int128' + // | 'UInt256' + // | 'Int256' + | 'String' -export class RowBinaryDecoder { - static columns( - src: Uint8Array - ): DecodeResult<{ names: string[]; types: string[] }> { +export type TypeDecoder = ( + src: Uint8Array, + loc: number +) => DecodeResult | null + +export type DecodeResult = [T, number] +export type DecodeError = { error: string } +export type DecodedColumns = DecodeResult<{ + names: string[] + types: ColumnType[] + decoders: TypeDecoder[] +}> + +export const RowBinaryTypesDecoder = { + bool: (src: Uint8Array, loc: number): DecodeResult | null => { + //// [1,2,3,4] - len 4; max loc 3 + if (src.length < loc + 1) return null + const x = src[loc] === 1 + return [x, loc + 1] + }, + uint8: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 1) return null + const x = src[loc] + return [x, loc + 1] + }, + int8: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 1) return null + const x = src[loc] + return [x, loc + 1] + }, + uint16: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 2) return null + const x = readBytesAsInt(src, loc, 2, false) + return [x, loc + 2] + }, + int16: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 2) return null + const x = readBytesAsInt(src, loc, 2, true) + return [x, loc + 2] + }, + uint32: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 4) return null + const x 
= readBytesAsInt(src, loc, 4, false) + return [x, loc + 4] + }, + int32: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 4) return null + const x = readBytesAsInt(src, loc, 4, true) + return [x, loc + 4] + }, + // uint64: (src: Uint8Array, loc: number): DecodeResult => { + // return [readBytesAsUnsignedBigInt(src, loc, 8), loc + 8] + // }, + // int64: (src: Uint8Array, loc: number): DecodeResult => { + // return [readBytesAsUnsignedBigInt(src, loc, 8), loc + 8] + // }, + // uint128: (src: Uint8Array, loc: number): DecodeResult => { + // return [readBytesAsUnsignedBigInt(src, loc, 16), loc + 16] + // }, + // int128: (src: Uint8Array, loc: number): DecodeResult => { + // return [readBytesAsUnsignedBigInt(src, loc, 16), loc + 16] + // }, + // uint256: (src: Uint8Array, loc: number): DecodeResult => { + // return [readBytesAsUnsignedBigInt(src, loc, 32), loc + 32] + // }, + // int256: (src: Uint8Array, loc: number): DecodeResult => { + // return [readBytesAsUnsignedBigInt(src, loc, 32), loc + 32] + // }, + string: (src: Uint8Array, loc: number): DecodeResult | null => { + return readLEB128String(src, loc) + }, +} + +export const RowBinaryColumnTypeToDecoder: { + [key in ColumnType]: TypeDecoder +} = { + Bool: RowBinaryTypesDecoder.bool, + UInt8: RowBinaryTypesDecoder.uint8, + Int8: RowBinaryTypesDecoder.int8, + UInt16: RowBinaryTypesDecoder.uint16, + Int16: RowBinaryTypesDecoder.int16, + UInt32: RowBinaryTypesDecoder.uint32, + Int32: RowBinaryTypesDecoder.int32, + // UInt64: RowBinaryTypesDecoder.uint64, + // Int64: RowBinaryTypesDecoder.int64, + // UInt128: RowBinaryTypesDecoder.uint128, + // Int128: RowBinaryTypesDecoder.int128, + // UInt256: RowBinaryTypesDecoder.uint256, + // Int256: RowBinaryTypesDecoder.int256, + String: RowBinaryTypesDecoder.string, +} + +export const RowBinaryColumns = { + decode: (src: Uint8Array): DecodedColumns | DecodeError => { const res = readLEB128(src, 0) const numColumns = res[0] let nextLoc = res[1] console.log(`Total columns: ${numColumns}`) const names = new Array(numColumns) - const types = new Array(numColumns) + const types = new Array(numColumns) + const decoders: TypeDecoder[] = new Array(numColumns) for (let i = 0; i < numColumns; i++) { - const res = readLEB128String(src, nextLoc) + const res = readLEB128String(src, nextLoc)! // FIXME non-null assertion nextLoc = res[1] names[i] = res[0] } for (let i = 0; i < numColumns; i++) { - const res = readLEB128String(src, nextLoc) + const res = readLEB128String(src, nextLoc)! // FIXME non-null assertion nextLoc = res[1] - types[i] = res[0] + decoders[i] = RowBinaryColumnTypeToDecoder[res[0] as ColumnType] + if (decoders[i] === undefined) { + return { error: `Unknown column type ${res[0]}` } + } + types[i] = res[0] as ColumnType } - return [{ names, types }, nextLoc] - } - static int8(src: Uint8Array, loc: number): DecodeResult { - const x = src[loc] - console.log(`Got number: ${x}`) - return x < 128 ? 
[x, loc + 1] : [x - 256, loc + 1] - } - static string(src: Uint8Array, loc: number): DecodeResult { - return readLEB128String(src, loc) - } + return [{ names, types, decoders }, nextLoc] + }, } -function readLEB128(src: Uint8Array, loc: number): DecodeResult { +export function readLEB128(src: Uint8Array, loc: number): DecodeResult { let result = 0 let shift = 0 let ix = 0 @@ -51,7 +152,65 @@ function readLEB128(src: Uint8Array, loc: number): DecodeResult { } } -function readLEB128String(src: Uint8Array, loc: number): DecodeResult { +export function readLEB128String( + src: Uint8Array, + loc: number +): DecodeResult | null { + // console.log(`Decoding string at loc ${loc}, src len: ${src.length}`) + if (src.length < loc + 1) return null const [length, nextLoc] = readLEB128(src, loc) + // console.log(`Got next loc for string, next loc ${nextLoc}, len: ${length}, src len: ${src.length}`) + if (src.length < nextLoc + length) return null return [src.slice(nextLoc, nextLoc + length).toString(), nextLoc + length] } + +export function readBytesAsInt( + src: Uint8Array, + loc: number, + bytes: 2 | 4, // (U)Int16 | (U)Int32 + signed: boolean +): number { + let result = 0 + for (let i = 0; i < bytes; i++) { + result |= src[loc + i] << (8 * i) + } + result = result >>> 0 + const max = 2 ** (bytes * 8) + if (signed && result > max / 2 - 1) { + return result - max + } + return result +} + +export function readBytesAsUnsignedBigInt( + src: Uint8Array, + loc: number, + bytes: 8 | 16 | 32 // (U)Int64 | (U)Int128 | (U)Int256 +): BigInt { + let result = 0n + for (let i = bytes - 1; i >= 0; i--) { + // console.log(src[loc + i]) + result = (result << 8n) + BigInt(src[loc + i]) + } + console.log( + `(BigInt) Decoded ${bytes} bytes ${src + .slice(loc, loc + bytes) + .toString()} into ${result}` + ) + return result +} + +export function readBytesAsSignedBigInt( + src: Uint8Array, + loc: number, + bytes: 8 | 16 | 32 // (U)Int64 | (U)Int128 | (U)Int256 +): BigInt { + let result = 0n + for (let i = bytes / 4 - 1; i >= 0; i--) { + const dec = readBytesAsInt(src, loc + i * 4, 4, true) + console.log(`Decoded: ${dec}`) + result += BigInt(dec) + } + // console.log(`(BigInt) Decoded ${bytes} bytes into ${result}`) + return result +} diff --git a/packages/client-node/__tests__/integration/node_stream_row_binary.test.ts b/packages/client-node/__tests__/integration/node_stream_row_binary.test.ts index 7a7060ae..4c3316b2 100644 --- a/packages/client-node/__tests__/integration/node_stream_row_binary.test.ts +++ b/packages/client-node/__tests__/integration/node_stream_row_binary.test.ts @@ -1,38 +1,129 @@ import type { ClickHouseClient } from '@clickhouse/client-common' import { createTestClient, guid } from '@test/utils' import type Stream from 'stream' +import { RowBinaryResultSet } from '../../src/result_set' -fdescribe('[Node.js] stream RowBinary', () => { +describe('[Node.js] stream RowBinary', () => { let client: ClickHouseClient let tableName: string beforeEach(async () => { - tableName = `insert_stream_row_binary_${guid()}` client = createTestClient() - await client.command({ - query: `CREATE TABLE ${tableName} (i Int8, s String) ENGINE MergeTree ORDER BY (i)`, - clickhouse_settings: { - wait_end_of_query: 1, - }, - }) - console.log(`Created table ${tableName}`) - await client.insert({ - table: tableName, - values: [ - { i: 42, s: 'foo' }, - { i: -5, s: 'bar' }, - ], - format: 'JSONEachRow', - }) }) afterEach(async () => { await client.close() }) - it('should stream stuff', async () => { + it('should stream booleans and 
integers up to 32 bits', async () => { + const columns = [ + ['b', 'Bool'], + ['i8', 'Int8'], + ['i16', 'Int16'], + ['i32', 'Int32'], + ['u8', 'UInt8'], + ['u16', 'UInt16'], + ['u32', 'UInt32'], + ] + const values = [ + [true, 127, 32767, 2147483647, 255, 65535, 4294967295], + [false, -128, -32768, -2147483648, 120, 1234, 51234], + ] + await createTableWithData(columns, values, 'int') + await selectAndAssert(values) + }) + + it('should stream 64/128/256-bit integers', async () => { + const columns = [ + ['i64', 'Int64'], + ['i128', 'Int128'], + ['i256', 'Int256'], + // ['u64', 'UInt64'], + // ['u128', 'UInt128'], + // ['u256', 'UInt256'], + ] + const insertValues = [ + [ + '9223372036854775807', + '170141183460469231731687303715884105727', + '57896044618658097711785492504343953926634992332820282019728792003956564819967', + // '18446744073709551615', + // '340282366920938463463374607431768211455', + // '115792089237316195423570985008687907853269984665640564039457584007913129639935', + ], + [ + '-9223372036854775808', + '-170141183460469231731687303715884105728', + '-57896044618658097711785492504343953926634992332820282019728792003956564819968', + // '120', + // '1234', + // '51234', + ], + ] + const assertValues = [ + [ + BigInt('9223372036854775807'), + BigInt('170141183460469231731687303715884105727'), + BigInt( + '57896044618658097711785492504343953926634992332820282019728792003956564819967' + ), + // BigInt('18446744073709551615'), + // BigInt('340282366920938463463374607431768211455'), + // BigInt( + // '115792089237316195423570985008687907853269984665640564039457584007913129639935' + // ), + ], + [ + BigInt('-9223372036854775808'), + BigInt('-170141183460469231731687303715884105728'), + BigInt( + '-57896044618658097711785492504343953926634992332820282019728792003956564819968' + ), + // BigInt('120'), + // BigInt('1234'), + // BigInt('51234'), + ], + ] + await createTableWithData(columns, insertValues, 'bigint') + await selectAndAssert(assertValues) + }) + + fit('should stream', async () => { + await streamJSON() + await streamRowBinary() + expect(1).toEqual(1) + }) + + async function streamJSON() { + const start = +new Date() const rs = await client.query({ - query: `SELECT * FROM ${tableName} ORDER BY i DESC`, - format: 'RowBinaryWithNamesAndTypes', + query: `SELECT * FROM random ORDER BY id ASC LIMIT 100000`, + format: 'JSONEachRow', + }) + const values = await rs.json() + console.log( + `JSON elapsed: ${+new Date() - start} ms, length: ${values.length}` + ) + return values.length + } + + async function streamRowBinary() { + const start = +new Date() + const rs = await client.query({ + query: `SELECT * FROM random ORDER BY id ASC LIMIT 100000`, + format: 'RowBinary', + }) + const values = await (rs as RowBinaryResultSet).get() + // console.log(values) + console.log( + `RowBinary elapsed: ${+new Date() - start} ms, length: ${values.length}` + ) + return values.length + } + + async function selectAndAssert(assertValues: unknown[][]) { + const rs = await client.query({ + query: `SELECT * EXCEPT id FROM ${tableName} ORDER BY id ASC`, + format: 'RowBinary', }) const values: unknown[][] = [] for await (const rows of rs.stream()) { @@ -40,9 +131,127 @@ fdescribe('[Node.js] stream RowBinary', () => { values.push(row) }) } - expect(values).toEqual([ - [42, 'foo'], - [-5, 'bar'], - ]) - }) + expect(values).toEqual(assertValues) + } + + async function createTableWithData( + colNameToType: string[][], + insertValues: unknown[][], + testName: string + ) { + tableName = 
`insert_stream_row_binary_${testName}_${guid()}` + const cols = colNameToType + .map(([name, type]) => `${name} ${type}`) + .join(', ') + await client.command({ + query: `CREATE TABLE ${tableName} (id UInt32, ${cols}) ENGINE MergeTree ORDER BY (id)`, + clickhouse_settings: { + wait_end_of_query: 1, + }, + }) + console.log(`Created table ${tableName}`) + let id = 1 + await client.insert({ + table: tableName, + values: insertValues.map((value) => [id++, ...value]), + format: 'JSONCompactEachRow', + }) + } }) + +const _types = [ + ['b', 'Boolean'], + ['i1', 'Int8'], + ['i2', 'Int16'], + ['i3', 'Int32'], + ['i4', 'Int64'], + // ['i5', 'Int128'], + // ['i6', 'Int256'], + ['u1', 'UInt8'], + ['u2', 'UInt16'], + ['u3', 'UInt32'], + ['u4', 'UInt64'], + // ['u5', 'UInt128'], + // ['u6', 'UInt256'], + ['s', 'String'], +] + .map(([name, type]) => `${name} ${type}`) + .join(', ') + +const _values = [ + { + id: 1, + b: true, + i1: 127, + i2: 32767, + i3: 2147483647, + i4: '9223372036854775807', + // i5: '170141183460469231731687303715884105727', + // i6: '57896044618658097711785492504343953926634992332820282019728792003956564819967', + u1: 255, + u2: 65535, + u3: 4294967295, + u4: '18446744073709551615', + // u5: '340282366920938463463374607431768211455', + // u6: '115792089237316195423570985008687907853269984665640564039457584007913129639935', + s: 'foo', + }, + { + id: 2, + b: false, + i1: -128, + i2: -32768, + i3: -2147483648, + i4: '-9223372036854775808', + // i5: '-170141183460469231731687303715884105728', + // i6: '-57896044618658097711785492504343953926634992332820282019728792003956564819968', + u1: 120, + u2: 1234, + u3: 51234, + u4: '421342', + // u5: '15324355', + // u6: '41345135123432', + s: 'bar', + }, +] + +const _assertValues = [ + [ + true, + 127, + 32767, + 2147483647, + BigInt('9223372036854775807'), + // BigInt('170141183460469231731687303715884105727'), + // BigInt( + // '57896044618658097711785492504343953926634992332820282019728792003956564819967' + // ), + 255, + 65535, + 4294967295, + BigInt('18446744073709551615'), + // BigInt('340282366920938463463374607431768211455'), + // BigInt( + // '115792089237316195423570985008687907853269984665640564039457584007913129639935' + // ), + 'foo', + ], + [ + false, + -128, + -32768, + -2147483648, + BigInt('-9223372036854775808'), + // BigInt('-170141183460469231731687303715884105728'), + // BigInt( + // '-57896044618658097711785492504343953926634992332820282019728792003956564819968' + // ), + 120, + 1234, + 51234, + BigInt('421342'), + // BigInt('15324355'), + // BigInt('41345135123432'), + 'bar', + ], +] diff --git a/packages/client-node/src/config.ts b/packages/client-node/src/config.ts index b8f960fb..e291daf4 100644 --- a/packages/client-node/src/config.ts +++ b/packages/client-node/src/config.ts @@ -10,7 +10,7 @@ import { } from '@clickhouse/client-common' import type Stream from 'stream' import { createConnection, type TLSParams } from './connection' -import { ResultSet } from './result_set' +import { ResultSet, RowBinaryResultSet } from './result_set' import { NodeValuesEncoder } from './utils' export type NodeClickHouseClientConfigOptions = @@ -118,8 +118,14 @@ export const NodeConfigImpl: Required< make_result_set: ( stream: Stream.Readable, format: DataFormat, - session_id: string - ) => new ResultSet(stream, format, session_id), + query_id: string + ) => { + if (format === 'RowBinary') { + return new RowBinaryResultSet(stream, format, query_id) + } else { + return new ResultSet(stream, format, query_id) + } + }, close_stream: 
async (stream) => { stream.destroy() }, diff --git a/packages/client-node/src/result_set.ts b/packages/client-node/src/result_set.ts index 8b6057bf..8d0fd69d 100644 --- a/packages/client-node/src/result_set.ts +++ b/packages/client-node/src/result_set.ts @@ -1,7 +1,7 @@ /* eslint-disable no-console */ import type { BaseResultSet, DataFormat, Row } from '@clickhouse/client-common' import { decode, validateStreamFormat } from '@clickhouse/client-common' -import { RowBinaryDecoder } from '@clickhouse/client-common/src/data_formatter/row_binary' +import { DecodedColumns, RowBinaryColumns } from '@clickhouse/client-common/src/data_formatter' import { Buffer } from 'buffer' import type { TransformCallback } from 'stream' import Stream, { Transform } from 'stream' @@ -131,56 +131,145 @@ export class RowBinaryResultSet implements BaseResultSet { ) } + async get(): Promise { + if (this.format !== 'RowBinary') { + throw new Error( + `Can't use RowBinaryResultSet if the format is not RowBinary` + ) + } + const result: unknown[][] = [] + await new Promise((resolve, reject) => { + this.stream() + .on('data', (rows: unknown[][]) => { + result.push(...rows) + }) + .on('end', resolve) + .on('error', reject) + }) + return result + } + stream(): Stream.Readable { - // If the underlying stream has already ended by calling `text` or `json`, - // Stream.pipeline will create a new empty stream + // If the underlying stream has already ended, + // Stream.pipeline will create a new empty stream, // but without "readableEnded" flag set to true if (this._stream.readableEnded) { throw Error(streamAlreadyConsumedMessage) } - if (this.format !== 'RowBinaryWithNamesAndTypes') { + if (this.format !== 'RowBinary') { throw new Error( `Can't use RowBinaryResultSet if the format is not RowBinary` ) } + let loc = 0 + let columns: DecodedColumns[0] | undefined + let incompleteChunk: Uint8Array | undefined + let row: unknown[] = [] + let lastColumnIdx: number | undefined + const toRows = new Transform({ transform( chunk: Buffer, _encoding: BufferEncoding, callback: TransformCallback ) { - const src = chunk.subarray() + // console.log(`got a new chunk, len: ${chunk.length}`) + let src: Uint8Array + if (incompleteChunk !== undefined) { + // console.log('got an incomplete chunk', incompleteChunk.length) + src = Buffer.concat([incompleteChunk, chunk.subarray()]) + incompleteChunk = undefined + } else { + //console.log('no incomplete chunk') + src = chunk.subarray() + } + if (columns === undefined) { + const res = RowBinaryColumns.decode(src) + if ('error' in res) { + callback(new Error(res.error)) + return + } + columns = res[0] + loc = res[1] + //console.log(`Columns ${columns.names} with types ${columns.types}. 
Next loc after columns: ${loc}`)
        }
        let decodeResult: [unknown, number] | null
        const rows: unknown[][] = []
        // an incomplete row from the previous chunk; continue from the known column index
        if (lastColumnIdx !== undefined) {
          // console.log('incomplete idx:', lastColumnIdx)
          for (let i = lastColumnIdx; i < columns.decoders.length; i++) {
            // FIXME - handle null properly; currently assuming that the second chunk will be enough (but it may not be)
            decodeResult = columns.decoders[i](src, loc)
            if (decodeResult === null) {
              callback(new Error('Not enough data to decode the row'))
              return
            } else {
              // console.log(
              //   `Decoded incomplete column ${columns.names[i]} at loc ${loc} with result ${decodeResult}`
              // )
              row[i] = decodeResult[0]
              loc = decodeResult[1]
            }
          }
          // console.log('incomplete push:', row)
          rows.push(row)
          lastColumnIdx = undefined
        }
        // done with the previous incomplete row; processing the rows as normal
        // console.log('loc and src len', loc, src.length)
        while (loc <= src.length) {
          row = new Array(columns.names.length)
          for (let i = 0; i < columns.decoders.length; i++) {
            decodeResult = columns.decoders[i](src, loc)
            // console.log(
            //   `Decoded column ${columns.names[i]} at loc ${loc} with result ${decodeResult}`
            // )
            // maybe not enough data to finish the row
            if (decodeResult === null) {
              // console.log(
              //   `Decode result is null for column ${columns?.names[i]}`
              // )
              // keep the remaining data to add to the next chunk
              incompleteChunk = src.subarray(loc)
              loc = 0
              lastColumnIdx = i
              if (rows.length > 0) {
                this.push(rows)
              }
              callback()
              return
            } else {
              // decoded a value
              row[i] = decodeResult[0]
              loc = decodeResult[1]
              if (loc > src.length) {
                loc = loc - src.length
                incompleteChunk = src.subarray(loc)
                // if there are more columns to decode, keep the index
                if (i < columns.decoders.length - 1) {
                  lastColumnIdx = i
                } else {
                  rows.push(row)
                }
                if (rows.length > 0) {
                  this.push(rows)
                }
                callback()
                return
              }
            }
          }
          // console.log('complete push, maybe there is more:', row)
          rows.push(row)
        }
        if (rows.length > 0) {
          this.push(rows)
        }
        callback()
      },
      autoDestroy: true,

From c6bf81aeb7d6dafb5a9b8de09266087a99638171 Mon Sep 17 00:00:00 2001
From: slvrtrn
Date: Tue, 5 Mar 2024 03:11:19 +0100
Subject: [PATCH 03/14] More experiments. Working BigInt/Date.
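
Notes (illustrative, not part of the diff): signed integers are decoded
by reading the little-endian bytes as an unsigned value first and then
applying two's complement via a precomputed overflow constant, and Date
arrives on the wire as a UInt16 day count since the Unix epoch. A
condensed sketch of both ideas, matching the constants used by the
decoders below (helper names are illustrative):

    // Two's complement for 16 bits: an unsigned value x >= 2^15
    // represents x - 2^16.
    function toInt16(x: number): number {
      return x < 32768 ? x : x - 65536
    }

    // Date is the number of days since 1970-01-01 (UTC).
    function dateFromDays(days: number): Date {
      return new Date(days * 24 * 3600 * 1000)
    }

    toInt16(65535)      // -1
    dateFromDays(19786) // 2024-03-04T00:00:00.000Z, as in the unit test
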
--- benchmarks/leaks/row_binary.ts | 92 +++++++- .../unit/row_binary_decoders.test.ts | 38 +++ ....test.ts => row_binary_read_bytes.test.ts} | 83 ++----- .../src/data_formatter/formatter.ts | 2 + .../src/data_formatter/row_binary.ts | 216 ------------------ .../src/data_formatter/row_binary/decoder.ts | 203 ++++++++++++++++ .../src/data_formatter/row_binary/index.ts | 2 + .../data_formatter/row_binary/read_bytes.ts | 48 ++++ .../node_stream_row_binary.test.ts | 35 --- packages/client-node/src/config.ts | 3 +- packages/client-node/src/result_set.ts | 179 --------------- .../client-node/src/row_binary_result_set.ts | 146 ++++++++++++ 12 files changed, 540 insertions(+), 507 deletions(-) create mode 100644 packages/client-common/__tests__/unit/row_binary_decoders.test.ts rename packages/client-common/__tests__/unit/{row_binary_decoder.test.ts => row_binary_read_bytes.test.ts} (50%) delete mode 100644 packages/client-common/src/data_formatter/row_binary.ts create mode 100644 packages/client-common/src/data_formatter/row_binary/decoder.ts create mode 100644 packages/client-common/src/data_formatter/row_binary/index.ts create mode 100644 packages/client-common/src/data_formatter/row_binary/read_bytes.ts create mode 100644 packages/client-node/src/row_binary_result_set.ts diff --git a/benchmarks/leaks/row_binary.ts b/benchmarks/leaks/row_binary.ts index f3798c03..1c03d8ee 100644 --- a/benchmarks/leaks/row_binary.ts +++ b/benchmarks/leaks/row_binary.ts @@ -1,26 +1,98 @@ +import type { Row } from '@clickhouse/client' import { createClient } from '@clickhouse/client' -import { RowBinaryResultSet } from '@clickhouse/client/result_set' +import type { RowBinaryResultSet } from '@clickhouse/client/row_binary_result_set' +import { attachExceptionHandlers } from './shared' + +/* + +CREATE TABLE default.fluff +( + `id` UInt32, + `s1` String, + `s2` String, + `u8` UInt8, + `i8` Int8, + `u16` UInt16, + `i16` Int16, + `u32` UInt32, + `i32` Int32, + `u64` UInt64, + `i64` Int64, + `u128` UInt128, + `i128` Int128, + `u256` UInt256, + `i256` Int256, + `date` Date +) +ENGINE = MergeTree +ORDER BY id + +INSERT INTO fluff SELECT * +FROM generateRandom('id UInt32, s1 String, s2 String, u8 UInt8, i8 Int8, u16 UInt16, i16 Int16, u32 UInt32, i32 Int32, u64 UInt64, i64 Int64, u128 UInt128, i128 Int128, u256 UInt256, i256 Int256, date Date') +LIMIT 5000000 + + */ + +const query = `SELECT * FROM fluff ORDER BY id ASC LIMIT 1000000` void (async () => { const client = createClient({ url: 'http://localhost:8123', }) - async function streamRowBinary() { + + async function benchmarkJSONEachRow() { + const start = +new Date() + const rs = await client.query({ + query, + format: 'JSONCompactEachRow', + }) + const values = [] + await new Promise((resolve, reject) => { + rs.stream() + .on('data', (rows: Row[]) => { + rows.forEach((row) => { + values.push(row.json()) + }) + }) + .on('end', resolve) + .on('error', reject) + }) + console.log( + `JSONCompactEachRow elapsed: ${+new Date() - start} ms, total: ${ + values.length + }` + ) + return values.length + } + + async function benchmarkRowBinary() { const start = +new Date() const rs = await client.query({ - query: `SELECT * FROM random ORDER BY id ASC LIMIT 10000`, + query, format: 'RowBinary', }) - const values = await (rs as RowBinaryResultSet).get() - // console.log(values) - // console.log( - // `RowBinary elapsed: ${+new Date() - start} ms, length: ${values.length}` - // ) + const values: unknown[][] = [] + await new Promise((resolve, reject) => { + ;(rs as RowBinaryResultSet) 
+ .stream() + .on('data', (rows: unknown[][]) => { + rows.forEach((row) => { + values.push(row) + }) + }) + .on('end', resolve) + .on('error', reject) + }) + console.log( + `RowBinary elapsed: ${+new Date() - start} ms, total: ${values.length}` + ) return values.length } - for (let i = 0; i < 1000; i++) { - await streamRowBinary() + attachExceptionHandlers() + for (let i = 0; i < 10; i++) { + await benchmarkJSONEachRow() + await benchmarkRowBinary() } process.exit(0) diff --git a/packages/client-common/__tests__/unit/row_binary_decoders.test.ts b/packages/client-common/__tests__/unit/row_binary_decoders.test.ts new file mode 100644 index 00000000..e01bf260 --- /dev/null +++ b/packages/client-common/__tests__/unit/row_binary_decoders.test.ts @@ -0,0 +1,38 @@ +import { + removeLowCardinality, + RowBinaryTypesDecoder, +} from '../../src/data_formatter' + +fdescribe('RowBinary decoders', () => { + it('should decode Date', () => { + const args: [Uint8Array, Date][] = [ + [new Uint8Array([0x00, 0x00]), new Date('1970-01-01T00:00:00.000Z')], + [new Uint8Array([0x01, 0x00]), new Date('1970-01-02T00:00:00.000Z')], + [new Uint8Array([0x02, 0x00]), new Date('1970-01-03T00:00:00.000Z')], + [new Uint8Array([0x10, 0x00]), new Date('1970-01-17T00:00:00.000Z')], + [new Uint8Array([0x4a, 0x4d]), new Date('2024-03-04T00:00:00.000Z')], + [new Uint8Array([0xff, 0xff]), new Date('2149-06-06T00:00:00.000Z')], + ] + args.forEach(([src, expected]) => { + const res = RowBinaryTypesDecoder.date(src, 0)! + expect(+res[0]) + .withContext( + `Decoded ${src.toString()}. Result ${res[0]} != expected ${expected}` + ) + .toEqual(+expected) + }) + }) + + it('should remove low cardinality', async () => { + const args: [string, string][] = [ + ['LowCardinality(String)', 'String'], + ['LowCardinality(Nullable(String))', 'Nullable(String)'], + ['LowCardinality(Array(String))', 'Array(String)'], + ['Nullable(String)', 'Nullable(String)'], + ['String', 'String'], + ] + args.forEach(([src, expected]) => { + expect(removeLowCardinality(src)).toEqual(expected) + }) + }) +}) diff --git a/packages/client-common/__tests__/unit/row_binary_decoder.test.ts b/packages/client-common/__tests__/unit/row_binary_read_bytes.test.ts similarity index 50% rename from packages/client-common/__tests__/unit/row_binary_decoder.test.ts rename to packages/client-common/__tests__/unit/row_binary_read_bytes.test.ts index 1f1e9577..8303ae3b 100644 --- a/packages/client-common/__tests__/unit/row_binary_decoder.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_read_bytes.test.ts @@ -1,10 +1,9 @@ import { - readBytesAsInt, - readBytesAsSignedBigInt, readBytesAsUnsignedBigInt, + readBytesAsUnsignedInt, } from '../../src/data_formatter' -describe('RowBinary decoder', () => { +fdescribe('RowBinary read bytes', () => { describe('Unsigned integers', () => { it('should decode UInt16', async () => { const args: [Uint8Array, number][] = [ @@ -13,11 +12,11 @@ describe('RowBinary decoder', () => { [new Uint8Array([0x02, 0x00]), 2], [new Uint8Array([0x10, 0x00]), 16], [new Uint8Array([0xff, 0x00]), 255], - [new Uint8Array([0xff, 0xff]), 65_535], - [new Uint8Array([0x00, 0x80]), 32_768], + [new Uint8Array([0xff, 0xff]), 65535], + [new Uint8Array([0x00, 0x80]), 32768], ] args.forEach(([src, expected]) => { - expect(readBytesAsInt(src, 0, 2, false)) + expect(readBytesAsUnsignedInt(src, 0, 2)) .withContext(ctx(src, expected)) .toBe(expected) }) @@ -29,61 +28,23 @@ describe('RowBinary decoder', () => { [new Uint8Array([0x02, 0x00, 0x00, 0x00]), 2], [new 
Uint8Array([0x10, 0x00, 0x00, 0x00]), 16], [new Uint8Array([0xff, 0x00, 0x00, 0x00]), 255], - [new Uint8Array([0xff, 0xff, 0x00, 0x00]), 65_535], - [new Uint8Array([0xff, 0xff, 0xff, 0x00]), 16_777_215], - [new Uint8Array([0xff, 0xff, 0xff, 0x7f]), 2_147_483_647], - [new Uint8Array([0xff, 0xff, 0xff, 0xff]), 4_294_967_295], - [new Uint8Array([0x00, 0x00, 0x00, 0x80]), 2_147_483_648], + [new Uint8Array([0xff, 0xff, 0x00, 0x00]), 65535], + [new Uint8Array([0xff, 0xff, 0xff, 0x00]), 16777215], + [new Uint8Array([0xff, 0xff, 0xff, 0x7f]), 2147483647], + [new Uint8Array([0xff, 0xff, 0xff, 0xff]), 4294967295], + [new Uint8Array([0x00, 0x00, 0x00, 0x80]), 2147483648], ] args.forEach(([src, expected]) => { - expect(readBytesAsInt(src, 0, 4, false)) + expect(readBytesAsUnsignedInt(src, 0, 4)) .withContext(ctx(src, expected)) .toBe(expected) }) }) }) - describe('Signed integers', () => { - it('should decode Int16', async () => { - const args: [Uint8Array, number][] = [ - [new Uint8Array([0x00, 0x00]), 0], - [new Uint8Array([0x01, 0x00]), 1], - [new Uint8Array([0x02, 0x00]), 2], - [new Uint8Array([0x10, 0x00]), 16], - [new Uint8Array([0xff, 0x00]), 255], - [new Uint8Array([0xff, 0xff]), -1], - [new Uint8Array([0x00, 0x80]), -32_768], - ] - args.forEach(([src, expected]) => { - expect(readBytesAsInt(src, 0, 2, true)) - .withContext(ctx(src, expected)) - .toBe(expected) - }) - }) - it('should decode Int32', async () => { - const args: [Uint8Array, number][] = [ - [new Uint8Array([0x00, 0x00, 0x00, 0x00]), 0], - [new Uint8Array([0x01, 0x00, 0x00, 0x00]), 1], - [new Uint8Array([0x02, 0x00, 0x00, 0x00]), 2], - [new Uint8Array([0x10, 0x00, 0x00, 0x00]), 16], - [new Uint8Array([0xff, 0x00, 0x00, 0x00]), 255], - [new Uint8Array([0xff, 0xff, 0x00, 0x00]), 65_535], - [new Uint8Array([0xff, 0xff, 0xff, 0x00]), 16_777_215], - [new Uint8Array([0xff, 0xff, 0xff, 0x7f]), 2_147_483_647], - [new Uint8Array([0xff, 0xff, 0xff, 0xff]), -1], - [new Uint8Array([0x00, 0x00, 0x00, 0x80]), -2_147_483_648], - ] - args.forEach(([src, expected]) => { - expect(readBytesAsInt(src, 0, 4, true)) - .withContext(ctx(src, expected)) - .toBe(expected) - }) - }) - }) - - describe('BigInt', () => { + describe('Unsigned big integers', () => { it('should decode UInt64', async () => { - const args: [Uint8Array, BigInt][] = [ + const args: [Uint8Array, bigint][] = [ [new Uint8Array([0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]), 0n], [new Uint8Array([0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]), 1n], [new Uint8Array([0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]), 2n], @@ -93,15 +54,15 @@ describe('RowBinary decoder', () => { ], [ new Uint8Array([0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]), - 65_535n, + 65535n, ], [ new Uint8Array([0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00]), - 16_777_215n, + 16777215n, ], [ new Uint8Array([0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00]), - 4_294_967_295n, + 4294967295n, ], [ new Uint8Array([0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00]), @@ -127,19 +88,9 @@ describe('RowBinary decoder', () => { .toBe(expected) }) }) - - it('should decode Int64 ', async () => { - expect( - readBytesAsSignedBigInt( - new Uint8Array([0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80]), - 0, - 8 - ) - ).toEqual(1n) - }) }) - function ctx(src: Uint8Array, expected: number | BigInt) { + function ctx(src: Uint8Array, expected: number | bigint) { return `Expected ${src.toString()} to be decoded as ${expected}` } }) diff --git a/packages/client-common/src/data_formatter/formatter.ts 
b/packages/client-common/src/data_formatter/formatter.ts index 4a472065..82912870 100644 --- a/packages/client-common/src/data_formatter/formatter.ts +++ b/packages/client-common/src/data_formatter/formatter.ts @@ -32,6 +32,8 @@ const supportedRawFormats = [ 'CustomSeparatedWithNames', 'CustomSeparatedWithNamesAndTypes', 'Parquet', + // translates to RowBinaryWithNamesAndTypes under the hood (see client/formatQuery); + // we expose a shorter name to the user for simplicity. 'RowBinary', ] as const diff --git a/packages/client-common/src/data_formatter/row_binary.ts b/packages/client-common/src/data_formatter/row_binary.ts deleted file mode 100644 index d5abe28c..00000000 --- a/packages/client-common/src/data_formatter/row_binary.ts +++ /dev/null @@ -1,216 +0,0 @@ -export type ColumnType = - | 'Bool' - | 'UInt8' - | 'Int8' - | 'UInt16' - | 'Int16' - | 'UInt32' - | 'Int32' - // | 'UInt64' - // | 'Int64' - // | 'UInt128' - // | 'Int128' - // | 'UInt256' - // | 'Int256' - | 'String' - -export type TypeDecoder = ( - src: Uint8Array, - loc: number -) => DecodeResult | null - -export type DecodeResult = [T, number] -export type DecodeError = { error: string } -export type DecodedColumns = DecodeResult<{ - names: string[] - types: ColumnType[] - decoders: TypeDecoder[] -}> - -export const RowBinaryTypesDecoder = { - bool: (src: Uint8Array, loc: number): DecodeResult | null => { - //// [1,2,3,4] - len 4; max loc 3 - if (src.length < loc + 1) return null - const x = src[loc] === 1 - return [x, loc + 1] - }, - uint8: (src: Uint8Array, loc: number): DecodeResult | null => { - if (src.length < loc + 1) return null - const x = src[loc] - return [x, loc + 1] - }, - int8: (src: Uint8Array, loc: number): DecodeResult | null => { - if (src.length < loc + 1) return null - const x = src[loc] - return [x, loc + 1] - }, - uint16: (src: Uint8Array, loc: number): DecodeResult | null => { - if (src.length < loc + 2) return null - const x = readBytesAsInt(src, loc, 2, false) - return [x, loc + 2] - }, - int16: (src: Uint8Array, loc: number): DecodeResult | null => { - if (src.length < loc + 2) return null - const x = readBytesAsInt(src, loc, 2, true) - return [x, loc + 2] - }, - uint32: (src: Uint8Array, loc: number): DecodeResult | null => { - if (src.length < loc + 4) return null - const x = readBytesAsInt(src, loc, 4, false) - return [x, loc + 4] - }, - int32: (src: Uint8Array, loc: number): DecodeResult | null => { - if (src.length < loc + 4) return null - const x = readBytesAsInt(src, loc, 4, true) - return [x, loc + 4] - }, - // uint64: (src: Uint8Array, loc: number): DecodeResult => { - // return [readBytesAsUnsignedBigInt(src, loc, 8), loc + 8] - // }, - // int64: (src: Uint8Array, loc: number): DecodeResult => { - // return [readBytesAsUnsignedBigInt(src, loc, 8), loc + 8] - // }, - // uint128: (src: Uint8Array, loc: number): DecodeResult => { - // return [readBytesAsUnsignedBigInt(src, loc, 16), loc + 16] - // }, - // int128: (src: Uint8Array, loc: number): DecodeResult => { - // return [readBytesAsUnsignedBigInt(src, loc, 16), loc + 16] - // }, - // uint256: (src: Uint8Array, loc: number): DecodeResult => { - // return [readBytesAsUnsignedBigInt(src, loc, 32), loc + 32] - // }, - // int256: (src: Uint8Array, loc: number): DecodeResult => { - // return [readBytesAsUnsignedBigInt(src, loc, 32), loc + 32] - // }, - string: (src: Uint8Array, loc: number): DecodeResult | null => { - return readLEB128String(src, loc) - }, -} - -export const RowBinaryColumnTypeToDecoder: { - [key in ColumnType]: TypeDecoder -} 
= { - Bool: RowBinaryTypesDecoder.bool, - UInt8: RowBinaryTypesDecoder.uint8, - Int8: RowBinaryTypesDecoder.int8, - UInt16: RowBinaryTypesDecoder.uint16, - Int16: RowBinaryTypesDecoder.int16, - UInt32: RowBinaryTypesDecoder.uint32, - Int32: RowBinaryTypesDecoder.int32, - // UInt64: RowBinaryTypesDecoder.uint64, - // Int64: RowBinaryTypesDecoder.int64, - // UInt128: RowBinaryTypesDecoder.uint128, - // Int128: RowBinaryTypesDecoder.int128, - // UInt256: RowBinaryTypesDecoder.uint256, - // Int256: RowBinaryTypesDecoder.int256, - String: RowBinaryTypesDecoder.string, -} - -export const RowBinaryColumns = { - decode: (src: Uint8Array): DecodedColumns | DecodeError => { - const res = readLEB128(src, 0) - const numColumns = res[0] - let nextLoc = res[1] - console.log(`Total columns: ${numColumns}`) - const names = new Array(numColumns) - const types = new Array(numColumns) - const decoders: TypeDecoder[] = new Array(numColumns) - for (let i = 0; i < numColumns; i++) { - const res = readLEB128String(src, nextLoc)! // FIXME non-null assertion - nextLoc = res[1] - names[i] = res[0] - } - for (let i = 0; i < numColumns; i++) { - const res = readLEB128String(src, nextLoc)! // FIXME non-null assertion - nextLoc = res[1] - decoders[i] = RowBinaryColumnTypeToDecoder[res[0] as ColumnType] - if (decoders[i] === undefined) { - return { error: `Unknown column type ${res[0]}` } - } - types[i] = res[0] as ColumnType - } - return [{ names, types, decoders }, nextLoc] - }, -} - -export function readLEB128(src: Uint8Array, loc: number): DecodeResult { - let result = 0 - let shift = 0 - let ix = 0 - // eslint-disable-next-line no-constant-condition - while (true) { - const byte = src[loc + ix] - ix++ - result |= (byte & 0x7f) << shift - shift += 7 - if ((0x80 & byte) === 0) { - if (shift < 32 && (byte & 0x40) !== 0) { - return [result | (~0 << shift), loc + ix] - } - return [result, loc + ix] - } - } -} - -export function readLEB128String( - src: Uint8Array, - loc: number -): DecodeResult | null { - // console.log(`Decoding string at loc ${loc}, src len: ${src.length}`) - if (src.length < loc + 1) return null - const [length, nextLoc] = readLEB128(src, loc) - // console.log(`Got next loc for string, next loc ${nextLoc}, len: ${length}, src len: ${src.length}`) - if (src.length < nextLoc + length) return null - return [src.slice(nextLoc, nextLoc + length).toString(), nextLoc + length] -} - -export function readBytesAsInt( - src: Uint8Array, - loc: number, - bytes: 2 | 4, // (U)Int16 | (U)Int32 - signed: boolean -): number { - let result = 0 - for (let i = 0; i < bytes; i++) { - result |= src[loc + i] << (8 * i) - } - result = result >>> 0 - const max = 2 ** (bytes * 8) - if (signed && result > max / 2 - 1) { - return result - max - } - return result -} - -export function readBytesAsUnsignedBigInt( - src: Uint8Array, - loc: number, - bytes: 8 | 16 | 32 // (U)Int64 | (U)Int128 | (U)Int256 -): BigInt { - let result = 0n - for (let i = bytes - 1; i >= 0; i--) { - // console.log(src[loc + i]) - result = (result << 8n) + BigInt(src[loc + i]) - } - console.log( - `(BigInt) Decoded ${bytes} bytes ${src - .slice(loc, loc + bytes) - .toString()} into ${result}` - ) - return result -} - -export function readBytesAsSignedBigInt( - src: Uint8Array, - loc: number, - bytes: 8 | 16 | 32 // (U)Int64 | (U)Int128 | (U)Int256 -): BigInt { - let result = 0n - for (let i = bytes / 4 - 1; i >= 0; i--) { - const dec = readBytesAsInt(src, loc + i * 4, 4, true) - console.log(`Decoded: ${dec}`) - result += BigInt(dec) - } - // 
console.log(`(BigInt) Decoded ${bytes} bytes into ${result}`) - return result -} diff --git a/packages/client-common/src/data_formatter/row_binary/decoder.ts b/packages/client-common/src/data_formatter/row_binary/decoder.ts new file mode 100644 index 00000000..47298ed2 --- /dev/null +++ b/packages/client-common/src/data_formatter/row_binary/decoder.ts @@ -0,0 +1,203 @@ +import type { DecodeResult } from './read_bytes' +import { + readBytesAsUnsignedBigInt, + readBytesAsUnsignedInt, + readBytesAsUnsignedLEB128, +} from './read_bytes' + +export type ColumnType = + | 'Bool' + | 'UInt8' + | 'Int8' + | 'UInt16' + | 'Int16' + | 'UInt32' + | 'Int32' + | 'UInt64' + | 'Int64' + | 'UInt128' + | 'Int128' + | 'UInt256' + | 'Int256' + | 'String' + | 'Date' + +export type TypeDecoder = ( + src: Uint8Array, + loc: number +) => DecodeResult | null + +export type DecodeError = { error: string } +export type DecodedColumns = DecodeResult<{ + names: string[] + types: ColumnType[] + decoders: TypeDecoder[] +}> + +const Int8Overflow = 128 +const UInt8Overflow = 256 + +const Int16Overflow = 32768 +const UInt16Overflow = 65536 + +const Int32Overflow = 2147483648 +const UInt32Overflow = 4294967296 + +const Int64Overflow = 9223372036854775808n +const UInt64Overflow = 18446744073709551616n + +const Int128Overflow = 170141183460469231731687303715884105728n +const UInt128Overflow = 340282366920938463463374607431768211456n + +const Int256Overflow = + 57896044618658097711785492504343953926634992332820282019728792003956564819968n +const UInt256Overflow = + 115792089237316195423570985008687907853269984665640564039457584007913129639936n + +const DayMillis = 24 * 3600 * 1000 +const TxtDecoder = new TextDecoder() + +export const RowBinaryTypesDecoder = { + bool: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 1) return null + return [src[loc] === 1, loc + 1] + }, + uint8: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 1) return null + return [src[loc], loc + 1] + }, + int8: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 1) return null + const x = src[loc] + return [x < Int8Overflow ? x : x - UInt8Overflow, loc + 1] + }, + uint16: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 2) return null + return [readBytesAsUnsignedInt(src, loc, 2), loc + 2] + }, + int16: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 2) return null + const x = readBytesAsUnsignedInt(src, loc, 2) + return [x < Int16Overflow ? x : x - UInt16Overflow, loc + 2] + }, + uint32: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 4) return null + return [readBytesAsUnsignedInt(src, loc, 4), loc + 4] + }, + int32: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 4) return null + const x = readBytesAsUnsignedInt(src, loc, 4) + return [x < Int32Overflow ? x : x - UInt32Overflow, loc + 4] + }, + uint64: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 8) return null + return [readBytesAsUnsignedBigInt(src, loc, 8), loc + 8] + }, + int64: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 8) return null + const x = readBytesAsUnsignedBigInt(src, loc, 8) + return [x < Int64Overflow ? 
x : x - UInt64Overflow, loc + 8] + }, + uint128: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 16) return null + return [readBytesAsUnsignedBigInt(src, loc, 16), loc + 16] + }, + int128: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 16) return null + const x = readBytesAsUnsignedBigInt(src, loc, 16) + return [x < Int128Overflow ? x : x - UInt128Overflow, loc + 16] + }, + uint256: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 32) return null + return [readBytesAsUnsignedBigInt(src, loc, 32), loc + 32] + }, + int256: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 32) return null + const x = readBytesAsUnsignedBigInt(src, loc, 32) + return [x < Int256Overflow ? x : x - UInt256Overflow, loc + 32] + }, + string: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 1) return null + const res = readBytesAsUnsignedLEB128(src, loc) + if (res === null) { + return null + } + const [length, nextLoc] = res + if (src.length < nextLoc + length) return null + return [ + TxtDecoder.decode(src.slice(nextLoc, nextLoc + length)), + nextLoc + length, + ] + }, + date: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 2) return null + const days = readBytesAsUnsignedInt(src, loc, 2) + const date = new Date(days * DayMillis) + return [date, loc + 2] + }, +} + +export const RowBinaryColumnTypeToDecoder: { + [key in ColumnType]: TypeDecoder +} = { + Bool: RowBinaryTypesDecoder.bool, + UInt8: RowBinaryTypesDecoder.uint8, + Int8: RowBinaryTypesDecoder.int8, + UInt16: RowBinaryTypesDecoder.uint16, + Int16: RowBinaryTypesDecoder.int16, + UInt32: RowBinaryTypesDecoder.uint32, + Int32: RowBinaryTypesDecoder.int32, + UInt64: RowBinaryTypesDecoder.uint64, + Int64: RowBinaryTypesDecoder.int64, + UInt128: RowBinaryTypesDecoder.uint128, + Int128: RowBinaryTypesDecoder.int128, + UInt256: RowBinaryTypesDecoder.uint256, + Int256: RowBinaryTypesDecoder.int256, + String: RowBinaryTypesDecoder.string, + Date: RowBinaryTypesDecoder.date, +} + +export const RowBinaryColumns = { + decode: (src: Uint8Array): DecodedColumns | DecodeError => { + const res = readBytesAsUnsignedLEB128(src, 0) + if (res === null) { + return { error: 'Not enough data to decode the number of columns' } + } + const numColumns = res[0] + let nextLoc = res[1] + const names = new Array(numColumns) + const types = new Array(numColumns) + const decoders: TypeDecoder[] = new Array(numColumns) + for (let i = 0; i < numColumns; i++) { + const res = RowBinaryTypesDecoder.string(src, nextLoc) + if (res === null) { + return { error: `Not enough data to decode column ${i} name` } + } + nextLoc = res[1] + names[i] = res[0] + } + for (let i = 0; i < numColumns; i++) { + const res = RowBinaryTypesDecoder.string(src, nextLoc) + if (res === null) { + return { error: `Not enough data to decode column ${i} type` } + } + nextLoc = res[1] + const colType = removeLowCardinality(res[0]) + decoders[i] = RowBinaryColumnTypeToDecoder[colType] + if (decoders[i] === undefined) { + return { + error: `Unknown column type ${res[0]} (normalized: ${colType})`, + } + } + types[i] = colType + } + return [{ names, types, decoders }, nextLoc] + }, +} + +export function removeLowCardinality(colType: string): ColumnType { + if (colType.startsWith('LowCardinality')) { + return colType.slice(15, -1) as ColumnType + } + return colType as ColumnType +} diff --git 
a/packages/client-common/src/data_formatter/row_binary/index.ts b/packages/client-common/src/data_formatter/row_binary/index.ts new file mode 100644 index 00000000..571fd4e1 --- /dev/null +++ b/packages/client-common/src/data_formatter/row_binary/index.ts @@ -0,0 +1,2 @@ +export * from './decoder' +export * from './read_bytes' diff --git a/packages/client-common/src/data_formatter/row_binary/read_bytes.ts b/packages/client-common/src/data_formatter/row_binary/read_bytes.ts new file mode 100644 index 00000000..f76f8fab --- /dev/null +++ b/packages/client-common/src/data_formatter/row_binary/read_bytes.ts @@ -0,0 +1,48 @@ +// Decoded value + the next index to scan from +export type DecodeResult = [T, number] + +export function readBytesAsUnsignedLEB128( + src: Uint8Array, + loc: number +): DecodeResult | null { + let result = 0 + let shift = 0 + let ix = 0 + let byte: number + // eslint-disable-next-line no-constant-condition + while (true) { + if (src.length < loc + ix + 1) { + return null + } + byte = src[loc + ix++] + result |= (byte & 0x7f) << shift + if (byte >> 7 === 0) { + return [result, loc + ix] + } + shift += 7 + } +} + +export function readBytesAsUnsignedInt( + src: Uint8Array, + loc: number, + bytes: 2 | 4 // (U)Int16 | (U)Int32 +): number { + let result = 0 + for (let i = bytes - 1; i >= 0; i--) { + result = (result << 8) + src[loc + i] + } + return result >>> 0 +} + +export function readBytesAsUnsignedBigInt( + src: Uint8Array, + loc: number, + bytes: 8 | 16 | 32 // (U)Int64 | (U)Int128 | (U)Int256 +): bigint { + let result = 0n + for (let i = bytes - 1; i >= 0; i--) { + result = (result << 8n) + BigInt(src[loc + i]) + } + return result +} diff --git a/packages/client-node/__tests__/integration/node_stream_row_binary.test.ts b/packages/client-node/__tests__/integration/node_stream_row_binary.test.ts index 4c3316b2..80a148c5 100644 --- a/packages/client-node/__tests__/integration/node_stream_row_binary.test.ts +++ b/packages/client-node/__tests__/integration/node_stream_row_binary.test.ts @@ -1,7 +1,6 @@ import type { ClickHouseClient } from '@clickhouse/client-common' import { createTestClient, guid } from '@test/utils' import type Stream from 'stream' -import { RowBinaryResultSet } from '../../src/result_set' describe('[Node.js] stream RowBinary', () => { let client: ClickHouseClient @@ -87,39 +86,6 @@ describe('[Node.js] stream RowBinary', () => { await selectAndAssert(assertValues) }) - fit('should stream', async () => { - await streamJSON() - await streamRowBinary() - expect(1).toEqual(1) - }) - - async function streamJSON() { - const start = +new Date() - const rs = await client.query({ - query: `SELECT * FROM random ORDER BY id ASC LIMIT 100000`, - format: 'JSONEachRow', - }) - const values = await rs.json() - console.log( - `JSON elapsed: ${+new Date() - start} ms, length: ${values.length}` - ) - return values.length - } - - async function streamRowBinary() { - const start = +new Date() - const rs = await client.query({ - query: `SELECT * FROM random ORDER BY id ASC LIMIT 100000`, - format: 'RowBinary', - }) - const values = await (rs as RowBinaryResultSet).get() - // console.log(values) - console.log( - `RowBinary elapsed: ${+new Date() - start} ms, length: ${values.length}` - ) - return values.length - } - async function selectAndAssert(assertValues: unknown[][]) { const rs = await client.query({ query: `SELECT * EXCEPT id FROM ${tableName} ORDER BY id ASC`, @@ -149,7 +115,6 @@ describe('[Node.js] stream RowBinary', () => { wait_end_of_query: 1, }, }) - 
console.log(`Created table ${tableName}`) let id = 1 await client.insert({ table: tableName, diff --git a/packages/client-node/src/config.ts b/packages/client-node/src/config.ts index e291daf4..51350ee4 100644 --- a/packages/client-node/src/config.ts +++ b/packages/client-node/src/config.ts @@ -10,7 +10,8 @@ import { } from '@clickhouse/client-common' import type Stream from 'stream' import { createConnection, type TLSParams } from './connection' -import { ResultSet, RowBinaryResultSet } from './result_set' +import { ResultSet } from './result_set' +import { RowBinaryResultSet } from './row_binary_result_set' import { NodeValuesEncoder } from './utils' export type NodeClickHouseClientConfigOptions = diff --git a/packages/client-node/src/result_set.ts b/packages/client-node/src/result_set.ts index 8d0fd69d..0a5d9224 100644 --- a/packages/client-node/src/result_set.ts +++ b/packages/client-node/src/result_set.ts @@ -1,7 +1,6 @@ /* eslint-disable no-console */ import type { BaseResultSet, DataFormat, Row } from '@clickhouse/client-common' import { decode, validateStreamFormat } from '@clickhouse/client-common' -import { DecodedColumns, RowBinaryColumns } from '@clickhouse/client-common/src/data_formatter' import { Buffer } from 'buffer' import type { TransformCallback } from 'stream' import Stream, { Transform } from 'stream' @@ -112,182 +111,4 @@ export class ResultSet implements BaseResultSet { } } -export class RowBinaryResultSet implements BaseResultSet { - constructor( - private _stream: Stream.Readable, - private readonly format: DataFormat, - public readonly query_id: string - ) {} - - async text(): Promise { - throw new Error( - `Can't call 'text()' on RowBinary result set; please use 'stream' instead` - ) - } - - async json(): Promise { - throw new Error( - `Can't call 'json()' on RowBinary result set; please use 'stream' instead` - ) - } - - async get(): Promise { - if (this.format !== 'RowBinary') { - throw new Error( - `Can't use RowBinaryResultSet if the format is not RowBinary` - ) - } - const result: unknown[][] = [] - await new Promise((resolve, reject) => { - this.stream() - .on('data', (rows: unknown[][]) => { - result.push(...rows) - }) - .on('end', resolve) - .on('error', reject) - }) - return result - } - - stream(): Stream.Readable { - // If the underlying stream has already ended, - // Stream.pipeline will create a new empty stream, - // but without "readableEnded" flag set to true - if (this._stream.readableEnded) { - throw Error(streamAlreadyConsumedMessage) - } - if (this.format !== 'RowBinary') { - throw new Error( - `Can't use RowBinaryResultSet if the format is not RowBinary` - ) - } - - let loc = 0 - let columns: DecodedColumns[0] | undefined - let incompleteChunk: Uint8Array | undefined - let row: unknown[] = [] - let lastColumnIdx: number | undefined - - const toRows = new Transform({ - transform( - chunk: Buffer, - _encoding: BufferEncoding, - callback: TransformCallback - ) { - // console.log(`got a new chunk, len: ${chunk.length}`) - let src: Uint8Array - if (incompleteChunk !== undefined) { - // console.log('got an incomplete chunk', incompleteChunk.length) - src = Buffer.concat([incompleteChunk, chunk.subarray()]) - incompleteChunk = undefined - } else { - //console.log('no incomplete chunk') - src = chunk.subarray() - } - if (columns === undefined) { - const res = RowBinaryColumns.decode(src) - if ('error' in res) { - callback(new Error(res.error)) - return - } - columns = res[0] - loc = res[1] - //console.log(`Columns ${columns.names} with types 
${columns.types}. Next loc after columns: ${loc}`) - } - let decodeResult: [unknown, number] | null - const rows: unknown[][] = [] - // an incomplete row from the previous chunk; continue from the known column index - if (lastColumnIdx !== undefined) { - // console.log('incomplete idx:', lastColumnIdx) - for (let i = lastColumnIdx; i < columns.decoders.length; i++) { - // FIXME - handle null properly; currently assuming that the second chunk will be enough (but it maybe not be) - decodeResult = columns.decoders[i](src, loc) - if (decodeResult === null) { - callback(new Error('Not enough data to decode the row')) - return - } else { - // console.log( - // `Decoded incomplete column ${columns.names[i]} at loc ${loc} with result ${decodeResult}` - // ) - row[i] = decodeResult[0] - loc = decodeResult[1] - } - } - // console.log('incomplete push:', row) - rows.push(row) - lastColumnIdx = undefined - } - // done with the previous incomplete rows; processing the rows as normal - // console.log('loc and src len', loc, src.length) - while (loc <= src.length) { - row = new Array(columns.names.length) - for (let i = 0; i < columns.decoders.length; i++) { - decodeResult = columns.decoders[i](src, loc) - // console.log( - // `Decoded column ${columns.names[i]} at loc ${loc} with result ${decodeResult}` - // ) - // maybe not enough data to finish the row - if (decodeResult === null) { - // console.log( - // `Decode result is null for column ${columns?.names[i]}` - // ) - // keep the remaining data to add to the next chunk - incompleteChunk = src.subarray(loc) - loc = 0 - lastColumnIdx = i - if (rows.length > 0) { - this.push(rows) - } - callback() - return - } else { - if (String(decodeResult[0]).length > 100) { - throw new Error('foo') - } - // decoded a value - row[i] = decodeResult[0] - loc = decodeResult[1] - if (loc > src.length) { - loc = loc - src.length - incompleteChunk = src.subarray(loc) - // if there are more columns to decode, keep the index - if (i < columns.decoders.length - 1) { - lastColumnIdx = i - } else { - rows.push(row) - } - if (rows.length > 0) { - this.push(rows) - } - callback() - return - } - } - } - // console.log('complete push, maybe there is more:', row) - rows.push(row) - } - if (rows.length > 0) { - this.push(rows) - } - callback() - }, - autoDestroy: true, - objectMode: true, - }) - - return Stream.pipeline(this._stream, toRows, function pipelineCb(err) { - if (err) { - // FIXME: use logger instead - // eslint-disable-next-line no-console - console.error(err) - } - }) - } - - close() { - this._stream.destroy() - } -} - const streamAlreadyConsumedMessage = 'Stream has been already consumed' diff --git a/packages/client-node/src/row_binary_result_set.ts b/packages/client-node/src/row_binary_result_set.ts new file mode 100644 index 00000000..a5145697 --- /dev/null +++ b/packages/client-node/src/row_binary_result_set.ts @@ -0,0 +1,146 @@ +import type { BaseResultSet, DataFormat } from '@clickhouse/client-common' +import type { DecodedColumns } from '@clickhouse/client-common/src/data_formatter' +import { RowBinaryColumns } from '@clickhouse/client-common/src/data_formatter' +import { Buffer } from 'buffer' +import Stream, { Transform, type TransformCallback } from 'stream' + +export class RowBinaryResultSet implements BaseResultSet { + constructor( + private _stream: Stream.Readable, + private readonly format: DataFormat, + public readonly query_id: string + ) {} + + async text(): Promise { + throw new Error( + `Can't call 'text()' on RowBinary result set; please use 
'stream' instead`
+    )
+  }
+
+  async json<T>(): Promise<T> {
+    throw new Error(
+      `Can't call 'json()' on RowBinary result set; please use 'stream' instead`
+    )
+  }
+
+  async get(): Promise<unknown[][]> {
+    if (this.format !== 'RowBinary') {
+      throw new Error(
+        `Can't use RowBinaryResultSet if the format is not RowBinary`
+      )
+    }
+    const result: unknown[][] = []
+    await new Promise<void>((resolve, reject) => {
+      this.stream()
+        .on('data', (rows: unknown[][]) => {
+          for (let i = 0; i < rows.length; i++) {
+            result.push(rows[i])
+          }
+        })
+        .on('end', resolve)
+        .on('error', reject)
+    })
+    return result
+  }
+
+  stream(): Stream.Readable {
+    // If the underlying stream has already ended,
+    // Stream.pipeline will create a new empty stream,
+    // but without "readableEnded" flag set to true
+    if (this._stream.readableEnded) {
+      throw Error('Stream has been already consumed')
+    }
+    if (this.format !== 'RowBinary') {
+      throw Error(`Format ${this.format} is not RowBinary`)
+    }
+
+    let columns: DecodedColumns[0] | undefined
+    let incompleteChunk: Uint8Array | undefined
+    // the current row and column index survive across chunks,
+    // so the values decoded before an incomplete column are not lost
+    let columnIndex = 0
+    let row: unknown[] = []
+    let rowsToPush: unknown[][] = []
+
+    const toRows = new Transform({
+      transform(
+        chunk: Buffer,
+        _encoding: BufferEncoding,
+        callback: TransformCallback
+      ) {
+        let src: Buffer
+        if (incompleteChunk !== undefined) {
+          src = Buffer.concat([incompleteChunk, chunk.subarray()])
+          incompleteChunk = undefined
+        } else {
+          src = chunk.subarray()
+        }
+
+        let loc = 0
+        if (columns === undefined) {
+          const res = RowBinaryColumns.decode(src)
+          if ('error' in res) {
+            return callback(new Error(res.error))
+          }
+          columns = res[0]
+          loc = res[1]
+        }
+
+        while (loc < src.length) {
+          if (columnIndex === 0) {
+            // only start a fresh row if the previous one was fully decoded;
+            // otherwise, resume the row started in the previous chunk
+            row = new Array(columns.names.length)
+          }
+          while (columnIndex < columns.names.length) {
+            const decodeResult = columns.decoders[columnIndex](src, loc)
+            // not enough data to finish the row - null indicates that
+            if (decodeResult === null) {
+              // will be added to the beginning of the next received chunk;
+              // `row` and `columnIndex` are kept to resume this row later
+              incompleteChunk = src.subarray(loc)
+              if (rowsToPush.length > 0) {
+                this.push(rowsToPush)
+                rowsToPush = []
+              }
+              return callback()
+            } else {
+              // decoded a value
+              row[columnIndex] = decodeResult[0]
+              loc = decodeResult[1]
+              columnIndex++
+            }
+          }
+          rowsToPush.push(row)
+          columnIndex = 0
+        }
+
+        if (rowsToPush.length > 0) {
+          // push the current batch and start a new array instead of clearing
+          // the pushed one in place, as the consumer may not have read it yet
+          this.push(rowsToPush)
+          rowsToPush = []
+        }
+
+        return callback()
+      },
+      final(callback: TransformCallback) {
+        if (rowsToPush.length > 0) {
+          this.push(rowsToPush)
+          rowsToPush = []
+        }
+        return callback()
+      },
+      autoDestroy: true,
+      objectMode: true,
+    })
+
+    return Stream.pipeline(this._stream, toRows, function pipelineCb(err) {
+      if (err) {
+        // FIXME: use logger instead
+        // eslint-disable-next-line no-console
+        console.error(err)
+      }
+    })
+  }
+
+  close() {
+    this._stream.destroy()
+  }
+}

From 26115d2bc2e9f126279b53cec8b020610e8b9388 Mon Sep 17 00:00:00 2001
From: slvrtrn
Date: Wed, 6 Mar 2024 19:11:48 +0100
Subject: [PATCH 04/14] Floats, adjust columns decoding.
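
RowBinary sends Float32/Float64 as little-endian IEEE 754, so a DataView over
the exact byte range is enough to decode them. A rough sketch of the idea
(illustrative only, not the final decoder code; note that `byteOffset` matters,
since Node.js stream chunks are usually views into a larger pooled buffer):

    // `src` is a Uint8Array that may be a view into a larger ArrayBuffer
    const view = new DataView(src.buffer, src.byteOffset + loc, 4)
    const value = view.getFloat32(0, true) // true = little-endian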
--- .docker/clickhouse/single_node_tls/Dockerfile | 2 +- benchmarks/leaks/row_binary.ts | 63 ++++++--- docker-compose.cluster.yml | 4 +- docker-compose.yml | 2 +- .../unit/row_binary_decoders.test.ts | 29 ++--- .../unit/row_binary_read_bytes.test.ts | 22 ++++ .../src/data_formatter/row_binary/columns.ts | 123 ++++++++++++++++++ .../src/data_formatter/row_binary/index.ts | 3 +- .../data_formatter/row_binary/read_bytes.ts | 13 ++ .../row_binary/{decoder.ts => types.ts} | 98 ++++++-------- .../client-node/src/row_binary_result_set.ts | 12 ++ 11 files changed, 275 insertions(+), 96 deletions(-) create mode 100644 packages/client-common/src/data_formatter/row_binary/columns.ts rename packages/client-common/src/data_formatter/row_binary/{decoder.ts => types.ts} (73%) diff --git a/.docker/clickhouse/single_node_tls/Dockerfile b/.docker/clickhouse/single_node_tls/Dockerfile index 983e2112..d9ca8884 100644 --- a/.docker/clickhouse/single_node_tls/Dockerfile +++ b/.docker/clickhouse/single_node_tls/Dockerfile @@ -1,4 +1,4 @@ -FROM clickhouse/clickhouse-server:23.11-alpine +FROM clickhouse/clickhouse-server:24.2-alpine COPY .docker/clickhouse/single_node_tls/certificates /etc/clickhouse-server/certs RUN chown clickhouse:clickhouse -R /etc/clickhouse-server/certs \ && chmod 600 /etc/clickhouse-server/certs/* \ diff --git a/benchmarks/leaks/row_binary.ts b/benchmarks/leaks/row_binary.ts index 1c03d8ee..b069fcbf 100644 --- a/benchmarks/leaks/row_binary.ts +++ b/benchmarks/leaks/row_binary.ts @@ -33,36 +33,58 @@ LIMIT 5000000 */ -const query = `SELECT * FROM fluff ORDER BY id ASC LIMIT 1000000` +const limit = 50000 +const query = `SELECT * FROM fluff ORDER BY id ASC LIMIT 5` +// const query = `SELECT * FROM large_strings ORDER BY id ASC LIMIT ${limit}` +// const query = `SELECT * EXCEPT (i128, i256, u128, u256) FROM fluff ORDER BY id ASC LIMIT ${limit}` void (async () => { const client = createClient({ url: 'http://localhost:8123', }) - async function benchmarkJSONEachRow() { + async function benchmarkJSON(format: 'JSONEachRow' | 'JSONCompactEachRow') { const start = +new Date() const rs = await client.query({ query, - format: 'JSONCompactEachRow', + format, }) - const values = [] + let total = 0 await new Promise((resolve, reject) => { rs.stream() .on('data', (rows: Row[]) => { rows.forEach((row) => { - values.push(row.json()) + console.log(row.json()) + total++ }) }) .on('end', resolve) .on('error', reject) }) - console.log( - `JSONCompactEachRow elapsed: ${+new Date() - start} ms, total: ${ - values.length - }` - ) - return values.length + console.log(`${format} elapsed: ${+new Date() - start} ms, total: ${total}`) + return total + } + + async function benchmarkCSV() { + const start = +new Date() + const rs = await client.query({ + query, + format: 'CSV', + }) + let total = 0 + await new Promise((resolve, reject) => { + rs.stream() + .on('data', (rows: Row[]) => { + rows.forEach((row) => { + row.text.split(',') + total++ + }) + }) + .on('end', resolve) + .on('error', reject) + }) + console.log(`CSV elapsed: ${+new Date() - start} ms, total: ${total}`) + return total } async function benchmarkRowBinary() { @@ -71,29 +93,30 @@ void (async () => { query, format: 'RowBinary', }) - const values: unknown[][] = [] + let total = 0 await new Promise((resolve, reject) => { ;(rs as RowBinaryResultSet) .stream() .on('data', (rows: unknown[][]) => { rows.forEach((row) => { - values.push(row) + total++ + // if (total === limit) { + console.log(`Last row`, row) + // } }) }) .on('end', resolve) .on('error', reject) 
}) - console.log( - `RowBinary elapsed: ${+new Date() - start} ms, total: ${values.length}` - ) - return values.length + console.log(`RowBinary elapsed: ${+new Date() - start} ms, total: ${total}`) + return total } attachExceptionHandlers() - for (let i = 0; i < 10; i++) { - await benchmarkJSONEachRow() + for (let i = 0; i < 3; i++) { + await benchmarkJSON('JSONCompactEachRow') + // await benchmarkCSV() await benchmarkRowBinary() } - process.exit(0) })() diff --git a/docker-compose.cluster.yml b/docker-compose.cluster.yml index a6babd00..dc70c5c5 100644 --- a/docker-compose.cluster.yml +++ b/docker-compose.cluster.yml @@ -2,7 +2,7 @@ version: '2.3' services: clickhouse1: - image: 'clickhouse/clickhouse-server:${CLICKHOUSE_VERSION-23.11-alpine}' + image: 'clickhouse/clickhouse-server:${CLICKHOUSE_VERSION-24.2-alpine}' ulimits: nofile: soft: 262144 @@ -19,7 +19,7 @@ services: - './.docker/clickhouse/users.xml:/etc/clickhouse-server/users.xml' clickhouse2: - image: 'clickhouse/clickhouse-server:${CLICKHOUSE_VERSION-23.11-alpine}' + image: 'clickhouse/clickhouse-server:${CLICKHOUSE_VERSION-24.2-alpine}' ulimits: nofile: soft: 262144 diff --git a/docker-compose.yml b/docker-compose.yml index e08376b9..c3776c59 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,7 +1,7 @@ version: '3.8' services: clickhouse: - image: 'clickhouse/clickhouse-server:${CLICKHOUSE_VERSION-23.11-alpine}' + image: 'clickhouse/clickhouse-server:${CLICKHOUSE_VERSION-24.2-alpine}' container_name: 'clickhouse-js-clickhouse-server' ports: - '8123:8123' diff --git a/packages/client-common/__tests__/unit/row_binary_decoders.test.ts b/packages/client-common/__tests__/unit/row_binary_decoders.test.ts index e01bf260..7bdf42b2 100644 --- a/packages/client-common/__tests__/unit/row_binary_decoders.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_decoders.test.ts @@ -1,7 +1,4 @@ -import { - removeLowCardinality, - RowBinaryTypesDecoder, -} from '../../src/data_formatter' +import { RowBinaryTypesDecoder } from '../../src/data_formatter' fdescribe('RowBinary decoders', () => { it('should decode Date', () => { @@ -23,16 +20,16 @@ fdescribe('RowBinary decoders', () => { }) }) - it('should remove low cardinality', async () => { - const args: [string, string][] = [ - ['LowCardinality(String)', 'String'], - ['LowCardinality(Nullable(String))', 'Nullable(String)'], - ['LowCardinality(Array(String))', 'Array(String)'], - ['Nullable(String)', 'Nullable(String)'], - ['String', 'String'], - ] - args.forEach(([src, expected]) => { - expect(removeLowCardinality(src)).toEqual(expected) - }) - }) + // it('should remove low cardinality', async () => { + // const args: [string, string][] = [ + // ['LowCardinality(String)', 'String'], + // ['LowCardinality(Nullable(String))', 'Nullable(String)'], + // ['LowCardinality(Array(String))', 'Array(String)'], + // ['Nullable(String)', 'Nullable(String)'], + // ['String', 'String'], + // ] + // args.forEach(([src, expected]) => { + // expect(re(src)).toEqual(expected) + // }) + // }) }) diff --git a/packages/client-common/__tests__/unit/row_binary_read_bytes.test.ts b/packages/client-common/__tests__/unit/row_binary_read_bytes.test.ts index 8303ae3b..41e47232 100644 --- a/packages/client-common/__tests__/unit/row_binary_read_bytes.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_read_bytes.test.ts @@ -1,4 +1,5 @@ import { + readBytesAsFloat32, readBytesAsUnsignedBigInt, readBytesAsUnsignedInt, } from '../../src/data_formatter' @@ -90,6 +91,27 @@ fdescribe('RowBinary 
read bytes', () => { }) }) + fdescribe('Floats', () => { + it('should decode Float32', async () => { + const args: [Uint8Array, number][] = [ + [new Uint8Array([0x00, 0x00, 0x00, 0x00]), 0], + // some reference values from a random dataset (not 100% matching the CH output, because floats) + [new Uint8Array([151, 136, 46, 6]), 3.2826113095459874e-35], + [new Uint8Array([176, 183, 118, 153]), -1.2754997313209913e-23], + [new Uint8Array([114, 233, 40, 161]), -5.72295763540352e-19], + [new Uint8Array([112, 205, 62, 233]), -1.4416628555694005e25], + [new Uint8Array([43, 253, 113, 82]), 259833643008], + [new Uint8Array([165, 173, 250, 112]), 6.206494065007942e29], + [new Uint8Array([175, 228, 124, 108]), 1.2229169371247749e27], + ] + args.forEach(([src, expected]) => { + expect(readBytesAsFloat32(src, 0)) + .withContext(ctx(src, expected)) + .toBe(expected) + }) + }) + }) + function ctx(src: Uint8Array, expected: number | bigint) { return `Expected ${src.toString()} to be decoded as ${expected}` } diff --git a/packages/client-common/src/data_formatter/row_binary/columns.ts b/packages/client-common/src/data_formatter/row_binary/columns.ts new file mode 100644 index 00000000..0656ac5f --- /dev/null +++ b/packages/client-common/src/data_formatter/row_binary/columns.ts @@ -0,0 +1,123 @@ +import type { DecodeResult } from './read_bytes' +import { readBytesAsUnsignedLEB128 } from './read_bytes' +import type { + ColumnType, + DecodedColumnType, + DecodeError, + TypeDecoder, +} from './types' +import { RowBinaryColumnTypeToDecoder, RowBinaryTypesDecoder } from './types' + +export type DecodedColumns = DecodeResult<{ + names: string[] + types: DecodedColumnType[] + decoders: TypeDecoder[] +}> + +export const RowBinaryColumns = { + decode: (src: Uint8Array): DecodedColumns | DecodeError => { + const res = readBytesAsUnsignedLEB128(src, 0) + if (res === null) { + return { error: 'Not enough data to decode the number of columns' } + } + const numColumns = res[0] + let nextLoc = res[1] + const names = new Array(numColumns) + const types = new Array(numColumns) + const decoders: TypeDecoder[] = new Array(numColumns) + for (let i = 0; i < numColumns; i++) { + const res = RowBinaryTypesDecoder.string(src, nextLoc) + if (res === null) { + return { error: `Not enough data to decode column ${i} name` } + } + nextLoc = res[1] + names[i] = res[0] + } + for (let i = 0; i < numColumns; i++) { + const res = RowBinaryTypesDecoder.string(src, nextLoc) + if (res === null) { + return { error: `Not enough data to decode column ${i} type` } + } + nextLoc = res[1] + const decodedColumn = decodeColumnType(res[0]) + if (!(decodedColumn.columnType in RowBinaryColumnTypeToDecoder)) { + return { + error: `No matching type decoder for client type in ${decodedColumn}`, + } + } + const columnType = decodedColumn.columnType as ColumnType + const typeDecoder = RowBinaryColumnTypeToDecoder[columnType] + decoders[i] = decodedColumn.isNullable + ? 
RowBinaryTypesDecoder.nullable(typeDecoder)
+        : typeDecoder
+      types[i] = {
+        ...decodedColumn,
+        columnType,
+      }
+    }
+    return [{ names, types, decoders }, nextLoc]
+  },
+}
+
+type DecodeColumnSimpleType = {
+  type: 'Simple'
+  // from ClickHouse as is
+  dbType: string
+  // without LowCardinality and Nullable
+  columnType: string
+  isNullable: boolean
+  isLowCardinality: boolean
+}
+type DecodeColumnArrayType = {
+  type: 'Array'
+  innerType:
+    | DecodeColumnSimpleType
+    | DecodeColumnArrayType
+    | DecodeColumnMapType
+}
+type DecodeColumnMapType = {
+  type: 'Map'
+  keyType: DecodeColumnSimpleType
+  valueType:
+    | DecodeColumnSimpleType
+    | DecodeColumnArrayType
+    | DecodeColumnMapType
+}
+type DecodeColumnTypeResult =
+  | DecodeColumnSimpleType
+  | DecodeColumnArrayType
+  | DecodeColumnMapType
+
+export function decodeColumnType(dbType: string): {
+  // from ClickHouse as is
+  dbType: string
+  // without LowCardinality and Nullable
+  columnType: string
+  isNullable: boolean
+  isLowCardinality: boolean
+  type: 'Simple'
+} {
+  // if (dbType.startsWith('Map(')) {
+  //   dbType = dbType.slice(4, -1)
+  //
+  // }
+  let columnType = dbType
+  let isNullable = false
+  let isLowCardinality = false
+  if (columnType.startsWith('LowCardinality')) {
+    columnType = columnType.slice(15, -1)
+    isLowCardinality = true
+  }
+  if (columnType.startsWith('Nullable')) {
+    columnType = columnType.slice(9, -1)
+    isNullable = true
+  }
+  return {
+    dbType,
+    columnType,
+    isNullable,
+    isLowCardinality,
+    type: 'Simple',
+  }
+}
diff --git a/packages/client-common/src/data_formatter/row_binary/index.ts b/packages/client-common/src/data_formatter/row_binary/index.ts
index 571fd4e1..bfca905a 100644
--- a/packages/client-common/src/data_formatter/row_binary/index.ts
+++ b/packages/client-common/src/data_formatter/row_binary/index.ts
@@ -1,2 +1,3 @@
-export * from './decoder'
+export * from './columns'
 export * from './read_bytes'
+export * from './types'
diff --git a/packages/client-common/src/data_formatter/row_binary/read_bytes.ts b/packages/client-common/src/data_formatter/row_binary/read_bytes.ts
index f76f8fab..59e174d7 100644
--- a/packages/client-common/src/data_formatter/row_binary/read_bytes.ts
+++ b/packages/client-common/src/data_formatter/row_binary/read_bytes.ts
@@ -1,6 +1,7 @@
 // Decoded value + the next index to scan from
 export type DecodeResult<T> = [T, number]
 
+// May return null since we cannot determine how many bytes we need to read in advance
 export function readBytesAsUnsignedLEB128(
   src: Uint8Array,
   loc: number
@@ -23,6 +24,8 @@ export function readBytesAsUnsignedLEB128(
   }
 }
 
+// FIXME: use DecodeResult | null for all methods and do the validation here
+// instead of relying on the caller
 export function readBytesAsUnsignedInt(
   src: Uint8Array,
   loc: number,
@@ -46,3 +49,13 @@ export function readBytesAsUnsignedBigInt(
   }
   return result
 }
+
+export function readBytesAsFloat32(src: Uint8Array, loc: number): number {
+  // FIXME: maybe can be optimized without DataView
+  // `src.byteOffset` is added, as `src` may be a view into a pooled buffer
+  return new DataView(src.buffer, src.byteOffset + loc, 4).getFloat32(0, true)
+}
+
+export function readBytesAsFloat64(src: Uint8Array, loc: number): number {
+  // FIXME: maybe can be optimized without DataView
+  return new DataView(src.buffer, src.byteOffset + loc, 8).getFloat64(0, true)
+}
diff --git a/packages/client-common/src/data_formatter/row_binary/decoder.ts b/packages/client-common/src/data_formatter/row_binary/types.ts
similarity index 73%
rename from
packages/client-common/src/data_formatter/row_binary/decoder.ts rename to packages/client-common/src/data_formatter/row_binary/types.ts index 47298ed2..d4a414a1 100644 --- a/packages/client-common/src/data_formatter/row_binary/decoder.ts +++ b/packages/client-common/src/data_formatter/row_binary/types.ts @@ -1,5 +1,7 @@ import type { DecodeResult } from './read_bytes' import { + readBytesAsFloat32, + readBytesAsFloat64, readBytesAsUnsignedBigInt, readBytesAsUnsignedInt, readBytesAsUnsignedLEB128, @@ -19,8 +21,11 @@ export type ColumnType = | 'Int128' | 'UInt256' | 'Int256' + | 'Float32' + | 'Float64' | 'String' | 'Date' + | 'Date32' export type TypeDecoder = ( src: Uint8Array, @@ -28,11 +33,14 @@ export type TypeDecoder = ( ) => DecodeResult | null export type DecodeError = { error: string } -export type DecodedColumns = DecodeResult<{ - names: string[] - types: ColumnType[] - decoders: TypeDecoder[] -}> +export type DecodedColumnType = { + dbType: string + columnType: ColumnType + isNullable: boolean + isLowCardinality: boolean +} + +type DateMapper = (days: number) => T const Int8Overflow = 128 const UInt8Overflow = 256 @@ -116,6 +124,14 @@ export const RowBinaryTypesDecoder = { const x = readBytesAsUnsignedBigInt(src, loc, 32) return [x < Int256Overflow ? x : x - UInt256Overflow, loc + 32] }, + float32: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 4) return null + return [readBytesAsFloat32(src, loc), loc + 4] + }, + float64: (src: Uint8Array, loc: number): DecodeResult | null => { + if (src.length < loc + 8) return null + return [readBytesAsFloat64(src, loc), loc + 8] + }, string: (src: Uint8Array, loc: number): DecodeResult | null => { if (src.length < loc + 1) return null const res = readBytesAsUnsignedLEB128(src, loc) @@ -130,11 +146,25 @@ export const RowBinaryTypesDecoder = { ] }, date: (src: Uint8Array, loc: number): DecodeResult | null => { - if (src.length < loc + 2) return null - const days = readBytesAsUnsignedInt(src, loc, 2) - const date = new Date(days * DayMillis) - return [date, loc + 2] - }, + const res = RowBinaryTypesDecoder.uint16(src, loc) + if (res === null) return null + return [new Date(res[0] * DayMillis), res[1]] + }, + date32: (src: Uint8Array, loc: number): DecodeResult | null => { + const res = RowBinaryTypesDecoder.int32(src, loc) + if (res === null) return null + return [new Date(res[0] * DayMillis), res[1]] + }, + nullable: + (baseTypeDecoder: TypeDecoder) => + (src: Uint8Array, loc: number): DecodeResult | null => { + const res = RowBinaryTypesDecoder.uint8(src, loc) + if (res === null) return null + if (res[0] === 1) { + return [null, res[1]] + } + return baseTypeDecoder(src, res[1]) + }, } export const RowBinaryColumnTypeToDecoder: { @@ -153,51 +183,9 @@ export const RowBinaryColumnTypeToDecoder: { Int128: RowBinaryTypesDecoder.int128, UInt256: RowBinaryTypesDecoder.uint256, Int256: RowBinaryTypesDecoder.int256, + Float32: RowBinaryTypesDecoder.float32, + Float64: RowBinaryTypesDecoder.float64, String: RowBinaryTypesDecoder.string, Date: RowBinaryTypesDecoder.date, -} - -export const RowBinaryColumns = { - decode: (src: Uint8Array): DecodedColumns | DecodeError => { - const res = readBytesAsUnsignedLEB128(src, 0) - if (res === null) { - return { error: 'Not enough data to decode the number of columns' } - } - const numColumns = res[0] - let nextLoc = res[1] - const names = new Array(numColumns) - const types = new Array(numColumns) - const decoders: TypeDecoder[] = new Array(numColumns) - for (let i = 0; i < 
numColumns; i++) { - const res = RowBinaryTypesDecoder.string(src, nextLoc) - if (res === null) { - return { error: `Not enough data to decode column ${i} name` } - } - nextLoc = res[1] - names[i] = res[0] - } - for (let i = 0; i < numColumns; i++) { - const res = RowBinaryTypesDecoder.string(src, nextLoc) - if (res === null) { - return { error: `Not enough data to decode column ${i} type` } - } - nextLoc = res[1] - const colType = removeLowCardinality(res[0]) - decoders[i] = RowBinaryColumnTypeToDecoder[colType] - if (decoders[i] === undefined) { - return { - error: `Unknown column type ${res[0]} (normalized: ${colType})`, - } - } - types[i] = colType - } - return [{ names, types, decoders }, nextLoc] - }, -} - -export function removeLowCardinality(colType: string): ColumnType { - if (colType.startsWith('LowCardinality')) { - return colType.slice(15, -1) as ColumnType - } - return colType as ColumnType + Date32: RowBinaryTypesDecoder.date32, } diff --git a/packages/client-node/src/row_binary_result_set.ts b/packages/client-node/src/row_binary_result_set.ts index a5145697..ad747d1b 100644 --- a/packages/client-node/src/row_binary_result_set.ts +++ b/packages/client-node/src/row_binary_result_set.ts @@ -4,6 +4,18 @@ import { RowBinaryColumns } from '@clickhouse/client-common/src/data_formatter' import { Buffer } from 'buffer' import Stream, { Transform, type TransformCallback } from 'stream' +// draft; currently unused. +export interface RowBinaryMappers { + date?: (daysSinceEpochUInt16: number) => T + date32?: (daysSinceEpochInt32: number) => T + datetime?: (secondsSinceEpochUInt32: number, timezone?: string) => T + datetime64?: (seconds: bigint, nanos: number, timezone?: string) => T + decimal?: (whole: number | bigint, fractional: number | bigint) => T +} +export interface RowBinaryResultSetOptions { + mappers?: RowBinaryMappers +} + export class RowBinaryResultSet implements BaseResultSet { constructor( private _stream: Stream.Readable, From 709a447d3dc763a9305ed1d7b599dacff1a97707 Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Fri, 22 Mar 2024 05:29:29 +0100 Subject: [PATCH 05/14] WIP --- benchmarks/leaks/strings.ts | 0 benchmarks/parsing/strings.ts | 41 +++ .../src/data_formatter/row_binary/columns.ts | 123 --------- .../row_binary/columns_header.ts | 207 +++++++++++++++ .../row_binary/columns_parser.ts | 248 ++++++++++++++++++ .../src/data_formatter/row_binary/errors.ts | 32 +++ .../src/data_formatter/row_binary/index.ts | 5 +- .../src/data_formatter/row_binary/mappers.ts | 17 ++ .../src/data_formatter/row_binary/types.ts | 246 +++++++++++++---- .../row_binary/types_data_view.ts | 246 +++++++++++++++++ .../client-node/src/row_binary_result_set.ts | 158 +++++++++-- 11 files changed, 1124 insertions(+), 199 deletions(-) create mode 100644 benchmarks/leaks/strings.ts create mode 100644 benchmarks/parsing/strings.ts delete mode 100644 packages/client-common/src/data_formatter/row_binary/columns.ts create mode 100644 packages/client-common/src/data_formatter/row_binary/columns_header.ts create mode 100644 packages/client-common/src/data_formatter/row_binary/columns_parser.ts create mode 100644 packages/client-common/src/data_formatter/row_binary/errors.ts create mode 100644 packages/client-common/src/data_formatter/row_binary/mappers.ts create mode 100644 packages/client-common/src/data_formatter/row_binary/types_data_view.ts diff --git a/benchmarks/leaks/strings.ts b/benchmarks/leaks/strings.ts new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/parsing/strings.ts 
b/benchmarks/parsing/strings.ts new file mode 100644 index 00000000..8011dd06 --- /dev/null +++ b/benchmarks/parsing/strings.ts @@ -0,0 +1,41 @@ +/* +From https://stackoverflow.com/a/37037034/4575540 + +Checked with Node.js 20 - no difference. + +strConcat +128888890 - 238ms +strTemplate +128888890 - 235ms +strConcat +128888890 - 234ms +strTemplate +128888890 - 228ms + + */ +void (() => { + function strConcat(i: number) { + return 'abc' + i + 'def' + } + + function strTemplate(i: number) { + return `abc${i}def` + } + + function run(strategy: (i: number) => string) { + const before = new Date().getTime() + let len = 0 + for (let i = 0; i < 10000000; i += 1) { + len += strategy(i).length + } + console.log(len + ' - ' + (new Date().getTime() - before) + 'ms') + } + + for (let i = 0; i < 10; i++) { + console.log('strConcat') + run(strConcat) + + console.log('strTemplate') + run(strTemplate) + } +})() diff --git a/packages/client-common/src/data_formatter/row_binary/columns.ts b/packages/client-common/src/data_formatter/row_binary/columns.ts deleted file mode 100644 index 0656ac5f..00000000 --- a/packages/client-common/src/data_formatter/row_binary/columns.ts +++ /dev/null @@ -1,123 +0,0 @@ -import type { DecodeResult } from './read_bytes' -import { readBytesAsUnsignedLEB128 } from './read_bytes' -import type { - ColumnType, - DecodedColumnType, - DecodeError, - TypeDecoder, -} from './types' -import { RowBinaryColumnTypeToDecoder, RowBinaryTypesDecoder } from './types' - -export type DecodedColumns = DecodeResult<{ - names: string[] - types: DecodedColumnType[] - decoders: TypeDecoder[] -}> - -export const RowBinaryColumns = { - decode: (src: Uint8Array): DecodedColumns | DecodeError => { - const res = readBytesAsUnsignedLEB128(src, 0) - if (res === null) { - return { error: 'Not enough data to decode the number of columns' } - } - const numColumns = res[0] - let nextLoc = res[1] - const names = new Array(numColumns) - const types = new Array(numColumns) - const decoders: TypeDecoder[] = new Array(numColumns) - for (let i = 0; i < numColumns; i++) { - const res = RowBinaryTypesDecoder.string(src, nextLoc) - if (res === null) { - return { error: `Not enough data to decode column ${i} name` } - } - nextLoc = res[1] - names[i] = res[0] - } - for (let i = 0; i < numColumns; i++) { - const res = RowBinaryTypesDecoder.string(src, nextLoc) - if (res === null) { - return { error: `Not enough data to decode column ${i} type` } - } - nextLoc = res[1] - const decodedColumn = decodeColumnType(res[0]) - if (!(decodedColumn.columnType in RowBinaryColumnTypeToDecoder)) { - return { - error: `No matching type decoder for client type in ${decodedColumn}`, - } - } - const columnType = decodedColumn.columnType as ColumnType - const typeDecoder = RowBinaryColumnTypeToDecoder[columnType] - decoders[i] = decodedColumn.isNullable - ? 
RowBinaryTypesDecoder.nullable(typeDecoder) - : typeDecoder - types[i] = { - ...decodedColumn, - columnType, - } - } - // console.log(`Decoded columns: ${names}, ${types}`) - return [{ names, types, decoders }, nextLoc] - }, -} - -type DecodeColumnSimpleType = { - type: 'Simple' - // from ClickHouse as is - dbType: string - // without LowCardinality and Nullable - columnType: string - isNullable: boolean - isLowCardinality: boolean -} -type DecodeColumnArrayType = { - type: 'Array' - innerType: - | DecodeColumnSimpleType - | DecodeColumnArrayType - | DecodeColumnMapType -} -type DecodeColumnMapType = { - type: 'Map' - keyType: DecodeColumnSimpleType - valueType: - | DecodeColumnSimpleType - | DecodeColumnArrayType - | DecodeColumnMapType -} -type DecodeColumnTypeResult = - | DecodeColumnSimpleType - | DecodeColumnArrayType - | DecodeColumnMapType - -export function decodeColumnType(dbType: string): { - // from ClickHouse as is - dbType: string - // without LowCardinality and Nullable - columnType: string - isNullable: boolean - isLowCardinality: boolean - type: 'Simple' -} { - // if (dbType.startsWith('Map(')) { - // dbType = dbType.slice(4, -1) - // - // } - let columnType = dbType - let isNullable = false - let isLowCardinality = false - if (columnType.startsWith('LowCardinality')) { - columnType = columnType.slice(15, -1) - isLowCardinality = true - } - if (columnType.startsWith('Nullable')) { - columnType = columnType.slice(9, -1) - isNullable = true - } - return { - dbType, - columnType, - isNullable, - isLowCardinality, - type: 'Simple', - } -} diff --git a/packages/client-common/src/data_formatter/row_binary/columns_header.ts b/packages/client-common/src/data_formatter/row_binary/columns_header.ts new file mode 100644 index 00000000..f416ad8c --- /dev/null +++ b/packages/client-common/src/data_formatter/row_binary/columns_header.ts @@ -0,0 +1,207 @@ +import type { DecimalParams, ParsedColumnType } from './columns_parser' +import { RowBinaryColumnTypesParser } from './columns_parser' +import { ClickHouseRowBinaryError } from './errors' +import type { DecodeResult } from './read_bytes' +import { readBytesAsUnsignedLEB128 } from './read_bytes' +import { + RowBinarySimpleDecoders, + RowBinaryTypesDecoder, + SimpleTypeDecoder, + TypeDecoder, +} from './types' + +export type DecodedColumns = DecodeResult<{ + names: string[] + types: ParsedColumnType[] + decoders: SimpleTypeDecoder[] +}> + +/** @throws ClickHouseRowBinaryError */ +export class RowBinaryColumnsHeader { + static decode(src: Uint8Array): DecodedColumns { + const res = readBytesAsUnsignedLEB128(src, 0) + if (res === null) { + throw ClickHouseRowBinaryError.headerDecodingError( + 'Not enough data to decode number of columns', + {} + ) + } + const numColumns = res[0] + let nextLoc = res[1] + const names = new Array(numColumns) + const types = new Array(numColumns) + const decoders = new Array(numColumns) + for (let i = 0; i < numColumns; i++) { + const res = RowBinaryTypesDecoder.string(src, nextLoc) + if (res === null) { + throw ClickHouseRowBinaryError.headerDecodingError( + `Not enough data to decode column name`, + { i, names, numColumns, nextLoc } + ) + } + nextLoc = res[1] + names[i] = res[0] + } + for (let i = 0; i < numColumns; i++) { + const res = RowBinaryTypesDecoder.string(src, nextLoc) + if (res === null) { + throw ClickHouseRowBinaryError.headerDecodingError( + `Not enough data to decode column type`, + { i, names, types, numColumns, nextLoc } + ) + } + nextLoc = res[1] + const col = 
RowBinaryColumnTypesParser.parseColumnType(res[0]) + types[i] = col + let valueDecoder: TypeDecoder + switch (col.type) { + case 'Simple': + decoders[i] = RowBinarySimpleDecoders[col.columnType] + break + case 'Decimal': + decoders[i] = getDecimalDecoder(col.params) + break + case 'Array': + if (col.valueType === 'Decimal') { + valueDecoder = getDecimalDecoder(col.decimalParams) + } else { + valueDecoder = RowBinarySimpleDecoders[col.valueType] + } + decoders[i] = RowBinaryTypesDecoder.array( + col.valueNullable + ? RowBinaryTypesDecoder.nullable(valueDecoder) + : valueDecoder, + col.dimensions + ) + break + case 'Nullable': + if (col.valueType === 'Decimal') { + valueDecoder = getDecimalDecoder(col.decimalParams) + } else { + valueDecoder = RowBinarySimpleDecoders[col.valueType] + } + decoders[i] = RowBinaryTypesDecoder.nullable(valueDecoder) + break + default: + throw ClickHouseRowBinaryError.headerDecodingError( + 'Unsupported column type', + { col } + ) + } + } + // console.log(`Decoded columns:`, names, types) + return [{ names, types, decoders }, nextLoc] + } +} + +function getDecimalDecoder(decimalParams: DecimalParams): SimpleTypeDecoder { + const intSize = decimalParams.intSize + if (intSize === 32) { + return RowBinaryTypesDecoder.decimal32(decimalParams.scale) + } + if (intSize === 64) { + return RowBinaryTypesDecoder.decimal64(decimalParams.scale) + } + // for tests only (128 and 256 support is there) + throw new Error(`Unsupported Decimal size: ${intSize}`) +} +// +// export class RowBinaryColumnsHeaderDataView { +// static decode(src: Uint8Array): DecodeResult<{ +// names: string[] +// types: ParsedColumnType[] +// decoders: SimpleTypeDecoderDataView[] +// }> +// { +// const res = readBytesAsUnsignedLEB128(src, 0) +// if (res === null) { +// throw ClickHouseRowBinaryError.headerDecodingError( +// 'Not enough data to decode number of columns', +// {} +// ) +// } +// const numColumns = res[0] +// let nextLoc = res[1] +// const names = new Array(numColumns) +// const types = new Array(numColumns) +// const decoders = new Array(numColumns) +// for (let i = 0; i < numColumns; i++) { +// const res = RowBinaryTypesDecoder.string(src, nextLoc) +// if (res === null) { +// throw ClickHouseRowBinaryError.headerDecodingError( +// `Not enough data to decode column name`, +// { i, names, numColumns, nextLoc } +// ) +// } +// nextLoc = res[1] +// names[i] = res[0] +// } +// for (let i = 0; i < numColumns; i++) { +// const res = RowBinaryTypesDecoder.string(src, nextLoc) +// if (res === null) { +// throw ClickHouseRowBinaryError.headerDecodingError( +// `Not enough data to decode column type`, +// { i, names, types, numColumns, nextLoc } +// ) +// } +// nextLoc = res[1] +// const col = RowBinaryColumnTypesParser.parseColumnType(res[0]) +// types[i] = col +// let valueDecoder: SimpleTypeDecoderDataView +// switch (col.type) { +// case 'Simple': +// decoders[i] = +// RowBinarySimpleDecodersDataView[ +// col.columnType as keyof RowBinaryTypesDecoderDataView +// ] +// break +// case 'Decimal': +// decoders[i] = RowBinaryTypesDecoderDataView.decimal( +// col.params.precision, +// col.params.scale +// ) +// break +// case 'Array': +// // if (col.valueType === 'Decimal') { +// // valueDecoder = RowBinaryTypesDecoder.decimal( +// // col.decimalParams.precision, +// // col.decimalParams.scale +// // ) +// // } else { +// // valueDecoder = +// // RowBinarySimpleDecodersDataView[ +// // col.valueType as keyof RowBinaryTypesDecoderDataView +// // ] +// // } +// // decoders[i] = 
RowBinaryTypesDecoderDataView.array( +// // col.valueNullable +// // ? RowBinaryTypesDecoder.nullable(valueDecoder) +// // : valueDecoder, +// // col.dimensions +// // ) +// throw new Error('Array type is not supported yet') +// case 'Nullable': +// if (col.valueType === 'Decimal') { +// valueDecoder = RowBinaryTypesDecoderDataView.decimal( +// col.decimalParams.precision, +// col.decimalParams.scale +// ) +// } else { +// valueDecoder = +// RowBinarySimpleDecodersDataView[ +// col.valueType as keyof RowBinaryTypesDecoderDataView +// ] +// } +// decoders[i] = RowBinaryTypesDecoderDataView.nullable(valueDecoder) +// break +// default: +// throw ClickHouseRowBinaryError.headerDecodingError( +// 'Unsupported column type', +// { col } +// ) +// } +// } +// // console.log(`Decoded columns:`, names, types) +// return [{ names, types, decoders }, nextLoc] +// } +// } diff --git a/packages/client-common/src/data_formatter/row_binary/columns_parser.ts b/packages/client-common/src/data_formatter/row_binary/columns_parser.ts new file mode 100644 index 00000000..ed844e75 --- /dev/null +++ b/packages/client-common/src/data_formatter/row_binary/columns_parser.ts @@ -0,0 +1,248 @@ +import { ClickHouseRowBinaryError } from './errors' +import type { SimpleColumnType } from './types' +import { RowBinarySimpleDecoders } from './types' + +export interface ParsedColumnSimple { + type: 'Simple' + /** Without LowCardinality and Nullable. For example: + * * UInt8 -> UInt8 + * * LowCardinality(Nullable(String)) -> String */ + columnType: SimpleColumnType + dbType: string +} + +export type ParsedColumnNullable = + | { + type: 'Nullable' + /** Used to determine how to decode T from Nullable(T) */ + valueType: SimpleColumnType + dbType: string + } + | { + type: 'Nullable' + valueType: 'Decimal' + decimalParams: DecimalParams + dbType: string + } + +/** Array cannot be Nullable or LowCardinality, but its inner type can be. + * Arrays can be multidimensional, e.g. Array(Array(Array(T))). + * Arrays are allowed to have a Map as the inner type. + */ +export interface DecodedColumnMap { + type: 'Map' + key: ParsedColumnSimple + value: ParsedColumnSimple | ParsedColumnArray | DecodedColumnMap + dbType: string +} + +/** Int size for Decimal depends on the Precision + * * 32 bits for precision < 10 (JS number) + * * 64 bits for precision < 19 (JS BigInt) + * * 128 bits for precision < 39 (JS BigInt) + * * 256 bits for precision >= 39 (JS BigInt) + */ +export interface DecimalParams { + precision: number + scale: number + intSize: 32 | 64 | 128 | 256 +} +export interface ParsedColumnDecimal { + type: 'Decimal' + params: DecimalParams + dbType: string +} + +/** Array cannot be Nullable or LowCardinality, but its value type can be. + * Arrays can be multidimensional, e.g. Array(Array(Array(T))). + * Arrays are allowed to have a Map as the value type. + */ +export type ParsedColumnArray = + | { + type: 'Array' + dimensions: number + /** Represents the final value type; nested arrays are handled with {@link ParsedColumnArray.dimensions} */ + valueType: SimpleColumnType + valueNullable: boolean + dbType: string + } + | { + type: 'Array' + dimensions: number + valueType: 'Decimal' + valueNullable: boolean + decimalParams: DecimalParams + dbType: string + } +export type ParsedColumnType = + | ParsedColumnSimple + | ParsedColumnNullable + | ParsedColumnDecimal + | ParsedColumnArray +// | DecodedColumnMap // TODO - add Map support. 
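// An illustrative note (a reviewer's sketch, not part of the patch): with the
// precision thresholds implemented in parseDecimalParams below, the expected
// mapping is, for example:
//
//   parseDecimalParams({ dbType: 'Decimal(9, 2)', columnType: 'Decimal(9, 2)' })
//   // -> { precision: 9, scale: 2, intSize: 32 }   (decoded as a JS number)
//   parseDecimalParams({ dbType: 'Decimal(38, 10)', columnType: 'Decimal(38, 10)' })
//   // -> { precision: 38, scale: 10, intSize: 128 } (decoded as a JS BigInt)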
+export class RowBinaryColumnTypesParser {
+  static parseColumnType(dbType: string): ParsedColumnType {
+    let columnType = dbType
+    let isNullable = false
+    if (columnType.startsWith(LowCardinalityPrefix)) {
+      columnType = columnType.slice(LowCardinalityPrefix.length, -1)
+    }
+    if (columnType.startsWith(NullablePrefix)) {
+      columnType = columnType.slice(NullablePrefix.length, -1)
+      isNullable = true
+    }
+    let result: ParsedColumnType
+    if (columnType.startsWith(DecimalPrefix)) {
+      result = {
+        type: 'Decimal',
+        params: RowBinaryColumnTypesParser.parseDecimalParams({
+          dbType,
+          columnType,
+        }),
+        dbType,
+      }
+    } else if (columnType.startsWith(ArrayPrefix)) {
+      result = RowBinaryColumnTypesParser.parseArrayType({ dbType, columnType })
+    } else if (columnType.startsWith(MapPrefix)) {
+      throw ClickHouseRowBinaryError.headerDecodingError(
+        'Map types are not supported yet',
+        { columnType }
+      )
+    } else {
+      if (columnType in RowBinarySimpleDecoders) {
+        result = {
+          type: 'Simple',
+          columnType: columnType as SimpleColumnType,
+          dbType,
+        }
+      } else {
+        throw ClickHouseRowBinaryError.headerDecodingError(
+          'Unsupported column type',
+          { columnType }
+        )
+      }
+    }
+    if (isNullable) {
+      if (result.type === 'Array') {
+        throw ClickHouseRowBinaryError.headerDecodingError(
+          'Array cannot be Nullable',
+          { dbType }
+        )
+      }
+      if (result.type === 'Decimal') {
+        return {
+          type: 'Nullable',
+          valueType: 'Decimal',
+          decimalParams: result.params,
+          dbType,
+        }
+      }
+      return {
+        type: 'Nullable',
+        valueType: result.columnType,
+        dbType,
+      }
+    } else {
+      return result
+    }
+  }
+
+  static parseDecimalParams({
+    columnType,
+    dbType,
+  }: ParseColumnTypeParams): DecimalParams {
+    const split = columnType.slice(DecimalPrefix.length, -1).split(',')
+    if (split.length !== 2) {
+      throw ClickHouseRowBinaryError.headerDecodingError(
+        'Invalid Decimal type',
+        { dbType, columnType, split }
+      )
+    }
+    const params: DecimalParams = {
+      precision: parseInt(split[0], 10),
+      scale: parseInt(split[1], 10),
+      intSize: 32,
+    }
+    if (params.precision > 38) {
+      params.intSize = 256
+    } else if (params.precision > 18) {
+      params.intSize = 128
+    } else if (params.precision > 9) {
+      params.intSize = 64
+    }
+    return params
+  }
+
+  static parseArrayType({
+    columnType,
+    dbType,
+  }: ParseColumnTypeParams): ParsedColumnArray {
+    let dimensions = 0
+    while (columnType.length > 0) {
+      if (columnType.startsWith(ArrayPrefix)) {
+        // unwrap one dimension: Array(T) -> T
+        columnType = columnType.slice(ArrayPrefix.length, -1)
+        dimensions++
+      } else {
+        break
+      }
+    }
+    if (dimensions === 0) {
+      throw ClickHouseRowBinaryError.headerDecodingError(
+        'Array type without dimensions',
+        { columnType }
+      )
+    }
+    if (dimensions > 10) {
+      throw ClickHouseRowBinaryError.headerDecodingError(
+        'Array type with too many dimensions',
+        { columnType }
+      )
+    }
+    const valueNullable = columnType.startsWith(NullablePrefix)
+    if (valueNullable) {
+      columnType = columnType.slice(NullablePrefix.length, -1)
+    }
+    if (columnType.startsWith(DecimalPrefix)) {
+      const decimalParams = RowBinaryColumnTypesParser.parseDecimalParams({
+        dbType,
+        columnType,
+      })
+      return {
+        type: 'Array',
+        valueType: 'Decimal',
+        valueNullable,
+        decimalParams,
+        dimensions,
+        dbType,
+      }
+    }
+    if (columnType in RowBinarySimpleDecoders) {
+      return {
+        type: 'Array',
+        valueType: columnType as SimpleColumnType,
+        valueNullable,
+        dimensions,
+        dbType,
+      }
+    }
+    throw ClickHouseRowBinaryError.headerDecodingError(
+      'Unsupported array value type',
+      { dbType, columnType }
+    )
+  }
+}
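// A sketch of what the parser above is expected to return (illustrative only,
// not part of the patch):
//
//   RowBinaryColumnTypesParser.parseColumnType('LowCardinality(Nullable(String))')
//   // -> { type: 'Nullable', valueType: 'String',
//   //      dbType: 'LowCardinality(Nullable(String))' }
//   RowBinaryColumnTypesParser.parseColumnType('Array(Array(Int32))')
//   // -> { type: 'Array', dimensions: 2, valueType: 'Int32',
//   //      valueNullable: false, dbType: 'Array(Array(Int32))' }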
}
+    )
+  }
+}
+
+interface ParseColumnTypeParams {
+  dbType: string
+  columnType: string
+}
+
+const NullablePrefix = 'Nullable(' as const
+const LowCardinalityPrefix = 'LowCardinality(' as const
+const DecimalPrefix = 'Decimal(' as const
+const ArrayPrefix = 'Array(' as const
+const MapPrefix = 'Map(' as const
+// const TuplePrefix = 'Tuple(' as const
+// const EnumPrefix = 'Enum(' as const
diff --git a/packages/client-common/src/data_formatter/row_binary/errors.ts b/packages/client-common/src/data_formatter/row_binary/errors.ts
new file mode 100644
index 00000000..99967a72
--- /dev/null
+++ b/packages/client-common/src/data_formatter/row_binary/errors.ts
@@ -0,0 +1,32 @@
+const HeaderDecodingError = 'HEADER_DECODING_ERROR'
+
+export class ClickHouseRowBinaryError extends Error {
+  readonly args: Record<string, unknown>
+  constructor({ message, args }: ClickHouseRowBinaryError) {
+    super(message)
+    this.args = args
+
+    // Set the prototype explicitly, see:
+    // https://github.com/Microsoft/TypeScript/wiki/Breaking-Changes#extending-built-ins-like-error-array-and-map-may-no-longer-work
+    Object.setPrototypeOf(this, ClickHouseRowBinaryError.prototype)
+  }
+  static headerDecodingError(
+    message: string,
+    args?: Record<string, unknown>
+  ): ClickHouseRowBinaryError {
+    return new ClickHouseRowBinaryError({
+      name: HeaderDecodingError,
+      args: args ?? {},
+      message,
+    })
+  }
+  static decoderNotFoundError(
+    col: Record<string, unknown>
+  ): ClickHouseRowBinaryError {
+    return new ClickHouseRowBinaryError({
+      name: HeaderDecodingError,
+      message: 'Could not find a suitable decoder for this column',
+      args: { col },
+    })
+  }
+}
diff --git a/packages/client-common/src/data_formatter/row_binary/index.ts b/packages/client-common/src/data_formatter/row_binary/index.ts
index bfca905a..d8646301 100644
--- a/packages/client-common/src/data_formatter/row_binary/index.ts
+++ b/packages/client-common/src/data_formatter/row_binary/index.ts
@@ -1,3 +1,6 @@
-export * from './columns'
+export * from './columns_header'
+export * from './columns_parser'
 export * from './read_bytes'
 export * from './types'
+export * from './errors'
+export * from './mappers'
diff --git a/packages/client-common/src/data_formatter/row_binary/mappers.ts b/packages/client-common/src/data_formatter/row_binary/mappers.ts
new file mode 100644
index 00000000..3527ba2d
--- /dev/null
+++ b/packages/client-common/src/data_formatter/row_binary/mappers.ts
@@ -0,0 +1,17 @@
+export interface RowBinaryMappers<T = unknown> {
+  date?: (daysSinceEpoch: number) => T
+  date32?: (daysSinceOrBeforeEpoch: number) => T
+  datetime?: (secondsSinceEpoch: number, timezone?: string) => T
+  datetime64?: (
+    secondsSinceOrBeforeEpoch: bigint,
+    nanosOfSecond: number,
+    timezone?: string
+  ) => T
+  /** Decimal types with precision greater than 9: Decimal64, Decimal128, Decimal256 */
+  decimal?: (whole: bigint, fractional: bigint) => T
+  /** Decimal types with precision 9 or less (Decimal32) */
+  decimal32?: (whole: number, fractional: number) => T
+}
+export interface RowBinaryResultSetOptions {
+  mappers?: RowBinaryMappers
+}
diff --git a/packages/client-common/src/data_formatter/row_binary/types.ts b/packages/client-common/src/data_formatter/row_binary/types.ts
index d4a414a1..4580d001 100644
--- a/packages/client-common/src/data_formatter/row_binary/types.ts
+++ b/packages/client-common/src/data_formatter/row_binary/types.ts
@@ -1,5 +1,5 @@
-import type { DecodeResult } from './read_bytes'
 import {
+  DecodeResult,
   readBytesAsFloat32,
   readBytesAsFloat64,
   readBytesAsUnsignedBigInt,
   readBytesAsUnsignedInt,
   readBytesAsUnsignedLEB128,
 } from 
'./read_bytes' -export type ColumnType = +export type SimpleColumnType = + /** {@link SimpleTypeDecoder} */ | 'Bool' | 'UInt8' | 'Int8' @@ -26,21 +27,43 @@ export type ColumnType = | 'String' | 'Date' | 'Date32' +export type ColumnType = + | SimpleColumnType + /** {@link DecimalTypeDecoder} */ + | 'Decimal' + /** {@link ArrayTypeDecoder} */ + | 'Array' -export type TypeDecoder = ( +export type SimpleTypeDecoder = ( src: Uint8Array, loc: number ) => DecodeResult | null +export type DecimalTypeDecoder = ( + precision: number, + scale: number +) => SimpleTypeDecoder +export type NullableTypeDecoder = ( + baseTypeDecoder: SimpleTypeDecoder | DecimalTypeDecoder +) => SimpleTypeDecoder +export type ArrayTypeDecoder = ( + innerDecoder: SimpleTypeDecoder, + dimensions: number +) => SimpleTypeDecoder +export type TypeDecoder = + | SimpleTypeDecoder + | DecimalTypeDecoder + | ArrayTypeDecoder -export type DecodeError = { error: string } -export type DecodedColumnType = { - dbType: string - columnType: ColumnType - isNullable: boolean - isLowCardinality: boolean -} +// TBD: nested key type safety? +export type MapTypeDecoder = ( + keyDecoder: SimpleTypeDecoder, + valueDecoder: + | SimpleTypeDecoder + | ArrayTypeDecoder + | MapTypeDecoder +) => SimpleTypeDecoder> -type DateMapper = (days: number) => T +// type DateMapper = (days: number) => T const Int8Overflow = 128 const UInt8Overflow = 256 @@ -62,77 +85,89 @@ const Int256Overflow = const UInt256Overflow = 115792089237316195423570985008687907853269984665640564039457584007913129639936n +// const DecimalScaleMultipliersNumber: Record = {} +// for (let i = 0; i < 10; i++) { +// DecimalScaleMultipliersNumber[i] = 10 ** i +// } +// const DecimalScaleMultipliersBigInt: Record = {} +// for (let i = 0; i < 77; i++) { +// DecimalScaleMultipliersBigInt[i] = BigInt(10 ** i) +// } +// console.log(DecimalScaleMultipliers) + const DayMillis = 24 * 3600 * 1000 const TxtDecoder = new TextDecoder() -export const RowBinaryTypesDecoder = { - bool: (src: Uint8Array, loc: number): DecodeResult | null => { +export class RowBinaryTypesDecoder { + static bool(src: Uint8Array, loc: number): DecodeResult | null { if (src.length < loc + 1) return null return [src[loc] === 1, loc + 1] - }, - uint8: (src: Uint8Array, loc: number): DecodeResult | null => { + } + static uint8(src: Uint8Array, loc: number): DecodeResult | null { if (src.length < loc + 1) return null return [src[loc], loc + 1] - }, - int8: (src: Uint8Array, loc: number): DecodeResult | null => { + } + static int8(src: Uint8Array, loc: number): DecodeResult | null { if (src.length < loc + 1) return null const x = src[loc] return [x < Int8Overflow ? x : x - UInt8Overflow, loc + 1] - }, - uint16: (src: Uint8Array, loc: number): DecodeResult | null => { + } + static uint16(src: Uint8Array, loc: number): DecodeResult | null { if (src.length < loc + 2) return null return [readBytesAsUnsignedInt(src, loc, 2), loc + 2] - }, - int16: (src: Uint8Array, loc: number): DecodeResult | null => { + } + static int16(src: Uint8Array, loc: number): DecodeResult | null { if (src.length < loc + 2) return null const x = readBytesAsUnsignedInt(src, loc, 2) return [x < Int16Overflow ? 
x : x - UInt16Overflow, loc + 2] - }, - uint32: (src: Uint8Array, loc: number): DecodeResult | null => { + } + static uint32(src: Uint8Array, loc: number): DecodeResult | null { if (src.length < loc + 4) return null return [readBytesAsUnsignedInt(src, loc, 4), loc + 4] - }, - int32: (src: Uint8Array, loc: number): DecodeResult | null => { + } + static int32(src: Uint8Array, loc: number): DecodeResult | null { if (src.length < loc + 4) return null const x = readBytesAsUnsignedInt(src, loc, 4) return [x < Int32Overflow ? x : x - UInt32Overflow, loc + 4] - }, - uint64: (src: Uint8Array, loc: number): DecodeResult | null => { + } + static uint64(src: Uint8Array, loc: number): DecodeResult | null { if (src.length < loc + 8) return null return [readBytesAsUnsignedBigInt(src, loc, 8), loc + 8] - }, - int64: (src: Uint8Array, loc: number): DecodeResult | null => { + } + static int64(src: Uint8Array, loc: number): DecodeResult | null { if (src.length < loc + 8) return null const x = readBytesAsUnsignedBigInt(src, loc, 8) return [x < Int64Overflow ? x : x - UInt64Overflow, loc + 8] - }, - uint128: (src: Uint8Array, loc: number): DecodeResult | null => { + } + static uint128(src: Uint8Array, loc: number): DecodeResult | null { if (src.length < loc + 16) return null return [readBytesAsUnsignedBigInt(src, loc, 16), loc + 16] - }, - int128: (src: Uint8Array, loc: number): DecodeResult | null => { + } + static int128(src: Uint8Array, loc: number): DecodeResult | null { if (src.length < loc + 16) return null const x = readBytesAsUnsignedBigInt(src, loc, 16) return [x < Int128Overflow ? x : x - UInt128Overflow, loc + 16] - }, - uint256: (src: Uint8Array, loc: number): DecodeResult | null => { + } + static uint256(src: Uint8Array, loc: number): DecodeResult | null { if (src.length < loc + 32) return null return [readBytesAsUnsignedBigInt(src, loc, 32), loc + 32] - }, - int256: (src: Uint8Array, loc: number): DecodeResult | null => { + } + static int256(src: Uint8Array, loc: number): DecodeResult | null { if (src.length < loc + 32) return null const x = readBytesAsUnsignedBigInt(src, loc, 32) return [x < Int256Overflow ? 
x : x - UInt256Overflow, loc + 32] - }, - float32: (src: Uint8Array, loc: number): DecodeResult | null => { + } + static float32(src: Uint8Array, loc: number): DecodeResult | null { if (src.length < loc + 4) return null - return [readBytesAsFloat32(src, loc), loc + 4] - }, - float64: (src: Uint8Array, loc: number): DecodeResult | null => { + const f32 = readBytesAsFloat32(src, loc) + // console.log(f32) + return [f32, loc + 4] + } + static float64(src: Uint8Array, loc: number): DecodeResult | null { if (src.length < loc + 8) return null return [readBytesAsFloat64(src, loc), loc + 8] - }, - string: (src: Uint8Array, loc: number): DecodeResult | null => { + } + static string(src: Uint8Array, loc: number): DecodeResult | null { if (src.length < loc + 1) return null const res = readBytesAsUnsignedLEB128(src, loc) if (res === null) { @@ -141,34 +176,133 @@ export const RowBinaryTypesDecoder = { const [length, nextLoc] = res if (src.length < nextLoc + length) return null return [ - TxtDecoder.decode(src.slice(nextLoc, nextLoc + length)), + TxtDecoder.decode(src.subarray(nextLoc, nextLoc + length)), nextLoc + length, ] - }, - date: (src: Uint8Array, loc: number): DecodeResult | null => { + } + static date(src: Uint8Array, loc: number): DecodeResult | null { const res = RowBinaryTypesDecoder.uint16(src, loc) if (res === null) return null return [new Date(res[0] * DayMillis), res[1]] - }, - date32: (src: Uint8Array, loc: number): DecodeResult | null => { + } + static date32(src: Uint8Array, loc: number): DecodeResult | null { const res = RowBinaryTypesDecoder.int32(src, loc) if (res === null) return null return [new Date(res[0] * DayMillis), res[1]] - }, - nullable: - (baseTypeDecoder: TypeDecoder) => - (src: Uint8Array, loc: number): DecodeResult | null => { + } + static nullable( + baseTypeDecoder: SimpleTypeDecoder + ): (src: Uint8Array, loc: number) => DecodeResult | null { + return (src: Uint8Array, loc: number) => { const res = RowBinaryTypesDecoder.uint8(src, loc) if (res === null) return null if (res[0] === 1) { return [null, res[1]] } return baseTypeDecoder(src, res[1]) - }, + } + } + // static decimal( + // precision: number, + // scale: number + // ): (src: Uint8Array, loc: number) => DecodeResult | null { + // const intSize = getDecimalIntSize(precision) + // let scaleMultiplier: number | bigint + // if (intSize === 32) { + // scaleMultiplier = 10 ** scale + // } else { + // scaleMultiplier = BigInt(10 ** scale) + // } + // // const scaleMultiplier = + // // intSize === 32 + // // ? DecimalScaleMultipliersNumber[scale] + // // : DecimalScaleMultipliersBigInt[scale] + // return (src: Uint8Array, loc: number) => { + // if (intSize === 32) { + // const res = RowBinaryTypesDecoder.int32(src, loc) + // if (res === null) return null + // const whole = ~~(res[0] / (scaleMultiplier as number)) + // const fractional = res[0] % (scaleMultiplier as number) + // return [`${whole.toString(10)}.${fractional.toString(10)}`, res[1]] + // } + // let res: DecodeResult | null + // if (intSize === 64) { + // if (src.length < loc + 8) return null + // const x = readBytesAsUnsignedBigInt(src, loc, 8) + // res = [x < Int64Overflow ? 
x : x - UInt64Overflow, loc + 8]
+  //     } else if (intSize === 128) {
+  //       res = RowBinaryTypesDecoder.int128(src, loc)
+  //     } else if (intSize === 256) {
+  //       res = RowBinaryTypesDecoder.int256(src, loc)
+  //     } else {
+  //       throw new Error(`Unsupported int size: ${intSize}`)
+  //     }
+  //     if (res === null) return null
+  //     const whole = res[0] / (scaleMultiplier as bigint)
+  //     const fractional = res[0] % (scaleMultiplier as bigint)
+  //     return [`${whole.toString(10)}.${fractional.toString(10)}`, res[1]]
+  //   }
+  // }
+  static decimal32(
+    scale: number
+  ): (src: Uint8Array, loc: number) => DecodeResult<string> | null {
+    const scaleMultiplier = 10 ** scale
+    return (src: Uint8Array, loc: number) => {
+      const res = RowBinaryTypesDecoder.int32(src, loc)
+      if (res === null) return null
+      const whole = ~~(res[0] / scaleMultiplier)
+      // abs + padStart, so that e.g. -1.05 is not printed as -1.-5 or -1.5
+      const fractional = Math.abs(res[0] % scaleMultiplier)
+        .toString(10)
+        .padStart(scale, '0')
+      return [`${whole.toString(10)}.${fractional}`, res[1]]
+    }
+  }
+  static decimal64(
+    scale: number
+  ): (src: Uint8Array, loc: number) => DecodeResult<string> | null {
+    return (src: Uint8Array, loc: number) => {
+      const res = RowBinaryTypesDecoder.int64(src, loc)
+      if (res === null) return null
+      // avoid any bigint math here, it's super slow;
+      // slice the decimal string representation instead
+      const sign = res[0] < 0n ? '-' : ''
+      const str = (res[0] < 0n ? -res[0] : res[0])
+        .toString()
+        .padStart(scale + 1, '0') // e.g. 5 with scale 4 -> 00005 -> 0.0005
+      const dotIndex = str.length - scale
+      const whole = str.slice(0, dotIndex)
+      const fractional = str.slice(dotIndex)
+      return [`${sign}${whole}.${fractional}`, res[1]]
+    }
+  }
+  static array(
+    innerDecoder:
+      | SimpleTypeDecoder
+      | ReturnType<typeof RowBinaryTypesDecoder.decimal64>
+      | ReturnType<typeof RowBinaryTypesDecoder.nullable>,
+    dimensions = 0
+  ): (src: Uint8Array, loc: number) => DecodeResult<Array<unknown>> | null {
+    return (src: Uint8Array, loc: number) => {
+      const leb128 = readBytesAsUnsignedLEB128(src, loc)
+      if (leb128 === null) return null
+      const length = leb128[0]
+      let nextLoc = leb128[1]
+      // in a multidimensional array, every element is itself
+      // an array with one dimension less
+      const decodeValue =
+        dimensions > 1
+          ? RowBinaryTypesDecoder.array(innerDecoder, dimensions - 1)
+          : innerDecoder
+      const result = new Array(length)
+      for (let i = 0; i < length; i++) {
+        const res = decodeValue(src, nextLoc)
+        if (res === null) return null
+        result[i] = res[0]
+        nextLoc = res[1] // advance past the decoded value
+      }
+      return [result, nextLoc]
+    }
+  }
+}
+
+export function getDecimalIntSize(precision: number): 32 | 64 | 128 | 256 {
+  if (precision < 10) return 32
+  if (precision < 19) return 64
+  if (precision < 39) return 128
+  return 256
+}
 
-export const RowBinaryColumnTypeToDecoder: {
-  [key in ColumnType]: TypeDecoder
+export const RowBinarySimpleDecoders: {
+  [key in SimpleColumnType]: SimpleTypeDecoder
 } = {
   Bool: RowBinaryTypesDecoder.bool,
   UInt8: RowBinaryTypesDecoder.uint8,
diff --git a/packages/client-common/src/data_formatter/row_binary/types_data_view.ts b/packages/client-common/src/data_formatter/row_binary/types_data_view.ts
new file mode 100644
index 00000000..61ffd794
--- /dev/null
+++ b/packages/client-common/src/data_formatter/row_binary/types_data_view.ts
@@ -0,0 +1,246 @@
+// import type { DecodeResult } from './read_bytes'
+// import {
+//   readBytesAsFloat32,
+//   readBytesAsFloat64,
+//   readBytesAsUnsignedBigInt,
+//   readBytesAsUnsignedInt,
+//   readBytesAsUnsignedLEB128,
+// } from './read_bytes'
+// import {
+//   DecimalTypeDecoder,
+//   getDecimalIntSize,
+//   NullableTypeDecoder,
+//   SimpleColumnType,
+//   SimpleTypeDecoder,
+// } from './types'
+//
+// const Int8Overflow = 128
+// const UInt8Overflow = 256
+//
+// const Int16Overflow = 32768
+// const UInt16Overflow = 65536
+//
+// const Int32Overflow = 2147483648
+// const UInt32Overflow = 4294967296
+//
+// const Int64Overflow = 9223372036854775808n
+// const UInt64Overflow = 18446744073709551616n
+//
+// const Int128Overflow = 
170141183460469231731687303715884105728n +// const UInt128Overflow = 340282366920938463463374607431768211456n +// +// const Int256Overflow = +// 57896044618658097711785492504343953926634992332820282019728792003956564819968n +// const UInt256Overflow = +// 115792089237316195423570985008687907853269984665640564039457584007913129639936n +// +// const DayMillis = 24 * 3600 * 1000 +// const TxtDecoder = new TextDecoder() +// +// export type SimpleTypeDecoderDataView = ( +// src: DataView, +// loc: number +// ) => DecodeResult | null +// +// export class RowBinaryTypesDecoderDataView { +// static bool(src: DataView, loc: number): DecodeResult | null { +// if (src.byteLength < loc + 1) return null +// return [src.getUint8(loc) === 1, loc + 1] +// } +// static uint8(src: DataView, loc: number): DecodeResult | null { +// if (src.byteLength < loc + 1) return null +// return [src.getUint8(loc), loc + 1] +// } +// static int8(src: DataView, loc: number): DecodeResult | null { +// if (src.byteLength < loc + 1) return null +// return [src.getInt8(loc), loc + 1] +// } +// static uint16(src: DataView, loc: number): DecodeResult | null { +// if (src.byteLength < loc + 2) return null +// return [src.getUint16(loc), loc + 2] +// } +// static int16(src: DataView, loc: number): DecodeResult | null { +// if (src.byteLength < loc + 2) return null +// return [src.getInt16(loc), loc + 2] +// } +// static uint32(src: DataView, loc: number): DecodeResult | null { +// if (src.byteLength < loc + 4) return null +// return [src.getUint32(loc), loc + 4] +// } +// static int32(src: DataView, loc: number): DecodeResult | null { +// if (src.byteLength < loc + 4) return null +// return [src.getInt32(loc), loc + 4] +// } +// static uint64(src: DataView, loc: number): DecodeResult | null { +// if (src.byteLength < loc + 8) return null +// return [src.getBigInt64(loc), loc + 8] +// } +// static int64(src: DataView, loc: number): DecodeResult | null { +// if (src.byteLength < loc + 8) return null +// const x = src.getBigInt64(loc) +// return [x < Int64Overflow ? x : x - UInt64Overflow, loc + 8] +// } +// // static uint128(src: DataView, loc: number): DecodeResult | null { +// // if (src.byteLength < loc + 16) return null +// // return [readBytesAsUnsignedBigInt(src, loc, 16), loc + 16] +// // } +// // static int128(src: DataView, loc: number): DecodeResult | null { +// // if (src.byteLength < loc + 16) return null +// // const x = readBytesAsUnsignedBigInt(src, loc, 16) +// // return [x < Int128Overflow ? x : x - UInt128Overflow, loc + 16] +// // } +// // static uint256(src: DataView, loc: number): DecodeResult | null { +// // if (src.byteLength < loc + 32) return null +// // return [readBytesAsUnsignedBigInt(src, loc, 32), loc + 32] +// // } +// // static int256(src: DataView, loc: number): DecodeResult | null { +// // if (src.byteLength < loc + 32) return null +// // const x = readBytesAsUnsignedBigInt(src, loc, 32) +// // return [x < Int256Overflow ? 
x : x - UInt256Overflow, loc + 32] +// // } +// static float32(src: DataView, loc: number): DecodeResult | null { +// if (src.byteLength < loc + 4) return null +// return [src.getFloat32(loc), loc + 4] +// } +// static float64(src: DataView, loc: number): DecodeResult | null { +// if (src.byteLength < loc + 8) return null +// return [src.getFloat64(loc), loc + 8] +// } +// // static string(src: DataView, loc: number): DecodeResult | null { +// // if (src.byteLength < loc + 1) return null +// // const res = readBytesAsUnsignedLEB128(src.buffer, loc) +// // if (res === null) { +// // return null +// // } +// // const [length, nextLoc] = res +// // if (src.byteLength < nextLoc + length) return null +// // return [ +// // TxtDecoder.decode(src.buffer.slice(nextLoc, nextLoc + length)), +// // nextLoc + length, +// // ] +// // } +// static date(src: DataView, loc: number): DecodeResult | null { +// const res = RowBinaryTypesDecoderDataView.uint16(src, loc) +// if (res === null) return null +// return [new Date(res[0] * DayMillis), res[1]] +// } +// +// static date32(src: DataView, loc: number): DecodeResult | null { +// const res = RowBinaryTypesDecoderDataView.int32(src, loc) +// if (res === null) return null +// return [new Date(res[0] * DayMillis), res[1]] +// } +// static nullable( +// baseTypeDecoder: SimpleTypeDecoderDataView +// ): (src: DataView, loc: number) => DecodeResult | null { +// return (src: DataView, loc: number) => { +// const res = RowBinaryTypesDecoderDataView.uint8(src, loc) +// if (res === null) return null +// if (res[0] === 1) { +// return [null, res[1]] +// } +// return baseTypeDecoder(src, res[1]) +// } +// } +// static decimal( +// precision: number, +// scale: number +// ): (src: DataView, loc: number) => DecodeResult | null { +// const intSize = getDecimalIntSize(precision) +// let scaleMultiplier: number | bigint +// if (intSize === 32) { +// scaleMultiplier = 10 ** scale +// } else { +// scaleMultiplier = BigInt(10 ** scale) +// } +// return (src: DataView, loc: number) => { +// if (intSize === 32) { +// const res = RowBinaryTypesDecoderDataView.int32(src, loc) +// if (res === null) return null +// const whole = Math.floor(res[0] / (scaleMultiplier as number)) +// const fractional = res[0] % (scaleMultiplier as number) +// return [`${whole.toString(10)}.${fractional.toString(10)}`, res[1]] +// } +// let res: DecodeResult | null +// if (intSize === 64) { +// res = RowBinaryTypesDecoderDataView.int64(src, loc) +// } else if (intSize === 128) { +// throw new Error('Unsupported int size: 128') +// // res = RowBinaryTypesDecoderDataView.int128(src, loc) +// } else if (intSize === 256) { +// // res = RowBinaryTypesDecoderDataView.int256(src, loc) +// throw new Error('Unsupported int size: 256') +// } else { +// throw new Error(`Unsupported int size: ${intSize}`) +// } +// if (res === null) return null +// const whole = res[0] / (scaleMultiplier as bigint) +// const fractional = res[0] % (scaleMultiplier as bigint) +// return [`${whole.toString(10)}.${fractional.toString(10)}`, res[1]] +// } +// } +// // static array( +// // innerDecoder: +// // | SimpleTypeDecoder +// // | ReturnType +// // | ReturnType>, +// // dimensions = 0 +// // ): (src: DataView, loc: number) => DecodeResult> | null { +// // return (src: DataView, loc: number) => { +// // const leb128 = readBytesAsUnsignedLEB128(src, loc) +// // if (leb128 === null) return null +// // const result = new Array(leb128[0]) +// // if (dimensions === 0) { +// // for (let i = 0; i < leb128[0]; i++) { +// // const res = 
innerDecoder(src, leb128[1]) +// // if (res === null) return null +// // result[i] = res[0] +// // } +// // } else { +// // return this.array(innerDecoder, dimensions - 1)(src, leb128[1]) +// // } +// // return null +// // } +// // } +// } +// +// export const RowBinarySimpleDecodersDataView: { +// [key in +// | 'Bool' +// | 'UInt8' +// | 'Int8' +// | 'UInt16' +// | 'Int16' +// | 'UInt32' +// | 'Int32' +// | 'UInt64' +// | 'Int64' +// // | 'UInt128' +// // | 'Int128' +// // | 'UInt256' +// // | 'Int256' +// | 'Float32' +// | 'Float64' +// // | 'String' +// | 'Date' +// | 'Date32']: SimpleTypeDecoderDataView +// } = { +// Bool: RowBinaryTypesDecoderDataView.bool, +// UInt8: RowBinaryTypesDecoderDataView.uint8, +// Int8: RowBinaryTypesDecoderDataView.int8, +// UInt16: RowBinaryTypesDecoderDataView.uint16, +// Int16: RowBinaryTypesDecoderDataView.int16, +// UInt32: RowBinaryTypesDecoderDataView.uint32, +// Int32: RowBinaryTypesDecoderDataView.int32, +// UInt64: RowBinaryTypesDecoderDataView.uint64, +// Int64: RowBinaryTypesDecoderDataView.int64, +// // UInt128: RowBinaryTypesDecoderDataView.uint128, +// // Int128: RowBinaryTypesDecoderDataView.int128, +// // UInt256: RowBinaryTypesDecoderDataView.uint256, +// // Int256: RowBinaryTypesDecoderDataView.int256, +// Float32: RowBinaryTypesDecoderDataView.float32, +// Float64: RowBinaryTypesDecoderDataView.float64, +// // String: RowBinaryTypesDecoderDataView.string, +// Date: RowBinaryTypesDecoderDataView.date, +// Date32: RowBinaryTypesDecoderDataView.date32, +// } diff --git a/packages/client-node/src/row_binary_result_set.ts b/packages/client-node/src/row_binary_result_set.ts index ad747d1b..e316a859 100644 --- a/packages/client-node/src/row_binary_result_set.ts +++ b/packages/client-node/src/row_binary_result_set.ts @@ -1,21 +1,9 @@ import type { BaseResultSet, DataFormat } from '@clickhouse/client-common' -import type { DecodedColumns } from '@clickhouse/client-common/src/data_formatter' -import { RowBinaryColumns } from '@clickhouse/client-common/src/data_formatter' +import type { DecodedColumns } from '@clickhouse/client-common/src/data_formatter/row_binary/columns_header' +import { RowBinaryColumnsHeader } from '@clickhouse/client-common/src/data_formatter/row_binary/columns_header' import { Buffer } from 'buffer' import Stream, { Transform, type TransformCallback } from 'stream' -// draft; currently unused. 
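// A note on the streaming approach in `stream()` below: RowBinary rows may be
// split across network chunks, so the transform keeps the undecoded tail of
// each chunk and prepends it to the next one. A minimal, self-contained sketch
// of that carry-over pattern (names here are illustrative, not the client API):
//
// import { Transform, type TransformCallback } from 'stream'
//
// type DecodeRow = (src: Buffer, loc: number) => [unknown[], number] | null
//
// function makeRowsTransform(decodeRow: DecodeRow): Transform {
//   let incomplete: Buffer | undefined
//   return new Transform({
//     objectMode: true,
//     transform(chunk: Buffer, _enc: BufferEncoding, cb: TransformCallback) {
//       // prepend whatever was left over from the previous chunk
//       const src = incomplete ? Buffer.concat([incomplete, chunk]) : chunk
//       incomplete = undefined
//       let loc = 0
//       while (loc < src.length) {
//         const res = decodeRow(src, loc)
//         if (res === null) {
//           // not enough bytes for a full row - carry the tail over
//           incomplete = src.subarray(loc)
//           break
//         }
//         this.push(res[0]) // one fully decoded row
//         loc = res[1]
//       }
//       cb()
//     },
//   })
// }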
-export interface RowBinaryMappers { - date?: (daysSinceEpochUInt16: number) => T - date32?: (daysSinceEpochInt32: number) => T - datetime?: (secondsSinceEpochUInt32: number, timezone?: string) => T - datetime64?: (seconds: bigint, nanos: number, timezone?: string) => T - decimal?: (whole: number | bigint, fractional: number | bigint) => T -} -export interface RowBinaryResultSetOptions { - mappers?: RowBinaryMappers -} - export class RowBinaryResultSet implements BaseResultSet { constructor( private _stream: Stream.Readable, @@ -71,6 +59,11 @@ export class RowBinaryResultSet implements BaseResultSet { let columnIndex = 0 const rowsToPush: unknown[][] = [] + const measures: Record = {} + let iterations = 0 + let incompleteChunksTotal = 0 + const NS_PER_SEC = 1e9 + const toRows = new Transform({ transform( chunk: Buffer, @@ -80,6 +73,7 @@ export class RowBinaryResultSet implements BaseResultSet { //console.log(`transform call, chunk length: ${chunk.length}`) let src: Buffer if (incompleteChunk !== undefined) { + incompleteChunksTotal++ src = Buffer.concat([incompleteChunk, chunk.subarray()]) incompleteChunk = undefined } else { @@ -88,24 +82,37 @@ export class RowBinaryResultSet implements BaseResultSet { let loc = 0 if (columns === undefined) { - const res = RowBinaryColumns.decode(src) - if ('error' in res) { - return callback(new Error(res.error)) + try { + const res = RowBinaryColumnsHeader.decode(src) + columns = res[0] + loc = res[1] + } catch (err) { + return callback(err as Error) } - columns = res[0] - loc = res[1] + } + function logIterationExecutionTime(end: [number, number]) { + const col = columns!.types[columnIndex] + const name = columns!.names[columnIndex] + const execTime = end[0] * NS_PER_SEC + end[1] + iterations++ + const key = `${col.dbType} - ${name}` + measures[key] = (measures[key] || 0) + execTime } while (loc < src.length) { const row = new Array(columns.names.length) while (columnIndex < columns.names.length) { + const start = process.hrtime() const decodeResult = columns.decoders[columnIndex](src, loc) + const end = process.hrtime(start) + logIterationExecutionTime(end) //console.log(decodeResult, loc, src.length, columns?.names[columnIndex], columns?.types[columnIndex]) // not enough data to finish the row - null indicates that if (decodeResult === null) { // will be added to the beginning of the next received chunk incompleteChunk = src.subarray(loc) if (rowsToPush.length > 0) { + // console.log(`pushing ${rowsToPush.length} rows`) this.push(rowsToPush) rowsToPush.length = 0 } @@ -126,6 +133,7 @@ export class RowBinaryResultSet implements BaseResultSet { } if (rowsToPush.length > 0) { + // console.log(`pushing ${rowsToPush.length} rows`) this.push(rowsToPush) rowsToPush.length = 0 } @@ -137,6 +145,11 @@ export class RowBinaryResultSet implements BaseResultSet { this.push(rowsToPush) rowsToPush.length = 0 } + console.log(`Measures (${iterations})`, measures) + for (const key in measures) { + console.log(`Avg ns for ${key}:`, measures[key] / iterations) + } + console.log(`Incomplete chunks total:`, incompleteChunksTotal) return callback() }, autoDestroy: true, @@ -151,6 +164,113 @@ export class RowBinaryResultSet implements BaseResultSet { } }) } + // + // streamDataView() { + // // If the underlying stream has already ended, + // // Stream.pipeline will create a new empty stream, + // // but without "readableEnded" flag set to true + // if (this._stream.readableEnded) { + // throw Error('Stream has been already consumed') + // } + // if (this.format !== 
'RowBinary') { + // throw Error(`Format ${this.format} is not RowBinary`) + // } + // + // let columns: { names: string[]; types: ParsedColumnType[]; decoders: SimpleTypeDecoderDataView[] } + // let incompleteChunk: Uint8Array | undefined + // let columnIndex = 0 + // const rowsToPush: unknown[][] = [] + // + // const toRows = new Transform({ + // transform( + // chunk: Buffer, + // _encoding: BufferEncoding, + // callback: TransformCallback + // ) { + // //console.log(`transform call, chunk length: ${chunk.length}`) + // let src: DataView + // if (incompleteChunk !== undefined) { + // const uint8Arr = new Uint8Array(incompleteChunk.length + chunk.length) + // uint8Arr.set(incompleteChunk) + // uint8Arr.set(chunk, incompleteChunk.length) + // src = new DataView(uint8Arr.buffer) + // incompleteChunk = undefined + // } else { + // src = new DataView(chunk.buffer) + // } + // + // let loc = 0 + // if (columns === undefined) { + // try { + // const res = RowBinaryColumnsHeaderDataView.decode(chunk) + // columns = res[0] + // loc = res[1] + // } catch (err) { + // return callback(err as Error) + // } + // } + // + // while (loc < src.byteLength) { + // const row = new Array(columns.names.length) + // while (columnIndex < columns.names.length) { + // const decodeResult = ( + // columns.decoders[columnIndex] as any as SimpleTypeDecoderDataView + // )(src, loc) + // //console.log(decodeResult, loc, src.length, columns?.names[columnIndex], columns?.types[columnIndex]) + // // not enough data to finish the row - null indicates that + // if (decodeResult === null) { + // // will be added to the beginning of the next received chunk + // incompleteChunk = new Uint8Array(src.buffer.slice(loc)) + // if (rowsToPush.length > 0) { + // // console.log(`pushing ${rowsToPush.length} rows`) + // this.push(rowsToPush) + // rowsToPush.length = 0 + // } + // return callback() + // } else { + // // decoded a value + // row[columnIndex] = decodeResult[0] + // loc = decodeResult[1] + // columnIndex++ + // } + // } + // rowsToPush.push(row) + // columnIndex = 0 + // } + // + // if (loc > src.byteLength) { + // incompleteChunk = new Uint8Array( + // src.buffer.slice(loc - src.byteLength) + // ) + // } + // + // if (rowsToPush.length > 0) { + // // console.log(`pushing ${rowsToPush.length} rows`) + // this.push(rowsToPush) + // rowsToPush.length = 0 + // } + // + // return callback() + // }, + // final(callback: TransformCallback) { + // if (rowsToPush.length > 0) { + // this.push(rowsToPush) + // rowsToPush.length = 0 + // } + // return callback() + // }, + // autoDestroy: true, + // objectMode: true, + // }) + // + // return Stream.pipeline(this._stream, toRows, function pipelineCb(err) { + // if (err) { + // // FIXME: use logger instead + // // eslint-disable-next-line no-console + // console.error(err) + // } + // }) + // } close() { this._stream.destroy() From cf5a63ad586875448184f334248817c131efd1f7 Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Sat, 23 Mar 2024 10:31:15 +0100 Subject: [PATCH 06/14] WIP --- benchmarks/parsing/strings.ts | 41 -- .../unit/row_binary_columns_parser.test.ts | 141 ++++++ .../unit/row_binary_decoders.test.ts | 15 +- .../unit/row_binary_read_bytes.test.ts | 118 ----- .../row_binary/columns_header.ts | 178 +++----- .../row_binary/columns_parser.ts | 416 ++++++++++++------ .../src/data_formatter/row_binary/errors.ts | 2 +- .../src/data_formatter/row_binary/index.ts | 1 - .../data_formatter/row_binary/read_bytes.ts | 38 +- .../src/data_formatter/row_binary/types.ts | 259 +++++------ 
packages/client-node/src/index.ts | 4 + .../client-node/src/row_binary_result_set.ts | 251 ++++------- 12 files changed, 699 insertions(+), 765 deletions(-) delete mode 100644 benchmarks/parsing/strings.ts create mode 100644 packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts delete mode 100644 packages/client-common/__tests__/unit/row_binary_read_bytes.test.ts diff --git a/benchmarks/parsing/strings.ts b/benchmarks/parsing/strings.ts deleted file mode 100644 index 8011dd06..00000000 --- a/benchmarks/parsing/strings.ts +++ /dev/null @@ -1,41 +0,0 @@ -/* -From https://stackoverflow.com/a/37037034/4575540 - -Checked with Node.js 20 - no difference. - -strConcat -128888890 - 238ms -strTemplate -128888890 - 235ms -strConcat -128888890 - 234ms -strTemplate -128888890 - 228ms - - */ -void (() => { - function strConcat(i: number) { - return 'abc' + i + 'def' - } - - function strTemplate(i: number) { - return `abc${i}def` - } - - function run(strategy: (i: number) => string) { - const before = new Date().getTime() - let len = 0 - for (let i = 0; i < 10000000; i += 1) { - len += strategy(i).length - } - console.log(len + ' - ' + (new Date().getTime() - before) + 'ms') - } - - for (let i = 0; i < 10; i++) { - console.log('strConcat') - run(strConcat) - - console.log('strTemplate') - run(strTemplate) - } -})() diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts new file mode 100644 index 00000000..04dccab3 --- /dev/null +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts @@ -0,0 +1,141 @@ +import { parseEnum } from '../../src/data_formatter/row_binary/columns_parser' + +fdescribe('RowBinaryColumnsParser', () => { + describe('Enum', () => { + // pass-through; will be used as-is in the result and in the error messages. 
+ const dbType = 'SomeEnumTypeFromDB' + it('should parse Enum8', async () => { + const args: [string, Map][] = [ + ["Enum8('a' = 1)", new Map([[1, 'a']])], + [ + "Enum8('a' = 0, 'b' = 2)", + new Map([ + [0, 'a'], + [2, 'b'], + ]), + ], + [ + "Enum8('a' = 1, 'b' = 2, 'c' = 42)", + new Map([ + [1, 'a'], + [2, 'b'], + [42, 'c'], + ]), + ], + [ + "Enum8('f'' = 1, 'x =' = 2, 'b'''' = 3, ''c==' = 42)", + new Map([ + [1, "f'"], + [2, 'x ='], + [3, "b'''"], + [42, "'c=="], + ]), + ], + ] + args.forEach(([columnType, values]) => { + expect(parseEnum({ columnType, dbType })) + .withContext( + `Expected ${columnType} to be parsed as Enum8 [${[ + ...values.entries(), + ]}]` + ) + .toEqual({ + type: 'Enum', + intSize: 8, + dbType, + values, + }) + }) + }) + it('should parse Enum16', async () => { + const args: [string, Map][] = [ + ["Enum16('a' = 1)", new Map([[1, 'a']])], + [ + "Enum16('a' = 0, 'b' = 2)", + new Map([ + [0, 'a'], + [2, 'b'], + ]), + ], + [ + "Enum16('a' = 1, 'b' = 2, 'c' = 42)", + new Map([ + [1, 'a'], + [2, 'b'], + [42, 'c'], + ]), + ], + [ + "Enum16('f'' = 1, 'x =' = 2, 'b'''' = 3, ''c==' = 25000)", + new Map([ + [1, "f'"], + [2, 'x ='], + [3, "b'''"], + [25000, "'c=="], + ]), + ], + ] + args.forEach(([columnType, values]) => { + expect(parseEnum({ columnType, dbType })) + .withContext( + `Expected ${columnType} to be parsed as Enum16 [${[ + ...values.entries(), + ]}]` + ) + .toEqual({ + type: 'Enum', + intSize: 16, + dbType, + values, + }) + }) + }) + it('should throw when the type is not a valid enum', async () => { + const args: [string][] = [ + ['Enum'], // should be either 8 or 16 + ['Enum32'], + ['Enum64'], + ['String'], + ['Enum(String)'], + ] + args.forEach(([columnType]) => { + expect(() => parseEnum({ columnType, dbType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Expected Enum to be either Enum8 or Enum16') + }) + }) + it('should throw when the values are not valid', async () => { + const negativeArgs: [string][] = [ + ["Enum8('a' = x)"], + ["Enum8('foo')"], + ] + negativeArgs.forEach(([columnType]) => { + expect(() => parseEnum({ columnType, dbType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid Enum type values') + }) + }) + it('should throw on duplicate indices', async () => { + const args: [string][] = [ + ["Enum8('a' = 0, 'b' = 0)"], + ["Enum8('a' = 0, 'b' = 1, 'c' = 1)"], + ] + args.forEach(([columnType]) => { + expect(() => parseEnum({ columnType, dbType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Duplicate Enum index') + }) + }) + it('should throw on duplicate names', async () => { + const args: [string][] = [ + ["Enum8('a' = 0, 'a' = 1)"], + ["Enum8('a' = 0, 'b' = 1, 'b' = 2)"], + ] + args.forEach(([columnType]) => { + expect(() => parseEnum({ columnType, dbType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Duplicate Enum name') + }) + }) + }) +}) diff --git a/packages/client-common/__tests__/unit/row_binary_decoders.test.ts b/packages/client-common/__tests__/unit/row_binary_decoders.test.ts index 7bdf42b2..118ac806 100644 --- a/packages/client-common/__tests__/unit/row_binary_decoders.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_decoders.test.ts @@ -11,7 +11,7 @@ fdescribe('RowBinary decoders', () => { [new Uint8Array([0xff, 0xff]), new Date('2149-06-06T00:00:00.000Z')], ] args.forEach(([src, expected]) => { - const res = RowBinaryTypesDecoder.date(src, 0)! + const res = RowBinaryTypesDecoder.date(Buffer.from(src), 0)! 
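+      // Date on the wire is a little-endian UInt16 day number since the Unix
+      // epoch, and the decoder converts it as days * 24 * 3600 * 1000 ms;
+      // e.g. [0x01, 0x00] is day 1 -> 1970-01-02T00:00:00.000Z.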
expect(+res[0]) .withContext( `Decoded ${src.toString()}. Result ${res[0]} != expected ${expected}` @@ -19,17 +19,4 @@ fdescribe('RowBinary decoders', () => { .toEqual(+expected) }) }) - - // it('should remove low cardinality', async () => { - // const args: [string, string][] = [ - // ['LowCardinality(String)', 'String'], - // ['LowCardinality(Nullable(String))', 'Nullable(String)'], - // ['LowCardinality(Array(String))', 'Array(String)'], - // ['Nullable(String)', 'Nullable(String)'], - // ['String', 'String'], - // ] - // args.forEach(([src, expected]) => { - // expect(re(src)).toEqual(expected) - // }) - // }) }) diff --git a/packages/client-common/__tests__/unit/row_binary_read_bytes.test.ts b/packages/client-common/__tests__/unit/row_binary_read_bytes.test.ts deleted file mode 100644 index 41e47232..00000000 --- a/packages/client-common/__tests__/unit/row_binary_read_bytes.test.ts +++ /dev/null @@ -1,118 +0,0 @@ -import { - readBytesAsFloat32, - readBytesAsUnsignedBigInt, - readBytesAsUnsignedInt, -} from '../../src/data_formatter' - -fdescribe('RowBinary read bytes', () => { - describe('Unsigned integers', () => { - it('should decode UInt16', async () => { - const args: [Uint8Array, number][] = [ - [new Uint8Array([0x00, 0x00]), 0], - [new Uint8Array([0x01, 0x00]), 1], - [new Uint8Array([0x02, 0x00]), 2], - [new Uint8Array([0x10, 0x00]), 16], - [new Uint8Array([0xff, 0x00]), 255], - [new Uint8Array([0xff, 0xff]), 65535], - [new Uint8Array([0x00, 0x80]), 32768], - ] - args.forEach(([src, expected]) => { - expect(readBytesAsUnsignedInt(src, 0, 2)) - .withContext(ctx(src, expected)) - .toBe(expected) - }) - }) - it('should decode UInt32', async () => { - const args: [Uint8Array, number][] = [ - [new Uint8Array([0x00, 0x00, 0x00, 0x00]), 0], - [new Uint8Array([0x01, 0x00, 0x00, 0x00]), 1], - [new Uint8Array([0x02, 0x00, 0x00, 0x00]), 2], - [new Uint8Array([0x10, 0x00, 0x00, 0x00]), 16], - [new Uint8Array([0xff, 0x00, 0x00, 0x00]), 255], - [new Uint8Array([0xff, 0xff, 0x00, 0x00]), 65535], - [new Uint8Array([0xff, 0xff, 0xff, 0x00]), 16777215], - [new Uint8Array([0xff, 0xff, 0xff, 0x7f]), 2147483647], - [new Uint8Array([0xff, 0xff, 0xff, 0xff]), 4294967295], - [new Uint8Array([0x00, 0x00, 0x00, 0x80]), 2147483648], - ] - args.forEach(([src, expected]) => { - expect(readBytesAsUnsignedInt(src, 0, 4)) - .withContext(ctx(src, expected)) - .toBe(expected) - }) - }) - }) - - describe('Unsigned big integers', () => { - it('should decode UInt64', async () => { - const args: [Uint8Array, bigint][] = [ - [new Uint8Array([0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]), 0n], - [new Uint8Array([0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]), 1n], - [new Uint8Array([0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]), 2n], - [ - new Uint8Array([0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]), - 255n, - ], - [ - new Uint8Array([0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]), - 65535n, - ], - [ - new Uint8Array([0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00]), - 16777215n, - ], - [ - new Uint8Array([0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00]), - 4294967295n, - ], - [ - new Uint8Array([0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00]), - 1099511627775n, - ], - [ - new Uint8Array([0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00]), - 281474976710655n, - ], - [ - new Uint8Array([0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00]), - 72057594037927935n, - ], - [ - new Uint8Array([0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]), - 18446744073709551615n, - ], - ] - - args.forEach(([src, expected]) => { 
- expect(readBytesAsUnsignedBigInt(src, 0, 8)) - .withContext(ctx(src, expected)) - .toBe(expected) - }) - }) - }) - - fdescribe('Floats', () => { - it('should decode Float32', async () => { - const args: [Uint8Array, number][] = [ - [new Uint8Array([0x00, 0x00, 0x00, 0x00]), 0], - // some reference values from a random dataset (not 100% matching the CH output, because floats) - [new Uint8Array([151, 136, 46, 6]), 3.2826113095459874e-35], - [new Uint8Array([176, 183, 118, 153]), -1.2754997313209913e-23], - [new Uint8Array([114, 233, 40, 161]), -5.72295763540352e-19], - [new Uint8Array([112, 205, 62, 233]), -1.4416628555694005e25], - [new Uint8Array([43, 253, 113, 82]), 259833643008], - [new Uint8Array([165, 173, 250, 112]), 6.206494065007942e29], - [new Uint8Array([175, 228, 124, 108]), 1.2229169371247749e27], - ] - args.forEach(([src, expected]) => { - expect(readBytesAsFloat32(src, 0)) - .withContext(ctx(src, expected)) - .toBe(expected) - }) - }) - }) - - function ctx(src: Uint8Array, expected: number | bigint) { - return `Expected ${src.toString()} to be decoded as ${expected}` - } -}) diff --git a/packages/client-common/src/data_formatter/row_binary/columns_header.ts b/packages/client-common/src/data_formatter/row_binary/columns_header.ts index f416ad8c..d36507a4 100644 --- a/packages/client-common/src/data_formatter/row_binary/columns_header.ts +++ b/packages/client-common/src/data_formatter/row_binary/columns_header.ts @@ -1,5 +1,10 @@ -import type { DecimalParams, ParsedColumnType } from './columns_parser' -import { RowBinaryColumnTypesParser } from './columns_parser' +import { + DecimalParams, + parseColumnType, + ParsedColumnArray, + ParsedColumnNullable, + ParsedColumnType, +} from './columns_parser' import { ClickHouseRowBinaryError } from './errors' import type { DecodeResult } from './read_bytes' import { readBytesAsUnsignedLEB128 } from './read_bytes' @@ -18,7 +23,7 @@ export type DecodedColumns = DecodeResult<{ /** @throws ClickHouseRowBinaryError */ export class RowBinaryColumnsHeader { - static decode(src: Uint8Array): DecodedColumns { + static decode(src: Buffer): DecodedColumns { const res = readBytesAsUnsignedLEB128(src, 0) if (res === null) { throw ClickHouseRowBinaryError.headerDecodingError( @@ -27,6 +32,12 @@ export class RowBinaryColumnsHeader { ) } const numColumns = res[0] + if (numColumns === 0) { + throw ClickHouseRowBinaryError.headerDecodingError( + 'Unexpected zero number of columns', + {} + ) + } let nextLoc = res[1] const names = new Array(numColumns) const types = new Array(numColumns) @@ -51,7 +62,7 @@ export class RowBinaryColumnsHeader { ) } nextLoc = res[1] - const col = RowBinaryColumnTypesParser.parseColumnType(res[0]) + const col = parseColumnType(res[0]) types[i] = col let valueDecoder: TypeDecoder switch (col.type) { @@ -62,25 +73,10 @@ export class RowBinaryColumnsHeader { decoders[i] = getDecimalDecoder(col.params) break case 'Array': - if (col.valueType === 'Decimal') { - valueDecoder = getDecimalDecoder(col.decimalParams) - } else { - valueDecoder = RowBinarySimpleDecoders[col.valueType] - } - decoders[i] = RowBinaryTypesDecoder.array( - col.valueNullable - ? 
RowBinaryTypesDecoder.nullable(valueDecoder) - : valueDecoder, - col.dimensions - ) + decoders[i] = getArrayDecoder(col) break case 'Nullable': - if (col.valueType === 'Decimal') { - valueDecoder = getDecimalDecoder(col.decimalParams) - } else { - valueDecoder = RowBinarySimpleDecoders[col.valueType] - } - decoders[i] = RowBinaryTypesDecoder.nullable(valueDecoder) + decoders[i] = getNullableDecoder(col) break default: throw ClickHouseRowBinaryError.headerDecodingError( @@ -105,103 +101,43 @@ function getDecimalDecoder(decimalParams: DecimalParams): SimpleTypeDecoder { // for tests only (128 and 256 support is there) throw new Error(`Unsupported Decimal size: ${intSize}`) } -// -// export class RowBinaryColumnsHeaderDataView { -// static decode(src: Uint8Array): DecodeResult<{ -// names: string[] -// types: ParsedColumnType[] -// decoders: SimpleTypeDecoderDataView[] -// }> -// { -// const res = readBytesAsUnsignedLEB128(src, 0) -// if (res === null) { -// throw ClickHouseRowBinaryError.headerDecodingError( -// 'Not enough data to decode number of columns', -// {} -// ) -// } -// const numColumns = res[0] -// let nextLoc = res[1] -// const names = new Array(numColumns) -// const types = new Array(numColumns) -// const decoders = new Array(numColumns) -// for (let i = 0; i < numColumns; i++) { -// const res = RowBinaryTypesDecoder.string(src, nextLoc) -// if (res === null) { -// throw ClickHouseRowBinaryError.headerDecodingError( -// `Not enough data to decode column name`, -// { i, names, numColumns, nextLoc } -// ) -// } -// nextLoc = res[1] -// names[i] = res[0] -// } -// for (let i = 0; i < numColumns; i++) { -// const res = RowBinaryTypesDecoder.string(src, nextLoc) -// if (res === null) { -// throw ClickHouseRowBinaryError.headerDecodingError( -// `Not enough data to decode column type`, -// { i, names, types, numColumns, nextLoc } -// ) -// } -// nextLoc = res[1] -// const col = RowBinaryColumnTypesParser.parseColumnType(res[0]) -// types[i] = col -// let valueDecoder: SimpleTypeDecoderDataView -// switch (col.type) { -// case 'Simple': -// decoders[i] = -// RowBinarySimpleDecodersDataView[ -// col.columnType as keyof RowBinaryTypesDecoderDataView -// ] -// break -// case 'Decimal': -// decoders[i] = RowBinaryTypesDecoderDataView.decimal( -// col.params.precision, -// col.params.scale -// ) -// break -// case 'Array': -// // if (col.valueType === 'Decimal') { -// // valueDecoder = RowBinaryTypesDecoder.decimal( -// // col.decimalParams.precision, -// // col.decimalParams.scale -// // ) -// // } else { -// // valueDecoder = -// // RowBinarySimpleDecodersDataView[ -// // col.valueType as keyof RowBinaryTypesDecoderDataView -// // ] -// // } -// // decoders[i] = RowBinaryTypesDecoderDataView.array( -// // col.valueNullable -// // ? 
RowBinaryTypesDecoder.nullable(valueDecoder) -// // : valueDecoder, -// // col.dimensions -// // ) -// throw new Error('Array type is not supported yet') -// case 'Nullable': -// if (col.valueType === 'Decimal') { -// valueDecoder = RowBinaryTypesDecoderDataView.decimal( -// col.decimalParams.precision, -// col.decimalParams.scale -// ) -// } else { -// valueDecoder = -// RowBinarySimpleDecodersDataView[ -// col.valueType as keyof RowBinaryTypesDecoderDataView -// ] -// } -// decoders[i] = RowBinaryTypesDecoderDataView.nullable(valueDecoder) -// break -// default: -// throw ClickHouseRowBinaryError.headerDecodingError( -// 'Unsupported column type', -// { col } -// ) -// } -// } -// // console.log(`Decoded columns:`, names, types) -// return [{ names, types, decoders }, nextLoc] -// } -// } + +function getEnumDecoder( + intSize: 8 | 16, + values: Map +): SimpleTypeDecoder { + if (intSize === 8) { + return RowBinaryTypesDecoder.enum8(values) + } + if (intSize === 16) { + return RowBinaryTypesDecoder.enum16(values) + } + throw new Error(`Unsupported Enum size: ${intSize}`) +} + +function getArrayDecoder(col: ParsedColumnArray): SimpleTypeDecoder { + let valueDecoder + if (col.valueType === 'Decimal') { + valueDecoder = getDecimalDecoder(col.decimalParams) + } else { + valueDecoder = RowBinarySimpleDecoders[col.valueType] + } + return RowBinaryTypesDecoder.array( + col.valueNullable + ? RowBinaryTypesDecoder.nullable(valueDecoder) + : valueDecoder, + col.dimensions + ) +} + +function getNullableDecoder(col: ParsedColumnNullable) { + let valueDecoder + if (col.valueType === 'Decimal') { + valueDecoder = getDecimalDecoder(col.decimalParams) + } else if (col.valueType === 'Enum') { + valueDecoder = getEnumDecoder(col.intSize, col.values) + } else { + valueDecoder = RowBinarySimpleDecoders[col.valueType] + } + return RowBinaryTypesDecoder.nullable(valueDecoder) +} diff --git a/packages/client-common/src/data_formatter/row_binary/columns_parser.ts b/packages/client-common/src/data_formatter/row_binary/columns_parser.ts index ed844e75..e5c6bdd8 100644 --- a/packages/client-common/src/data_formatter/row_binary/columns_parser.ts +++ b/packages/client-common/src/data_formatter/row_binary/columns_parser.ts @@ -21,18 +21,27 @@ export type ParsedColumnNullable = | { type: 'Nullable' valueType: 'Decimal' - decimalParams: DecimalParams + decimalParams: ParsedColumnDecimal['params'] + dbType: string + } + | { + type: 'Nullable' + valueType: 'Enum' + values: ParsedColumnEnum['values'] + intSize: ParsedColumnEnum['intSize'] dbType: string } -/** Array cannot be Nullable or LowCardinality, but its inner type can be. - * Arrays can be multidimensional, e.g. Array(Array(Array(T))). - * Arrays are allowed to have a Map as the inner type. - */ -export interface DecodedColumnMap { - type: 'Map' - key: ParsedColumnSimple - value: ParsedColumnSimple | ParsedColumnArray | DecodedColumnMap +export interface ParsedColumnEnum { + type: 'Enum' + values: Map + intSize: 8 | 16 + dbType: string +} + +export interface ParseColumnTuple { + type: 'Tuple' + elements: ParsedColumnType[] dbType: string } @@ -74,164 +83,298 @@ export type ParsedColumnArray = decimalParams: DecimalParams dbType: string } + +// export interface ParsedColumnMap { +// type: 'Map' +// key: ParsedColumnSimple +// value: ParsedColumnSimple | ParsedColumnArray | ParsedColumnMap +// dbType: string +// } // TODO - add Map support. 
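+// A possible wire-level decoder to pair with ParsedColumnMap once it lands
+// (an illustrative sketch only): RowBinary serializes Map(K, V) as a LEB128
+// entry count followed by that many key/value pairs.
+//
+// export function mapDecoder(
+//   keyDecoder: SimpleTypeDecoder,
+//   valueDecoder: SimpleTypeDecoder
+// ): (src: Buffer, loc: number) => DecodeResult<Map<unknown, unknown>> | null {
+//   return (src: Buffer, loc: number) => {
+//     const header = readBytesAsUnsignedLEB128(src, loc)
+//     if (header === null) return null // not enough data yet
+//     const [size, start] = header
+//     const result = new Map<unknown, unknown>()
+//     let nextLoc = start
+//     for (let i = 0; i < size; i++) {
+//       const key = keyDecoder(src, nextLoc)
+//       if (key === null) return null
+//       const value = valueDecoder(src, key[1])
+//       if (value === null) return null
+//       result.set(key[0], value[0])
+//       nextLoc = value[1]
+//     }
+//     return [result, nextLoc]
+//   }
+// }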
+ export type ParsedColumnType = | ParsedColumnSimple | ParsedColumnNullable | ParsedColumnDecimal | ParsedColumnArray -// | DecodedColumnMap // TODO - add Map support. - -export class RowBinaryColumnTypesParser { - static parseColumnType(dbType: string): ParsedColumnType { - let columnType = dbType - let isNullable = false - if (columnType.startsWith(LowCardinalityPrefix)) { - columnType = columnType.slice(LowCardinalityPrefix.length, -1) - } - if (columnType.startsWith(NullablePrefix)) { - columnType = columnType.slice(NullablePrefix.length, -1) - isNullable = true + | ParsedColumnEnum +// | ParsedColumnMap // TODO - add Map support. + +export function parseColumnType(dbType: string): ParsedColumnType { + let columnType = dbType + let isNullable = false + if (columnType.startsWith(LowCardinalityPrefix)) { + columnType = columnType.slice(LowCardinalityPrefix.length, -1) + } + if (columnType.startsWith(NullablePrefix)) { + columnType = columnType.slice(NullablePrefix.length, -1) + isNullable = true + } + let result: ParsedColumnType + if (columnType.startsWith(DecimalPrefix)) { + const params = parseDecimalParams({ + dbType, + columnType, + }) + result = { + type: 'Decimal', + params, + dbType, } - let result: ParsedColumnType - if (columnType.startsWith(DecimalPrefix)) { + } else if ( + columnType.startsWith(Enum8Prefix) || + columnType.startsWith(Enum16Prefix) + ) { + result = parseEnum({ dbType, columnType }) + } else if (columnType.startsWith(ArrayPrefix)) { + result = parseArrayType({ dbType, columnType }) + } else if (columnType.startsWith(MapPrefix)) { + throw ClickHouseRowBinaryError.headerDecodingError( + 'Map types are not supported yet', + { columnType } + ) + } else { + // "Simple" types + if (columnType in RowBinarySimpleDecoders) { result = { - type: 'Decimal', - params: RowBinaryColumnTypesParser.parseDecimalParams({ - dbType, - columnType, - }), + type: 'Simple', + columnType: columnType as SimpleColumnType, dbType, } - } else if (columnType.startsWith(ArrayPrefix)) { - result = RowBinaryColumnTypesParser.parseArrayType({ dbType, columnType }) - } else if (columnType.startsWith(MapPrefix)) { + } else { throw ClickHouseRowBinaryError.headerDecodingError( - 'Map types are not supported yet', + 'Unsupported column type', { columnType } ) - } else { - if (columnType in RowBinarySimpleDecoders) { - result = { - type: 'Simple', - columnType: columnType as SimpleColumnType, - dbType, - } - } else { - throw ClickHouseRowBinaryError.headerDecodingError( - 'Unsupported column type', - { columnType } - ) - } } - if (isNullable) { - // console.log('Got a nullable:', result) - if (result.type === 'Array') { - throw ClickHouseRowBinaryError.headerDecodingError( - 'Array cannot be Nullable', - { dbType } - ) - } - if (result.type === 'Decimal') { - return { - type: 'Nullable', - valueType: 'Decimal', - decimalParams: result.params, - dbType, - } + } + if (isNullable) { + return asNullableType(result, dbType) + } else { + return result + } +} + +export function parseDecimalParams({ + columnType, + dbType, +}: ParseColumnTypeParams): DecimalParams { + if (!columnType.startsWith(DecimalPrefix)) { + throw ClickHouseRowBinaryError.headerDecodingError('Invalid Decimal type', { + dbType, + columnType, + }) + } + + const split = columnType.slice(DecimalPrefix.length, -1).split(',') + if (split.length !== 2) { + throw ClickHouseRowBinaryError.headerDecodingError('Invalid Decimal type', { + dbType, + columnType, + split, + }) + } + const params: DecimalParams = { + precision: parseInt(split[0], 
10), + scale: parseInt(split[1], 10), + intSize: 32, + } + if (params.precision > 38) { + params.intSize = 256 + } else if (params.precision > 18) { + params.intSize = 128 + } else if (params.precision > 9) { + params.intSize = 64 + } + return params +} + +export function parseEnum({ + columnType, + dbType, +}: ParseColumnTypeParams): ParsedColumnEnum { + let intSize: 8 | 16 + if (columnType.startsWith(Enum8Prefix)) { + columnType = columnType.slice(Enum8Prefix.length, -1) + intSize = 8 + } else if (columnType.startsWith(Enum16Prefix)) { + columnType = columnType.slice(Enum16Prefix.length, -1) + intSize = 16 + } else { + throw ClickHouseRowBinaryError.headerDecodingError( + 'Expected Enum to be either Enum8 or Enum16', + { + columnType, + dbType, } - return { - type: 'Nullable', - valueType: result.columnType, + ) + } + const matches = [...columnType.matchAll(/(?:'(.*?)' = (\d+),?)+/g)] + if (matches.length === 0) { + throw ClickHouseRowBinaryError.headerDecodingError( + 'Invalid Enum type values', + { + columnType, dbType, } - } else { - return result - } + ) } - static parseDecimalParams({ - columnType, - dbType, - }: ParseColumnTypeParams): DecimalParams { - const split = columnType.slice(DecimalPrefix.length, -1).split(',') - if (split.length !== 2) { + // FIXME: regex is not enough to validate possibly incorrect Enum values. + // needs to be processed char by char instead. + const names: string[] = [] + const values = new Map() + for (const match of matches) { + const index = parseInt(match[2], 10) + if (index < 0 || Number.isNaN(index)) { throw ClickHouseRowBinaryError.headerDecodingError( - 'Invalid Decimal type', - { dbType, columnType, split } + 'Enum index must be >= 0', + { columnType, dbType, index, matches: [...matches] } ) } - const params: DecimalParams = { - precision: parseInt(split[0], 10), - scale: parseInt(split[1], 10), - intSize: 32, - } - if (params.precision > 38) { - params.intSize = 256 - } else if (params.precision > 18) { - params.intSize = 128 - } else if (params.precision > 9) { - params.intSize = 64 - } - return params - } - - static parseArrayType({ - columnType, - dbType, - }: ParseColumnTypeParams): ParsedColumnArray { - let dimensions = 0 - while (columnType.length > 0) { - if (columnType.startsWith(ArrayPrefix)) { - columnType.slice(ArrayPrefix.length, -1) // Array(T) -> T - dimensions++ - } else { - break - } - } - if (dimensions === 0) { + if (values.has(index)) { throw ClickHouseRowBinaryError.headerDecodingError( - 'Array type without dimensions', - { columnType } + 'Duplicate Enum index', + { columnType, dbType, index, matches: [...matches] } ) } - if (dimensions > 10) { + if (names.includes(match[1])) { throw ClickHouseRowBinaryError.headerDecodingError( - 'Array type with too many dimensions', - { columnType } + 'Duplicate Enum name', + { columnType, dbType, name: match[1], matches: [...matches] } ) } - const valueNullable = columnType.startsWith(NullablePrefix) - if (valueNullable) { - columnType = columnType.slice(NullablePrefix.length, -1) + values.set(index, match[1]) + names.push(match[1]) + } + + return { + type: 'Enum', + intSize, + values, + dbType, + } +} + +export function parseTupleType({ + columnType, + dbType, +}: ParseColumnTypeParams): ParseColumnTuple { + if (!columnType.startsWith(TuplePrefix)) { + throw ClickHouseRowBinaryError.headerDecodingError('Invalid Tuple type', { + columnType, + dbType, + }) + } + columnType = columnType.slice(TuplePrefix.length, -1) + // TODO. 
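+  // One way to fill `elements` for the TODO above (a sketch only): split the
+  // inner type list on top-level commas, tracking parenthesis depth so that
+  // nested types like Tuple(Int32, Map(String, Array(Int8))) stay intact
+  // (quoted Enum values would still need escape-aware handling), then feed
+  // each part back into parseColumnType:
+  //
+  // const parts: string[] = []
+  // let depth = 0
+  // let start = 0
+  // for (let i = 0; i < columnType.length; i++) {
+  //   if (columnType[i] === '(') depth++
+  //   else if (columnType[i] === ')') depth--
+  //   else if (columnType[i] === ',' && depth === 0) {
+  //     parts.push(columnType.slice(start, i).trim())
+  //     start = i + 1
+  //   }
+  // }
+  // parts.push(columnType.slice(start).trim())
+  // const elements = parts.map((part) => parseColumnType(part))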
+  return {
+    type: 'Tuple',
+    elements: [],
+    dbType,
+  }
+}
+
+export function parseArrayType({
+  columnType,
+  dbType,
+}: ParseColumnTypeParams): ParsedColumnArray {
+  if (!columnType.startsWith(ArrayPrefix)) {
+    throw ClickHouseRowBinaryError.headerDecodingError('Invalid Array type', {
+      columnType,
+      dbType,
+    })
+  }
+
+  let dimensions = 0
+  while (columnType.length > 0) {
+    if (columnType.startsWith(ArrayPrefix)) {
+      columnType = columnType.slice(ArrayPrefix.length, -1) // Array(T) -> T
+      dimensions++
+    } else {
+      break
    }
  }
  if (dimensions === 0) {
    throw ClickHouseRowBinaryError.headerDecodingError(
      'Array type without dimensions',
      { columnType }
    )
  }
  if (dimensions > 10) {
    throw ClickHouseRowBinaryError.headerDecodingError(
      'Array type with too many dimensions',
      { columnType }
    )
  }
  const valueNullable = columnType.startsWith(NullablePrefix)
  if (valueNullable) {
    columnType = columnType.slice(NullablePrefix.length, -1)
  }
  if (columnType.startsWith(DecimalPrefix)) {
    const decimalParams = parseDecimalParams({
      dbType,
      columnType,
    })
    return {
      type: 'Array',
      valueType: 'Decimal',
      valueNullable,
      decimalParams,
      dimensions,
      dbType,
    }
  }
  if (columnType in RowBinarySimpleDecoders) {
    return {
      type: 'Array',
      valueType: columnType as SimpleColumnType,
      valueNullable,
      dimensions,
      dbType,
    }
  }
  throw ClickHouseRowBinaryError.headerDecodingError(
    'Unsupported array value type',
    { dbType, columnType }
  )
}

export function asNullableType(
  result:
    | ParsedColumnSimple
    | ParsedColumnEnum
    | ParsedColumnDecimal
    | ParsedColumnArray,
  dbType: string
): ParsedColumnNullable {
  if (result.type === 'Array') {
    throw ClickHouseRowBinaryError.headerDecodingError(
      'Array cannot be Nullable',
      { dbType }
    )
  }
  if (result.type === 'Decimal') {
    return {
      type: 'Nullable',
      valueType: 'Decimal',
      decimalParams: result.params,
      dbType,
    }
  }
  if (result.type === 'Enum') {
    return {
      type: 'Nullable',
      valueType: 'Enum',
      values: result.values,
      intSize: result.intSize,
      dbType,
    }
  }
  return {
    type: 'Nullable',
    valueType: result.columnType,
    dbType,
  }
}

interface ParseColumnTypeParams {
@@ -244,5 +387,6 @@ const LowCardinalityPrefix = 'LowCardinality(' as const
 const DecimalPrefix = 'Decimal(' as const
 const ArrayPrefix = 'Array(' as const
 const MapPrefix = 'Map(' as const
-// const TuplePrefix = 'Tuple(' as const
-// const EnumPrefix = 'Enum(' as const
+const Enum8Prefix = 'Enum8(' as const
+const Enum16Prefix = 'Enum16(' as const
+const TuplePrefix = 'Tuple(' as const
diff --git a/packages/client-common/src/data_formatter/row_binary/errors.ts b/packages/client-common/src/data_formatter/row_binary/errors.ts
index 99967a72..a5092b8b 100644
--- a/packages/client-common/src/data_formatter/row_binary/errors.ts
+++ b/packages/client-common/src/data_formatter/row_binary/errors.ts
@@ -1,4 +1,4 @@
-const HeaderDecodingError = 'HEADER_DECODING_ERROR'
+const HeaderDecodingError = 'HEADER_DECODING_ERROR' as const
 
 export class
ClickHouseRowBinaryError extends Error { readonly args: Record diff --git a/packages/client-common/src/data_formatter/row_binary/index.ts b/packages/client-common/src/data_formatter/row_binary/index.ts index d8646301..07594db4 100644 --- a/packages/client-common/src/data_formatter/row_binary/index.ts +++ b/packages/client-common/src/data_formatter/row_binary/index.ts @@ -1,5 +1,4 @@ export * from './columns_header' -export * from './columns_parser' export * from './read_bytes' export * from './types' export * from './errors' diff --git a/packages/client-common/src/data_formatter/row_binary/read_bytes.ts b/packages/client-common/src/data_formatter/row_binary/read_bytes.ts index 59e174d7..a759b13b 100644 --- a/packages/client-common/src/data_formatter/row_binary/read_bytes.ts +++ b/packages/client-common/src/data_formatter/row_binary/read_bytes.ts @@ -3,7 +3,7 @@ export type DecodeResult = [T, number] // May return null since we cannot determine how many bytes we need to read in advance export function readBytesAsUnsignedLEB128( - src: Uint8Array, + src: Buffer, loc: number ): DecodeResult | null { let result = 0 @@ -23,39 +23,3 @@ export function readBytesAsUnsignedLEB128( shift += 7 } } - -// FIXME: use DecodeResult | null for all methods and do the validation here -// instead of relying on the caller -export function readBytesAsUnsignedInt( - src: Uint8Array, - loc: number, - bytes: 2 | 4 // (U)Int16 | (U)Int32 -): number { - let result = 0 - for (let i = bytes - 1; i >= 0; i--) { - result = (result << 8) + src[loc + i] - } - return result >>> 0 -} - -export function readBytesAsUnsignedBigInt( - src: Uint8Array, - loc: number, - bytes: 8 | 16 | 32 // (U)Int64 | (U)Int128 | (U)Int256 -): bigint { - let result = 0n - for (let i = bytes - 1; i >= 0; i--) { - result = (result << 8n) + BigInt(src[loc + i]) - } - return result -} - -export function readBytesAsFloat32(src: Uint8Array, loc: number) { - // FIXME: maybe can be optimized without DataView - return new DataView(src.buffer.slice(loc, loc + 4)).getFloat32(0, true) -} - -export function readBytesAsFloat64(src: Uint8Array, loc: number) { - // FIXME: maybe can be optimized without DataView - return new DataView(src.buffer.slice(loc, loc + 8)).getFloat64(0, true) -} diff --git a/packages/client-common/src/data_formatter/row_binary/types.ts b/packages/client-common/src/data_formatter/row_binary/types.ts index 4580d001..13429802 100644 --- a/packages/client-common/src/data_formatter/row_binary/types.ts +++ b/packages/client-common/src/data_formatter/row_binary/types.ts @@ -1,11 +1,5 @@ -import { - DecodeResult, - readBytesAsFloat32, - readBytesAsFloat64, - readBytesAsUnsignedBigInt, - readBytesAsUnsignedInt, - readBytesAsUnsignedLEB128, -} from './read_bytes' +import { Buffer } from 'buffer' +import { DecodeResult, readBytesAsUnsignedLEB128 } from './read_bytes' export type SimpleColumnType = /** {@link SimpleTypeDecoder} */ @@ -18,24 +12,18 @@ export type SimpleColumnType = | 'Int32' | 'UInt64' | 'Int64' - | 'UInt128' - | 'Int128' - | 'UInt256' - | 'Int256' + // | 'UInt128' + // | 'Int128' + // | 'UInt256' + // | 'Int256' | 'Float32' | 'Float64' | 'String' | 'Date' | 'Date32' -export type ColumnType = - | SimpleColumnType - /** {@link DecimalTypeDecoder} */ - | 'Decimal' - /** {@link ArrayTypeDecoder} */ - | 'Array' export type SimpleTypeDecoder = ( - src: Uint8Array, + src: Buffer, loc: number ) => DecodeResult | null export type DecimalTypeDecoder = ( @@ -63,149 +51,129 @@ export type MapTypeDecoder = ( | MapTypeDecoder ) => 
SimpleTypeDecoder> -// type DateMapper = (days: number) => T - -const Int8Overflow = 128 -const UInt8Overflow = 256 - -const Int16Overflow = 32768 -const UInt16Overflow = 65536 - -const Int32Overflow = 2147483648 -const UInt32Overflow = 4294967296 - -const Int64Overflow = 9223372036854775808n -const UInt64Overflow = 18446744073709551616n - -const Int128Overflow = 170141183460469231731687303715884105728n -const UInt128Overflow = 340282366920938463463374607431768211456n - -const Int256Overflow = - 57896044618658097711785492504343953926634992332820282019728792003956564819968n -const UInt256Overflow = - 115792089237316195423570985008687907853269984665640564039457584007913129639936n - -// const DecimalScaleMultipliersNumber: Record = {} -// for (let i = 0; i < 10; i++) { -// DecimalScaleMultipliersNumber[i] = 10 ** i -// } -// const DecimalScaleMultipliersBigInt: Record = {} -// for (let i = 0; i < 77; i++) { -// DecimalScaleMultipliersBigInt[i] = BigInt(10 ** i) -// } -// console.log(DecimalScaleMultipliers) - const DayMillis = 24 * 3600 * 1000 -const TxtDecoder = new TextDecoder() export class RowBinaryTypesDecoder { - static bool(src: Uint8Array, loc: number): DecodeResult | null { + static bool(src: Buffer, loc: number): DecodeResult | null { if (src.length < loc + 1) return null return [src[loc] === 1, loc + 1] } - static uint8(src: Uint8Array, loc: number): DecodeResult | null { + static uint8(src: Buffer, loc: number): DecodeResult | null { if (src.length < loc + 1) return null return [src[loc], loc + 1] } - static int8(src: Uint8Array, loc: number): DecodeResult | null { + static int8(src: Buffer, loc: number): DecodeResult | null { if (src.length < loc + 1) return null - const x = src[loc] - return [x < Int8Overflow ? x : x - UInt8Overflow, loc + 1] + return [src.readInt8(loc), loc + 1] } - static uint16(src: Uint8Array, loc: number): DecodeResult | null { + static uint16(src: Buffer, loc: number): DecodeResult | null { if (src.length < loc + 2) return null - return [readBytesAsUnsignedInt(src, loc, 2), loc + 2] + return [src.readUint16LE(loc), loc + 2] } - static int16(src: Uint8Array, loc: number): DecodeResult | null { + static int16(src: Buffer, loc: number): DecodeResult | null { if (src.length < loc + 2) return null - const x = readBytesAsUnsignedInt(src, loc, 2) - return [x < Int16Overflow ? x : x - UInt16Overflow, loc + 2] + return [src.readInt16LE(loc), loc + 2] } - static uint32(src: Uint8Array, loc: number): DecodeResult | null { + static uint32(src: Buffer, loc: number): DecodeResult | null { if (src.length < loc + 4) return null - return [readBytesAsUnsignedInt(src, loc, 4), loc + 4] + return [src.readUInt32LE(loc), loc + 4] } - static int32(src: Uint8Array, loc: number): DecodeResult | null { + static int32(src: Buffer, loc: number): DecodeResult | null { if (src.length < loc + 4) return null - const x = readBytesAsUnsignedInt(src, loc, 4) - return [x < Int32Overflow ? x : x - UInt32Overflow, loc + 4] + return [src.readInt32LE(loc), loc + 4] } - static uint64(src: Uint8Array, loc: number): DecodeResult | null { + static uint64(src: Buffer, loc: number): DecodeResult | null { if (src.length < loc + 8) return null - return [readBytesAsUnsignedBigInt(src, loc, 8), loc + 8] + return [src.readBigUInt64LE(loc), loc + 8] } - static int64(src: Uint8Array, loc: number): DecodeResult | null { + static int64(src: Buffer, loc: number): DecodeResult | null { if (src.length < loc + 8) return null - const x = readBytesAsUnsignedBigInt(src, loc, 8) - return [x < Int64Overflow ? 
x : x - UInt64Overflow, loc + 8] - } - static uint128(src: Uint8Array, loc: number): DecodeResult | null { - if (src.length < loc + 16) return null - return [readBytesAsUnsignedBigInt(src, loc, 16), loc + 16] - } - static int128(src: Uint8Array, loc: number): DecodeResult | null { - if (src.length < loc + 16) return null - const x = readBytesAsUnsignedBigInt(src, loc, 16) - return [x < Int128Overflow ? x : x - UInt128Overflow, loc + 16] + return [src.readBigInt64LE(loc), loc + 8] } - static uint256(src: Uint8Array, loc: number): DecodeResult | null { - if (src.length < loc + 32) return null - return [readBytesAsUnsignedBigInt(src, loc, 32), loc + 32] - } - static int256(src: Uint8Array, loc: number): DecodeResult | null { - if (src.length < loc + 32) return null - const x = readBytesAsUnsignedBigInt(src, loc, 32) - return [x < Int256Overflow ? x : x - UInt256Overflow, loc + 32] - } - static float32(src: Uint8Array, loc: number): DecodeResult | null { + // static uint128(src: Buffer, loc: number): DecodeResult | null { + // if (src.length < loc + 16) return null + // return [readBytesAsUnsignedBigInt(src, loc, 16), loc + 16] + // } + // static int128(src: Buffer, loc: number): DecodeResult | null { + // if (src.length < loc + 16) return null + // const x = readBytesAsUnsignedBigInt(src, loc, 16) + // return [x < Int128Overflow ? x : x - UInt128Overflow, loc + 16] + // } + // static uint256(src: Buffer, loc: number): DecodeResult | null { + // if (src.length < loc + 32) return null + // return [readBytesAsUnsignedBigInt(src, loc, 32), loc + 32] + // } + // static int256(src: Buffer, loc: number): DecodeResult | null { + // if (src.length < loc + 32) return null + // const x = readBytesAsUnsignedBigInt(src, loc, 32) + // return [x < Int256Overflow ? x : x - UInt256Overflow, loc + 32] + // } + static float32(src: Buffer, loc: number): DecodeResult | null { if (src.length < loc + 4) return null - const f32 = readBytesAsFloat32(src, loc) // console.log(f32) - return [f32, loc + 4] + return [src.readFloatLE(loc), loc + 4] } - static float64(src: Uint8Array, loc: number): DecodeResult | null { + static float64(src: Buffer, loc: number): DecodeResult | null { if (src.length < loc + 8) return null - return [readBytesAsFloat64(src, loc), loc + 8] + return [src.readDoubleLE(loc), loc + 8] } - static string(src: Uint8Array, loc: number): DecodeResult | null { + static string(src: Buffer, loc: number): DecodeResult | null { if (src.length < loc + 1) return null const res = readBytesAsUnsignedLEB128(src, loc) if (res === null) { return null } const [length, nextLoc] = res - if (src.length < nextLoc + length) return null - return [ - TxtDecoder.decode(src.subarray(nextLoc, nextLoc + length)), - nextLoc + length, - ] + const endLoc = nextLoc + length + if (src.length < endLoc) return null + return [src.toString('utf8', nextLoc, endLoc), endLoc] } - static date(src: Uint8Array, loc: number): DecodeResult | null { - const res = RowBinaryTypesDecoder.uint16(src, loc) - if (res === null) return null - return [new Date(res[0] * DayMillis), res[1]] + static date(src: Buffer, loc: number): DecodeResult | null { + if (src.length < loc + 2) return null + const daysSinceEpoch = src.readUInt16LE(loc) + return [new Date(daysSinceEpoch * DayMillis), loc + 2] } - static date32(src: Uint8Array, loc: number): DecodeResult | null { - const res = RowBinaryTypesDecoder.int32(src, loc) - if (res === null) return null - return [new Date(res[0] * DayMillis), res[1]] + static date32(src: Buffer, loc: number): DecodeResult | null 
{ + if (src.length < loc + 4) return null + const daysBeforeOrSinceEpoch = src.readInt32LE(loc) + return [new Date(daysBeforeOrSinceEpoch * DayMillis), loc + 4] } static nullable( baseTypeDecoder: SimpleTypeDecoder - ): (src: Uint8Array, loc: number) => DecodeResult | null { - return (src: Uint8Array, loc: number) => { - const res = RowBinaryTypesDecoder.uint8(src, loc) - if (res === null) return null - if (res[0] === 1) { - return [null, res[1]] + ): (src: Buffer, loc: number) => DecodeResult | null { + return (src: Buffer, loc: number) => { + if (src.length < loc + 1) return null + const isNull = src[loc] + if (isNull === 1) { + return [null, loc + 1] } - return baseTypeDecoder(src, res[1]) + return baseTypeDecoder(src, loc + 1) + } + } + static enum8( + values: Map + ): (src: Buffer, loc: number) => DecodeResult | null { + return (src: Buffer, loc: number) => { + if (src.length < loc + 1) return null + const index = src.readUInt8(loc) + const value = values.get(index)! // TODO: handle missing values + return [value, loc + 1] + } + } + static enum16( + values: Map + ): (src: Buffer, loc: number) => DecodeResult | null { + return (src: Buffer, loc: number) => { + if (src.length < loc + 2) return null + const index = src.readUInt16LE(loc) + const value = values.get(index)! // TODO: handle missing values + return [value, loc + 2] } } // static decimal( // precision: number, // scale: number - // ): (src: Uint8Array, loc: number) => DecodeResult | null { + // ): (src: Buffer, loc: number) => DecodeResult | null { // const intSize = getDecimalIntSize(precision) // let scaleMultiplier: number | bigint // if (intSize === 32) { @@ -217,7 +185,7 @@ export class RowBinaryTypesDecoder { // // intSize === 32 // // ? DecimalScaleMultipliersNumber[scale] // // : DecimalScaleMultipliersBigInt[scale] - // return (src: Uint8Array, loc: number) => { + // return (src: Buffer, loc: number) => { // if (intSize === 32) { // const res = RowBinaryTypesDecoder.int32(src, loc) // if (res === null) return null @@ -244,29 +212,36 @@ export class RowBinaryTypesDecoder { // } // } static decimal32( - scale: number - ): (src: Uint8Array, loc: number) => DecodeResult | null { + scale: number, + mapper?: (whole: number, fractional: number) => T + ): (src: Buffer, loc: number) => DecodeResult | null { const scaleMultiplier = 10 ** scale - return (src: Uint8Array, loc: number) => { - const res = RowBinaryTypesDecoder.int32(src, loc) - if (res === null) return null - const whole = ~~(res[0] / (scaleMultiplier as number)) - const fractional = res[0] % (scaleMultiplier as number) - return [`${whole.toString(10)}.${fractional.toString(10)}`, res[1]] + return (src: Buffer, loc: number) => { + if (src.length < loc + 4) return null + const fullDecimal32 = src.readInt32LE(loc) + const whole = ~~(fullDecimal32 / (scaleMultiplier as number)) + const fractional = fullDecimal32 % (scaleMultiplier as number) + if (mapper !== undefined) { + return [mapper(whole, fractional), loc + 4] + } + return [`${whole.toString(10)}.${fractional.toString(10)}`, loc + 4] } } static decimal64( scale: number - ): (src: Uint8Array, loc: number) => DecodeResult | null { - return (src: Uint8Array, loc: number) => { - const res = RowBinaryTypesDecoder.int64(src, loc) - if (res === null) return null - // avoid any bigint math here, it's super slow - const str = res[0].toString() - const dotIndex = str.length - scale - const whole = str.slice(0, dotIndex) - const fractional = str.slice(dotIndex) - return [`${whole}.${fractional}`, res[1]] + ): (src: Buffer, 
loc: number) => DecodeResult<string> | null {
+    return (src: Buffer, loc: number) => {
+      if (src.length < loc + 8) return null
+      const fullDecimal64 = src.readBigInt64LE(loc)
+      // Avoid BigInt math; it's slower than just dealing with a string
+      let str = fullDecimal64.toString(10)
+      let sign = ''
+      if (str.startsWith('-')) {
+        sign = '-'
+        str = str.slice(1)
+      }
+      if (scale >= str.length) {
+        // pad with leading zeros, e.g. "123" with scale 4 becomes "0.0123"
+        str = str.padStart(scale + 1, '0')
+      }
+      const dotIndex = str.length - scale
+      return [
+        `${sign}${str.slice(0, dotIndex)}.${str.slice(dotIndex)}`,
+        loc + 8,
+      ]
+    }
    }
  }
  static array(
    valueDecoder:
      | SimpleTypeDecoder
      | ReturnType<typeof RowBinaryTypesDecoder.decimal32>
      | ReturnType<typeof RowBinaryTypesDecoder.decimal64>,
    dimensions = 0
-  ): (src: Uint8Array, loc: number) => DecodeResult<Array<unknown>> | null {
-    return (src: Uint8Array, loc: number) => {
+  ): (src: Buffer, loc: number) => DecodeResult<Array<unknown>> | null {
+    return (src: Buffer, loc: number) => {
      const leb128 = readBytesAsUnsignedLEB128(src, loc)
      if (leb128 === null) return null
      const result = new Array(leb128[0])
@@ -313,10 +288,10 @@
  Int32: RowBinaryTypesDecoder.int32,
  UInt64: RowBinaryTypesDecoder.uint64,
  Int64: RowBinaryTypesDecoder.int64,
-  UInt128: RowBinaryTypesDecoder.uint128,
-  Int128: RowBinaryTypesDecoder.int128,
-  UInt256: RowBinaryTypesDecoder.uint256,
-  Int256: RowBinaryTypesDecoder.int256,
+  // UInt128: RowBinaryTypesDecoder.uint128,
+  // Int128: RowBinaryTypesDecoder.int128,
+  // UInt256: RowBinaryTypesDecoder.uint256,
+  // Int256: RowBinaryTypesDecoder.int256,
  Float32: RowBinaryTypesDecoder.float32,
  Float64: RowBinaryTypesDecoder.float64,
  String: RowBinaryTypesDecoder.string,
diff --git a/packages/client-node/src/index.ts b/packages/client-node/src/index.ts
index 6ae917df..98b29d7d 100644
--- a/packages/client-node/src/index.ts
+++ b/packages/client-node/src/index.ts
@@ -1,6 +1,10 @@
 export { createClient } from './client'
 export { NodeClickHouseClientConfigOptions as ClickHouseClientConfigOptions } from './config'
 export { ResultSet } from './result_set'
+export {
+  RowBinaryStreamParams,
+  RowBinaryResultSet,
+} from './row_binary_result_set'
 
 /** Re-export @clickhouse/client-common types */
 export {
diff --git a/packages/client-node/src/row_binary_result_set.ts b/packages/client-node/src/row_binary_result_set.ts
index e316a859..c4fd8ef8 100644
--- a/packages/client-node/src/row_binary_result_set.ts
+++ b/packages/client-node/src/row_binary_result_set.ts
@@ -4,6 +4,18 @@ import { RowBinaryColumnsHeader } from '@clickhouse/client-common/src/data_forma
 import { Buffer } from 'buffer'
 import Stream, { Transform, type TransformCallback } from 'stream'
 
+export interface RowBinaryStreamParams {
+  /** Determines whether each row will be returned as an array or an object. Possible options: 'Array', 'Object'.
+   *
+   * NB: Object mode will reduce performance by approximately 25-30%, as there will be processing overhead
+   * (similar to JSONEachRow vs JSONCompactEachRow).
+   *
+   * Default: 'Array'. */
+  mode?: 'Array' | 'Object'
+}
+
+// FIXME: remove BaseResultSet inheritance (after 1.0.0 is merged).
+// FIXME: add logger (after 1.0.0 is merged).
 export class RowBinaryResultSet implements BaseResultSet {
   constructor(
     private _stream: Stream.Readable,
@@ -11,30 +23,37 @@ export class RowBinaryResultSet implements BaseResultSet {
     public readonly query_id: string
   ) {}
 
+  // FIXME: remove this (after 1.0.0 is merged).
   async text(): Promise<string> {
     throw new Error(
       `Can't call 'text()' on RowBinary result set; please use 'stream' instead`
     )
   }
 
+  // FIXME: remove this (after 1.0.0 is merged).
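+  // Usage sketch (illustrative only; the exact format value and the stream
+  // params are assumptions based on this PR, not a finalized API):
+  //
+  //   const rs = await client.query({
+  //     query: 'SELECT * FROM some_table',
+  //     format: 'RowBinaryWithNamesAndTypes',
+  //   })
+  //   for await (const rows of rs.stream({ mode: 'Object' })) {
+  //     // each emitted chunk is a batch of decoded rows: plain objects in
+  //     // 'Object' mode, arrays of values in the default 'Array' mode
+  //   }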
async json(): Promise { throw new Error( `Can't call 'json()' on RowBinary result set; please use 'stream' instead` ) } - async get(): Promise { + /** Consume the entire stream at once and get all the rows as a single array. + * If your result set might be too large, consider using {@link stream} instead. + * + * @returns {Promise} - An array of rows. + */ + async get(params?: RowBinaryStreamParams): Promise { if (this.format !== 'RowBinary') { throw new Error( `Can't use RowBinaryResultSet if the format is not RowBinary` ) } - const result: unknown[][] = [] + const result: any[] = [] await new Promise((resolve, reject) => { - this.stream() + this.stream(params) .on('data', (rows: unknown[][]) => { for (let i = 0; i < rows.length; i++) { - result.push(rows[i]) + result.push(rows[i] as any) } }) .on('end', resolve) @@ -43,7 +62,8 @@ export class RowBinaryResultSet implements BaseResultSet { return result } - stream(): Stream.Readable { + // FIXME: return StreamReadable after 1.0.0. + stream(params?: RowBinaryStreamParams): Stream.Readable { // If the underlying stream has already ended, // Stream.pipeline will create a new empty stream, // but without "readableEnded" flag set to true @@ -54,15 +74,24 @@ export class RowBinaryResultSet implements BaseResultSet { throw Error(`Format ${this.format} is not RowBinary`) } + // ClickHouse columns with their types; decoded from the header in the first chunk(s) let columns: DecodedColumns[0] | undefined - let incompleteChunk: Uint8Array | undefined + // Current column index in the row being processed let columnIndex = 0 - const rowsToPush: unknown[][] = [] + // Fully decoded rows, pending to be pushed downstream + let decodedRows: any[] = [] + // Whether to return each row as an object or an array + const asObject = params?.mode === 'Object' ?? 
false + // Used as a prototype if it's Object mode + let protoObject: any - const measures: Record = {} - let iterations = 0 - let incompleteChunksTotal = 0 - const NS_PER_SEC = 1e9 + let src: Buffer + let incompleteChunk: Buffer | undefined + + // const measures: Record = {} + // let iterations = 0 + // let incompleteChunksTotal = 0 + // const NS_PER_SEC = 1e9 const toRows = new Transform({ transform( @@ -70,14 +99,18 @@ export class RowBinaryResultSet implements BaseResultSet { _encoding: BufferEncoding, callback: TransformCallback ) { - //console.log(`transform call, chunk length: ${chunk.length}`) - let src: Buffer + if (chunk.length === 0) { + return callback() + } + if (incompleteChunk !== undefined) { - incompleteChunksTotal++ - src = Buffer.concat([incompleteChunk, chunk.subarray()]) + src = Buffer.concat( + [incompleteChunk, chunk], + incompleteChunk.length + chunk.length + ) incompleteChunk = undefined } else { - src = chunk.subarray() + src = chunk } let loc = 0 @@ -86,70 +119,87 @@ export class RowBinaryResultSet implements BaseResultSet { const res = RowBinaryColumnsHeader.decode(src) columns = res[0] loc = res[1] + if (asObject) { + protoObject = Object.create(null) + for (let i = 0; i < columns.names.length; i++) { + protoObject[columns.names[i]] = undefined + } + } } catch (err) { return callback(err as Error) } } - function logIterationExecutionTime(end: [number, number]) { - const col = columns!.types[columnIndex] - const name = columns!.names[columnIndex] - const execTime = end[0] * NS_PER_SEC + end[1] - iterations++ - const key = `${col.dbType} - ${name}` - measures[key] = (measures[key] || 0) + execTime - } + // function logIterationExecutionTime(end: [number, number]) { + // const col = columns!.types[columnIndex] + // const name = columns!.names[columnIndex] + // const execTime = end[0] * NS_PER_SEC + end[1] + // iterations++ + // const key = `${col.dbType} - ${name}` + // measures[key] = (measures[key] || 0) + execTime + // } + + let lastLoc = 0 while (loc < src.length) { - const row = new Array(columns.names.length) + const row = asObject + ? 
Object.create(protoObject) + : new Array(columns.names.length) while (columnIndex < columns.names.length) { - const start = process.hrtime() + // const start = process.hrtime() const decodeResult = columns.decoders[columnIndex](src, loc) - const end = process.hrtime(start) - logIterationExecutionTime(end) - //console.log(decodeResult, loc, src.length, columns?.names[columnIndex], columns?.types[columnIndex]) + + // const end = process.hrtime(start) + // logIterationExecutionTime(end) + // not enough data to finish the row - null indicates that if (decodeResult === null) { + // incompleteChunksTotal++ // will be added to the beginning of the next received chunk incompleteChunk = src.subarray(loc) - if (rowsToPush.length > 0) { + if (decodedRows.length > 0) { // console.log(`pushing ${rowsToPush.length} rows`) - this.push(rowsToPush) - rowsToPush.length = 0 + this.push(decodedRows) + decodedRows = [] } return callback() } else { - // decoded a value - row[columnIndex] = decodeResult[0] + // successfully decoded a value for the column + if (asObject) { + ;(row as any)[columns.names[columnIndex]] = decodeResult[0] + } else { + ;(row as any[])[columnIndex] = decodeResult[0] + } loc = decodeResult[1] columnIndex++ + lastLoc = loc } } - rowsToPush.push(row) + decodedRows.push(row) columnIndex = 0 } if (loc > src.length) { - incompleteChunk = src.subarray(loc - src.length) + console.log(`loc > src.length, ${loc} > ${src.length}`) } - if (rowsToPush.length > 0) { + if (decodedRows.length > 0) { // console.log(`pushing ${rowsToPush.length} rows`) - this.push(rowsToPush) - rowsToPush.length = 0 + this.push(decodedRows) + decodedRows = [] } return callback() }, final(callback: TransformCallback) { - if (rowsToPush.length > 0) { - this.push(rowsToPush) - rowsToPush.length = 0 - } - console.log(`Measures (${iterations})`, measures) - for (const key in measures) { - console.log(`Avg ns for ${key}:`, measures[key] / iterations) + if (decodedRows.length > 0) { + this.push(decodedRows) + decodedRows = [] } - console.log(`Incomplete chunks total:`, incompleteChunksTotal) + // console.log(`Measures (${iterations})`, measures) + // for (const key in measures) { + // console.log(`Avg ns for ${key}:`, measures[key] / iterations) + // } + // console.log(`Incomplete chunks total:`, incompleteChunksTotal) return callback() }, autoDestroy: true, @@ -164,113 +214,6 @@ export class RowBinaryResultSet implements BaseResultSet { } }) } - // - // streamDataView() { - // // If the underlying stream has already ended, - // // Stream.pipeline will create a new empty stream, - // // but without "readableEnded" flag set to true - // if (this._stream.readableEnded) { - // throw Error('Stream has been already consumed') - // } - // if (this.format !== 'RowBinary') { - // throw Error(`Format ${this.format} is not RowBinary`) - // } - // - // let columns: { names: string[]; types: ParsedColumnType[]; decoders: SimpleTypeDecoderDataView[] } - // let incompleteChunk: Uint8Array | undefined - // let columnIndex = 0 - // const rowsToPush: unknown[][] = [] - // - // const toRows = new Transform({ - // transform( - // chunk: Buffer, - // _encoding: BufferEncoding, - // callback: TransformCallback - // ) { - // //console.log(`transform call, chunk length: ${chunk.length}`) - // let src: DataView - // if (incompleteChunk !== undefined) { - // const uint8Arr = new Uint8Array(incompleteChunk.length + chunk.length) - // uint8Arr.set(incompleteChunk) - // uint8Arr.set(chunk, incompleteChunk.length) - // src = new DataView(uint8Arr.buffer) - 
// incompleteChunk = undefined - // } else { - // src = new DataView(chunk.buffer) - // } - // - // let loc = 0 - // if (columns === undefined) { - // try { - // const res = RowBinaryColumnsHeaderDataView.decode(chunk) - // columns = res[0] - // loc = res[1] - // } catch (err) { - // return callback(err as Error) - // } - // } - // - // while (loc < src.byteLength) { - // const row = new Array(columns.names.length) - // while (columnIndex < columns.names.length) { - // const decodeResult = ( - // columns.decoders[columnIndex] as any as SimpleTypeDecoderDataView - // )(src, loc) - // //console.log(decodeResult, loc, src.length, columns?.names[columnIndex], columns?.types[columnIndex]) - // // not enough data to finish the row - null indicates that - // if (decodeResult === null) { - // // will be added to the beginning of the next received chunk - // incompleteChunk = new Uint8Array(src.buffer.slice(loc)) - // if (rowsToPush.length > 0) { - // // console.log(`pushing ${rowsToPush.length} rows`) - // this.push(rowsToPush) - // rowsToPush.length = 0 - // } - // return callback() - // } else { - // // decoded a value - // row[columnIndex] = decodeResult[0] - // loc = decodeResult[1] - // columnIndex++ - // } - // } - // rowsToPush.push(row) - // columnIndex = 0 - // } - // - // if (loc > src.byteLength) { - // incompleteChunk = new Uint8Array( - // src.buffer.slice(loc - src.byteLength) - // ) - // } - // - // if (rowsToPush.length > 0) { - // // console.log(`pushing ${rowsToPush.length} rows`) - // this.push(rowsToPush) - // rowsToPush.length = 0 - // } - // - // return callback() - // }, - // final(callback: TransformCallback) { - // if (rowsToPush.length > 0) { - // this.push(rowsToPush) - // rowsToPush.length = 0 - // } - // return callback() - // }, - // autoDestroy: true, - // objectMode: true, - // }) - // - // return Stream.pipeline(this._stream, toRows, function pipelineCb(err) { - // if (err) { - // // FIXME: use logger instead - // // eslint-disable-next-line no-console - // console.error(err) - // } - // }) - // } close() { this._stream.destroy() From c0d48ad0395c3f10c45c575f0cb784388fc382a4 Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Sat, 23 Mar 2024 17:16:19 +0100 Subject: [PATCH 07/14] Correct Enum parser --- .../unit/row_binary_columns_parser.test.ts | 182 +++++++++--------- .../row_binary/columns_header.ts | 14 +- .../row_binary/columns_parser.ts | 164 +++++++++++----- 3 files changed, 218 insertions(+), 142 deletions(-) diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts index 04dccab3..87611cba 100644 --- a/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts @@ -4,92 +4,99 @@ fdescribe('RowBinaryColumnsParser', () => { describe('Enum', () => { // pass-through; will be used as-is in the result and in the error messages. 
const dbType = 'SomeEnumTypeFromDB' - it('should parse Enum8', async () => { - const args: [string, Map][] = [ - ["Enum8('a' = 1)", new Map([[1, 'a']])], - [ - "Enum8('a' = 0, 'b' = 2)", - new Map([ - [0, 'a'], - [2, 'b'], - ]), - ], - [ - "Enum8('a' = 1, 'b' = 2, 'c' = 42)", - new Map([ - [1, 'a'], - [2, 'b'], - [42, 'c'], - ]), - ], - [ - "Enum8('f'' = 1, 'x =' = 2, 'b'''' = 3, ''c==' = 42)", - new Map([ - [1, "f'"], - [2, 'x ='], - [3, "b'''"], - [42, "'c=="], - ]), - ], + it('should parse correct values', async () => { + type TestArgs = { + columnType: string + expectedValues: Map + expectedIntSize: 8 | 16 + } + const enumTypes: ['Enum8' | 'Enum16', 8 | 16][] = [ + ['Enum8', 8], + ['Enum16', 16], ] - args.forEach(([columnType, values]) => { - expect(parseEnum({ columnType, dbType })) - .withContext( - `Expected ${columnType} to be parsed as Enum8 [${[ - ...values.entries(), - ]}]` - ) - .toEqual({ - type: 'Enum', - intSize: 8, - dbType, - values, - }) - }) - }) - it('should parse Enum16', async () => { - const args: [string, Map][] = [ - ["Enum16('a' = 1)", new Map([[1, 'a']])], - [ - "Enum16('a' = 0, 'b' = 2)", - new Map([ - [0, 'a'], - [2, 'b'], - ]), - ], - [ - "Enum16('a' = 1, 'b' = 2, 'c' = 42)", - new Map([ - [1, 'a'], - [2, 'b'], - [42, 'c'], - ]), - ], - [ - "Enum16('f'' = 1, 'x =' = 2, 'b'''' = 3, ''c==' = 25000)", - new Map([ - [1, "f'"], - [2, 'x ='], - [3, "b'''"], - [25000, "'c=="], - ]), - ], - ] - args.forEach(([columnType, values]) => { - expect(parseEnum({ columnType, dbType })) - .withContext( - `Expected ${columnType} to be parsed as Enum16 [${[ - ...values.entries(), - ]}]` - ) - .toEqual({ - type: 'Enum', - intSize: 16, - dbType, - values, - }) - }) + const allEnumSizeArgs: TestArgs[][] = enumTypes.map( + ([enumType, expectedIntSize]) => [ + { + columnType: `${enumType}('a' = 1)`, + expectedValues: new Map([[1, 'a']]), + expectedIntSize, + }, + { + columnType: `${enumType}('a' = 0, 'b' = 2)`, + expectedValues: new Map([ + [0, 'a'], + [2, 'b'], + ]), + expectedIntSize, + }, + { + columnType: `${enumType}('a' = 1, 'b' = 2, 'c' = 42)`, + expectedValues: new Map([ + [1, 'a'], + [2, 'b'], + [42, 'c'], + ]), + expectedIntSize, + }, + { + columnType: `${enumType}('f\\'' = 1, 'x =' = 2, 'b\\'\\'\\'' = 3, '\\'c=4=' = 42, '4' = 100)`, + expectedValues: new Map([ + [1, "f\\'"], + [2, 'x ='], + [3, "b\\'\\'\\'"], + [42, "\\'c=4="], + [100, '4'], + ]), + expectedIntSize, + }, + { + columnType: `${enumType}('' = 0)`, + expectedValues: new Map([[0, '']]), + expectedIntSize, + }, + { + columnType: `${enumType}('' = 42)`, + expectedValues: new Map([[42, '']]), + expectedIntSize, + }, + { + columnType: `${enumType}('foo' = 1, '' = 42)`, + expectedValues: new Map([ + [1, 'foo'], + [42, ''], + ]), + expectedIntSize, + }, + { + columnType: `${enumType}('' = 0, 'foo' = 42)`, + expectedValues: new Map([ + [0, ''], + [42, 'foo'], + ]), + expectedIntSize, + }, + ] + ) + + allEnumSizeArgs.forEach((args) => + args.forEach(({ columnType, expectedValues, expectedIntSize }) => { + const result = parseEnum({ columnType, dbType }) + expect(result) + .withContext( + `Expected ${columnType} to be parsed as an Enum with intSize ${expectedIntSize} and values [${[ + ...expectedValues.entries(), + ]}]` + ) + .toEqual({ + type: 'Enum', + intSize: expectedIntSize, + values: expectedValues, + dbType, + }) + }) + ) }) + it('should throw when the type is not a valid enum', async () => { const args: [string][] = [ ['Enum'], // should be either 8 or 16 @@ -105,14 +112,11 @@ fdescribe('RowBinaryColumnsParser', 
() => { }) }) it('should throw when the values are not valid', async () => { - const negativeArgs: [string][] = [ - ["Enum8('a' = x)"], - ["Enum8('foo')"], - ] + const negativeArgs: [string][] = [["Enum8('a' = x)"], ["Enum8('foo')"]] negativeArgs.forEach(([columnType]) => { expect(() => parseEnum({ columnType, dbType })) .withContext(`Expected ${columnType} to throw`) - .toThrowError('Invalid Enum type values') + .toThrowError('Expected Enum index to be a valid number') }) }) it('should throw on duplicate indices', async () => { diff --git a/packages/client-common/src/data_formatter/row_binary/columns_header.ts b/packages/client-common/src/data_formatter/row_binary/columns_header.ts index d36507a4..6e946e1c 100644 --- a/packages/client-common/src/data_formatter/row_binary/columns_header.ts +++ b/packages/client-common/src/data_formatter/row_binary/columns_header.ts @@ -1,19 +1,15 @@ -import { +import type { DecimalParams, - parseColumnType, ParsedColumnArray, ParsedColumnNullable, ParsedColumnType, } from './columns_parser' +import { parseColumnType } from './columns_parser' import { ClickHouseRowBinaryError } from './errors' import type { DecodeResult } from './read_bytes' import { readBytesAsUnsignedLEB128 } from './read_bytes' -import { - RowBinarySimpleDecoders, - RowBinaryTypesDecoder, - SimpleTypeDecoder, - TypeDecoder, -} from './types' +import type { SimpleTypeDecoder, TypeDecoder } from './types' +import { RowBinarySimpleDecoders, RowBinaryTypesDecoder } from './types' export type DecodedColumns = DecodeResult<{ names: string[] @@ -119,6 +115,8 @@ function getArrayDecoder(col: ParsedColumnArray): SimpleTypeDecoder { let valueDecoder if (col.valueType === 'Decimal') { valueDecoder = getDecimalDecoder(col.decimalParams) + } else if (col.valueType === 'Enum') { + valueDecoder = getEnumDecoder(col.intSize, col.values) } else { valueDecoder = RowBinarySimpleDecoders[col.valueType] } diff --git a/packages/client-common/src/data_formatter/row_binary/columns_parser.ts b/packages/client-common/src/data_formatter/row_binary/columns_parser.ts index e5c6bdd8..c2eb34a2 100644 --- a/packages/client-common/src/data_formatter/row_binary/columns_parser.ts +++ b/packages/client-common/src/data_formatter/row_binary/columns_parser.ts @@ -8,33 +8,34 @@ export interface ParsedColumnSimple { * * UInt8 -> UInt8 * * LowCardinality(Nullable(String)) -> String */ columnType: SimpleColumnType + /** ClickHouse type as it is defined in the table. */ dbType: string } +interface ParsedColumnNullableBase { + type: 'Nullable' + dbType: string +} export type ParsedColumnNullable = - | { - type: 'Nullable' + | (ParsedColumnNullableBase & { /** Used to determine how to decode T from Nullable(T) */ valueType: SimpleColumnType - dbType: string - } - | { - type: 'Nullable' + }) + | (ParsedColumnNullableBase & { valueType: 'Decimal' decimalParams: ParsedColumnDecimal['params'] - dbType: string - } - | { - type: 'Nullable' + }) + | (ParsedColumnNullableBase & { valueType: 'Enum' values: ParsedColumnEnum['values'] intSize: ParsedColumnEnum['intSize'] - dbType: string - } + }) export interface ParsedColumnEnum { type: 'Enum' + /** Index to name */ values: Map + /** UInt8 or UInt16 */ intSize: 8 | 16 dbType: string } @@ -66,28 +67,32 @@ export interface ParsedColumnDecimal { * Arrays can be multidimensional, e.g. Array(Array(Array(T))). * Arrays are allowed to have a Map as the value type. */ +interface ParsedColumnArrayBase { + type: 'Array' + valueNullable: boolean + /** Array(T) = 1 dimension, Array(Array(T)) = 2, etc. 
*/ + dimensions: number + dbType: string +} export type ParsedColumnArray = - | { - type: 'Array' - dimensions: number + | (ParsedColumnArrayBase & { /** Represents the final value type; nested arrays are handled with {@link ParsedColumnArray.dimensions} */ valueType: SimpleColumnType - valueNullable: boolean - dbType: string - } - | { - type: 'Array' - dimensions: number + }) + | (ParsedColumnArrayBase & { valueType: 'Decimal' - valueNullable: boolean decimalParams: DecimalParams - dbType: string - } + }) + | (ParsedColumnArrayBase & { + valueType: 'Enum' + values: ParsedColumnEnum['values'] + intSize: ParsedColumnEnum['intSize'] + }) // TODO: add Tuple support. // export interface ParsedColumnMap { // type: 'Map' // key: ParsedColumnSimple -// value: ParsedColumnSimple | ParsedColumnArray | ParsedColumnMap +// value: ParsedColumnType // dbType: string // } // TODO - add Map support. @@ -208,8 +213,8 @@ export function parseEnum({ } ) } - const matches = [...columnType.matchAll(/(?:'(.*?)' = (\d+),?)+/g)] - if (matches.length === 0) { + + if (columnType.length < 2) { throw ClickHouseRowBinaryError.headerDecodingError( 'Invalid Enum type values', { @@ -219,38 +224,92 @@ export function parseEnum({ ) } - // FIXME: regex is not enough to validate possibly incorrect Enum values. - // needs to be processed char by char instead. const names: string[] = [] - const values = new Map() - for (const match of matches) { - const index = parseInt(match[2], 10) - if (index < 0 || Number.isNaN(index)) { + const indices: number[] = [] + let parsingName = true // false when parsing the index + let charEscaped = false // we should ignore escaped ticks + let startIndex = 1 // Skip the first ' + + function pushEnumIndex(start: number, end: number) { + const index = parseInt(columnType.slice(start, end), 10) + if (Number.isNaN(index) || index < 0) { throw ClickHouseRowBinaryError.headerDecodingError( - 'Enum index must be >= 0', - { columnType, dbType, index, matches: [...matches] } + 'Expected Enum index to be a valid number', + { + columnType, + dbType, + names, + indices, + index, + start, + end, + } ) } - if (values.has(index)) { + if (indices.includes(index)) { throw ClickHouseRowBinaryError.headerDecodingError( 'Duplicate Enum index', - { columnType, dbType, index, matches: [...matches] } + { columnType, dbType, index, names, indices } ) } - if (names.includes(match[1])) { - throw ClickHouseRowBinaryError.headerDecodingError( - 'Duplicate Enum name', - { columnType, dbType, name: match[1], matches: [...matches] } - ) + indices.push(index) + } + + // Should support the most complicated enums, such as Enum8('f\'' = 1, 'x =' = 2, 'b\'\'\'' = 3, '\'c=4=' = 42, '4' = 100) + for (let i = 1; i < columnType.length; i++) { + if (parsingName) { + if (!charEscaped) { + if (columnType[i] === '\\') { + charEscaped = true + } else if (columnType[i] === "'") { + // non-escaped closing tick - push the name + const name = columnType.slice(startIndex, i) + if (names.includes(name)) { + throw ClickHouseRowBinaryError.headerDecodingError( + 'Duplicate Enum name', + { columnType, dbType, name, names, indices } + ) + } + names.push(name) + i += 4 // skip ` = ` and the first digit, as it will always have at least one. + startIndex = i + parsingName = false + } + } else { + // current char was escaped, ignoring. + charEscaped = false + } + } else { + // Parsing the index + if (columnType[i] < '0' || columnType[i] > '9') { + pushEnumIndex(startIndex, i) + // the char at this index should be comma. 
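+        // Worked example for Enum8('a' = 1, '' = 42), i.e. "'a' = 1, '' = 42"
+        // once the prefix and the trailing paren are stripped: the closing
+        // tick after `a` pushes the name and jumps straight to the first
+        // digit of its index; the comma (the first non-digit) pushes index 1;
+        // skipping ", '" lands on the opening tick of the next name, which is
+        // allowed to be empty; the final index (42) is pushed after the loop.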
+ i += 2 // skip ` '`, but not the first char - ClickHouse allows something like Enum8('foo' = 0, '' = 42) + startIndex = i + 1 + parsingName = true + charEscaped = false + } } - values.set(index, match[1]) - names.push(match[1]) + } + + // Push the last index + pushEnumIndex(startIndex, columnType.length) + if (names.length !== indices.length) { + throw ClickHouseRowBinaryError.headerDecodingError( + 'Expected Enum to have the same number of names and indices', + { columnType, dbType, names, indices } + ) + } + + const values = new Map() + for (let i = 0; i < names.length; i++) { + values.set(indices[i], names[i]) } return { type: 'Enum', - intSize, values, + intSize, dbType, } } @@ -324,6 +383,21 @@ export function parseArrayType({ dbType, } } + if ( + columnType.startsWith(Enum8Prefix) || + columnType.startsWith(Enum16Prefix) + ) { + const { values, intSize } = parseEnum({ dbType, columnType }) + return { + type: 'Array', + valueType: 'Enum', + valueNullable, + values, + intSize, + dimensions, + dbType, + } + } if (columnType in RowBinarySimpleDecoders) { return { type: 'Array', From f8baa31a632b43bd8bf21f7928bfbb6b866d4196 Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Sat, 23 Mar 2024 23:01:03 +0100 Subject: [PATCH 08/14] Add more column types parsers --- .../unit/row_binary_columns_parser.test.ts | 161 ++--- .../row_binary_columns_parser_array.test.ts | 308 +++++++++ ...row_binary_columns_parser_datetime.test.ts | 116 ++++ .../row_binary_columns_parser_decimal.test.ts | 103 +++ .../row_binary_columns_parser_enum.test.ts | 164 +++++ .../row_binary_columns_parser_map.test.ts | 41 ++ ...row_binary_columns_parser_nullable.test.ts | 266 ++++++++ .../row_binary/columns_header.ts | 40 +- .../row_binary/columns_parser.ts | 605 +++++++++++------- 9 files changed, 1441 insertions(+), 363 deletions(-) create mode 100644 packages/client-common/__tests__/unit/row_binary_columns_parser_array.test.ts create mode 100644 packages/client-common/__tests__/unit/row_binary_columns_parser_datetime.test.ts create mode 100644 packages/client-common/__tests__/unit/row_binary_columns_parser_decimal.test.ts create mode 100644 packages/client-common/__tests__/unit/row_binary_columns_parser_enum.test.ts create mode 100644 packages/client-common/__tests__/unit/row_binary_columns_parser_map.test.ts create mode 100644 packages/client-common/__tests__/unit/row_binary_columns_parser_nullable.test.ts diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts index 87611cba..1723d3be 100644 --- a/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts @@ -1,144 +1,55 @@ -import { parseEnum } from '../../src/data_formatter/row_binary/columns_parser' +import { parseFixedStringType } from '../../src/data_formatter/row_binary/columns_parser' -fdescribe('RowBinaryColumnsParser', () => { - describe('Enum', () => { - // pass-through; will be used as-is in the result and in the error messages. 
- const dbType = 'SomeEnumTypeFromDB' - it('should parse correct values', async () => { - type TestArgs = { - columnType: string - expectedValues: Map - expectedIntSize: 8 | 16 - } - const enumTypes: ['Enum8' | 'Enum16', 8 | 16][] = [ - ['Enum8', 8], - ['Enum16', 16], +fdescribe('RowBinary column types parser', () => { + describe('FixedString', () => { + it('should parse FixedString', async () => { + const args: [string, number][] = [ + ['FixedString(1)', 1], + ['FixedString(42)', 42], + ['FixedString(100)', 100], + ['FixedString(32768)', 32768], ] - const allEnumSizeArgs: TestArgs[][] = enumTypes.map( - ([enumType, expectedIntSize]) => [ - { - columnType: `${enumType}('a' = 1)`, - expectedValues: new Map([[1, 'a']]), - expectedIntSize, - }, - { - columnType: `${enumType}('a' = 0, 'b' = 2)`, - expectedValues: new Map([ - [0, 'a'], - [2, 'b'], - ]), - expectedIntSize, - }, - { - columnType: `${enumType}('a' = 1, 'b' = 2, 'c' = 42)`, - expectedValues: new Map([ - [1, 'a'], - [2, 'b'], - [42, 'c'], - ]), - expectedIntSize, - }, - { - columnType: `${enumType}('f\\'' = 1, 'x =' = 2, 'b\\'\\'\\'' = 3, '\\'c=4=' = 42, '4' = 100)`, - expectedValues: new Map([ - [1, "f\\'"], - [2, 'x ='], - [3, "b\\'\\'\\'"], - [42, "\\'c=4="], - [100, '4'], - ]), - expectedIntSize, - }, - { - columnType: `${enumType}('' = 0)`, - expectedValues: new Map([[0, '']]), - expectedIntSize, - }, - { - columnType: `${enumType}('' = 42)`, - expectedValues: new Map([[42, '']]), - expectedIntSize, - }, - { - columnType: `${enumType}('foo' = 1, '' = 42)`, - expectedValues: new Map([ - [1, 'foo'], - [42, ''], - ]), - expectedIntSize, - }, - { - columnType: `${enumType}('' = 0, 'foo' = 42)`, - expectedValues: new Map([ - [0, ''], - [42, 'foo'], - ]), - expectedIntSize, - }, - ] - ) - - allEnumSizeArgs.forEach((args) => - args.forEach(({ columnType, expectedValues, expectedIntSize }) => { - const result = parseEnum({ columnType, dbType }) - expect(result) - .withContext( - `Expected ${columnType} to be parsed as an Enum with intSize ${expectedIntSize} and values [${[ - ...expectedValues.entries(), - ]}]` - ) - .toEqual({ - type: 'Enum', - intSize: expectedIntSize, - values: expectedValues, - dbType, - }) + args.forEach(([columnType, sizeBytes]) => { + const result = parseFixedStringType({ + columnType, + sourceType: columnType, }) - ) + expect(result) + .withContext( + `Expected ${columnType} to be parsed as a FixedString with size ${sizeBytes}` + ) + .toEqual({ type: 'FixedString', sizeBytes, sourceType: columnType }) + }) }) - it('should throw when the type is not a valid enum', async () => { + it('should throw on invalid FixedString type', async () => { const args: [string][] = [ - ['Enum'], // should be either 8 or 16 - ['Enum32'], - ['Enum64'], + ['FixedString'], + ['FixedString('], + ['FixedString()'], ['String'], - ['Enum(String)'], - ] - args.forEach(([columnType]) => { - expect(() => parseEnum({ columnType, dbType })) - .withContext(`Expected ${columnType} to throw`) - .toThrowError('Expected Enum to be either Enum8 or Enum16') - }) - }) - it('should throw when the values are not valid', async () => { - const negativeArgs: [string][] = [["Enum8('a' = x)"], ["Enum8('foo')"]] - negativeArgs.forEach(([columnType]) => { - expect(() => parseEnum({ columnType, dbType })) - .withContext(`Expected ${columnType} to throw`) - .toThrowError('Expected Enum index to be a valid number') - }) - }) - it('should throw on duplicate indices', async () => { - const args: [string][] = [ - ["Enum8('a' = 0, 'b' = 0)"], - ["Enum8('a' = 0, 'b' = 
1, 'c' = 1)"], ] args.forEach(([columnType]) => { - expect(() => parseEnum({ columnType, dbType })) + expect(() => + parseFixedStringType({ columnType, sourceType: columnType }) + ) .withContext(`Expected ${columnType} to throw`) - .toThrowError('Duplicate Enum index') + .toThrowError('Invalid FixedString type') }) }) - it('should throw on duplicate names', async () => { + + it('should throw on invalid FixedString size', async () => { const args: [string][] = [ - ["Enum8('a' = 0, 'a' = 1)"], - ["Enum8('a' = 0, 'b' = 1, 'b' = 2)"], + ['FixedString(0)'], + ['FixedString(x)'], + [`FixedString(')`], ] args.forEach(([columnType]) => { - expect(() => parseEnum({ columnType, dbType })) + expect(() => + parseFixedStringType({ columnType, sourceType: columnType }) + ) .withContext(`Expected ${columnType} to throw`) - .toThrowError('Duplicate Enum name') + .toThrowError('Invalid FixedString size in bytes') }) }) }) diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_array.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_array.test.ts new file mode 100644 index 00000000..8633ca58 --- /dev/null +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_array.test.ts @@ -0,0 +1,308 @@ +import type { SimpleColumnType } from '../../src/data_formatter' +import type { + ParsedColumnDateTime, + ParsedColumnDateTime64, + ParsedColumnEnum, +} from '../../src/data_formatter/row_binary/columns_parser' +import { parseArrayType } from '../../src/data_formatter/row_binary/columns_parser' + +fdescribe('RowBinary column types parser - Array', () => { + it('should parse Array with a simple value type', async () => { + type TestArgs = { + columnType: string + valueType: SimpleColumnType + dimensions: number + } + const args: TestArgs[] = [ + { + columnType: 'Array(String)', + valueType: 'String', + dimensions: 1, + }, + { + columnType: 'Array(UInt8)', + valueType: 'UInt8', + dimensions: 1, + }, + { + columnType: 'Array(Array(Int32))', + valueType: 'Int32', + dimensions: 2, + }, + { + columnType: 'Array(Array(Array(Date32)))', + valueType: 'Date32', + dimensions: 3, + }, + { + columnType: 'Array(Array(Array(Array(Float32))))', + valueType: 'Float32', + dimensions: 4, + }, + ] + args.forEach((args: TestArgs) => { + const { columnType, valueType, dimensions } = args + const result = parseArrayType({ columnType, sourceType: columnType }) + expect(result) + .withContext( + `Expected ${columnType} to be parsed as an Array with value type ${valueType} and ${dimensions} dimensions` + ) + .toEqual({ + type: 'Array', + value: { + type: 'Simple', + columnType: valueType, + sourceType: valueType, // T + }, + sourceType: columnType, // Array(T) + dimensions, + }) + }) + }) + + it('should parse Array with Nullable', async () => { + type TestArgs = { + columnType: string + valueType: SimpleColumnType + dimensions: number + } + const args: TestArgs[] = [ + { + columnType: 'Array(Nullable(String))', + valueType: 'String', + dimensions: 1, + }, + { + columnType: 'Array(Array(Nullable(Int32)))', + valueType: 'Int32', + dimensions: 2, + }, + ] + args.forEach(({ columnType, valueType, dimensions }: TestArgs) => { + const result = parseArrayType({ columnType, sourceType: columnType }) + expect(result) + .withContext( + `Expected ${columnType} to be parsed as an Array with value type ${valueType} and ${dimensions} dimensions` + ) + .toEqual({ + type: 'Array', + value: { + type: 'Nullable', + value: { + type: 'Simple', + columnType: valueType, + sourceType: valueType, // T + }, + 
sourceType: `Nullable(${valueType})`, // Nullable(T) + }, + sourceType: columnType, // Array(Nullable(T)) + dimensions, + }) + }) + }) + + it('should parse Array with Enum value type', async () => { + type TestArgs = { + value: ParsedColumnEnum + dimensions: number + columnType: string + } + const sourceEnum8 = `Enum8('foo' = 42)` + const valuesEnum8 = new Map([[42, 'foo']]) + const sourceEnum16 = `Enum16('bar' = 144, 'qaz' = 500)` + const valuesEnum16 = new Map([ + [144, 'bar'], + [500, 'qaz'], + ]) + const args: TestArgs[] = [ + { + value: { + type: 'Enum', + intSize: 8, + values: valuesEnum8, + sourceType: sourceEnum8, + }, + dimensions: 1, + columnType: `Array(${sourceEnum8})`, + }, + { + value: { + type: 'Enum', + intSize: 16, + values: valuesEnum16, + sourceType: sourceEnum16, + }, + dimensions: 1, + columnType: `Array(${sourceEnum16})`, + }, + { + value: { + type: 'Enum', + intSize: 8, + values: valuesEnum8, + sourceType: sourceEnum8, + }, + dimensions: 2, + columnType: `Array(Array(${sourceEnum8}))`, + }, + { + value: { + type: 'Enum', + intSize: 16, + values: valuesEnum16, + sourceType: sourceEnum16, + }, + dimensions: 3, + columnType: `Array(Array(Array(${sourceEnum16})))`, + }, + ] + args.forEach(({ columnType, dimensions, value }) => { + const result = parseArrayType({ columnType, sourceType: columnType }) + expect(result) + .withContext( + `Expected ${columnType} to be parsed as an Array with value type ${value.sourceType} and ${dimensions} dimensions` + ) + .toEqual({ + type: 'Array', + sourceType: columnType, + dimensions, + value, + }) + }) + }) + + it('should parse Array of DateTime', async () => { + type TestArgs = { + value: ParsedColumnDateTime + dimensions: number + columnType: string + } + const args: TestArgs[] = [ + { + value: { + type: 'DateTime', + timezone: null, + sourceType: 'DateTime', + }, + dimensions: 1, + columnType: 'Array(DateTime)', + }, + { + value: { + type: 'DateTime', + timezone: 'UTC', + sourceType: `DateTime('UTC')`, + }, + dimensions: 1, + columnType: `Array(DateTime('UTC'))`, + }, + { + value: { + type: 'DateTime', + timezone: 'Etc/GMT-5', + sourceType: `DateTime('Etc/GMT-5')`, + }, + dimensions: 2, + columnType: `Array(Array(DateTime('Etc/GMT-5')))`, + }, + ] + args.forEach(({ columnType, dimensions, value }) => { + const result = parseArrayType({ columnType, sourceType: columnType }) + expect(result) + .withContext( + `Expected ${columnType} to be parsed as an Array with value type ${value.sourceType} and ${dimensions} dimensions` + ) + .toEqual({ + type: 'Array', + sourceType: columnType, + dimensions, + value, + }) + }) + }) + + it('should parse Array of DateTime64', async () => { + type TestArgs = { + value: ParsedColumnDateTime64 + dimensions: number + columnType: string + } + const args: TestArgs[] = [ + { + value: { + type: 'DateTime64', + timezone: null, + sourceType: 'DateTime64(0)', + precision: 0, + }, + dimensions: 1, + columnType: 'Array(DateTime64(0))', + }, + { + value: { + type: 'DateTime64', + timezone: 'UTC', + sourceType: `DateTime64(3, 'UTC')`, + precision: 3, + }, + dimensions: 1, + columnType: `Array(DateTime64(3, 'UTC'))`, + }, + { + value: { + type: 'DateTime64', + timezone: 'Etc/GMT-5', + sourceType: `DateTime64(6, 'Etc/GMT-5')`, + precision: 6, + }, + dimensions: 2, + columnType: `Array(Array(DateTime64(6, 'Etc/GMT-5')))`, + }, + { + value: { + type: 'DateTime64', + timezone: 'Europe/Sofia', + sourceType: `DateTime64(9, 'Europe/Sofia')`, + precision: 9, + }, + dimensions: 3, + columnType: 
`Array(Array(Array(DateTime64(9, 'Europe/Sofia'))))`, + }, + ] + + args.forEach(({ columnType, dimensions, value }) => { + const result = parseArrayType({ columnType, sourceType: columnType }) + expect(result) + .withContext( + `Expected ${columnType} to be parsed as an Array with value type ${value.sourceType} and ${dimensions} dimensions` + ) + .toEqual({ + type: 'Array', + sourceType: columnType, + dimensions, + value, + }) + }) + }) + + // TODO: Map type test. + + it('should throw on invalid Array type', async () => { + // Array(Int8) is the shortest valid definition + const args = [ + ['Array'], + ['Array('], + ['Array()'], + ['Array(a'], + ['Array(ab'], + ['Array(ab)'], + ['Array(abc)'], + ['String'], + ] + args.forEach(([columnType]) => { + expect(() => parseArrayType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid Array type') + }) + }) +}) diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_datetime.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_datetime.test.ts new file mode 100644 index 00000000..d415756a --- /dev/null +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_datetime.test.ts @@ -0,0 +1,116 @@ +import { + parseDateTime64Type, + parseDateTimeType, +} from '../../src/data_formatter/row_binary/columns_parser' + +fdescribe('RowBinary column types parser - DateTime and DateTime64', () => { + describe('DateTime', () => { + it('should parse DateTime', async () => { + const args: [string, string | null][] = [ + ['DateTime', null], + [`DateTime('GB')`, 'GB'], + [`DateTime('UTC')`, 'UTC'], + [`DateTime('Europe/Amsterdam')`, 'Europe/Amsterdam'], + ] + args.forEach(([columnType, timezone]) => { + const result = parseDateTimeType({ columnType, sourceType: columnType }) + expect(result) + .withContext(`Expected ${columnType} to be parsed as a DateTime`) + .toEqual({ type: 'DateTime', sourceType: columnType, timezone }) + }) + }) + + it('should throw on invalid DateTime', async () => { + // DateTime('GB') has the least amount of chars allowed for a valid DateTime type. 
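+      // i.e. the shortest possible valid definition is `DateTime(` plus a
+      // quoted two-character timezone such as 'GB', plus the closing paren.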
+ const args: [string][] = [ + ['DateTime()'], + [`DateTime(')`], + [`DateTime('')`], + [`DateTime('A')`], + ['String'], + ] + args.forEach(([columnType]) => { + expect(() => parseDateTimeType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid DateTime type') + }) + }) + }) + + describe('DateTime64', () => { + const precisionRange = [...Array(10).keys()] // 0..9 + + it('should parse DateTime64 without timezone', async () => { + const args: [string, number][] = precisionRange.map((precision) => [ + `DateTime64(${precision})`, + precision, + ]) + args.forEach(([columnType, precision]) => { + const result = parseDateTime64Type({ + columnType, + sourceType: columnType, + }) + expect(result) + .withContext( + `Expected ${columnType} to be parsed as a DateTime64 with precision ${precision}` + ) + .toEqual({ + type: 'DateTime64', + timezone: null, + sourceType: columnType, + precision, + }) + }) + }) + + it('should parse DateTime64 with timezone', async () => { + const allPrecisionArgs: [string, number, string][][] = precisionRange.map( + (precision) => [ + [`DateTime64(${precision}, 'GB')`, precision, 'GB'], + [`DateTime64(${precision}, 'UTC')`, precision, 'UTC'], + [`DateTime64(${precision}, 'Etc/GMT-5')`, precision, 'Etc/GMT-5'], + ] + ) + allPrecisionArgs.forEach((args) => + args.forEach(([columnType, precision, timezone]) => { + const result = parseDateTime64Type({ + columnType, + sourceType: columnType, + }) + expect(result) + .withContext( + `Expected ${columnType} to be parsed as a DateTime64 with precision ${precision} and timezone ${timezone}` + ) + .toEqual({ + type: 'DateTime64', + sourceType: columnType, + timezone, + precision, + }) + }) + ) + }) + + it('should throw on invalid DateTime64 type', async () => { + const args = [['DateTime64('], ['DateTime64()'], ['String']] + args.forEach(([columnType]) => { + expect(() => + parseDateTime64Type({ columnType, sourceType: columnType }) + ) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid DateTime64 type') + }) + }) + + it('should throw on invalid DateTime64 precision', async () => { + const args = [[`DateTime64(')`], [`DateTime64(foo)`]] + args.forEach(([columnType]) => { + expect(() => + parseDateTime64Type({ columnType, sourceType: columnType }) + ) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid DateTime64 precision') + }) + }) + }) +}) diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_decimal.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_decimal.test.ts new file mode 100644 index 00000000..430eb7cd --- /dev/null +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_decimal.test.ts @@ -0,0 +1,103 @@ +import { parseDecimalType } from '../../src/data_formatter/row_binary/columns_parser' + +fdescribe('RowBinary column types parser - Decimal', () => { + type TestArgs = { + sourceType: string + precision: number + scale: number + intSize: 32 | 64 | 128 | 256 + } + + it('should parse Decimal', async () => { + const args: TestArgs[] = [ + { + sourceType: 'Decimal(7, 2)', + precision: 7, + scale: 2, + intSize: 32, + }, + { + sourceType: 'Decimal(12, 4)', + precision: 12, + scale: 4, + intSize: 64, + }, + { + sourceType: 'Decimal(27, 6)', + precision: 27, + scale: 6, + intSize: 128, + }, + { + sourceType: 'Decimal(42, 8)', + precision: 42, + scale: 8, + intSize: 256, + }, + ] + args.forEach(({ sourceType, precision, scale, intSize }) => { + const result = 
parseDecimalType({ columnType: sourceType, sourceType }) + expect(result) + .withContext( + `Expected ${sourceType} to be parsed as a Decimal with precision ${precision}, scale ${scale} and intSize ${intSize}` + ) + .toEqual({ + type: 'Decimal', + params: { precision, scale, intSize }, + sourceType, + }) + }) + }) + + it('should throw on invalid Decimal type', async () => { + const args: [string][] = [ + ['Decimal'], + ['Decimal('], + ['Decimal()'], + ['Decimal(1)'], + ['Decimal(1,)'], + ['Decimal(1, )'], + ['String'], + ] + args.forEach(([columnType]) => { + expect(() => parseDecimalType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid Decimal type') + }) + }) + + it('should throw on invalid Decimal precision', async () => { + const args: [string][] = [ + ['Decimal(0, 0)'], + ['Decimal(x, 0)'], + [`Decimal(', ')`], + [`Decimal(77, 1)`], // max is 76 + ] + args.forEach(([columnType]) => { + expect(() => parseDecimalType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid Decimal precision') + }) + }) + + it('should throw on invalid Decimal scale', async () => { + const args: [string][] = [ + ['Decimal(1, 2)'], // scale should be less than precision + ['Decimal(1, x)'], + [`Decimal(42, ,)`], + [`Decimal(42, ')`], + ] + args.forEach(([columnType]) => { + expect(() => parseDecimalType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid Decimal scale') + }) + }) + + it('should throw when precision or scale cannot be parsed', async () => { + const columnType = 'Decimal(foobar)' + expect(() => + parseDecimalType({ columnType, sourceType: columnType }) + ).toThrowError('Expected Decimal type to have both precision and scale') + }) +}) diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_enum.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_enum.test.ts new file mode 100644 index 00000000..7b805972 --- /dev/null +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_enum.test.ts @@ -0,0 +1,164 @@ +import { parseEnumType } from '../../src/data_formatter/row_binary/columns_parser' + +fdescribe('RowBinary column types parser - Enum', () => { + const enumTypes: ['Enum8' | 'Enum16', 8 | 16][] = [ + ['Enum8', 8], + ['Enum16', 16], + ] + + it('should parse correct values', async () => { + type TestArgs = { + columnType: string + expectedValues: Map + expectedIntSize: 8 | 16 + } + const allEnumSizeArgs: TestArgs[][] = enumTypes.map( + ([enumType, expectedIntSize]) => [ + { + columnType: `${enumType}('a' = 1)`, + expectedValues: new Map([[1, 'a']]), + expectedIntSize, + }, + { + columnType: `${enumType}('a' = 0, 'b' = 2)`, + expectedValues: new Map([ + [0, 'a'], + [2, 'b'], + ]), + expectedIntSize, + }, + { + columnType: `${enumType}('a' = 1, 'b' = 2, 'c' = 42)`, + expectedValues: new Map([ + [1, 'a'], + [2, 'b'], + [42, 'c'], + ]), + expectedIntSize, + }, + { + columnType: `${enumType}('f\\'' = 1, 'x =' = 2, 'b\\'\\'\\'' = 3, '\\'c=4=' = 42, '4' = 100)`, + expectedValues: new Map([ + [1, "f\\'"], + [2, 'x ='], + [3, "b\\'\\'\\'"], + [42, "\\'c=4="], + [100, '4'], + ]), + expectedIntSize, + }, + { + columnType: `${enumType}('' = 0)`, + expectedValues: new Map([[0, '']]), + expectedIntSize, + }, + { + columnType: `${enumType}('' = 42)`, + expectedValues: new Map([[42, '']]), + expectedIntSize, + }, + { + columnType: `${enumType}('foo' = 1, '' = 42)`, + 
expectedValues: new Map([ + [1, 'foo'], + [42, ''], + ]), + expectedIntSize, + }, + { + columnType: `${enumType}('' = 0, 'foo' = 42)`, + expectedValues: new Map([ + [0, ''], + [42, 'foo'], + ]), + expectedIntSize, + }, + ] + ) + + allEnumSizeArgs.forEach((args) => + args.forEach(({ columnType, expectedValues, expectedIntSize }) => { + const result = parseEnumType({ columnType, sourceType: columnType }) + expect(result) + .withContext( + `Expected ${columnType} to be parsed as an Enum with intSize ${expectedIntSize} and values [${[ + ...expectedValues.entries(), + ]}]` + ) + .toEqual({ + type: 'Enum', + intSize: expectedIntSize, + values: expectedValues, + sourceType: columnType, + }) + }) + ) + }) + + it('should throw when the type is not a valid enum', async () => { + const args: [string][] = [ + ['Enum'], // should be either 8 or 16 + ['Enum32'], + ['Enum64'], + ['String'], + ['Enum(String)'], + ] + args.forEach(([columnType]) => { + expect(() => parseEnumType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Expected Enum to be either Enum8 or Enum16') + }) + }) + + it('should throw when the values are not valid', async () => { + const args: [string][] = [["Enum8('a' = x)"], ["Enum16('foo' = 'bar')"]] + args.forEach(([columnType]) => { + expect(() => parseEnumType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Expected Enum index to be a valid number') + }) + }) + + it('should throw on duplicate indices', async () => { + const args: [string][] = [ + ["Enum8('a' = 0, 'b' = 0)"], + ["Enum8('a' = 0, 'b' = 1, 'c' = 1)"], + ] + args.forEach(([columnType]) => { + expect(() => parseEnumType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Duplicate Enum index') + }) + }) + + it('should throw on duplicate names', async () => { + const args: [string][] = [ + ["Enum8('a' = 0, 'a' = 1)"], + ["Enum8('a' = 0, 'b' = 1, 'b' = 2)"], + ] + args.forEach(([columnType]) => { + expect(() => parseEnumType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Duplicate Enum name') + }) + }) + + it('should throw when Enum has no values to parse', async () => { + // The minimal allowed Enum definition is Enum8('' = 0), i.e. 6 chars inside. 
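+    // For contrast, a hedged sketch of the shortest definition that should parse:
+    //   parseEnumType({ columnType: "Enum8('' = 0)", sourceType: "Enum8('' = 0)" })
+    //     -> { type: 'Enum', intSize: 8, values: Map { 0 => '' }, sourceType: "Enum8('' = 0)" }
+    // Each case below drops at least one required character from that minimal form.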
+ const allEnumTypeArgs: string[][] = enumTypes.map(([enumType]) => [ + `${enumType}()`, + `${enumType}(')`, + `${enumType}('')`, + `${enumType}('' )`, + `${enumType}('' =)`, + `${enumType}('' = )`, + ]) + allEnumTypeArgs.forEach((args) => + args.forEach((columnType) => { + expect(() => parseEnumType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid Enum type values') + }) + ) + }) +}) diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_map.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_map.test.ts new file mode 100644 index 00000000..b9a9ee23 --- /dev/null +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_map.test.ts @@ -0,0 +1,41 @@ +import type { ParsedColumnMap } from '../../src/data_formatter/row_binary/columns_parser' +import { parseMapType } from '../../src/data_formatter/row_binary/columns_parser' + +fdescribe('RowBinary column types parser - Map', () => { + it('should parse Map with simple types', async () => { + const args: [ParsedColumnMap, string][] = [ + [ + { + type: 'Map', + key: { type: 'Simple', columnType: 'String', sourceType: 'String' }, + value: { type: 'Simple', columnType: 'UInt8', sourceType: 'UInt8' }, + sourceType: 'Map(String, UInt8)', + }, + 'Map(String, UInt8)', + ], + [ + { + type: 'Map', + key: { type: 'Simple', columnType: 'Int32', sourceType: 'Int32' }, + value: { + type: 'Simple', + columnType: 'Float32', + sourceType: 'Float32', + }, + sourceType: 'Map(Int32, Float32)', + }, + 'Map(Int32, Float32)', + ], + ] + args.forEach(([expected, sourceType]) => { + const result = parseMapType({ columnType: sourceType, sourceType }) + expect(result) + .withContext( + `Expected ${sourceType} to be parsed as a Map with key type ${expected.key.sourceType} and value type ${expected.value.sourceType}` + ) + .toEqual(expected) + }) + }) + + // TODO: rest of the allowed types. 
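+  // A hedged sketch of a nested definition that should already work, since the value
+  // side is parsed recursively via parseColumnType (not asserted here yet):
+  //   Map(String, Map(String, Array(Int32))) ->
+  //     { type: 'Map',
+  //       key: { type: 'Simple', columnType: 'String', sourceType: 'String' },
+  //       value: { type: 'Map', ... }, // nested Map, parsed recursively
+  //       sourceType: 'Map(String, Map(String, Array(Int32)))' }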
+}) diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_nullable.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_nullable.test.ts new file mode 100644 index 00000000..c98be99f --- /dev/null +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_nullable.test.ts @@ -0,0 +1,266 @@ +import type { + ParsedColumnDateTime, + ParsedColumnDateTime64, + ParsedColumnDecimal, + ParsedColumnEnum, + ParsedColumnSimple, +} from '../../src/data_formatter/row_binary/columns_parser' +import { asNullableType } from '../../src/data_formatter/row_binary/columns_parser' + +fdescribe('RowBinary column types parser - Nullable', () => { + it('should wrap a simple type', async () => { + const args: [ParsedColumnSimple, string][] = [ + [ + { type: 'Simple', columnType: 'String', sourceType: 'String' }, + 'Nullable(String)', + ], + [ + { type: 'Simple', columnType: 'UInt8', sourceType: 'UInt8' }, + 'Nullable(UInt8)', + ], + [ + { type: 'Simple', columnType: 'Int32', sourceType: 'Int32' }, + 'Nullable(Int32)', + ], + [ + { type: 'Simple', columnType: 'Float32', sourceType: 'Float32' }, + 'Nullable(Float32)', + ], + ] + args.forEach(([value, sourceType]) => { + const result = asNullableType(value, sourceType) + expect(result) + .withContext( + `Expected ${value.columnType} to be wrapped as ${sourceType}` + ) + .toEqual({ + type: 'Nullable', + sourceType, + value, + }) + }) + }) + + it('should wrap an Enum', async () => { + const sourceEnum8 = `Enum8('foo' = 42)` + const valuesEnum8 = new Map([[42, 'foo']]) + const sourceEnum16 = `Enum16('bar' = 144, 'qaz' = 500)` + const valuesEnum16 = new Map([ + [144, 'bar'], + [500, 'qaz'], + ]) + const args: [ParsedColumnEnum, string][] = [ + [ + { + type: 'Enum', + intSize: 8, + values: valuesEnum8, + sourceType: sourceEnum8, + }, + 'Nullable(Enum8)', + ], + [ + { + type: 'Enum', + intSize: 16, + values: valuesEnum16, + sourceType: sourceEnum16, + }, + 'Nullable(Enum16)', + ], + ] + args.forEach(([value, sourceType]) => { + const result = asNullableType(value, sourceType) + expect(result) + .withContext(`Expected ${value.type} to be wrapped as ${sourceType}`) + .toEqual({ + type: 'Nullable', + sourceType, + value, + }) + }) + }) + + it('should wrap a Decimal', async () => { + const args: [ParsedColumnDecimal, string][] = [ + [ + { + type: 'Decimal', + params: { intSize: 32, precision: 4, scale: 3 }, + sourceType: 'Decimal(4, 3)', + }, + 'Nullable(Decimal(4, 3))', + ], + [ + { + type: 'Decimal', + params: { intSize: 64, precision: 12, scale: 6 }, + sourceType: 'Decimal(12, 6)', + }, + 'Nullable(Decimal(12, 6))', + ], + [ + { + type: 'Decimal', + params: { intSize: 128, precision: 24, scale: 12 }, + sourceType: 'Decimal(24, 12)', + }, + 'Nullable(Decimal(24, 12))', + ], + [ + { + type: 'Decimal', + params: { intSize: 256, precision: 42, scale: 20 }, + sourceType: 'Decimal(42, 20)', + }, + 'Nullable(Decimal(42, 20))', + ], + ] + args.forEach(([value, sourceType]) => { + const result = asNullableType(value, sourceType) + expect(result) + .withContext( + `Expected ${value.sourceType} to be wrapped as ${sourceType}` + ) + .toEqual({ + type: 'Nullable', + sourceType, + value, + }) + }) + }) + + it('should wrap a DateTime', async () => { + const args: [ParsedColumnDateTime, string][] = [ + [ + { type: 'DateTime', timezone: null, sourceType: 'DateTime' }, + 'Nullable(DateTime)', + ], + [ + { type: 'DateTime', timezone: 'UTC', sourceType: "DateTime('UTC')" }, + `Nullable(DateTime('UTC'))`, + ], + [ + { type: 'DateTime', 
timezone: 'GB', sourceType: "DateTime('GB')" }, + `Nullable(DateTime('GB'))`, + ], + [ + { + type: 'DateTime', + timezone: 'Etc/GMT-5', + sourceType: `DateTime('Etc/GMT-5')`, + }, + `Nullable(DateTime('Etc/GMT-5'))`, + ], + ] + args.forEach(([value, sourceType]) => { + const result = asNullableType(value, sourceType) + expect(result) + .withContext( + `Expected ${value.sourceType} to be wrapped as ${sourceType}` + ) + .toEqual({ + type: 'Nullable', + sourceType, + value, + }) + }) + }) + + it('should wrap a DateTime64', async () => { + const args: [ParsedColumnDateTime64, string][] = [ + [ + { + type: 'DateTime64', + timezone: null, + sourceType: 'DateTime64(0)', + precision: 3, + }, + 'Nullable(DateTime64(0))', + ], + [ + { + type: 'DateTime64', + timezone: null, + sourceType: 'DateTime64(3)', + precision: 3, + }, + 'Nullable(DateTime64(3))', + ], + [ + { + type: 'DateTime64', + timezone: 'UTC', + sourceType: `DateTime64(3, 'UTC')`, + precision: 3, + }, + `Nullable(DateTime64(3, 'UTC'))`, + ], + [ + { + type: 'DateTime64', + timezone: 'GB', + sourceType: `DateTime64(6, 'GB')`, + precision: 6, + }, + `Nullable(DateTime64(6, 'GB'))`, + ], + [ + { + type: 'DateTime64', + timezone: 'Etc/GMT-5', + sourceType: `DateTime64(9, 'Etc/GMT-5')`, + precision: 9, + }, + `Nullable(DateTime64(9, 'Etc/GMT-5'))`, + ], + ] + args.forEach(([value, sourceType]) => { + const result = asNullableType(value, sourceType) + expect(result) + .withContext( + `Expected ${value.sourceType} to be wrapped as ${sourceType}` + ) + .toEqual({ + type: 'Nullable', + sourceType, + value, + }) + }) + }) + + it('should throw in case of Array or Map', async () => { + const columnUInt8: ParsedColumnSimple = { + type: 'Simple', + columnType: 'UInt8', + sourceType: 'UInt8', + } + const columnString: ParsedColumnSimple = { + type: 'Simple', + columnType: 'String', + sourceType: 'String', + } + expect(() => + asNullableType( + { + type: 'Map', + key: columnUInt8, + value: columnString, + sourceType: 'Map(UInt8, String)', + }, + '...' + ) + ).toThrowError('Map cannot be Nullable') + expect(() => + asNullableType( + { + type: 'Array', + value: columnUInt8, + dimensions: 1, + sourceType: 'Array(UInt8)', + }, + '...' 
+ ) + ).toThrowError('Array cannot be Nullable') + }) +}) diff --git a/packages/client-common/src/data_formatter/row_binary/columns_header.ts b/packages/client-common/src/data_formatter/row_binary/columns_header.ts index 6e946e1c..97d7ba17 100644 --- a/packages/client-common/src/data_formatter/row_binary/columns_header.ts +++ b/packages/client-common/src/data_formatter/row_binary/columns_header.ts @@ -8,7 +8,7 @@ import { parseColumnType } from './columns_parser' import { ClickHouseRowBinaryError } from './errors' import type { DecodeResult } from './read_bytes' import { readBytesAsUnsignedLEB128 } from './read_bytes' -import type { SimpleTypeDecoder, TypeDecoder } from './types' +import type { SimpleTypeDecoder } from './types' import { RowBinarySimpleDecoders, RowBinaryTypesDecoder } from './types' export type DecodedColumns = DecodeResult<{ @@ -60,7 +60,6 @@ export class RowBinaryColumnsHeader { nextLoc = res[1] const col = parseColumnType(res[0]) types[i] = col - let valueDecoder: TypeDecoder switch (col.type) { case 'Simple': decoders[i] = RowBinarySimpleDecoders[col.columnType] @@ -76,7 +75,7 @@ export class RowBinaryColumnsHeader { break default: throw ClickHouseRowBinaryError.headerDecodingError( - 'Unsupported column type', + `Unsupported column type ${col.type}`, { col } ) } @@ -113,29 +112,32 @@ function getEnumDecoder( function getArrayDecoder(col: ParsedColumnArray): SimpleTypeDecoder { let valueDecoder - if (col.valueType === 'Decimal') { - valueDecoder = getDecimalDecoder(col.decimalParams) - } else if (col.valueType === 'Enum') { - valueDecoder = getEnumDecoder(col.intSize, col.values) + if (col.value.type === 'Simple') { + valueDecoder = RowBinarySimpleDecoders[col.value.columnType] + } else if (col.value.type === 'Decimal') { + valueDecoder = getDecimalDecoder(col.value.params) + } else if (col.value.type === 'Enum') { + valueDecoder = getEnumDecoder(col.value.intSize, col.value.values) + } else if (col.value.type === 'Nullable') { + valueDecoder = getNullableDecoder(col.value) } else { - valueDecoder = RowBinarySimpleDecoders[col.valueType] + // FIXME: add other types + throw new Error(`Unsupported Array value type: ${col.value.type}`) } - return RowBinaryTypesDecoder.array( - col.valueNullable - ? 
RowBinaryTypesDecoder.nullable(valueDecoder) - : valueDecoder, - col.dimensions - ) + return RowBinaryTypesDecoder.array(valueDecoder, col.dimensions) } function getNullableDecoder(col: ParsedColumnNullable) { let valueDecoder - if (col.valueType === 'Decimal') { - valueDecoder = getDecimalDecoder(col.decimalParams) - } else if (col.valueType === 'Enum') { - valueDecoder = getEnumDecoder(col.intSize, col.values) + if (col.value.type === 'Simple') { + valueDecoder = RowBinarySimpleDecoders[col.value.columnType] + } else if (col.value.type === 'Decimal') { + valueDecoder = getDecimalDecoder(col.value.params) + } else if (col.value.type === 'Enum') { + valueDecoder = getEnumDecoder(col.value.intSize, col.value.values) } else { - valueDecoder = RowBinarySimpleDecoders[col.valueType] + // FIXME: add other types + throw new Error(`Unsupported Nullable value type: ${col.value.type}`) } return RowBinaryTypesDecoder.nullable(valueDecoder) } diff --git a/packages/client-common/src/data_formatter/row_binary/columns_parser.ts b/packages/client-common/src/data_formatter/row_binary/columns_parser.ts index c2eb34a2..db134734 100644 --- a/packages/client-common/src/data_formatter/row_binary/columns_parser.ts +++ b/packages/client-common/src/data_formatter/row_binary/columns_parser.ts @@ -8,28 +8,29 @@ export interface ParsedColumnSimple { * * UInt8 -> UInt8 * * LowCardinality(Nullable(String)) -> String */ columnType: SimpleColumnType - /** ClickHouse type as it is defined in the table. */ - dbType: string + /** The original type before parsing. */ + sourceType: string } -interface ParsedColumnNullableBase { - type: 'Nullable' - dbType: string +export interface ParsedColumnFixedString { + type: 'FixedString' + sizeBytes: number + sourceType: string +} + +export interface ParsedColumnDateTime { + type: 'DateTime' + timezone: string | null + sourceType: string +} + +export interface ParsedColumnDateTime64 { + type: 'DateTime64' + timezone: string | null + /** Valid range: [0 : 9] */ + precision: number + sourceType: string } -export type ParsedColumnNullable = - | (ParsedColumnNullableBase & { - /** Used to determine how to decode T from Nullable(T) */ - valueType: SimpleColumnType - }) - | (ParsedColumnNullableBase & { - valueType: 'Decimal' - decimalParams: ParsedColumnDecimal['params'] - }) - | (ParsedColumnNullableBase & { - valueType: 'Enum' - values: ParsedColumnEnum['values'] - intSize: ParsedColumnEnum['intSize'] - }) export interface ParsedColumnEnum { type: 'Enum' @@ -37,13 +38,7 @@ export interface ParsedColumnEnum { values: Map /** UInt8 or UInt16 */ intSize: 8 | 16 - dbType: string -} - -export interface ParseColumnTuple { - type: 'Tuple' - elements: ParsedColumnType[] - dbType: string + sourceType: string } /** Int size for Decimal depends on the Precision @@ -60,52 +55,81 @@ export interface DecimalParams { export interface ParsedColumnDecimal { type: 'Decimal' params: DecimalParams - dbType: string + sourceType: string +} + +/** Array or Map itself cannot be Nullable */ +export interface ParsedColumnNullable { + type: 'Nullable' + value: + | ParsedColumnSimple + | ParsedColumnEnum + | ParsedColumnDecimal + | ParsedColumnFixedString + | ParsedColumnDateTime + | ParsedColumnDateTime64 + sourceType: string } /** Array cannot be Nullable or LowCardinality, but its value type can be. * Arrays can be multidimensional, e.g. Array(Array(Array(T))). * Arrays are allowed to have a Map as the value type. 
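+ * A hedged example of the parsed shape:
+ *   Array(Array(Nullable(String))) ->
+ *     { type: 'Array', dimensions: 2,
+ *       value: { type: 'Nullable', value: { type: 'Simple', columnType: 'String', ... }, ... },
+ *       sourceType: 'Array(Array(Nullable(String)))' }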
*/ -interface ParsedColumnArrayBase { +export interface ParsedColumnArray { type: 'Array' - valueNullable: boolean + value: + | ParsedColumnNullable + | ParsedColumnSimple + | ParsedColumnFixedString + | ParsedColumnDecimal + | ParsedColumnEnum + | ParsedColumnMap + | ParsedColumnDateTime + | ParsedColumnDateTime64 /** Array(T) = 1 dimension, Array(Array(T)) = 2, etc. */ dimensions: number - dbType: string + sourceType: string } -export type ParsedColumnArray = - | (ParsedColumnArrayBase & { - /** Represents the final value type; nested arrays are handled with {@link ParsedColumnArray.dimensions} */ - valueType: SimpleColumnType - }) - | (ParsedColumnArrayBase & { - valueType: 'Decimal' - decimalParams: DecimalParams - }) - | (ParsedColumnArrayBase & { - valueType: 'Enum' - values: ParsedColumnEnum['values'] - intSize: ParsedColumnEnum['intSize'] - }) // TODO: add Tuple support. - -// export interface ParsedColumnMap { -// type: 'Map' -// key: ParsedColumnSimple -// value: ParsedColumnType -// dbType: string -// } // TODO - add Map support. + +/** @see https://clickhouse.com/docs/en/sql-reference/data-types/map */ +export interface ParsedColumnMap { + type: 'Map' + /** Possible key types: + * - String, Integer, UUID, Date, Date32 ({@link ParsedColumnSimple}) + * - FixedString + * - DateTime + * - Enum + */ + key: + | ParsedColumnSimple + | ParsedColumnFixedString + | ParsedColumnEnum + | ParsedColumnDateTime + /** Possible value type: arbitrary, including Map and Array. */ + value: ParsedColumnType + sourceType: string +} + +// TODO: Tuple support. +// export interface ParseColumnTuple { +// type: 'Tuple' +// elements: ParsedColumnType[] +// sourceType: string +// } export type ParsedColumnType = | ParsedColumnSimple + | ParsedColumnFixedString | ParsedColumnNullable | ParsedColumnDecimal + | ParsedColumnDateTime + | ParsedColumnDateTime64 | ParsedColumnArray | ParsedColumnEnum -// | ParsedColumnMap // TODO - add Map support. 
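+  // A hedged sketch of how parseColumnType (below) combines these variants:
+  //   parseColumnType('Nullable(Decimal(12, 4))') ->
+  //     { type: 'Nullable', sourceType: 'Nullable(Decimal(12, 4))',
+  //       value: { type: 'Decimal', params: { precision: 12, scale: 4, intSize: 64 },
+  //                sourceType: 'Decimal(12, 4)' } }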
+ | ParsedColumnMap -export function parseColumnType(dbType: string): ParsedColumnType { - let columnType = dbType +export function parseColumnType(sourceType: string): ParsedColumnType { + let columnType = sourceType let isNullable = false if (columnType.startsWith(LowCardinalityPrefix)) { columnType = columnType.slice(LowCardinalityPrefix.length, -1) @@ -115,87 +139,103 @@ export function parseColumnType(dbType: string): ParsedColumnType { isNullable = true } let result: ParsedColumnType - if (columnType.startsWith(DecimalPrefix)) { - const params = parseDecimalParams({ - dbType, - columnType, - }) + if (columnType in RowBinarySimpleDecoders) { result = { - type: 'Decimal', - params, - dbType, + type: 'Simple', + columnType: columnType as SimpleColumnType, + sourceType, } + } else if (columnType.startsWith(DecimalPrefix)) { + result = parseDecimalType({ + sourceType, + columnType, + }) + } else if (columnType.startsWith(DateTime64Prefix)) { + result = parseDateTime64Type({ sourceType, columnType }) + } else if (columnType.startsWith(DateTimePrefix)) { + result = parseDateTimeType({ sourceType, columnType }) } else if ( columnType.startsWith(Enum8Prefix) || columnType.startsWith(Enum16Prefix) ) { - result = parseEnum({ dbType, columnType }) + result = parseEnumType({ sourceType, columnType }) } else if (columnType.startsWith(ArrayPrefix)) { - result = parseArrayType({ dbType, columnType }) + result = parseArrayType({ sourceType, columnType }) } else if (columnType.startsWith(MapPrefix)) { + result = parseMapType({ sourceType, columnType }) + } else { throw ClickHouseRowBinaryError.headerDecodingError( - 'Map types are not supported yet', + 'Unsupported column type', { columnType } ) - } else { - // "Simple" types - if (columnType in RowBinarySimpleDecoders) { - result = { - type: 'Simple', - columnType: columnType as SimpleColumnType, - dbType, - } - } else { - throw ClickHouseRowBinaryError.headerDecodingError( - 'Unsupported column type', - { columnType } - ) - } } if (isNullable) { - return asNullableType(result, dbType) + return asNullableType(result, sourceType) } else { return result } } -export function parseDecimalParams({ +export function parseDecimalType({ columnType, - dbType, -}: ParseColumnTypeParams): DecimalParams { - if (!columnType.startsWith(DecimalPrefix)) { + sourceType, +}: ParseColumnTypeParams): ParsedColumnDecimal { + if ( + !columnType.startsWith(DecimalPrefix) || + columnType.length < DecimalPrefix.length + 5 // Decimal(1, 0) is the shortest valid definition + ) { throw ClickHouseRowBinaryError.headerDecodingError('Invalid Decimal type', { - dbType, + sourceType, columnType, }) } - - const split = columnType.slice(DecimalPrefix.length, -1).split(',') + const split = columnType.slice(DecimalPrefix.length, -1).split(', ') if (split.length !== 2) { - throw ClickHouseRowBinaryError.headerDecodingError('Invalid Decimal type', { - dbType, - columnType, - split, - }) + throw ClickHouseRowBinaryError.headerDecodingError( + 'Expected Decimal type to have both precision and scale', + { + sourceType, + columnType, + split, + } + ) + } + let intSize: DecimalParams['intSize'] = 32 + const precision = parseInt(split[0], 10) + if (Number.isNaN(precision) || precision < 1 || precision > 76) { + throw ClickHouseRowBinaryError.headerDecodingError( + 'Invalid Decimal precision', + { columnType, sourceType, precision } + ) + } + const scale = parseInt(split[1], 10) + if (Number.isNaN(scale) || scale < 0 || scale > precision) { + throw ClickHouseRowBinaryError.headerDecodingError( + 
'Invalid Decimal scale', + { columnType, sourceType, precision, scale } + ) } - const params: DecimalParams = { - precision: parseInt(split[0], 10), - scale: parseInt(split[1], 10), - intSize: 32, + if (precision > 38) { + intSize = 256 + } else if (precision > 18) { + intSize = 128 + } else if (precision > 9) { + intSize = 64 } - if (params.precision > 38) { - params.intSize = 256 - } else if (params.precision > 18) { - params.intSize = 128 - } else if (params.precision > 9) { - params.intSize = 64 + return { + type: 'Decimal', + params: { + precision, + scale, + intSize, + }, + sourceType, } - return params } -export function parseEnum({ +export function parseEnumType({ columnType, - dbType, + sourceType, }: ParseColumnTypeParams): ParsedColumnEnum { let intSize: 8 | 16 if (columnType.startsWith(Enum8Prefix)) { @@ -209,17 +249,18 @@ export function parseEnum({ 'Expected Enum to be either Enum8 or Enum16', { columnType, - dbType, + sourceType, } ) } - if (columnType.length < 2) { + // The minimal allowed Enum definition is Enum8('' = 0), i.e. 6 chars inside. + if (columnType.length < 6) { throw ClickHouseRowBinaryError.headerDecodingError( 'Invalid Enum type values', { columnType, - dbType, + sourceType, } ) } @@ -230,31 +271,6 @@ export function parseEnum({ let charEscaped = false // we should ignore escaped ticks let startIndex = 1 // Skip the first ' - function pushEnumIndex(start: number, end: number) { - const index = parseInt(columnType.slice(start, end), 10) - if (Number.isNaN(index) || index < 0) { - throw ClickHouseRowBinaryError.headerDecodingError( - 'Expected Enum index to be a valid number', - { - columnType, - dbType, - names, - indices, - index, - start, - end, - } - ) - } - if (indices.includes(index)) { - throw ClickHouseRowBinaryError.headerDecodingError( - 'Duplicate Enum index', - { columnType, dbType, index, names, indices } - ) - } - indices.push(index) - } - // Should support the most complicated enums, such as Enum8('f\'' = 1, 'x =' = 2, 'b\'\'\'' = 3, '\'c=4=' = 42, '4' = 100) for (let i = 1; i < columnType.length; i++) { if (parsingName) { @@ -267,7 +283,7 @@ export function parseEnum({ if (names.includes(name)) { throw ClickHouseRowBinaryError.headerDecodingError( 'Duplicate Enum name', - { columnType, dbType, name, names, indices } + { columnType, sourceType, name, names, indices } ) } names.push(name) @@ -297,7 +313,7 @@ export function parseEnum({ if (names.length !== indices.length) { throw ClickHouseRowBinaryError.headerDecodingError( 'Expected Enum to have the same number of names and indices', - { columnType, dbType, names, indices } + { columnType, sourceType, names, indices } ) } @@ -310,150 +326,297 @@ export function parseEnum({ type: 'Enum', values, intSize, - dbType, + sourceType, + } + + function pushEnumIndex(start: number, end: number) { + const index = parseInt(columnType.slice(start, end), 10) + if (Number.isNaN(index) || index < 0) { + throw ClickHouseRowBinaryError.headerDecodingError( + 'Expected Enum index to be a valid number', + { + columnType, + sourceType, + names, + indices, + index, + start, + end, + } + ) + } + if (indices.includes(index)) { + throw ClickHouseRowBinaryError.headerDecodingError( + 'Duplicate Enum index', + { columnType, sourceType, index, names, indices } + ) + } + indices.push(index) } } -export function parseTupleType({ +export function parseMapType({ columnType, - dbType, -}: ParseColumnTypeParams): ParseColumnTuple { - if (!columnType.startsWith(TuplePrefix)) { - throw 
ClickHouseRowBinaryError.headerDecodingError('Invalid Tuple type', { + sourceType, +}: ParseColumnTypeParams): ParsedColumnMap { + if ( + !columnType.startsWith(MapPrefix) || + columnType.length < MapPrefix.length + 11 // the shortest definition seems to be Map(Int8, Int8) + ) { + throw ClickHouseRowBinaryError.headerDecodingError('Invalid Map type', { columnType, - dbType, + sourceType, }) } - columnType = columnType.slice(TuplePrefix.length, -1) - // TODO. + columnType = columnType.slice(MapPrefix.length, -1) + + let openParens = 0 // consider the type parsed once we reach a comma outside of parens. + + let keyColumnType: string | undefined + for (let i = 0; i < columnType.length; i++) { + if (openParens === 0) { + if (columnType[i] === ',') { + keyColumnType = columnType.slice(0, i) + break + } + } + if (columnType[i] === '(') { + openParens++ + } else if (columnType[i] === ')') { + openParens-- + } + } + if (keyColumnType === undefined) { + throw ClickHouseRowBinaryError.headerDecodingError( + 'Could not parse Map key type', + { + sourceType, + columnType, + } + ) + } + + const key = parseColumnType(keyColumnType) + if ( + key.type === 'DateTime64' || + key.type === 'Nullable' || + key.type === 'Array' || + key.type === 'Map' || + key.type === 'Decimal' // TODO: disallow Tuple as well. + ) { + throw ClickHouseRowBinaryError.headerDecodingError('Invalid Map key type', { + key, + sourceType, + }) + } + + const value = parseColumnType(columnType.slice(keyColumnType.length + 2)) return { - type: 'Tuple', - elements: [], - dbType, + type: 'Map', + key, + value, + sourceType, } } +// TODO. +// export function parseTupleType({ +// columnType, +// dbType, +// }: ParseColumnTypeParams): ParseColumnTuple { +// if (!columnType.startsWith(TuplePrefix)) { +// throw ClickHouseRowBinaryError.headerDecodingError('Invalid Tuple type', { +// columnType, +// dbType, +// }) +// } +// // columnType = columnType.slice(TuplePrefix.length, -1) +// return { +// type: 'Tuple', +// elements: [], +// dbType, +// } +// } + export function parseArrayType({ columnType, - dbType, + sourceType, }: ParseColumnTypeParams): ParsedColumnArray { - if (!columnType.startsWith(ArrayPrefix)) { + if ( + !columnType.startsWith(ArrayPrefix) || + columnType.length < ArrayPrefix.length + 5 // Array(Int8) is the shortest valid definition + ) { throw ClickHouseRowBinaryError.headerDecodingError('Invalid Array type', { columnType, - dbType, + sourceType, }) } let dimensions = 0 while (columnType.length > 0) { if (columnType.startsWith(ArrayPrefix)) { - columnType.slice(ArrayPrefix.length, -1) // Array(T) -> T + columnType = columnType.slice(ArrayPrefix.length, -1) // Array(T) -> T dimensions++ } else { break } } - if (dimensions === 0) { + if (dimensions === 0 || dimensions > 10) { + // TODO: check how many we can handle; max 10 seems more than enough. 
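+    // For example, 'Array(Array(Int8))' peels down to 'Int8' with dimensions = 2.
+    // After the guard above, the loop always strips at least one Array(...) layer,
+    // so the dimensions === 0 branch here is purely defensive.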
throw ClickHouseRowBinaryError.headerDecodingError( - 'Array type without dimensions', + 'Expected Array to have between 1 and 10 dimensions', { columnType } ) } - if (dimensions > 10) { + const value = parseColumnType(columnType) + if (value.type === 'Array') { throw ClickHouseRowBinaryError.headerDecodingError( - 'Array type with too many dimensions', - { columnType } + 'Unexpected Array as value type', + { columnType, sourceType } ) } - const valueNullable = columnType.startsWith(NullablePrefix) - if (valueNullable) { - columnType = columnType.slice(NullablePrefix.length, -1) - } - if (columnType.startsWith(DecimalPrefix)) { - const decimalParams = parseDecimalParams({ - dbType, - columnType, - }) - return { - type: 'Array', - valueType: 'Decimal', - valueNullable, - decimalParams, - dimensions, - dbType, - } + return { + type: 'Array', + value, + dimensions, + sourceType, } +} + +export function parseDateTimeType({ + columnType, + sourceType, +}: ParseColumnTypeParams): ParsedColumnDateTime { if ( - columnType.startsWith(Enum8Prefix) || - columnType.startsWith(Enum16Prefix) + columnType.startsWith(DateTimeWithTimezonePrefix) && + columnType.length > DateTimeWithTimezonePrefix.length + 4 // DateTime('GB') has the least amount of chars ) { - const { values, intSize } = parseEnum({ dbType, columnType }) + const timezone = columnType.slice(DateTimeWithTimezonePrefix.length + 1, -2) return { - type: 'Array', - valueType: 'Enum', - valueNullable, - values, - intSize, - dimensions, - dbType, + type: 'DateTime', + timezone, + sourceType, } - } - if (columnType in RowBinarySimpleDecoders) { + } else if ( + columnType.startsWith(DateTimePrefix) && + columnType.length === DateTimePrefix.length + ) { return { - type: 'Array', - valueType: columnType as SimpleColumnType, - valueNullable, - dimensions, - dbType, + type: 'DateTime', + timezone: null, + sourceType, } + } else { + throw ClickHouseRowBinaryError.headerDecodingError( + 'Invalid DateTime type', + { + columnType, + sourceType, + } + ) + } +} + +export function parseDateTime64Type({ + columnType, + sourceType, +}: ParseColumnTypeParams): ParsedColumnDateTime64 { + if ( + !columnType.startsWith(DateTime64Prefix) || + columnType.length < DateTime64Prefix.length + 2 // should at least have a precision + ) { + throw ClickHouseRowBinaryError.headerDecodingError( + 'Invalid DateTime64 type', + { + columnType, + sourceType, + } + ) + } + const precision = parseInt(columnType[DateTime64Prefix.length], 10) + if (Number.isNaN(precision) || precision < 0 || precision > 9) { + throw ClickHouseRowBinaryError.headerDecodingError( + 'Invalid DateTime64 precision', + { + columnType, + sourceType, + precision, + } + ) + } + let timezone = null + if (columnType.length > DateTime64Prefix.length + 2) { + // e.g. DateTime64(3, 'UTC') -> UTC + timezone = columnType.slice(DateTime64Prefix.length + 4, -2) + } + return { + type: 'DateTime64', + timezone, + precision, + sourceType, + } +} + +export function parseFixedStringType({ + columnType, + sourceType, +}: ParseColumnTypeParams): ParsedColumnFixedString { + if ( + !columnType.startsWith(FixedStringPrefix) || + columnType.length < FixedStringPrefix.length + 2 // i.e. 
at least FixedString(1) + ) { + throw ClickHouseRowBinaryError.headerDecodingError( + 'Invalid FixedString type', + { columnType, sourceType } + ) + } + const sizeBytes = parseInt(columnType.slice(FixedStringPrefix.length, -1), 10) + if (Number.isNaN(sizeBytes) || sizeBytes < 1) { + throw ClickHouseRowBinaryError.headerDecodingError( + 'Invalid FixedString size in bytes', + { columnType, sourceType, sizeBytes } + ) + } + return { + type: 'FixedString', + sizeBytes, + sourceType, } - throw ClickHouseRowBinaryError.headerDecodingError( - 'Unsupported array value type', - { dbType, columnType } - ) } export function asNullableType( - result: + value: | ParsedColumnSimple | ParsedColumnEnum | ParsedColumnDecimal - | ParsedColumnArray, - dbType: string + | ParsedColumnArray + | ParsedColumnMap + | ParsedColumnDateTime + | ParsedColumnDateTime64, + sourceType: string ): ParsedColumnNullable { - if (result.type === 'Array') { + // TODO: disallow Tuple as well. + if (value.type === 'Array' || value.type === 'Map') { throw ClickHouseRowBinaryError.headerDecodingError( - 'Array cannot be Nullable', - { dbType } + `${value.type} cannot be Nullable`, + { sourceType } ) } - if (result.type === 'Decimal') { - return { - type: 'Nullable', - valueType: 'Decimal', - decimalParams: result.params, - dbType, - } - } - if (result.type === 'Enum') { - return { - type: 'Nullable', - valueType: 'Enum', - values: result.values, - intSize: result.intSize, - dbType, - } + if (value.sourceType.startsWith(NullablePrefix)) { + value.sourceType = value.sourceType.slice(NullablePrefix.length, -1) } return { type: 'Nullable', - valueType: result.columnType, - dbType, + sourceType, + value, } } interface ParseColumnTypeParams { - dbType: string + /** A particular type to parse, such as DateTime. */ columnType: string + /** Full type definition, such as Map(String, DateTime). 
*/ + sourceType: string } const NullablePrefix = 'Nullable(' as const @@ -464,3 +627,7 @@ const MapPrefix = 'Map(' as const const Enum8Prefix = 'Enum8(' as const const Enum16Prefix = 'Enum16(' as const const TuplePrefix = 'Tuple(' as const +const DateTimePrefix = 'DateTime' as const +const DateTimeWithTimezonePrefix = 'DateTime(' as const +const DateTime64Prefix = 'DateTime64(' as const +const FixedStringPrefix = 'FixedString(' as const From ea7f1fc9bc24bfd8f69a2eb3f0a2d7e015a8abd5 Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Sat, 23 Mar 2024 23:06:27 +0100 Subject: [PATCH 09/14] Remove outdated files --- benchmarks/leaks/strings.ts | 0 .../row_binary/types_data_view.ts | 246 ------------------ 2 files changed, 246 deletions(-) delete mode 100644 benchmarks/leaks/strings.ts delete mode 100644 packages/client-common/src/data_formatter/row_binary/types_data_view.ts diff --git a/benchmarks/leaks/strings.ts b/benchmarks/leaks/strings.ts deleted file mode 100644 index e69de29b..00000000 diff --git a/packages/client-common/src/data_formatter/row_binary/types_data_view.ts b/packages/client-common/src/data_formatter/row_binary/types_data_view.ts deleted file mode 100644 index 61ffd794..00000000 --- a/packages/client-common/src/data_formatter/row_binary/types_data_view.ts +++ /dev/null @@ -1,246 +0,0 @@ -// import type { DecodeResult } from './read_bytes' -// import { -// readBytesAsFloat32, -// readBytesAsFloat64, -// readBytesAsUnsignedBigInt, -// readBytesAsUnsignedInt, -// readBytesAsUnsignedLEB128, -// } from './read_bytes' -// import { -// DecimalTypeDecoder, -// getDecimalIntSize, -// NullableTypeDecoder, -// SimpleColumnType, -// SimpleTypeDecoder, -// } from './types' -// -// const Int8Overflow = 128 -// const UInt8Overflow = 256 -// -// const Int16Overflow = 32768 -// const UInt16Overflow = 65536 -// -// const Int32Overflow = 2147483648 -// const UInt32Overflow = 4294967296 -// -// const Int64Overflow = 9223372036854775808n -// const UInt64Overflow = 18446744073709551616n -// -// const Int128Overflow = 170141183460469231731687303715884105728n -// const UInt128Overflow = 340282366920938463463374607431768211456n -// -// const Int256Overflow = -// 57896044618658097711785492504343953926634992332820282019728792003956564819968n -// const UInt256Overflow = -// 115792089237316195423570985008687907853269984665640564039457584007913129639936n -// -// const DayMillis = 24 * 3600 * 1000 -// const TxtDecoder = new TextDecoder() -// -// export type SimpleTypeDecoderDataView = ( -// src: DataView, -// loc: number -// ) => DecodeResult | null -// -// export class RowBinaryTypesDecoderDataView { -// static bool(src: DataView, loc: number): DecodeResult | null { -// if (src.byteLength < loc + 1) return null -// return [src.getUint8(loc) === 1, loc + 1] -// } -// static uint8(src: DataView, loc: number): DecodeResult | null { -// if (src.byteLength < loc + 1) return null -// return [src.getUint8(loc), loc + 1] -// } -// static int8(src: DataView, loc: number): DecodeResult | null { -// if (src.byteLength < loc + 1) return null -// return [src.getInt8(loc), loc + 1] -// } -// static uint16(src: DataView, loc: number): DecodeResult | null { -// if (src.byteLength < loc + 2) return null -// return [src.getUint16(loc), loc + 2] -// } -// static int16(src: DataView, loc: number): DecodeResult | null { -// if (src.byteLength < loc + 2) return null -// return [src.getInt16(loc), loc + 2] -// } -// static uint32(src: DataView, loc: number): DecodeResult | null { -// if (src.byteLength < loc + 4) return null -// 
return [src.getUint32(loc), loc + 4] -// } -// static int32(src: DataView, loc: number): DecodeResult | null { -// if (src.byteLength < loc + 4) return null -// return [src.getInt32(loc), loc + 4] -// } -// static uint64(src: DataView, loc: number): DecodeResult | null { -// if (src.byteLength < loc + 8) return null -// return [src.getBigInt64(loc), loc + 8] -// } -// static int64(src: DataView, loc: number): DecodeResult | null { -// if (src.byteLength < loc + 8) return null -// const x = src.getBigInt64(loc) -// return [x < Int64Overflow ? x : x - UInt64Overflow, loc + 8] -// } -// // static uint128(src: DataView, loc: number): DecodeResult | null { -// // if (src.byteLength < loc + 16) return null -// // return [readBytesAsUnsignedBigInt(src, loc, 16), loc + 16] -// // } -// // static int128(src: DataView, loc: number): DecodeResult | null { -// // if (src.byteLength < loc + 16) return null -// // const x = readBytesAsUnsignedBigInt(src, loc, 16) -// // return [x < Int128Overflow ? x : x - UInt128Overflow, loc + 16] -// // } -// // static uint256(src: DataView, loc: number): DecodeResult | null { -// // if (src.byteLength < loc + 32) return null -// // return [readBytesAsUnsignedBigInt(src, loc, 32), loc + 32] -// // } -// // static int256(src: DataView, loc: number): DecodeResult | null { -// // if (src.byteLength < loc + 32) return null -// // const x = readBytesAsUnsignedBigInt(src, loc, 32) -// // return [x < Int256Overflow ? x : x - UInt256Overflow, loc + 32] -// // } -// static float32(src: DataView, loc: number): DecodeResult | null { -// if (src.byteLength < loc + 4) return null -// return [src.getFloat32(loc), loc + 4] -// } -// static float64(src: DataView, loc: number): DecodeResult | null { -// if (src.byteLength < loc + 8) return null -// return [src.getFloat64(loc), loc + 8] -// } -// // static string(src: DataView, loc: number): DecodeResult | null { -// // if (src.byteLength < loc + 1) return null -// // const res = readBytesAsUnsignedLEB128(src.buffer, loc) -// // if (res === null) { -// // return null -// // } -// // const [length, nextLoc] = res -// // if (src.byteLength < nextLoc + length) return null -// // return [ -// // TxtDecoder.decode(src.buffer.slice(nextLoc, nextLoc + length)), -// // nextLoc + length, -// // ] -// // } -// static date(src: DataView, loc: number): DecodeResult | null { -// const res = RowBinaryTypesDecoderDataView.uint16(src, loc) -// if (res === null) return null -// return [new Date(res[0] * DayMillis), res[1]] -// } -// -// static date32(src: DataView, loc: number): DecodeResult | null { -// const res = RowBinaryTypesDecoderDataView.int32(src, loc) -// if (res === null) return null -// return [new Date(res[0] * DayMillis), res[1]] -// } -// static nullable( -// baseTypeDecoder: SimpleTypeDecoderDataView -// ): (src: DataView, loc: number) => DecodeResult | null { -// return (src: DataView, loc: number) => { -// const res = RowBinaryTypesDecoderDataView.uint8(src, loc) -// if (res === null) return null -// if (res[0] === 1) { -// return [null, res[1]] -// } -// return baseTypeDecoder(src, res[1]) -// } -// } -// static decimal( -// precision: number, -// scale: number -// ): (src: DataView, loc: number) => DecodeResult | null { -// const intSize = getDecimalIntSize(precision) -// let scaleMultiplier: number | bigint -// if (intSize === 32) { -// scaleMultiplier = 10 ** scale -// } else { -// scaleMultiplier = BigInt(10 ** scale) -// } -// return (src: DataView, loc: number) => { -// if (intSize === 32) { -// const res = 
RowBinaryTypesDecoderDataView.int32(src, loc) -// if (res === null) return null -// const whole = Math.floor(res[0] / (scaleMultiplier as number)) -// const fractional = res[0] % (scaleMultiplier as number) -// return [`${whole.toString(10)}.${fractional.toString(10)}`, res[1]] -// } -// let res: DecodeResult | null -// if (intSize === 64) { -// res = RowBinaryTypesDecoderDataView.int64(src, loc) -// } else if (intSize === 128) { -// throw new Error('Unsupported int size: 128') -// // res = RowBinaryTypesDecoderDataView.int128(src, loc) -// } else if (intSize === 256) { -// // res = RowBinaryTypesDecoderDataView.int256(src, loc) -// throw new Error('Unsupported int size: 256') -// } else { -// throw new Error(`Unsupported int size: ${intSize}`) -// } -// if (res === null) return null -// const whole = res[0] / (scaleMultiplier as bigint) -// const fractional = res[0] % (scaleMultiplier as bigint) -// return [`${whole.toString(10)}.${fractional.toString(10)}`, res[1]] -// } -// } -// // static array( -// // innerDecoder: -// // | SimpleTypeDecoder -// // | ReturnType -// // | ReturnType>, -// // dimensions = 0 -// // ): (src: DataView, loc: number) => DecodeResult> | null { -// // return (src: DataView, loc: number) => { -// // const leb128 = readBytesAsUnsignedLEB128(src, loc) -// // if (leb128 === null) return null -// // const result = new Array(leb128[0]) -// // if (dimensions === 0) { -// // for (let i = 0; i < leb128[0]; i++) { -// // const res = innerDecoder(src, leb128[1]) -// // if (res === null) return null -// // result[i] = res[0] -// // } -// // } else { -// // return this.array(innerDecoder, dimensions - 1)(src, leb128[1]) -// // } -// // return null -// // } -// // } -// } -// -// export const RowBinarySimpleDecodersDataView: { -// [key in -// | 'Bool' -// | 'UInt8' -// | 'Int8' -// | 'UInt16' -// | 'Int16' -// | 'UInt32' -// | 'Int32' -// | 'UInt64' -// | 'Int64' -// // | 'UInt128' -// // | 'Int128' -// // | 'UInt256' -// // | 'Int256' -// | 'Float32' -// | 'Float64' -// // | 'String' -// | 'Date' -// | 'Date32']: SimpleTypeDecoderDataView -// } = { -// Bool: RowBinaryTypesDecoderDataView.bool, -// UInt8: RowBinaryTypesDecoderDataView.uint8, -// Int8: RowBinaryTypesDecoderDataView.int8, -// UInt16: RowBinaryTypesDecoderDataView.uint16, -// Int16: RowBinaryTypesDecoderDataView.int16, -// UInt32: RowBinaryTypesDecoderDataView.uint32, -// Int32: RowBinaryTypesDecoderDataView.int32, -// UInt64: RowBinaryTypesDecoderDataView.uint64, -// Int64: RowBinaryTypesDecoderDataView.int64, -// // UInt128: RowBinaryTypesDecoderDataView.uint128, -// // Int128: RowBinaryTypesDecoderDataView.int128, -// // UInt256: RowBinaryTypesDecoderDataView.uint256, -// // Int256: RowBinaryTypesDecoderDataView.int256, -// Float32: RowBinaryTypesDecoderDataView.float32, -// Float64: RowBinaryTypesDecoderDataView.float64, -// // String: RowBinaryTypesDecoderDataView.string, -// Date: RowBinaryTypesDecoderDataView.date, -// Date32: RowBinaryTypesDecoderDataView.date32, -// } From 6b86e521f8c09435e9a71cd682b3a4da23d8e214 Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Sun, 24 Mar 2024 09:33:34 +0100 Subject: [PATCH 10/14] Remove outdated files --- benchmarks/leaks/row_binary.ts | 122 --------------------------------- 1 file changed, 122 deletions(-) delete mode 100644 benchmarks/leaks/row_binary.ts diff --git a/benchmarks/leaks/row_binary.ts b/benchmarks/leaks/row_binary.ts deleted file mode 100644 index b069fcbf..00000000 --- a/benchmarks/leaks/row_binary.ts +++ /dev/null @@ -1,122 +0,0 @@ -import type { Row } 
from '@clickhouse/client' -import { createClient } from '@clickhouse/client' -import type { RowBinaryResultSet } from '@clickhouse/client/row_binary_result_set' -import { attachExceptionHandlers } from './shared' - -/* - -CREATE TABLE default.fluff -( - `id` UInt32, - `s1` String, - `s2` String, - `u8` UInt8, - `i8` Int8, - `u16` UInt16, - `i16` Int16, - `u32` UInt32, - `i32` Int32, - `u64` UInt64, - `i64` Int64, - `u128` UInt128, - `i128` Int128, - `u256` UInt256, - `i256` Int256, - `date` Date -) -ENGINE = MergeTree -ORDER BY id - -INSERT INTO fluff SELECT * -FROM generateRandom('id UInt32, s1 String, s2 String, u8 UInt8, i8 Int8, u16 UInt16, i16 Int16, u32 UInt32, i32 Int32, u64 UInt64, i64 Int64, u128 UInt128, i128 Int128, u256 UInt256, i256 Int256, date Date') -LIMIT 5000000 - - */ - -const limit = 50000 -const query = `SELECT * FROM fluff ORDER BY id ASC LIMIT 5` -// const query = `SELECT * FROM large_strings ORDER BY id ASC LIMIT ${limit}` -// const query = `SELECT * EXCEPT (i128, i256, u128, u256) FROM fluff ORDER BY id ASC LIMIT ${limit}` - -void (async () => { - const client = createClient({ - url: 'http://localhost:8123', - }) - - async function benchmarkJSON(format: 'JSONEachRow' | 'JSONCompactEachRow') { - const start = +new Date() - const rs = await client.query({ - query, - format, - }) - let total = 0 - await new Promise((resolve, reject) => { - rs.stream() - .on('data', (rows: Row[]) => { - rows.forEach((row) => { - console.log(row.json()) - total++ - }) - }) - .on('end', resolve) - .on('error', reject) - }) - console.log(`${format} elapsed: ${+new Date() - start} ms, total: ${total}`) - return total - } - - async function benchmarkCSV() { - const start = +new Date() - const rs = await client.query({ - query, - format: 'CSV', - }) - let total = 0 - await new Promise((resolve, reject) => { - rs.stream() - .on('data', (rows: Row[]) => { - rows.forEach((row) => { - row.text.split(',') - total++ - }) - }) - .on('end', resolve) - .on('error', reject) - }) - console.log(`CSV elapsed: ${+new Date() - start} ms, total: ${total}`) - return total - } - - async function benchmarkRowBinary() { - const start = +new Date() - const rs = await client.query({ - query, - format: 'RowBinary', - }) - let total = 0 - await new Promise((resolve, reject) => { - ;(rs as RowBinaryResultSet) - .stream() - .on('data', (rows: unknown[][]) => { - rows.forEach((row) => { - total++ - // if (total === limit) { - console.log(`Last row`, row) - // } - }) - }) - .on('end', resolve) - .on('error', reject) - }) - console.log(`RowBinary elapsed: ${+new Date() - start} ms, total: ${total}`) - return total - } - - attachExceptionHandlers() - for (let i = 0; i < 3; i++) { - await benchmarkJSON('JSONCompactEachRow') - // await benchmarkCSV() - await benchmarkRowBinary() - } - process.exit(0) -})() From b9e2daee28453dd1b4ad27bff548a570593b6b4f Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Sun, 24 Mar 2024 09:35:32 +0100 Subject: [PATCH 11/14] Add gitignore entry --- .gitignore | 1 + benchmarks/tsconfig.json | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c3ebb5bb..8e20c8f6 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ dist/ .idea node_modules benchmarks/leaks/input +benchmarks/test *.tgz .npmrc webpack diff --git a/benchmarks/tsconfig.json b/benchmarks/tsconfig.json index 4bd870c0..1ade2df7 100644 --- a/benchmarks/tsconfig.json +++ b/benchmarks/tsconfig.json @@ -1,6 +1,6 @@ { "extends": "../tsconfig.json", - "include": ["leaks/**/*.ts"], + "include": 
["leaks/**/*.ts", "test/**/*.ts"], "compilerOptions": { "noUnusedLocals": false, "noUnusedParameters": false, From c8aeda91f04614141e3bd0f58912fb07d062f050 Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Sun, 24 Mar 2024 13:04:11 +0100 Subject: [PATCH 12/14] Add Tuple parsers --- .gitignore | 2 +- benchmarks/tsconfig.json | 2 +- package.json | 4 +- .../unit/row_binary_columns_parser.test.ts | 2 +- .../row_binary_columns_parser_array.test.ts | 2 +- ...row_binary_columns_parser_datetime.test.ts | 2 +- .../row_binary_columns_parser_decimal.test.ts | 2 +- .../row_binary_columns_parser_enum.test.ts | 107 ++------- .../row_binary_columns_parser_map.test.ts | 2 +- ...row_binary_columns_parser_nullable.test.ts | 2 +- .../row_binary_columns_parser_tuple.test.ts | 164 +++++++++++++ .../unit/row_binary_decoders.test.ts | 2 +- .../client-common/__tests__/utils/index.ts | 1 + .../__tests__/utils/row_binary/index.ts | 1 + .../utils/row_binary/row_binary_test_args.ts | 108 +++++++++ .../row_binary/columns_parser.ts | 222 +++++++++++------- 16 files changed, 437 insertions(+), 188 deletions(-) create mode 100644 packages/client-common/__tests__/unit/row_binary_columns_parser_tuple.test.ts create mode 100644 packages/client-common/__tests__/utils/row_binary/index.ts create mode 100644 packages/client-common/__tests__/utils/row_binary/row_binary_test_args.ts diff --git a/.gitignore b/.gitignore index 8e20c8f6..09d01a3e 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,7 @@ dist/ .idea node_modules benchmarks/leaks/input -benchmarks/test +benchmarks/dev *.tgz .npmrc webpack diff --git a/benchmarks/tsconfig.json b/benchmarks/tsconfig.json index 1ade2df7..5970b0f7 100644 --- a/benchmarks/tsconfig.json +++ b/benchmarks/tsconfig.json @@ -1,6 +1,6 @@ { "extends": "../tsconfig.json", - "include": ["leaks/**/*.ts", "test/**/*.ts"], + "include": ["leaks/**/*.ts", "dev/*.ts"], "compilerOptions": { "noUnusedLocals": false, "noUnusedParameters": false, diff --git a/package.json b/package.json index 471b4425..29a9548f 100644 --- a/package.json +++ b/package.json @@ -42,7 +42,7 @@ "devDependencies": { "@faker-js/faker": "^8.2.0", "@types/jasmine": "^4.3.2", - "@types/node": "^18.11.18", + "@types/node": "^20.11.30", "@types/sinon": "^10.0.15", "@types/split2": "^3.2.1", "@types/uuid": "^9.0.2", @@ -74,7 +74,7 @@ "ts-node": "^10.9.1", "tsconfig-paths": "^4.2.0", "tsconfig-paths-webpack-plugin": "^4.0.1", - "typescript": "^4.9.4", + "typescript": "^4.9.5", "uuid": "^9.0.0", "webpack": "^5.84.1", "webpack-cli": "^5.1.4", diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts index 1723d3be..13e80d67 100644 --- a/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts @@ -1,6 +1,6 @@ import { parseFixedStringType } from '../../src/data_formatter/row_binary/columns_parser' -fdescribe('RowBinary column types parser', () => { +describe('RowBinary column types parser', () => { describe('FixedString', () => { it('should parse FixedString', async () => { const args: [string, number][] = [ diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_array.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_array.test.ts index 8633ca58..f448ea28 100644 --- a/packages/client-common/__tests__/unit/row_binary_columns_parser_array.test.ts +++ 
b/packages/client-common/__tests__/unit/row_binary_columns_parser_array.test.ts @@ -6,7 +6,7 @@ import type { } from '../../src/data_formatter/row_binary/columns_parser' import { parseArrayType } from '../../src/data_formatter/row_binary/columns_parser' -fdescribe('RowBinary column types parser - Array', () => { +describe('RowBinary column types parser - Array', () => { it('should parse Array with a simple value type', async () => { type TestArgs = { columnType: string diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_datetime.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_datetime.test.ts index d415756a..f87e9c51 100644 --- a/packages/client-common/__tests__/unit/row_binary_columns_parser_datetime.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_datetime.test.ts @@ -3,7 +3,7 @@ import { parseDateTimeType, } from '../../src/data_formatter/row_binary/columns_parser' -fdescribe('RowBinary column types parser - DateTime and DateTime64', () => { +describe('RowBinary column types parser - DateTime and DateTime64', () => { describe('DateTime', () => { it('should parse DateTime', async () => { const args: [string, string | null][] = [ diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_decimal.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_decimal.test.ts index 430eb7cd..8b84b0a3 100644 --- a/packages/client-common/__tests__/unit/row_binary_columns_parser_decimal.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_decimal.test.ts @@ -1,6 +1,6 @@ import { parseDecimalType } from '../../src/data_formatter/row_binary/columns_parser' -fdescribe('RowBinary column types parser - Decimal', () => { +describe('RowBinary column types parser - Decimal', () => { type TestArgs = { sourceType: string precision: number diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_enum.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_enum.test.ts index 7b805972..bcf2a425 100644 --- a/packages/client-common/__tests__/unit/row_binary_columns_parser_enum.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_enum.test.ts @@ -1,98 +1,23 @@ +import { enumTypes, parsedEnumTestArgs } from '@test/utils' import { parseEnumType } from '../../src/data_formatter/row_binary/columns_parser' -fdescribe('RowBinary column types parser - Enum', () => { - const enumTypes: ['Enum8' | 'Enum16', 8 | 16][] = [ - ['Enum8', 8], - ['Enum16', 16], - ] - +describe('RowBinary column types parser - Enum', () => { it('should parse correct values', async () => { - type TestArgs = { - columnType: string - expectedValues: Map - expectedIntSize: 8 | 16 - } - const allEnumSizeArgs: TestArgs[][] = enumTypes.map( - ([enumType, expectedIntSize]) => [ - { - columnType: `${enumType}('a' = 1)`, - expectedValues: new Map([[1, 'a']]), - expectedIntSize, - }, - { - columnType: `${enumType}('a' = 0, 'b' = 2)`, - expectedValues: new Map([ - [0, 'a'], - [2, 'b'], - ]), - expectedIntSize, - }, - { - columnType: `${enumType}('a' = 1, 'b' = 2, 'c' = 42)`, - expectedValues: new Map([ - [1, 'a'], - [2, 'b'], - [42, 'c'], - ]), - expectedIntSize, - }, - { - columnType: `${enumType}('f\\'' = 1, 'x =' = 2, 'b\\'\\'\\'' = 3, '\\'c=4=' = 42, '4' = 100)`, - expectedValues: new Map([ - [1, "f\\'"], - [2, 'x ='], - [3, "b\\'\\'\\'"], - [42, "\\'c=4="], - [100, '4'], - ]), - expectedIntSize, - }, - { - columnType: `${enumType}('' = 0)`, - 
expectedValues: new Map([[0, '']]), - expectedIntSize, - }, - { - columnType: `${enumType}('' = 42)`, - expectedValues: new Map([[42, '']]), - expectedIntSize, - }, - { - columnType: `${enumType}('foo' = 1, '' = 42)`, - expectedValues: new Map([ - [1, 'foo'], - [42, ''], - ]), - expectedIntSize, - }, - { - columnType: `${enumType}('' = 0, 'foo' = 42)`, - expectedValues: new Map([ - [0, ''], - [42, 'foo'], - ]), - expectedIntSize, - }, - ] - ) - - allEnumSizeArgs.forEach((args) => - args.forEach(({ columnType, expectedValues, expectedIntSize }) => { - const result = parseEnumType({ columnType, sourceType: columnType }) - expect(result) - .withContext( - `Expected ${columnType} to be parsed as an Enum with intSize ${expectedIntSize} and values [${[ - ...expectedValues.entries(), - ]}]` - ) - .toEqual({ - type: 'Enum', - intSize: expectedIntSize, - values: expectedValues, - sourceType: columnType, - }) + parsedEnumTestArgs.forEach((expected) => { + const result = parseEnumType({ + sourceType: expected.sourceType, + columnType: expected.sourceType, }) - ) + expect(result) + .withContext( + `Expected ${ + expected.sourceType + } to be parsed as an Enum with intSize ${ + expected.intSize + } and values [${[...expected.values.entries()]}]` + ) + .toEqual(expected) + }) }) it('should throw when the type is not a valid enum', async () => { diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_map.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_map.test.ts index b9a9ee23..bdd15172 100644 --- a/packages/client-common/__tests__/unit/row_binary_columns_parser_map.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_map.test.ts @@ -1,7 +1,7 @@ import type { ParsedColumnMap } from '../../src/data_formatter/row_binary/columns_parser' import { parseMapType } from '../../src/data_formatter/row_binary/columns_parser' -fdescribe('RowBinary column types parser - Map', () => { +describe('RowBinary column types parser - Map', () => { it('should parse Map with simple types', async () => { const args: [ParsedColumnMap, string][] = [ [ diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_nullable.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_nullable.test.ts index c98be99f..b26bb724 100644 --- a/packages/client-common/__tests__/unit/row_binary_columns_parser_nullable.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_nullable.test.ts @@ -7,7 +7,7 @@ import type { } from '../../src/data_formatter/row_binary/columns_parser' import { asNullableType } from '../../src/data_formatter/row_binary/columns_parser' -fdescribe('RowBinary column types parser - Nullable', () => { +describe('RowBinary column types parser - Nullable', () => { it('should wrap a simple type', async () => { const args: [ParsedColumnSimple, string][] = [ [ diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_tuple.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_tuple.test.ts new file mode 100644 index 00000000..bf9ec5ac --- /dev/null +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_tuple.test.ts @@ -0,0 +1,164 @@ +import { parsedEnumTestArgs } from '@test/utils' +import type { + ParsedColumnDateTime, + ParsedColumnDateTime64, + ParsedColumnFixedString, + ParsedColumnSimple, + ParsedColumnTuple, +} from '../../src/data_formatter/row_binary/columns_parser' +import { parseTupleType } from 
'../../src/data_formatter/row_binary/columns_parser' + +fdescribe('RowBinary column types parser - Tuple', () => { + it('should parse Tuple with simple types', async () => { + const args: TestArgs[] = [ + { + sourceType: 'Tuple(String, UInt8)', + expected: { + type: 'Tuple', + elements: [ + { type: 'Simple', columnType: 'String', sourceType: 'String' }, + { type: 'Simple', columnType: 'UInt8', sourceType: 'UInt8' }, + ], + sourceType: 'Tuple(String, UInt8)', + }, + }, + { + sourceType: 'Tuple(Int32, Float32)', + expected: { + type: 'Tuple', + elements: [ + { type: 'Simple', columnType: 'Int32', sourceType: 'Int32' }, + { type: 'Simple', columnType: 'Float32', sourceType: 'Float32' }, + ], + sourceType: 'Tuple(Int32, Float32)', + }, + }, + ] + args.forEach(({ expected, sourceType }) => { + const result = parseTupleType({ columnType: sourceType, sourceType }) + expect(result) + .withContext( + `Expected ${sourceType} to have ${joinElements(expected)} elements` + ) + .toEqual(expected) + }) + }) + + it('should parse Tuple with Decimals', async () => { + const args: TestArgs[] = [ + { + sourceType: 'Tuple(Decimal(7, 2), Decimal(18, 4))', + expected: { + type: 'Tuple', + elements: [ + { + type: 'Decimal', + sourceType: 'Decimal(7, 2)', + params: { precision: 7, scale: 2, intSize: 32 }, + }, + { + type: 'Decimal', + sourceType: 'Decimal(18, 4)', + params: { precision: 18, scale: 4, intSize: 64 }, + }, + ], + sourceType: 'Tuple(Decimal(7, 2), Decimal(18, 4))', + }, + }, + ] + args.forEach(({ expected, sourceType }) => { + const result = parseTupleType({ columnType: sourceType, sourceType }) + expect(result) + .withContext( + `Expected ${sourceType} to have ${joinElements(expected)} elements` + ) + .toEqual(expected) + }) + }) + + it('should parse Tuple with Enums', async () => { + const args: TestArgs[] = parsedEnumTestArgs.map((enumElement) => { + // e.g. 
Tuple(String, Enum8('a' = 1)) + const sourceType = `Tuple(${stringElement.sourceType}, ${enumElement.sourceType})` + return { + sourceType, + expected: { + type: 'Tuple', + elements: [stringElement, enumElement], + sourceType, + }, + } + }) + args.forEach(({ expected, sourceType }) => { + const result = parseTupleType({ columnType: sourceType, sourceType }) + expect(result) + .withContext( + `Expected ${sourceType} to have ${joinElements(expected)} elements` + ) + .toEqual(expected) + }) + }) + + it('should parse Tuple with FixedString/DateTime', async () => { + const fixedStringElement: ParsedColumnFixedString = { + type: 'FixedString', + sourceType: 'FixedString(16)', + sizeBytes: 16, + } + const dateTimeElement: ParsedColumnDateTime = { + type: 'DateTime', + timezone: null, + sourceType: 'DateTime', + } + const dateTimeWithTimezoneElement: ParsedColumnDateTime = { + type: 'DateTime', + timezone: 'Europe/Amsterdam', + sourceType: `DateTime('Europe/Amsterdam')`, + } + const dateTime64Element: ParsedColumnDateTime64 = { + type: 'DateTime64', + timezone: null, + precision: 3, + sourceType: 'DateTime64(3)', + } + const dateTime64WithTimezoneElement: ParsedColumnDateTime64 = { + type: 'DateTime64', + timezone: 'Europe/Amsterdam', + precision: 9, + sourceType: `DateTime64(9, 'Europe/Amsterdam')`, + } + const elements = [ + fixedStringElement, + dateTimeElement, + dateTimeWithTimezoneElement, + dateTime64Element, + dateTime64WithTimezoneElement, + ] + const elementsSourceTypes = elements.map((el) => el.sourceType).join(', ') + const sourceType = `Tuple(${elementsSourceTypes})` + const expected: ParsedColumnTuple = { + type: 'Tuple', + elements, + sourceType, + } + const result = parseTupleType({ columnType: sourceType, sourceType }) + expect(result).toEqual(expected) + }) + + // TODO: Simple types permutations, Nullable, Arrays, Maps, Nested Tuples + + const stringElement: ParsedColumnSimple = { + type: 'Simple', + sourceType: 'String', + columnType: 'String', + } +}) + +function joinElements(expected: ParsedColumnTuple) { + return expected.elements.map((el) => el.sourceType).join(', ') +} + +type TestArgs = { + sourceType: string + expected: ParsedColumnTuple +} diff --git a/packages/client-common/__tests__/unit/row_binary_decoders.test.ts b/packages/client-common/__tests__/unit/row_binary_decoders.test.ts index 118ac806..e644c14b 100644 --- a/packages/client-common/__tests__/unit/row_binary_decoders.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_decoders.test.ts @@ -1,6 +1,6 @@ import { RowBinaryTypesDecoder } from '../../src/data_formatter' -fdescribe('RowBinary decoders', () => { +describe('RowBinary decoders', () => { it('should decode Date', () => { const args: [Uint8Array, Date][] = [ [new Uint8Array([0x00, 0x00]), new Date('1970-01-01T00:00:00.000Z')], diff --git a/packages/client-common/__tests__/utils/index.ts b/packages/client-common/__tests__/utils/index.ts index 849fd37f..589deec3 100644 --- a/packages/client-common/__tests__/utils/index.ts +++ b/packages/client-common/__tests__/utils/index.ts @@ -11,3 +11,4 @@ export { TestEnv } from './test_env' export { sleep } from './sleep' export { whenOnEnv } from './jasmine' export { getRandomInt } from './random' +export * from './row_binary' diff --git a/packages/client-common/__tests__/utils/row_binary/index.ts b/packages/client-common/__tests__/utils/row_binary/index.ts new file mode 100644 index 00000000..25981934 --- /dev/null +++ b/packages/client-common/__tests__/utils/row_binary/index.ts @@ -0,0 +1 @@ +export * from 
'./row_binary_test_args' diff --git a/packages/client-common/__tests__/utils/row_binary/row_binary_test_args.ts b/packages/client-common/__tests__/utils/row_binary/row_binary_test_args.ts new file mode 100644 index 00000000..c8311a84 --- /dev/null +++ b/packages/client-common/__tests__/utils/row_binary/row_binary_test_args.ts @@ -0,0 +1,108 @@ +import type { ParsedColumnEnum } from '../../../src/data_formatter/row_binary/columns_parser' + +export const enumTypes: ['Enum8' | 'Enum16', 8 | 16][] = [ + ['Enum8', 8], + ['Enum16', 16], +] + +export const parsedEnumTestArgs: ParsedColumnEnum[] = enumTypes.flatMap( + ([enumType, intSize]) => [ + { + type: 'Enum', + sourceType: `${enumType}('a' = 1)`, + values: new Map([[1, 'a']]), + intSize, + }, + { + type: 'Enum', + sourceType: `${enumType}('a' = 0, 'b' = 2)`, + values: new Map([ + [0, 'a'], + [2, 'b'], + ]), + intSize, + }, + { + type: 'Enum', + sourceType: `${enumType}('a' = 1, 'b' = 2, 'c' = 42)`, + values: new Map([ + [1, 'a'], + [2, 'b'], + [42, 'c'], + ]), + intSize, + }, + { + type: 'Enum', + sourceType: `${enumType}('f\\'' = 1, 'x =' = 2, 'b\\'\\'\\'' = 3, '\\'c=4=' = 42, '4' = 100)`, + values: new Map([ + [1, "f\\'"], + [2, 'x ='], + [3, "b\\'\\'\\'"], + [42, "\\'c=4="], + [100, '4'], + ]), + intSize, + }, + { + type: 'Enum', + sourceType: `${enumType}('f\\'()' = 1)`, + values: new Map([[1, "f\\'()"]]), + intSize, + }, + { + type: 'Enum', + sourceType: `${enumType}('\\'' = 0)`, + values: new Map([[0, `\\'`]]), + intSize, + }, + { + type: 'Enum', + sourceType: `${enumType}('' = 0)`, + values: new Map([[0, '']]), + intSize, + }, + { + type: 'Enum', + sourceType: `${enumType}('' = 42)`, + values: new Map([[42, '']]), + intSize, + }, + { + type: 'Enum', + sourceType: `${enumType}('foo' = 1, '' = 42)`, + values: new Map([ + [1, 'foo'], + [42, ''], + ]), + intSize, + }, + { + type: 'Enum', + sourceType: `${enumType}('' = 0, 'foo' = 42)`, + values: new Map([ + [0, ''], + [42, 'foo'], + ]), + intSize, + }, + { + type: 'Enum', + sourceType: `${enumType}('(' = 1)`, + values: new Map([[1, '(']]), + intSize, + }, + { + type: 'Enum', + sourceType: `${enumType}(')' = 1)`, + values: new Map([[1, ')']]), + intSize, + }, + { + type: 'Enum', + sourceType: `${enumType}('()' = 1)`, + values: new Map([[1, '()']]), + intSize, + }, + ] +) diff --git a/packages/client-common/src/data_formatter/row_binary/columns_parser.ts b/packages/client-common/src/data_formatter/row_binary/columns_parser.ts index db134734..92ff858a 100644 --- a/packages/client-common/src/data_formatter/row_binary/columns_parser.ts +++ b/packages/client-common/src/data_formatter/row_binary/columns_parser.ts @@ -58,7 +58,7 @@ export interface ParsedColumnDecimal { sourceType: string } -/** Array or Map itself cannot be Nullable */ +/** Tuple, Array or Map itself cannot be Nullable */ export interface ParsedColumnNullable { type: 'Nullable' value: @@ -86,6 +86,7 @@ export interface ParsedColumnArray { | ParsedColumnMap | ParsedColumnDateTime | ParsedColumnDateTime64 + | ParsedColumnTuple /** Array(T) = 1 dimension, Array(Array(T)) = 2, etc. */ dimensions: number sourceType: string @@ -105,27 +106,28 @@ export interface ParsedColumnMap { | ParsedColumnFixedString | ParsedColumnEnum | ParsedColumnDateTime - /** Possible value type: arbitrary, including Map and Array. */ + /** Value types are arbitrary, including Map, Array, and Tuple. */ value: ParsedColumnType sourceType: string } -// TODO: Tuple support. 
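A hedged sketch of the Tuple support being introduced here, mirroring the expectations in the Tuple tests above (the call is real API from this patch; the commented result is what those tests assert):

parseColumnType('Tuple(String, UInt8)')
// => {
//   type: 'Tuple',
//   elements: [
//     { type: 'Simple', columnType: 'String', sourceType: 'String' },
//     { type: 'Simple', columnType: 'UInt8', sourceType: 'UInt8' },
//   ],
//   sourceType: 'Tuple(String, UInt8)',
// }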
-// export interface ParseColumnTuple { -// type: 'Tuple' -// elements: ParsedColumnType[] -// sourceType: string -// } +export interface ParsedColumnTuple { + type: 'Tuple' + /** Element types are arbitrary, including Map, Array, and Tuple. */ + elements: ParsedColumnType[] + sourceType: string +} export type ParsedColumnType = | ParsedColumnSimple + | ParsedColumnEnum | ParsedColumnFixedString | ParsedColumnNullable | ParsedColumnDecimal | ParsedColumnDateTime | ParsedColumnDateTime64 | ParsedColumnArray - | ParsedColumnEnum + | ParsedColumnTuple | ParsedColumnMap export function parseColumnType(sourceType: string): ParsedColumnType { @@ -154,6 +156,8 @@ export function parseColumnType(sourceType: string): ParsedColumnType { result = parseDateTime64Type({ sourceType, columnType }) } else if (columnType.startsWith(DateTimePrefix)) { result = parseDateTimeType({ sourceType, columnType }) + } else if (columnType.startsWith(FixedStringPrefix)) { + result = parseFixedStringType({ sourceType, columnType }) } else if ( columnType.startsWith(Enum8Prefix) || columnType.startsWith(Enum16Prefix) @@ -163,6 +167,8 @@ export function parseColumnType(sourceType: string): ParsedColumnType { result = parseArrayType({ sourceType, columnType }) } else if (columnType.startsWith(MapPrefix)) { result = parseMapType({ sourceType, columnType }) + } else if (columnType.startsWith(TuplePrefix)) { + result = parseTupleType({ sourceType, columnType }) } else { throw ClickHouseRowBinaryError.headerDecodingError( 'Unsupported column type', @@ -253,7 +259,6 @@ export function parseEnumType({ } ) } - // The minimal allowed Enum definition is Enum8('' = 0), i.e. 6 chars inside. if (columnType.length < 6) { throw ClickHouseRowBinaryError.headerDecodingError( @@ -274,10 +279,12 @@ export function parseEnumType({ // Should support the most complicated enums, such as Enum8('f\'' = 1, 'x =' = 2, 'b\'\'\'' = 3, '\'c=4=' = 42, '4' = 100) for (let i = 1; i < columnType.length; i++) { if (parsingName) { - if (!charEscaped) { - if (columnType[i] === '\\') { + if (charEscaped) { + charEscaped = false + } else { + if (columnType.charCodeAt(i) === BackslashASCII) { charEscaped = true - } else if (columnType[i] === "'") { + } else if (columnType.charCodeAt(i) === SingleQuoteASCII) { // non-escaped closing tick - push the name const name = columnType.slice(startIndex, i) if (names.includes(name)) { @@ -291,21 +298,20 @@ export function parseEnumType({ startIndex = i parsingName = false } - } else { - // current char was escaped, ignoring. - charEscaped = false - } - } else { - // Parsing the index - if (columnType[i] < '0' || columnType[i] > '9') { - pushEnumIndex(startIndex, i) - // the char at this index should be comma. - i += 2 // skip ` '`, but not the first char - ClickHouse allows something like Enum8('foo' = 0, '' = 42) - startIndex = i + 1 - parsingName = true - charEscaped = false } } + // Parsing the index, skipping next iterations until the first non-digit one + else if ( + columnType.charCodeAt(i) < ZeroASCII || + columnType.charCodeAt(i) > NineASCII + ) { + pushEnumIndex(startIndex, i) + // the char at this index should be comma. 
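To make the jump below concrete, a hedged trace over the inner definition of Enum8('foo' = 0, '' = 42) (the offsets are illustrative and assume `i` sits on the comma when `pushEnumIndex` fires, as the in-line comment states):

// inner definition:  'foo' = 0, '' = 42
// offsets:           0123456789...
// i = 9 is the comma: the digits of the index `0` were consumed just before it
// i += 2 moves to offset 11, the opening tick of the next name
// startIndex = i + 1 = 12, and the loop's own i++ lands on offset 12 as well,
// which for an empty name is already the closing tick, so '' is pushed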
+ i += 2 // skip ` '`, but not the first char - ClickHouse allows something like Enum8('foo' = 0, '' = 42) + startIndex = i + 1 + parsingName = true + charEscaped = false + } } // Push the last index @@ -321,7 +327,6 @@ export function parseEnumType({ for (let i = 0; i < names.length; i++) { values.set(indices[i], names[i]) } - return { type: 'Enum', values, @@ -369,48 +374,22 @@ export function parseMapType({ }) } columnType = columnType.slice(MapPrefix.length, -1) - - let openParens = 0 // consider the type parsed once we reach a comma outside of parens. - - let keyColumnType: string | undefined - for (let i = 0; i < columnType.length; i++) { - if (openParens === 0) { - if (columnType[i] === ',') { - keyColumnType = columnType.slice(0, i) - break - } - } - if (columnType[i] === '(') { - openParens++ - } else if (columnType[i] === ')') { - openParens-- - } - } - if (keyColumnType === undefined) { - throw ClickHouseRowBinaryError.headerDecodingError( - 'Could not parse Map key type', - { - sourceType, - columnType, - } - ) - } - - const key = parseColumnType(keyColumnType) + const [keyType, valueType] = getElementsTypes({ columnType, sourceType }, 2) + const key = parseColumnType(keyType) if ( key.type === 'DateTime64' || key.type === 'Nullable' || key.type === 'Array' || key.type === 'Map' || - key.type === 'Decimal' // TODO: disallow Tuple as well. + key.type === 'Decimal' || + key.type === 'Tuple' ) { throw ClickHouseRowBinaryError.headerDecodingError('Invalid Map key type', { key, sourceType, }) } - - const value = parseColumnType(columnType.slice(keyColumnType.length + 2)) + const value = parseColumnType(valueType) return { type: 'Map', key, @@ -419,24 +398,29 @@ export function parseMapType({ } } -// TODO. -// export function parseTupleType({ -// columnType, -// dbType, -// }: ParseColumnTypeParams): ParseColumnTuple { -// if (!columnType.startsWith(TuplePrefix)) { -// throw ClickHouseRowBinaryError.headerDecodingError('Invalid Tuple type', { -// columnType, -// dbType, -// }) -// } -// // columnType = columnType.slice(TuplePrefix.length, -1) -// return { -// type: 'Tuple', -// elements: [], -// dbType, -// } -// } +export function parseTupleType({ + columnType, + sourceType, +}: ParseColumnTypeParams): ParsedColumnTuple { + if ( + !columnType.startsWith(TuplePrefix) || + columnType.length < TuplePrefix.length + 5 // Tuple(Int8) is the shortest valid definition + ) { + throw ClickHouseRowBinaryError.headerDecodingError('Invalid Tuple type', { + columnType, + sourceType, + }) + } + columnType = columnType.slice(TuplePrefix.length, -1) + const elements = getElementsTypes({ columnType, sourceType }, 1).map((type) => + parseColumnType(type) + ) + return { + type: 'Tuple', + elements, + sourceType, + } +} export function parseArrayType({ columnType, @@ -585,18 +569,15 @@ export function parseFixedStringType({ } export function asNullableType( - value: - | ParsedColumnSimple - | ParsedColumnEnum - | ParsedColumnDecimal - | ParsedColumnArray - | ParsedColumnMap - | ParsedColumnDateTime - | ParsedColumnDateTime64, + value: ParsedColumnType, sourceType: string ): ParsedColumnNullable { - // TODO: disallow Tuple as well. 
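A hedged sketch of the widened guard below, with the argument shapes copied from the Nullable parser tests (only `asNullableType` itself is real API here):

asNullableType(
  { type: 'Simple', columnType: 'String', sourceType: 'String' },
  'Nullable(String)',
) // wraps fine: { type: 'Nullable', sourceType: 'Nullable(String)', value: ... }

asNullableType(
  {
    type: 'Array',
    value: { type: 'Simple', columnType: 'UInt8', sourceType: 'UInt8' },
    dimensions: 1,
    sourceType: 'Array(UInt8)',
  },
  '...',
) // throws 'Array cannot be Nullable'; Map, Tuple and nested Nullable are rejected the same way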
- if (value.type === 'Array' || value.type === 'Map') { + if ( + value.type === 'Array' || + value.type === 'Map' || + value.type === 'Tuple' || + value.type === 'Nullable' + ) { throw ClickHouseRowBinaryError.headerDecodingError( `${value.type} cannot be Nullable`, { sourceType } @@ -612,6 +593,67 @@ export function asNullableType( } } +/** Used for Map key/value types and Tuple elements. + * * `String, UInt8` results in [`String`, `UInt8`]. + * * `String, UInt8, Array(String)` results in [`String`, `UInt8`, `Array(String)`]. + * * Throws if parsed values are below the required minimum. */ +export function getElementsTypes( + { columnType, sourceType }: ParseColumnTypeParams, + minElements: number +): string[] { + const elements: string[] = [] + /** Consider the element type parsed once we reach a comma outside of parens AND after an unescaped tick. + * The most complicated cases are values names in the self-defined Enum types: + * * `Tuple(Enum8('f\'()' = 1))` -> `f\'()` + * * `Tuple(Enum8('(' = 1))` -> `(` + * See also: {@link parseEnumType }, which works similarly (but has to deal with the indices following the names). */ + let openParens = 0 + let quoteOpen = false + let charEscaped = false + let lastElementIndex = 0 + for (let i = 0; i < columnType.length; i++) { + // prettier-ignore + // console.log(i, 'Current char:', columnType[i], 'openParens:', openParens, 'quoteOpen:', quoteOpen, 'charEscaped:', charEscaped) + if (charEscaped) { + charEscaped = false + } else if (columnType.charCodeAt(i) === BackslashASCII) { + charEscaped = true + } else if (columnType.charCodeAt(i) === SingleQuoteASCII) { + quoteOpen = !quoteOpen // unescaped quote + } else { + if (!quoteOpen) { + if (columnType.charCodeAt(i) === LeftParenASCII) { + openParens++ + } else if (columnType.charCodeAt(i) === RightParenASCII) { + openParens-- + } else if (columnType.charCodeAt(i) === CommaASCII) { + if (openParens === 0) { + elements.push(columnType.slice(lastElementIndex, i)) + // console.log('Pushed element:', elements[elements.length - 1]) + i += 2 // skip ', ' + lastElementIndex = i + } + } + } + } + } + + // prettier-ignore + // console.log('Final elements:', elements, 'nextElementIndex:', lastElementIndex, 'minElements:', minElements, 'openParens:', openParens) + + // Push the remaining part of the type if it seems to be valid (at least all parentheses are closed) + if (!openParens && lastElementIndex < columnType.length - 1) { + elements.push(columnType.slice(lastElementIndex)) + } + if (elements.length < minElements) { + throw ClickHouseRowBinaryError.headerDecodingError( + 'Expected more elements in the type', + { sourceType, columnType, elements, minElements } + ) + } + return elements +} + interface ParseColumnTypeParams { /** A particular type to parse, such as DateTime. 
*/ columnType: string @@ -631,3 +673,11 @@ const DateTimePrefix = 'DateTime' as const const DateTimeWithTimezonePrefix = 'DateTime(' as const const DateTime64Prefix = 'DateTime64(' as const const FixedStringPrefix = 'FixedString(' as const + +const SingleQuoteASCII = 39 as const +const LeftParenASCII = 40 as const +const RightParenASCII = 41 as const +const CommaASCII = 44 as const +const ZeroASCII = 48 as const +const NineASCII = 57 as const +const BackslashASCII = 92 as const From ca97598f34b42ef08615b3ca6c227931514ad666 Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Tue, 2 Apr 2024 11:43:45 +0200 Subject: [PATCH 13/14] [skip ci] Merge 1.0.0 changes --- .../unit/row_binary_columns_parser.test.ts | 6 +- .../row_binary_columns_parser_array.test.ts | 10 +- ...row_binary_columns_parser_datetime.test.ts | 12 +- .../row_binary_columns_parser_decimal.test.ts | 4 +- .../row_binary_columns_parser_enum.test.ts | 4 +- .../row_binary_columns_parser_map.test.ts | 2 +- ...row_binary_columns_parser_nullable.test.ts | 16 +-- .../row_binary_columns_parser_tuple.test.ts | 6 +- .../unit/row_binary_decoders.test.ts | 2 +- .../utils/row_binary/row_binary_test_args.ts | 2 +- .../row_binary/columns_header.ts | 12 +- .../row_binary/columns_parser.ts | 44 ++++---- .../src/data_formatter/row_binary/errors.ts | 4 +- .../src/data_formatter/row_binary/mappers.ts | 2 +- .../data_formatter/row_binary/read_bytes.ts | 2 +- .../src/data_formatter/row_binary/types.ts | 29 ++--- .../node_stream_row_binary.test.ts | 103 +----------------- .../client-node/src/row_binary_result_set.ts | 24 ++-- 18 files changed, 94 insertions(+), 190 deletions(-) diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts index 13e80d67..af9a3a90 100644 --- a/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser.test.ts @@ -16,7 +16,7 @@ describe('RowBinary column types parser', () => { }) expect(result) .withContext( - `Expected ${columnType} to be parsed as a FixedString with size ${sizeBytes}` + `Expected ${columnType} to be parsed as a FixedString with size ${sizeBytes}`, ) .toEqual({ type: 'FixedString', sizeBytes, sourceType: columnType }) }) @@ -31,7 +31,7 @@ describe('RowBinary column types parser', () => { ] args.forEach(([columnType]) => { expect(() => - parseFixedStringType({ columnType, sourceType: columnType }) + parseFixedStringType({ columnType, sourceType: columnType }), ) .withContext(`Expected ${columnType} to throw`) .toThrowError('Invalid FixedString type') @@ -46,7 +46,7 @@ describe('RowBinary column types parser', () => { ] args.forEach(([columnType]) => { expect(() => - parseFixedStringType({ columnType, sourceType: columnType }) + parseFixedStringType({ columnType, sourceType: columnType }), ) .withContext(`Expected ${columnType} to throw`) .toThrowError('Invalid FixedString size in bytes') diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_array.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_array.test.ts index f448ea28..9e6688bf 100644 --- a/packages/client-common/__tests__/unit/row_binary_columns_parser_array.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_array.test.ts @@ -45,7 +45,7 @@ describe('RowBinary column types parser - Array', () => { const result = parseArrayType({ columnType, sourceType: columnType }) expect(result) .withContext( - `Expected 
${columnType} to be parsed as an Array with value type ${valueType} and ${dimensions} dimensions` + `Expected ${columnType} to be parsed as an Array with value type ${valueType} and ${dimensions} dimensions`, ) .toEqual({ type: 'Array', @@ -82,7 +82,7 @@ describe('RowBinary column types parser - Array', () => { const result = parseArrayType({ columnType, sourceType: columnType }) expect(result) .withContext( - `Expected ${columnType} to be parsed as an Array with value type ${valueType} and ${dimensions} dimensions` + `Expected ${columnType} to be parsed as an Array with value type ${valueType} and ${dimensions} dimensions`, ) .toEqual({ type: 'Array', @@ -160,7 +160,7 @@ describe('RowBinary column types parser - Array', () => { const result = parseArrayType({ columnType, sourceType: columnType }) expect(result) .withContext( - `Expected ${columnType} to be parsed as an Array with value type ${value.sourceType} and ${dimensions} dimensions` + `Expected ${columnType} to be parsed as an Array with value type ${value.sourceType} and ${dimensions} dimensions`, ) .toEqual({ type: 'Array', @@ -210,7 +210,7 @@ describe('RowBinary column types parser - Array', () => { const result = parseArrayType({ columnType, sourceType: columnType }) expect(result) .withContext( - `Expected ${columnType} to be parsed as an Array with value type ${value.sourceType} and ${dimensions} dimensions` + `Expected ${columnType} to be parsed as an Array with value type ${value.sourceType} and ${dimensions} dimensions`, ) .toEqual({ type: 'Array', @@ -274,7 +274,7 @@ describe('RowBinary column types parser - Array', () => { const result = parseArrayType({ columnType, sourceType: columnType }) expect(result) .withContext( - `Expected ${columnType} to be parsed as an Array with value type ${value.sourceType} and ${dimensions} dimensions` + `Expected ${columnType} to be parsed as an Array with value type ${value.sourceType} and ${dimensions} dimensions`, ) .toEqual({ type: 'Array', diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_datetime.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_datetime.test.ts index f87e9c51..2ea6eee9 100644 --- a/packages/client-common/__tests__/unit/row_binary_columns_parser_datetime.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_datetime.test.ts @@ -52,7 +52,7 @@ describe('RowBinary column types parser - DateTime and DateTime64', () => { }) expect(result) .withContext( - `Expected ${columnType} to be parsed as a DateTime64 with precision ${precision}` + `Expected ${columnType} to be parsed as a DateTime64 with precision ${precision}`, ) .toEqual({ type: 'DateTime64', @@ -69,7 +69,7 @@ describe('RowBinary column types parser - DateTime and DateTime64', () => { [`DateTime64(${precision}, 'GB')`, precision, 'GB'], [`DateTime64(${precision}, 'UTC')`, precision, 'UTC'], [`DateTime64(${precision}, 'Etc/GMT-5')`, precision, 'Etc/GMT-5'], - ] + ], ) allPrecisionArgs.forEach((args) => args.forEach(([columnType, precision, timezone]) => { @@ -79,7 +79,7 @@ describe('RowBinary column types parser - DateTime and DateTime64', () => { }) expect(result) .withContext( - `Expected ${columnType} to be parsed as a DateTime64 with precision ${precision} and timezone ${timezone}` + `Expected ${columnType} to be parsed as a DateTime64 with precision ${precision} and timezone ${timezone}`, ) .toEqual({ type: 'DateTime64', @@ -87,7 +87,7 @@ describe('RowBinary column types parser - DateTime and DateTime64', () => { timezone, precision, 
}) - }) + }), ) }) @@ -95,7 +95,7 @@ describe('RowBinary column types parser - DateTime and DateTime64', () => { const args = [['DateTime64('], ['DateTime64()'], ['String']] args.forEach(([columnType]) => { expect(() => - parseDateTime64Type({ columnType, sourceType: columnType }) + parseDateTime64Type({ columnType, sourceType: columnType }), ) .withContext(`Expected ${columnType} to throw`) .toThrowError('Invalid DateTime64 type') @@ -106,7 +106,7 @@ describe('RowBinary column types parser - DateTime and DateTime64', () => { const args = [[`DateTime64(')`], [`DateTime64(foo)`]] args.forEach(([columnType]) => { expect(() => - parseDateTime64Type({ columnType, sourceType: columnType }) + parseDateTime64Type({ columnType, sourceType: columnType }), ) .withContext(`Expected ${columnType} to throw`) .toThrowError('Invalid DateTime64 precision') diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_decimal.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_decimal.test.ts index 8b84b0a3..6d583721 100644 --- a/packages/client-common/__tests__/unit/row_binary_columns_parser_decimal.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_decimal.test.ts @@ -39,7 +39,7 @@ describe('RowBinary column types parser - Decimal', () => { const result = parseDecimalType({ columnType: sourceType, sourceType }) expect(result) .withContext( - `Expected ${sourceType} to be parsed as a Decimal with precision ${precision}, scale ${scale} and intSize ${intSize}` + `Expected ${sourceType} to be parsed as a Decimal with precision ${precision}, scale ${scale} and intSize ${intSize}`, ) .toEqual({ type: 'Decimal', @@ -97,7 +97,7 @@ describe('RowBinary column types parser - Decimal', () => { it('should throw when precision or scale cannot be parsed', async () => { const columnType = 'Decimal(foobar)' expect(() => - parseDecimalType({ columnType, sourceType: columnType }) + parseDecimalType({ columnType, sourceType: columnType }), ).toThrowError('Expected Decimal type to have both precision and scale') }) }) diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_enum.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_enum.test.ts index bcf2a425..0fb4445c 100644 --- a/packages/client-common/__tests__/unit/row_binary_columns_parser_enum.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_enum.test.ts @@ -14,7 +14,7 @@ describe('RowBinary column types parser - Enum', () => { expected.sourceType } to be parsed as an Enum with intSize ${ expected.intSize - } and values [${[...expected.values.entries()]}]` + } and values [${[...expected.values.entries()]}]`, ) .toEqual(expected) }) @@ -83,7 +83,7 @@ describe('RowBinary column types parser - Enum', () => { expect(() => parseEnumType({ columnType, sourceType: columnType })) .withContext(`Expected ${columnType} to throw`) .toThrowError('Invalid Enum type values') - }) + }), ) }) }) diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_map.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_map.test.ts index bdd15172..dd269664 100644 --- a/packages/client-common/__tests__/unit/row_binary_columns_parser_map.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_map.test.ts @@ -31,7 +31,7 @@ describe('RowBinary column types parser - Map', () => { const result = parseMapType({ columnType: sourceType, sourceType }) expect(result) .withContext( - `Expected ${sourceType} to be 
parsed as a Map with key type ${expected.key.sourceType} and value type ${expected.value.sourceType}` + `Expected ${sourceType} to be parsed as a Map with key type ${expected.key.sourceType} and value type ${expected.value.sourceType}`, ) .toEqual(expected) }) diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_nullable.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_nullable.test.ts index b26bb724..b9e29c15 100644 --- a/packages/client-common/__tests__/unit/row_binary_columns_parser_nullable.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_nullable.test.ts @@ -31,7 +31,7 @@ describe('RowBinary column types parser - Nullable', () => { const result = asNullableType(value, sourceType) expect(result) .withContext( - `Expected ${value.columnType} to be wrapped as ${sourceType}` + `Expected ${value.columnType} to be wrapped as ${sourceType}`, ) .toEqual({ type: 'Nullable', @@ -120,7 +120,7 @@ describe('RowBinary column types parser - Nullable', () => { const result = asNullableType(value, sourceType) expect(result) .withContext( - `Expected ${value.sourceType} to be wrapped as ${sourceType}` + `Expected ${value.sourceType} to be wrapped as ${sourceType}`, ) .toEqual({ type: 'Nullable', @@ -157,7 +157,7 @@ describe('RowBinary column types parser - Nullable', () => { const result = asNullableType(value, sourceType) expect(result) .withContext( - `Expected ${value.sourceType} to be wrapped as ${sourceType}` + `Expected ${value.sourceType} to be wrapped as ${sourceType}`, ) .toEqual({ type: 'Nullable', @@ -219,7 +219,7 @@ describe('RowBinary column types parser - Nullable', () => { const result = asNullableType(value, sourceType) expect(result) .withContext( - `Expected ${value.sourceType} to be wrapped as ${sourceType}` + `Expected ${value.sourceType} to be wrapped as ${sourceType}`, ) .toEqual({ type: 'Nullable', @@ -248,8 +248,8 @@ describe('RowBinary column types parser - Nullable', () => { value: columnString, sourceType: 'Map(UInt8, String)', }, - '...' - ) + '...', + ), ).toThrowError('Map cannot be Nullable') expect(() => asNullableType( @@ -259,8 +259,8 @@ describe('RowBinary column types parser - Nullable', () => { dimensions: 1, sourceType: 'Array(UInt8)', }, - '...' 
- ) + '...', + ), ).toThrowError('Array cannot be Nullable') }) }) diff --git a/packages/client-common/__tests__/unit/row_binary_columns_parser_tuple.test.ts b/packages/client-common/__tests__/unit/row_binary_columns_parser_tuple.test.ts index bf9ec5ac..49893a20 100644 --- a/packages/client-common/__tests__/unit/row_binary_columns_parser_tuple.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_columns_parser_tuple.test.ts @@ -38,7 +38,7 @@ fdescribe('RowBinary column types parser - Tuple', () => { const result = parseTupleType({ columnType: sourceType, sourceType }) expect(result) .withContext( - `Expected ${sourceType} to have ${joinElements(expected)} elements` + `Expected ${sourceType} to have ${joinElements(expected)} elements`, ) .toEqual(expected) }) @@ -70,7 +70,7 @@ fdescribe('RowBinary column types parser - Tuple', () => { const result = parseTupleType({ columnType: sourceType, sourceType }) expect(result) .withContext( - `Expected ${sourceType} to have ${joinElements(expected)} elements` + `Expected ${sourceType} to have ${joinElements(expected)} elements`, ) .toEqual(expected) }) @@ -93,7 +93,7 @@ fdescribe('RowBinary column types parser - Tuple', () => { const result = parseTupleType({ columnType: sourceType, sourceType }) expect(result) .withContext( - `Expected ${sourceType} to have ${joinElements(expected)} elements` + `Expected ${sourceType} to have ${joinElements(expected)} elements`, ) .toEqual(expected) }) diff --git a/packages/client-common/__tests__/unit/row_binary_decoders.test.ts b/packages/client-common/__tests__/unit/row_binary_decoders.test.ts index e644c14b..587cbfc2 100644 --- a/packages/client-common/__tests__/unit/row_binary_decoders.test.ts +++ b/packages/client-common/__tests__/unit/row_binary_decoders.test.ts @@ -14,7 +14,7 @@ describe('RowBinary decoders', () => { const res = RowBinaryTypesDecoder.date(Buffer.from(src), 0)! expect(+res[0]) .withContext( - `Decoded ${src.toString()}. Result ${res[0]} != expected ${expected}` + `Decoded ${src.toString()}. 
Result ${res[0]} != expected ${expected}`,
         )
         .toEqual(+expected)
     })
diff --git a/packages/client-common/__tests__/utils/row_binary/row_binary_test_args.ts b/packages/client-common/__tests__/utils/row_binary/row_binary_test_args.ts
index c8311a84..53c4f5ef 100644
--- a/packages/client-common/__tests__/utils/row_binary/row_binary_test_args.ts
+++ b/packages/client-common/__tests__/utils/row_binary/row_binary_test_args.ts
@@ -104,5 +104,5 @@
       values: new Map([[1, '()']]),
       intSize,
     },
-  ]
+  ],
 )
diff --git a/packages/client-common/src/data_formatter/row_binary/columns_header.ts b/packages/client-common/src/data_formatter/row_binary/columns_header.ts
index 97d7ba17..3a5b6d6c 100644
--- a/packages/client-common/src/data_formatter/row_binary/columns_header.ts
+++ b/packages/client-common/src/data_formatter/row_binary/columns_header.ts
@@ -24,14 +24,14 @@ export class RowBinaryColumnsHeader {
     if (res === null) {
       throw ClickHouseRowBinaryError.headerDecodingError(
         'Not enough data to decode number of columns',
-        {}
+        {},
       )
     }
     const numColumns = res[0]
     if (numColumns === 0) {
       throw ClickHouseRowBinaryError.headerDecodingError(
         'Unexpected zero number of columns',
-        {}
+        {},
       )
     }
     let nextLoc = res[1]
@@ -43,7 +43,7 @@
       if (res === null) {
         throw ClickHouseRowBinaryError.headerDecodingError(
           `Not enough data to decode column name`,
-          { i, names, numColumns, nextLoc }
+          { i, names, numColumns, nextLoc },
         )
       }
       nextLoc = res[1]
@@ -54,7 +54,7 @@
       if (res === null) {
         throw ClickHouseRowBinaryError.headerDecodingError(
           `Not enough data to decode column type`,
-          { i, names, types, numColumns, nextLoc }
+          { i, names, types, numColumns, nextLoc },
         )
       }
       nextLoc = res[1]
@@ -76,7 +76,7 @@
       default:
         throw ClickHouseRowBinaryError.headerDecodingError(
           `Unsupported column type ${col.type}`,
-          { col }
+          { col },
         )
     }
   }
@@ -99,7 +99,7 @@ function getDecimalDecoder(decimalParams: DecimalParams): SimpleTypeDecoder {
 
 function getEnumDecoder(
   intSize: 8 | 16,
-  values: Map<number, string>
+  values: Map<number, string>,
 ): SimpleTypeDecoder {
   if (intSize === 8) {
     return RowBinaryTypesDecoder.enum8(values)
diff --git a/packages/client-common/src/data_formatter/row_binary/columns_parser.ts b/packages/client-common/src/data_formatter/row_binary/columns_parser.ts
index db134734..b3b63ce0 100644
--- a/packages/client-common/src/data_formatter/row_binary/columns_parser.ts
+++ b/packages/client-common/src/data_formatter/row_binary/columns_parser.ts
@@ -172,7 +172,7 @@ export function parseColumnType(sourceType: string): ParsedColumnType {
   } else {
     throw ClickHouseRowBinaryError.headerDecodingError(
       'Unsupported column type',
-      { columnType }
+      { columnType },
     )
   }
   if (isNullable) {
@@ -203,7 +203,7 @@ export function parseDecimalType({
       sourceType,
       columnType,
       split,
-      }
+      },
     )
   }
   let intSize: DecimalParams['intSize'] = 32
@@ -211,14 +211,14 @@
   if (Number.isNaN(precision) || precision < 1 || precision > 76) {
     throw ClickHouseRowBinaryError.headerDecodingError(
       'Invalid Decimal precision',
-      { columnType, sourceType, precision }
+      { columnType, sourceType, precision },
     )
   }
   const scale = parseInt(split[1], 10)
   if (Number.isNaN(scale) || scale < 0 || scale > precision) {
     throw ClickHouseRowBinaryError.headerDecodingError(
       'Invalid Decimal scale',
-      { columnType, sourceType, precision, scale }
+      { columnType, sourceType, precision, scale },
     )
   }
   if (precision
> 38) { @@ -256,7 +256,7 @@ export function parseEnumType({ { columnType, sourceType, - } + }, ) } // The minimal allowed Enum definition is Enum8('' = 0), i.e. 6 chars inside. @@ -266,7 +266,7 @@ export function parseEnumType({ { columnType, sourceType, - } + }, ) } @@ -290,7 +290,7 @@ export function parseEnumType({ if (names.includes(name)) { throw ClickHouseRowBinaryError.headerDecodingError( 'Duplicate Enum name', - { columnType, sourceType, name, names, indices } + { columnType, sourceType, name, names, indices }, ) } names.push(name) @@ -319,7 +319,7 @@ export function parseEnumType({ if (names.length !== indices.length) { throw ClickHouseRowBinaryError.headerDecodingError( 'Expected Enum to have the same number of names and indices', - { columnType, sourceType, names, indices } + { columnType, sourceType, names, indices }, ) } @@ -347,13 +347,13 @@ export function parseEnumType({ index, start, end, - } + }, ) } if (indices.includes(index)) { throw ClickHouseRowBinaryError.headerDecodingError( 'Duplicate Enum index', - { columnType, sourceType, index, names, indices } + { columnType, sourceType, index, names, indices }, ) } indices.push(index) @@ -413,7 +413,7 @@ export function parseTupleType({ } columnType = columnType.slice(TuplePrefix.length, -1) const elements = getElementsTypes({ columnType, sourceType }, 1).map((type) => - parseColumnType(type) + parseColumnType(type), ) return { type: 'Tuple', @@ -449,14 +449,14 @@ export function parseArrayType({ // TODO: check how many we can handle; max 10 seems more than enough. throw ClickHouseRowBinaryError.headerDecodingError( 'Expected Array to have between 1 and 10 dimensions', - { columnType } + { columnType }, ) } const value = parseColumnType(columnType) if (value.type === 'Array') { throw ClickHouseRowBinaryError.headerDecodingError( 'Unexpected Array as value type', - { columnType, sourceType } + { columnType, sourceType }, ) } return { @@ -496,7 +496,7 @@ export function parseDateTimeType({ { columnType, sourceType, - } + }, ) } } @@ -514,7 +514,7 @@ export function parseDateTime64Type({ { columnType, sourceType, - } + }, ) } const precision = parseInt(columnType[DateTime64Prefix.length], 10) @@ -525,7 +525,7 @@ export function parseDateTime64Type({ columnType, sourceType, precision, - } + }, ) } let timezone = null @@ -551,14 +551,14 @@ export function parseFixedStringType({ ) { throw ClickHouseRowBinaryError.headerDecodingError( 'Invalid FixedString type', - { columnType, sourceType } + { columnType, sourceType }, ) } const sizeBytes = parseInt(columnType.slice(FixedStringPrefix.length, -1), 10) if (Number.isNaN(sizeBytes) || sizeBytes < 1) { throw ClickHouseRowBinaryError.headerDecodingError( 'Invalid FixedString size in bytes', - { columnType, sourceType, sizeBytes } + { columnType, sourceType, sizeBytes }, ) } return { @@ -570,7 +570,7 @@ export function parseFixedStringType({ export function asNullableType( value: ParsedColumnType, - sourceType: string + sourceType: string, ): ParsedColumnNullable { if ( value.type === 'Array' || @@ -580,7 +580,7 @@ export function asNullableType( ) { throw ClickHouseRowBinaryError.headerDecodingError( `${value.type} cannot be Nullable`, - { sourceType } + { sourceType }, ) } if (value.sourceType.startsWith(NullablePrefix)) { @@ -599,7 +599,7 @@ export function asNullableType( * * Throws if parsed values are below the required minimum. 
 */
 export function getElementsTypes(
   { columnType, sourceType }: ParseColumnTypeParams,
-  minElements: number
+  minElements: number,
 ): string[] {
   const elements: string[] = []
   /** Consider the element type parsed once we reach a comma outside of parens AND after an unescaped tick.
@@ -648,7 +648,7 @@ export function getElementsTypes(
   if (elements.length < minElements) {
     throw ClickHouseRowBinaryError.headerDecodingError(
       'Expected more elements in the type',
-      { sourceType, columnType, elements, minElements }
+      { sourceType, columnType, elements, minElements },
     )
   }
   return elements
diff --git a/packages/client-common/src/data_formatter/row_binary/errors.ts b/packages/client-common/src/data_formatter/row_binary/errors.ts
index a5092b8b..44f903c8 100644
--- a/packages/client-common/src/data_formatter/row_binary/errors.ts
+++ b/packages/client-common/src/data_formatter/row_binary/errors.ts
@@ -12,7 +12,7 @@ export class ClickHouseRowBinaryError extends Error {
   }
   static headerDecodingError(
     message: string,
-    args?: Record<string, unknown>
+    args?: Record<string, unknown>,
   ): ClickHouseRowBinaryError {
     return new ClickHouseRowBinaryError({
       name: HeaderDecodingError,
@@ -21,7 +21,7 @@
     })
   }
   static decoderNotFoundError(
-    col: Record<string, unknown>
+    col: Record<string, unknown>,
   ): ClickHouseRowBinaryError {
     return new ClickHouseRowBinaryError({
       name: HeaderDecodingError,
diff --git a/packages/client-common/src/data_formatter/row_binary/mappers.ts b/packages/client-common/src/data_formatter/row_binary/mappers.ts
index 3527ba2d..014509fd 100644
--- a/packages/client-common/src/data_formatter/row_binary/mappers.ts
+++ b/packages/client-common/src/data_formatter/row_binary/mappers.ts
@@ -5,7 +5,7 @@ export interface RowBinaryMappers<T> {
   datetime64?: (
     secondsSinceOrBeforeEpoch: bigint,
     nanosOfSecond: number,
-    timezone?: string
+    timezone?: string,
   ) => T
   /** Decimal types with scale more than 9: Decimal64, Decimal128, Decimal256 */
   decimal?: (whole: bigint, fractional: bigint) => T
diff --git a/packages/client-common/src/data_formatter/row_binary/read_bytes.ts b/packages/client-common/src/data_formatter/row_binary/read_bytes.ts
index a759b13b..8e45f5ee 100644
--- a/packages/client-common/src/data_formatter/row_binary/read_bytes.ts
+++ b/packages/client-common/src/data_formatter/row_binary/read_bytes.ts
@@ -4,7 +4,7 @@ export type DecodeResult<T = unknown> = [T, number]
 // May return null since we cannot determine how many bytes we need to read in advance
 export function readBytesAsUnsignedLEB128(
   src: Buffer,
-  loc: number
+  loc: number,
 ): DecodeResult<number> | null {
   let result = 0
   let shift = 0
diff --git a/packages/client-common/src/data_formatter/row_binary/types.ts b/packages/client-common/src/data_formatter/row_binary/types.ts
index 13429802..62bd553a 100644
--- a/packages/client-common/src/data_formatter/row_binary/types.ts
+++ b/packages/client-common/src/data_formatter/row_binary/types.ts
@@ -1,5 +1,6 @@
-import { Buffer } from 'buffer'
-import { DecodeResult, readBytesAsUnsignedLEB128 } from './read_bytes'
+import type { Buffer } from 'buffer'
+import type { DecodeResult } from './read_bytes'
+import { readBytesAsUnsignedLEB128 } from './read_bytes'
 
 export type SimpleColumnType =
   /** {@link SimpleTypeDecoder} */
@@ -24,18 +25,18 @@ export type SimpleColumnType =
 
 export type SimpleTypeDecoder<T = unknown> = (
   src: Buffer,
-  loc: number
+  loc: number,
 ) => DecodeResult<T> | null
 export type DecimalTypeDecoder = (
   precision: number,
-  scale: number
+  scale: number,
 ) => SimpleTypeDecoder
 export type NullableTypeDecoder = (
-  baseTypeDecoder: SimpleTypeDecoder | DecimalTypeDecoder
+  baseTypeDecoder: SimpleTypeDecoder | DecimalTypeDecoder,
 ) => SimpleTypeDecoder
 export type ArrayTypeDecoder = (
   innerDecoder: SimpleTypeDecoder,
-  dimensions: number
+  dimensions: number,
 ) => SimpleTypeDecoder
 export type TypeDecoder =
   | SimpleTypeDecoder
@@ -48,7 +49,7 @@ export type MapTypeDecoder = (
   valueDecoder:
     | SimpleTypeDecoder
     | ArrayTypeDecoder
-    | MapTypeDecoder
+    | MapTypeDecoder,
 ) => SimpleTypeDecoder<Map<unknown, unknown>>
 
 const DayMillis = 24 * 3600 * 1000
@@ -139,7 +140,7 @@ export class RowBinaryTypesDecoder {
     return [new Date(daysBeforeOrSinceEpoch * DayMillis), loc + 4]
   }
   static nullable(
-    baseTypeDecoder: SimpleTypeDecoder
+    baseTypeDecoder: SimpleTypeDecoder,
   ): (src: Buffer, loc: number) => DecodeResult | null {
     return (src: Buffer, loc: number) => {
       if (src.length < loc + 1) return null
@@ -151,7 +152,7 @@ export class RowBinaryTypesDecoder {
     }
   }
   static enum8(
-    values: Map<number, string>
+    values: Map<number, string>,
   ): (src: Buffer, loc: number) => DecodeResult<string> | null {
     return (src: Buffer, loc: number) => {
       if (src.length < loc + 1) return null
@@ -161,7 +162,7 @@ export class RowBinaryTypesDecoder {
     }
   }
   static enum16(
-    values: Map<number, string>
+    values: Map<number, string>,
   ): (src: Buffer, loc: number) => DecodeResult<string> | null {
     return (src: Buffer, loc: number) => {
       if (src.length < loc + 2) return null
@@ -213,7 +214,7 @@ export class RowBinaryTypesDecoder {
   // }
   static decimal32<T>(
     scale: number,
-    mapper?: (whole: number, fractional: number) => T
+    mapper?: (whole: number, fractional: number) => T,
   ): (src: Buffer, loc: number) => DecodeResult<T | number> | null {
     const scaleMultiplier = 10 ** scale
     return (src: Buffer, loc: number) => {
@@ -228,9 +229,9 @@ export class RowBinaryTypesDecoder {
     }
   }
   static decimal64(
-    scale: number
+    scale: number,
   ): (src: Buffer, loc: number) => DecodeResult | null {
-    const scaleMultiplier = BigInt(10) ** BigInt(scale)
+    // const scaleMultiplier = BigInt(10) ** BigInt(scale)
    return (src: Buffer, loc: number) => {
       if (src.length < loc + 8) return null
       const fullDecimal64 = src.readBigInt64LE(loc)
@@ -249,7 +250,7 @@ export class RowBinaryTypesDecoder {
       | SimpleTypeDecoder
       | ReturnType<NullableTypeDecoder>
      | ReturnType<MapTypeDecoder>,
-    dimensions = 0
+    dimensions = 0,
   ): (src: Buffer, loc: number) => DecodeResult<Array<unknown>> | null {
     return (src: Buffer, loc: number) => {
       const leb128 = readBytesAsUnsignedLEB128(src, loc)
diff --git a/packages/client-node/__tests__/integration/node_stream_row_binary.test.ts b/packages/client-node/__tests__/integration/node_stream_row_binary.test.ts
index 80a148c5..d15ebe51 100644
--- a/packages/client-node/__tests__/integration/node_stream_row_binary.test.ts
+++ b/packages/client-node/__tests__/integration/node_stream_row_binary.test.ts
@@ -63,7 +63,7 @@
         BigInt('9223372036854775807'),
         BigInt('170141183460469231731687303715884105727'),
         BigInt(
-          '57896044618658097711785492504343953926634992332820282019728792003956564819967'
+          '57896044618658097711785492504343953926634992332820282019728792003956564819967',
         ),
         // BigInt('18446744073709551615'),
         // BigInt('340282366920938463463374607431768211455'),
@@ -75,7 +75,7 @@
         BigInt('-9223372036854775808'),
         BigInt('-170141183460469231731687303715884105728'),
         BigInt(
-          '-57896044618658097711785492504343953926634992332820282019728792003956564819968'
+          '-57896044618658097711785492504343953926634992332820282019728792003956564819968',
         ),
         // BigInt('120'),
         // BigInt('1234'),
@@ -103,7 +103,7 @@ async function createTableWithData(
   colNameToType: string[][],
   insertValues: unknown[][],
-  testName: string
+  testName: string,
 ) {
   tableName = `insert_stream_row_binary_${testName}_${guid()}`
   const cols = colNameToType
@@ -123,100 +123,3 @@
     })
   }
 })
-
-const _types = [
-  ['b', 'Boolean'],
-  ['i1', 'Int8'],
-  ['i2', 'Int16'],
-  ['i3', 'Int32'],
-  ['i4', 'Int64'],
-  // ['i5', 'Int128'],
-  // ['i6', 'Int256'],
-  ['u1', 'UInt8'],
-  ['u2', 'UInt16'],
-  ['u3', 'UInt32'],
-  ['u4', 'UInt64'],
-  // ['u5', 'UInt128'],
-  // ['u6', 'UInt256'],
-  ['s', 'String'],
-]
-  .map(([name, type]) => `${name} ${type}`)
-  .join(', ')
-
-const _values = [
-  {
-    id: 1,
-    b: true,
-    i1: 127,
-    i2: 32767,
-    i3: 2147483647,
-    i4: '9223372036854775807',
-    // i5: '170141183460469231731687303715884105727',
-    // i6: '57896044618658097711785492504343953926634992332820282019728792003956564819967',
-    u1: 255,
-    u2: 65535,
-    u3: 4294967295,
-    u4: '18446744073709551615',
-    // u5: '340282366920938463463374607431768211455',
-    // u6: '115792089237316195423570985008687907853269984665640564039457584007913129639935',
-    s: 'foo',
-  },
-  {
-    id: 2,
-    b: false,
-    i1: -128,
-    i2: -32768,
-    i3: -2147483648,
-    i4: '-9223372036854775808',
-    // i5: '-170141183460469231731687303715884105728',
-    // i6: '-57896044618658097711785492504343953926634992332820282019728792003956564819968',
-    u1: 120,
-    u2: 1234,
-    u3: 51234,
-    u4: '421342',
-    // u5: '15324355',
-    // u6: '41345135123432',
-    s: 'bar',
-  },
-]
-
-const _assertValues = [
-  [
-    true,
-    127,
-    32767,
-    2147483647,
-    BigInt('9223372036854775807'),
-    // BigInt('170141183460469231731687303715884105727'),
-    // BigInt(
-    //   '57896044618658097711785492504343953926634992332820282019728792003956564819967'
-    // ),
-    255,
-    65535,
-    4294967295,
-    BigInt('18446744073709551615'),
-    // BigInt('340282366920938463463374607431768211455'),
-    // BigInt(
-    //   '115792089237316195423570985008687907853269984665640564039457584007913129639935'
-    // ),
-    'foo',
-  ],
-  [
-    false,
-    -128,
-    -32768,
-    -2147483648,
-    BigInt('-9223372036854775808'),
-    // BigInt('-170141183460469231731687303715884105728'),
-    // BigInt(
-    //   '-57896044618658097711785492504343953926634992332820282019728792003956564819968'
-    // ),
-    120,
-    1234,
-    51234,
-    BigInt('421342'),
-    // BigInt('15324355'),
-    // BigInt('41345135123432'),
-    'bar',
-  ],
-]
diff --git a/packages/client-node/src/row_binary_result_set.ts b/packages/client-node/src/row_binary_result_set.ts
index c4fd8ef8..15b79ab0 100644
--- a/packages/client-node/src/row_binary_result_set.ts
+++ b/packages/client-node/src/row_binary_result_set.ts
@@ -16,24 +16,26 @@ export interface RowBinaryStreamParams {
 
 // FIXME: remove BaseResultSet inheritance (after 1.0.0 is merged).
 // FIXME: add logger (after 1.0.0 is merged).
-export class RowBinaryResultSet implements BaseResultSet {
+export class RowBinaryResultSet
+  implements BaseResultSet<Stream.Readable>
+{
   constructor(
     private _stream: Stream.Readable,
     private readonly format: DataFormat,
-    public readonly query_id: string
+    public readonly query_id: string,
   ) {}
 
   // FIXME: remove this (after 1.0.0 is merged).
   async text(): Promise<string> {
     throw new Error(
-      `Can't call 'text()' on RowBinary result set; please use 'stream' instead`
+      `Can't call 'text()' on RowBinary result set; please use 'stream' instead`,
    )
   }
 
   // FIXME: remove this (after 1.0.0 is merged).
   async json<T>(): Promise<T> {
     throw new Error(
-      `Can't call 'json()' on RowBinary result set; please use 'stream' instead`
+      `Can't call 'json()' on RowBinary result set; please use 'stream' instead`,
     )
   }
 
@@ -45,7 +47,7 @@
   async get(params?: RowBinaryStreamParams): Promise<unknown[]> {
     if (this.format !== 'RowBinary') {
       throw new Error(
-        `Can't use RowBinaryResultSet if the format is not RowBinary`
+        `Can't use RowBinaryResultSet if the format is not RowBinary`,
      )
     }
     const result: any[] = []
@@ -97,7 +99,7 @@
       transform(
         chunk: Buffer,
         _encoding: BufferEncoding,
-        callback: TransformCallback
+        callback: TransformCallback,
       ) {
         if (chunk.length === 0) {
           return callback()
@@ -106,7 +108,7 @@
         if (incompleteChunk !== undefined) {
           src = Buffer.concat(
             [incompleteChunk, chunk],
-            incompleteChunk.length + chunk.length
+            incompleteChunk.length + chunk.length,
           )
           incompleteChunk = undefined
         } else {
@@ -139,7 +141,6 @@
         //   measures[key] = (measures[key] || 0) + execTime
         // }
 
-        let lastLoc = 0
         while (loc < src.length) {
           const row = asObject
             ? Object.create(protoObject)
@@ -171,16 +172,15 @@
              }
              loc = decodeResult[1]
              columnIndex++
-              lastLoc = loc
            }
          }
          decodedRows.push(row)
          columnIndex = 0
        }

-        if (loc > src.length) {
-          console.log(`loc > src.length, ${loc} > ${src.length}`)
-        }
+        // if (loc > src.length) {
+        //   console.log(`loc > src.length, ${loc} > ${src.length}`)
+        // }

        if (decodedRows.length > 0) {
          // console.log(`pushing ${rowsToPush.length} rows`)

From 684f55457cfe973aa006eea7271f017dadec25ac Mon Sep 17 00:00:00 2001
From: slvrtrn
Date: Tue, 2 Apr 2024 11:45:00 +0200
Subject: [PATCH 14/14] [skip ci] Fix ResultSet instantiation

---
 packages/client-node/src/config.ts | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/packages/client-node/src/config.ts b/packages/client-node/src/config.ts
index 17acd78f..714e69b1 100644
--- a/packages/client-node/src/config.ts
+++ b/packages/client-node/src/config.ts
@@ -10,6 +10,7 @@ import {
 import type Stream from 'stream'
 import { createConnection, type TLSParams } from './connection'
 import { ResultSet } from './result_set'
+import { RowBinaryResultSet } from './row_binary_result_set'
 import { NodeValuesEncoder } from './utils'
 
 export type NodeClickHouseClientConfigOptions =
@@ -102,7 +103,13 @@
     stream: Stream.Readable,
     format: DataFormat,
     query_id: string,
-  ) => new ResultSet(stream, format, query_id)) as any,
+  ) => {
+    if (format === 'RowBinary') {
+      return new RowBinaryResultSet(stream, format, query_id)
+    } else {
+      return new ResultSet(stream, format, query_id)
+    }
+  }) as any,
   close_stream: async (stream) => {
     stream.destroy()
   },
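A hedged worked example for readBytesAsUnsignedLEB128 from the read_bytes.ts diff above (the byte values are illustrative): 300 is 0b100101100, and LEB128 stores the low seven bits first with a continuation flag in the high bit.

import { Buffer } from 'buffer'
// 0xac = 1_0101100 -> continuation bit set, low bits 0101100 = 44
// 0x02 = 0_0000010 -> last byte, contributes 2 << 7 = 256; 44 + 256 = 300
const res = readBytesAsUnsignedLEB128(Buffer.from([0xac, 0x02]), 0)
// res is [300, 2]: the decoded value and the offset of the next byte to read;
// a null result means the buffer ended in the middle of the varint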
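Similarly, a hedged sketch of the getElementsTypes contract from patch 12, combining its doc comment with the Enum corner cases covered by the test args (both calls are illustrative):

getElementsTypes(
  { columnType: 'String, UInt8', sourceType: 'Tuple(String, UInt8)' },
  1,
)
// => ['String', 'UInt8']

getElementsTypes(
  {
    columnType: `Enum8('(' = 1), Map(String, Array(UInt8))`,
    sourceType: `Tuple(Enum8('(' = 1), Map(String, Array(UInt8)))`,
  },
  2,
)
// => [`Enum8('(' = 1)`, 'Map(String, Array(UInt8))']
// the quoted '(' never increments openParens, and the comma inside the Map
// is shielded because its parentheses are still open at that point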
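Finally, a hedged end-to-end sketch of how the pieces are meant to fit together, modeled on the removed dev benchmark (the query is a placeholder, and the result set API is still marked FIXME above, so this may change):

import { createClient } from '@clickhouse/client'
import type { RowBinaryResultSet } from '@clickhouse/client/row_binary_result_set'

const client = createClient({ url: 'http://localhost:8123' })
const rs = (await client.query({
  query: 'SELECT number, toString(number) AS s FROM system.numbers LIMIT 10',
  format: 'RowBinary',
})) as RowBinaryResultSet
await new Promise((resolve, reject) => {
  rs.stream()
    .on('data', (rows: unknown[][]) => {
      // each element is an already-decoded row: an array of values in column order
      rows.forEach((row) => console.log(row))
    })
    .on('end', resolve)
    .on('error', reject)
})
await client.close()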