From 5622f0336c5369952dbf012b8a3f5a68454a80ed Mon Sep 17 00:00:00 2001 From: gs-gunjan Date: Wed, 12 Feb 2025 16:04:09 +0530 Subject: [PATCH] datacube: fixing caching for csv --- .changeset/tidy-ads-judge.md | 6 ++ .../src/stores/LegendDataCubeCacheManager.ts | 67 +++++++++++++------ .../src/format/FormatterUtils.ts | 8 ++- 3 files changed, 59 insertions(+), 22 deletions(-) create mode 100644 .changeset/tidy-ads-judge.md diff --git a/.changeset/tidy-ads-judge.md b/.changeset/tidy-ads-judge.md new file mode 100644 index 0000000000..eba4897e82 --- /dev/null +++ b/.changeset/tidy-ads-judge.md @@ -0,0 +1,6 @@ +--- +'@finos/legend-application-data-cube': patch +'@finos/legend-shared': patch +--- + +Fixing Caching for CSV diff --git a/packages/legend-application-data-cube/src/stores/LegendDataCubeCacheManager.ts b/packages/legend-application-data-cube/src/stores/LegendDataCubeCacheManager.ts index 03a6dc08a2..2916ae05ad 100644 --- a/packages/legend-application-data-cube/src/stores/LegendDataCubeCacheManager.ts +++ b/packages/legend-application-data-cube/src/stores/LegendDataCubeCacheManager.ts @@ -27,6 +27,7 @@ import { } from '@finos/legend-graph'; import { assertNonNullable, + csvStringify, guaranteeNonNullable, UnsupportedOperationError, } from '@finos/legend-shared'; @@ -92,34 +93,62 @@ export class LegendDataCubeDataCubeCacheManager { const connection = await this.database.connect(); - const columnString = result.builder.columns - .map((col) => col.name) - .join(','); - - const dataString: string[] = [columnString]; - - result.result.rows.forEach((row) => { - const updatedRows = row.values.map((val) => { - if (val !== null && typeof val === 'string') { - return `'${val.replaceAll(`'`, `''`)}'`; - } else if (val === null) { - return `NULL`; + const columns: string[] = []; + const columnNames: string[] = []; + result.builder.columns.forEach((col) => { + let colType: string; + switch (col.type as string) { + case PRIMITIVE_TYPE.BOOLEAN: { + colType = 'BOOLEAN'; + break; + } + case PRIMITIVE_TYPE.INTEGER: { + colType = 'INTEGER'; + break; } - return val; - }); - dataString.push(`${updatedRows.join(',')}`); + case PRIMITIVE_TYPE.NUMBER: + case PRIMITIVE_TYPE.DECIMAL: + case PRIMITIVE_TYPE.FLOAT: { + colType = 'FLOAT'; + break; + } + // We don't use type DATE because DuckDB will automatically convert it to a TIMESTAMP + case PRIMITIVE_TYPE.STRICTDATE: + case PRIMITIVE_TYPE.DATETIME: + case PRIMITIVE_TYPE.DATE: { + colType = 'VARCHAR'; + break; + } + case PRIMITIVE_TYPE.STRING: { + colType = 'VARCHAR'; + break; + } + default: { + throw new UnsupportedOperationError( + `Can't initialize cache: failed to find matching DuckDB type for Pure type '${col.type}'`, + ); + } + } + columns.push(`"${col.name}" ${colType}`); + columnNames.push(col.name); }); - const csvString = dataString.join('\n'); + const CREATE_TABLE_SQL = `CREATE TABLE ${schema}.${table} (${columns.join(',')})`; + await connection.query(CREATE_TABLE_SQL); + + const data = result.result.rows.map((row) => row.values); + + const csv = csvStringify([columnNames, ...data], { + escapeChar: `'`, + quoteChar: `'`, + }); - await this._database?.registerFileText(csvFileName, csvString); + await this._database?.registerFileText(csvFileName, csv); await connection.insertCSVFromPath(csvFileName, { schema: schema, name: table, create: false, - header: true, - detect: true, escape: `'`, quote: `'`, delimiter: ',', diff --git a/packages/legend-shared/src/format/FormatterUtils.ts b/packages/legend-shared/src/format/FormatterUtils.ts index 095177880e..8cd3b1d51c 100644 --- a/packages/legend-shared/src/format/FormatterUtils.ts +++ b/packages/legend-shared/src/format/FormatterUtils.ts @@ -21,7 +21,7 @@ import { parse as losslessParse, isSafeNumber as lossIsSafeNumber, } from 'lossless-json'; -import CSVParser from 'papaparse'; +import CSVParser, { type UnparseConfig } from 'papaparse'; import { assertNonNullable } from '../error/AssertionUtils.js'; export const capitalize = (value: string): string => @@ -152,8 +152,10 @@ export const parseCSVString = (value: string): string[] | undefined => { } }; -export const csvStringify = (value: unknown[]): string => - CSVParser.unparse(value); +export const csvStringify = ( + value: unknown[], + config?: UnparseConfig, +): string => CSVParser.unparse(value, config); /** * One very common use case is that we get the JSON as response from the server than we will convert this to a string and persist