diff --git a/.changeset/little-crabs-exist.md b/.changeset/little-crabs-exist.md new file mode 100644 index 0000000000..3796f6794d --- /dev/null +++ b/.changeset/little-crabs-exist.md @@ -0,0 +1,6 @@ +--- +'@finos/legend-application-data-cube': patch +'@finos/legend-data-cube': patch +--- + +datacube: enable saving and loading local file source in DataCube grid diff --git a/packages/legend-application-data-cube/src/components/builder/LegendDataCubeBuilder.tsx b/packages/legend-application-data-cube/src/components/builder/LegendDataCubeBuilder.tsx index 9f9abda7c6..0e35369527 100644 --- a/packages/legend-application-data-cube/src/components/builder/LegendDataCubeBuilder.tsx +++ b/packages/legend-application-data-cube/src/components/builder/LegendDataCubeBuilder.tsx @@ -38,7 +38,6 @@ import { import { useEffect } from 'react'; import { LegendDataCubeSettingStorageKey } from '../../__lib__/LegendDataCubeSetting.js'; import type { LegendDataCubeBuilderStore } from '../../stores/builder/LegendDataCubeBuilderStore.js'; -import { LocalFileDataCubeSource } from '../../stores/model/LocalFileDataCubeSource.js'; const LegendDataCubeBuilderHeader = observer(() => { const store = useLegendDataCubeBuilderStore(); @@ -58,11 +57,7 @@ const LegendDataCubeBuilderHeader = observer(() => { store.saverDisplay.open()} > Save DataCube diff --git a/packages/legend-application-data-cube/src/components/builder/LegendDataCubeSourceLoader.tsx b/packages/legend-application-data-cube/src/components/builder/LegendDataCubeSourceLoader.tsx new file mode 100644 index 0000000000..1291160c03 --- /dev/null +++ b/packages/legend-application-data-cube/src/components/builder/LegendDataCubeSourceLoader.tsx @@ -0,0 +1,63 @@ +/** + * Copyright (c) 2020-present, Goldman Sachs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { observer } from 'mobx-react-lite'; +import { FormButton } from '@finos/legend-data-cube'; +import { useLegendDataCubeBuilderStore } from './LegendDataCubeBuilderStoreProvider.js'; +import { LocalFileDataCubeSourceLoaderBuilderState } from '../../stores/builder/source/loader/LocalFileDataCubeSourceLoaderBuilderState.js'; +import { LocalFileDataCubeSourceLoader } from './source/loader/LocalFileDataCubeSourceLoader.js'; + +export const LegendDataCubeSourceLoader = observer(() => { + const store = useLegendDataCubeBuilderStore(); + const state = store.sourceLoader; + const sourceLoaderBuilder = state.sourceLoaderBuilder; + + return ( + <> +
+
+
+
+
+ {sourceLoaderBuilder instanceof + LocalFileDataCubeSourceLoaderBuilderState && ( + + )} +
+
+
+
+
+ state.display.close()}>Cancel + { + state + .finalize() + .catch((error) => store.alertService.alertUnhandledError(error)); + }} + > + OK + +
+ + ); +}); diff --git a/packages/legend-application-data-cube/src/components/builder/source/LocalFileDataCubeSourceBuilder.tsx b/packages/legend-application-data-cube/src/components/builder/source/LocalFileDataCubeSourceBuilder.tsx index cf4ff4eb1d..00d2b15571 100644 --- a/packages/legend-application-data-cube/src/components/builder/source/LocalFileDataCubeSourceBuilder.tsx +++ b/packages/legend-application-data-cube/src/components/builder/source/LocalFileDataCubeSourceBuilder.tsx @@ -31,7 +31,7 @@ export const LocalFileDataCubeSourceBuilder = observer( text={`Currently, support for local file comes with the following limitations: - Only CSV files are supported, but not all variants of CSV files are supported (required header row, comma delimiter, single escape quote). - Data from uploaded file will not be stored nor shared. -- DataCube created with local file source cannot be saved.`} +- DataCube from uploaded file can be stored but when loading this, you will have to reupload source data.`} />
{ + const { sourceBuilder } = props; + + return ( +
+ +
+ { + sourceBuilder.processFile(event.target.files?.[0]); + }} + className="w-full" + /> +
+ {sourceBuilder.previewText !== undefined && ( +
+ +
+ )} +
+ ); + }, +); diff --git a/packages/legend-application-data-cube/src/stores/LegendDataCubeDataCubeEngine.ts b/packages/legend-application-data-cube/src/stores/LegendDataCubeDataCubeEngine.ts index 39d186cab1..780a5c6b1f 100644 --- a/packages/legend-application-data-cube/src/stores/LegendDataCubeDataCubeEngine.ts +++ b/packages/legend-application-data-cube/src/stores/LegendDataCubeDataCubeEngine.ts @@ -194,12 +194,89 @@ export class LegendDataCubeDataCubeEngine extends DataCubeEngine { RawLocalFileQueryDataCubeSource.serialization.fromJson(value); const source = new LocalFileDataCubeSource(); source.fileName = rawSource.fileName; + source.fileFormat = rawSource.fileFormat; source.count = rawSource.count; - source.db = rawSource.db; - source.model = rawSource.model; - source.runtime = rawSource.runtime; - source.schema = rawSource.schema; - source.table = rawSource.table; + + const { schemaName, tableName, tableSpec } = + LegendDataCubeDuckDBEngine.getTableDetailsByReference( + rawSource.dbReference, + ); + + const { model, database, schema, table, runtime } = + this._synthesizeMinimalModelContext({ + schemaName, + tableName, + tableColumns: tableSpec.map((col) => { + const column = new V1_Column(); + column.name = col[0] as string; + // TODO: confirm this is in accordance to engine + // check if we have a duckdb enum mapping + // See https://duckdb.org/docs/sql/data_types/overview.html + switch (col[1] as string) { + case 'BIT': { + column.type = new V1_Bit(); + break; + } + case 'BOOLEAN': { + // TODO: understand why boolean is not present in relationalDataType + column.type = new V1_VarChar(); + break; + } + case 'DATE': { + column.type = new V1_Date(); + break; + } + case 'DECIMAL': { + column.type = new V1_Decimal(); + break; + } + case 'DOUBLE': { + column.type = new V1_Double(); + break; + } + case 'FLOAT': { + column.type = new V1_Float(); + break; + } + case 'INTEGER': { + column.type = new V1_Integer(); + break; + } + case 'TINYINT': { + column.type = new 
V1_TinyInt(); + break; + } + case 'SMALLINT': { + column.type = new V1_SmallInt(); + break; + } + case 'BIGINT': { + column.type = new V1_BigInt(); + break; + } + case 'TIMESTAMP': { + column.type = new V1_Timestamp(); + break; + } + case 'VARCHAR': { + column.type = new V1_VarChar(); + break; + } + default: { + throw new UnsupportedOperationError( + `Can't ingest local file data: failed to find matching relational data type for DuckDB type '${col[1]}' when synthesizing table definition`, + ); + } + } + return column; + }), + }); + + source.db = database.path; + source.model = model; + source.table = table.name; + source.schema = schema.name; + source.runtime = runtime.path; const query = new V1_ClassInstance(); query.type = V1_ClassInstanceType.RELATION_STORE_ACCESSOR; @@ -797,93 +874,14 @@ export class LegendDataCubeDataCubeEngine extends DataCubeEngine { } } - async ingestLocalFileData( - data: string, - format: string, - ): Promise { - const { - schema: schemaName, - table: tableName, - tableSpec, - } = await this._duckDBEngine.ingestLocalFileData(data, format); - - const { model, database, schema, table, runtime } = - this._synthesizeMinimalModelContext({ - schemaName, - tableName, - tableColumns: tableSpec.map((col) => { - const column = new V1_Column(); - column.name = col[0] as string; - // TODO: confirm this is in accordance to engine - // check if we have a duckdb enum mapping - // See https://duckdb.org/docs/sql/data_types/overview.html - switch (col[1] as string) { - case 'BIT': { - column.type = new V1_Bit(); - break; - } - case 'BOOLEAN': { - // TODO: understand why boolean is not present in relationalDataType - column.type = new V1_VarChar(); - break; - } - case 'DATE': { - column.type = new V1_Date(); - break; - } - case 'DECIMAL': { - column.type = new V1_Decimal(); - break; - } - case 'DOUBLE': { - column.type = new V1_Double(); - break; - } - case 'FLOAT': { - column.type = new V1_Float(); - break; - } - case 'INTEGER': { - column.type = new 
V1_Integer(); - break; - } - case 'TININT': { - column.type = new V1_TinyInt(); - break; - } - case 'SMALLINT': { - column.type = new V1_SmallInt(); - break; - } - case 'BIGINT': { - column.type = new V1_BigInt(); - break; - } - case 'TIMESTAMP': { - column.type = new V1_Timestamp(); - break; - } - case 'VARCHAR': { - column.type = new V1_VarChar(); - break; - } - default: { - throw new UnsupportedOperationError( - `Can't ingest local file data: failed to find matching relational data type for DuckDB type '${col[1]}' when synthesizing table definition`, - ); - } - } - return column; - }), - }); + async ingestLocalFileData(data: string, format: string) { + const { dbReference, columnNames } = + await this._duckDBEngine.ingestLocalFileData(data, format); + return { dbReference, columnNames }; + } - const source = new LocalFileDataCubeSource(); - source.model = model; - source.runtime = runtime.path; - source.db = database.path; - source.schema = schema.name; - source.table = table.name; - return source; + async clearLocalFileIngestData() { + await this._duckDBEngine.clearLocalFileDataIngest(); } private _synthesizeMinimalModelContext(data: { diff --git a/packages/legend-application-data-cube/src/stores/LegendDataCubeDuckDBEngine.ts b/packages/legend-application-data-cube/src/stores/LegendDataCubeDuckDBEngine.ts index 1101667283..ad17ea5432 100644 --- a/packages/legend-application-data-cube/src/stores/LegendDataCubeDuckDBEngine.ts +++ b/packages/legend-application-data-cube/src/stores/LegendDataCubeDuckDBEngine.ts @@ -30,6 +30,7 @@ import { csvStringify, guaranteeNonNullable, UnsupportedOperationError, + uuid, } from '@finos/legend-shared'; import type { CachedDataCubeSource } from '@finos/legend-data-cube'; import { Type } from 'apache-arrow'; @@ -45,9 +46,14 @@ export class LegendDataCubeDuckDBEngine { // https://duckdb.org/docs/guides/meta/describe.html private static readonly COLUMN_NAME = 'column_name'; private static readonly COLUMN_TYPE = 'column_type'; + 
private static readonly TABLE_NAME = 'table_name'; // Options for creating csv using papa parser: https://www.papaparse.com/docs#config private static readonly ESCAPE_CHAR = `'`; private static readonly QUOTE_CHAR = `'`; + private static dbReferenceMap: Map< + string, + { schemaName: string; tableName: string; tableSpec: unknown[][] } + > = new Map(); private _database?: duckdb.AsyncDuckDB | undefined; @@ -58,6 +64,13 @@ export class LegendDataCubeDuckDBEngine { ); } + static getTableDetailsByReference(ref: string) { + return guaranteeNonNullable( + LegendDataCubeDuckDBEngine.dbReferenceMap.get(ref), + `Can't find reference ${ref}`, + ); + } + async initialize() { // Initialize DuckDB with WASM // See: https://duckdb.org/docs/api/wasm/instantiation.html @@ -128,6 +141,7 @@ export class LegendDataCubeDuckDBEngine { } async ingestLocalFileData(data: string, format: string) { + LegendDataCubeDuckDBEngine.dbReferenceMap.clear(); const schema = LegendDataCubeDuckDBEngine.DUCKDB_DEFAULT_SCHEMA_NAME; LegendDataCubeDuckDBEngine.ingestFileTableCounter += 1; const table = `${LegendDataCubeDuckDBEngine.INGEST_TABLE_NAME_PREFIX}${LegendDataCubeDuckDBEngine.ingestFileTableCounter}`; @@ -166,7 +180,41 @@ export class LegendDataCubeDuckDBEngine { ]); await connection.close(); - return { schema, table, tableSpec }; + const ref = uuid(); + LegendDataCubeDuckDBEngine.dbReferenceMap.set(ref, { + schemaName: schema, + tableName: table, + tableSpec, + }); + + return { + dbReference: ref, + columnNames: tableSpec.map((spec) => spec[0] as string), + }; + } + + async clearLocalFileDataIngest() { + const connection = await this.database.connect(); + const tablesResult = await connection.query(` + SELECT table_name + FROM information_schema.tables + WHERE table_schema = '${LegendDataCubeDuckDBEngine.DUCKDB_DEFAULT_SCHEMA_NAME}' + AND table_name LIKE '${LegendDataCubeDuckDBEngine.INGEST_TABLE_NAME_PREFIX}%'`); // Filter tables starting with the prefix + + const tableNames = tablesResult + 
.toArray() + .map((row) => row[LegendDataCubeDuckDBEngine.TABLE_NAME] as string); + + // NOTE: quote schema and table separately; a single quoted "schema.table" is + // treated by DuckDB as one identifier, so no ingest table would ever be dropped + await Promise.all( + tableNames.map((table) => + connection.query(` + DROP TABLE IF EXISTS "${LegendDataCubeDuckDBEngine.DUCKDB_DEFAULT_SCHEMA_NAME}"."${table}"; + `), + ), + ); + await connection.close(); } async runSQLQuery(sql: string) { diff --git a/packages/legend-application-data-cube/src/stores/builder/LegendDataCubeBuilderStore.tsx b/packages/legend-application-data-cube/src/stores/builder/LegendDataCubeBuilderStore.tsx index a0fd9e0ec3..d0fb94131a 100644 --- a/packages/legend-application-data-cube/src/stores/builder/LegendDataCubeBuilderStore.tsx +++ b/packages/legend-application-data-cube/src/stores/builder/LegendDataCubeBuilderStore.tsx @@ -41,6 +41,7 @@ import { ActionState, assertErrorThrown, formatDate, + guaranteeNonNullable, isString, uuid, } from '@finos/legend-shared'; @@ -60,6 +61,7 @@ import { LegendDataCubeBlockingWindowState } from '../../components/LegendDataCu import { LegendDataCubeDeleteConfirmation } from '../../components/builder/LegendDataCubeDeleteConfirmation.js'; import { LegendDataCubeAbout } from '../../components/builder/LegendDataCubeBuilder.js'; import { LegendDataCubeSourceViewer } from '../../components/builder/LegendDataCubeSourceViewer.js'; +import { LegendDataCubeSourceLoaderState } from './LegendDataCubeSourceLoaderState.js'; export class LegendDataCubeBuilderState { readonly uuid = uuid(); @@ -134,6 +136,7 @@ export class LegendDataCubeBuilderStore { readonly loadState = ActionState.create(); readonly loader: LegendDataCubeLoaderState; builder?: LegendDataCubeBuilderState | undefined; + readonly sourceLoader: LegendDataCubeSourceLoaderState; readonly sourceViewerDisplay: DisplayState; private passedFirstLoad = false; @@ -171,6 +174,7 @@ export class LegendDataCubeBuilderStore { 
this.creator = new LegendDataCubeCreatorState(this); this.loader = new LegendDataCubeLoaderState(this); + this.sourceLoader = new LegendDataCubeSourceLoaderState(this); this.saverDisplay = new 
LegendDataCubeBlockingWindowState( 'Save DataCube', () => , @@ -295,6 +299,27 @@ export class LegendDataCubeBuilderStore { const specification = DataCubeSpecification.serialization.fromJson( persistentDataCube.content, ); + + if ( + !this.saveState.hasSucceeded && + this.sourceLoader.isPartialSouce(specification.source._type as string) + ) { + this.sourceLoader.changeSourceBuilder( + specification.source._type as string, + ); + this.sourceLoader.setSource(specification.source); + this.sourceLoader.display.open(); + await new Promise((resolve) => { + const checkIfClosed = setInterval(() => { + if (!this.sourceLoader.display.isOpen) { + clearInterval(checkIfClosed); + resolve(); + } + }, 100); + }); + specification.source = guaranteeNonNullable(this.sourceLoader.source); + } + this.setBuilder( new LegendDataCubeBuilderState(specification, persistentDataCube), ); diff --git a/packages/legend-application-data-cube/src/stores/builder/LegendDataCubeSourceLoaderState.tsx b/packages/legend-application-data-cube/src/stores/builder/LegendDataCubeSourceLoaderState.tsx new file mode 100644 index 0000000000..c8153fd6a8 --- /dev/null +++ b/packages/legend-application-data-cube/src/stores/builder/LegendDataCubeSourceLoaderState.tsx @@ -0,0 +1,120 @@ +/** + * Copyright (c) 2020-present, Goldman Sachs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import { + ActionState, + assertErrorThrown, + UnsupportedOperationError, + type PlainObject, +} from '@finos/legend-shared'; +import { makeObservable, observable, action } from 'mobx'; +import type { LegendDataCubeBuilderStore } from './LegendDataCubeBuilderStore.js'; +import { + DEFAULT_TOOL_PANEL_WINDOW_CONFIG, + type DisplayState, + type DataCubeAlertService, +} from '@finos/legend-data-cube'; +import { LegendDataCubeSourceLoader } from '../../components/builder/LegendDataCubeSourceLoader.js'; +import type { LegendDataCubeDataCubeEngine } from '../LegendDataCubeDataCubeEngine.js'; +import type { LegendDataCubeApplicationStore } from '../LegendDataCubeBaseStore.js'; +import { type LegendDataCubeSourceLoaderBuilderState } from './source/loader/LegendDataCubeSourceLoaderBuilderState.js'; +import { LocalFileDataCubeSourceLoaderBuilderState } from './source/loader/LocalFileDataCubeSourceLoaderBuilderState.js'; +import { LOCAL_FILE_QUERY_DATA_CUBE_SOURCE_TYPE } from '../model/LocalFileDataCubeSource.js'; + +export class LegendDataCubeSourceLoaderState { + private readonly _application: LegendDataCubeApplicationStore; + private readonly _engine: LegendDataCubeDataCubeEngine; + private readonly _alertService: DataCubeAlertService; + + source: PlainObject | undefined; + + readonly display: DisplayState; + readonly searchState = ActionState.create(); + readonly finalizeState = ActionState.create(); + + sourceLoaderBuilder: LegendDataCubeSourceLoaderBuilderState; + + constructor(store: LegendDataCubeBuilderStore) { + makeObservable(this, { + source: observable, + setSource: action, + }); + + this._application = store.application; + this._engine = store.engine; + this._alertService = store.alertService; + this.sourceLoaderBuilder = this.createSourceLoaderBuilder( + LOCAL_FILE_QUERY_DATA_CUBE_SOURCE_TYPE, + ); + + this.display = store.layoutService.newDisplay( + 'Reupload Source Data', + () => , + { + ...DEFAULT_TOOL_PANEL_WINDOW_CONFIG, + width: 500, + minWidth: 500, 
+ }, + ); + } + + setSource(source: PlainObject) { + this.source = source; + } + + isPartialSouce(type: string): boolean { + if (type === LOCAL_FILE_QUERY_DATA_CUBE_SOURCE_TYPE) { + return true; + } + return false; + } + + changeSourceBuilder(type: string): void { + this.sourceLoaderBuilder = this.createSourceLoaderBuilder(type); + } + + private createSourceLoaderBuilder( + type: string, + ): LegendDataCubeSourceLoaderBuilderState { + // We can implement this as a switch when + switch (type) { + case LOCAL_FILE_QUERY_DATA_CUBE_SOURCE_TYPE: + return new LocalFileDataCubeSourceLoaderBuilderState( + this._application, + this._engine, + ); + default: + throw new UnsupportedOperationError( + `Can't create source builder for unsupported type '${type}'`, + ); + } + } + + async finalize() { + try { + this.finalizeState.inProgress(); + this.sourceLoaderBuilder.validateSourceData(this.source); + this.source = await this.sourceLoaderBuilder.generateSourceData(); + this.display.close(); + this.finalizeState.pass(); + } catch (error) { + assertErrorThrown(error); + this._alertService.alertError(error, { + message: `DataCube Load Failure: ${error.message}`, + }); + this.finalizeState.fail(); + } + } +} diff --git a/packages/legend-application-data-cube/src/stores/builder/source/LocalFileDataCubeSourceBuilderState.ts b/packages/legend-application-data-cube/src/stores/builder/source/LocalFileDataCubeSourceBuilderState.ts index b04d5cb743..fe4e7e8017 100644 --- a/packages/legend-application-data-cube/src/stores/builder/source/LocalFileDataCubeSourceBuilderState.ts +++ b/packages/legend-application-data-cube/src/stores/builder/source/LocalFileDataCubeSourceBuilderState.ts @@ -17,7 +17,7 @@ import { ActionState, csvStringify, - guaranteeType, + guaranteeNonNullable, IllegalStateError, parseCSVFile, type PlainObject, @@ -30,7 +30,6 @@ import type { LegendDataCubeApplicationStore } from '../../LegendDataCubeBaseSto import { action, makeObservable, observable } from 'mobx'; import type 
{ LegendDataCubeDataCubeEngine } from '../../LegendDataCubeDataCubeEngine.js'; import { - LocalFileDataCubeSource, LocalFileDataCubeSourceFormat, RawLocalFileQueryDataCubeSource, } from '../../model/LocalFileDataCubeSource.js'; @@ -39,7 +38,7 @@ export class LocalFileDataCubeSourceBuilderState extends LegendDataCubeSourceBui readonly processState = ActionState.create(); fileName?: string | undefined; - fileFormat?: string | undefined; + fileFormat?: LocalFileDataCubeSourceFormat | undefined; // NOTE: type string is suitable for CSV/Excel, etc. but will not be appropriate // for other format that we want to support, e.g. arrow/parquet fileData?: string | undefined; @@ -74,7 +73,7 @@ export class LocalFileDataCubeSourceBuilderState extends LegendDataCubeSourceBui this.fileName = fileName; } - setFileFormat(format: string | undefined) { + setFileFormat(format: LocalFileDataCubeSourceFormat | undefined) { this.fileFormat = format; } @@ -114,7 +113,7 @@ export class LocalFileDataCubeSourceBuilderState extends LegendDataCubeSourceBui csvStringify(result.data, { escapeChar: `'`, quoteChar: `'` }), ); this.setFileName(fileName); - this.setFileFormat(fileFormat); + this.setFileFormat(LocalFileDataCubeSourceFormat.CSV); this.setRowCount(result.data.length); this.setPreviewText( csvStringify(result.data.slice(0, 100), { @@ -160,19 +159,16 @@ export class LocalFileDataCubeSourceBuilderState extends LegendDataCubeSourceBui ); } - const source = guaranteeType( + const tableDetails = guaranteeNonNullable( await this._engine.ingestLocalFileData(this.fileData, this.fileFormat), - LocalFileDataCubeSource, - `Can't generate data source`, + `Can't generate reference for local file source`, ); const rawSource = new RawLocalFileQueryDataCubeSource(); - rawSource.count = this.rowCount; rawSource.fileName = this.fileName; - rawSource.db = source.db; - rawSource.model = source.model; - rawSource.schema = source.schema; - rawSource.table = source.table; - rawSource.runtime = source.runtime; 
+ rawSource.fileFormat = this.fileFormat; + rawSource.dbReference = tableDetails.dbReference; + rawSource.columnNames = tableDetails.columnNames; + rawSource.count = this.rowCount; return RawLocalFileQueryDataCubeSource.serialization.toJson(rawSource); } diff --git a/packages/legend-application-data-cube/src/stores/builder/source/loader/LegendDataCubeSourceLoaderBuilderState.ts b/packages/legend-application-data-cube/src/stores/builder/source/loader/LegendDataCubeSourceLoaderBuilderState.ts new file mode 100644 index 0000000000..cc5eb5197f --- /dev/null +++ b/packages/legend-application-data-cube/src/stores/builder/source/loader/LegendDataCubeSourceLoaderBuilderState.ts @@ -0,0 +1,38 @@ +/** + * Copyright (c) 2020-present, Goldman Sachs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { type PlainObject } from '@finos/legend-shared'; +import type { LegendDataCubeApplicationStore } from '../../../LegendDataCubeBaseStore.js'; +import type { LegendDataCubeDataCubeEngine } from '../../../LegendDataCubeDataCubeEngine.js'; + +export abstract class LegendDataCubeSourceLoaderBuilderState { + protected readonly _application: LegendDataCubeApplicationStore; + protected readonly _engine: LegendDataCubeDataCubeEngine; + + constructor( + application: LegendDataCubeApplicationStore, + engine: LegendDataCubeDataCubeEngine, + ) { + this._application = application; + this._engine = engine; + } + + abstract get isValid(): boolean; + + abstract generateSourceData(): Promise; + + abstract validateSourceData(source: PlainObject | undefined): boolean; +} diff --git a/packages/legend-application-data-cube/src/stores/builder/source/loader/LocalFileDataCubeSourceLoaderBuilderState.ts b/packages/legend-application-data-cube/src/stores/builder/source/loader/LocalFileDataCubeSourceLoaderBuilderState.ts new file mode 100644 index 0000000000..b27df4356f --- /dev/null +++ b/packages/legend-application-data-cube/src/stores/builder/source/loader/LocalFileDataCubeSourceLoaderBuilderState.ts @@ -0,0 +1,211 @@ +/** + * Copyright (c) 2020-present, Goldman Sachs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { + ActionState, + csvStringify, + guaranteeNonNullable, + IllegalStateError, + parseCSVFile, + type PlainObject, +} from '@finos/legend-shared'; +import { makeObservable, observable, action } from 'mobx'; +import type { LegendDataCubeApplicationStore } from '../../../LegendDataCubeBaseStore.js'; +import type { LegendDataCubeDataCubeEngine } from '../../../LegendDataCubeDataCubeEngine.js'; +import { + LocalFileDataCubeSourceFormat, + RawLocalFileQueryDataCubeSource, +} from '../../../model/LocalFileDataCubeSource.js'; +import { LegendDataCubeSourceLoaderBuilderState } from './LegendDataCubeSourceLoaderBuilderState.js'; + +export class LocalFileDataCubeSourceLoaderBuilderState extends LegendDataCubeSourceLoaderBuilderState { + readonly processState = ActionState.create(); + + fileName?: string | undefined; + fileFormat?: LocalFileDataCubeSourceFormat | undefined; + // NOTE: type string is suitable for CSV/Excel, etc. but will not be appropriate + // for other format that we want to support, e.g. 
arrow/parquet + fileData?: string | undefined; + previewText?: string | undefined; + rowCount?: number | undefined; + columnNames?: string[] | undefined; + + constructor( + application: LegendDataCubeApplicationStore, + engine: LegendDataCubeDataCubeEngine, + ) { + super(application, engine); + + makeObservable(this, { + fileName: observable, + setFileName: action, + + columnNames: observable, + setColumnNames: action, + + fileFormat: observable, + setFileFormat: action, + + fileData: observable, + setFileData: action, + + previewText: observable, + setPreviewText: action, + + rowCount: observable, + setRowCount: action, + }); + } + + setFileName(fileName: string | undefined) { + this.fileName = fileName; + } + + setColumnNames(columnNames: string[] | undefined) { + this.columnNames = columnNames; + } + + setFileFormat(format: LocalFileDataCubeSourceFormat | undefined) { + this.fileFormat = format; + } + + setFileData(data: string | undefined) { + this.fileData = data; + } + + setRowCount(count: number | undefined) { + this.rowCount = count; + } + + setPreviewText(text: string | undefined) { + this.previewText = text; + } + + processFile(file: File | undefined) { + this.setFileName(undefined); + this.setColumnNames(undefined); + this.setFileFormat(undefined); + this.setFileData(undefined); + this.setRowCount(undefined); + this.setPreviewText(undefined); + + if (!file) { + return; + } + + this.processState.inProgress(); + + const fileName = file.name; + const fileFormat = fileName.split('.').pop(); + + switch (fileFormat?.toLowerCase()) { + case LocalFileDataCubeSourceFormat.CSV.toLowerCase(): { + parseCSVFile(file, { + complete: (result) => { + this.setFileData( + csvStringify(result.data, { escapeChar: `'`, quoteChar: `'` }), + ); + this.setColumnNames( + Object.keys((result.data.at(0) ?? {}) as object).filter( + (key) => key !== '', + ), + ); + this.setFileName(fileName); + this.setFileFormat(LocalFileDataCubeSourceFormat.CSV); + this.setRowCount(result.data.length); + 
this.setPreviewText( + csvStringify(result.data.slice(0, 100), { + escapeChar: `'`, + quoteChar: `'`, + }), + ); + }, + header: true, + dynamicTyping: false, + skipEmptyLines: true, + }); + break; + } + default: { + this.processState.complete(); + throw new IllegalStateError( + `Can't process file with format '${fileFormat}'`, + ); + } + } + + this.processState.complete(); + } + + override get isValid(): boolean { + return Boolean(this.fileData); + } + + override async generateSourceData(): Promise { + if ( + !this.fileData || + !this.fileName || + !this.fileFormat || + this.rowCount === undefined + ) { + throw new IllegalStateError( + `Can't generate source data: file data and information is not set`, + ); + } + + await this._engine.clearLocalFileIngestData(); + + const tableDetails = guaranteeNonNullable( + await this._engine.ingestLocalFileData(this.fileData, this.fileFormat), + `Can't generate reference for local file source`, + ); + // TODO: might have to store columnNames for validation purpose + const rawSource = new RawLocalFileQueryDataCubeSource(); + rawSource.fileName = this.fileName; + rawSource.fileFormat = this.fileFormat; + rawSource.dbReference = tableDetails.dbReference; + rawSource.columnNames = tableDetails.columnNames; + rawSource.count = this.rowCount; + + return RawLocalFileQueryDataCubeSource.serialization.toJson(rawSource); + } + + override validateSourceData(source: PlainObject) { + const deserializeSource = + RawLocalFileQueryDataCubeSource.serialization.fromJson(source); + const intersectingColumns = guaranteeNonNullable( + this.columnNames?.filter((col) => + deserializeSource.columnNames.includes(col), + ), + ); + if (deserializeSource.fileName !== this.fileName) { + throw new Error( + `File name mismatch: Expected ${deserializeSource.fileName}, got ${this.fileName}`, + ); + } + if (deserializeSource.fileFormat !== this.fileFormat) { + throw new Error( + `File format mismatch: Expected ${deserializeSource.fileFormat}, got 
${this.fileFormat}`, + ); + } + if (intersectingColumns.length !== deserializeSource.columnNames.length) { + throw new Error( + `Columns mismatch: Expected [${deserializeSource.columnNames.join(',')}], got [${this.columnNames?.join(',')}]`, + ); + } + return true; + } +} diff --git a/packages/legend-application-data-cube/src/stores/model/LocalFileDataCubeSource.ts b/packages/legend-application-data-cube/src/stores/model/LocalFileDataCubeSource.ts index 5bcb77232c..c13597911e 100644 --- a/packages/legend-application-data-cube/src/stores/model/LocalFileDataCubeSource.ts +++ b/packages/legend-application-data-cube/src/stores/model/LocalFileDataCubeSource.ts @@ -15,13 +15,12 @@ */ import { DataCubeSource } from '@finos/legend-data-cube'; -import { type V1_PureModelContextData } from '@finos/legend-graph'; import { SerializationFactory, usingConstantValueSchema, type PlainObject, } from '@finos/legend-shared'; -import { createModelSchema, primitive, raw } from 'serializr'; +import { createModelSchema, list, primitive } from 'serializr'; export const LOCAL_FILE_QUERY_DATA_CUBE_SOURCE_TYPE = 'localFile'; @@ -31,7 +30,7 @@ export enum LocalFileDataCubeSourceFormat { } export class LocalFileDataCubeSource extends DataCubeSource { - model!: PlainObject; + model!: PlainObject; runtime!: string; db!: string; schema!: string; @@ -42,26 +41,20 @@ export class LocalFileDataCubeSource extends DataCubeSource { } export class RawLocalFileQueryDataCubeSource { - model!: PlainObject; - runtime!: string; - db!: string; - schema!: string; - table!: string; - count!: number; fileName!: string; fileFormat!: LocalFileDataCubeSourceFormat; + dbReference!: string; + count!: number; + columnNames!: string[]; static readonly serialization = new SerializationFactory( createModelSchema(RawLocalFileQueryDataCubeSource, { _type: usingConstantValueSchema(LOCAL_FILE_QUERY_DATA_CUBE_SOURCE_TYPE), - count: primitive(), - db: primitive(), fileFormat: primitive(), fileName: primitive(), - model: raw(), 
- runtime: primitive(), - schema: primitive(), - table: primitive(), + count: primitive(), + dbReference: primitive(), + columnNames: list(primitive()), }), ); } diff --git a/packages/legend-data-cube/src/stores/core/model/CachedDataCubeSource.ts b/packages/legend-data-cube/src/stores/core/model/CachedDataCubeSource.ts index 740fdff164..198a7b4ce4 100644 --- a/packages/legend-data-cube/src/stores/core/model/CachedDataCubeSource.ts +++ b/packages/legend-data-cube/src/stores/core/model/CachedDataCubeSource.ts @@ -14,12 +14,11 @@ * limitations under the License. */ -import type { V1_PureModelContextData } from '@finos/legend-graph'; import { DataCubeSource } from './DataCubeSource.js'; import type { PlainObject } from '@finos/legend-shared'; export class CachedDataCubeSource extends DataCubeSource { - model!: PlainObject; + model!: PlainObject; runtime!: string; db!: string; schema!: string;