diff --git a/lib/include/duckdb/web/io/web_filesystem.h b/lib/include/duckdb/web/io/web_filesystem.h
index 71a1ef780..96638f992 100644
--- a/lib/include/duckdb/web/io/web_filesystem.h
+++ b/lib/include/duckdb/web/io/web_filesystem.h
@@ -209,6 +209,8 @@ class WebFileSystem : public duckdb::FileSystem {
                                               DataBuffer file_buffer);
     /// Try to drop a specific file
     bool TryDropFile(std::string_view file_name);
+    /// Drop a specific file
+    void DropFile(std::string_view file_name);
     /// Drop all files without references (including buffers)
     void DropDanglingFiles();
     /// Configure file statistics
diff --git a/lib/js-stubs.js b/lib/js-stubs.js
index 2371d1e93..86a8cfbfc 100644
--- a/lib/js-stubs.js
+++ b/lib/js-stubs.js
@@ -15,6 +15,10 @@ addToLibrary({
     duckdb_web_fs_file_sync: function (fileId) {
         return globalThis.DUCKDB_RUNTIME.syncFile(Module, fileId);
     },
+    duckdb_web_fs_file_drop_file__sig: 'vpi',
+    duckdb_web_fs_file_drop_file: function (fileName, fileNameLen) {
+        return globalThis.DUCKDB_RUNTIME.dropFile(Module, fileName, fileNameLen);
+    },
     duckdb_web_fs_file_close__sig: 'vi',
     duckdb_web_fs_file_close: function (fileId) {
         return globalThis.DUCKDB_RUNTIME.closeFile(Module, fileId);
diff --git a/lib/src/io/web_filesystem.cc b/lib/src/io/web_filesystem.cc
index 8faaa8ca5..aa6cdfa28 100644
--- a/lib/src/io/web_filesystem.cc
+++ b/lib/src/io/web_filesystem.cc
@@ -117,6 +117,7 @@ RT_FN(void duckdb_web_fs_file_close(size_t file_id), {
     auto &infos = GetLocalState();
     infos.handles.erase(file_id);
 });
+RT_FN(void duckdb_web_fs_file_drop_file(const char *fileName, size_t pathLen), {});
 RT_FN(void duckdb_web_fs_file_truncate(size_t file_id, double new_size), { GetOrOpen(file_id).Truncate(new_size); });
 RT_FN(time_t duckdb_web_fs_file_get_last_modified_time(size_t file_id), {
     auto &file = GetOrOpen(file_id);
@@ -455,6 +456,7 @@ void WebFileSystem::DropDanglingFiles() {
     for (auto &[file_id, file] : files_by_id_) {
         if (file->handle_count_ == 0) {
             files_by_name_.erase(file->file_name_);
+            DropFile(file->file_name_);
             if (file->data_url_.has_value()) {
                 files_by_url_.erase(file->data_url_.value());
             }
@@ -483,6 +485,13 @@ bool WebFileSystem::TryDropFile(std::string_view file_name) {
     return false;
 }

+/// Drop a file
+void WebFileSystem::DropFile(std::string_view file_name) {
+    DEBUG_TRACE();
+    std::string fileNameS = std::string{file_name};
+    duckdb_web_fs_file_drop_file(fileNameS.c_str(), fileNameS.size());
+}
+
 /// Write the global filesystem info
 rapidjson::Value WebFileSystem::WriteGlobalFileInfo(rapidjson::Document &doc, uint32_t cache_epoch) {
     DEBUG_TRACE();
diff --git a/lib/src/webdb.cc b/lib/src/webdb.cc
index 981e5d618..23639047c 100644
--- a/lib/src/webdb.cc
+++ b/lib/src/webdb.cc
@@ -912,18 +912,29 @@ arrow::Status WebDB::RegisterFileBuffer(std::string_view file_name, std::unique_
 /// Drop all files
 arrow::Status WebDB::DropFiles() {
     file_page_buffer_->DropDanglingFiles();
-    pinned_web_files_.clear();
+    std::vector<std::string> files_to_drop;
+    for (const auto& [key, handle] : pinned_web_files_) {
+        files_to_drop.push_back(handle->GetName());
+    }
+    for (const auto& fileName : files_to_drop) {
+        arrow::Status status = DropFile(fileName);
+        if (!status.ok()) {
+            return arrow::Status::Invalid("Failed to drop file: " + fileName);
+        }
+    }
     if (auto fs = io::WebFileSystem::Get()) {
         fs->DropDanglingFiles();
     }
     return arrow::Status::OK();
 }
 /// Drop a file
-arrow::Status WebDB::DropFile(std::string_view file_name) {
-    file_page_buffer_->TryDropFile(file_name);
-    pinned_web_files_.erase(file_name);
+arrow::Status WebDB::DropFile(std::string_view fileName) {
+    file_page_buffer_->TryDropFile(fileName);
+    pinned_web_files_.erase(fileName);
     if (auto fs = io::WebFileSystem::Get()) {
-        if (!fs->TryDropFile(file_name)) {
+        if (fs->TryDropFile(fileName)) {
+            fs->DropFile(fileName);
+        } else {
             return arrow::Status::Invalid("file is in use");
         }
     }
diff --git a/packages/duckdb-wasm/src/bindings/bindings_base.ts b/packages/duckdb-wasm/src/bindings/bindings_base.ts
index 571f9fe48..af940318e 100644
--- a/packages/duckdb-wasm/src/bindings/bindings_base.ts
+++ b/packages/duckdb-wasm/src/bindings/bindings_base.ts
@@ -473,7 +473,11 @@ export abstract class DuckDBBindingsBase implements DuckDBBindings {
         if (protocol === DuckDBDataProtocol.BROWSER_FSACCESS && handle instanceof FileSystemFileHandle) {
             // handle is an async handle, should convert to sync handle
             const fileHandle: FileSystemFileHandle = handle as any;
-            handle = (await fileHandle.createSyncAccessHandle()) as any;
+            try {
+                handle = (await fileHandle.createSyncAccessHandle()) as any;
+            } catch (e: any) {
+                throw new Error(e.message + ":" + name);
+            }
         }
         const [s, d, n] = callSRet(
             this.mod,
diff --git a/packages/duckdb-wasm/src/bindings/runtime.ts b/packages/duckdb-wasm/src/bindings/runtime.ts
index cb501c3a6..4bc360db3 100644
--- a/packages/duckdb-wasm/src/bindings/runtime.ts
+++ b/packages/duckdb-wasm/src/bindings/runtime.ts
@@ -140,6 +140,7 @@ export interface DuckDBRuntime {
     openFile(mod: DuckDBModule, fileId: number, flags: FileFlags): void;
     syncFile(mod: DuckDBModule, fileId: number): void;
     closeFile(mod: DuckDBModule, fileId: number): void;
+    dropFile(mod: DuckDBModule, fileNamePtr: number, fileNameLen: number): void;
     getLastFileModificationTime(mod: DuckDBModule, fileId: number): number;
     truncateFile(mod: DuckDBModule, fileId: number, newSize: number): void;
     readFile(mod: DuckDBModule, fileId: number, buffer: number, bytes: number, location: number): number;
@@ -155,6 +156,7 @@
     checkFile(mod: DuckDBModule, pathPtr: number, pathLen: number): boolean;
     removeFile(mod: DuckDBModule, pathPtr: number, pathLen: number): void;

+    // Prepare a file handle that can only be acquired asynchronously
     prepareDBFileHandle?: (path: string, protocol: DuckDBDataProtocol) => Promise;

     // Call a scalar UDF function
@@ -177,6 +179,7 @@ export const DEFAULT_RUNTIME: DuckDBRuntime = {
     openFile: (_mod: DuckDBModule, _fileId: number, flags: FileFlags): void => {},
     syncFile: (_mod: DuckDBModule, _fileId: number): void => {},
     closeFile: (_mod: DuckDBModule, _fileId: number): void => {},
+    dropFile: (_mod: DuckDBModule, _fileNamePtr: number, _fileNameLen: number): void => {},
     getLastFileModificationTime: (_mod: DuckDBModule, _fileId: number): number => {
         return 0;
     },
diff --git a/packages/duckdb-wasm/src/bindings/runtime_browser.ts b/packages/duckdb-wasm/src/bindings/runtime_browser.ts
index ef01d6b4c..8e46955fe 100644
--- a/packages/duckdb-wasm/src/bindings/runtime_browser.ts
+++ b/packages/duckdb-wasm/src/bindings/runtime_browser.ts
@@ -1,5 +1,5 @@
-import { StatusCode } from '../status';
-import { addS3Headers, getHTTPUrl } from '../utils';
+import {StatusCode} from '../status';
+import {addS3Headers, getHTTPUrl} from '../utils';

 import {
     callSRet,
@@ -23,7 +23,7 @@ export const BROWSER_RUNTIME: DuckDBRuntime & {
     _files: Map;
     _fileInfoCache: Map;
     _globalFileInfo: DuckDBGlobalFileInfo | null;
-    _preparedHandles: Record;
+    _preparedHandles: Record;
     getFileInfo(mod: DuckDBModule, fileId: number): DuckDBFileInfo | null;
     getGlobalFileInfo(mod: DuckDBModule): DuckDBGlobalFileInfo | null;
@@ -93,7 +93,7 @@ export const BROWSER_RUNTIME: DuckDBRuntime & {
             if (info == null) {
                 return null;
             }
-            BROWSER_RUNTIME._globalFileInfo = { ...info, blob: null } as DuckDBGlobalFileInfo;
+            BROWSER_RUNTIME._globalFileInfo = { ...info, blob: null} as DuckDBGlobalFileInfo;
             return BROWSER_RUNTIME._globalFileInfo;
         } catch (e: any) {
@@ -137,13 +137,17 @@ export const BROWSER_RUNTIME: DuckDBRuntime & {
                 }
                 throw e;
             });
-            const handle = await fileHandle.createSyncAccessHandle();
-            BROWSER_RUNTIME._preparedHandles[path] = handle;
-            return {
-                path,
-                handle,
-                fromCached: false,
-            };
+            try {
+                const handle = await fileHandle.createSyncAccessHandle();
+                BROWSER_RUNTIME._preparedHandles[path] = handle;
+                return {
+                    path,
+                    handle,
+                    fromCached: false,
+                };
+            } catch (e: any) {
+                throw new Error(e.message + ":" + name);
+            }
         };
         const result: PreparedDBFileHandle[] = [];
         for (const filePath of filePaths) {
@@ -485,9 +489,25 @@ export const BROWSER_RUNTIME: DuckDBRuntime & {
                 if (!handle) {
                     throw new Error(`No OPFS access handle registered with name: ${file.fileName}`);
                 }
-                handle.flush();
-                handle.close();
-                BROWSER_RUNTIME._files.delete(file.fileName);
+                return handle.flush();
+            }
+        }
+    },
+    dropFile: (mod: DuckDBModule, fileNamePtr: number, fileNameLen: number) => {
+        const fileName = readString(mod, fileNamePtr, fileNameLen);
+        const handle: FileSystemSyncAccessHandle = BROWSER_RUNTIME._files?.get(fileName);
+        if (handle) {
+            BROWSER_RUNTIME._files.delete(fileName);
+            if (handle instanceof FileSystemSyncAccessHandle) {
+                try {
+                    handle.flush();
+                    handle.close();
+                } catch (e: any) {
+                    throw new Error(`Cannot drop file with name: ${fileName}`);
+                }
+            }
+            if (handle instanceof Blob) {
+                // nothing
             }
         }
     },
diff --git a/packages/duckdb-wasm/src/bindings/runtime_node.ts b/packages/duckdb-wasm/src/bindings/runtime_node.ts
index aa63822e2..2f92368bc 100644
--- a/packages/duckdb-wasm/src/bindings/runtime_node.ts
+++ b/packages/duckdb-wasm/src/bindings/runtime_node.ts
@@ -127,6 +127,7 @@ export const NODE_RUNTIME: DuckDBRuntime & {
         }
         return 0;
     },
+    dropFile: (mod: DuckDBModule, _fileNamePtr: number, _fileNameLen: number) => {},
     truncateFile: (mod: DuckDBModule, fileId: number, newSize: number) => {
         try {
             const file = NODE_RUNTIME.resolveFileInfo(mod, fileId);
diff --git a/packages/duckdb-wasm/test/opfs.test.ts b/packages/duckdb-wasm/test/opfs.test.ts
index 407e5fc4a..ddf52eafb 100644
--- a/packages/duckdb-wasm/test/opfs.test.ts
+++ b/packages/duckdb-wasm/test/opfs.test.ts
@@ -44,7 +44,7 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo
         removeFiles();
     });

-    describe('Load Data', () => {
+    describe('Load Data in OPFS', () => {
        it('Imporet Small Parquet file', async () => {
            await conn.send(`CREATE TABLE stu AS SELECT * FROM "${baseDir}/uni/studenten.parquet"`);
            await conn.send(`CHECKPOINT;`);
@@ -71,7 +71,7 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo
            expect(table.getChildAt(0)?.get(0)).toBeGreaterThan(60_000);
        });

-        it('Load Existing DB File in OPFS', async () => {
+        it('Load Existing DB File', async () => {
            await conn.send(`CREATE TABLE tmp AS SELECT * FROM "${baseDir}/tpch/0_01/parquet/lineitem.parquet"`);
            await conn.send(`CHECKPOINT;`);
            await conn.close();
@@ -96,7 +96,57 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo
            expect(table.getChildAt(0)?.get(0)).toBeGreaterThan(60_000);
        });

-        it('Export as CSV to OPFS + Load CSV that are already in OPFS', async () => {
+        it('Load Parquet file that is already in OPFS', async () => {
+            const parquetBuffer = await fetch(`${baseDir}/tpch/0_01/parquet/lineitem.parquet`).then(res =>
+                res.arrayBuffer(),
+            );
+            const opfsRoot = await navigator.storage.getDirectory();
+            const fileHandle = await opfsRoot.getFileHandle('test.parquet', {create: true});
+            const writable = await fileHandle.createWritable();
+            await writable.write(parquetBuffer);
+            await writable.close();
+
+            await db.registerFileHandle('test.parquet', fileHandle, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true);
+            await conn.send(`CREATE TABLE lineitem1 AS SELECT * FROM read_parquet('test.parquet')`);
+            await conn.send(`CHECKPOINT;`);
+            await conn.send(`CREATE TABLE lineitem2 AS SELECT * FROM read_parquet('test.parquet')`);
+            await conn.send(`CHECKPOINT;`);
+            await conn.send(`CREATE TABLE lineitem3 AS SELECT * FROM read_parquet('test.parquet')`);
+            await conn.send(`CHECKPOINT;`);
+
+            {
+                const result1 = await conn.send(`SELECT count(*)::INTEGER as cnt FROM lineitem1;`);
+                const batches1 = [];
+                for await (const batch of result1) {
+                    batches1.push(batch);
+                }
+                const table1 = await new arrow.Table<{ cnt: arrow.Int }>(batches1);
+                expect(table1.getChildAt(0)?.get(0)).toBeGreaterThan(60_000);
+            }
+
+            {
+                const result2 = await conn.send(`SELECT count(*)::INTEGER as cnt FROM lineitem2;`);
+                const batches2 = [];
+                for await (const batch of result2) {
+                    batches2.push(batch);
+                }
+                const table2 = await new arrow.Table<{ cnt: arrow.Int }>(batches2);
+                expect(table2.getChildAt(0)?.get(0)).toBeGreaterThan(60_000);
+            }
+
+            {
+                const result3 = await conn.send(`SELECT count(*)::INTEGER as cnt FROM lineitem3;`);
+                const batches3 = [];
+                for await (const batch of result3) {
+                    batches3.push(batch);
+                }
+                const table3 = await new arrow.Table<{ cnt: arrow.Int }>(batches3);
+                expect(table3.getChildAt(0)?.get(0)).toBeGreaterThan(60_000);
+            }
+
+        });
+
+        it('Drop File + Export as CSV to OPFS + Load CSV', async () => {
            const opfsRoot = await navigator.storage.getDirectory();
            const testHandle = await opfsRoot.getFileHandle('test.csv', {create: true});
            await db.registerFileHandle('test.csv', testHandle, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true);
@@ -117,29 +167,64 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo
            }
            const table = await new arrow.Table<{ cnt: arrow.Int }>(batches);
            expect(table.getChildAt(0)?.get(0)).toBeGreaterThan(60_000);
+
+            await db.dropFile('test.csv');
        });

-        it('Load Parquet file that are already in OPFS', async () => {
-            const parquetBuffer = await fetch(`${baseDir}/tpch/0_01/parquet/lineitem.parquet`).then(res =>
-                res.arrayBuffer(),
-            );
+
+        it('Drop Files + Export as CSV to OPFS + Load CSV', async () => {
            const opfsRoot = await navigator.storage.getDirectory();
-            const fileHandle = await opfsRoot.getFileHandle('test.parquet', {create: true});
-            const writable = await fileHandle.createWritable();
-            await writable.write(parquetBuffer);
-            await writable.close();
+            const testHandle1 = await opfsRoot.getFileHandle('test1.csv', {create: true});
+            const testHandle2 = await opfsRoot.getFileHandle('test2.csv', {create: true});
+            const testHandle3 = await opfsRoot.getFileHandle('test3.csv', {create: true});
+            await db.registerFileHandle('test1.csv', testHandle1, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true);
+            await db.registerFileHandle('test2.csv', testHandle2, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true);
+            await db.registerFileHandle('test3.csv', testHandle3, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true);

-            await db.registerFileHandle('test.parquet', fileHandle, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true);
-            await conn.send(`CREATE TABLE lineitem AS SELECT * FROM read_parquet('test.parquet')`);
-            await conn.send(`CHECKPOINT;`);
+            await conn.send(`CREATE TABLE zzz AS SELECT * FROM "${baseDir}/tpch/0_01/parquet/lineitem.parquet"`);
+            await conn.send(`COPY (SELECT * FROM zzz) TO 'test1.csv'`);
+            await conn.send(`COPY (SELECT * FROM zzz) TO 'test2.csv'`);
+            await conn.send(`COPY (SELECT * FROM zzz) TO 'test3.csv'`);
+            await conn.close();

-            const result = await conn.send(`SELECT count(*)::INTEGER as cnt FROM lineitem;`);
-            const batches = [];
-            for await (const batch of result) {
-                batches.push(batch);
+            await db.dropFiles();
+            await db.reset();
+
+            await db.open({});
+            conn = await db.connect();
+            await db.registerFileHandle('test1.csv', testHandle1, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true);
+            await db.registerFileHandle('test2.csv', testHandle2, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true);
+            await db.registerFileHandle('test3.csv', testHandle3, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true);
+
+            {
+                const result1 = await conn.send(`SELECT count(*)::INTEGER as cnt FROM 'test1.csv';`);
+                const batches1 = [];
+                for await (const batch of result1) {
+                    batches1.push(batch);
            }
+                const table1 = await new arrow.Table<{ cnt: arrow.Int }>(batches1);
+                expect(table1.getChildAt(0)?.get(0)).toBeGreaterThan(60_000);
+            }
-            const table = await new arrow.Table<{ cnt: arrow.Int }>(batches);
-            expect(table.getChildAt(0)?.get(0)).toBeGreaterThan(60_000);
+            {
+                const result2 = await conn.send(`SELECT count(*)::INTEGER as cnt FROM 'test2.csv';`);
+                const batches2 = [];
+                for await (const batch of result2) {
+                    batches2.push(batch);
+                }
+                const table2 = await new arrow.Table<{ cnt: arrow.Int }>(batches2);
+                expect(table2.getChildAt(0)?.get(0)).toBeGreaterThan(60_000);
+            }
+            {
+                const result3 = await conn.send(`SELECT count(*)::INTEGER as cnt FROM 'test3.csv';`);
+                const batches3 = [];
+                for await (const batch of result3) {
+                    batches3.push(batch);
+                }
+                const table3 = await new arrow.Table<{ cnt: arrow.Int }>(batches3);
+                expect(table3.getChildAt(0)?.get(0)).toBeGreaterThan(60_000);
+            }
+
+            await db.dropFiles();
        });
    });
@@ -151,6 +236,12 @@ export function testOPFS(baseDir: string, bundle: () => duckdb.DuckDBBundle): vo
        });
        await opfsRoot.removeEntry('test.csv').catch(() => {
        });
+        await opfsRoot.removeEntry('test1.csv').catch(() => {
+        });
+        await opfsRoot.removeEntry('test2.csv').catch(() => {
+        });
+        await opfsRoot.removeEntry('test3.csv').catch(() => {
+        });
        await opfsRoot.removeEntry('test.parquet').catch(() => {
        });
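
Usage note: a minimal TypeScript sketch of how the new drop APIs are exercised from the JavaScript side, based on the OPFS tests above. The file name 'demo.csv' and the `@duckdb/duckdb-wasm` import path are illustrative assumptions; `registerFileHandle`, `dropFile`, and `dropFiles` are the calls added or used in this change.

    // Sketch: write a query result into an OPFS file, then release the handle.
    import * as duckdb from '@duckdb/duckdb-wasm';

    async function exportAndRelease(db: duckdb.AsyncDuckDB): Promise<void> {
        // Create (or open) the target file in the origin-private file system.
        const opfsRoot = await navigator.storage.getDirectory();
        const handle = await opfsRoot.getFileHandle('demo.csv', { create: true });

        // Register it with DuckDB-Wasm; the worker converts this to a sync access handle.
        await db.registerFileHandle('demo.csv', handle, duckdb.DuckDBDataProtocol.BROWSER_FSACCESS, true);

        const conn = await db.connect();
        await conn.send(`COPY (SELECT 42 AS answer) TO 'demo.csv'`);
        await conn.close();

        // New in this change: dropFile flushes and closes the OPFS access handle
        // (see dropFile in runtime_browser.ts), so the file can be reopened later.
        await db.dropFile('demo.csv');
        // db.dropFiles() releases every registered handle in one call.
    }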