Skip to content

Commit

Permalink
Merge remote-tracking branch 'duckdb/main' into next
Browse files Browse the repository at this point in the history
  • Loading branch information
carlopi committed Jan 16, 2025
2 parents b315e18 + 0f29e56 commit 0f0f7a8
Show file tree
Hide file tree
Showing 25 changed files with 763 additions and 105 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright 2018-2024 Stichting DuckDB Foundation
Copyright 2018-2025 Stichting DuckDB Foundation

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

Expand Down
2 changes: 1 addition & 1 deletion examples/esbuild-node/index.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import * as duckdb from '@duckdb/duckdb-wasm';
import * as arrow from 'apache-arrow';
import path from 'path';
import Worker from 'web-worker';
import { createRequire } from 'module';

const require = createRequire(import.meta.url);
const DUCKDB_DIST = path.dirname(require.resolve('@duckdb/duckdb-wasm'));
const Worker = require('web-worker');

(async () => {
try {
Expand Down
2 changes: 2 additions & 0 deletions lib/include/duckdb/web/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ struct WebDBConfig {
std::optional<int8_t> access_mode = std::nullopt;
/// The thread count
uint32_t maximum_threads = (STATIC_WEBDB_FEATURES & (1 << WebDBFeature::THREADS)) ? 4 : 1;
/// The direct io flag
bool use_direct_io = false;
/// The query config
QueryConfig query = {
.cast_bigint_to_double = std::nullopt,
Expand Down
10 changes: 9 additions & 1 deletion lib/include/duckdb/web/io/web_filesystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,13 @@ class WebFileSystem : public duckdb::FileSystem {
/// Delete copy constructor
WebFileHandle(const WebFileHandle &) = delete;
/// Destructor
virtual ~WebFileHandle() { Close(); }
virtual ~WebFileHandle() {
try {
Close();
} catch (...) {
// Avoid crashes if Close happens to throw
}
}
/// Get the file name
auto &GetName() const { return file_->file_name_; }
/// Resolve readahead
Expand Down Expand Up @@ -209,6 +215,8 @@ class WebFileSystem : public duckdb::FileSystem {
DataBuffer file_buffer);
/// Try to drop a specific file
bool TryDropFile(std::string_view file_name);
/// drop a specific file
void DropFile(std::string_view file_name);
/// Drop all files without references (including buffers)
void DropDanglingFiles();
/// Configure file statistics
Expand Down
4 changes: 4 additions & 0 deletions lib/js-stubs.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ addToLibrary({
duckdb_web_fs_file_sync: function (fileId) {
return globalThis.DUCKDB_RUNTIME.syncFile(Module, fileId);
},
duckdb_web_fs_file_drop_file__sig: 'vpi',
duckdb_web_fs_file_drop_file: function (fileName, fileNameLen) {
return globalThis.DUCKDB_RUNTIME.dropFile(Module, fileName, fileNameLen);
},
duckdb_web_fs_file_close__sig: 'vi',
duckdb_web_fs_file_close: function (fileId) {
return globalThis.DUCKDB_RUNTIME.closeFile(Module, fileId);
Expand Down
4 changes: 4 additions & 0 deletions lib/src/arrow_type_mapping.cc
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ arrow::Result<duckdb::LogicalType> mapArrowTypeToDuckDB(const arrow::DataType& t
case arrow::Type::type::EXTENSION:
case arrow::Type::type::SPARSE_UNION:
case arrow::Type::type::DENSE_UNION:
case arrow::Type::type::STRING_VIEW:
case arrow::Type::type::BINARY_VIEW:
case arrow::Type::type::LIST_VIEW:
case arrow::Type::type::LARGE_LIST_VIEW:
return arrow::Status::NotImplemented("DuckDB type mapping for: ", type.ToString());
}
return duckdb::LogicalTypeId::INVALID;
Expand Down
3 changes: 3 additions & 0 deletions lib/src/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ WebDBConfig WebDBConfig::ReadFrom(std::string_view args_json) {
if (doc.HasMember("allowUnsignedExtensions") && doc["allowUnsignedExtensions"].IsBool()) {
config.allow_unsigned_extensions = doc["allowUnsignedExtensions"].GetBool();
}
if (doc.HasMember("useDirectIO") && doc["useDirectIO"].IsBool()) {
config.use_direct_io = doc["useDirectIO"].GetBool();
}
if (doc.HasMember("query") && doc["query"].IsObject()) {
auto q = doc["query"].GetObject();
if (q.HasMember("queryPollingInterval") && q["queryPollingInterval"].IsNumber()) {
Expand Down
23 changes: 18 additions & 5 deletions lib/src/io/web_filesystem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ RT_FN(void duckdb_web_fs_file_close(size_t file_id), {
auto &infos = GetLocalState();
infos.handles.erase(file_id);
});
RT_FN(void duckdb_web_fs_file_drop_file(const char *fileName, size_t pathLen), {});
RT_FN(void duckdb_web_fs_file_truncate(size_t file_id, double new_size), { GetOrOpen(file_id).Truncate(new_size); });
RT_FN(time_t duckdb_web_fs_file_get_last_modified_time(size_t file_id), {
auto &file = GetOrOpen(file_id);
Expand Down Expand Up @@ -226,6 +227,8 @@ WebFileSystem::DataProtocol WebFileSystem::inferDataProtocol(std::string_view ur
proto = WebFileSystem::DataProtocol::HTTP;
} else if (hasPrefix(url, "s3://")) {
proto = WebFileSystem::DataProtocol::S3;
} else if (hasPrefix(url, "opfs://")) {
proto = WebFileSystem::DataProtocol::BROWSER_FSACCESS;
} else if (hasPrefix(url, "file://")) {
data_url = std::string_view{url}.substr(7);
proto = default_data_protocol_;
Expand Down Expand Up @@ -453,6 +456,7 @@ void WebFileSystem::DropDanglingFiles() {
for (auto &[file_id, file] : files_by_id_) {
if (file->handle_count_ == 0) {
files_by_name_.erase(file->file_name_);
DropFile(file->file_name_);
if (file->data_url_.has_value()) {
files_by_url_.erase(file->data_url_.value());
}
Expand Down Expand Up @@ -481,6 +485,13 @@ bool WebFileSystem::TryDropFile(std::string_view file_name) {
return false;
}

/// drop a file
void WebFileSystem::DropFile(std::string_view file_name) {
DEBUG_TRACE();
std::string fileNameS = std::string{file_name};
duckdb_web_fs_file_drop_file(fileNameS.c_str(), fileNameS.size());
}

/// Write the global filesystem info
rapidjson::Value WebFileSystem::WriteGlobalFileInfo(rapidjson::Document &doc, uint32_t cache_epoch) {
DEBUG_TRACE();
Expand Down Expand Up @@ -793,7 +804,7 @@ void WebFileSystem::Write(duckdb::FileHandle &handle, void *buffer, int64_t nr_b
auto file_size = file_hdl.file_->file_size_;
auto writer = static_cast<char *>(buffer);
file_hdl.position_ = location;
while (nr_bytes > 0 && location < file_size) {
while (nr_bytes > 0) {
auto n = Write(handle, writer, nr_bytes);
writer += n;
nr_bytes -= n;
Expand Down Expand Up @@ -1006,10 +1017,12 @@ void WebFileSystem::FileSync(duckdb::FileHandle &handle) {
vector<std::string> WebFileSystem::Glob(const std::string &path, FileOpener *opener) {
std::unique_lock<LightMutex> fs_guard{fs_mutex_};
std::vector<std::string> results;
auto glob = glob_to_regex(path);
for (auto [name, file] : files_by_name_) {
if (std::regex_match(file->file_name_, glob)) {
results.push_back(std::string{name});
if (!FileSystem::IsRemoteFile(path)) {
auto glob = glob_to_regex(path);
for (auto [name, file] : files_by_name_) {
if (std::regex_match(file->file_name_, glob)) {
results.push_back(std::string{name});
}
}
}
auto &state = GetLocalState();
Expand Down
6 changes: 6 additions & 0 deletions lib/src/json_typedef.cc
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,12 @@ arrow::Result<rapidjson::Value> WriteSQLType(rapidjson::Document& doc, const duc
case duckdb::LogicalTypeId::AGGREGATE_STATE:
case duckdb::LogicalTypeId::BIT:
case duckdb::LogicalTypeId::LAMBDA:
case duckdb::LogicalTypeId::STRING_LITERAL:
case duckdb::LogicalTypeId::INTEGER_LITERAL:
case duckdb::LogicalTypeId::UHUGEINT:
case duckdb::LogicalTypeId::UNION:
case duckdb::LogicalTypeId::ARRAY:
case duckdb::LogicalTypeId::VARINT:
break;
}
return out;
Expand Down
22 changes: 17 additions & 5 deletions lib/src/webdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -829,6 +829,7 @@ arrow::Status WebDB::Open(std::string_view args_json) {
db_config.options.access_mode = access_mode;
db_config.options.duckdb_api = "wasm";
db_config.options.custom_user_agent = config_->custom_user_agent;
db_config.options.use_direct_io = config_->use_direct_io;
auto db = make_shared_ptr<duckdb::DuckDB>(config_->path, &db_config);
#ifndef WASM_LOADABLE_EXTENSIONS
duckdb_web_parquet_init(db.get());
Expand Down Expand Up @@ -912,18 +913,29 @@ arrow::Status WebDB::RegisterFileBuffer(std::string_view file_name, std::unique_
/// Drop all files
arrow::Status WebDB::DropFiles() {
file_page_buffer_->DropDanglingFiles();
pinned_web_files_.clear();
std::vector<std::string> files_to_drop;
for (const auto& [key, handle] : pinned_web_files_) {
files_to_drop.push_back(handle->GetName());
}
for (const auto& fileName : files_to_drop) {
arrow::Status status = DropFile(fileName);
if (!status.ok()) {
return arrow::Status::Invalid("Failed to drop file: " + fileName);
}
}
if (auto fs = io::WebFileSystem::Get()) {
fs->DropDanglingFiles();
}
return arrow::Status::OK();
}
/// Drop a file
arrow::Status WebDB::DropFile(std::string_view file_name) {
file_page_buffer_->TryDropFile(file_name);
pinned_web_files_.erase(file_name);
arrow::Status WebDB::DropFile(std::string_view fileName) {
file_page_buffer_->TryDropFile(fileName);
pinned_web_files_.erase(fileName);
if (auto fs = io::WebFileSystem::Get()) {
if (!fs->TryDropFile(file_name)) {
if (fs->TryDropFile(fileName)) {
fs->DropFile(fileName);
} else {
return arrow::Status::Invalid("file is in use");
}
}
Expand Down
Loading

0 comments on commit 0f0f7a8

Please sign in to comment.