@@ -51,13 +51,38 @@ impl DuckClient {
5151 format ! ( "{name}/{format}/" , name = benchmark. dataset_name( ) ) . to_data_path ( )
5252 } ;
5353 let dir = base_path. join ( format. name ( ) ) ;
54- std:: fs:: create_dir_all ( & dir) ?;
5554 let db_path = dir. join ( "duckdb.db" ) ;
5655
56+ if format == Format :: OnDiskDuckDB && data_url. scheme ( ) != "file" {
57+ anyhow:: bail!( "DuckDB format requires local data prepared by data-gen" ) ;
58+ }
59+
60+ if format == Format :: OnDiskDuckDB {
61+ if !db_path. exists ( ) {
62+ anyhow:: bail!(
63+ "prepared DuckDB database is missing at {}. Generate it with \
64+ `vx-bench prepare-data {} --formats-json '[\" duckdb\" ]'` or \
65+ `cargo run --bin data-gen -- {} --formats duckdb` using the same --opt values.",
66+ db_path. display( ) ,
67+ benchmark. dataset_name( ) ,
68+ benchmark. dataset_name( ) ,
69+ ) ;
70+ }
71+ } else {
72+ std:: fs:: create_dir_all ( & dir) ?;
73+ }
74+
5775 tracing:: info!( db_path = %db_path. display( ) , "Opening DuckDB" ) ;
5876
5977 if delete_database && db_path. exists ( ) {
60- std:: fs:: remove_file ( & db_path) ?;
78+ if format == Format :: OnDiskDuckDB {
79+ tracing:: info!(
80+ db_path = %db_path. display( ) ,
81+ "Keeping prepared DuckDB format database"
82+ ) ;
83+ } else {
84+ std:: fs:: remove_file ( & db_path) ?;
85+ }
6186 }
6287
6388 let ( db, connection) = Self :: open_and_setup_database ( Some ( db_path. clone ( ) ) , threads) ?;
@@ -147,9 +172,14 @@ impl DuckClient {
147172 benchmark : & B ,
148173 file_format : Format ,
149174 ) -> Result < ( ) > {
175+ if file_format == Format :: OnDiskDuckDB {
176+ // Native DuckDB data is materialized by data-gen. The opened database already
177+ // contains benchmark tables, so there is nothing to register here.
178+ return Ok ( ( ) ) ;
179+ }
180+
150181 let object_type = match file_format {
151182 Format :: Parquet | Format :: OnDiskVortex | Format :: VortexCompact => "VIEW" ,
152- Format :: OnDiskDuckDB => "TABLE" ,
153183 Format :: Lance => {
154184 anyhow:: bail!(
155185 "Lance format is not supported for DuckDB engine. \
@@ -159,11 +189,7 @@ impl DuckClient {
159189 format => anyhow:: bail!( "Format {format} isn't supported for DuckDB" ) ,
160190 } ;
161191
162- // DuckDB loads from parquet for OnDiskDuckDB format
163- let load_format = match file_format {
164- Format :: Parquet | Format :: OnDiskDuckDB => Format :: Parquet ,
165- f => f,
166- } ;
192+ let load_format = file_format;
167193
168194 // Get the base URL for the format's data directory
169195 let format_url = benchmark. format_path ( load_format, benchmark. data_url ( ) ) ?;
0 commit comments