Skip to content

Commit d98d7e2

Browse files
committed
initial
Signed-off-by: Mikhail Kot <to@myrrc.dev>
1 parent 52e26d1 commit d98d7e2

5 files changed

Lines changed: 232 additions & 72 deletions

File tree

vortex-bench/src/runner.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,10 +184,11 @@ impl SqlBenchmarkRunner {
184184
if let Some(expected_counts) = &self.expected_row_counts
185185
&& query_idx < expected_counts.len()
186186
{
187+
let expected = expected_counts[query_idx];
187188
assert_eq!(
188189
row_count,
189-
expected_counts[query_idx],
190-
"Row count mismatch for query {query_idx} - {engine}:{format}",
190+
expected,
191+
"Row count mismatch for query {query_idx} - {engine}:{format}, expected {expected}, got {row_count}",
191192
engine = self.engine,
192193
);
193194
}

vortex-duckdb/cpp/table_function.cpp

Lines changed: 85 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ DUCKDB_INCLUDES_BEGIN
1515
#include "duckdb/main/capi/capi_internal.hpp"
1616
#include "duckdb/main/connection.hpp"
1717
#include "duckdb/parser/parsed_data/create_table_function_info.hpp"
18+
#include "duckdb/planner/expression/bound_operator_expression.hpp"
19+
#include "duckdb/planner/expression/bound_comparison_expression.hpp"
20+
#include "duckdb/planner/expression/bound_between_expression.hpp"
21+
#include "duckdb/planner/expression/bound_conjunction_expression.hpp"
22+
#include "duckdb/planner/expression/bound_function_expression.hpp"
1823
DUCKDB_INCLUDES_END
1924

2025
using namespace duckdb;
@@ -263,6 +268,19 @@ void function(ClientContext &, TableFunctionInput &input, DataChunk &output) {
263268
}
264269
}
265270

271+
/*
272+
* Table filter pushdown is used twice in duckdb:
273+
*
274+
* 1. Planning time: duckdb uses file metadata (filename, hive_partitioning
275+
* options in MultiFileReader) to prune files based on filename or hive
276+
* partition data i.e. month, year, etc. This happens before any file IO.
277+
* We don't use this because we have our own file-level pruning in
278+
* FileStatsLayoutReader.
279+
*
280+
* 2. Scan time: as we have filter_pushdown = true, filter expressions are
281+
* converted to TableFilterSet and pushed down to Vortex. We convert them to
282+
* Vortex expressions and use them as filter options while initializing the scan.
283+
*/
266284
void c_pushdown_complex_filter(ClientContext &,
267285
LogicalGet &,
268286
FunctionData *bind_data,
@@ -278,8 +296,6 @@ void c_pushdown_complex_filter(ClientContext &,
278296
if (error_out) {
279297
throw BinderException(IntoErrString(error_out));
280298
}
281-
282-
// If the pushdown complex filter returns true, we can remove the filter from the list.
283299
iter = pushed ? filters.erase(iter) : std::next(iter);
284300
}
285301
}
@@ -381,6 +397,70 @@ InsertionOrderPreservingMap<string> c_to_string(TableFunctionToStringInput &inpu
381397
return result;
382398
}
383399

400+
/*
401+
* Called either before pushdown_complex_filter or a table filter expression
402+
* call. In pushdown_complex_filter we can tell DuckDB we can't push the
403+
* filter down by returning Ok(None) but this isn't an option for a table
404+
* filter. Be conservative and allow only DuckDB expressions we know will
405+
* either always produce a valid Vortex expression or return an error, so no
406+
* Ok(None) case.
407+
*
408+
* See src/convert/expr.rs.
409+
*/
410+
bool pushdown_expression(const BaseExpression &expr) {
411+
using enum ExpressionClass;
412+
switch (expr.GetExpressionClass()) {
413+
case BOUND_COLUMN_REF:
414+
case BOUND_CONSTANT:
415+
case BOUND_REF:
416+
return true;
417+
case BOUND_COMPARISON: {
418+
const auto &comparison = expr.Cast<BoundComparisonExpression>();
419+
return pushdown_expression(*comparison.left) && pushdown_expression(*comparison.right);
420+
}
421+
case BOUND_BETWEEN: {
422+
const auto &between = expr.Cast<BoundBetweenExpression>();
423+
return pushdown_expression(*between.input) && pushdown_expression(*between.lower) &&
424+
pushdown_expression(*between.upper);
425+
}
426+
case BOUND_CONJUNCTION: {
427+
for (const auto &child : expr.Cast<BoundConjunctionExpression>().children) {
428+
if (!pushdown_expression(*child)) {
429+
return false;
430+
}
431+
}
432+
return true;
433+
}
434+
case BOUND_FUNCTION: {
435+
constexpr std::array<std::string_view, 6> supported =
436+
{"struct_extract", "contains", "prefix", "suffix", "~~", "!~~"};
437+
const std::string_view name = expr.Cast<BoundFunctionExpression>().function.name;
438+
return std::find(supported.begin(), supported.end(), name) != supported.end();
439+
}
440+
case BOUND_OPERATOR: {
441+
switch (expr.GetExpressionType()) {
442+
case ExpressionType::OPERATOR_NOT:
443+
case ExpressionType::OPERATOR_IS_NULL:
444+
case ExpressionType::OPERATOR_IS_NOT_NULL:
445+
case ExpressionType::COMPARE_IN:
446+
case ExpressionType::COMPARE_NOT_IN:
447+
break;
448+
default:
449+
return false;
450+
}
451+
452+
for (const auto &child : expr.Cast<BoundOperatorExpression>().children) {
453+
if (!pushdown_expression(*child)) {
454+
return false;
455+
}
456+
}
457+
return true;
458+
}
459+
default:
460+
return false;
461+
}
462+
}
463+
384464
extern "C" duckdb_state duckdb_vx_tfunc_register(duckdb_database ffi_db, const duckdb_vx_tfunc_vtab_t *vtab) {
385465
D_ASSERT(ffi_db);
386466
D_ASSERT(vtab);
@@ -395,6 +475,9 @@ extern "C" duckdb_state duckdb_vx_tfunc_register(duckdb_database ffi_db, const d
395475
tf.sampling_pushdown = false;
396476

397477
tf.pushdown_complex_filter = c_pushdown_complex_filter;
478+
tf.pushdown_expression = [](auto &, const auto &, Expression &expression) {
479+
return pushdown_expression(expression);
480+
};
398481
tf.cardinality = c_cardinality;
399482
tf.get_partition_info = get_partition_info;
400483
tf.get_partition_data = get_partition_data;

0 commit comments

Comments
 (0)