@@ -15,6 +15,11 @@ DUCKDB_INCLUDES_BEGIN
1515#include " duckdb/main/capi/capi_internal.hpp"
1616#include " duckdb/main/connection.hpp"
1717#include " duckdb/parser/parsed_data/create_table_function_info.hpp"
18+ #include " duckdb/planner/expression/bound_operator_expression.hpp"
19+ #include " duckdb/planner/expression/bound_comparison_expression.hpp"
20+ #include " duckdb/planner/expression/bound_between_expression.hpp"
21+ #include " duckdb/planner/expression/bound_conjunction_expression.hpp"
22+ #include " duckdb/planner/expression/bound_function_expression.hpp"
1823DUCKDB_INCLUDES_END
1924
2025using namespace duckdb ;
@@ -263,6 +268,19 @@ void function(ClientContext &, TableFunctionInput &input, DataChunk &output) {
263268 }
264269}
265270
/*
 * Table filter pushdown is used twice in DuckDB:
 *
 * 1. Planning time: DuckDB uses file metadata (filename, hive_partitioning
 *    options in MultiFileReader) to prune files based on filename or hive
 *    partition data, e.g. month, year, etc. This happens before any file IO.
 *    We don't use this because we have our own file-level pruning in
 *    FileStatsLayoutReader.
 *
 * 2. Scan time: as we have filter_pushdown = true, filter expressions are
 *    converted to a TableFilterSet and pushed down to Vortex. We convert them
 *    to Vortex expressions and use them as filter options while initializing
 *    the scan.
 */
266284void c_pushdown_complex_filter (ClientContext &,
267285 LogicalGet &,
268286 FunctionData *bind_data,
@@ -278,8 +296,6 @@ void c_pushdown_complex_filter(ClientContext &,
278296 if (error_out) {
279297 throw BinderException (IntoErrString (error_out));
280298 }
281-
282- // If the pushdown complex filter returns true, we can remove the filter from the list.
283299 iter = pushed ? filters.erase (iter) : std::next (iter);
284300 }
285301}
@@ -381,6 +397,70 @@ InsertionOrderPreservingMap<string> c_to_string(TableFunctionToStringInput &inpu
381397 return result;
382398}
383399
400+ /*
401+ * Called either before pushdown_complex_filter or a table filter expression
402+ * call. In pushdown_complex_filter we can tell DuckDB we can't push the
403+ * filter down by returning Ok(None) but this isn't an option for a table
404+ * filter. Be conservative and allow only DuckDB expressions we know will
405+ * either always produce a valid Vortex expression or return an error, so no
406+ * Ok(None) case.
407+ *
408+ * See src/convert/expr.rs.
409+ */
410+ bool pushdown_expression (const BaseExpression &expr) {
411+ using enum ExpressionClass;
412+ switch (expr.GetExpressionClass ()) {
413+ case BOUND_COLUMN_REF :
414+ case BOUND_CONSTANT :
415+ case BOUND_REF :
416+ return true ;
417+ case BOUND_COMPARISON : {
418+ const auto &comparison = expr.Cast <BoundComparisonExpression>();
419+ return pushdown_expression (*comparison.left ) && pushdown_expression (*comparison.right );
420+ }
421+ case BOUND_BETWEEN : {
422+ const auto &between = expr.Cast <BoundBetweenExpression>();
423+ return pushdown_expression (*between.input ) && pushdown_expression (*between.lower ) &&
424+ pushdown_expression (*between.upper );
425+ }
426+ case BOUND_CONJUNCTION : {
427+ for (const auto &child : expr.Cast <BoundConjunctionExpression>().children ) {
428+ if (!pushdown_expression (*child)) {
429+ return false ;
430+ }
431+ }
432+ return true ;
433+ }
434+ case BOUND_FUNCTION : {
435+ constexpr std::array<std::string_view, 6 > supported =
436+ {" struct_extract" , " contains" , " prefix" , " suffix" , " ~~" , " !~~" };
437+ const std::string_view name = expr.Cast <BoundFunctionExpression>().function .name ;
438+ return std::find (supported.begin (), supported.end (), name) != supported.end ();
439+ }
440+ case BOUND_OPERATOR : {
441+ switch (expr.GetExpressionType ()) {
442+ case ExpressionType::OPERATOR_NOT :
443+ case ExpressionType::OPERATOR_IS_NULL :
444+ case ExpressionType::OPERATOR_IS_NOT_NULL :
445+ case ExpressionType::COMPARE_IN :
446+ case ExpressionType::COMPARE_NOT_IN :
447+ break ;
448+ default :
449+ return false ;
450+ }
451+
452+ for (const auto &child : expr.Cast <BoundOperatorExpression>().children ) {
453+ if (!pushdown_expression (*child)) {
454+ return false ;
455+ }
456+ }
457+ return true ;
458+ }
459+ default :
460+ return false ;
461+ }
462+ }
463+
384464extern " C" duckdb_state duckdb_vx_tfunc_register (duckdb_database ffi_db, const duckdb_vx_tfunc_vtab_t *vtab) {
385465 D_ASSERT (ffi_db);
386466 D_ASSERT (vtab);
@@ -395,6 +475,9 @@ extern "C" duckdb_state duckdb_vx_tfunc_register(duckdb_database ffi_db, const d
395475 tf.sampling_pushdown = false ;
396476
397477 tf.pushdown_complex_filter = c_pushdown_complex_filter;
478+ tf.pushdown_expression = [](auto &, const auto &, Expression &expression) {
479+ return pushdown_expression (expression);
480+ };
398481 tf.cardinality = c_cardinality;
399482 tf.get_partition_info = get_partition_info;
400483 tf.get_partition_data = get_partition_data;
0 commit comments