Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# dbplyr (development version)

* `filter()` after a sequence of `left_join()` and `inner_join()` calls no longer generates a subquery (#722).
* `summarise()` now reports grouping immediately, rather than when you summarise.
* `sql_optimise()` has been removed. It was only used for two cases (filter + summarise and arrange + summarise), and these are now handled at a higher level (#1720).
* `distinct()` after a join no longer creates a subquery (#722).
Expand Down
12 changes: 10 additions & 2 deletions R/db-sql.R
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,7 @@ sql_query_join <- function(
by = NULL,
na_matches = FALSE,
...,
where = NULL,
lvl = 0
) {
check_dots_used()
Expand All @@ -390,6 +391,7 @@ sql_query_join.DBIConnection <- function(
by = NULL,
na_matches = FALSE,
...,
where = NULL,
lvl = 0
) {
JOIN <- switch(
Expand All @@ -412,7 +414,8 @@ sql_query_join.DBIConnection <- function(
sql_clause_select(con, select),
sql_clause_from(x),
sql_clause(JOIN, y),
sql_clause("ON", on, sep = " AND", parens = TRUE, lvl = 1)
sql_clause("ON", on, sep = " AND", parens = TRUE, lvl = 1),
sql_clause_where(where)
)
sql_format_clauses(clauses, lvl, con)
}
Expand All @@ -426,6 +429,7 @@ dbplyr_query_join <- function(
na_matches = FALSE,
...,
select = NULL,
where = NULL,
lvl = 0
) {
check_2ed(con)
Expand All @@ -437,6 +441,7 @@ dbplyr_query_join <- function(
type = type,
by = by,
na_matches = na_matches,
where = where,
...,
lvl = lvl
)
Expand All @@ -452,6 +457,7 @@ sql_query_multi_join <- function(
by_list,
select,
...,
where = NULL,
distinct = FALSE,
lvl = 0
) {
Expand Down Expand Up @@ -497,6 +503,7 @@ sql_query_multi_join.DBIConnection <- function(
by_list,
select,
...,
where = NULL,
distinct = FALSE,
lvl = 0
) {
Expand Down Expand Up @@ -524,7 +531,8 @@ sql_query_multi_join.DBIConnection <- function(
clauses <- list2(
sql_clause_select(con, select, distinct),
sql_clause_from(from),
!!!out
!!!out,
sql_clause_where(where)
)
sql_format_clauses(clauses, lvl = lvl, con = con)
}
Expand Down
27 changes: 27 additions & 0 deletions R/lazy-join-query.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ lazy_multi_join_query <- function(
joins,
table_names,
vars,
where = list(),
distinct = FALSE,
group_vars = op_grps(x),
order_vars = op_sort(x),
Expand All @@ -31,6 +32,7 @@ lazy_multi_join_query <- function(
joins = joins,
table_names = table_names,
vars = vars,
where = where,
distinct = distinct,
group_vars = group_vars,
order_vars = order_vars,
Expand All @@ -47,6 +49,7 @@ lazy_rf_join_query <- function(
by,
table_names,
vars,
where = list(),
group_vars = op_grps(x),
order_vars = op_sort(x),
frame = op_frame(x),
Expand Down Expand Up @@ -74,6 +77,7 @@ lazy_rf_join_query <- function(
by = by,
table_names = table_names,
vars = vars,
where = where,
group_vars = group_vars,
order_vars = order_vars,
frame = frame
Expand Down Expand Up @@ -190,11 +194,34 @@ sql_build.lazy_multi_join_query <- function(op, con, ..., sql_options = NULL) {
}
)

join_vars <- sql_multi_join_vars(
con,
op$vars,
table_vars,
use_star = FALSE,
qualify_all_columns = sql_options$qualify_all_columns
)
# The filter expressions refer to output column names, but SQL evaluates WHERE
# before SELECT, so names that were disambiguated by SELECT expressions need to
# be mapped back to those underlying expressions
where <- lapply(op$where, \(expr) {
replace_sym(
expr,
names(join_vars),
lapply(unname(join_vars), \(x) sql(x[[1]]))
)
})
where_sql <- translate_sql_(
where,
con = con,
context = list(clause = "WHERE")
)

multi_join_query(
x = sql_build(op$x, con, sql_options = sql_options),
joins = op$joins,
table_names = table_names_out,
select = select_sql,
where = where_sql,
distinct = op$distinct
)
}
Expand Down
18 changes: 15 additions & 3 deletions R/query-join.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ join_query <- function(
type = "inner",
by = NULL,
suffix = c(".x", ".y"),
na_matches = FALSE
na_matches = FALSE,
where = NULL
) {
structure(
list(
Expand All @@ -19,19 +20,28 @@ join_query <- function(
select = select,
type = type,
by = by,
na_matches = na_matches
na_matches = na_matches,
where = where
),
class = c("join_query", "query")
)
}

multi_join_query <- function(x, joins, table_names, select, distinct = FALSE) {
multi_join_query <- function(
x,
joins,
table_names,
select,
where = NULL,
distinct = FALSE
) {
structure(
list(
x = x,
joins = joins,
table_names = table_names,
select = select,
where = where,
distinct = distinct
),
class = c("multi_join_query", "query")
Expand Down Expand Up @@ -91,6 +101,7 @@ sql_render.join_query <- function(
by = query$by,
na_matches = query$na_matches,
select = query$select,
where = query$where,
lvl = lvl
)
}
Expand All @@ -117,6 +128,7 @@ sql_render.multi_join_query <- function(
table_names = query$table_names,
by_list = query$by_list,
select = query$select,
where = query$where,
distinct = query$distinct,
lvl = lvl
)
Expand Down
116 changes: 53 additions & 63 deletions R/verb-filter.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,83 +44,74 @@ filter.tbl_lazy <- function(.data, ..., .by = NULL, .preserve = FALSE) {

add_filter <- function(.data, dots) {
con <- remote_con(.data)
lazy_query <- .data$lazy_query
dots <- unname(dots)

dots_use_window_fun <- uses_window_fun(dots, con)

if (filter_can_use_having(lazy_query, dots_use_window_fun)) {
return(filter_via_having(lazy_query, dots))
}

if (!dots_use_window_fun) {
if (filter_needs_new_query(dots, lazy_query, con)) {
lazy_select_query(
x = lazy_query,
where = dots
)
} else {
exprs <- lazy_query$select$expr
nms <- lazy_query$select$name
projection <- purrr::map2_lgl(
exprs,
nms,
\(expr, name) is_symbol(expr) && !identical(expr, sym(name))
)

if (any(projection)) {
dots <- purrr::map(
dots,
replace_sym,
nms[projection],
exprs[projection]
)
}

lazy_query$where <- c(lazy_query$where, dots)
lazy_query
}
} else {
# Do partial evaluation, then extract out window functions
where <- translate_window_where_all(
dots,
env_names(dbplyr_sql_translation(con)$window)
)
# Handle window functions by adding an intermediate mutate
# by definition this has to create a subquery
if (uses_window_fun(dots, con)) {
window_funs <- env_names(dbplyr_sql_translation(con)$window)
where <- translate_window_where_all(dots, window_funs)

# Add extracted window expressions as columns
mutated <- mutate(.data, !!!where$comp)

# And filter with the modified `where` using the new columns
original_vars <- op_vars(.data)
lazy_select_query(
return(lazy_select_query(
x = mutated$lazy_query,
select = syms(set_names(original_vars)),
where = where$expr
)
))
}

lazy_query <- .data$lazy_query
if (filter_can_use_having(lazy_query)) {
names <- lazy_query$select$name
exprs <- purrr::map_if(lazy_query$select$expr, is_quosure, quo_get_expr)
dots <- purrr::map(dots, replace_sym, names, exprs)

lazy_query$having <- c(lazy_query$having, dots)
lazy_query
} else if (filter_can_inline(dots, lazy_query, con)) {
# WHERE happens before SELECT so can't refer to aliases
# might be either a lazy_select_query or a lazy_multi_join_query
if (is_lazy_select_query(lazy_query)) {
dots <- rename_aliases(dots, lazy_query$select)
}

lazy_query$where <- c(lazy_query$where, dots)
lazy_query
} else {
lazy_select_query(x = lazy_query, where = dots)
}
}

filter_needs_new_query <- function(dots, lazy_query, con) {
if (!is_lazy_select_query(lazy_query)) {
filter_can_inline <- function(dots, lazy_query, con) {
if (inherits(lazy_query, "lazy_multi_join_query")) {
# can't use mutated variables, window funs, or SQL
return(TRUE)
}

if (!is_lazy_select_query(lazy_query)) {
return(FALSE)
}

if (uses_mutated_vars(dots, lazy_query$select)) {
return(TRUE)
return(FALSE)
}

if (uses_window_fun(lazy_query$select$expr, con)) {
return(TRUE)
return(FALSE)
}

if (any_expr_uses_sql(lazy_query$select$expr)) {
return(TRUE)
return(FALSE)
}

FALSE
TRUE
}

filter_can_use_having <- function(lazy_query, dots_use_window_fun) {
filter_can_use_having <- function(lazy_query) {
# From the Postgres documentation: https://www.postgresql.org/docs/current/sql-select.html#SQL-HAVING
# Each column referenced in condition must unambiguously reference a grouping
# column, unless the reference appears within an aggregate function or the
Expand All @@ -134,24 +125,23 @@ filter_can_use_having <- function(lazy_query, dots_use_window_fun) {
# Therefore, if `filter()` does not use a window function, then we only use
# grouping or aggregated columns

if (dots_use_window_fun) {
return(FALSE)
}

if (!is_lazy_select_query(lazy_query)) {
return(FALSE)
if (is_lazy_select_query(lazy_query)) {
lazy_query$select_operation == "summarise"
} else {
FALSE
}

lazy_query$select_operation == "summarise"
}

filter_via_having <- function(lazy_query, dots) {
names <- lazy_query$select$name
exprs <- purrr::map_if(lazy_query$select$expr, is_quosure, quo_get_expr)
dots <- purrr::map(dots, replace_sym, names, exprs)
rename_aliases <- function(dots, select) {
exprs <- select$expr
nms <- select$name
projection <- purrr::map_lgl(exprs, is_symbol)

if (!any(projection)) {
return(dots)
}

lazy_query$having <- c(lazy_query$having, dots)
lazy_query
purrr::map(dots, \(dot) replace_sym(dot, nms[projection], exprs[projection]))
}

check_filter <- function(...) {
Expand Down
2 changes: 2 additions & 0 deletions man/db-sql.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading