Skip to content

Slow with dm::dm_filter() #441

Open
Open
@krlmlr

Description

@krlmlr

`dm_filter()` is slow on duckplyr-backed tables, even when the duck tibbles are empty (see `dm_duckplyr_ptype` below). I want to investigate what is going on here. It is not the "meta" functionality; I checked that.

library(dm)

# Build the on-disk cache only when it is missing: collect() the result of
# dm_financial() and store it with qs so later runs can skip this step.
if (!file.exists("dm_local.qs")) {
  dm <- dm_financial()
  dm_local <- collect(dm)
  qs::qsave(dm_local, file = "dm_local.qs")
}

# Always read back from the cache so every run starts from the same object.
dm_local <- qs::qread("dm_local.qs")

# Baseline: time dm_filter() on the in-memory dm.
system.time(
  dm_filter(dm_local, districts = (id == 1L))
)
#>    user  system elapsed 
#>   0.184   0.008   0.196

# Prototype version of the local dm, produced by dm_ptype().
dm_local_ptype <- dm_ptype(dm_local)

# Same filter as above, timed on the prototype dm.
system.time(
  dm_filter(dm_local_ptype, districts = (id == 1L))
)
#>    user  system elapsed 
#>   0.077   0.002   0.078

# Hand-written cascade on the in-memory dm: filter `districts`, then reduce
# each remaining table with a semi_join() against an already-reduced table.
# NOTE(review): presumably this mirrors what dm_filter() does internally --
# confirm against the dm implementation.
system.time(
  dm_local |>
    # Start from the filtered table.
    dm_zoom_to(districts) |>
    filter(id == 1L) |>
    dm_update_zoomed() |>
    # accounts is reduced via districts.
    dm_zoom_to(accounts) |>
    semi_join(districts) |>
    dm_update_zoomed() |>
    # loans, orders, trans and disps are reduced via accounts.
    dm_zoom_to(loans) |>
    semi_join(accounts) |>
    dm_update_zoomed() |>
    dm_zoom_to(orders) |>
    semi_join(accounts) |>
    dm_update_zoomed() |>
    dm_zoom_to(trans) |>
    semi_join(accounts) |>
    dm_update_zoomed() |>
    dm_zoom_to(disps) |>
    semi_join(accounts) |>
    dm_update_zoomed() |>
    # cards and clients are reduced via disps.
    dm_zoom_to(cards) |>
    semi_join(disps) |>
    dm_update_zoomed() |>
    dm_zoom_to(clients) |>
    semi_join(disps) |>
    dm_update_zoomed()
)
#>    user  system elapsed 
#>   0.031   0.001   0.032

library(duckplyr)
#> ✔ Overwriting dplyr methods with duckplyr methods.
#> ℹ Turn off with `duckplyr::methods_restore()`.

# Pull every table out of dm_local (keyed = TRUE) and convert each one with
# as_duck_tbl(); the resulting objects feed the dm::dm() call further down.
trans <- as_duck_tbl(pull_tbl(dm_local, "trans", keyed = TRUE))
districts <- as_duck_tbl(pull_tbl(dm_local, "districts", keyed = TRUE))
clients <- as_duck_tbl(pull_tbl(dm_local, "clients", keyed = TRUE))
orders <- as_duck_tbl(pull_tbl(dm_local, "orders", keyed = TRUE))
cards <- as_duck_tbl(pull_tbl(dm_local, "cards", keyed = TRUE))
disps <- as_duck_tbl(pull_tbl(dm_local, "disps", keyed = TRUE))
tkeys <- as_duck_tbl(pull_tbl(dm_local, "tkeys", keyed = TRUE))
accounts <- as_duck_tbl(pull_tbl(dm_local, "accounts", keyed = TRUE))
loans <- as_duck_tbl(pull_tbl(dm_local, "loans", keyed = TRUE))

# Assemble a dm from the duck tibbles and declare its keys explicitly.
dm_duckplyr <-
  dm::dm(
    trans, districts, clients, orders, cards,
    disps, tkeys, accounts, loans
  ) |>
  # Primary keys: every table except tkeys is keyed on `id`.
  dm::dm_add_pk(trans, id) |>
  dm::dm_add_pk(districts, id) |>
  dm::dm_add_pk(clients, id) |>
  dm::dm_add_pk(orders, id) |>
  dm::dm_add_pk(cards, id) |>
  dm::dm_add_pk(disps, id) |>
  dm::dm_add_pk(accounts, id) |>
  dm::dm_add_pk(loans, id) |>
  # Foreign keys: child table, child column, parent table.
  dm::dm_add_fk(accounts, district_id, districts) |>
  dm::dm_add_fk(disps, client_id, clients) |>
  dm::dm_add_fk(cards, disp_id, disps) |>
  dm::dm_add_fk(loans, account_id, accounts) |>
  dm::dm_add_fk(orders, account_id, accounts) |>
  dm::dm_add_fk(disps, account_id, accounts) |>
  dm::dm_add_fk(trans, account_id, accounts) |>
  # Display colour for the loans table.
  dm::dm_set_colors(`#006400FF` = loans)

# The slow case under investigation: dm_filter() on the duckplyr-backed dm.
system.time(
  dm_filter(dm_duckplyr, districts = (id == 1L))
)
#>    user  system elapsed 
#>   2.103   0.055   2.193

# Hand-written cascade on the duckplyr-backed dm: filter `districts`, then
# reduce each remaining table with a semi_join() against an already-reduced
# table, for comparison with the dm_filter() timing.
system.time(
  dm_duckplyr |>
    # Start from the filtered table.
    dm_zoom_to(districts) |>
    filter(id == 1L) |>
    dm_update_zoomed() |>
    # accounts is reduced via districts.
    dm_zoom_to(accounts) |>
    semi_join(districts) |>
    dm_update_zoomed() |>
    # loans, orders, trans and disps are reduced via accounts.
    dm_zoom_to(loans) |>
    semi_join(accounts) |>
    dm_update_zoomed() |>
    dm_zoom_to(orders) |>
    semi_join(accounts) |>
    dm_update_zoomed() |>
    dm_zoom_to(trans) |>
    semi_join(accounts) |>
    dm_update_zoomed() |>
    dm_zoom_to(disps) |>
    semi_join(accounts) |>
    dm_update_zoomed() |>
    # cards and clients are reduced via disps.
    dm_zoom_to(cards) |>
    semi_join(disps) |>
    dm_update_zoomed() |>
    dm_zoom_to(clients) |>
    semi_join(disps) |>
    dm_update_zoomed()
)
#>    user  system elapsed 
#>   0.262   0.000   0.263

# Same hand-written cascade on the duckplyr-backed dm, with compute() called
# on every zoomed table before it is written back.
# NOTE(review): presumably compute() forces each intermediate result to be
# materialised -- confirm against the duckplyr documentation.
system.time(
  dm_duckplyr |>
    # Start from the filtered table.
    dm_zoom_to(districts) |>
    filter(id == 1L) |>
    compute() |>
    dm_update_zoomed() |>
    # accounts is reduced via districts.
    dm_zoom_to(accounts) |>
    semi_join(districts) |>
    compute() |>
    dm_update_zoomed() |>
    # loans, orders, trans and disps are reduced via accounts.
    dm_zoom_to(loans) |>
    semi_join(accounts) |>
    compute() |>
    dm_update_zoomed() |>
    dm_zoom_to(orders) |>
    semi_join(accounts) |>
    compute() |>
    dm_update_zoomed() |>
    dm_zoom_to(trans) |>
    semi_join(accounts) |>
    compute() |>
    dm_update_zoomed() |>
    dm_zoom_to(disps) |>
    semi_join(accounts) |>
    compute() |>
    dm_update_zoomed() |>
    # cards and clients are reduced via disps.
    dm_zoom_to(cards) |>
    semi_join(disps) |>
    compute() |>
    dm_update_zoomed() |>
    dm_zoom_to(clients) |>
    semi_join(disps) |>
    compute() |>
    dm_update_zoomed()
)
#>    user  system elapsed 
#>   0.276   0.008   0.284

# Rebind the table variables, this time pulling from the prototype dm
# (dm_local_ptype) and converting each table with as_duck_tbl().
trans <- as_duck_tbl(pull_tbl(dm_local_ptype, "trans", keyed = TRUE))
districts <- as_duck_tbl(pull_tbl(dm_local_ptype, "districts", keyed = TRUE))
clients <- as_duck_tbl(pull_tbl(dm_local_ptype, "clients", keyed = TRUE))
orders <- as_duck_tbl(pull_tbl(dm_local_ptype, "orders", keyed = TRUE))
cards <- as_duck_tbl(pull_tbl(dm_local_ptype, "cards", keyed = TRUE))
disps <- as_duck_tbl(pull_tbl(dm_local_ptype, "disps", keyed = TRUE))
tkeys <- as_duck_tbl(pull_tbl(dm_local_ptype, "tkeys", keyed = TRUE))
accounts <- as_duck_tbl(pull_tbl(dm_local_ptype, "accounts", keyed = TRUE))
loans <- as_duck_tbl(pull_tbl(dm_local_ptype, "loans", keyed = TRUE))

# Assemble a dm from the prototype duck tibbles and declare its keys
# explicitly.
dm_duckplyr_ptype <-
  dm::dm(
    trans, districts, clients, orders, cards,
    disps, tkeys, accounts, loans
  ) |>
  # Primary keys: every table except tkeys is keyed on `id`.
  dm::dm_add_pk(trans, id) |>
  dm::dm_add_pk(districts, id) |>
  dm::dm_add_pk(clients, id) |>
  dm::dm_add_pk(orders, id) |>
  dm::dm_add_pk(cards, id) |>
  dm::dm_add_pk(disps, id) |>
  dm::dm_add_pk(accounts, id) |>
  dm::dm_add_pk(loans, id) |>
  # Foreign keys: child table, child column, parent table.
  dm::dm_add_fk(accounts, district_id, districts) |>
  dm::dm_add_fk(disps, client_id, clients) |>
  dm::dm_add_fk(cards, disp_id, disps) |>
  dm::dm_add_fk(loans, account_id, accounts) |>
  dm::dm_add_fk(orders, account_id, accounts) |>
  dm::dm_add_fk(disps, account_id, accounts) |>
  dm::dm_add_fk(trans, account_id, accounts) |>
  # Display colour for the loans table.
  dm::dm_set_colors(`#006400FF` = loans)

# dm_filter() on the prototype duckplyr dm, timed for comparison with the
# full duckplyr-backed dm.
system.time(
  dm_filter(dm_duckplyr_ptype, districts = (id == 1L))
)
#>    user  system elapsed 
#>   1.806   0.042   1.867

Created on 2025-01-04 with reprex v2.1.1

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions