Skip to content

Commit 05cb650

Browse files
authored
feat: add DuckDB join_asof (#1860)
1 parent aa8a501 commit 05cb650

File tree

6 files changed

+202
-119
lines changed

6 files changed

+202
-119
lines changed

narwhals/_arrow/dataframe.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -383,11 +383,10 @@ def join_asof(
383383
*,
384384
left_on: str | None,
385385
right_on: str | None,
386-
on: str | None,
387-
by_left: str | list[str] | None,
388-
by_right: str | list[str] | None,
389-
by: str | list[str] | None,
386+
by_left: list[str] | None,
387+
by_right: list[str] | None,
390388
strategy: Literal["backward", "forward", "nearest"],
389+
suffix: str,
391390
) -> Self:
392391
msg = "join_asof is not yet supported on PyArrow tables" # pragma: no cover
393392
raise NotImplementedError(msg)

narwhals/_dask/dataframe.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -334,13 +334,12 @@ def join_asof(
334334
self: Self,
335335
other: Self,
336336
*,
337-
left_on: str | None = None,
338-
right_on: str | None = None,
339-
on: str | None = None,
340-
by_left: str | list[str] | None = None,
341-
by_right: str | list[str] | None = None,
342-
by: str | list[str] | None = None,
343-
strategy: Literal["backward", "forward", "nearest"] = "backward",
337+
left_on: str | None,
338+
right_on: str | None,
339+
by_left: list[str] | None,
340+
by_right: list[str] | None,
341+
strategy: Literal["backward", "forward", "nearest"],
342+
suffix: str,
344343
) -> Self:
345344
plx = self.__native_namespace__()
346345
return self._from_native_frame(
@@ -349,12 +348,10 @@ def join_asof(
349348
other._native_frame,
350349
left_on=left_on,
351350
right_on=right_on,
352-
on=on,
353351
left_by=by_left,
354352
right_by=by_right,
355-
by=by,
356353
direction=strategy,
357-
suffixes=("", "_right"),
354+
suffixes=("", suffix),
358355
),
359356
)
360357

narwhals/_duckdb/dataframe.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from typing import Literal
77
from typing import Sequence
88

9+
import duckdb
910
from duckdb import ColumnExpression
1011

1112
from narwhals._duckdb.utils import native_to_narwhals_dtype
@@ -22,7 +23,6 @@
2223
if TYPE_CHECKING:
2324
from types import ModuleType
2425

25-
import duckdb
2626
import pandas as pd
2727
import pyarrow as pa
2828
from typing_extensions import Self
@@ -260,6 +260,51 @@ def join(
260260
res = rel.select(", ".join(select)).set_alias(original_alias)
261261
return self._from_native_frame(res)
262262

263+
def join_asof(
    self: Self,
    other: Self,
    *,
    left_on: str | None,
    right_on: str | None,
    by_left: list[str] | None,
    by_right: list[str] | None,
    strategy: Literal["backward", "forward", "nearest"],
    suffix: str,
) -> Self:
    """Perform an asof left join against ``other`` using a DuckDB SQL query.

    Arguments:
        other: Right-hand frame to join with.
        left_on: Name of the asof key column in the left frame.
        right_on: Name of the asof key column in the right frame.
        by_left: Left-frame columns that must match exactly before the asof
            comparison. Only used when ``by_right`` is also given.
        by_right: Right-frame columns paired positionally with ``by_left``.
        strategy: ``"backward"`` joins on ``left_on >= right_on``;
            ``"forward"`` joins on ``left_on <= right_on``.
        suffix: Appended to right-frame column names that also exist in the
            left frame.

    Returns:
        A new frame with every left-frame column followed by the right-frame
        columns, excluding ``right_on`` and the ``by_right`` columns.

    Raises:
        NotImplementedError: If ``strategy`` is ``"nearest"``.
    """
    # Map each supported strategy to the inequality used in the ASOF condition.
    comparison = {"backward": ">=", "forward": "<="}.get(strategy)
    if comparison is None:
        msg = "Only 'backward' and 'forward' strategies are currently supported for DuckDB"
        raise NotImplementedError(msg)

    # NOTE: the SQL text below refers to these relations by the names `lhs`
    # and `rhs`, so the local variable names must stay in sync with the query.
    lhs = self._native_frame
    rhs = other._native_frame

    if by_left is not None and by_right is not None:
        conditions = [
            f'lhs."{left}" = rhs."{right}"' for left, right in zip(by_left, by_right)
        ]
    else:
        # Exact-match keys are only applied when both sides are provided.
        conditions = []
        by_right = []
    conditions.append(f'lhs."{left_on}" {comparison} rhs."{right_on}"')
    condition = " and ".join(conditions)

    # Right-hand join keys duplicate information already carried by `lhs.*`.
    dropped = {right_on, *by_right}
    lhs_columns = set(lhs.columns)
    select = ["lhs.*"]
    for col in rhs.columns:
        if right_on is not None and col in dropped:
            continue
        if col in lhs_columns:
            select.append(f'rhs."{col}" as "{col}{suffix}"')
        else:
            # Quote and qualify the identifier so that names containing
            # spaces, reserved words or special characters remain valid SQL.
            select.append(f'rhs."{col}"')

    query = f"""
        SELECT {",".join(select)}
        FROM lhs
        ASOF LEFT JOIN rhs
        ON {condition}
        """  # noqa: S608
    res = duckdb.sql(query)
    return self._from_native_frame(res)
307+
263308
def collect_schema(self: Self) -> dict[str, DType]:
264309
return {
265310
column_name: native_to_narwhals_dtype(str(duckdb_dtype), self._version)

narwhals/_pandas_like/dataframe.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -660,13 +660,12 @@ def join_asof(
660660
self: Self,
661661
other: Self,
662662
*,
663-
left_on: str | None = None,
664-
right_on: str | None = None,
665-
on: str | None = None,
666-
by_left: str | list[str] | None = None,
667-
by_right: str | list[str] | None = None,
668-
by: str | list[str] | None = None,
669-
strategy: Literal["backward", "forward", "nearest"] = "backward",
663+
left_on: str | None,
664+
right_on: str | None,
665+
by_left: list[str] | None,
666+
by_right: list[str] | None,
667+
strategy: Literal["backward", "forward", "nearest"],
668+
suffix: str,
670669
) -> Self:
671670
plx = self.__native_namespace__()
672671
return self._from_native_frame(
@@ -675,12 +674,10 @@ def join_asof(
675674
other._native_frame,
676675
left_on=left_on,
677676
right_on=right_on,
678-
on=on,
679677
left_by=by_left,
680678
right_by=by_right,
681-
by=by,
682679
direction=strategy,
683-
suffixes=("", "_right"),
680+
suffixes=("", suffix),
684681
),
685682
)
686683

narwhals/dataframe.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ def join_asof(
277277
by_right: str | list[str] | None = None,
278278
by: str | list[str] | None = None,
279279
strategy: Literal["backward", "forward", "nearest"] = "backward",
280+
suffix: str = "_right",
280281
) -> Self:
281282
_supported_strategies = ("backward", "forward", "nearest")
282283

@@ -302,25 +303,22 @@ def join_asof(
302303
msg = "If `by` is specified, `by_left` and `by_right` should be None."
303304
raise ValueError(msg)
304305
if on is not None:
305-
return self._from_compliant_dataframe(
306-
self._compliant_frame.join_asof(
307-
self._extract_compliant(other),
308-
on=on,
309-
by_left=by_left,
310-
by_right=by_right,
311-
by=by,
312-
strategy=strategy,
313-
)
314-
)
306+
left_on = right_on = on
307+
if by is not None:
308+
by_left = by_right = by
309+
if isinstance(by_left, str):
310+
by_left = [by_left]
311+
if isinstance(by_right, str):
312+
by_right = [by_right]
315313
return self._from_compliant_dataframe(
316314
self._compliant_frame.join_asof(
317315
self._extract_compliant(other),
318316
left_on=left_on,
319317
right_on=right_on,
320318
by_left=by_left,
321319
by_right=by_right,
322-
by=by,
323320
strategy=strategy,
321+
suffix=suffix,
324322
)
325323
)
326324

@@ -2748,6 +2746,7 @@ def join_asof(
27482746
by_right: str | list[str] | None = None,
27492747
by: str | list[str] | None = None,
27502748
strategy: Literal["backward", "forward", "nearest"] = "backward",
2749+
suffix: str = "_right",
27512750
) -> Self:
27522751
"""Perform an asof join.
27532752
@@ -2764,6 +2763,7 @@ def join_asof(
27642763
by_right: join on these columns before doing asof join.
27652764
by: join on these columns before doing asof join.
27662765
strategy: Join strategy. The default is "backward".
2766+
suffix: Suffix to append to columns with a duplicate name.
27672767
27682768
* *backward*: selects the last row in the right DataFrame whose "on" key is less than or equal to the left's key.
27692769
* *forward*: selects the first row in the right DataFrame whose "on" key is greater than or equal to the left's key.
@@ -2924,6 +2924,7 @@ def join_asof(
29242924
by_right=by_right,
29252925
by=by,
29262926
strategy=strategy,
2927+
suffix=suffix,
29272928
)
29282929

29292930
# --- descriptive ---
@@ -5030,6 +5031,7 @@ def join_asof(
50305031
by_right: str | list[str] | None = None,
50315032
by: str | list[str] | None = None,
50325033
strategy: Literal["backward", "forward", "nearest"] = "backward",
5034+
suffix: str = "_right",
50335035
) -> Self:
50345036
"""Perform an asof join.
50355037
@@ -5058,6 +5060,8 @@ def join_asof(
50585060
* *forward*: selects the first row in the right DataFrame whose "on" key is greater than or equal to the left's key.
50595061
* *nearest*: selects the last row in the right DataFrame whose value is nearest to the left's key.
50605062
5063+
suffix: Suffix to append to columns with a duplicate name.
5064+
50615065
Returns:
50625066
A new joined LazyFrame.
50635067
@@ -5224,6 +5228,7 @@ def join_asof(
52245228
by_right=by_right,
52255229
by=by,
52265230
strategy=strategy,
5231+
suffix=suffix,
52275232
)
52285233

52295234
def clone(self: Self) -> Self:

0 commit comments

Comments
 (0)