Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: datetime selector #1822

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions docs/api-reference/selectors.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ set operations are supported:
- boolean
- by_dtype
- categorical
- datetime
- numeric
- string
show_root_heading: false
Expand Down
2 changes: 2 additions & 0 deletions docs/api-reference/typing.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ Narwhals comes fully statically typed. In addition to `nw.DataFrame`, `nw.Expr`,
- IntoFrameT
- IntoSeries
- IntoSeriesT
- SizeUnit
- TimeUnit
show_source: false
show_bases: false

Expand Down
4 changes: 2 additions & 2 deletions narwhals/_arrow/expr_dt.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Literal

from narwhals._expression_parsing import reuse_series_namespace_implementation

if TYPE_CHECKING:
from typing_extensions import Self

from narwhals._arrow.expr import ArrowExpr
from narwhals.typing import TimeUnit


class ArrowExprDateTimeNamespace:
Expand All @@ -30,7 +30,7 @@ def convert_time_zone(self: Self, time_zone: str) -> ArrowExpr:
self._compliant_expr, "dt", "convert_time_zone", time_zone=time_zone
)

def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> ArrowExpr:
def timestamp(self: Self, time_unit: TimeUnit) -> ArrowExpr:
return reuse_series_namespace_implementation(
self._compliant_expr, "dt", "timestamp", time_unit=time_unit
)
Expand Down
19 changes: 18 additions & 1 deletion narwhals/_arrow/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,16 @@
from narwhals.utils import import_dtypes_module

if TYPE_CHECKING:
from collections.abc import Collection
from collections.abc import Container
from datetime import timezone

from typing_extensions import Self

from narwhals._arrow.dataframe import ArrowDataFrame
from narwhals._arrow.series import ArrowSeries
from narwhals.dtypes import DType
from narwhals.typing import TimeUnit
from narwhals.utils import Version


Expand All @@ -26,7 +31,7 @@ def __init__(
self._implementation = Implementation.PYARROW
self._version = version

def by_dtype(self: Self, dtypes: list[DType | type[DType]]) -> ArrowSelector:
def by_dtype(self: Self, dtypes: Container[DType | type[DType]]) -> ArrowSelector:
def func(df: ArrowDataFrame) -> list[ArrowSeries]:
return [df[col] for col in df.columns if df.schema[col] in dtypes]

Expand Down Expand Up @@ -85,6 +90,18 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
kwargs={},
)

def datetime(
    self: Self,
    time_unit: TimeUnit | Collection[TimeUnit] | None,
    time_zone: str | timezone | Collection[str | timezone | None] | None,
) -> ArrowSelector:
    """Select columns by the datetime dtypes derived from the given arguments.

    `time_unit` and `time_zone` are forwarded, together with this namespace's
    `_version`, to `_parse_datetime_selector_to_datetimes`; whatever that
    helper returns is handed straight to `by_dtype`.

    Arguments:
        time_unit: A single time unit, a collection of them, or None.
        time_zone: A time zone (string or `timezone`), a collection of
            them (entries may be None), or None.

    Returns:
        The selector produced by `by_dtype` for the parsed dtypes.
    """
    # Function-level import kept as in the original; presumably it avoids an
    # import cycle with `narwhals.utils` -- confirm before hoisting.
    from narwhals.utils import _parse_datetime_selector_to_datetimes

    return self.by_dtype(
        _parse_datetime_selector_to_datetimes(
            time_unit=time_unit, time_zone=time_zone, version=self._version
        )
    )


class ArrowSelector(ArrowExpr):
def __repr__(self: Self) -> str: # pragma: no cover
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_arrow/series_dt.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Literal

import pyarrow as pa
import pyarrow.compute as pc
Expand All @@ -13,6 +12,7 @@
from typing_extensions import Self

from narwhals._arrow.series import ArrowSeries
from narwhals.typing import TimeUnit


class ArrowSeriesDateTimeNamespace:
Expand Down Expand Up @@ -49,7 +49,7 @@ def convert_time_zone(self: Self, time_zone: str) -> ArrowSeries:

return self._compliant_series._from_native_series(result)

def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> ArrowSeries:
def timestamp(self: Self, time_unit: TimeUnit) -> ArrowSeries:
s = self._compliant_series._native_series
dtype = self._compliant_series.dtype
dtypes = import_dtypes_module(self._compliant_series._version)
Expand Down
6 changes: 3 additions & 3 deletions narwhals/_dask/expr_dt.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Literal

from narwhals._pandas_like.utils import calculate_timestamp_date
from narwhals._pandas_like.utils import calculate_timestamp_datetime
Expand All @@ -16,6 +15,7 @@
import dask_expr as dx

from narwhals._dask.expr import DaskExpr
from narwhals.typing import TimeUnit


class DaskExprDateTimeNamespace:
Expand Down Expand Up @@ -143,8 +143,8 @@ def func(s: dx.Series, time_zone: str) -> dx.Series:
returns_scalar=self._compliant_expr._returns_scalar,
)

def timestamp(self, time_unit: Literal["ns", "us", "ms"] = "us") -> DaskExpr:
def func(s: dx.Series, time_unit: Literal["ns", "us", "ms"] = "us") -> dx.Series:
def timestamp(self, time_unit: TimeUnit) -> DaskExpr:
def func(s: dx.Series, time_unit: TimeUnit) -> dx.Series:
dtype = native_to_narwhals_dtype(
s, self._compliant_expr._version, Implementation.DASK
)
Expand Down
20 changes: 19 additions & 1 deletion narwhals/_dask/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,16 @@
import dask.dataframe.dask_expr as dx
except ModuleNotFoundError:
import dask_expr as dx

from collections.abc import Collection
from collections.abc import Container
from datetime import timezone

from typing_extensions import Self

from narwhals._dask.dataframe import DaskLazyFrame
from narwhals.dtypes import DType
from narwhals.typing import TimeUnit
from narwhals.utils import Version


Expand All @@ -26,7 +32,7 @@ def __init__(
self._backend_version = backend_version
self._version = version

def by_dtype(self: Self, dtypes: list[DType | type[DType]]) -> DaskSelector:
def by_dtype(self: Self, dtypes: Container[DType | type[DType]]) -> DaskSelector:
def func(df: DaskLazyFrame) -> list[Any]:
return [
df._native_frame[col] for col in df.columns if df.schema[col] in dtypes
Expand Down Expand Up @@ -89,6 +95,18 @@ def func(df: DaskLazyFrame) -> list[Any]:
kwargs={},
)

def datetime(
self: Self,
time_unit: TimeUnit | Collection[TimeUnit] | None,
time_zone: str | timezone | Collection[str | timezone | None] | None,
) -> DaskSelector: # pragma: no cover
Copy link
Member Author

@FBruzzesi FBruzzesi Jan 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For dask, the selector works, but the cast fails with:

TypeError: Cannot use .astype to convert from timezone-aware dtype to timezone-naive dtype. Use obj.tz_localize(None) or obj.tz_convert('UTC').tz_localize(None) instead.

from narwhals.utils import _parse_datetime_selector_to_datetimes

datetime_dtypes = _parse_datetime_selector_to_datetimes(
time_unit=time_unit, time_zone=time_zone, version=self._version
)
return self.by_dtype(datetime_dtypes)


class DaskSelector(DaskExpr):
def __repr__(self: Self) -> str: # pragma: no cover
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_pandas_like/expr_dt.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Literal

from narwhals._expression_parsing import reuse_series_namespace_implementation

if TYPE_CHECKING:
from narwhals._pandas_like.expr import PandasLikeExpr
from narwhals.typing import TimeUnit


class PandasLikeExprDateTimeNamespace:
Expand Down Expand Up @@ -99,7 +99,7 @@ def convert_time_zone(self, time_zone: str) -> PandasLikeExpr:
self._compliant_expr, "dt", "convert_time_zone", time_zone=time_zone
)

def timestamp(self, time_unit: Literal["ns", "us", "ms"] = "us") -> PandasLikeExpr:
def timestamp(self, time_unit: TimeUnit) -> PandasLikeExpr:
return reuse_series_namespace_implementation(
self._compliant_expr, "dt", "timestamp", time_unit=time_unit
)
33 changes: 26 additions & 7 deletions narwhals/_pandas_like/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,23 @@
from narwhals.utils import import_dtypes_module

if TYPE_CHECKING:
from collections.abc import Collection
from collections.abc import Container
from datetime import timezone

from typing_extensions import Self

from narwhals._pandas_like.dataframe import PandasLikeDataFrame
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals.dtypes import DType
from narwhals.typing import TimeUnit
from narwhals.utils import Implementation
from narwhals.utils import Version


class PandasSelectorNamespace:
def __init__(
self,
self: Self,
*,
implementation: Implementation,
backend_version: tuple[int, ...],
Expand All @@ -27,7 +34,7 @@ def __init__(
self._backend_version = backend_version
self._version = version

def by_dtype(self, dtypes: list[DType | type[DType]]) -> PandasSelector:
def by_dtype(self: Self, dtypes: Container[DType | type[DType]]) -> PandasSelector:
def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
return [df[col] for col in df.columns if df.schema[col] in dtypes]

Expand All @@ -43,7 +50,7 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
kwargs={"dtypes": dtypes},
)

def numeric(self) -> PandasSelector:
def numeric(self: Self) -> PandasSelector:
dtypes = import_dtypes_module(self._version)
return self.by_dtype(
[
Expand All @@ -60,19 +67,19 @@ def numeric(self) -> PandasSelector:
],
)

def categorical(self) -> PandasSelector:
def categorical(self: Self) -> PandasSelector:
dtypes = import_dtypes_module(self._version)
return self.by_dtype([dtypes.Categorical])

def string(self) -> PandasSelector:
def string(self: Self) -> PandasSelector:
dtypes = import_dtypes_module(self._version)
return self.by_dtype([dtypes.String])

def boolean(self) -> PandasSelector:
def boolean(self: Self) -> PandasSelector:
dtypes = import_dtypes_module(self._version)
return self.by_dtype([dtypes.Boolean])

def all(self) -> PandasSelector:
def all(self: Self) -> PandasSelector:
def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
return [df[col] for col in df.columns]

Expand All @@ -88,6 +95,18 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
kwargs={},
)

def datetime(
    self: Self,
    time_unit: TimeUnit | Collection[TimeUnit] | None,
    time_zone: str | timezone | Collection[str | timezone | None] | None,
) -> PandasSelector:
    """Build a selector from the datetime dtypes matching the arguments.

    The dtype candidates come from `_parse_datetime_selector_to_datetimes`,
    called with `time_unit`, `time_zone` and this namespace's `_version`.
    Column matching itself is delegated to `by_dtype`.

    Arguments:
        time_unit: One time unit, a collection of time units, or None.
        time_zone: One time zone (string or `timezone`), a collection of
            time zones (entries may be None), or None.

    Returns:
        The `by_dtype` selector for the parsed dtypes.
    """
    from narwhals.utils import _parse_datetime_selector_to_datetimes

    parse_kwargs = {
        "time_unit": time_unit,
        "time_zone": time_zone,
        "version": self._version,
    }
    candidate_dtypes = _parse_datetime_selector_to_datetimes(**parse_kwargs)
    return self.by_dtype(candidate_dtypes)


class PandasSelector(PandasLikeExpr):
def __repr__(self) -> str: # pragma: no cover
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_pandas_like/series_dt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from typing import TYPE_CHECKING
from typing import Any
from typing import Literal

from narwhals._pandas_like.utils import calculate_timestamp_date
from narwhals._pandas_like.utils import calculate_timestamp_datetime
Expand All @@ -12,6 +11,7 @@

if TYPE_CHECKING:
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals.typing import TimeUnit


class PandasLikeSeriesDateTimeNamespace:
Expand Down Expand Up @@ -206,7 +206,7 @@ def convert_time_zone(self, time_zone: str) -> PandasLikeSeries:
result = self._compliant_series._native_series.dt.tz_convert(time_zone)
return self._compliant_series._from_native_series(result)

def timestamp(self, time_unit: Literal["ns", "us", "ms"] = "us") -> PandasLikeSeries:
def timestamp(self, time_unit: TimeUnit) -> PandasLikeSeries:
s = self._compliant_series._native_series
dtype = self._compliant_series.dtype
is_pyarrow_dtype = "pyarrow" in str(self._compliant_series._native_series.dtype)
Expand Down
6 changes: 3 additions & 3 deletions narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from typing import TYPE_CHECKING
from typing import Any
from typing import Iterable
from typing import Literal
from typing import Sequence
from typing import TypeVar

Expand All @@ -23,6 +22,7 @@
from narwhals._pandas_like.expr import PandasLikeExpr
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals.dtypes import DType
from narwhals.typing import TimeUnit
from narwhals.utils import Version

ExprT = TypeVar("ExprT", bound=PandasLikeExpr)
Expand Down Expand Up @@ -449,13 +449,13 @@ def non_object_native_to_narwhals_dtype(
if (match_ := PATTERN_PD_DATETIME.match(dtype)) or (
match_ := PATTERN_PA_DATETIME.match(dtype)
):
dt_time_unit: Literal["us", "ns", "ms", "s"] = match_.group("time_unit") # type: ignore[assignment]
dt_time_unit: TimeUnit = match_.group("time_unit") # type: ignore[assignment]
dt_time_zone: str | None = match_.group("time_zone")
return dtypes.Datetime(dt_time_unit, dt_time_zone)
if (match_ := PATTERN_PD_DURATION.match(dtype)) or (
match_ := PATTERN_PA_DURATION.match(dtype)
):
du_time_unit: Literal["us", "ns", "ms", "s"] = match_.group("time_unit") # type: ignore[assignment]
du_time_unit: TimeUnit = match_.group("time_unit") # type: ignore[assignment]
return dtypes.Duration(du_time_unit)
if dtype == "date32[day][pyarrow]":
return dtypes.Date()
Expand Down
19 changes: 19 additions & 0 deletions narwhals/_polars/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,17 @@
from narwhals.utils import Implementation

if TYPE_CHECKING:
from collections.abc import Collection
from datetime import timezone

from typing_extensions import Self

from narwhals._polars.dataframe import PolarsDataFrame
from narwhals._polars.dataframe import PolarsLazyFrame
from narwhals._polars.expr import PolarsExpr
from narwhals._polars.typing import IntoPolarsExpr
from narwhals.dtypes import DType
from narwhals.typing import TimeUnit
from narwhals.utils import Version


Expand Down Expand Up @@ -285,3 +289,18 @@ def all(self: Self) -> PolarsExpr:
version=self._version,
backend_version=self._backend_version,
)

def datetime(
    self: Self,
    time_unit: TimeUnit | Collection[TimeUnit] | None,
    time_zone: str | timezone | Collection[str | timezone | None] | None,
) -> PolarsExpr:
    """Wrap Polars' native datetime selector in a `PolarsExpr`.

    Arguments:
        time_unit: Forwarded unchanged as `time_unit` to
            `pl.selectors.datetime`.
        time_zone: Forwarded unchanged as `time_zone` to
            `pl.selectors.datetime`.

    Returns:
        A `PolarsExpr` carrying the native selector plus this namespace's
        `_version` and `_backend_version`.
    """
    import polars as pl

    from narwhals._polars.expr import PolarsExpr

    # The narwhals-side annotations differ from what Polars' stubs accept,
    # hence the targeted ignore on the call itself.
    native_selector = pl.selectors.datetime(time_unit=time_unit, time_zone=time_zone)  # type: ignore[arg-type]
    return PolarsExpr(
        native_selector,
        version=self._version,
        backend_version=self._backend_version,
    )
Loading
Loading