build: make NumPy, pandas, and Arrow deps optional (ibis-project#152)
Co-authored-by: Deepyaman Datta <[email protected]>
jitingxu1 and deepyaman authored Sep 13, 2024
1 parent a2d5829 commit aa71647
Showing 4 changed files with 47 additions and 8 deletions.
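
Every change below follows the same pattern: imports of NumPy, pandas, and PyArrow move from module scope into the functions and methods that actually use them (or behind `if TYPE_CHECKING:` for annotations only), so `import ibis_ml` no longer fails when one of those libraries is absent. A minimal sketch of the pattern, using an illustrative function rather than ibis-ml's own code:

    # Eager module-level import: the whole module fails to import if numpy is missing.
    # import numpy as np
    #
    # def to_array(column):
    #     return np.asarray(column)

    # Deferred import: numpy is only required when the function is actually called.
    def to_array(column):
        import numpy as np

        return np.asarray(column)

Python caches imported modules in `sys.modules`, so after the first call the repeated in-function import costs only a dictionary lookup.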
3 changes: 2 additions & 1 deletion ibis_ml/__init__.py
@@ -44,11 +44,12 @@ def _auto_patch_skorch() -> None:
         return

     import ibis.expr.types as ir
-    import numpy as np

     old_fit = skorch.net.NeuralNet.fit

     def fit(self, X, y=None, **fit_params):
+        import numpy as np
+
         if isinstance(y, ir.Column):
             y = np.asarray(y)

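
In `ibis_ml/__init__.py`, the NumPy import moves inside the patched `fit`, so the skorch patch can be installed without NumPy. The hunk only shows the start of the wrapper; a rough reconstruction of how such a patch typically hangs together (the call into `old_fit` and the final reassignment are assumptions, not part of the visible diff):

    def fit(self, X, y=None, **fit_params):
        import numpy as np  # deferred: only needed once fit() is actually called

        if isinstance(y, ir.Column):
            y = np.asarray(y)  # materialize the Ibis column for skorch
        return old_fit(self, X, y, **fit_params)

    skorch.net.NeuralNet.fit = fit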
32 changes: 26 additions & 6 deletions ibis_ml/core.py
@@ -12,14 +12,14 @@
 import ibis
 import ibis.expr.operations as ops
 import ibis.expr.types as ir
-import numpy as np
-import pandas as pd
-import pyarrow as pa
 from ibis.common.dispatch import lazy_singledispatch

 if TYPE_CHECKING:
     import dask.dataframe as dd
+    import numpy as np
+    import pandas as pd
     import polars as pl
+    import pyarrow as pa
     import xgboost as xgb
     from sklearn.utils._estimator_html_repr import _VisualBlock

@@ -45,6 +45,9 @@ def _ibis_table_to_numpy(table: ir.Table) -> np.ndarray:

 def _y_as_dataframe(y: Any) -> pd.DataFrame:
     """Coerce `y` to a pandas dataframe"""
+    import numpy as np
+    import pandas as pd
+
     if isinstance(y, pd.DataFrame):
         return y
     elif isinstance(y, pd.Series):
@@ -144,8 +147,11 @@ def _(X, y=None, maintain_order=False):
     return table, tuple(y.columns), None


-@normalize_table.register(pd.DataFrame)
+@normalize_table.register("pd.DataFrame")
 def _(X, y=None, maintain_order=False):
+    import numpy as np
+    import pandas as pd
+
     if y is not None:
         y = _y_as_dataframe(y)
         table = pd.concat([X, y], axis=1)
@@ -162,8 +168,11 @@ def _(X, y=None, maintain_order=False):
     return ibis.memtable(table), targets, index


-@normalize_table.register(np.ndarray)
+@normalize_table.register("np.ndarray")
 def _(X, y=None, maintain_order=False):
+    import numpy as np
+    import pandas as pd
+
     X = pd.DataFrame(X, columns=[f"x{i}" for i in range(X.shape[-1])])
     if y is not None:
         y = _y_as_dataframe(y)
@@ -181,8 +190,11 @@ def _(X, y=None, maintain_order=False):
     return ibis.memtable(table), targets, index


-@normalize_table.register(pa.Table)
+@normalize_table.register("pa.Table")
 def _(X, y=None, maintain_order=False):
+    import numpy as np
+    import pyarrow as pa
+
     if y is not None:
         if isinstance(y, (pa.ChunkedArray, pa.Array)):
             y = pa.Table.from_pydict({"y": y})
@@ -246,6 +258,8 @@ def get_categories(self, column: str) -> pa.Array | None:
         return self.categories.get(column)

     def set_categories(self, column: str, values: pa.Array | list[Any]) -> None:
+        import pyarrow as pa
+
         self.categories[column] = pa.array(values)

     def drop_categories(self, column: str) -> None:
@@ -255,6 +269,8 @@ def drop_categories(self, column: str) -> None:
 def _categorize_wrap_reader(
     reader: pa.RecordBatchReader, categories: dict[str, pa.Array]
 ) -> Iterable[pa.RecordBatch]:
+    import pyarrow as pa
+
     for batch in reader:
         out = {}
         for name, col in zip(batch.schema.names, batch.columns):
@@ -620,6 +636,8 @@ def _categorize_pandas(self, df: pd.DataFrame) -> pd.DataFrame:
         return df

     def _categorize_pyarrow(self, table: pa.Table) -> pa.Table:
+        import pyarrow as pa
+
         if not self.metadata_.categories:
             return table

@@ -645,6 +663,8 @@ def _categorize_dask_dataframe(self, ddf: dd.DataFrame) -> dd.DataFrame:
     def _categorize_pyarrow_batches(
         self, reader: pa.RecordBatchReader
     ) -> pa.RecordBatchReader:
+        import pyarrow as pa
+
         if not self.metadata_.categories:
             return reader

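
The `normalize_table.register(...)` decorators now take the type as a string instead of the class object, which is what lets the module-level pandas/NumPy/PyArrow imports go away: registering by name builds the dispatch table without importing anything, and the name is only matched against a concrete argument type when `normalize_table` is called. How ibis's `lazy_singledispatch` resolves those names internally isn't shown here; the stand-alone sketch below (keyed on the plain class name, a simplification) only illustrates the idea of import-free, name-based registration:

    class LazyDispatch:
        # Illustrative name-keyed dispatcher -- not ibis's lazy_singledispatch.
        def __init__(self, default):
            self.default = default
            self.registry = {}

        def register(self, type_name):
            # No import of the heavy library is needed just to register a handler.
            def decorator(func):
                self.registry[type_name] = func
                return func
            return decorator

        def __call__(self, obj, *args, **kwargs):
            # Resolution happens at call time, when the object (and therefore
            # its library) is already loaded in the process.
            for cls in type(obj).__mro__:
                impl = self.registry.get(cls.__name__)
                if impl is not None:
                    return impl(obj, *args, **kwargs)
            return self.default(obj, *args, **kwargs)

    normalize = LazyDispatch(default=lambda x: x)

    @normalize.register("DataFrame")
    def _(df):
        return df.to_dict()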
5 changes: 4 additions & 1 deletion ibis_ml/steps/_discretize.py
@@ -4,7 +4,6 @@

 import ibis
 import ibis.expr.types as ir
-import numpy as np

 from ibis_ml.core import Metadata, Step
 from ibis_ml.select import SelectionType, selector
@@ -94,6 +93,8 @@ def fit_table(self, table: ir.Table, metadata: Metadata) -> None:
     def _fit_uniform_strategy(
         self, table: ir.Table, columns: list[str]
     ) -> dict[str, list[float]]:
+        import numpy as np
+
         aggs = []
         for col_name in columns:
             col = table[col_name]
@@ -117,6 +118,8 @@ def _fit_uniform_strategy(
     def _fit_quantile_strategy(
         self, table: ir.Table, columns: list[str]
     ) -> dict[str, list[float]]:
+        import numpy as np
+
         aggs = []
         percentiles = np.linspace(0, 1, self.n_bins + 1)
         for col_name in columns:
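
In `_discretize.py`, NumPy is now pulled in only inside the two fitting strategies. In `_fit_quantile_strategy` it builds the grid of quantile levels whose per-column values become the bin edges; for example, with `n_bins = 4`:

    import numpy as np

    n_bins = 4
    percentiles = np.linspace(0, 1, n_bins + 1)
    # array([0.  , 0.25, 0.5 , 0.75, 1.  ]) -- quartile levels; each column's
    # values at these quantiles become its bin edges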
15 changes: 15 additions & 0 deletions tests/test_optional_dependencies.py
@@ -0,0 +1,15 @@
+import sys
+from importlib import import_module, reload
+from unittest.mock import patch
+
+import pytest
+
+
+# https://stackoverflow.com/a/65163627
+@pytest.mark.parametrize("optional_dependency", ["numpy", "pandas", "pyarrow"])
+def test_without_dependency(optional_dependency):
+    with patch.dict(sys.modules, {optional_dependency: None}):
+        if "ibis_ml" in sys.modules:
+            reload(sys.modules["ibis_ml"])
+        else:
+            import_module("ibis_ml")
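
The new test leans on a CPython import rule: if `sys.modules[name]` is `None`, any subsequent `import name` raises `ImportError`, so `patch.dict(sys.modules, {optional_dependency: None})` simulates an uninstalled package without touching the environment (and restores `sys.modules` on exit). If `ibis_ml` is already imported, `reload` re-executes its module-level code under that patch, which only passes now that the top-level NumPy/pandas/PyArrow imports are gone. The mechanism in isolation, independent of ibis-ml:

    import sys
    from unittest.mock import patch

    with patch.dict(sys.modules, {"numpy": None}):
        try:
            import numpy  # noqa: F401
        except ImportError:
            print("numpy appears uninstalled inside the patch")
    # sys.modules is restored once the context manager exits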
