diff --git a/ibis_ml/steps/_impute.py b/ibis_ml/steps/_impute.py index 1436cab..7d0bfbf 100644 --- a/ibis_ml/steps/_impute.py +++ b/ibis_ml/steps/_impute.py @@ -1,5 +1,6 @@ from __future__ import annotations +import math from typing import TYPE_CHECKING, Any import ibis.expr.types as ir @@ -14,6 +15,8 @@ def _fillna(col, val): + if val is None or col.type().is_floating() and math.isnan(val): + raise ValueError(f"Cannot fill column {col.get_name()!r} with `None` or `NaN`") if col.type().is_floating(): return (col.isnull() | col.isnan()).ifelse(val, col) # noqa: PD003 else: diff --git a/tests/test_impute.py b/tests/test_impute.py new file mode 100644 index 0000000..8ec7e62 --- /dev/null +++ b/tests/test_impute.py @@ -0,0 +1,56 @@ +import ibis +import numpy as np +import pandas as pd +import pandas.testing as tm +import pytest + +import ibis_ml as ml + + +@pytest.mark.parametrize( + ("mode", "col_name", "expected"), + [ + ("mean", "floating_col", 1.0), + ("median", "floating_col", 0.0), + ("mode", "floating_col", 0.0), + ("mean", "int_col", 1), + ("median", "int_col", 0), + ("mode", "int_col", 0), + ("mode", "string_col", "a"), + ] +) +def test_impute(mode, col_name, expected): + mode_class = getattr(ml, f"Impute{mode.capitalize()}") + step = mode_class(col_name) + train_table = ibis.memtable( + { + "floating_col": [0.0, 0.0, 3.0, None, np.nan], + "int_col": [0, 0, 3, None, None], + "string_col": ["a", "a", "c", None, None], + "null_col": [None]*5, + } + ) + test_table = ibis.memtable( + { + col_name: [None], + } + ) + step.fit_table(train_table, ml.core.Metadata()) + result = step.transform_table(test_table) + expected = pd.DataFrame( + { + col_name: [expected], + } + ) + tm.assert_frame_equal(result.execute(), expected, check_dtype=False) + + # null col will raise a ValueError + test_table = ibis.memtable( + { + "null_col": [None], + } + ) + with pytest.raises(ValueError): + step = mode_class("null_col") + step.fit_table(train_table, ml.core.Metadata()) + step.transform_table(test_table) \ No newline at end of file