diff --git a/README.md b/README.md index 3d8a620..8282161 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ IbisML is a library for building scalable ML pipelines using Ibis: - Preprocess your data at scale on any [Ibis](https://ibis-project.org/)-supported backend. -- Compose [`Recipe`](/reference/core.html#ibisml.Recipe)s with other scikit-learn +- Compose [`Recipe`](/reference/core.html#ibis_ml.Recipe)s with other scikit-learn estimators using [`Pipeline`](https://scikit-learn.org/stable/modules/compose.html#pipeline-chaining-estimators)s. - Seamlessly integrate with [scikit-learn](https://scikit-learn.org/stable/), @@ -20,7 +20,7 @@ IbisML is a library for building scalable ML pipelines using Ibis: ```python import ibis -import ibisml as ml +import ibis_ml as ml # A recipe for a feature engineering pipeline that: # - imputes missing values in numeric columns with their mean diff --git a/docs/_quarto.yml b/docs/_quarto.yml index d3332aa..e0b2cff 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -88,7 +88,7 @@ format: css: styles.css quartodoc: - package: ibisml + package: ibis_ml title: Reference sidebar: reference/_sidebar.yml dynamic: true @@ -98,7 +98,7 @@ quartodoc: signature_name: short sections: - title: Core - package: ibisml + package: ibis_ml contents: - kind: page path: core @@ -136,7 +136,7 @@ quartodoc: - title: Steps desc: Define steps in a recipe - package: ibisml + package: ibis_ml contents: - kind: page path: steps-imputation diff --git a/docs/index.qmd b/docs/index.qmd index d6511ce..6da7806 100644 --- a/docs/index.qmd +++ b/docs/index.qmd @@ -11,7 +11,7 @@ hide-description: true - Preprocess your data at scale on any [Ibis](https://ibis-project.org/)-supported backend. -- Compose [`Recipe`](/reference/core.html#ibisml.Recipe)s with other scikit-learn +- Compose [`Recipe`](/reference/core.html#ibis_ml.Recipe)s with other scikit-learn estimators using [`Pipeline`](https://scikit-learn.org/stable/modules/compose.html#pipeline-chaining-estimators)s. - Seamlessly integrate with [scikit-learn](https://scikit-learn.org/stable/), @@ -23,7 +23,7 @@ hide-description: true ### Install IbisML ```bash -pip install ibisml +pip install ibis-ml ``` ### Create your first recipe @@ -34,7 +34,7 @@ mean of each numeric column and then normalize numeric data to have a standard d of one and a mean of zero. ```{python} -import ibisml as ml +import ibis_ml as ml imputer = ml.ImputeMean(ml.numeric()) scaler = ml.ScaleStandard(ml.numeric()) diff --git a/docs/tutorial/index.ipynb b/docs/tutorial/index.ipynb index be6c9e0..c1a3330 100644 --- a/docs/tutorial/index.ipynb +++ b/docs/tutorial/index.ipynb @@ -532,7 +532,7 @@ "metadata": {}, "outputs": [], "source": [ - "import ibisml as ml\n", + "import ibis_ml as ml\n", "\n", "flights_rec = ml.Recipe(\n", " ml.ExpandDate(\"date\", components=[\"dow\", \"month\"]),\n", @@ -1095,7 +1095,7 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
Pipeline(steps=[('flights_rec', <ibisml.core.Recipe object at 0x149360730>),\n", + "Pipeline(steps=[('flights_rec', <ibis_ml.core.Recipe object at 0x149360730>),\n", " ('mod',\n", " <class 'skorch.classifier.NeuralNetClassifier'>[initialized](\n", " module_=MyModule(\n", @@ -1106,7 +1106,7 @@ " (output): Linear(in_features=10, out_features=2, bias=True)\n", " (softmax): Softmax(dim=-1)\n", " ),\n", - "))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.Pipeline(steps=[('flights_rec', <ibisml.core.Recipe object at 0x149360730>),\n", + "))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org." ], "text/plain": [ - "Pipeline(steps=[('flights_rec',Pipeline(steps=[('flights_rec', <ibis_ml.core.Recipe object at 0x149360730>),\n", " ('mod',\n", " <class 'skorch.classifier.NeuralNetClassifier'>[initialized](\n", " module_=MyModule(\n", @@ -1117,7 +1117,7 @@ " (output): Linear(in_features=10, out_features=2, bias=True)\n", " (softmax): Softmax(dim=-1)\n", " ),\n", - "))])<ibisml.core.Recipe object at 0x149360730><class 'skorch.classifier.NeuralNetClassifier'>[initialized](\n", + "))])<ibis_ml.core.Recipe object at 0x149360730><class 'skorch.classifier.NeuralNetClassifier'>[initialized](\n", " module_=MyModule(\n", " (dense0): Linear(in_features=10, out_features=10, bias=True)\n", " (nonlin): ReLU()\n", @@ -1129,7 +1129,7 @@ ")),\n", + "Pipeline(steps=[('flights_rec', ),\n", " ('mod',\n", " [initialized](\n", " module_=MyModule(\n", diff --git a/ibisml/__init__.py b/ibis_ml/__init__.py similarity index 89% rename from ibisml/__init__.py rename to ibis_ml/__init__.py index 815db59..7ab5c4c 100644 --- a/ibisml/__init__.py +++ b/ibis_ml/__init__.py @@ -1,7 +1,7 @@ """IbisML is a library for building scalable ML pipelines using Ibis.""" -from ibisml.core import Recipe, Step -from ibisml.select import ( +from ibis_ml.core import Recipe, Step +from ibis_ml.select import ( categorical, cols, contains, @@ -22,7 +22,7 @@ timestamp, where, ) -from ibisml.steps import * +from ibis_ml.steps import * from ._version import __version__ diff --git a/ibisml/_version.py b/ibis_ml/_version.py similarity index 54% rename from ibisml/_version.py rename to ibis_ml/_version.py index 32b3600..60d1435 100644 --- a/ibisml/_version.py +++ b/ibis_ml/_version.py @@ -1,3 +1,3 @@ from importlib.metadata import version -__version__ = version("ibisml") +__version__ = version("ibis_ml") diff --git a/ibisml/core.py b/ibis_ml/core.py similarity index 99% rename from ibisml/core.py rename to ibis_ml/core.py index 100233d..bbf090a 100644 --- a/ibisml/core.py +++ b/ibis_ml/core.py @@ -261,7 +261,7 @@ def _get_categorize_chunk() -> Callable[[str, list[str], Any], pd.DataFrame]: """Wrap the `categorize` function in a closure, so cloudpickle will encode the full function. - This avoids requiring `ibisml` or `ibis` exist on the worker nodes of the + This avoids requiring `ibis_ml` or `ibis` exist on the worker nodes of the dask cluster. """ diff --git a/ibisml/select.py b/ibis_ml/select.py similarity index 99% rename from ibisml/select.py rename to ibis_ml/select.py index 7a8b66e..af1a8b9 100644 --- a/ibisml/select.py +++ b/ibis_ml/select.py @@ -8,7 +8,7 @@ import ibis.expr.types as ir if TYPE_CHECKING: - from ibisml.core import Metadata + from ibis_ml.core import Metadata class Selector: diff --git a/ibis_ml/steps/__init__.py b/ibis_ml/steps/__init__.py new file mode 100644 index 0000000..2e0b1ca --- /dev/null +++ b/ibis_ml/steps/__init__.py @@ -0,0 +1,36 @@ +from ibis_ml.steps.common import Cast, Drop, Mutate, MutateAt +from ibis_ml.steps.discretize import DiscretizeKBins +from ibis_ml.steps.encode import ( + CategoricalEncode, + CountEncode, + OneHotEncode, + TargetEncode, +) +from ibis_ml.steps.feature_engineering import PolynomialFeatures +from ibis_ml.steps.feature_selection import ZeroVariance +from ibis_ml.steps.impute import FillNA, ImputeMean, ImputeMedian, ImputeMode +from ibis_ml.steps.standardize import ScaleMinMax, ScaleStandard +from ibis_ml.steps.temporal import ExpandDate, ExpandDateTime, ExpandTime + +__all__ = ( + "Cast", + "CategoricalEncode", + "CountEncode", + "DiscretizeKBins", + "Drop", + "ExpandDate", + "ExpandDateTime", + "ExpandTime", + "FillNA", + "ImputeMean", + "ImputeMedian", + "ImputeMode", + "Mutate", + "MutateAt", + "OneHotEncode", + "PolynomialFeatures", + "ScaleMinMax", + "ScaleStandard", + "TargetEncode", + "ZeroVariance", +) diff --git a/ibisml/steps/common.py b/ibis_ml/steps/common.py similarity index 96% rename from ibisml/steps/common.py rename to ibis_ml/steps/common.py index 736ff10..d61a830 100644 --- a/ibisml/steps/common.py +++ b/ibis_ml/steps/common.py @@ -6,8 +6,8 @@ import ibis.expr.types as ir from ibis.common.deferred import Deferred -from ibisml.core import Metadata, Step -from ibisml.select import SelectionType, selector +from ibis_ml.core import Metadata, Step +from ibis_ml.select import SelectionType, selector class Drop(Step): @@ -20,7 +20,7 @@ class Drop(Step): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml Drop all non-numeric columns @@ -59,7 +59,7 @@ class Cast(Step): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml Cast all numeric columns to float64 @@ -107,7 +107,7 @@ class MutateAt(Step): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml >>> from ibis import _ Replace all numeric columns with their absolute values. @@ -170,7 +170,7 @@ class Mutate(Step): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml >>> from ibis import _ Define a new column ``c`` as ``a**2 + b**2`` diff --git a/ibisml/steps/discretize.py b/ibis_ml/steps/discretize.py similarity index 96% rename from ibisml/steps/discretize.py rename to ibis_ml/steps/discretize.py index b0a806f..71147f9 100644 --- a/ibisml/steps/discretize.py +++ b/ibis_ml/steps/discretize.py @@ -6,8 +6,8 @@ import ibis.expr.types as ir import numpy as np -from ibisml.core import Metadata, Step -from ibisml.select import SelectionType, selector +from ibis_ml.core import Metadata, Step +from ibis_ml.select import SelectionType, selector class DiscretizeKBins(Step): @@ -34,8 +34,8 @@ class DiscretizeKBins(Step): Examples -------- >>> import ibis - >>> import ibisml as ml - >>> from ibisml.core import Metadata + >>> import ibis_ml as ml + >>> from ibis_ml.core import Metadata >>> ibis.options.interactive = True Load penguins dataset diff --git a/ibisml/steps/encode.py b/ibis_ml/steps/encode.py similarity index 97% rename from ibisml/steps/encode.py rename to ibis_ml/steps/encode.py index 4067d42..ce623ec 100644 --- a/ibisml/steps/encode.py +++ b/ibis_ml/steps/encode.py @@ -7,9 +7,9 @@ import ibis import ibis.expr.types as ir -from ibisml.core import Metadata, Step -from ibisml.select import SelectionType, selector -from ibisml.steps.impute import FillNA +from ibis_ml.core import Metadata, Step +from ibis_ml.select import SelectionType, selector +from ibis_ml.steps.impute import FillNA def _compute_categories( @@ -94,7 +94,7 @@ class OneHotEncode(Step): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml One-hot encode all string columns. @@ -183,7 +183,7 @@ class CategoricalEncode(Step): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml Categorical encode all string columns. @@ -261,7 +261,7 @@ class CountEncode(Step): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml Count encode all string columns. @@ -309,7 +309,7 @@ class TargetEncode(Step): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml Target encode all string columns. diff --git a/ibisml/steps/feature_engineering.py b/ibis_ml/steps/feature_engineering.py similarity index 95% rename from ibisml/steps/feature_engineering.py rename to ibis_ml/steps/feature_engineering.py index 9aee412..2ffc9c1 100644 --- a/ibisml/steps/feature_engineering.py +++ b/ibis_ml/steps/feature_engineering.py @@ -8,8 +8,8 @@ import ibis.expr.types as ir -from ibisml.core import Metadata, Step -from ibisml.select import SelectionType, selector +from ibis_ml.core import Metadata, Step +from ibis_ml.select import SelectionType, selector class PolynomialFeatures(Step): @@ -25,7 +25,7 @@ class PolynomialFeatures(Step): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml Generate polynomial features for all numeric columns with a degree is 2. diff --git a/ibisml/steps/feature_selection.py b/ibis_ml/steps/feature_selection.py similarity index 95% rename from ibisml/steps/feature_selection.py rename to ibis_ml/steps/feature_selection.py index 3f94ab2..851af04 100644 --- a/ibisml/steps/feature_selection.py +++ b/ibis_ml/steps/feature_selection.py @@ -4,8 +4,8 @@ import ibis.expr.types as ir -from ibisml.core import Metadata, Step -from ibisml.select import SelectionType, selector +from ibis_ml.core import Metadata, Step +from ibis_ml.select import SelectionType, selector class ZeroVariance(Step): @@ -22,7 +22,7 @@ class ZeroVariance(Step): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml To remove columns with zero variance: >>> step = ml.ZeroVariance(ml.everything()) diff --git a/ibisml/steps/impute.py b/ibis_ml/steps/impute.py similarity index 95% rename from ibisml/steps/impute.py rename to ibis_ml/steps/impute.py index a36d6e7..f8cac14 100644 --- a/ibisml/steps/impute.py +++ b/ibis_ml/steps/impute.py @@ -4,8 +4,8 @@ import ibis.expr.types as ir -from ibisml.core import Metadata, Step -from ibisml.select import SelectionType, selector +from ibis_ml.core import Metadata, Step +from ibis_ml.select import SelectionType, selector def _fillna(col, val): @@ -28,7 +28,7 @@ class FillNA(Step): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml Fill all NULL values in numeric columns with 0. @@ -91,7 +91,7 @@ class ImputeMean(_BaseImpute): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml Replace NULL values in all numeric columns with their respective means, computed from the training dataset. @@ -119,7 +119,7 @@ class ImputeMedian(_BaseImpute): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml Replace NULL values in all numeric columns with their respective medians, computed from the training dataset. @@ -147,7 +147,7 @@ class ImputeMode(_BaseImpute): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml Replace NULL values in all numeric columns with their respective modes, computed from the training dataset. diff --git a/ibisml/steps/standardize.py b/ibis_ml/steps/standardize.py similarity index 95% rename from ibisml/steps/standardize.py rename to ibis_ml/steps/standardize.py index 60153a1..a8667d3 100644 --- a/ibisml/steps/standardize.py +++ b/ibis_ml/steps/standardize.py @@ -4,8 +4,8 @@ import ibis.expr.types as ir -from ibisml.core import Metadata, Step -from ibisml.select import SelectionType, selector +from ibis_ml.core import Metadata, Step +from ibis_ml.select import SelectionType, selector class ScaleMinMax(Step): @@ -19,7 +19,7 @@ class ScaleMinMax(Step): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml Normalize all numeric columns. @@ -78,7 +78,7 @@ class ScaleStandard(Step): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml Normalize all numeric columns. diff --git a/ibisml/steps/temporal.py b/ibis_ml/steps/temporal.py similarity index 98% rename from ibisml/steps/temporal.py rename to ibis_ml/steps/temporal.py index b0f98b2..21b055c 100644 --- a/ibisml/steps/temporal.py +++ b/ibis_ml/steps/temporal.py @@ -2,8 +2,8 @@ from typing import TYPE_CHECKING, Any, Iterable, Literal, Sequence -from ibisml.core import Metadata, Step -from ibisml.select import SelectionType, selector +from ibis_ml.core import Metadata, Step +from ibis_ml.select import SelectionType, selector if TYPE_CHECKING: import ibis.expr.types as ir @@ -38,7 +38,7 @@ class ExpandDateTime(Step): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml Expand date and time columns using the default components @@ -169,7 +169,7 @@ class ExpandDate(Step): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml Expand date columns using the default components @@ -273,7 +273,7 @@ class ExpandTime(Step): Examples -------- - >>> import ibisml as ml + >>> import ibis_ml as ml Expand time columns using the default components diff --git a/ibisml/steps/__init__.py b/ibisml/steps/__init__.py deleted file mode 100644 index 6f3dea5..0000000 --- a/ibisml/steps/__init__.py +++ /dev/null @@ -1,36 +0,0 @@ -from ibisml.steps.common import Cast, Drop, Mutate, MutateAt -from ibisml.steps.discretize import DiscretizeKBins -from ibisml.steps.encode import ( - CategoricalEncode, - CountEncode, - OneHotEncode, - TargetEncode, -) -from ibisml.steps.feature_engineering import PolynomialFeatures -from ibisml.steps.feature_selection import ZeroVariance -from ibisml.steps.impute import FillNA, ImputeMean, ImputeMedian, ImputeMode -from ibisml.steps.standardize import ScaleMinMax, ScaleStandard -from ibisml.steps.temporal import ExpandDate, ExpandDateTime, ExpandTime - -__all__ = ( - "Cast", - "CategoricalEncode", - "CountEncode", - "DiscretizeKBins", - "Drop", - "ExpandDate", - "ExpandDateTime", - "ExpandTime", - "FillNA", - "ImputeMean", - "ImputeMedian", - "ImputeMode", - "Mutate", - "MutateAt", - "OneHotEncode", - "PolynomialFeatures", - "ScaleMinMax", - "ScaleStandard", - "TargetEncode", - "ZeroVariance", -) diff --git a/pyproject.toml b/pyproject.toml index b7739e7..0bf4db4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools", "setuptools_scm"] build-backend = "setuptools.build_meta" [project] -name = "ibisml" +name = "ibis-ml" description = "Tools for developing ML pipelines using Ibis" readme = "README.md" requires-python = ">= 3.8" @@ -20,7 +20,7 @@ dev = ["ibis-framework[duckdb,examples]", "pytest", "pytest-cov", "scikit-learn" include-package-data = false [tool.setuptools.packages.find] -include = ["ibisml*"] +include = ["ibis_ml*"] [tool.ruff.lint] select = [ diff --git a/tests/test_common.py b/tests/test_common.py index 00df69f..5ff5abd 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -2,7 +2,7 @@ import ibis.expr.datatypes as dt from ibis import _ -import ibisml as ml +import ibis_ml as ml def test_drop(): diff --git a/tests/test_core.py b/tests/test_core.py index c59bd2d..8f9272b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -6,8 +6,8 @@ import pytest from ibis import _ -import ibisml as ml -from ibisml.core import normalize_table +import ibis_ml as ml +from ibis_ml.core import normalize_table class Shuffle(ml.Step): diff --git a/tests/test_discretize.py b/tests/test_discretize.py index d106e34..af63b7f 100644 --- a/tests/test_discretize.py +++ b/tests/test_discretize.py @@ -3,7 +3,7 @@ import pandas.testing as tm import pytest -import ibisml as ml +import ibis_ml as ml @pytest.mark.parametrize("strategy", ["uniform", "quantile"]) diff --git a/tests/test_encode.py b/tests/test_encode.py index d97eb6f..27b7c9a 100644 --- a/tests/test_encode.py +++ b/tests/test_encode.py @@ -5,7 +5,7 @@ import pytest from sklearn.preprocessing import TargetEncoder -import ibisml as ml +import ibis_ml as ml @pytest.fixture() diff --git a/tests/test_feature_engineering.py b/tests/test_feature_engineering.py index 5d007f3..9faf48e 100644 --- a/tests/test_feature_engineering.py +++ b/tests/test_feature_engineering.py @@ -2,7 +2,7 @@ import pandas.testing as tm import pytest -import ibisml as ml +import ibis_ml as ml @pytest.fixture() diff --git a/tests/test_feature_selection.py b/tests/test_feature_selection.py index be7191f..cd5e5a1 100644 --- a/tests/test_feature_selection.py +++ b/tests/test_feature_selection.py @@ -1,7 +1,7 @@ import ibis import pandas as pd -import ibisml as ml +import ibis_ml as ml def test_zero_variance(): diff --git a/tests/test_select.py b/tests/test_select.py index 1642d0c..b81ffdf 100644 --- a/tests/test_select.py +++ b/tests/test_select.py @@ -2,7 +2,7 @@ import ibis.expr.datatypes as dt import pytest -import ibisml as ml +import ibis_ml as ml def eval_select(selector): diff --git a/tests/test_temporal.py b/tests/test_temporal.py index c1619a5..141ab74 100644 --- a/tests/test_temporal.py +++ b/tests/test_temporal.py @@ -1,7 +1,7 @@ import ibis from ibis import _ -import ibisml as ml +import ibis_ml as ml def test_expand_date():