From 7676a9024e1ebd9a16de53c5ae521515690c97fd Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Thu, 27 Apr 2023 22:53:33 +0100 Subject: [PATCH 1/5] Add `null` object, and update top-level API specification --- .../dataframe_api/__init__.py | 38 ++++++++++++++++++- spec/API_specification/index.rst | 14 +++++++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 1dc36a89..8018a1f2 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -14,8 +14,9 @@ __dataframe_api_version__: str = "YYYY.MM" """ -String representing the version of the DataFrame API specification to which the -conforming implementation adheres. +String representing the version of the DataFrame API specification to which +the conforming implementation adheres. Set to a concrete value for a stable +implementation of the dataframe API standard. """ def concat(dataframes: Sequence[DataFrame]) -> DataFrame: @@ -73,3 +74,36 @@ def dataframe_from_dict(data: Mapping[str, Column]) -> DataFrame: DataFrame """ ... + +class null: + """ + A `null` singleton object to represent missing data. + + ``null`` may be used when constructing a `Column` from a Python sequence. + It supports ``is``, and does not support ``==`` and ``bool``. + + Methods + ------- + __bool__ + __eq__ + + """ + def __eq__(self): + """ + Raises + ------ + RuntimeError + A missing value must not be compared for equality. Use ``is`` to check + if an object *is* this ``null`` object, and `DataFrame.isnull` or + `Column.isnull` to check for presence of missing values. + """ + ... + + def __bool__(self): + """ + Raises + ------ + TypeError + Truthiness of a missing value is ambiguous + """ + ... diff --git a/spec/API_specification/index.rst b/spec/API_specification/index.rst index 1a0fbc34..419cdd86 100644 --- a/spec/API_specification/index.rst +++ b/spec/API_specification/index.rst @@ -5,6 +5,20 @@ API specification .. currentmodule:: dataframe_api +The API consists of dataframe, column and groupby classes, plus a small number +of objects and functions in the top-level namespace. The latter are: + +.. autosummary:: + :toctree: generated + :template: attribute.rst + :nosignatures: + + __dataframe_api_version__ + null + +The ``DataFrame``, ``Column`` and ``GroupBy`` objects have the following +methods and attributes: + .. toctree:: :maxdepth: 3 From 3489e3198b8033b0354e14c203223911568d50db Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Wed, 10 May 2023 20:02:33 +0200 Subject: [PATCH 2/5] Address review comments on singleton and duck typing `null` --- .../dataframe_api/__init__.py | 47 +++++++++---------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 8018a1f2..5890f468 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -77,33 +77,28 @@ def dataframe_from_dict(data: Mapping[str, Column]) -> DataFrame: class null: """ - A `null` singleton object to represent missing data. + A `null` object to represent missing data. - ``null`` may be used when constructing a `Column` from a Python sequence. - It supports ``is``, and does not support ``==`` and ``bool``. + ``null`` is a scalar, and may be used when constructing a `Column` from a + Python sequence with `column_from_sequence`. It does not support ``is``, + ``==`` or ``bool``. - Methods - ------- - __bool__ - __eq__ + Raises + ------ + TypeError + From ``__eq__`` and from ``__bool__``. + + For ``_eq__``: a missing value must not be compared for equality + directly. Instead, use `DataFrame.isnull` or `Column.isnull` to check + for presence of missing values. + + For ``__bool__``: truthiness of a missing value is ambiguous. + + Notes + ----- + Like for Python scalars, the ``null`` object may be duck typed so it can + reside on (e.g.) a GPU. Hence, the builtin ``is`` keyword should not be + used to check if an object *is* the ``null`` object. """ - def __eq__(self): - """ - Raises - ------ - RuntimeError - A missing value must not be compared for equality. Use ``is`` to check - if an object *is* this ``null`` object, and `DataFrame.isnull` or - `Column.isnull` to check for presence of missing values. - """ - ... - - def __bool__(self): - """ - Raises - ------ - TypeError - Truthiness of a missing value is ambiguous - """ - ... + ... From 55d53b6b5292f43742844c2a285ec2b307317126 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Wed, 17 May 2023 13:14:11 +0200 Subject: [PATCH 3/5] Add a free `isnull` function to the top-level namespace --- .../dataframe_api/__init__.py | 19 ++++++++++++++++++- spec/API_specification/index.rst | 1 + 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 5890f468..68ad2ec0 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -3,7 +3,7 @@ """ from __future__ import annotations -from typing import Mapping, Sequence +from typing import Mapping, Sequence, Any from .column_object import * from .dataframe_object import * @@ -102,3 +102,20 @@ class null: """ ... + +def isnull(value: Any, /) -> bool: + """ + Check if an object is a `null` scalar. + + Parameters + ---------- + value : Any + Any input type is valid. + + Returns + ------- + bool + True if the input is a `null` object from the same library which + implements the dataframe API standard, False otherwise. + + """ diff --git a/spec/API_specification/index.rst b/spec/API_specification/index.rst index 419cdd86..16a0cb61 100644 --- a/spec/API_specification/index.rst +++ b/spec/API_specification/index.rst @@ -14,6 +14,7 @@ of objects and functions in the top-level namespace. The latter are: :nosignatures: __dataframe_api_version__ + isnull null The ``DataFrame``, ``Column`` and ``GroupBy`` objects have the following From ea276ca2e26dc36237ff9f6b581809898d2deaeb Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Thu, 18 May 2023 14:25:33 +0100 Subject: [PATCH 4/5] fix _eq__ typo --- spec/API_specification/dataframe_api/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 68ad2ec0..4e1fc5ec 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -88,7 +88,7 @@ class null: TypeError From ``__eq__`` and from ``__bool__``. - For ``_eq__``: a missing value must not be compared for equality + For ``__eq__``: a missing value must not be compared for equality directly. Instead, use `DataFrame.isnull` or `Column.isnull` to check for presence of missing values. From 884c67bedf50d1b4e25d938b80fec0dff464ddf9 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Thu, 18 May 2023 15:32:49 +0200 Subject: [PATCH 5/5] Change `isnull` type annotation to `object` --- spec/API_specification/dataframe_api/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 4e1fc5ec..cd6ef443 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -103,13 +103,13 @@ class null: """ ... -def isnull(value: Any, /) -> bool: +def isnull(value: object, /) -> bool: """ Check if an object is a `null` scalar. Parameters ---------- - value : Any + value : object Any input type is valid. Returns