diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index be4b9c218f9f5..1e1097a268e55 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -44,6 +44,7 @@ Other enhancements - :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`) - :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`) - :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`) +- :meth:`DataFrame` now supports creating a new :class:`DataFrame` from a :py:class:`collections.abc.Mapping` object (:issue:`58803`) - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`) - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f2af69fcc9d84..96d0e796e1c16 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -87,6 +87,7 @@ if TYPE_CHECKING: from collections.abc import ( + Mapping, Sequence, Sized, ) @@ -860,13 +861,13 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]: return dtype, val -def dict_compat(d: dict[Scalar, Scalar]) -> dict[Scalar, Scalar]: +def dict_compat(d: Mapping[Scalar, Scalar]) -> dict[Scalar, Scalar]: """ - Convert datetimelike-keyed dicts to a Timestamp-keyed dict. + Convert datetimelike-keyed Mappings to a Timestamp-keyed dict.
Parameters ---------- - d: dict-like object + d: Mapping object Returns ------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fab798dd617b7..191315512371f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -513,12 +513,12 @@ class DataFrame(NDFrame, OpsMixin): Parameters ---------- - data : ndarray (structured or homogeneous), Iterable, dict, or DataFrame - Dict can contain Series, arrays, constants, dataclass or list-like objects. If - data is a dict, column order follows insertion-order. If a dict contains Series - which have an index defined, it is aligned by its index. This alignment also - occurs if data is a Series or a DataFrame itself. Alignment is done on - Series/DataFrame inputs. + data : ndarray (structured or homogeneous), Iterable, Mapping, or DataFrame + Mapping can contain Series, arrays, constants, dataclass or list-like objects. + If data is a Mapping, column order follows insertion-order. If a Mapping + contains Series which have an index defined, it is aligned by its index. This + alignment also occurs if data is a Series or a DataFrame itself. Alignment is + done on Series/DataFrame inputs. If data is a list of dicts, column order follows insertion-order. 
@@ -735,7 +735,7 @@ def __init__( raise ValueError("columns cannot be a set") if copy is None: - if isinstance(data, dict): + if isinstance(data, Mapping): # retain pre-GH#38939 default behavior copy = True elif not isinstance(data, (Index, DataFrame, Series)): @@ -754,7 +754,7 @@ def __init__( data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy ) - elif isinstance(data, dict): + elif isinstance(data, Mapping): # GH#38939 de facto copy defaults to False only in non-dict cases mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy) elif isinstance(data, ma.MaskedArray): @@ -1735,7 +1735,7 @@ def __rmatmul__(self, other) -> DataFrame: @classmethod def from_dict( cls, - data: dict, + data: Mapping, orient: FromDictOrient = "columns", dtype: Dtype | None = None, columns: Axes | None = None, @@ -1748,7 +1748,7 @@ def from_dict( Parameters ---------- - data : dict + data : Mapping Of the form {field : array-like} or {field : dict}. orient : {'columns', 'index', 'tight'}, default 'columns' The "orientation" of the data. 
If the keys of the passed dict diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 0d149f47fd08c..07353121c5bff 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -347,7 +347,7 @@ def _check_values_indices_shape_match( def dict_to_mgr( - data: dict, + data: abc.Mapping, index, columns, *, @@ -536,7 +536,7 @@ def _homogenize( refs.append(val._references) val = val._values else: - if isinstance(val, dict): + if isinstance(val, abc.Mapping): # GH#41785 this _should_ be equivalent to (but faster than) # val = Series(val, index=index)._values if oindex is None: @@ -578,7 +578,7 @@ def _extract_index(data) -> Index: if isinstance(val, ABCSeries): have_series = True indexes.append(val.index) - elif isinstance(val, dict): + elif isinstance(val, abc.Mapping): have_dicts = True indexes.append(list(val.keys())) elif is_list_like(val) and getattr(val, "ndim", 1) == 1: diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index fc41d7907a240..2f68d6b831579 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -1,5 +1,9 @@ from __future__ import annotations +from collections.abc import ( + Mapping, + Sequence, +) from typing import TYPE_CHECKING from pandas import ( @@ -11,6 +15,35 @@ from pandas._typing import AxisInt +class DictWrapper(Mapping): + """Mapping backed by a dict without being a dict subclass.""" + + def __init__(self, d: dict) -> None: + self._dict = d + + def __getitem__(self, key): + return self._dict[key] + + def __iter__(self): + return iter(self._dict) + + def __len__(self): + return len(self._dict) + + +class ListWrapper(Sequence): + """Sequence backed by a list without being a list subclass.""" + + def __init__(self, lst: list) -> None: + self._list = lst + + def __getitem__(self, i): + return self._list[i] + + def __len__(self): + return len(self._list) + + def _check_mixed_float(df, dtype=None): # float16 are most likely to be upcasted to float32 dtypes = {"A": "float32", "B": "float32", "C": "float16", "D": "float64"} diff --git
a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py index 60a8e688b3b8a..1706460cc9f44 100644 --- a/pandas/tests/frame/constructors/test_from_dict.py +++ b/pandas/tests/frame/constructors/test_from_dict.py @@ -11,8 +11,10 @@ MultiIndex, RangeIndex, Series, + date_range, ) import pandas._testing as tm +from pandas.tests.frame.common import DictWrapper class TestFromDict: @@ -135,6 +137,27 @@ def test_constructor_from_ordered_dict(self): result = DataFrame.from_dict(a, orient="index") tm.assert_frame_equal(result, expected) + def test_constructor_from_mapping(self): + idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo") + dr = date_range("20130110", periods=3) + + # construction + expected = DataFrame(DictWrapper({"A": idx, "B": dr})) + result = DataFrame.from_dict(DictWrapper({"A": idx, "B": dr})) + tm.assert_frame_equal(result, expected) + + def test_constructor_from_mapping_of_mapping(self): + data = DictWrapper( + { + "a": DictWrapper({"x": 1, "y": 2}), + "b": DictWrapper({"x": 3, "y": 4}), + "c": DictWrapper({"x": 5, "y": 6}), + } + ) + expected = DataFrame(data) + result = DataFrame.from_dict(data) + tm.assert_frame_equal(result, expected) + def test_from_dict_columns_parameter(self): # GH#18529 # Test new columns parameter for from_dict that was added to make diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index c0b9e6549c4ba..a5ef08bc4623b 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -61,6 +61,10 @@ SparseArray, TimedeltaArray, ) +from pandas.tests.frame.common import ( + DictWrapper, + ListWrapper, +) MIXED_FLOAT_DTYPES = ["float16", "float32", "float64"] MIXED_INT_DTYPES = [ @@ -2917,6 +2921,72 @@ def test_from_dict(self): tm.assert_series_equal(df["A"], Series(idx, name="A")) tm.assert_series_equal(df["B"], Series(dr, name="B")) + def test_from_mapping(self): + idx = 
Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo") + dr = date_range("20130110", periods=3) + + # construction + df = DataFrame(DictWrapper({"A": idx, "B": dr})) + assert df["A"].dtype == idx.dtype + assert df["A"].name == "A" + tm.assert_series_equal(df["A"], Series(idx, name="A")) + tm.assert_series_equal(df["B"], Series(dr, name="B")) + + def test_from_mapping_of_dict(self): + data = { + "a": {"x": 1, "y": 2}, + "b": {"x": 3, "y": 4}, + "c": {"x": 5, "y": 6}, + } + expected = DataFrame(data) + + # construction + result = DataFrame(DictWrapper(data)) + tm.assert_frame_equal(result, expected) + + def test_from_mapping_of_mapping(self): + data = { + "a": {"x": 1, "y": 2}, + "b": {"x": 3, "y": 4}, + "c": {"x": 5, "y": 6}, + } + expected = DataFrame(data) + + # construction + wrapped = DictWrapper({k: DictWrapper(v) for k, v in data.items()}) + result = DataFrame(wrapped) + tm.assert_frame_equal(result, expected) + + def test_from_mapping_list(self): + idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo") + dr = date_range("20130110", periods=3) + data = DataFrame({"A": idx, "B": dr}) + mapping_list = [ + DictWrapper(record) for record in data.to_dict(orient="records") + ] + + # construction + df = DataFrame(mapping_list) + assert df["A"].dtype == idx.dtype + assert df["A"].name == "A" + tm.assert_series_equal(df["A"], Series(idx, name="A")) + tm.assert_series_equal(df["B"], Series(dr, name="B")) + + def test_from_mapping_sequence(self): + idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo") + dr = date_range("20130110", periods=3) + data = DataFrame({"A": idx, "B": dr}) + mapping_list = ListWrapper( + [DictWrapper(record) for record in data.to_dict(orient="records")] + ) + + # construction + df = DataFrame(mapping_list) + assert df["A"].dtype == idx.dtype + assert df["A"].name == "A" + tm.assert_series_equal(df["A"], Series(idx, name="A")) + tm.assert_series_equal(df["B"],
Series(dr, name="B")) + def test_from_index(self): # from index idx2 = date_range("20130101", periods=3, tz="US/Eastern", name="foo")