pandas-dev · mrkn · May 22, 2024 · May 23, 2024 · May 23, 2024 · May 23, 2024
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -44,6 +44,7 @@ Other enhancements
 - :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
 - :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
 - :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`)
+- :meth:`DataFrame` now supports creating a new :class:`DataFrame` from a :py:class:`collections.abc.Mapping` object (:issue:`58803`)
 - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`)
 - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
 - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -87,6 +87,7 @@
 
 if TYPE_CHECKING:
     from collections.abc import (
+        Mapping,
         Sequence,
         Sized,
     )
@@ -860,13 +861,13 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
     return dtype, val
 
 
-def dict_compat(d: dict[Scalar, Scalar]) -> dict[Scalar, Scalar]:
+def dict_compat(d: Mapping[Scalar, Scalar]) -> dict[Scalar, Scalar]:
     """
-    Convert datetimelike-keyed dicts to a Timestamp-keyed dict.
+    Convert datetimelike-keyed Mappings to a Timestamp-keyed dict.
 
     Parameters
     ----------
-    d: dict-like object
+    d: Mapping object
 
     Returns
     -------

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -513,12 +513,12 @@ class DataFrame(NDFrame, OpsMixin):
 
     Parameters
     ----------
-    data : ndarray (structured or homogeneous), Iterable, dict, or DataFrame
-        Dict can contain Series, arrays, constants, dataclass or list-like objects. If
-        data is a dict, column order follows insertion-order. If a dict contains Series
-        which have an index defined, it is aligned by its index. This alignment also
-        occurs if data is a Series or a DataFrame itself. Alignment is done on
-        Series/DataFrame inputs.
+    data : ndarray (structured or homogeneous), Iterable, Mapping, or DataFrame
+        Mapping can contain Series, arrays, constants, dataclass or list-like objects.
+        If data is a Mapping, column order follows insertion-order. If a Mapping
+        contains Series which have an index defined, it is aligned by its index. This
+        alignment also occurs if data is a Series or a DataFrame itself. Alignment is
+        done on Series/DataFrame inputs.
 
         If data is a list of dicts, column order follows insertion-order.
 
@@ -735,7 +735,7 @@ def __init__(
             raise ValueError("columns cannot be a set")
 
         if copy is None:
-            if isinstance(data, dict):
+            if isinstance(data, Mapping):
                 # retain pre-GH#38939 default behavior
                 copy = True
             elif not isinstance(data, (Index, DataFrame, Series)):
@@ -754,7 +754,7 @@ def __init__(
                 data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy
             )
 
-        elif isinstance(data, dict):
+        elif isinstance(data, Mapping):
             # GH#38939 de facto copy defaults to False only in non-dict cases
             mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy)
         elif isinstance(data, ma.MaskedArray):
@@ -1735,7 +1735,7 @@ def __rmatmul__(self, other) -> DataFrame:
     @classmethod
     def from_dict(
         cls,
-        data: dict,
+        data: Mapping,
         orient: FromDictOrient = "columns",
         dtype: Dtype | None = None,
         columns: Axes | None = None,
@@ -1748,7 +1748,7 @@ def from_dict(
 
         Parameters
         ----------
-        data : dict
+        data : Mapping
             Of the form {field : array-like} or {field : dict}.
         orient : {'columns', 'index', 'tight'}, default 'columns'
             The "orientation" of the data. If the keys of the passed dict

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
@@ -347,7 +347,7 @@ def _check_values_indices_shape_match(
 
 
 def dict_to_mgr(
-    data: dict,
+    data: abc.Mapping,
     index,
     columns,
     *,
@@ -536,7 +536,7 @@ def _homogenize(
             refs.append(val._references)
             val = val._values
         else:
-            if isinstance(val, dict):
+            if isinstance(val, abc.Mapping):
                 # GH#41785 this _should_ be equivalent to (but faster than)
                 #  val = Series(val, index=index)._values
                 if oindex is None:
@@ -578,7 +578,7 @@ def _extract_index(data) -> Index:
         if isinstance(val, ABCSeries):
             have_series = True
             indexes.append(val.index)
-        elif isinstance(val, dict):
+        elif isinstance(val, abc.Mapping):
             have_dicts = True
             indexes.append(list(val.keys()))
         elif is_list_like(val) and getattr(val, "ndim", 1) == 1:

diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py
@@ -1,5 +1,9 @@
 from __future__ import annotations
 
+from collections.abc import (
+    Mapping,
+    Sequence,
+)
 from typing import TYPE_CHECKING
 
 from pandas import (
@@ -11,6 +15,31 @@
     from pandas._typing import AxisInt
 
 
+class DictWrapper(Mapping):  # Mapping backed by a dict without subclassing dict
+    def __init__(self, d: dict) -> None:
+        self._dict = d
+
+    def __getitem__(self, key):
+        return self._dict[key]
+
+    def __iter__(self):
+        return iter(self._dict)
+
+    def __len__(self):
+        return len(self._dict)
+
+
+class ListWrapper(Sequence):  # Sequence backed by a list without subclassing list
+    def __init__(self, lst: list) -> None:
+        self._list = lst
+
+    def __getitem__(self, i):
+        return self._list[i]
+
+    def __len__(self):
+        return len(self._list)
+
+
 def _check_mixed_float(df, dtype=None):
     # float16 are most likely to be upcasted to float32
     dtypes = {"A": "float32", "B": "float32", "C": "float16", "D": "float64"}

diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py
@@ -11,8 +11,10 @@
     MultiIndex,
     RangeIndex,
     Series,
+    date_range,
 )
 import pandas._testing as tm
+from pandas.tests.frame.common import DictWrapper
 
 
 class TestFromDict:
@@ -135,6 +137,27 @@ def test_constructor_from_ordered_dict(self):
         result = DataFrame.from_dict(a, orient="index")
         tm.assert_frame_equal(result, expected)
 
+    def test_constructor_from_mapping(self):
+        # GH#58803 from_dict on a non-dict Mapping must match plain-dict input
+        idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo")
+        dr = date_range("20130110", periods=3)
+        columns = {"A": idx, "B": dr}
+
+        result = DataFrame.from_dict(DictWrapper(columns))
+        tm.assert_frame_equal(result, DataFrame(columns))
+
+    def test_constructor_from_mapping_of_mapping(self):
+        # GH#58803 nested non-dict Mappings must match nested plain dicts
+        columns = {
+            "a": {"x": 1, "y": 2},
+            "b": {"x": 3, "y": 4},
+            "c": {"x": 5, "y": 6},
+        }
+        data = DictWrapper({col: DictWrapper(vals) for col, vals in columns.items()})
+
+        result = DataFrame.from_dict(data)
+        tm.assert_frame_equal(result, DataFrame(columns))
+
     def test_from_dict_columns_parameter(self):
         # GH#18529
         # Test new columns parameter for from_dict that was added to make

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -61,6 +61,10 @@
     SparseArray,
     TimedeltaArray,
 )
+from pandas.tests.frame.common import (
+    DictWrapper,
+    ListWrapper,
+)
 
 MIXED_FLOAT_DTYPES = ["float16", "float32", "float64"]
 MIXED_INT_DTYPES = [
@@ -2917,6 +2921,72 @@ def test_from_dict(self):
         tm.assert_series_equal(df["A"], Series(idx, name="A"))
         tm.assert_series_equal(df["B"], Series(dr, name="B"))
 
+    def test_from_mapping(self):
+        # GH#58803 a non-dict Mapping is accepted like a dict of columns
+        idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo")
+        dr = date_range("20130110", periods=3)
+
+        df = DataFrame(DictWrapper({"A": idx, "B": dr}))
+        assert df["A"].dtype == idx.dtype
+        assert df["A"].name == "A"
+        tm.assert_series_equal(df["A"], Series(idx, name="A"))
+        tm.assert_series_equal(df["B"], Series(dr, name="B"))
+
+    def test_from_mapping_of_dict(self):
+        # GH#58803 an outer Mapping holding plain dict columns
+        nested = {
+            "a": {"x": 1, "y": 2},
+            "b": {"x": 3, "y": 4},
+            "c": {"x": 5, "y": 6},
+        }
+        from_plain_dict = DataFrame(nested)
+        from_mapping = DataFrame(DictWrapper(nested))
+
+        tm.assert_frame_equal(from_mapping, from_plain_dict)
+
+    def test_from_mapping_of_mapping(self):
+        # GH#58803 both the outer container and each column are Mappings
+        nested = {
+            "a": {"x": 1, "y": 2},
+            "b": {"x": 3, "y": 4},
+            "c": {"x": 5, "y": 6},
+        }
+        inner_wrapped = {col: DictWrapper(vals) for col, vals in nested.items()}
+        from_plain_dict = DataFrame(nested)
+        from_mapping = DataFrame(DictWrapper(inner_wrapped))
+
+        tm.assert_frame_equal(from_mapping, from_plain_dict)
+
+    def test_from_mapping_list(self):
+        # GH#58803 records given as non-dict Mappings inside a plain list
+        idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo")
+        dr = date_range("20130110", periods=3)
+        data = DataFrame({"A": idx, "B": dr})
+        mapping_list = [
+            DictWrapper(record) for record in data.to_dict(orient="records")
+        ]
+
+        df = DataFrame(mapping_list)
+        assert df["A"].dtype == idx.dtype
+        assert df["A"].name == "A"
+        tm.assert_series_equal(df["A"], Series(idx, name="A"))
+        tm.assert_series_equal(df["B"], Series(dr, name="B"))
+
+    def test_from_mapping_sequence(self):
+        # GH#58803 records given as non-dict Mappings inside a non-list Sequence
+        idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo")
+        dr = date_range("20130110", periods=3)
+        data = DataFrame({"A": idx, "B": dr})
+        mapping_list = ListWrapper(
+            [DictWrapper(record) for record in data.to_dict(orient="records")]
+        )
+
+        df = DataFrame(mapping_list)
+        assert df["A"].dtype == idx.dtype
+        assert df["A"].name == "A"
+        tm.assert_series_equal(df["A"], Series(idx, name="A"))
+        tm.assert_series_equal(df["B"], Series(dr, name="B"))
+
     def test_from_index(self):
         # from index
         idx2 = date_range("20130101", periods=3, tz="US/Eastern", name="foo")