Skip to content

Commit

Permalink
ENH: concat of nullable int + bool preserves int dtype (pandas-dev#34985)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche authored Jul 8, 2020
1 parent 74f77a1 commit 42fd7e7
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 6 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ Other enhancements
- :meth:`DataFrame.hist`, :meth:`Series.hist`, :meth:`core.groupby.DataFrameGroupBy.hist`, and :meth:`core.groupby.SeriesGroupBy.hist` have gained the ``legend`` argument. Set to True to show a legend in the histogram. (:issue:`6279`)
- :func:`concat` and :meth:`~DataFrame.append` now preserve extension dtypes, for example
combining a nullable integer column with a numpy integer column will no longer
result in object dtype but preserve the integer dtype (:issue:`33607`, :issue:`34339`).
result in object dtype but preserve the integer dtype (:issue:`33607`, :issue:`34339`, :issue:`34095`).
- :meth:`~pandas.io.gbq.read_gbq` now allows disabling the progress bar (:issue:`33360`).
- :meth:`~pandas.io.gbq.read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`).
- :meth:`DataFrame.cov` and :meth:`Series.cov` now accept a new ``ddof`` parameter for delta degrees of freedom, as in the corresponding numpy methods (:issue:`34611`).
Expand Down
9 changes: 6 additions & 3 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,13 @@ def construct_array_type(cls) -> Type["IntegerArray"]:
return IntegerArray

def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
# for now only handle other integer types
# we only handle nullable EA dtypes and numeric numpy dtypes
if not all(
isinstance(t, _IntegerDtype)
or (isinstance(t, np.dtype) and np.issubdtype(t, np.integer))
isinstance(t, BaseMaskedDtype)
or (
isinstance(t, np.dtype)
and (np.issubdtype(t, np.number) or np.issubdtype(t, np.bool_))
)
for t in dtypes
):
return None
Expand Down
45 changes: 43 additions & 2 deletions pandas/tests/arrays/integer/test_concat.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import pytest

import pandas as pd
Expand All @@ -15,12 +16,52 @@
(["Int32", "UInt32"], "Int64"),
# this still gives object (awaiting float extension dtype)
(["Int64", "UInt64"], "object"),
(["Int64", "boolean"], "Int64"),
(["UInt8", "boolean"], "UInt8"),
],
)
def test_concat_series(to_concat_dtypes, result_dtype):

result = pd.concat([pd.Series([1, 2, pd.NA], dtype=t) for t in to_concat_dtypes])
expected = pd.concat([pd.Series([1, 2, pd.NA], dtype=object)] * 2).astype(
result = pd.concat([pd.Series([0, 1, pd.NA], dtype=t) for t in to_concat_dtypes])
expected = pd.concat([pd.Series([0, 1, pd.NA], dtype=object)] * 2).astype(
result_dtype
)
tm.assert_series_equal(result, expected)

# order doesn't matter for result
result = pd.concat(
[pd.Series([0, 1, pd.NA], dtype=t) for t in to_concat_dtypes[::-1]]
)
expected = pd.concat([pd.Series([0, 1, pd.NA], dtype=object)] * 2).astype(
result_dtype
)
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "to_concat_dtypes, result_dtype",
    [
        (["Int64", "int64"], "Int64"),
        (["UInt64", "uint64"], "UInt64"),
        (["Int8", "int8"], "Int8"),
        (["Int8", "int16"], "Int16"),
        (["UInt8", "int8"], "Int16"),
        (["Int32", "uint32"], "Int64"),
        # this still gives object (awaiting float extension dtype)
        (["Int64", "uint64"], "object"),
        (["Int64", "bool"], "Int64"),
        (["UInt8", "bool"], "UInt8"),
    ],
)
def test_concat_series_with_numpy(to_concat_dtypes, result_dtype):
    """Concatenate a nullable-integer Series with a numpy-backed Series.

    The first entry of ``to_concat_dtypes`` is the extension dtype of the
    Series holding ``pd.NA``; the second is the numpy dtype of the other
    Series. The concatenated result must have ``result_dtype`` whichever
    operand comes first.
    """
    masked_dtype, numpy_dtype = to_concat_dtypes[0], to_concat_dtypes[1]
    masked = pd.Series([0, 1, pd.NA], dtype=masked_dtype)
    plain = pd.Series(np.array([0, 1], dtype=numpy_dtype))

    # extension-dtype Series first
    forward = pd.concat([masked, plain], ignore_index=True)
    forward_expected = pd.Series([0, 1, pd.NA, 0, 1], dtype=object).astype(
        result_dtype
    )
    tm.assert_series_equal(forward, forward_expected)

    # numpy-dtype Series first: the order must not change the result dtype
    backward = pd.concat([plain, masked], ignore_index=True)
    backward_expected = pd.Series([0, 1, 0, 1, pd.NA], dtype=object).astype(
        result_dtype
    )
    tm.assert_series_equal(backward, backward_expected)

0 comments on commit 42fd7e7

Please sign in to comment.