Skip to content

Commit 15ca85b

Browse files
authored
DEPR: back-compat shim for select_dtypes (#62718)
1 parent 4e2c38c commit 15ca85b

File tree

4 files changed

+51
-10
lines changed

4 files changed

+51
-10
lines changed

doc/source/user_guide/basics.rst

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2372,11 +2372,15 @@ integers:
23722372
23732373
df.select_dtypes(include=["number", "bool"], exclude=["unsignedinteger"])
23742374
2375-
To select string columns you must use the ``object`` dtype:
2375+
To select string columns, include ``str``:
23762376

23772377
.. ipython:: python
23782378
2379-
df.select_dtypes(include=["object"])
2379+
df.select_dtypes(include=[str])
2380+
2381+
.. note::
2382+
2383+
This is a change in pandas 3.0. Previously, strings were stored in ``object`` dtype columns, so they were selected with ``include=[object]``. See https://pandas.pydata.org/docs/user_guide/migration-3-strings.html#hardcoded-use-of-object-dtype for details.
23802384

23812385
To see all the child dtypes of a generic ``dtype`` like ``numpy.number`` you
23822386
can define a function that returns a tree of child dtypes:

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -717,6 +717,7 @@ Other Deprecations
717717
- Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`)
718718
- Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.unstack` and :meth:`DataFrame.unstack` (:issue:`12189`, :issue:`53868`)
719719
- Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.shift` and :meth:`DataFrame.shift` (:issue:`53802`)
720+
- Deprecated the backward-compatibility behavior of :meth:`DataFrame.select_dtypes` in which columns with "str" dtype are also selected when ``np.object_`` is specified; explicitly pass ``str`` to ``include`` or ``exclude`` instead (:issue:`61916`)
720721
- Deprecated option "future.no_silent_downcasting", as it is no longer used. In a future version accessing this option will raise (:issue:`59502`)
721722
- Deprecated slicing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` using a ``datetime.date`` object, explicitly cast to :class:`Timestamp` instead (:issue:`35830`)
722723

pandas/core/frame.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5237,6 +5237,27 @@ def predicate(arr: ArrayLike) -> bool:
52375237

52385238
return True
52395239

5240+
blk_dtypes = [blk.dtype for blk in self._mgr.blocks]
5241+
if (
5242+
np.object_ in include
5243+
and str not in include
5244+
and str not in exclude
5245+
and any(
5246+
isinstance(dtype, StringDtype) and dtype.na_value is np.nan
5247+
for dtype in blk_dtypes
5248+
)
5249+
):
5250+
# GH#61916
5251+
warnings.warn(
5252+
"For backward compatibility, 'str' dtypes are included by "
5253+
"select_dtypes when 'object' dtype is specified. "
5254+
"This behavior is deprecated and will be removed in a future "
5255+
"version. Explicitly pass 'str' to `include` to select them, "
5256+
"or to `exclude` to remove them and silence this warning.",
5257+
Pandas4Warning,
5258+
stacklevel=find_stack_level(),
5259+
)
5260+
52405261
mgr = self._mgr._get_data_subset(predicate).copy(deep=False)
52415262
return self._constructor_from_mgr(mgr, axes=mgr.axes).__finalize__(self)
52425263

pandas/tests/frame/methods/test_select_dtypes.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas.errors import Pandas4Warning
5+
46
from pandas.core.dtypes.dtypes import ExtensionDtype
57

68
import pandas as pd
@@ -102,7 +104,12 @@ def test_select_dtypes_include_using_list_like(self, using_infer_string):
102104
ri = df.select_dtypes(include=[str])
103105
tm.assert_frame_equal(ri, ei)
104106

105-
ri = df.select_dtypes(include=["object"])
107+
msg = "For backward compatibility, 'str' dtypes are included"
108+
warn = None
109+
if using_infer_string:
110+
warn = Pandas4Warning
111+
with tm.assert_produces_warning(warn, match=msg):
112+
ri = df.select_dtypes(include=["object"])
106113
ei = df[["a"]]
107114
tm.assert_frame_equal(ri, ei)
108115

@@ -312,15 +319,18 @@ def test_select_dtypes_not_an_attr_but_still_valid_dtype(self, using_infer_strin
312319
)
313320
df["g"] = df.f.diff()
314321
assert not hasattr(np, "u8")
315-
r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"])
316-
# if using_infer_string:
317-
# TODO warn
322+
323+
msg = "For backward compatibility, 'str' dtypes are included"
324+
warn = None
325+
if using_infer_string:
326+
warn = Pandas4Warning
327+
with tm.assert_produces_warning(warn, match=msg):
328+
r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"])
318329
e = df[["a", "b"]]
319330
tm.assert_frame_equal(r, e)
320331

321-
r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"])
322-
# if using_infer_string:
323-
# TODO warn
332+
with tm.assert_produces_warning(warn, match=msg):
333+
r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"])
324334
e = df[["a", "b", "g"]]
325335
tm.assert_frame_equal(r, e)
326336

@@ -497,7 +507,12 @@ def test_select_dtype_object_and_str(self, using_infer_string):
497507
)
498508

499509
# with "object" -> only select the object or default str dtype column
500-
result = df.select_dtypes(include=["object"])
510+
msg = "For backward compatibility, 'str' dtypes are included"
511+
warn = None
512+
if using_infer_string:
513+
warn = Pandas4Warning
514+
with tm.assert_produces_warning(warn, match=msg):
515+
result = df.select_dtypes(include=["object"])
501516
expected = df[["a"]]
502517
tm.assert_frame_equal(result, expected)
503518

0 commit comments

Comments
 (0)