Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doc/source/user_guide/basics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2372,11 +2372,11 @@ integers:

df.select_dtypes(include=["number", "bool"], exclude=["unsignedinteger"])

To select string columns you must use the ``object`` dtype:
To select string columns include ``str``:

.. ipython:: python

df.select_dtypes(include=["object"])
df.select_dtypes(include=[str])

Comment on lines 2375 to 2380
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would maybe add a note that this changed in pandas 3.0 and that, for pandas < 3, `include="object"` was used, perhaps with a link to https://pandas.pydata.org/docs/user_guide/migration-3-strings.html#hardcoded-use-of-object-dtype

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea, updated.

To see all the child dtypes of a generic ``dtype`` like ``numpy.number`` you
can define a function that returns a tree of child dtypes:
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,7 @@ Other Deprecations
- Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`)
- Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.unstack` and :meth:`DataFrame.unstack` (:issue:`12189`, :issue:`53868`)
- Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.shift` and :meth:`DataFrame.shift` (:issue:`53802`)
- Deprecated the backward-compatibility behavior in which :meth:`DataFrame.select_dtypes` matches ``str`` dtype columns when ``np.object_`` is specified (:issue:`61916`)
- Deprecated option "future.no_silent_downcasting", as it is no longer used. In a future version accessing this option will raise (:issue:`59502`)
- Deprecated slicing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` using a ``datetime.date`` object, explicitly cast to :class:`Timestamp` instead (:issue:`35830`)

Expand Down
21 changes: 21 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5237,6 +5237,27 @@ def predicate(arr: ArrayLike) -> bool:

return True

blk_dtypes = [blk.dtype for blk in self._mgr.blocks]
if (
np.object_ in include
and str not in include
and str not in exclude
and any(
isinstance(dtype, StringDtype) and dtype.na_value is np.nan
for dtype in blk_dtypes
)
):
# GH#61916
warnings.warn(
"For backward compatibility, 'str' dtypes are included by "
"select_dtypes when object dtypes are specified. "
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"select_dtypes when object dtypes are specified. "
"select_dtypes when 'object' dtype is specified. "

"This behavior is deprecated and will be removed in a future "
"version. Explicitly pass 'str' to `include` to select them, "
"or to `exclude` to remove them and silence this warning.",
Pandas4Warning,
stacklevel=find_stack_level(),
)

mgr = self._mgr._get_data_subset(predicate).copy(deep=False)
return self._constructor_from_mgr(mgr, axes=mgr.axes).__finalize__(self)

Expand Down
31 changes: 23 additions & 8 deletions pandas/tests/frame/methods/test_select_dtypes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas.errors import Pandas4Warning

from pandas.core.dtypes.dtypes import ExtensionDtype

import pandas as pd
Expand Down Expand Up @@ -102,7 +104,12 @@ def test_select_dtypes_include_using_list_like(self, using_infer_string):
ri = df.select_dtypes(include=[str])
tm.assert_frame_equal(ri, ei)

ri = df.select_dtypes(include=["object"])
msg = "For backward compatibility, 'str' dtypes are included"
warn = None
if using_infer_string:
warn = Pandas4Warning
with tm.assert_produces_warning(warn, match=msg):
ri = df.select_dtypes(include=["object"])
ei = df[["a"]]
tm.assert_frame_equal(ri, ei)

Expand Down Expand Up @@ -312,15 +319,18 @@ def test_select_dtypes_not_an_attr_but_still_valid_dtype(self, using_infer_strin
)
df["g"] = df.f.diff()
assert not hasattr(np, "u8")
r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"])
# if using_infer_string:
# TODO warn

msg = "For backward compatibility, 'str' dtypes are included"
warn = None
if using_infer_string:
warn = Pandas4Warning
with tm.assert_produces_warning(warn, match=msg):
r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"])
e = df[["a", "b"]]
tm.assert_frame_equal(r, e)

r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"])
# if using_infer_string:
# TODO warn
with tm.assert_produces_warning(warn, match=msg):
r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"])
e = df[["a", "b", "g"]]
tm.assert_frame_equal(r, e)

Expand Down Expand Up @@ -497,7 +507,12 @@ def test_select_dtype_object_and_str(self, using_infer_string):
)

# with "object" -> only select the object or default str dtype column
result = df.select_dtypes(include=["object"])
msg = "For backward compatibility, 'str' dtypes are included"
warn = None
if using_infer_string:
warn = Pandas4Warning
with tm.assert_produces_warning(warn, match=msg):
result = df.select_dtypes(include=["object"])
expected = df[["a"]]
tm.assert_frame_equal(result, expected)

Expand Down
Loading