diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index b25a310a15d19..19f029d6aed68 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -851,6 +851,7 @@ Categorical - Bug when passing categorical data to :class:`Index` constructor along with ``dtype=object`` incorrectly returning a :class:`CategoricalIndex` instead of object-dtype :class:`Index` (:issue:`32167`) - Bug where :class:`Categorical` comparison operator ``__ne__`` would incorrectly evaluate to ``False`` when either element was missing (:issue:`32276`) - :meth:`Categorical.fillna` now accepts :class:`Categorical` ``other`` argument (:issue:`32420`) +- Bug where the repr of :class:`Categorical` did not distinguish between ``int`` and ``str`` values; string values and categories are now shown quoted (:issue:`33676`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index dcf2015245518..9e3ca4cc53363 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -604,8 +604,8 @@ def factorize( >>> codes array([0, 0, 1]...) >>> uniques - [a, c] - Categories (3, object): [a, b, c] + ['a', 'c'] + Categories (3, object): ['a', 'b', 'c'] Notice that ``'b'`` is in ``uniques.categories``, despite not being present in ``cat.values``. 
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 7f2c61ff7d955..5565b85f8d59a 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -846,14 +846,14 @@ def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, "ExtensionArray" -------- >>> cat = pd.Categorical(['a', 'b', 'c']) >>> cat - [a, b, c] - Categories (3, object): [a, b, c] + ['a', 'b', 'c'] + Categories (3, object): ['a', 'b', 'c'] >>> cat.repeat(2) - [a, a, b, b, c, c] - Categories (3, object): [a, b, c] + ['a', 'a', 'b', 'b', 'c', 'c'] + Categories (3, object): ['a', 'b', 'c'] >>> cat.repeat([1, 2, 3]) - [a, b, b, c, c, c] - Categories (3, object): [a, b, c] + ['a', 'b', 'b', 'c', 'c', 'c'] + Categories (3, object): ['a', 'b', 'c'] """ @Substitution(klass="ExtensionArray") diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 3d469ec28b9c4..1fedfa70cc469 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1,3 +1,5 @@ +from csv import QUOTE_NONNUMERIC +from functools import partial import operator from shutil import get_terminal_size from typing import Dict, Hashable, List, Type, Union, cast @@ -275,8 +277,8 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject): Categories (3, int64): [1, 2, 3] >>> pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c']) - [a, b, c, a, b, c] - Categories (3, object): [a, b, c] + ['a', 'b', 'c', 'a', 'b', 'c'] + Categories (3, object): ['a', 'b', 'c'] Ordered `Categoricals` can be sorted according to the custom order of the categories and can have a min and max value. @@ -284,8 +286,8 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject): >>> c = pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c'], ordered=True, ... 
categories=['c', 'b', 'a']) >>> c - [a, b, c, a, b, c] - Categories (3, object): [c < b < a] + ['a', 'b', 'c', 'a', 'b', 'c'] + Categories (3, object): ['c' < 'b' < 'a'] >>> c.min() 'c' """ @@ -598,8 +600,8 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None): -------- >>> dtype = pd.CategoricalDtype(['a', 'b'], ordered=True) >>> pd.Categorical.from_codes(codes=[0, 1, 0, 1], dtype=dtype) - [a, b, a, b] - Categories (2, object): [a < b] + ['a', 'b', 'a', 'b'] + Categories (2, object): ['a' < 'b'] """ dtype = CategoricalDtype._from_values_or_dtype( categories=categories, ordered=ordered, dtype=dtype @@ -659,13 +661,13 @@ def _set_categories(self, categories, fastpath=False): -------- >>> c = pd.Categorical(['a', 'b']) >>> c - [a, b] - Categories (2, object): [a, b] + ['a', 'b'] + Categories (2, object): ['a', 'b'] >>> c._set_categories(pd.Index(['a', 'c'])) >>> c - [a, c] - Categories (2, object): [a, c] + ['a', 'c'] + Categories (2, object): ['a', 'c'] """ if fastpath: new_dtype = CategoricalDtype._from_fastpath(categories, self.ordered) @@ -885,14 +887,14 @@ def rename_categories(self, new_categories, inplace=False): categories not in the dictionary are passed through >>> c.rename_categories({'a': 'A', 'c': 'C'}) - [A, A, b] - Categories (2, object): [A, b] + ['A', 'A', 'b'] + Categories (2, object): ['A', 'b'] You may also provide a callable to create the new categories >>> c.rename_categories(lambda x: x.upper()) - [A, A, B] - Categories (2, object): [A, B] + ['A', 'A', 'B'] + Categories (2, object): ['A', 'B'] """ inplace = validate_bool_kwarg(inplace, "inplace") cat = self if inplace else self.copy() @@ -1128,22 +1130,22 @@ def map(self, mapper): -------- >>> cat = pd.Categorical(['a', 'b', 'c']) >>> cat - [a, b, c] - Categories (3, object): [a, b, c] + ['a', 'b', 'c'] + Categories (3, object): ['a', 'b', 'c'] >>> cat.map(lambda x: x.upper()) - [A, B, C] - Categories (3, object): [A, B, C] + ['A', 'B', 'C'] + Categories (3, object): ['A', 
'B', 'C'] >>> cat.map({'a': 'first', 'b': 'second', 'c': 'third'}) - [first, second, third] - Categories (3, object): [first, second, third] + ['first', 'second', 'third'] + Categories (3, object): ['first', 'second', 'third'] If the mapping is one-to-one the ordering of the categories is preserved: >>> cat = pd.Categorical(['a', 'b', 'c'], ordered=True) >>> cat - [a, b, c] - Categories (3, object): [a < b < c] + ['a', 'b', 'c'] + Categories (3, object): ['a' < 'b' < 'c'] >>> cat.map({'a': 3, 'b': 2, 'c': 1}) [3, 2, 1] Categories (3, int64): [3 < 2 < 1] @@ -1778,29 +1780,29 @@ def take(self: _T, indexer, allow_fill: bool = False, fill_value=None) -> _T: -------- >>> cat = pd.Categorical(['a', 'a', 'b']) >>> cat - [a, a, b] - Categories (2, object): [a, b] + ['a', 'a', 'b'] + Categories (2, object): ['a', 'b'] Specify ``allow_fill==False`` to have negative indices mean indexing from the right. >>> cat.take([0, -1, -2], allow_fill=False) - [a, b, a] - Categories (2, object): [a, b] + ['a', 'b', 'a'] + Categories (2, object): ['a', 'b'] With ``allow_fill=True``, indices equal to ``-1`` mean "missing" values that should be filled with the `fill_value`, which is ``np.nan`` by default. >>> cat.take([0, -1, -1], allow_fill=True) - [a, NaN, NaN] - Categories (2, object): [a, b] + ['a', NaN, NaN] + Categories (2, object): ['a', 'b'] The fill value can be specified. >>> cat.take([0, -1, -1], allow_fill=True, fill_value='a') - [a, a, a] - Categories (2, object): [a, b] + ['a', 'a', 'a'] + Categories (2, object): ['a', 'b'] Specifying a fill value that's not in ``self.categories`` will raise a ``ValueError``. 
@@ -1872,13 +1874,16 @@ def _repr_categories(self): ) from pandas.io.formats import format as fmt + format_array = partial( + fmt.format_array, formatter=None, quoting=QUOTE_NONNUMERIC + ) if len(self.categories) > max_categories: num = max_categories // 2 - head = fmt.format_array(self.categories[:num], None) - tail = fmt.format_array(self.categories[-num:], None) + head = format_array(self.categories[:num]) + tail = format_array(self.categories[-num:]) category_strs = head + ["..."] + tail else: - category_strs = fmt.format_array(self.categories, None) + category_strs = format_array(self.categories) # Strip all leading spaces, which format_array adds for columns... category_strs = [x.strip() for x in category_strs] @@ -2051,8 +2056,8 @@ def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]: -------- >>> c = pd.Categorical(list('aabca')) >>> c - [a, a, b, c, a] - Categories (3, object): [a, b, c] + ['a', 'a', 'b', 'c', 'a'] + Categories (3, object): ['a', 'b', 'c'] >>> c.categories Index(['a', 'b', 'c'], dtype='object') >>> c.codes @@ -2199,20 +2204,20 @@ def unique(self): order of appearance. >>> pd.Categorical(list("baabc")).unique() - [b, a, c] - Categories (3, object): [b, a, c] + ['b', 'a', 'c'] + Categories (3, object): ['b', 'a', 'c'] >>> pd.Categorical(list("baabc"), categories=list("abc")).unique() - [b, a, c] - Categories (3, object): [b, a, c] + ['b', 'a', 'c'] + Categories (3, object): ['b', 'a', 'c'] An ordered Categorical preserves the category ordering. >>> pd.Categorical( ... list("baabc"), categories=list("abc"), ordered=True ... 
).unique() - [b, a, c] - Categories (3, object): [a < b < c] + ['b', 'a', 'c'] + Categories (3, object): ['a' < 'b' < 'c'] """ # unlike np.unique, unique1d does not sort unique_codes = unique1d(self.codes) @@ -2465,7 +2470,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (3, object): [a, b, c] + Categories (3, object): ['a', 'b', 'c'] >>> s.cat.categories Index(['a', 'b', 'c'], dtype='object') @@ -2478,7 +2483,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 a 5 a dtype: category - Categories (3, object): [c, b, a] + Categories (3, object): ['c', 'b', 'a'] >>> s.cat.reorder_categories(list("cba")) 0 a @@ -2488,7 +2493,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (3, object): [c, b, a] + Categories (3, object): ['c', 'b', 'a'] >>> s.cat.add_categories(["d", "e"]) 0 a @@ -2498,7 +2503,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (5, object): [a, b, c, d, e] + Categories (5, object): ['a', 'b', 'c', 'd', 'e'] >>> s.cat.remove_categories(["a", "c"]) 0 NaN @@ -2508,7 +2513,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 NaN 5 NaN dtype: category - Categories (1, object): [b] + Categories (1, object): ['b'] >>> s1 = s.cat.add_categories(["d", "e"]) >>> s1.cat.remove_unused_categories() @@ -2519,7 +2524,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (3, object): [a, b, c] + Categories (3, object): ['a', 'b', 'c'] >>> s.cat.set_categories(list("abcde")) 0 a @@ -2529,7 +2534,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (5, object): [a, b, c, d, e] + Categories (5, object): ['a', 'b', 'c', 'd', 'e'] >>> s.cat.as_ordered() 0 a @@ 
-2539,7 +2544,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (3, object): [a < b < c] + Categories (3, object): ['a' < 'b' < 'c'] >>> s.cat.as_unordered() 0 a @@ -2549,7 +2554,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (3, object): [a, b, c] + Categories (3, object): ['a', 'b', 'c'] """ def __init__(self, data): diff --git a/pandas/core/base.py b/pandas/core/base.py index e790b1d7f106e..813de491ffdb3 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -743,8 +743,8 @@ def array(self) -> ExtensionArray: >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a'])) >>> ser.array - [a, b, a] - Categories (2, object): [a, b] + ['a', 'b', 'a'] + Categories (2, object): ['a', 'b'] """ raise AbstractMethodError(self) @@ -1481,8 +1481,8 @@ def factorize(self, sort=False, na_sentinel=-1): ... ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True ... ) >>> ser - [apple, bread, bread, cheese, milk] - Categories (4, object): [apple < bread < cheese < milk] + ['apple', 'bread', 'bread', 'cheese', 'milk'] + Categories (4, object): ['apple' < 'bread' < 'cheese' < 'milk'] >>> ser.searchsorted('bread') 1 diff --git a/pandas/core/construction.py b/pandas/core/construction.py index b110a316a76d9..9ac661f97a56e 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -217,15 +217,15 @@ def array( You can use the string alias for `dtype` >>> pd.array(['a', 'b', 'a'], dtype='category') - [a, b, a] - Categories (2, object): [a, b] + ['a', 'b', 'a'] + Categories (2, object): ['a', 'b'] Or specify the actual dtype >>> pd.array(['a', 'b', 'a'], ... dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True)) - [a, b, a] - Categories (3, object): [a < b < c] + ['a', 'b', 'a'] + Categories (3, object): ['a' < 'b' < 'c'] If pandas does not infer a dedicated extension type a :class:`arrays.PandasArray` is returned. 
@@ -357,8 +357,8 @@ def extract_array(obj, extract_numpy: bool = False): Examples -------- >>> extract_array(pd.Series(['a', 'b', 'c'], dtype='category')) - [a, b, c] - Categories (3, object): [a, b, c] + ['a', 'b', 'c'] + Categories (3, object): ['a', 'b', 'c'] Other objects like lists, arrays, and DataFrames are just passed through. diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 71686bfc313fb..4b7c818f487ac 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -228,16 +228,16 @@ def union_categoricals( >>> a = pd.Categorical(["b", "c"]) >>> b = pd.Categorical(["a", "b"]) >>> union_categoricals([a, b]) - [b, c, a, b] - Categories (3, object): [b, c, a] + ['b', 'c', 'a', 'b'] + Categories (3, object): ['b', 'c', 'a'] By default, the resulting categories will be ordered as they appear in the `categories` of the data. If you want the categories to be lexsorted, use `sort_categories=True` argument. >>> union_categoricals([a, b], sort_categories=True) - [b, c, a, b] - Categories (3, object): [a, b, c] + ['b', 'c', 'a', 'b'] + Categories (3, object): ['a', 'b', 'c'] `union_categoricals` also works with the case of combining two categoricals of the same categories and order information (e.g. what @@ -246,8 +246,8 @@ def union_categoricals( >>> a = pd.Categorical(["a", "b"], ordered=True) >>> b = pd.Categorical(["a", "b", "a"], ordered=True) >>> union_categoricals([a, b]) - [a, b, a, b, a] - Categories (2, object): [a < b] + ['a', 'b', 'a', 'b', 'a'] + Categories (2, object): ['a' < 'b'] Raises `TypeError` because the categories are ordered and not identical. 
@@ -266,8 +266,8 @@ def union_categoricals( >>> a = pd.Categorical(["a", "b", "c"], ordered=True) >>> b = pd.Categorical(["c", "b", "a"], ordered=True) >>> union_categoricals([a, b], ignore_order=True) - [a, b, c, c, b, a] - Categories (3, object): [a, b, c] + ['a', 'b', 'c', 'c', 'b', 'a'] + Categories (3, object): ['a', 'b', 'c'] `union_categoricals` also works with a `CategoricalIndex`, or `Series` containing categorical data, but note that the resulting array will @@ -276,8 +276,8 @@ def union_categoricals( >>> a = pd.Series(["b", "c"], dtype='category') >>> b = pd.Series(["a", "b"], dtype='category') >>> union_categoricals([a, b]) - [b, c, a, b] - Categories (3, object): [b, c, a] + ['b', 'c', 'a', 'b'] + Categories (3, object): ['b', 'c', 'a'] """ from pandas import Categorical from pandas.core.arrays.categorical import recode_for_categories diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index b9d16ac5959e3..a9d2430717e4f 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -230,7 +230,7 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): 2 a 3 NaN dtype: category - Categories (2, object): [b < a] + Categories (2, object): ['b' < 'a'] An empty CategoricalDtype with a specific dtype can be created by providing an empty index. As follows, diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index b6735282acaff..f7723bee532ff 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -150,16 +150,16 @@ def cut( >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), ... 3, labels=["bad", "medium", "good"]) - [bad, good, medium, medium, good, bad] - Categories (3, object): [bad < medium < good] + ['bad', 'good', 'medium', 'medium', 'good', 'bad'] + Categories (3, object): ['bad' < 'medium' < 'good'] ``ordered=False`` will result in unordered categories when labels are passed. This parameter can be used to allow non-unique labels: >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, ... 
labels=["B", "A", "B"], ordered=False) - [B, B, A, A, B, B] - Categories (2, object): [A, B] + ['B', 'B', 'A', 'A', 'B', 'B'] + Categories (2, object): ['A', 'B'] ``labels=False`` implies you just want the bins back. diff --git a/pandas/core/series.py b/pandas/core/series.py index d8cf8308142a6..a652af5efc590 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -524,8 +524,8 @@ def values(self): array(['a', 'a', 'b', 'c'], dtype=object) >>> pd.Series(list('aabc')).astype('category').values - [a, a, b, c] - Categories (3, object): [a, b, c] + ['a', 'a', 'b', 'c'] + Categories (3, object): ['a', 'b', 'c'] Timezone aware datetime data is converted to UTC: @@ -1850,15 +1850,15 @@ def unique(self): appearance. >>> pd.Series(pd.Categorical(list('baabc'))).unique() - [b, a, c] - Categories (3, object): [b, a, c] + ['b', 'a', 'c'] + Categories (3, object): ['b', 'a', 'c'] An ordered Categorical preserves the category ordering. >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'), ... 
ordered=True)).unique() - [b, a, c] - Categories (3, object): [a < b < c] + ['b', 'a', 'c'] + Categories (3, object): ['a' < 'b' < 'c'] """ result = super().unique() return result diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 68a88fee83187..3a96a9ba8ad69 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -4,6 +4,7 @@ """ from contextlib import contextmanager +from csv import QUOTE_NONE, QUOTE_NONNUMERIC from datetime import tzinfo import decimal from functools import partial @@ -176,6 +177,7 @@ def __init__( self.na_rep = na_rep self.length = length self.footer = footer + self.quoting = QUOTE_NONNUMERIC def _get_footer(self) -> str: footer = "" @@ -200,6 +202,7 @@ def _get_formatted_values(self) -> List[str]: None, float_format=None, na_rep=self.na_rep, + quoting=self.quoting, ) def to_string(self) -> str: @@ -1109,6 +1112,7 @@ def format_array( justify: str = "right", decimal: str = ".", leading_space: Optional[bool] = None, + quoting: Optional[int] = None, ) -> List[str]: """ Format an array for printing. 
@@ -1171,6 +1175,7 @@ def format_array( justify=justify, decimal=decimal, leading_space=leading_space, + quoting=quoting, ) return fmt_obj.get_result() @@ -1216,11 +1221,15 @@ def _format_strings(self) -> List[str]: else: float_format = self.float_format - formatter = ( - self.formatter - if self.formatter is not None - else (lambda x: pprint_thing(x, escape_chars=("\t", "\r", "\n"))) - ) + if self.formatter is not None: + formatter = self.formatter + else: + quote_strings = self.quoting is not None and self.quoting != QUOTE_NONE + formatter = partial( + pprint_thing, + escape_chars=("\t", "\r", "\n"), + quote_strings=quote_strings, + ) def _format(x): if self.na_rep is not None and is_scalar(x) and isna(x): diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index d08c4b47dd3cb..735b062eae80e 100644 --- a/pandas/tests/arrays/categorical/test_repr.py +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -14,7 +14,10 @@ class TestCategoricalReprWithFactor(TestCategorical): def test_print(self): - expected = ["[a, b, b, a, a, c, c, c]", "Categories (3, object): [a < b < c]"] + expected = [ + "['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']", + "Categories (3, object): ['a' < 'b' < 'c']", + ] expected = "\n".join(expected) actual = repr(self.factor) assert actual == expected @@ -24,9 +27,9 @@ class TestCategoricalRepr: def test_big_print(self): factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ["a", "b", "c"], fastpath=True) expected = [ - "[a, b, c, a, b, ..., b, c, a, b, c]", + "['a', 'b', 'c', 'a', 'b', ..., 'b', 'c', 'a', 'b', 'c']", "Length: 600", - "Categories (3, object): [a, b, c]", + "Categories (3, object): ['a', 'b', 'c']", ] expected = "\n".join(expected) @@ -36,13 +39,13 @@ def test_big_print(self): def test_empty_print(self): factor = Categorical([], ["a", "b", "c"]) - expected = "[], Categories (3, object): [a, b, c]" + expected = "[], Categories (3, object): ['a', 'b', 'c']" actual = repr(factor) assert actual 
== expected assert expected == actual factor = Categorical([], ["a", "b", "c"], ordered=True) - expected = "[], Categories (3, object): [a < b < c]" + expected = "[], Categories (3, object): ['a' < 'b' < 'c']" actual = repr(factor) assert expected == actual @@ -64,17 +67,17 @@ def test_print_none_width(self): def test_unicode_print(self): c = Categorical(["aaaaa", "bb", "cccc"] * 20) expected = """\ -[aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc] +['aaaaa', 'bb', 'cccc', 'aaaaa', 'bb', ..., 'bb', 'cccc', 'aaaaa', 'bb', 'cccc'] Length: 60 -Categories (3, object): [aaaaa, bb, cccc]""" +Categories (3, object): ['aaaaa', 'bb', 'cccc']""" assert repr(c) == expected c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20) expected = """\ -[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] +['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう'] Length: 60 -Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa +Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa assert repr(c) == expected @@ -83,9 +86,9 @@ def test_unicode_print(self): with option_context("display.unicode.east_asian_width", True): c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20) - expected = """[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] + expected = """['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう'] Length: 60 -Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa +Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa assert repr(c) == expected @@ -523,3 +526,9 @@ def test_categorical_index_repr_timedelta_ordered(self): categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')""" # noqa assert repr(i) == exp + + def test_categorical_str_repr(self): + # GH 33676 + result = 
repr(Categorical([1, "2", 3, 4])) + expected = "[1, '2', 3, 4]\nCategories (4, object): [1, 3, 4, '2']" + assert result == expected diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 77f942a9e32ec..b861b37b49f89 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -270,7 +270,7 @@ def test_categorical_repr(self): "0 a\n1 b\n" + " ..\n" + "48 a\n49 b\n" - + "Length: 50, dtype: category\nCategories (2, object): [a, b]" + + "Length: 50, dtype: category\nCategories (2, object): ['a', 'b']" ) with option_context("display.max_rows", 5): assert exp == repr(a) @@ -279,7 +279,7 @@ def test_categorical_repr(self): a = Series(Categorical(["a", "b"], categories=levs, ordered=True)) exp = ( "0 a\n1 b\n" + "dtype: category\n" - "Categories (26, object): [a < b < c < d ... w < x < y < z]" + "Categories (26, object): ['a' < 'b' < 'c' < 'd' ... 'w' < 'x' < 'y' < 'z']" ) assert exp == a.__str__() diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index 859c8474562a3..1284cc9d4f49b 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -183,10 +183,10 @@ def test_series_equal_categorical_values_mismatch(rtol): Series values are different \\(66\\.66667 %\\) \\[index\\]: \\[0, 1, 2\\] -\\[left\\]: \\[a, b, c\\] -Categories \\(3, object\\): \\[a, b, c\\] -\\[right\\]: \\[a, c, b\\] -Categories \\(3, object\\): \\[a, b, c\\]""" +\\[left\\]: \\['a', 'b', 'c'\\] +Categories \\(3, object\\): \\['a', 'b', 'c'\\] +\\[right\\]: \\['a', 'c', 'b'\\] +Categories \\(3, object\\): \\['a', 'b', 'c'\\]""" s1 = Series(Categorical(["a", "b", "c"])) s2 = Series(Categorical(["a", "c", "b"])) diff --git a/web/pandas/community/blog/extension-arrays.md b/web/pandas/community/blog/extension-arrays.md index ea8a9a28ba242..61a77738a259c 100644 --- a/web/pandas/community/blog/extension-arrays.md +++ 
b/web/pandas/community/blog/extension-arrays.md @@ -117,11 +117,11 @@ library). For example, consider `Categorical`, 1 b 2 a dtype: category -Categories (3, object): [a, b, c] +Categories (3, object): ['a', 'b', 'c'] >>> ser.values -[a, b, a] -Categories (3, object): [a, b, c] +['a', 'b', 'a'] +Categories (3, object): ['a', 'b', 'c'] ``` In this case `.values` is a Categorical, not a NumPy array. For period-dtype @@ -143,7 +143,7 @@ So with our Categorical example, ```python >>> ser.array -[a, b, a] -Categories (3, object): [a, b, c] +['a', 'b', 'a'] +Categories (3, object): ['a', 'b', 'c'] >>> ser.to_numpy() array(['a', 'b', 'a'], dtype=object)