Skip to content

Commit 8c7f344

Browse files
committed
Fix void dtype handling
1 parent c5457f6 commit 8c7f344

File tree

3 files changed

+175
-1
lines changed

3 files changed

+175
-1
lines changed

pandas/core/generic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ def _validate_dtype(cls, dtype) -> DtypeObj | None:
478478
dtype = pandas_dtype(dtype)
479479

480480
# a compound dtype
481-
if dtype.kind == "V":
481+
if dtype.names is not None:
482482
raise NotImplementedError(
483483
"compound dtypes are not implemented "
484484
f"in the {cls.__name__} constructor"

pandas/tests/extension/uuid/__init__.py

Whitespace-only changes.
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
from __future__ import annotations
2+
3+
from collections.abc import (
4+
Iterable,
5+
Sequence,
6+
)
7+
from typing import (
8+
TYPE_CHECKING,
9+
ClassVar,
10+
Self,
11+
get_args,
12+
)
13+
from uuid import UUID
14+
15+
import numpy as np
16+
17+
from pandas.core.dtypes.dtypes import ExtensionDtype
18+
19+
import pandas as pd
20+
from pandas.core.algorithms import take
21+
from pandas.core.arrays.base import ExtensionArray
22+
from pandas.core.arrays.boolean import BooleanArray
23+
from pandas.core.indexers.utils import check_array_indexer
24+
from pandas.core.ops.common import unpack_zerodim_and_defer
25+
26+
if TYPE_CHECKING:
27+
import builtins
28+
29+
from numpy.typing import NDArray
30+
31+
from pandas.core.arrays.boolean import BooleanArray
32+
33+
34+
UuidLike = UUID | bytes | int | str
35+
36+
# 16 void bytes = 128 bits: every bit pattern is a valid value, and there is
# no surprising behavior such as stripping of 0 bytes.
37+
_UuidNumpyDtype = np.dtype("V16")
38+
39+
40+
def _to_uuid(v: UuidLike) -> UUID:
41+
match v:
42+
case UUID():
43+
return v
44+
case bytes():
45+
return UUID(bytes=v)
46+
case int():
47+
return UUID(int=v)
48+
case str():
49+
return UUID(v)
50+
msg = f"Unknown type for Uuid: {type(v)} is not {get_args(UuidLike)}"
51+
raise TypeError(msg)
52+
53+
54+
class UuidDtype(ExtensionDtype):
    """Extension dtype for UUID values, paired with ``UuidExtensionArray``."""

    # ExtensionDtype overrides

    # Reuse the kind code of the NumPy dtype used for storage.
    kind: ClassVar[str] = _UuidNumpyDtype.kind

    # ExtensionDtype essential API (3 class attrs and methods)

    # Scalar type of the elements.
    type: ClassVar[builtins.type[UUID]] = UUID
    # String name identifying this dtype.
    name: ClassVar[str] = "uuid"

    @classmethod
    def construct_array_type(cls) -> builtins.type[UuidExtensionArray]:
        """Return the array class associated with this dtype."""
        return UuidExtensionArray
66+
67+
68+
class UuidExtensionArray(ExtensionArray):
    """Extension array holding UUIDs in a 1-d NumPy array of the UUID void dtype."""

    # Implementation details and convenience

    # Backing storage; one item of ``_UuidNumpyDtype`` per UUID.
    _data: NDArray[np.void]

    def __init__(self, values: Iterable[UuidLike], *, copy: bool = False) -> None:
        """Build the array from an ndarray or an iterable of UUID-like values.

        Parameters
        ----------
        values : np.ndarray or iterable of UUID-like
            An ndarray is cast to the storage dtype with ``astype``; any other
            iterable is converted element by element through ``_to_uuid``.
        copy : bool, default False
            Forwarded to ``astype``; only used when ``values`` is an ndarray.

        Raises
        ------
        ValueError
            If the resulting storage array is not 1-dimensional.
        """
        if isinstance(values, np.ndarray):
            self._data = values.astype(_UuidNumpyDtype, copy=copy)
        else:
            # TODO: more efficient
            self._data = np.array(
                [_to_uuid(x).bytes for x in values], dtype=_UuidNumpyDtype
            )

        if self._data.ndim != 1:
            raise ValueError("Array only supports 1-d arrays")

    # ExtensionArray essential API (11 class attrs and methods)

    dtype: ClassVar[UuidDtype] = UuidDtype()

    @classmethod
    def _from_sequence(
        cls,
        data: Iterable[UuidLike],
        dtype: UuidDtype | None = None,
        copy: bool = False,
    ) -> Self:
        """Construct a new array from an iterable of UUID-like scalars.

        ``dtype`` is accepted for signature compatibility but not used: the
        array's dtype is fixed at class level.
        """
        return cls(data, copy=copy)

    def __getitem__(self, index) -> Self | UUID:
        """Return a ``UUID`` for an integer index, otherwise a new array.

        NumPy integer scalars are treated like builtin ints; without this they
        would index out a single void item and wrap it as if it were an array.
        """
        if isinstance(index, (int, np.integer)):
            return UUID(bytes=self._data[index].tobytes())
        index = check_array_indexer(self, index)
        return self._simple_new(self._data[index])

    def __len__(self) -> int:
        """Return the number of stored UUIDs."""
        return len(self._data)

    @unpack_zerodim_and_defer("__eq__")
    def __eq__(self, other: object) -> BooleanArray:
        """Compare element-wise for equality via ``_cmp``."""
        return self._cmp("eq", other)

    @property
    def nbytes(self) -> int:
        """Number of bytes used by the backing NumPy array.

        Exposed as a property, as the ``ExtensionArray`` interface defines it.
        """
        return self._data.nbytes

    def isna(self) -> NDArray[np.bool_]:
        """Return the result of ``pd.isna`` applied to the backing array."""
        # NOTE(review): confirm ``pd.isna`` supports the void storage dtype.
        return pd.isna(self._data)

    def take(
        self, indexer, *, allow_fill: bool = False, fill_value: UUID | None = None
    ) -> Self:
        """Take elements by position, delegating to ``pandas.core.algorithms.take``.

        When ``allow_fill`` is true and no ``fill_value`` is given, the
        dtype's ``na_value`` is used as the fill value.
        """
        if allow_fill and fill_value is None:
            fill_value = self.dtype.na_value

        result = take(self._data, indexer, allow_fill=allow_fill, fill_value=fill_value)
        return self._simple_new(result)

    def copy(self) -> Self:
        """Return a new array backed by a copy of the storage."""
        return self._simple_new(self._data.copy())

    @classmethod
    def _concat_same_type(cls, to_concat: Sequence[Self]) -> Self:
        """Concatenate the backing arrays of ``to_concat`` into one new array."""
        return cls._simple_new(np.concatenate([x._data for x in to_concat]))

    # Helpers

    @classmethod
    def _simple_new(cls, values: NDArray[np.void]) -> Self:
        """Wrap ``values`` directly, bypassing ``__init__`` and its conversion."""
        result = UuidExtensionArray.__new__(cls)
        result._data = values
        return result

    def _cmp(self, op: str, other) -> BooleanArray:
        """Apply comparison ``op`` (e.g. ``"eq"``) between the storage and ``other``.

        Raises
        ------
        NotImplementedError
            If a sequence operand has more than one dimension.
        ValueError
            If a sequence operand's length differs from this array's.
        """
        if isinstance(other, UuidExtensionArray):
            other = other._data
        elif isinstance(other, Sequence):
            # NOTE(review): ``str`` and ``bytes`` are Sequences too, so scalar
            # UUID-likes of those types take this branch -- confirm intended.
            other = np.asarray(other)
            if other.ndim > 1:
                raise NotImplementedError("can only perform ops with 1-d structures")
            if len(self) != len(other):
                raise ValueError("Lengths must match to compare")

        method = getattr(self._data, f"__{op}__")
        result = method(other)

        rv: BooleanArray = pd.array(result, dtype="boolean")
        return rv
158+
159+
160+
def test_construct() -> None:
    """Check that UuidExtensionArray can be built from a list of valid values."""
    from uuid import uuid4

    random_uuid = uuid4()
    arr = UuidExtensionArray([0, random_uuid])

    # An int is stored as the UUID with that integer value.
    assert arr[0] == UUID(int=0)
    # A UUID round-trips unchanged.
    assert arr[1] == random_uuid
167+
168+
169+
def test_series() -> None:
    """Check that Series accepts unstructured void dtypes."""
    from uuid import uuid4

    random_uuid = uuid4()
    series = pd.Series([random_uuid], dtype=UuidDtype(), name="s")

    # The UUID's string form must appear in the Series repr.
    assert str(random_uuid) in str(series)

0 commit comments

Comments
 (0)