Skip to content

Commit 1c194b9

Browse files
committed
REF: move implementation to ArrowStringArrayMixin
1 parent 5745019 commit 1c194b9

File tree

3 files changed

+24
-24
lines changed

3 files changed

+24
-24
lines changed

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,26 @@ def _str_removesuffix(self, suffix: str):
9797
removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
9898
result = pc.if_else(ends_with, removed, self._pa_array)
9999
return type(self)(result)
100+
101+
def _str_find(self, sub: str, start: int = 0, end: int | None = None):
    """
    Locate the first occurrence of ``sub`` in each element.

    Parameters
    ----------
    sub : str
        Substring to search for.
    start : int, default 0
        Left edge of the search window. ``None`` is treated like 0 and
        negative values count from the end of each element.
    end : int or None, default None
        Right edge of the search window; ``None`` leaves it open.

    Returns
    -------
    The per-element positions (``-1`` where ``sub`` was not found) after
    being passed through ``self._convert_int_result``.

    Notes
    -----
    NOTE(review): the PyArrow docs describe ``pc.find_substring`` as
    returning an index in bytes, while the window here is cut with
    ``pc.utf8_slice_codeunits``; confirm the behaviour for non-ASCII data.
    """
    values = self._pa_array

    # Fast path: no search window at all, search the raw array directly.
    if end is None and (start == 0 or start is None):
        return self._convert_int_result(pc.find_substring(values, sub))

    if sub == "":
        # GH#56792
        # An empty needle with a window is delegated to Python's str.find
        # element by element.
        per_element = self._apply_elementwise(
            lambda val: val.find(sub, start, end)
        )
        return self._convert_int_result(pa.chunked_array(per_element))

    # ``shift`` is what must be added back to a hit inside the sliced window
    # to express it relative to the start of the unsliced element.
    if start is None:
        shift = 0
        start = 0
    elif start < 0:
        # Turn the negative start into an absolute position per element,
        # clamped at 0 when it reaches past the beginning.
        shift = pc.add(start, pc.utf8_length(values))
        shift = pc.if_else(pc.less(shift, 0), 0, shift)
    else:
        shift = start

    window = pc.utf8_slice_codeunits(values, start, stop=end)
    positions = pc.find_substring(window, sub)
    not_found_marker = pa.scalar(-1, type=positions.type)
    is_hit = pc.not_equal(positions, not_found_marker)
    shifted = pc.add(positions, shift)
    # Only hits get shifted; misses stay at -1.
    return self._convert_int_result(pc.if_else(is_hit, shifted, -1))

pandas/core/arrays/arrow/array.py

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2419,29 +2419,6 @@ def _str_fullmatch(
24192419
pat = f"{pat}$"
24202420
return self._str_match(pat, case, flags, na)
24212421

2422-
def _str_find(self, sub: str, start: int = 0, end: int | None = None) -> Self:
2423-
if (start == 0 or start is None) and end is None:
2424-
result = pc.find_substring(self._pa_array, sub)
2425-
else:
2426-
if sub == "":
2427-
# GH 56792
2428-
result = self._apply_elementwise(lambda val: val.find(sub, start, end))
2429-
return self._convert_int_result(pa.chunked_array(result))
2430-
if start is None:
2431-
start_offset = 0
2432-
start = 0
2433-
elif start < 0:
2434-
start_offset = pc.add(start, pc.utf8_length(self._pa_array))
2435-
start_offset = pc.if_else(pc.less(start_offset, 0), 0, start_offset)
2436-
else:
2437-
start_offset = start
2438-
slices = pc.utf8_slice_codeunits(self._pa_array, start, stop=end)
2439-
result = pc.find_substring(slices, sub)
2440-
found = pc.not_equal(result, pa.scalar(-1, type=result.type))
2441-
offset_result = pc.add(result, start_offset)
2442-
result = pc.if_else(found, offset_result, -1)
2443-
return self._convert_int_result(result)
2444-
24452422
def _str_join(self, sep: str) -> Self:
24462423
if pa.types.is_string(self._pa_array.type) or pa.types.is_large_string(
24472424
self._pa_array.type

pandas/core/arrays/string_arrow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -480,7 +480,7 @@ def _str_find(self, sub: str, start: int = 0, end: int | None = None):
480480
):
481481
# https://github.com/pandas-dev/pandas/pull/59562/files#r1725688888
482482
return super()._str_find(sub, start, end)
483-
return ArrowExtensionArray._str_find(self, sub, start, end)
483+
return ArrowStringArrayMixin._str_find(self, sub, start, end)
484484

485485
def _str_get_dummies(self, sep: str = "|"):
486486
dummies_pa, labels = ArrowExtensionArray(self._pa_array)._str_get_dummies(sep)

0 commit comments

Comments
 (0)