Skip to content

Commit 2482adb

Browse files
authored
Added z-function implementation in strings/algorithms.py (#523)
1 parent 1aa53bf commit 2482adb

File tree

2 files changed

+55
-1
lines changed

2 files changed

+55
-1
lines changed

pydatastructs/strings/algorithms.py

+49
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ def find(text, query, algorithm, **kwargs):
3131
'rabin_karp' -> Rabin–Karp algorithm as given in [2].
3232
3333
'boyer_moore' -> Boyer-Moore algorithm as given in [3].
34+
35+
'z_function' -> Z-function algorithm as given in [4].
36+
3437
backend: pydatastructs.Backend
3538
The backend to be used.
3639
Optional, by default, the best available
@@ -67,6 +70,7 @@ def find(text, query, algorithm, **kwargs):
6770
.. [1] https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
6871
.. [2] https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
6972
.. [3] https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm
73+
.. [4] https://usaco.guide/CPH.pdf#page=257
7074
"""
7175
raise_if_backend_is_not_python(
7276
find, kwargs.get('backend', Backend.PYTHON))
@@ -196,3 +200,48 @@ def _boyer_moore(text, query):
196200
else:
197201
shift += max(1, j + 1)
198202
return positions
203+
204+
def _z_vector(text, query):
    """
    Computes the Z-values of ``text`` measured against ``query``.

    When ``query`` is non-empty the Z-function is evaluated on
    ``query + "$" + text`` and only the entries belonging to ``text``
    are returned, each capped at ``len(query)``. Entry ``i`` is therefore
    the length of the longest common prefix of ``query`` and ``text[i:]``.
    When ``query`` is empty the plain Z-function of ``text`` is returned
    (entry 0 is left at 0).

    Parameters
    ==========

    text: str
        The string to be scanned.
    query: str
        The pattern the Z-values are measured against.

    Returns
    =======

    DynamicOneDimensionalArray
        One integer per character of ``text``.
    """
    has_query = query != ""
    string = query + "$" + text if has_query else text
    length = len(string)

    z_fct = OneDimensionalArray(int, length)
    z_fct.fill(0)

    # [seg_left, seg_right] is the right-most segment found so far that
    # matches a prefix of ``string``.
    seg_left = 0
    seg_right = 0

    for curr_pos in range(1, length):
        if curr_pos <= seg_right:
            # Reuse the value already computed for the mirrored position,
            # limited to the part that lies inside the known segment.
            z_fct[curr_pos] = min(seg_right - curr_pos + 1,
                                  z_fct[curr_pos - seg_left])

        # Extend the match character by character.
        while curr_pos + z_fct[curr_pos] < length and \
                string[z_fct[curr_pos]] == string[curr_pos + z_fct[curr_pos]]:
            z_fct[curr_pos] += 1

        if curr_pos + z_fct[curr_pos] - 1 > seg_right:
            seg_left = curr_pos
            seg_right = curr_pos + z_fct[curr_pos] - 1

    final_z_fct = DynamicOneDimensionalArray(int, 0)
    if has_query:
        # Skip the ``query + "$"`` prefix. If ``text`` itself contains
        # "$", a match can run past the separator and yield a Z-value
        # larger than len(query); capping keeps such positions
        # recognisable as full occurrences of ``query``.
        cap = len(query)
        for pos in range(cap + 1, length):
            final_z_fct.append(min(z_fct[pos], cap))
    else:
        for pos in range(length):
            final_z_fct.append(z_fct[pos])

    return final_z_fct
236+
237+
def _z_function(text, query):
    """
    Finds occurrences of ``query`` in ``text`` using the Z-values
    produced by ``_z_vector``.

    Parameters
    ==========

    text: str
        The string to be searched.
    query: str
        The pattern to look for.

    Returns
    =======

    DynamicOneDimensionalArray
        The indices of ``text`` whose Z-value equals ``len(query)``.
        Empty when either ``text`` or ``query`` has length zero.
    """
    matches = DynamicOneDimensionalArray(int, 0)

    if len(text) != 0 and len(query) != 0:
        pattern_length = len(query)
        z_values = _z_vector(text, query)
        total = len(z_values)
        index = 0
        while index < total:
            # A Z-value equal to the pattern length marks a full match.
            if z_values[index] == pattern_length:
                matches.append(index)
            index += 1

    return matches

pydatastructs/strings/tests/test_algorithms.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,17 @@ def test_rka():
1111
def test_bm():
    """Runs the common string-matching checks with 'boyer_moore'."""
    algorithm = 'boyer_moore'
    _test_common_string_matching(algorithm)
1313

14+
def test_zf():
    """Runs the common string-matching checks with 'z_function'."""
    algorithm = 'z_function'
    _test_common_string_matching(algorithm)
16+
1417
def _test_common_string_matching(algorithm):
1518
true_text_pattern_dictionary = {
1619
"Knuth-Morris-Pratt": "-Morris-",
1720
"abcabcabcabdabcabdabcabca": "abcabdabcabca",
1821
"aefcdfaecdaefaefcdaefeaefcdcdeae": "aefcdaefeaefcd",
1922
"aaaaaaaa": "aaa",
20-
"fullstringmatch": "fullstringmatch"
23+
"fullstringmatch": "fullstringmatch",
24+
"z-function": "z-fun"
2125
}
2226
for test_case_key in true_text_pattern_dictionary:
2327
text = test_case_key
@@ -32,6 +36,7 @@ def _test_common_string_matching(algorithm):
3236
"abcabcabcabdabcabdabcabca": "qwertyuiopzxcvbnm",
3337
"aefcdfaecdaefaefcdaefeaefcdcdeae": "cdaefaefe",
3438
"fullstringmatch": "fullstrinmatch",
39+
"z-function": "function-",
3540
"abc": "",
3641
"": "abc"
3742
}

0 commit comments

Comments
 (0)