-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhelpers.py
51 lines (40 loc) · 1.27 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from nltk.tokenize import sent_tokenize, word_tokenize
def lines(a: str, b: str) -> list:
    """Return the lines that appear in both a and b.

    Each input is split with ``str.splitlines()``, so ``\\n``, ``\\r\\n`` and
    the other line boundaries that method recognises all end a line; two
    inputs that differ only in their line endings still match.

    Args:
        a: First text.
        b: Second text.

    Returns:
        The non-empty lines of ``a`` that also occur in ``b``, without
        duplicates, in the order they first appear in ``a``.
    """
    # Sets give O(1) membership tests; the list alone preserves output order.
    in_b = set(b.splitlines())
    seen = set()
    common = []
    for line in a.splitlines():
        # Blank lines are never reported as shared content.
        if line and line in in_b and line not in seen:
            seen.add(line)
            common.append(line)
    return common
def sentences(a: str, b: str) -> list:
    """Return the sentences that appear in both a and b.

    Both inputs are split into sentences with ``sent_tokenize``.

    Args:
        a: First text.
        b: Second text.

    Returns:
        The sentences of ``a`` that also occur in ``b``, without duplicates,
        in the order they first appear in ``a``.
    """
    # Sets give O(1) membership tests; the list alone preserves output order.
    in_b = set(sent_tokenize(b))
    seen = set()
    common = []
    for sentence in sent_tokenize(a):
        if sentence in in_b and sentence not in seen:
            seen.add(sentence)
            common.append(sentence)
    return common
def _token_substrings(text, n):
    """Return every length-n slice of each ``word_tokenize`` token of text, in order."""
    pieces = []
    for token in word_tokenize(text):
        # A token shorter than n yields an empty range and contributes nothing.
        for start in range(len(token) - n + 1):
            pieces.append(token[start:start + n])
    return pieces


def substrings(a: str, b: str, n: int) -> list:
    """Return substrings of length n that appear in both a and b.

    Each input is first split into tokens with ``word_tokenize``; substrings
    are then taken from within each token, so no returned substring spans two
    tokens.

    NOTE(review): because slicing happens on the tokenizer's output rather
    than on the raw text, any rewriting the tokenizer applies to a token
    would show up in the results -- confirm this is the intended behaviour.

    Args:
        a: First text.
        b: Second text.
        n: Length of the substrings to compare.

    Returns:
        The length-``n`` substrings found in tokens of both ``a`` and ``b``,
        without duplicates, in the order they first appear in ``a``. An empty
        list when ``n`` is less than 1, since no substring can have that
        length.
    """
    # A non-positive n would produce empty or wrong-length slices.
    if n < 1:
        return []

    # Sets give O(1) membership tests; the list alone preserves output order.
    in_b = set(_token_substrings(b, n))
    seen = set()
    common = []
    for piece in _token_substrings(a, n):
        if piece in in_b and piece not in seen:
            seen.add(piece)
            common.append(piece)
    return common