Skip to content

Commit a2473b6

Browse files
authored
Add files via upload
1 parent 46eb4ce commit a2473b6

File tree

1 file changed

+294
-0
lines changed

1 file changed

+294
-0
lines changed

Diff for: PythonExercises.py

+294
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,294 @@
1+
#!/usr/bin/env python
2+
3+
def stemWord(w):
    """Reduce a word to the generic form used as a key by the language model.

    Three steps are applied in order:
      1. Everything from the first apostrophe onwards is discarded, which
         removes a possessive "'s" (and any other apostrophe suffix).
      2. Every occurrence of the characters ";-?.,!:" is removed, wherever
         it appears in the remaining text.
      3. The result is converted to lower case.

    Args:
        w (str): A string containing the input word.

    Returns:
        str: the simplified word; it may be empty.
    """
    # Keep only the text in front of the first apostrophe.
    head = w.partition("'")[0]
    # Drop the punctuation marks the model does not care about.
    kept = ''.join(ch for ch in head if ch not in ';-?.,!:')
    return kept.lower()
20+
21+
22+
class LanguageModel:
    """Word-frequency model built from a collection of texts.

    Every text that is added is stored verbatim and its stemmed words are
    counted into a single running frequency table.
    """

    @staticmethod
    def getWordFreq(txt):
        """Count how often each stemmed word occurs in txt.

        The text is split on whitespace and every token is passed through
        stemWord before it is counted.  For example, if txt is
        "The dog likes the cat!" the result is
        {"the": 2, "dog": 1, "likes": 1, "cat": 1}.

        Args:
            txt (str): a string containing free text.

        Returns:
            dict: stemmed words (str) as keys and the number of times each
                one occurred (int) as values.
        """
        freq = {}
        for token in txt.split():
            stem = stemWord(token)
            # dict.get is used instead of dict.has_key, which no longer
            # exists in Python 3.
            freq[stem] = freq.get(stem, 0) + 1
        return freq

    @staticmethod
    def mergeWordFreqDict(frDict1, frDict2):
        """Return a new dictionary holding the sum of two frequency tables.

        The result is the frequency table of the concatenation of the two
        texts the arguments were computed from.  For example
        {"hello": 1, "world": 1} merged with {"goodbye": 1, "world": 1}
        gives {"hello": 1, "world": 2, "goodbye": 1}.

        Neither argument is modified.

        Args:
            frDict1 (dict): stemmed words as keys, positive integers as
                values.
            frDict2 (dict): stemmed words as keys, positive integers as
                values.

        Returns:
            dict: stemmed words as keys, summed counts as values.
        """
        # Work on a copy: assigning frDict1 directly would alias it and
        # silently change the caller's dictionary.
        merged = dict(frDict1)
        for word, count in frDict2.items():
            merged[word] = merged.get(word, 0) + count
        return merged

    def __init__(self, txtList=None):
        """LanguageModel constructor.

        Initialises the instance members to valid values:
        __texts is a list with the text strings added so far and
        __wordFreq is a dictionary with stemmed words for keys and the
        count of the occurrences of each word (int) as values.

        Args:
            txtList (list): A list of strings, each one containing some
                text.  None (the default) means no initial texts.

        Raises:
            TypeError: if txtList is neither None nor a list.  TypeError
                is a subclass of Exception, so callers catching Exception
                keep working.
        """
        self.__wordFreq = {}  # stemmed word -> occurrence count
        self.__texts = []  # every text added, in insertion order
        if txtList is None:
            # None stands in for "no texts" so that no mutable object is
            # shared as a default argument between calls.
            txtList = []
        if not isinstance(txtList, list):
            raise TypeError('txtList must be a list of strings')
        for txt in txtList:
            self.addText(txt)

    def addText(self, txt):
        """Add a string containing text to the model.

        The text is appended to __texts and its word frequencies, obtained
        with getWordFreq, are merged into __wordFreq with
        mergeWordFreqDict.

        Args:
            txt (str): the string containing the text to be added.
        """
        self.__texts.append(txt)
        newFreq = LanguageModel.getWordFreq(txt)
        self.__wordFreq = LanguageModel.mergeWordFreqDict(self.__wordFreq,
                                                          newFreq)

    def addTextFile(self, fileName):
        """Add the text contained in a text file.

        Args:
            fileName (str): the absolute or relative path to a file
                containing text.
        """
        # The with-statement closes the file even if reading fails.
        with open(fileName) as textFile:
            content = textFile.read()
        self.addText(content)

    def wordCount(self):
        """Return the total number of word occurrences in the model.

        Returns:
            int: the sum of all the counts stored in __wordFreq.
        """
        return sum(self.__wordFreq.values())

    def uniqueWordCount(self):
        """Return the number of distinct stemmed words in the model.

        A word occurring twice or more counts as one.

        Returns:
            int: the number of keys stored in __wordFreq.
        """
        return len(self.__wordFreq)

    def getWordProbabillity(self, word):
        """Return the probability of a word occurring according to the model.

        The probability is the number of times the stemmed word has
        occurred divided by the count of all word occurrences.

        Args:
            word (str): a word, not necessarily stemmed.

        Returns:
            float: a value between 0 and 1; 0.0 when the word is unknown.
        """
        count = self.__wordFreq.get(stemWord(word), 0)
        if count == 0:
            # Unknown word.  Returning early also avoids dividing by zero
            # when the model is still empty.
            return 0.0
        # float() keeps the division a true division under Python 2 too.
        return count / float(self.wordCount())

    def __str__(self):
        """Generate a string description of the language model.

        For LanguageModel(['hello world', 'Goodbye World!']) the result is
        "LanguageModel\\n\\t#texts:2\\n\\t#words:4\\n\\t#unique words:3\\n".

        Returns:
            str: A description of the language model spanning 4 lines.
        """
        # The counters are bound methods and __texts is a plain list, so
        # none of them takes self as an explicit argument.
        return ("LanguageModel\n\t#texts:" + str(len(self.__texts)) +
                "\n\t#words:" + str(self.wordCount()) +
                "\n\t#unique words:" + str(self.uniqueWordCount()) + "\n")

    def __repr__(self):
        """Generate a string that allows the model to be reconstructed.

        Returns:
            str: A Python expression that invokes the constructor of the
                class with the stored texts, e.g.
                "LanguageModel(['hello world'])".
        """
        # __name__ gives the bare class name; str(self.__class__) would
        # yield "<class '...'>" on Python 3, which cannot be evaluated.
        return self.__class__.__name__ + '(' + repr(self.__texts) + ')'

    def getWordsByProbabillity(self):
        """Produce all stemmed words sorted from most to least probable.

        Returns:
            list: a list of strings (not tuples!).
        """
        freq = self.__wordFreq
        # Every probability is count / total with the same total, so
        # ordering by raw count gives the same order without re-stemming
        # and re-summing for each word.
        ascending = sorted(freq, key=freq.__getitem__)
        # Reversing the stable ascending sort keeps the tie order that the
        # previous implementation produced.
        return ascending[::-1]
253+
254+
def isPalindrome(sentence):
    """Tell whether a sentence is a palindrome.

    A palindrome reads the same in both directions.  The comparison is
    case-insensitive and only looks at letters and digits: spaces, any
    other whitespace and all punctuation marks (apostrophes included) are
    ignored as if they did not exist.

    Args:
        sentence (str): A string with one or more words.

    Returns:
        bool: True if the sentence is a palindrome, False otherwise.  A
            sentence with no letters or digits counts as a palindrome.
    """
    # Filter character by character instead of going through stemWord:
    # stemWord cuts the text at the first apostrophe, which would drop the
    # rest of a sentence such as "Madam, I'm Adam".
    cleaned = ''.join(ch for ch in sentence if ch.isalnum()).lower()
    return cleaned == cleaned[::-1]
289+
290+
291+
if __name__ == '__main__':
    # Scratch area: code placed here only runs when the file is executed
    # directly and, per the original note, is ignored by joc-de-proves.
    # Use it to try out the functions and classes above while debugging.
    pass

0 commit comments

Comments
 (0)