Add files via upload

jalbertsr · web-flow · commit a2473b63cb7e · 2017-08-16T21:38:46.000+02:00
diff --git a/PythonExercises.py b/PythonExercises.py
@@ -0,0 +1,294 @@
+#!/usr/bin/env python
+
+def stemWord(w):
+    """Renders a word in to its generic form.
+    
+    This function is used preprocess words for NLP.
+    It removes all trailing punctuation marks "';-?.,!:".
+    It also removes the possessive "'s" from words.
+    And it converts it to all lower case
+    
+    Args:
+        w (str): A string containing the input word
+    
+    Returns:
+        str a simpler version of the word
+    """
+    w=w.split("'")[0]#Separates and eliminates all after the "'"
+    w=[letter for letter in w if not (letter in ';-?.,!:')] #Eliminates the simbols on w
+    return ''.join(w).lower() #Joins and puts in lowercase 
+
+
+class LanguageModel:
+    """Class that contains the data needed for the analysis of text
+    """
+    @staticmethod
+    def getWordFreq(txt):
+        """Returns a dictionary where the keys are stemmed words and values
+        are the number they occured in txt.
+        
+        As na example if txt == "The dog likes the cat!" the resulting 
+        frequency dictionary should be {"the":2,"dog":1,"likes":1,"cat":1}
+        
+        Hints: 
+            -Use split to break the text in to a list of words
+            stem all words in the list
+            -Make sure a dictionary element exists before operating on it.
+        
+        Args:
+            txt (str): a string containing free text
+            
+        Returns:
+            dict: A dictionary whith stemmed words (str) for keys and int 
+            values containing the occurence count of these words.
+        """
+        
+        list1 = txt.split()
+        newlist = []
+        dic = {}
+        for word in list1:
+           newlist.append(stemWord(word))
+    
+        for word in newlist:
+            if dic.has_key(word) == True:
+                dic[word] = dic[word] +1
+            else:
+                dic[word] = 1
+        return dic
+        
+    @staticmethod
+    def mergeWordFreqDict(frDict1,frDict2):
+        """Takes two dictionaries containing word frequencies and returns a 
+        single dictionary containing their sum.
+        
+        In essence this fuction takes the frequencies produced from two
+        different strings with text and returns a dictionary with the 
+        word frequencies of the concatenation of these two strings.
+        
+        Hints:
+            -Dictionary .keys() returns a list so you might need to cast it 
+            to a set, if you want to get the union.
+            -It is better if you create a dictionary with 0 for all words in
+            both dictionaries and then update the values
+            -If frDict1=={"hello":1,"world":1} and frDict2=={"goodbye":1,"world":1},
+            the result must be {"hello":1,"world":2,"goodbye":1}
+            -The simplest solution involves 3 non nested for loops.
+        
+        Args:
+            frDict1 (dict): a dictionary with stemmed words as keys and 
+            positive integers as values.
+            frDict2 (dict): a dictionary with stemmed words as keys and 
+            positive integers as values.
+        
+        Returns:
+            dict: a dictionary with stemmed words as keys and positive 
+            integers as values.
+        """
+        
+        new_dict = frDict1         
+        
+        for i in frDict2:
+            if i in new_dict:
+                new_dict[i] += frDict2[i]
+            else:
+                new_dict[i] = frDict2[i]
+            
+        return new_dict
+
+    def __init__(self,txtList=[]):
+        """LangueModel constructor
+        
+        Initialises the class members to valid values.
+        __texts is a list with one or more strings with texts.
+        __wordFreq is a dictionary with stemmed words for keys and the 
+        count of the occurences of each word (int) as values.
+        
+        Args:
+            txtList (list): A list of strings where each string will 
+            contains some text.
+        """
+        self.__wordFreq={}#Dictionary with the word's frequencies.
+        self.__texts=[]#A list with a text string
+        if txtList.__class__ != [].__class__:
+            raise Exception('txtList must be a list of strings')
+        for txt in txtList:
+            self.addText(txt)#add text to the list
+
+    def addText(self,txt):
+        """Adds a string containing text to the model
+        
+        This method just uses getWordFreq and mergeWordFreqDict static 
+        methods on a specific instance of the class
+        
+        Args:
+            txt (str): the string containing text to be added
+        """
+        self.__texts.append(txt)  
+        newFreq=LanguageModel.getWordFreq(txt)   #newFreq is a dictionary with the word's frequencies of the added text
+        self.__wordFreq=LanguageModel.mergeWordFreqDict(self.__wordFreq,newFreq)  #Sum of the two dictionaries
+
+    def addTextFile(self,fileName):
+        """Ads text contained in a text-file
+        
+        Args:
+            fileName (str): the absolute or relative path to a file 
+            containing text.
+        """
+        self.addText(open(fileName).read())#Adds text in a file
+
+    def wordCount(self):
+        """Returns the total number of words found in self.__texts
+        
+        Hints:
+        -The answer can be writen in a single line
+        -The method values() of dict is the key to solving this question
+        -The distionary __wordFreq contains how many times each word was 
+        found in the texts
+
+        Returns:
+            int: the count of all the words
+        """
+        
+        return sum(self.__wordFreq.values())
+        
+        
+    def uniqueWordCount(self):
+        """Returns the number of unique words found in self.__texts
+
+        Unique word means that a word occuring twice or more times, counts 
+        as one.
+
+        Hints:
+        -The answer can be writen in a single line
+        -The method keys() of dict is the key to solving this question
+
+        Returns:
+            int: the count of unique words
+        
+        """
+        
+        return len(self.__wordFreq.keys())
+        
+
+    def getWordProbabillity(self,word):
+        """Returns the probabillity of a word occuring according to the 
+        model
+        
+        The probabillity of a word occuring is the number of times it has 
+        occured divided by the count of all word occurences in __texts
+        
+        Args:
+            word (str): an string with a word which is not necessarilly 
+            stemmed.
+
+        Returns:
+            float: a float between 0 and 1 that contains the probabillity
+        """
+        stemmedWord=stemWord(word)#Converts 'word' in a simple string in lowercase
+        if stemmedWord in self.__wordFreq.keys():#if the word is in the list
+            return self.__wordFreq[stemmedWord]/float(self.wordCount())#Divide between all the words to know the probabillity  
+        else:
+            return 0#If not found return 0
+
+    def __str__(self):
+        """Generate a string description of the Language Model
+        
+        Hints:
+        -The result must be constructed with string concatenation
+        -Cast an integer to a string before concatening it.
+        -Use the already availabe methods to obtain information
+        -lm=LanguageModel(['hello world','Goodbye World!'])
+        lm.__str__() will return
+        "LanguageModel\n\t#texts:2\n\t#words:4\n\t#unique words:3\n"
+        -self.__texts, is a list containing all texts the LanguageModel has 
+        seen.
+
+        Returns:
+            string: A description of the language model spanning 4 lines.
+        """
+        uwords = self.uniqueWordCount(self)
+        nwords = self.wordCount(self)
+        texts = len(self.__texts(self))
+        
+        lm = "The lenguage model has:\n\t" + "texts:" + str(texts) + "\n\t"+ "words:" + str(nwords) + "\n\t" + "unique words:" + str(uwords) + "\n"
+        
+        return lm
+
+    def __repr__(self):
+        """Generate a string description of the Language Model that allows 
+        to reconstruct it
+        
+        Returns:
+            string: A python expression that invockes the constructor of the
+            class so that if executed a deep copy of the LangueageModel is 
+            obtained.
+        """
+        res=str(self.__class__)+'('+self.__texts.__repr__()+')'
+        return res
+
+    def getWordsByProbabillity(self):
+        """Produces a list containing all stemmed words from the language 
+        model sorted from the most probable to the least probable
+        
+        Hints:
+        -function reversed returns a list with reverse order of the input 
+        list
+        -function sorted returns a list with the elements of the input sorted
+        in ascending order.
+        -A list of tuples is sorted by the first element of each tuple
+
+        Returns:
+            list: a list of strings (not tuples!)
+        """
+        finalist = {}
+        newdict = self.__wordFreq.keys()
+        for i in newdict:
+            finalist[i] = self.getWordProbabillity(i)
+            
+        sorted_keys = sorted(finalist, key=finalist.__getitem__)
+        
+        l = list(reversed(sorted_keys))
+        
+        return l
+             
+    def isPalindrome(sentence):
+        """Tells us whether a string is a palindrome.
+        
+        Pallindromes are sentences whos characters read in both directions are 
+        the same. Testing for pallindromes ignores spaces and puntuation marks
+        as if they did not exist.
+        
+        Hits:
+        -A list can be indexed form the end with negative values. 
+        -The first character in a string is at position 0
+        If a=[1,"b",3,4] Then a[-1] is 4, a[-2] is 3, etc.
+        -The expression a[len(a)-1]==a[-1] is always True if a is not empty
+        -You will need to use .split() and .join methods of the str type
+        
+        Args:
+            sentence (str): A string with one or more words assumed to have no 
+            possessive (stemWord can help).
+        
+        Returns:
+            bool: The return value. True if the sentence was a palindrome, False
+            otherwise.
+        """
+        
+        answer = True 
+              
+        newstring = stemWord(sentence)
+       
+        s_no_space = newstring.replace(" ","")#remplaza el espacio
+    
+        if  s_no_space ==  s_no_space[::-1]:#compara las dos listas
+             answer = True#si son iguales es que son cap i cua
+        else:
+             answer = False
+                
+        return answer
+
+
+    if __name__ == '__main__':
+        #Everything here is ignored by joc-de-proves
+        #You can debug your program by testing your functions and classes here
+        pass