diff --git a/corpus/udpipemodel/english-ewt-ud-2.5-191206.udpipe b/corpus/udpipemodel/english-ewt-ud-2.5-191206.udpipe
new file mode 100644
index 0000000..7f16e14
Binary files /dev/null and b/corpus/udpipemodel/english-ewt-ud-2.5-191206.udpipe differ
diff --git a/requirements.txt b/requirements.txt
index 523768a..39d6d74 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,11 +1,11 @@
-requests==2.25.1
-nltk==3.5
-numpy~=1.19.2
-beautifulsoup4==4.9.3
-corpy==0.3.0
-Flask==1.1.2
-gensim==3.8.3
-pymysql==1.0.2
-pysolr-3.9.0
-mysql~=5.7.24
+requests==2.25.1
+nltk==3.5
+numpy~=1.19.2
+beautifulsoup4==4.9.3
+corpy==0.3.0
+Flask==1.1.2
+gensim==3.8.3
+pymysql==1.0.2
+pysolr==3.9.0
+mysql~=5.7.24
scikit-learn~=0.24.1
\ No newline at end of file
diff --git a/src/app.py b/src/app.py
index ded21ae..d5f4461 100644
--- a/src/app.py
+++ b/src/app.py
@@ -8,13 +8,11 @@
from src.service import AppService
from flask import Flask, render_template, request, redirect, url_for, flash
-
app = Flask(__name__)
# TODO: re-initialise when a different language is selected
appService = AppService()
-
@app.route('/')
def index():
"""
@@ -23,7 +21,6 @@ def index():
"""
return render_template('index.html')
-
@app.route('/find', methods=['POST'])
def find():
"""
@@ -45,7 +42,6 @@ def find():
"sel_word": sel_word,
"sel_result": appService.sel_result})
-
@app.route('/find2', methods=['POST'])
def find2():
language_name, sel_word = None, None
@@ -59,7 +55,6 @@ def find2():
"sel_word": sel_word,
"sel_result": appService.sel_result})
-
@app.route('/cluster', methods=['POST'])
def cluster():
"""
@@ -76,13 +71,8 @@ def cluster():
if not appService.udt_pre_model:
appService.config_udpipe(language_name)
cluster_model_file = word2vec_language[language_name]
- cluster_result, rec_cluster_result = appService.cluster_sentences(
- language_name, cluster_model_file, cluster_input_sentence, cluster_number)
- return render_template('cluster.html',
- cluster_number=cluster_number,
- cluster_result=cluster_result,
- rec_cluster_result=rec_cluster_result)
-
+    cluster_result, rec_cluster_result = appService.cluster_sentences(language_name, cluster_model_file, cluster_input_sentence, cluster_number)
+    return render_template('cluster.html', cluster_number=cluster_number, cluster_result=cluster_result, rec_cluster_result=rec_cluster_result)
if __name__ == '__main__':
- app.run(port=3000, debug=True)
\ No newline at end of file
+ app.run(port=3000, debug=True)
diff --git a/src/databaseClustering.py b/src/databaseClustering.py
index 34a29ae..57545f4 100644
--- a/src/databaseClustering.py
+++ b/src/databaseClustering.py
@@ -1,4 +1,4 @@
-import mysql.connector
+import mysql.connector  # pymysql has no 'connector' submodule; this module uses mysql.connector
from mysql.connector import errorcode
from datetime import datetime
import pandas as pd
@@ -11,32 +11,44 @@
from util import db_config
-
def train_model(language_name, corpus_path, save_path):
-
- model = gensim.models.Word2Vec(sentences=corpus_path,
- size=150,
- window=8,
- min_count=2,
- workers=2,
- iter=10)
- model.save(save_path + language_name)
- print('Save succeed')
+ model = gensim.models.Word2Vec(sentences=corpus_path,
+ size=150,
+ window=8,
+ min_count=2,
+ workers=2,
+ iter=10)
+ model.save(save_path + language_name)
+    print('Save succeeded')
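+
+# Hypothetical usage sketch: corpus_path must be an iterable of token lists
+# (the gensim 3.x API used here takes size/iter; gensim 4 renamed these to
+# vector_size/epochs). The toy corpus below is repeated so every token
+# survives min_count=2:
+#   toy_corpus = [['the', 'quick', 'fox'], ['the', 'lazy', 'dog']] * 2
+#   train_model('english', toy_corpus, './corpus/word2vecmodel/')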
def load_model(save_path) -> gensim.models.Word2Vec:
- filename = save_path
- model = gensim.models.Word2Vec.load(filename)
- print('Loading succeed')
- for index, word in enumerate(model.wv.index2word):
- if index == 5:
- break
- vec = ",".join(map(lambda i: str(i), model.wv[word]))
- print(f"word #{index}/{len(model.wv.index2word)} is {word}, vec = {vec}")
- return model
-
-def database():
-
+ filename = save_path
+ model = gensim.models.Word2Vec.load(filename)
+    print('Loading succeeded')
+ for index, word in enumerate(model.wv.index2word):
+ if index == 5:
+ break
+ vec = ",".join(map(lambda i: str(i), model.wv[word]))
+ print(f"word #{index}/{len(model.wv.index2word)} is {word}, vec = {vec}")
+ return model
+
+
+# def database():
+# db = mysql.connector.connect(
+# host='localhost',
+# user='root',
+# password='root',
+# database='psd_project'
+# )
+# mycursor = db.cursor()
+# query_info = ("SELECT sentence FROM english_sentences")
+# mycursor.execute(query_info)
+# sentences_df = pd.DataFrame(mycursor.fetchall(), columns=['Sentences'])
+
+# return sentences_df
+
+# Either the commented-out block above (lines 37-48) or the block below (lines 52-63) is needed, not both.
+def database():
db = mysql.connector.connect(
-    host=db_config['host'],
+    host=db_config['db_host'],
user=db_config['user'],
@@ -50,76 +62,80 @@ def database():
return sentences_df
+
def textProcessing(text):
- no_stop =[words for words in text.split() if words.lower() not in string.punctuation]
+ no_stop = [words for words in text.split() if words.lower() not in string.punctuation]
return no_stop
-def cluster_sentences(language_name: str, save_path: str, sentences: List[str], n_clusters: int) :
-
- n_clusters = int(n_clusters)
- print("clusters are ",n_clusters)
- if n_clusters <=0:
- print("Parameter is Invalid")
- return
- if n_clusters > len(sentences):
- # TODO add log
- print('number of cluster bigger than sentences count')
- return
- # first loading model
- word2vec_model = load_model(save_path)
- # second geting vectors for one sentence
- sent_vectors = []
- default_dimn = 100
- # iterator to sentence
- for word1 in sentences:
- print(word1)
- word_vectors = []
- for words in word1:
-
- if words in word2vec_model.wv:
- word_vectors.append(word2vec_model.wv[words])
- else: # not in dict, fill 0
- word_vectors.append([0] * default_dimn)
-
- to_array = np.array(word_vectors)
- sent_vectors.append(to_array.mean(axis=0).tolist())
- kmeans = KMeans(n_clusters=n_clusters,random_state=0).fit(sent_vectors)
- labels = kmeans.labels_
- tmp_labels,examples = [],[]
- for sent,label in zip(sentences,labels):
- if label not in tmp_labels:
- tmp_labels.append(label)
- examples.append(sent)
- if len(examples) == n_clusters:
- break
- # add bottom logic for cluster
- if len(examples) < n_clusters:
- for sent in sentences:
- if sent not in examples:
- examples.append(sent)
- if len(examples) >= n_clusters:
- break
-
- return examples
+
+def cluster_sentences(language_name: str, save_path: str, sentences: List[str], n_clusters: int):
+    n_clusters = int(n_clusters)
+    print("number of clusters:", n_clusters)
+    if n_clusters <= 0:
+        print("invalid parameter: n_clusters must be positive")
+        return
+    if n_clusters > len(sentences):
+        # TODO add log
+        print('number of clusters is bigger than the sentence count')
+        return
+    # first, load the word2vec model
+    word2vec_model = load_model(save_path)
+    # second, build one averaged vector per sentence
+    sent_vectors = []
+    # match the model's dimensionality so OOV zero-vectors average cleanly
+    default_dimn = word2vec_model.wv.vector_size
+    # iterate over the sentences
+    for sentence_words in sentences:
+        print(sentence_words)
+        word_vectors = []
+        for word in sentence_words:
+            if word in word2vec_model.wv:
+                word_vectors.append(word2vec_model.wv[word])
+            else:  # not in the vocabulary, fill with zeros
+                word_vectors.append([0] * default_dimn)
+        to_array = np.array(word_vectors)
+        sent_vectors.append(to_array.mean(axis=0).tolist())
+ kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(sent_vectors)
+ labels = kmeans.labels_
+ tmp_labels, examples = [], []
+ for sent, label in zip(sentences, labels):
+ if label not in tmp_labels:
+ tmp_labels.append(label)
+ examples.append(sent)
+ if len(examples) == n_clusters:
+ break
+    # fallback: pad with not-yet-used sentences if some clusters produced no example
+ if len(examples) < n_clusters:
+ for sent in sentences:
+ if sent not in examples:
+ examples.append(sent)
+ if len(examples) >= n_clusters:
+ break
+
+ return examples
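+
+# Hypothetical call: sentences are token lists (as produced by textProcessing
+# below), and one representative sentence per KMeans cluster is returned:
+#   examples = cluster_sentences('english', './corpus/word2vecmodel/english',
+#                                [['good', 'dog'], ['bad', 'cat'], ['fast', 'car']], 2)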
a = database()
+
+# file_path = r'C:\Users\haris\Desktop\wordFinder\word2vec'
+# file_path = file_path + 'English'
+
file_path = './corpus/word2vecmodel/'
language_name = 'english'
file_path = file_path + language_name
load_model(file_path)
print('All done')
-c=a['Sentences'].apply(textProcessing)
+c = a['Sentences'].apply(textProcessing)
# get word vector for one sentence
sentences = [
- 'Tohru shows great loyalty to whoever he stands by, even back to the time when he was an Enforcer for the Dark Hand.',
- 'The Earth Demon, Dai Gui resembles a large minotaur(with the face of a guardian lion) with great strength.',
- 'Al Mulock was the great-grandson of Sir William Mulock(1843–1944), the former Canadian Postmaster - General.',
- 'Though his surviving images are scarce, his importance to the early history of photography in Asia is great.']
+ 'Tohru shows great loyalty to whoever he stands by, even back to the time when he was an Enforcer for the Dark Hand.',
+ 'The Earth Demon, Dai Gui resembles a large minotaur(with the face of a guardian lion) with great strength.',
+ 'Al Mulock was the great-grandson of Sir William Mulock(1843–1944), the former Canadian Postmaster - General.',
+ 'Though his surviving images are scarce, his importance to the early history of photography in Asia is great.']
cluster_result = cluster_sentences(language_name, file_path, c, 3)
print("example sentences:\n")
print(cluster_result)
-
diff --git a/src/service.py b/src/service.py
index 7b4e6bd..e826570 100644
--- a/src/service.py
+++ b/src/service.py
@@ -80,7 +80,7 @@ def database(self):
-                                    db_config['host'],
-                                    db_config['database'])
+                                    db_config['db_host'],
+                                    db_config['db_name'])
self.cursor = self.store_data.db_connect().cursor()
- query_info = "SELECT sentence FROM english_sentences"
+ query_info = "SELECT sentence FROM English_sentences"
self.cursor.execute(query_info)
sentences_df = pd.DataFrame(self.cursor.fetchall(), columns=['Sentences'])
return sentences_df
@@ -91,7 +91,7 @@ def clusteringData(self):
-                                    db_config['host'],
-                                    db_config['database'])
+                                    db_config['db_host'],
+                                    db_config['db_name'])
self.cursor = self.store_data.db_connect().cursor()
- query_info = "SELECT sentence FROM english_sentences"
+ query_info = "SELECT sentence FROM English_sentences"
self.cursor.execute(query_info)
sentences_dataframe = pd.DataFrame(self.cursor.fetchall(), columns=['Sentences'])
return sentences_dataframe
@@ -130,7 +130,7 @@ def cluster_sentences(self, language_name: str, save_path: str, sentences: List[
words = self.udt_pre_model.word_segmentation(sent)
word_vectors = []
# iterator to word
- window_words = get_keyword_window(self.sel_result[0][0], words, 5)
+ window_words = get_keyword_window(self.sel_result[0][0], words, 10)
for word in window_words:
if word in word2vec_model.wv:
word_vectors.append(word2vec_model.wv[word])
@@ -210,13 +210,18 @@ def _get_examples(self, sentences: List[str], best_labels, n_clusters: int):
]
save_path = './/corpus//english//'
# first loading udpipe to segement word for each sentence
+ # udt_english = UdpipeTrain(language_list[1],
+ # r'C:\Users\haris\Desktop\wordFinder\english-ewt-ud-2.5-191206.udpipe',
+ # r'C:\Users\haris\Desktop\wordFinder\haris.txt')
+
udt_english = UdpipeTrain(language_list[1],
r'.//corpus//udpipemodel//english.udpipe',
r'.//corpus//english//135-0.txt')
- cluster_result = AppService().config_udpipe(language_name).cluster_sentences(language_name, save_path, sentences='3', n_clusters=2)
- '''
+    # cluster_result = AppService().config_udpipe(language_name).cluster_sentences(language_name, save_path, sentences='3', n_clusters=2)
+ # '''
+
cluster_result = AppService().config_udpipe(language_name).cluster_sentences(language_name, sentences, 2)
print("two examples sentences: \n")
print(cluster_result)
diff --git a/src/templates/cluster.html b/src/templates/cluster.html
index e88a65f..d4ec5b9 100644
--- a/src/templates/cluster.html
+++ b/src/templates/cluster.html
@@ -31,6 +31,7 @@
Well done!
After clustering, you get {{cluster_number}} example
{% for cluster_sentence in cluster_result %}
{{cluster_sentence}}
+        {# TODO: add KWIC functionality #}
{% endfor %}
{% endif %}
diff --git a/src/train/KWIC.py b/src/train/KWIC.py
new file mode 100644
index 0000000..bf8abf0
--- /dev/null
+++ b/src/train/KWIC.py
@@ -0,0 +1,244 @@
+def getNGrams(wordlist, n):
+ return [wordlist[i:i + n] for i in range(len(wordlist) - (n - 1))]
+
+# Given a list of n-grams, return a dictionary of KWICs,
+# indexed by keyword.
+
+def nGramsToKWICDict(ngrams):
+ keyindex = len(ngrams[0]) // 2
+
+ kwicdict = {}
+
+ for k in ngrams:
+ if k[keyindex] not in kwicdict:
+ kwicdict[k[keyindex]] = [k]
+ else:
+ kwicdict[k[keyindex]].append(k)
+ return kwicdict
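+
+# Illustrative doctest-style sketch (made-up input, not from the app corpus):
+#   >>> grams = getNGrams("the quick brown fox jumps".split(), 3)
+#   >>> nGramsToKWICDict(grams)["brown"]
+#   [['quick', 'brown', 'fox']]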
+
+
+# Given a KWIC, return a string that is formatted for
+# pretty printing.
+
+def prettyPrintKWIC(kwic):
+ n = len(kwic)
+ keyindex = n // 2
+ width = 10
+
+ outstring = ' '.join(kwic[:keyindex]).rjust(width * keyindex)
+ outstring += str(kwic[keyindex]).center(len(kwic[keyindex]) + 6)
+ outstring += ' '.join(kwic[(keyindex + 1):])
+
+ return outstring
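+
+# For example, prettyPrintKWIC(['quick', 'brown', 'fox']) right-justifies the
+# left context into width * keyindex columns, pads the keyword with three
+# spaces on each side, and appends the right context:
+#   '     quick   brown   fox'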
+
+def cut_to_sentence(text, keyword, keywordindex):
+ """ Cuts the sentence around a keyword out of the text
+ Arguments
+ ----------
+ text : str
+ Text out of which the sentence should be extracted
+ keyword : str
+ Keyword in the sentence of the text
+ keywordindex: int
+ Index of the keyword in the text
+ Returns
+ -------
+    Indices of the sentence in the text and the sentence itself as a string
+ """
+    # Strings after which a period does not end a sentence
+ safe = ["Ms", "Mr", "Fr", "Hr", "Dipl", "B", "M", "Sc", "Dr", "Prof",
+ "Mo", "Mon", "Di", "Tu", "Tue", "Tues", "Mi", "Wed", "Do", "Th",
+ "Thu", "Thur", "Thurs", "Fr", "Fri", "Sa", "Sat", "So", "Sun",
+ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
+ "str"]
+
+ # Find beginning
+ rfind_results = []
+ end_ = keywordindex
+ # Special Case "."
+ while True:
+ rfind_ = text.rfind(". ", 0, end_)
+ if not rfind_ == -1:
+ no_safe = False
+ for i, s in enumerate(safe):
+ if text[0:rfind_][::-1].find(s[::-1]) == 0:
+ end_ = rfind_ - len(s)
+ break
+ if i == len(safe)-1:
+ no_safe = True
+ if no_safe is True:
+ break
+ else:
+ break
+ rfind_results.append(rfind_)
+
+ rfind_results.append(max([text.rfind(sentence_ending, 0, keywordindex)
+ for sentence_ending in ["! ", "? "]]))
+
+ rfind_result = max(rfind_results)
+ if rfind_result == -1:
+ start = 0
+ else:
+ start = rfind_result + 2
+
+ # Find ending
+ find_results = []
+ start_ = keywordindex+len(keyword)
+ # Special Case "."
+ while True:
+ find_ = text.find(". ", start_)
+ if not find_ == -1:
+ no_safe = False
+ for i, s in enumerate(safe):
+ if text[0:find_][::-1].find(s[::-1]) == 0:
+ start_ = find_ + len(s)
+ break
+ if i == len(safe)-1:
+ no_safe = True
+ if no_safe is True:
+ break
+ else:
+ break
+ find_results.append(find_)
+
+ find_results.extend([text.find(sentence_ending, keywordindex+len(keyword))
+ for sentence_ending in ["! ", "? "]])
+ find_results_bigger_neg_1 = [i for i in find_results if i >= 0]
+ if not find_results_bigger_neg_1:
+ end = len(text)
+ else:
+ end = min(find_results_bigger_neg_1) + 1
+
+ return list(range(start, end)), text[start:end]
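+
+# Illustrative example (hypothetical text): the safe list keeps "Dr." from
+# being treated as a sentence end, so the cut reaches back to the text start:
+#   cut_to_sentence("Dr. Smith saw the great fox. It ran away.", "great", 18)
+#   -> (list(range(0, 28)), 'Dr. Smith saw the great fox.')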
+
+def find_nth_occurrence(text, searchstr, nth=1, startindex=0):
+ """
+    Finds the index of the nth occurrence of searchstr in the text, starting
+    from a given startindex.
+ """
+ start = text.find(searchstr, startindex)
+
+ if start == -1:
+ return len(text)-1
+
+ for i in range(nth-1):
+ find_index = text.find(searchstr, start+len(searchstr))
+ if find_index == -1:
+ return len(text)-1
+ else:
+ start = find_index
+
+ return start
+
+def rfind_nth_occurrence(text, searchstr, nth=1, endindex=None):
+ """
+    Finds the index of the nth occurrence of searchstr in the text, searching
+    backwards from a given endindex.
+ """
+ if endindex is None:
+ endindex = len(text)
+
+ end = text.rfind(searchstr, 0, endindex)
+
+ if end == -1:
+ return 0
+
+ for i in range(nth-1):
+ rfind_index = text.rfind(searchstr, 0, end)
+ if rfind_index == -1:
+ return 0
+ else:
+ end = rfind_index
+
+ return end
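+
+# For example, in "a b c b" (spaces at indices 1, 3 and 5):
+#   find_nth_occurrence("a b c b", " ", nth=2)   -> 3  (2nd space from the left)
+#   rfind_nth_occurrence("a b c b", " ", nth=2)  -> 3  (2nd space from the right)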
+
+def keywords_in_context(text, keywords, max_words=5, sep="...", cut_sentences=True):
+ """ Returns the relevant context around keywords in a larger text.
+ Arguments
+ ----------
+ text : str
+        Text which should be summarized around the keywords.
+    keywords : list of str
+        Keywords whose context we want to extract out of the text.
+    max_words : int
+        Maximum number of words before and after a keyword, if no sentence
+        beginning or ending occurs first and cut_sentences is set.
+    sep : str
+        String which represents skipped portions of the text in the result.
+    cut_sentences : bool
+        Whether the context around a keyword is cut at the beginning or end
+        of a sentence.
+    Returns
+    -------
+    Summarized text containing the keywords in context, as a string.
+ """
+ indices_lst = []
+ for k in keywords:
+ start = text.find(k)
+ while not start == -1:
+ indices_lst.append((k, start))
+ start = text.find(k, start+len(k))
+
+ result_indices = set()
+ for index_tpl in indices_lst:
+ keyword, index = index_tpl
+ start = rfind_nth_occurrence(text, " ", nth=max_words+1, endindex=index)
+ if not start == 0:
+ start += 1 # +1 to Remove the first " "
+ end = find_nth_occurrence(text, " ", nth=max_words+1, startindex=index+len(keyword))
+ if end == len(text)-1:
+ end += 1
+ indices_of_text = set(range(start, end))
+ if cut_sentences:
+ sentence_indices, _ = cut_to_sentence(text, keyword, index)
+ indices_of_text.intersection_update(set(sentence_indices))
+ for i in indices_of_text:
+ result_indices.add(i)
+
+ result_indices = list(result_indices)
+ result_indices.sort()
+
+ result = ""
+ i_before = -1
+ for _i, i in enumerate(result_indices):
+ if not (i-1) == i_before:
+ result += " " + sep + " " + text[i]
+ i_before = i
+ else:
+ result += text[i]
+ i_before = i
+
+    # If the last word is not the end of the text, add the separator.
+ if _i == len(result_indices)-1:
+ if not i == len(text)-1:
+ result += " " + sep
+
+ return result
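+
+# Illustrative call (hypothetical text): with max_words=1 only one word of
+# context survives on each side, and the cut-off tail becomes the separator:
+#   keywords_in_context("The great fox ran away! It was fast.",
+#                       ["great"], max_words=1)
+#   -> 'The great fox ...'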
+
+def find_and_replace(text, find_str, replacement_str):
+ """ Find and replace a find_str with a replacement_str in text. """
+ start = text.find(find_str)
+ offset = 0
+ while start != -1:
+ # update the index compatible to the whole text
+ start = start + offset
+
+ # replace (cut the original word out and insert the replacement)
+ text = text[:start] + replacement_str + text[start+len(find_str):]
+
+ offset = start + len(replacement_str)
+ start = text[offset:].find(find_str)
+
+ return text
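+
+# e.g. find_and_replace("a cat and a cat", "cat", "dog") -> 'a dog and a dog'.
+# Matches inside the replacement itself are skipped, so the ANSI highlighting
+# in __main__ below cannot loop forever on its own output.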
+
+if __name__ == "__main__":
+    # Example values (illustrative only): TEXT is the text to be shrunk,
+    # KEYWORDS are the searched words.
+    TEXT = "The great fox ran away! It was fast. His importance to photography is great."
+    KEYWORDS = ["great"]
+    result_text = keywords_in_context(TEXT, KEYWORDS)
+ # Highlight Keywords
+ for k in KEYWORDS:
+ result_text = find_and_replace(result_text, k, "\x1b[34m"+k+"\x1b[0m")
+
+ print(result_text)
diff --git a/src/train/store.py b/src/train/store.py
index 8cf1dd5..853903c 100644
--- a/src/train/store.py
+++ b/src/train/store.py
@@ -175,10 +175,11 @@ def select_data(self, cursor, word, language):
# put config info of database to db_config variable
store_data = StoreData(db_config['user'],
db_config['password'],
- db_config['host'],
- db_config['database']
+ db_config['db_host'],
+ db_config['db_name']
)
conn = store_data.db_connect()
+
store_data.create_database(conn.cursor())
store_data.create_tables(conn.cursor(), TABLES, TABLES_SENTENCES)
print('TABLES CREATED: SUCCESS')
diff --git a/src/train/train_cluster.py b/src/train/train_cluster.py
index 429bb9b..b9fb294 100644
--- a/src/train/train_cluster.py
+++ b/src/train/train_cluster.py
@@ -128,9 +128,9 @@ def batch():
print('please input word vector filepath')
# first loading udpipe to segement word for each sentence
- udt_english = UdpipeTrain(languange_name, udpipe_pre_model_path, corpus_filepath)
+ udt_english = UdpipeTrain(language_name, udpipe_pre_model_path, corpus_filepath)
# second train to get the word2vec udpipemodel
- train_model(languange_name, corpus_filepath, file_path, udt_english)
+ train_model(language_name, corpus_filepath, file_path, udt_english)
# finally, after train we can load udpipemodel to use directly
load_model(file_path)
print('All done')
@@ -140,11 +140,12 @@ def batch():
# udt_lang = UdpipeTrain(lang, udpipe_pre_model_path, corpus_filepath)
# second train to get the word2vec model
# word2vec_result_file = 'corpus//word2vecmodel//gensim-word2vec-model-'
+ # word2vec_result_file = 'input//word2vecmodel//gensim-word2vec-model-'
# train_model(lang, corpus_filepath, word2vec_result_file, udt_lang)
-if __name__ == "__main__":
- batch()
+# if __name__ == "__main__":
+#     batch()
# languange_name = 'English'
#
# # input example
@@ -172,11 +173,5 @@ def batch():
# file_path = args.wvfp
# else:
# print('please input word vector filepath')
- #
- # # first loading udpipe to segement word for each sentence
- # udt_english = UdpipeTrain(languange_name, udpipe_pre_model_path, corpus_filepath)
- # # second train to get the word2vec model
- # train_model(languange_name, corpus_filepath, file_path, udpipe_pre_model_path)
- # # finally, after train we can load model to use directly
- # # load_model(file_path)
- # print('All done')
diff --git a/src/train/train_model.py b/src/train/train_model.py
index 11b57a6..b5d0494 100644
--- a/src/train/train_model.py
+++ b/src/train/train_model.py
@@ -4,7 +4,6 @@
Remember: the working directory needs to be set to wordfinder!
"""
-
# third-party modules
import string
import re
@@ -33,8 +32,8 @@ def __init__(self, language_name, pre_model_name, our_corpus_name):
try:
self.store_data = StoreData(db_config['user'],
db_config['password'],
- db_config['host'],
- db_config['database'])
+ db_config['db_host'],
+ db_config['db_name'])
self.cursor = self.store_data.db_connect().cursor()
# second loading udpipe pre-train model
self.model = Model(self.pre_model_name)
@@ -60,7 +59,7 @@ def clean_data(self, data: str) -> str:
"""
cleaned_data = re.sub('\w*\d\w*', '', data)
cleaned_data = re.sub('\[.*?\]', '', cleaned_data)
- cleaned_data = re.sub('[‘’“”…]','',cleaned_data)
+ cleaned_data = re.sub('[‘’“”…]', '', cleaned_data)
cleaned_data = re.sub(r'\\t | \\n', '', cleaned_data)
return cleaned_data
@@ -83,7 +82,7 @@ def do_train(self) -> List[TResult]:
for i, one_sentence in enumerate(word_pos):
sentence_text = self.extract_one_sentence(one_sentence)
results = self.extract_one_word(one_sentence, sentence_text)
- self.store_data.insert_data(self.cursor, results, self.language_name)
+ # self.store_data.insert_data(self.cursor, results, self.language_name)
print('line %d, batch %d for %s written successfully' % (line_no, i, self.language_name))
line_no += 1
print('all written successfully for corpus %s' % self.our_corpus_name)
@@ -137,7 +136,7 @@ def extract_one_word(self, sentence, sentence_text: str) -> [TResult]:
for word in sentence.words:
if word.lemma and word.lemma not in string.punctuation:
if word.lemma and word.upostag and sentence_text:
- combined_words .append(TResult(word.lemma, word.upostag, sentence_text))
+ combined_words.append(TResult(word.lemma, word.upostag, sentence_text))
self._word_count += 1
return combined_words
@@ -182,12 +181,14 @@ def batch_train():
udpipe_pre_model_path = udpipe_language[lang]
corpus_filepath = corpus_language[lang]
train_model = UdpipeTrain(lang, udpipe_pre_model_path, corpus_filepath)
- print('begin train %s corpus' % (lang, ))
+ print('begin train %s corpus' % (lang,))
train_model.do_train()
print('done train %s corpus' % (lang,))
if __name__ == '__main__':
batch_train()
parser = argparse.ArgumentParser(description='train corpus to get word, pos, and related sentence')
parser.add_argument('-udfp', help='udpipe pre-model filepath')
@@ -201,9 +202,15 @@ def batch_train():
corpus_filepath = args.cfp
else:
print('please input corpus filepath')
- # Italian
+
+    # English
+ udt_english = UdpipeTrain(language_list[1], udpipe_pre_model_path, corpus_filepath)
+ udt_english.do_train()
+
+ ''' # Italian
udt_chinese = UdpipeTrain(language_list[0], udpipe_pre_model_path, corpus_filepath)
- udt_chinese.do_train()
+ udt_chinese.do_train()
+    '''
'''
# Chinese
udt_chinese = UdpipeTrain(language_list[0], udpipe_pre_model_path, corpus_filepath)
@@ -268,4 +275,4 @@ def batch_train():
# Spanish
udt_spanish = UdpipeTrain(language_list[15], udpipe_pre_model_path, corpus_filepath)
udt_spanish.do_train()
-'''
\ No newline at end of file
+'''
diff --git a/src/util.py b/src/util.py
index 013e58e..55e3af3 100644
--- a/src/util.py
+++ b/src/util.py
@@ -7,15 +7,11 @@
# database config
# cofig for local database
-db_config = {
- 'host': 'psd-wordfinder.mysql.database.azure.com',
- 'database': 'psd_project',
- 'user': 'adminteam@psd-wordfinder',
- 'password': 'jFq&T7bPJXmY',
- #'client_flags': [mysql.connector.ClientFlag.SSL],
- #'ssl_ca': './/src//train//DigiCertGlobalRootG2.crt.pem' #vscode
- 'ssl_ca': 'DigiCertGlobalRootG2.crt.pem' #pycharm
-}
+db_config = {'user': 'root',
+ 'password': 'root',
+ 'db_host': 'localhost',
+ 'db_name': 'psd_project'}
+
language_list = [
@@ -115,7 +111,6 @@
'Spanish': './corpus/word2vecmodel/gensim-word2vec-udpipemodel-Spanish'
}
-
def get_keyword_window(sel_word: str, words_of_sentence: List, length=5) -> List[str]:
"""
find the index of sel_word at sentence, then decide words of @length size