diff --git a/.github/workflows/release-v1.0_psdwordfinder.yml b/.github/workflows/release-v1.0_psdwordfinder.yml new file mode 100644 index 0000000..d46d59c --- /dev/null +++ b/.github/workflows/release-v1.0_psdwordfinder.yml @@ -0,0 +1,62 @@ +# Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy +# More GitHub Actions for Azure: https://github.com/Azure/actions +# More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions + +name: Build and deploy Python app to Azure Web App - psdwordfinder + +on: + push: + branches: + - release-v1.0 + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python version + uses: actions/setup-python@v1 + with: + python-version: '3.8' + + - name: Create and start virtual environment + run: | + python -m venv venv + source venv/bin/activate + + - name: Install dependencies + run: pip install -r requirements.txt + + # Optional: Add step to run tests here (PyTest, Django test suites, etc.) + + - name: Upload artifact for deployment jobs + uses: actions/upload-artifact@v2 + with: + name: python-app + path: | + . + !venv/ + + deploy: + runs-on: ubuntu-latest + needs: build + environment: + name: 'production' + url: ${{ steps.deploy-to-webapp.outputs.webapp-url }} + + steps: + - name: Download artifact from build job + uses: actions/download-artifact@v2 + with: + name: python-app + path: . 
+ + - name: 'Deploy to Azure Web App' + uses: azure/webapps-deploy@v2 + with: + app-name: 'psdwordfinder' + slot-name: 'production' + publish-profile: ${{ secrets.AzureAppService_PublishProfile_fba125a4de7c454cbe8f4c98c4017480 }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 5e8ba84..f071fdd 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ .idea input __pycache__ +psd_project.sql +log/ diff --git a/README.md b/README.md index e9a0163..b21c3cc 100644 --- a/README.md +++ b/README.md @@ -222,8 +222,7 @@ Right now I found our repository has a problem considerable us to pay enough att such as file path of train corpus, the file path of cluster model, the file path of database config. These file paths cannot be pushed to our base repository! -We should think of a nice way to solve this issue. And I have an idea. We should maintain a common file relative path and all data files and config data should be put inside it. Also, there's another important thing to remember: don't -push these corpus and pre-train models to our base repository. We should maintain a common remote disk to store and then open and share a link to provide everyone in our group to use. +We should think of a nice way to solve this issue. And I have an idea. We should maintain a common file relative path and all data files and config data should be put inside it. Also, there's another important thing to remember: don't push these corpus and pre-train models to our base repository. We should maintain a common remote disk to store and then open and share a link to provide everyone in our group to use. I have created a file named input, there are three files inside it: corpus, udpipemodel, and word2vecmodel. 
All files in them are hosted at @@ -233,10 +232,36 @@ password: td3e downloading them and put them on root directory of wordfiner folder -### sprint 5 +### Features -1、database: we should build a remote DB @Willie -2、word2vec: two methods of doing that @Zhen -3、we should label every sentence and show all sentences with a label to the cluster web interfaces @all +Beta version supports features: +1. Support query in 10 + languages +2. Support to select a certain language, input corresponding words, and display multiple parts of speech of words +3. Click a part of speech of the word to be looked up to show all the corresponding examples +4. Use KWIC to show examples +5. Support to input different number of clusters +6. Click cluster sentences to get examples containing words +7. Examples showing all words are supported + + +Update features: + +1. KWIC, in the middle of the line + +2. now only show part sentence, it's better to show the whole sentence when click. + + a point on the bank hidden by brush where + +3. in cluster web interface, we should group the sentences as cluster labels, sorting. + +4. .gitignore files + +5. French clustering 3: + + ValueError: Number of labels is 1. Valid values are 2 to n_samples - 1 (inclusive) + + Chinese + +6. 
there are bugs of cluster function diff --git a/src/app.py b/src/app.py index daf3d71..5c11cfa 100644 --- a/src/app.py +++ b/src/app.py @@ -5,14 +5,15 @@ from src.train.result_model import TResult from src.train.store import StoreData from src.util import language_dict, language_list, db_config, word2vec_language -from src.service import AppService +from src.service import AppService, AppContext from flask import Flask, render_template, request, redirect, url_for, flash - app = Flask(__name__) +app.secret_key = b'_5#y2L"F4Q8z\n\xec]/' # TODO: need to change with the selection different language -appService = AppService() +app_service = AppService() +app_context = AppContext @app.route('/') @@ -40,12 +41,16 @@ def find(): language_id = request.form['sellanguage'] sel_word = request.form['selword'] language_name = language_dict[language_id] - if not appService.udt_pre_model: - appService.config_udpipe(language_name) - appService.find_service(language_name, sel_word) + app_context.sel_word = sel_word + app_context.sel_language = language_name + if not app_service.udt_pre_model: + app_service.config_udpipe(language_name) + app_service.find_service(language_name, sel_word) + sel_result_kwic = app_service.kwic(sel_word, app_service.sel_result) + app_context.sel_result_kwic = sel_result_kwic return render_template('result.html', input_data={"language_name": language_name, "sel_word": sel_word, - "sel_result": appService.sel_result}) + "sel_result": sel_result_kwic}) @app.route('/find2', methods=['POST']) @@ -54,12 +59,16 @@ def find2(): if request.method == 'POST': language_name = request.form['sellanguage'] sel_word = request.form['selword'] - if not appService.udt_pre_model: - appService.config_udpipe(language_name) - appService.find_service(language_name, sel_word) + app_context.sel_word = sel_word + app_context.sel_language = language_name + if not app_service.udt_pre_model: + app_service.config_udpipe(language_name) + app_service.find_service(language_name, sel_word) 
+ sel_result_kwic = app_service.kwic(sel_word, app_service.sel_result) + app_context.sel_result_kwic = sel_result_kwic return render_template('result.html', input_data={"language_name": language_name, "sel_word": sel_word, - "sel_result": appService.sel_result}) + "sel_result": sel_result_kwic}) @app.route('/cluster', methods=['POST']) @@ -75,17 +84,24 @@ def cluster(): language_name = request.form['languageName'] cluster_number = request.form['clusterNumber'] sel_tag = request.form['tagInput1'] - cluster_input_sentence = appService.pos_dict[sel_tag] - if not appService.udt_pre_model: - appService.config_udpipe(language_name) + # TODO: clicking the button of return previous page then clicking cluster button causes a bug + cluster_input_sentence = app_service.pos_dict[sel_tag] + if not app_service.udt_pre_model: + app_service.config_udpipe(language_name) cluster_model_file = word2vec_language[language_name] - cluster_result, rec_cluster_result = appService.cluster_sentences( + cluster_result, rec_cluster_result, sentences, best_labels = app_service.cluster_sentences( language_name, cluster_model_file, cluster_input_sentence, cluster_number) + if not cluster_result: + flash("invalid input to cluster number") + return render_template('result.html', input_data={"language_name": language_name, + "sel_word": app_context.sel_word, + "sel_result": app_context.sel_result_kwic}) return render_template('cluster.html', cluster_number=cluster_number, cluster_result=cluster_result, - rec_cluster_result=rec_cluster_result) + rec_cluster_result=rec_cluster_result, + sentences_with_labels=zip(sentences, best_labels)) if __name__ == '__main__': - app.run(port=3000, debug=True) + app.run(port=3000, host='0.0.0.0') diff --git a/src/service.py b/src/service.py index 15a92c6..3e3ebc6 100644 --- a/src/service.py +++ b/src/service.py @@ -13,10 +13,19 @@ from src.train.result_model import TResult from src.train.store import StoreData -from src.util import * +from src.util import 
(language_dict, + language_list, + db_config, + corpus_language, + udpipe_language, + get_keyword_window) from src.train.train_cluster import load_model from src.train.train_model import UdpipeTrain from src.train.cluster import Evaluator +import re +from src.train.KWIC import keywords_in_context, find_and_replace +from src.util import get_keyword_window, kwic_show + try: store_data = StoreData(db_config['user'], @@ -110,14 +119,14 @@ def cluster_sentences(self, language_name: str, save_path: str, sentences: List[ n_clusters = int(n_clusters) if n_clusters <= 0: print("Parameter is Invalid") - return + return [None]*4 if n_clusters > len(sentences): # TODO add log print('number of cluster bigger than sentences count') - return + return [None]*4 if len(self.sel_result) <= 0: print('no sentence') - return + return [None]*4 # first loading model word2vec_model = load_model(save_path) # second geting vectors for one sentence @@ -176,7 +185,36 @@ def cluster_sentences(self, language_name: str, save_path: str, sentences: List[ if no_n_input: examples = recommend_sentences - return examples, recommend_sentences + return examples, recommend_sentences, sentences, best_labels + + def kwic(self, selword: str, sentence_with_pos: list): + """ + :param: selword + :param: sentenceWithPOS + + sentence_with_pos examples: + [("NOUN", "bank", ["I go to the bank", "The house lies the right of the river bank"]), + ("VERB", "bank", ["I banked in a slot"]) + """ + # This is similar to sentenceWithPOS but processed after KWIC + result = [] + for sentTuple in sentence_with_pos: + sents_kwic = [] + result.append((sentTuple[0], sentTuple[1], sentTuple[2], sents_kwic)) + + sents_origin = sentTuple[2] + for sent in sents_origin: + # result_text = keywords_in_context(sent, [selword]) + # Highlight Keywords + # result_text = find_and_replace(result_text, selword, "\x1b[34m" + selword + "\x1b[0m") + # sents_kwic.append(result_text) + window_words = get_keyword_window(selword, sent.split(" ")) + 
result_text = kwic_show(window_words, selword) + if result_text: + print(result_text) + sents_kwic.append(result_text) + + return result def _get_examples(self, sentences: List[str], best_labels, n_clusters: int): tmp_labels, examples = [], [] @@ -196,6 +234,12 @@ def _get_examples(self, sentences: List[str], best_labels, n_clusters: int): return examples +class AppContext(object): + sel_language = None + sel_word = None + sel_result_kwic = None + + if __name__ == "__main__": # get word vector for one sentence language_name = 'English' diff --git a/src/static/js/main.js b/src/static/js/main.js index 41ee639..92f2644 100644 --- a/src/static/js/main.js +++ b/src/static/js/main.js @@ -5,33 +5,76 @@ function init(){ $('#clusterDiv1').hide(); } +// find all indexes of selected word(substr) in sentence(str) +function searchSubStr(str,subStr){ + var positions = new Array(); + var pos = str.indexOf(subStr); + while(pos>-1){ + positions.push(pos); + pos = str.indexOf(subStr,pos+1); + } + return positions; +} + +//function findByTag(selWord, tag, rowResult, wordResultKWIC){ +// /* +// selWord: selected word +// rowResult: sentences +// tag: POS +// +// */ +// $("#tagInput1").attr("value",tag); +// var ulControl = $('#sentencesGroup'); +// ulControl.find("li").remove(); +// if(wordResultKWIC.length > 0){ +// $('#labelId1').show(); +// $('#clusterDiv1').show(); +// } +// for(i=1; i

"; +// if(allIndexes.length > 0){ +// var startIndex = 0; +// for(let j=0; j < allIndexes.length; j++){ +// var part1 = wordResultKWIC[i-1].slice(startIndex,allIndexes[j]) +// var part2 = wordResultKWIC[i-1].slice(allIndexes[j], allIndexes[j] + selWord.length) +// startIndex = allIndexes[j] + selWord.length +// ulcontent = ulcontent + part1 + "" + part2 + ""; +// } +// if(startIndex < wordResultKWIC[i-1].length){ +// ulcontent = ulcontent + wordResultKWIC[i-1].slice(startIndex, wordResultKWIC[i-1].length) +// } +// ulcontent += "

"+i+"" + ""; +// ulControl.append(ulcontent); +// } +// } +// +//} -function findByTag(selWord, tag, rowResult){ +function findByTag(selWord, tag, rowResult, wordResultKWIC){ /* selWord: selected word rowResult: sentences tag: POS + */ $("#tagInput1").attr("value",tag); var ulControl = $('#sentencesGroup'); ulControl.find("li").remove(); - var rowResult1 = rowResult; - if(rowResult1.length > 0){ + if(wordResultKWIC.length > 0){ $('#labelId1').show(); $('#clusterDiv1').show(); } - for(i=1; i" - + "

" + part1 + "" + part2 + "" + part3 + "

" + - ""+i+""+ - ""; - ulControl.append(ulcontent); + + outstr = '
'
+    for(i=1; iWell done! After clustering, you get {{cluster_number}} example
                         {% endfor %}
                         {% endif %}
                     
-
-
                 
 
                 
@@ -60,6 +58,28 @@

Well done!

After clustering, you get {{cluster_number}} example
+
+ +
    + {% if cluster_result %} + {% for cluster_sentence, label in sentences_with_labels %} +
  • + {{cluster_sentence}} + {{label}} +
  • + {% endfor %} + {% endif %} +
+

+ +
+

diff --git a/src/templates/layout.html b/src/templates/layout.html index 3a2c7b5..3692ac3 100644 --- a/src/templates/layout.html +++ b/src/templates/layout.html @@ -20,6 +20,18 @@ + {% block content %} +
+ {% for message in get_flashed_messages() %} +
+ + {{ message }} +
+ {% endfor %} + + {% block page_content %}{% endblock %} +
+ {% endblock %}
{% block body %} @@ -28,6 +40,12 @@ - s + + \ No newline at end of file diff --git a/src/templates/result.html b/src/templates/result.html index 41421eb..29a7c71 100644 --- a/src/templates/result.html +++ b/src/templates/result.html @@ -23,7 +23,7 @@

Well done!

You successfully find {{row_result.1}} + {% endfor %} {% endif %}
diff --git a/src/train/KWIC.py b/src/train/KWIC.py new file mode 100644 index 0000000..82ae5ab --- /dev/null +++ b/src/train/KWIC.py @@ -0,0 +1,271 @@ +def getNGrams(wordlist, n): + return [wordlist[i:i + n] for i in range(len(wordlist) - (n - 1))] + +# Given a list of n-grams, return a dictionary of KWICs, +# indexed by keyword. + + +def nGramsToKWICDict(ngrams): + keyindex = len(ngrams[0]) // 2 + + kwicdict = {} + + for k in ngrams: + if k[keyindex] not in kwicdict: + kwicdict[k[keyindex]] = [k] + else: + kwicdict[k[keyindex]].append(k) + return kwicdict + + +# Given a KWIC, return a string that is formatted for +# pretty printing. + +def prettyPrintKWIC(kwic): + n = len(kwic) + keyindex = n // 2 + width = 10 + + outstring = ' '.join(kwic[:keyindex]).rjust(width * keyindex) + outstring += str(kwic[keyindex]).center(len(kwic[keyindex]) + 6) + outstring += ' '.join(kwic[(keyindex + 1):]) + + return outstring + + +def cut_to_sentence(text, keyword, keywordindex): + """ Cuts the sentence around a keyword out of the text + Arguments + ---------- + text : str + Text out of which the sentence should be extracted + keyword : str + Keyword in the sentence of the text + keywordindex: int + Index of the keyword in the text + Returns + ------- + Indices of of the sentence in the text and a string of the sentence + """ + # Strings after wich a point does not end a sentence + safe = ["Ms", "Mr", "Fr", "Hr", "Dipl", "B", "M", "Sc", "Dr", "Prof", + "Mo", "Mon", "Di", "Tu", "Tue", "Tues", "Mi", "Wed", "Do", "Th", + "Thu", "Thur", "Thurs", "Fr", "Fri", "Sa", "Sat", "So", "Sun", + "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", + "str"] + + # Find beginning + rfind_results = [] + end_ = keywordindex + # Special Case "." + while True: + rfind_ = text.rfind(". 
", 0, end_) + if not rfind_ == -1: + no_safe = False + for i, s in enumerate(safe): + if text[0:rfind_][::-1].find(s[::-1]) == 0: + end_ = rfind_ - len(s) + break + if i == len(safe)-1: + no_safe = True + if no_safe is True: + break + else: + break + rfind_results.append(rfind_) + + rfind_results.append(max([text.rfind(sentence_ending, 0, keywordindex) + for sentence_ending in ["! ", "? "]])) + + rfind_result = max(rfind_results) + if rfind_result == -1: + start = 0 + else: + start = rfind_result + 2 + + # Find ending + find_results = [] + start_ = keywordindex+len(keyword) + # Special Case "." + while True: + find_ = text.find(". ", start_) + if not find_ == -1: + no_safe = False + for i, s in enumerate(safe): + if text[0:find_][::-1].find(s[::-1]) == 0: + start_ = find_ + len(s) + break + if i == len(safe)-1: + no_safe = True + if no_safe is True: + break + else: + break + find_results.append(find_) + + find_results.extend([text.find(sentence_ending, keywordindex+len(keyword)) + for sentence_ending in ["! ", "? "]]) + find_results_bigger_neg_1 = [i for i in find_results if i >= 0] + if not find_results_bigger_neg_1: + end = len(text) + else: + end = min(find_results_bigger_neg_1) + 1 + + return list(range(start, end)), text[start:end] + + +def find_nth_occurrence(text, searchstr, nth=1, startindex=0): + """ + Finds the index of the nth occurence of a searchstr in the text starting + from the a given startindex. + """ + start = text.find(searchstr, startindex) + + if start == -1: + return len(text)-1 + + for i in range(nth-1): + find_index = text.find(searchstr, start+len(searchstr)) + if find_index == -1: + return len(text)-1 + else: + start = find_index + + return start + + +def rfind_nth_occurrence(text, searchstr, nth=1, endindex=None): + """ + Finds the index of the nth occurence of a searchstr in the text going + backwards from a given endindex. 
+ """ + if endindex is None: + endindex = len(text) + + end = text.rfind(searchstr, 0, endindex) + + if end == -1: + return 0 + + for i in range(nth-1): + rfind_index = text.rfind(searchstr, 0, end) + if rfind_index == -1: + return 0 + else: + end = rfind_index + + return end + + +def keywords_in_context(text, keywords, max_words=5, sep="...", cut_sentences=True): + """ Returns the relevant context around keywords in a larger text. + Arguments + ---------- + text : str + Text which should be summerized around keywords. + keywords : list of str + Keywords whose context we want to extract out of the text. + max_words : int + Maximum number of words before und after a keyword if no sentence + beginning or ending occurs and cut_sentences is set. + sep : str + String wich represents skipped portions of the text in the result. + cut_sentences : bool + Set if the context around a keyword is cut at the beginning or end of + a sentence + Returns + ------- + Summarised text containing the keywords in context as string. 
+ """ + indices_lst = [] + for k in keywords: + start = text.find(k) + while not start == -1: + indices_lst.append((k, start)) + start = text.find(k, start+len(k)) + + result_indices = set() + for index_tpl in indices_lst: + keyword, index = index_tpl + start = rfind_nth_occurrence(text, " ", nth=max_words+1, endindex=index) + if not start == 0: + start += 1 # +1 to Remove the first " " + end = find_nth_occurrence(text, " ", nth=max_words+1, startindex=index+len(keyword)) + if end == len(text)-1: + end += 1 + indices_of_text = set(range(start, end)) + if cut_sentences: + sentence_indices, _ = cut_to_sentence(text, keyword, index) + indices_of_text.intersection_update(set(sentence_indices)) + for i in indices_of_text: + result_indices.add(i) + + result_indices = list(result_indices) + result_indices.sort() + + result = "" + i_before = -1 + for _i, i in enumerate(result_indices): + if not (i-1) == i_before: + result += " " + sep + " " + text[i] + i_before = i + else: + result += text[i] + i_before = i + + # If the last word is not the end of the text add the sperator. + if _i == len(result_indices)-1: + if not i == len(text)-1: + result += " " + sep + + return result + + +def find_and_replace(text, find_str, replacement_str): + """ Find and replace a find_str with a replacement_str in text. 
""" + start = text.find(find_str) + offset = 0 + while start != -1: + # update the index compatible to the whole text + start = start + offset + + # replace (cut the original word out and insert the replacement) + text = text[:start] + replacement_str + text[start+len(find_str):] + prettyPrintKWIC(text) + + offset = start + len(replacement_str) + start = text[offset:].find(find_str) + + return text + + +def prettyPrintKWIC(kwic): + n = len(kwic) + keyindex = n // 2 + width = 1 + + outstring = ' '.join(kwic[:keyindex]).rjust(width*keyindex) + outstring += str(kwic[keyindex]).center(len(kwic[keyindex])+6) + outstring += ' '.join(kwic[(keyindex+1):]) + # print(outstring) + return outstring + + +if __name__ == "__main__": + """ + Text = Sentence which needs to be shrinked + Keyword = Searched word + """ + TEXTs = [ + 'In 222 BC, the Romans besieged Acerrae, an Insubre fortification on the right bank of the River Adda between Cremona and Laus Pompeia (Lodi Vecchio).', + 'A spokesman for the bank said "We will be compensating customers who did not receive full services from Affinion, and providing our apology."', + 'One of the first fully functional direct banks in the United States was the Security First Network Bank (SFNB), which was launched in October 1995', + 'At the same time, internet-only banks or "virtual banks" appeared.', + 'Arriving at the Douro, Wellesley was unable to cross the river because Soult\'s army had either destroyed or moved all the boats to the northern bank.'] + KEYWORDS = ['bank'] + for TEXT in TEXTs: + result_text = keywords_in_context(TEXT, KEYWORDS, max_words=3, sep="") + # Highlight Keywords + for k in KEYWORDS: + result_text = find_and_replace(result_text, k, k) + print(result_text) \ No newline at end of file diff --git a/src/train/cluster.py b/src/train/cluster.py index aeca523..b5f90ca 100644 --- a/src/train/cluster.py +++ b/src/train/cluster.py @@ -5,10 +5,10 @@ date:4.2.2021 """ from sklearn import metrics -from sklearn.metrics import 
pairwise_distances from sklearn.cluster import KMeans from sklearn.cluster import AgglomerativeClustering from sklearn.cluster import DBSCAN +import numpy as np class Evaluator(object): @@ -56,6 +56,12 @@ def higher_better_score(self, labels): """ higher value means better cluster result """ + # only one cluster + if labels.min() == labels.max(): + return 1.0 + # cluster count equals to len of X + if len(np.unique(labels)) == len(self.X): + return 1.0 return metrics.silhouette_score(self.X, labels, metric='euclidean') def nearer_zero_better_score(self, labels): diff --git a/src/util.py b/src/util.py index 81f44df..004cff7 100644 --- a/src/util.py +++ b/src/util.py @@ -3,6 +3,8 @@ # date: 2020.2.28 from typing import List +import os +import re # TODO: keeping update @@ -13,8 +15,8 @@ # database config # cofig for local database db_config = {'user': 'root', - 'password': 'root@123', - 'db_host': 'localhost', + 'password': os.environ.get('PSD_DB_PASSWORD', 'root@123'), + 'db_host': os.environ.get('PSD_DB_HOST', 'localhost'), 'db_name': 'psd_project'} # language and corresponding file path of corpus @@ -57,10 +59,16 @@ def get_keyword_window(sel_word: str, words_of_sentence: List, length=5) -> List remember: sel_word is lemmatized """ - if length <= 0: + if length <= 0 or len(words_of_sentence) <= length: return words_of_sentence - index = words_of_sentence.index(sel_word) + index = -1 + for iw, word in enumerate(words_of_sentence): + word = word.lower() + if sel_word.lower() in word: + index = iw + if index == -1: + print("warning: cannot find %s in sentence: %s" % (sel_word, words_of_sentence)) return words_of_sentence # backward is not enough if index < length // 2: @@ -82,4 +90,32 @@ def get_keyword_window(sel_word: str, words_of_sentence: List, length=5) -> List return words_of_sentence[index - (length - len(forward_slice)):index] + forward_slice return words_of_sentence[index - length // 2: index + length // 2 + 1] if length % 2 \ - else words_of_sentence[index - length // 2 + 1: index + length // 2 + 1] \ No newline
at end of file + else words_of_sentence[index - length // 2 + 1: index + length // 2 + 1] + + +def kwic_show(words_of_sentence, sel_word, sum_sent_length=60, key_word_space=1): + sent = ' '.join(words_of_sentence) + key_index = sent.lower().find(sel_word.lower())  # find() yields -1 on a miss; index() raised ValueError + if key_index != -1: + pre_kwic = sent[:key_index].rjust(sum_sent_length//2) + key_kwic = key_word_space*' ' + sel_word + key_word_space*' ' + post_kwic = sent[key_index+len(sel_word):] + sel_word_kwic = pre_kwic + key_kwic + post_kwic + return sel_word_kwic + return None + + +if __name__ == "__main__": + """ + Text = Sentence which needs to be shrinked + Keyword = Searched word + """ + texts = [ + 'In 222 BC, the Romans besieged Acerrae, an Insubre fortification on the right bank of the River Adda between Cremona and Laus Pompeia (Lodi Vecchio).', + 'A spokesman for the bank said "We will be compensating customers who did not receive full services from Affinion, and providing our apology."', + 'One of the first fully functional direct banks in the United States was the Security First Network bank (SFNB), which was launched in October 1995', + 'At the same time, internet-only banks or "virtual banks" appeared.', + 'Arriving at the Douro, Wellesley was unable to cross the river because Soult\'s army had either destroyed or moved all the boats to the northern bank.'] + for text in texts: + result = get_keyword_window('bank', text.split(' ')) + kwic_show(result, 'bank') \ No newline at end of file