nltk.py
import string
from collections import Counter
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
import nltk
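# Note: a first run may need the NLTK data packages used below; as an assumption
# about your environment, they can be fetched once with nltk.download(), e.g.:
#   nltk.download('punkt'); nltk.download('stopwords')
#   nltk.download('wordnet'); nltk.download('vader_lexicon')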
# Read the source text, then normalise it: lowercase and strip punctuation
with open('read.txt', encoding='utf-8') as file:
    text = file.read()
lower_case = text.lower()
cleaned_text = lower_case.translate(str.maketrans('', '', string.punctuation))
# Using word_tokenize rather than split() for proper, language-aware tokenization
tokenized_words = word_tokenize(cleaned_text, "english")
# Removing stop words
stop_words = set(stopwords.words('english'))
final_words = []
for word in tokenized_words:
    if word not in stop_words:
        final_words.append(word)
# Lemmatization - reduce each word to its dictionary base form (e.g. "cats" -> "cat");
# without a POS tag, WordNetLemmatizer treats every word as a noun by default
lemmatizer = WordNetLemmatizer()
lemma_words = []
for word in final_words:
    lemma_words.append(lemmatizer.lemmatize(word))
# emotions.txt is expected to hold one mapping per line in the form 'word': 'emotion'
emotion_list = []
with open('emotions.txt', 'r') as file:
    for line in file:
        clear_line = line.replace("\n", '').replace(",", '').replace("'", '').strip()
        word, emotion = clear_line.split(':')
        if word.strip() in lemma_words:
            emotion_list.append(emotion.strip())
print(emotion_list)
# Count how often each detected emotion appears
w = Counter(emotion_list)
print(w)
def sentiment_analyse(sentiment_text):
    # VADER returns neg/neu/pos/compound scores; here the neg and pos shares are compared
    score = SentimentIntensityAnalyzer().polarity_scores(sentiment_text)
    if score['neg'] > score['pos']:
        print("Negative Sentiment")
    elif score['neg'] < score['pos']:
        print("Positive Sentiment")
    else:
        print("Neutral Sentiment")
sentiment_analyse(cleaned_text)
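# A sketch of an alternative check, assuming the conventional VADER 'compound'
# thresholds (>= 0.05 positive, <= -0.05 negative) instead of the neg/pos comparison above:
#   compound = SentimentIntensityAnalyzer().polarity_scores(sentiment_text)['compound']
#   label = "Positive" if compound >= 0.05 else "Negative" if compound <= -0.05 else "Neutral"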
# Plot the emotion counts as a bar chart
fig, ax1 = plt.subplots()
ax1.bar(w.keys(), w.values())
# autofmt_xdate() is used here only to rotate the x-axis labels for readability
fig.autofmt_xdate()
plt.savefig('graph.png')
plt.show()