-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
117 lines (86 loc) · 4.92 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import praw
import os.path
import json
import glob
from textblob import TextBlob
from statistics import mean
import credentials
# Reddit API client. The client id, secret key, bot name (user agent) and
# username are all read from credentials.py.
reddit = praw.Reddit(
    client_id=credentials.clientID,
    client_secret=credentials.clientSecret,
    user_agent=credentials.userAgent,
    username=credentials.userName,
)

# The subreddit to be queried.
target_subreddit = 'depression'

# Upper bound on the number of posts to obtain. Per the original author's
# note, every additional 100 posts results in roughly a 2 second request delay.
post_limit = 2000
def get_data():
    """Collect post titles, post bodies and comment text from the target subreddit.

    Uses the module-level ``reddit`` client, ``target_subreddit`` and
    ``post_limit``. ``.hot`` selects the sort order of the listing; ``.new``,
    ``.top`` and ``.controversial`` are the alternatives.

    Returns:
        A ``(titles, bodys, comments)`` tuple of three lists of strings.
    """
    titles = []    # text of every post title
    bodys = []     # text of every post body
    comments = []  # text of every comment that passed the filter

    for submission in reddit.subreddit(target_subreddit).hot(limit=post_limit):
        # Read both fields before appending so that titles and bodys stay
        # aligned even if a submission is missing an attribute.
        try:
            post_title = str(submission.title)
            post_body = str(submission.selftext)
        except AttributeError:
            continue
        titles.append(post_title)
        bodys.append(post_body)

        for comment in submission.comments.list():
            # The flattened comment list can contain placeholder entries
            # (PRAW's MoreComments) that have no ``body``. Skip them one at a
            # time; previously the first such entry raised AttributeError and
            # silently discarded every remaining comment of the submission.
            text = getattr(comment, "body", None)
            if text is None:
                continue
            # Crude bot filter: drops any comment whose text contains "bot".
            if "bot" not in text:
                comments.append(text)

    print("request completed")  # let the user know the request is done
    return titles, bodys, comments
def clean_data(data):
    """Replace unwanted punctuation in every title, body and comment with a space.

    Args:
        data: Sequence whose first three items are iterables of strings
            (titles, bodies, comments), such as the tuple returned by
            ``get_data()``.

    Returns:
        A ``(titles, bodys, comments)`` tuple of new lists holding the cleaned
        strings. The input lists are not modified.
    """
    # Characters to be replaced by a space. The original list was missing a
    # comma (fusing two entries into one string) and spelled the newline
    # escape as "/n".
    unwanted_chars = [",", "’", "!", "\n", "'", "."]

    # Strings are immutable, so the replacement has to be captured; a single
    # translation table handles every unwanted character in one pass.
    table = str.maketrans(dict.fromkeys(unwanted_chars, " "))

    titles = [title.translate(table) for title in data[0]]
    bodys = [body.translate(table) for body in data[1]]
    comments = [comment.translate(table) for comment in data[2]]

    print("unwanted charecters removed")  # let the user know cleaning is done
    return titles, bodys, comments
def get_sentiment(data):
    """Compute the TextBlob sentiment of every title, body and comment.

    Args:
        data: Sequence whose first three items are iterables of strings
            (titles, bodies, comments).

    Returns:
        A ``(polarity, subjectivity)`` tuple of two parallel lists, ordered
        titles first, then bodies, then comments. Per the TextBlob
        documentation, polarity lies in [-1.0, 1.0] (negative to positive)
        and subjectivity lies in [0.0, 1.0] (objective to subjective).
    """
    polarity = []
    subjectivity = []

    for texts in (data[0], data[1], data[2]):
        for text in texts:
            # Build one TextBlob per text and read both scores from it,
            # instead of constructing and analysing the same text twice.
            scores = TextBlob(text).sentiment
            polarity.append(scores.polarity)
            subjectivity.append(scores.subjectivity)

    print("sentiment found")
    return polarity, subjectivity
def get_result(sentiment, subreddit=None):
    """Print a friendly summary of the overall sentiment of a query.

    Args:
        sentiment: Sequence whose first item is an iterable of polarity
            scores (as returned by ``get_sentiment``).
        subreddit: Name used in the printed message. Defaults to the
            module-level ``target_subreddit``.

    Returns:
        None. The summary is written to standard output.
    """
    if subreddit is None:
        subreddit = target_subreddit

    negative_count = 0  # number of texts with polarity below zero
    positive_count = 0  # number of texts with polarity above zero
    for polarity in sentiment[0]:
        if polarity > 0:
            positive_count += 1
        elif polarity < 0:
            negative_count += 1
        # A polarity of exactly 0 is neutral and counts toward neither side;
        # previously neutral texts were tallied as negative.

    if negative_count > positive_count:
        print("Overall, " + subreddit + " is on average negative. " +
              "This query produced " + str(negative_count) + " negative posts, and " + str(positive_count) + " positive posts.")
    else:
        # The counts were previously both labelled "negative posts" here.
        print("Overall, the subreddit " + subreddit + " is on average positive. " +
              "This query produced " + str(positive_count) + " positive posts, and " + str(negative_count) + " negative posts.")
if __name__ == "__main__":
    # Step 1: download titles, bodies and comments from the subreddit.
    data = get_data()
    # Step 2: pass the downloaded text through clean_data.
    cleaned_data = clean_data(data)
    # Step 3: score every title, body and comment with TextBlob.
    sentiment = get_sentiment(cleaned_data)
    # Step 4: summarise the scores and print the verdict.
    get_result(sentiment)