-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnews analysis.py
68 lines (61 loc) · 2.45 KB
/
news analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Importing the libraries
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import os
import pandas as pd
import matplotlib.pyplot as plt
import nltk
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Download the finviz quote page of every ticker and keep its news table.
# Result: news_tables maps ticker symbol -> the element with id 'news-table'
# (or None when the page contains no such element).
finwiz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}
tickers = ['AMZN', 'TSLA', 'GOOG']
for ticker in tickers:
    url = finwiz_url + ticker
    # A browser-like User-Agent is sent with the request
    # (presumably the site rejects the default urllib one - TODO confirm).
    req = Request(url=url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
    # The context manager closes the HTTP response once the page is parsed.
    with urlopen(req) as response:
        # Name the parser explicitly so the parse tree does not depend on
        # which optional parsers happen to be installed.
        html = BeautifulSoup(response, 'html.parser')
    news_table = html.find(id='news-table')
    news_tables[ticker] = news_table
# Preview: print headline text and timestamp cell of the first four AMZN rows.
amzn = news_tables['AMZN']
# The stored table may be None when the page had no 'news-table' element.
amzn_tr = amzn.find_all('tr') if amzn is not None else []
for i, table_row in enumerate(amzn_tr[:4]):
    # Attribute access such as row.a yields None when the tag is absent;
    # skip such rows instead of failing on `.text`.
    if table_row.a is None or table_row.td is None:
        continue
    a_text = table_row.a.text
    td_text = table_row.td.text
    print(a_text)
    print(td_text)
# Flatten every news table into rows of [ticker, date, time, headline].
parsed_news = []
for file_name, news_table in news_tables.items():
    # An entry may be None when the page had no 'news-table' element.
    if news_table is None:
        continue
    ticker = file_name.split('_')[0]
    # Reset per table: a row that carries only a time reuses the most recent
    # date seen in the SAME table, never one left over from another ticker.
    # Stays None if no date has been seen yet in this table.
    date = None
    for x in news_table.find_all('tr'):
        # Rows without a link or a cell cannot be parsed as a headline.
        if x.a is None or x.td is None:
            continue
        date_scrape = x.td.text.split()
        if not date_scrape:
            continue
        text = x.a.get_text()
        if len(date_scrape) == 1:
            # Only a time is present; keep the previously seen date.
            time = date_scrape[0]
        else:
            date = date_scrape[0]
            time = date_scrape[1]
        parsed_news.append([ticker, date, time, text])
print(parsed_news[:5])
# Build the headline table and attach the VADER polarity scores to each row.
vader = SentimentIntensityAnalyzer()
columns = ['ticker', 'date', 'time', 'headline']
parsed_and_scored_news = pd.DataFrame(parsed_news, columns=columns)

# One polarity_scores() dict per headline, in row order.
scores = [
    vader.polarity_scores(headline)
    for headline in parsed_and_scored_news['headline']
]
scores_df = pd.DataFrame(scores)

# Index-aligned join of the score columns onto the headline rows.
parsed_and_scored_news = parsed_and_scored_news.join(scores_df, rsuffix='_right')

# Convert the scraped date strings to datetime.date objects.
parsed_dates = pd.to_datetime(parsed_and_scored_news['date'])
parsed_and_scored_news['date'] = parsed_dates.dt.date
print(parsed_and_scored_news)
# Bar chart of the mean compound score per date, one bar series per ticker.
plt.rcParams['figure.figsize'] = [10, 6]
# numeric_only=True: the frame also holds text columns ('time', 'headline')
# which cannot be averaged; newer pandas raises instead of dropping them.
mean_scores = parsed_and_scored_news.groupby(['ticker', 'date']).mean(numeric_only=True)
# Move the 'date' level into the columns, keep only the 'compound' score,
# then transpose so that dates are rows and tickers are columns.
mean_scores = mean_scores.unstack()
mean_scores = mean_scores.xs('compound', axis="columns").transpose()
mean_scores.plot(kind='bar')
plt.grid()
# Index the scored headlines by date so a single day can be looked up.
# pd.to_datetime is used because astype('datetime64') without a unit is
# rejected by newer pandas versions.
parsed_and_scored_news['date'] = pd.to_datetime(parsed_and_scored_news['date'])
parsed_and_scored_news.set_index('date', inplace=True)
# Example: show the news and scores of one specific date
# parsed_and_scored_news.loc['2022-12-20']