-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathBargraph.py
192 lines (137 loc) · 6.09 KB
/
Bargraph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
import streamlit as st
from urllib.request import urlopen, Request
from datetime import datetime
from bs4 import BeautifulSoup as bs #important for data scraping
import pandas as pd #helps w/ data manipulation
import plotly
import re
import requests # helps send & receive response from web browswer
import plotly.express as px
import json # for graph plotting in website
# NLTK VADER for sentiment analysis
import nltk
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
def get_news(ticker):
    """Fetch the Finviz quote page for *ticker* and return its news table.

    Args:
        ticker: Symbol string appended to the Finviz quote URL as the ``t``
            query value.

    Returns:
        The element whose ``id`` is ``news-table`` as located by
        BeautifulSoup, or ``None`` when the page contains no such element.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails.
    """
    finviz_url = "https://finviz.com/quote.ashx?t="
    url = finviz_url + ticker
    # The request is sent with a browser-like User-Agent header.
    req = Request(url=url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
    # Close the HTTP response as soon as the document has been parsed.
    with urlopen(req) as response:
        # Name the parser explicitly so the parse does not depend on which
        # optional parser libraries happen to be installed.
        html = bs(response, 'html.parser')
    # Find 'news-table' in the soup and hand it back to the caller.
    return html.find(id='news-table')
# Symbol analysed by the rest of this script.
ticker = "AAPL"
# Fetch the news table for that symbol via get_news().
news = get_news(ticker)
# Dump the fetched markup to stdout for manual inspection.
print(news.prettify())
def parse_news(news_table):
    """Parse the rows of a news table into a DataFrame.

    Args:
        news_table: Object exposing ``find_all('tr')``. For each row the
            headline is read from ``row.a.get_text()`` and the timestamp
            text from ``row.td.text``.

    Returns:
        DataFrame with the columns ``date``, ``time``, ``headline`` and
        ``datetime``, where ``datetime`` is ``date`` and ``time`` joined by a
        space and converted with ``pd.to_datetime``.

    Rows without a link or a cell, rows with an empty timestamp cell, and
    time-only rows seen before any date has been read are skipped.
    """
    rows = []
    current_date = None  # the most recent date seen; time-only rows reuse it
    for row in news_table.find_all('tr'):
        try:
            headline = row.a.get_text()  # the 'a' tag holds the headline
            stamp = row.td.text.split()  # "<date> <time>" or just "<time>"
        except AttributeError:
            # The row has no link or no cell, so it is not a news entry.
            continue
        if not stamp:
            continue
        if len(stamp) == 1:
            clock = stamp[0]
        else:
            # The date comes first, the time second.
            current_date, clock = stamp[0], stamp[1]
        if current_date is None:
            # A time-only row before any date cannot be placed on a day.
            continue
        rows.append([current_date, clock, headline])

    df = pd.DataFrame(rows, columns=['date', 'time', 'headline'])
    # Replace the literal 'Today' with the current local date.
    # NOTE(review): the replacement is '%Y-%m-%d'; if the other rows use a
    # different date format, confirm pd.to_datetime accepts the mix.
    df['date'] = df['date'].replace('Today', datetime.today().strftime('%Y-%m-%d'))
    df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'])
    return df
# Function: scores article headlines for sentiment analysis
def score_news(parsed_news_df, analyzer=None):
    """Attach polarity scores to every headline in *parsed_news_df*.

    Args:
        parsed_news_df: DataFrame with the columns ``date``, ``time``,
            ``headline`` and ``datetime``.
        analyzer: Optional object with a ``polarity_scores(text)`` method
            returning a dict with the keys ``neg``, ``neu``, ``pos`` and
            ``compound``. Defaults to the module-level ``sid`` analyzer.

    Returns:
        A new DataFrame indexed by ``datetime`` with the columns
        ``headline``, ``neg``, ``neu``, ``pos`` and ``sentiment_score``
        (the ``compound`` score). The input frame is not modified.
    """
    if analyzer is None:
        analyzer = sid  # looked up at call time, so it may be defined later
    # One score dict per headline.
    scores = [analyzer.polarity_scores(headline)
              for headline in parsed_news_df['headline']]
    # Reuse the input index so the join lines up row for row, and fix the
    # column set so an empty input still yields every score column.
    scores_df = pd.DataFrame(scores, index=parsed_news_df.index,
                             columns=['neg', 'neu', 'pos', 'compound'])
    parsed_scored_news = parsed_news_df.join(scores_df)
    # Drop the raw text columns now that 'datetime' carries the timestamp.
    parsed_scored_news = parsed_scored_news.drop(['date', 'time'], axis=1)
    # The overall score is under 'compound'; the previous 'sentiment' key
    # matched no column, so 'sentiment_score' was never created.
    parsed_scored_news = parsed_scored_news.rename(columns={'compound': 'sentiment_score'})
    # Set the datetime column as the index.
    return parsed_scored_news.set_index('datetime')
# Module-level VADER analyzer; score_news() reads the name `sid` when called.
sid = SentimentIntensityAnalyzer()
# Smoke-run the parse -> score pipeline on the table fetched earlier. The
# result is discarded; a separate parse_news(news) call would only repeat
# the parse done here.
score_news(parse_news(news))

# Fetch the quote page a second time through `requests`.
url = "https://finviz.com/quote.ashx?t=AAPL"
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=10)
print(response)
html_page = bs(response.content, 'html.parser')
# Bare expression, kept from the original.
# NOTE(review): under Streamlit's "magic" a bare name on its own line is
# rendered into the app -- confirm whether that is intended here.
html_page
if not isinstance(html_page, bs):
    raise TypeError("`html_page` must be a BeautifulSoup object")
# Fetch and parse the news table for the configured ticker.
tableNews = get_news(ticker)
parseNews = parse_news(tableNews)
def score_news(parsed_news_df, analyzer=None):
    """Attach polarity scores to every headline in *parsed_news_df*.

    Args:
        parsed_news_df: DataFrame with the columns ``date``, ``time``,
            ``headline`` and ``datetime``.
        analyzer: Optional object with a ``polarity_scores(text)`` method
            returning a dict with the keys ``neg``, ``neu``, ``pos`` and
            ``compound``. When omitted, a new ``SentimentIntensityAnalyzer``
            is instantiated.

    Returns:
        A new DataFrame indexed by ``datetime`` with the columns
        ``headline``, ``neg``, ``neu``, ``pos`` and ``sentiment_score``
        (the ``compound`` score). The input frame is not modified.
    """
    # Instantiate the analyzer only when the caller did not supply one.
    sia = SentimentIntensityAnalyzer() if analyzer is None else analyzer
    # One score dict per headline.
    scores = [sia.polarity_scores(headline)
              for headline in parsed_news_df['headline']]
    # Reuse the input index so the join lines up row for row, and fix the
    # column set so an empty input still yields every score column.
    scores_df = pd.DataFrame(scores, index=parsed_news_df.index,
                             columns=['neg', 'neu', 'pos', 'compound'])
    parsed_scored = parsed_news_df.join(scores_df)
    # Index by timestamp and drop the raw 'date' / 'time' text columns.
    parsed_scored = parsed_scored.set_index('datetime')
    parsed_scored = parsed_scored.drop(['date', 'time'], axis=1)
    # Rename 'compound' to 'sentiment_score'. The key was misspelled as
    # 'compund', which made the rename a silent no-op.
    return parsed_scored.rename(columns={'compound': 'sentiment_score'})
# Run the full pipeline for the configured ticker, printing each stage.
tableNews = get_news(ticker)
print(tableNews)
parseNews = parse_news(tableNews)
print(parseNews)
df = score_news(parseNews)
print(df)
# Show the scored headlines in the Streamlit app.
st.dataframe(df)
# Check the table data: print the first row. Subscripting the table element
# with [0] is an attribute lookup in Beautiful Soup and raises KeyError, so
# the first <tr> is located with find() instead.
print(tableNews.find('tr'))
import plotly.express as px
def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    """Build a bar chart of the mean sentiment score per hour.

    Args:
        parsed_and_scored_news: DataFrame that can be resampled on its index
            and that has a ``sentiment_score`` column.
        ticker: String prefixed to the chart title.

    Returns:
        The figure produced by ``px.bar``. It is returned rather than shown
        so the caller decides how to display it.
    """
    # Average only the numeric columns; taking the mean of a text column
    # such as the headline is an error on current pandas.
    numeric_scores = parsed_and_scored_news.select_dtypes(include='number')
    # An offset object is used instead of the 'H' alias, which newer pandas
    # releases deprecate.
    mean_scores = numeric_scores.resample(pd.offsets.Hour()).mean()
    # Plot a bar chart with plotly.
    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title=ticker + ' Hourly Sentiment Scores')
    return fig
def plot_daily_sentiment(parsed_and_scored_news, ticker):
    """Build a bar chart of the mean sentiment score per day.

    Args:
        parsed_and_scored_news: DataFrame that can be resampled on its index
            and that has a ``sentiment_score`` column.
        ticker: String prefixed to the chart title.

    Returns:
        The figure produced by ``px.bar``. It is returned rather than shown
        so the caller decides how to display it.
    """
    # Average only the numeric columns; taking the mean of a text column
    # such as the headline is an error on current pandas.
    numeric_scores = parsed_and_scored_news.select_dtypes(include='number')
    mean_scores = numeric_scores.resample('D').mean()
    # Plot a bar chart with plotly.
    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title=ticker + ' Daily Sentiment Scores')
    return fig
# Build both aggregate charts from the scored news
hourly_fig = plot_hourly_sentiment(df, ticker)
daily_fig = plot_daily_sentiment(df, ticker)
# Render them in the Streamlit app, hourly chart first
for figure in (hourly_fig, daily_fig):
    st.plotly_chart(figure)