-
Notifications
You must be signed in to change notification settings - Fork 0
/
newsflash.py
173 lines (143 loc) · 4.34 KB
/
newsflash.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import logging

from newspaper import *
from artificial_intelligence import process_batch
from content_categorizer import classify_topic, sentiment_analysis
from news_scraper import get_news
from summary import summarize_text
# Source identifiers grouped by subject area.

# Business sources
business_sources = [
    "australian-financial-review", "bloomberg", "business-insider",
    "financial-post", "fortune", "the-wall-street-journal",
]

# Technology sources
tech_sources = [
    "ars-technica", "engadget", "hacker-news", "recode",
    "techcrunch", "techradar", "the-next-web", "wired",
]

# Science sources
science_sources = [
    "national-geographic",
    "new-scientist",
    "next-big-future",
]

# Sports sources
sports_sources = [
    "espn", "bleacher-report", "four-four-two", "nfl-news",
    "nhl-news", "talksport", "the-sport-bible", "bbc-sport",
]

# Political sources
political_sources = [
    "bbc-news", "cnn", "fox-news", "abc-news",
    "breitbart-news", "axios", "the-hill", "msnbc",
]

# Political affiliation label for each political source
biases = {
    "bbc-news": "center", "cnn": "liberal",
    "fox-news": "conservative", "abc-news": "liberal",
    "breitbart-news": "conservative", "axios": "center",
    "the-hill": "center", "msnbc": "liberal",
}

# Lookup table from a topic category name to the source list used for it.
# Several categories deliberately share one list object; key order is kept
# the same as the hand-written table this replaces.
category_to_list_map = {
    "Business": business_sources,
    "Computers": tech_sources,
    "Science": science_sources,
    **dict.fromkeys(("Game", "Sports"), sports_sources),
    **dict.fromkeys(
        ("Society", "Health", "Games", "Arts", "Home"), political_sources
    ),
    "Recreation": sports_sources,
}
def collect_news(topic):
    """Fetch, summarise and bias-tag news articles about ``topic``.

    The topic is classified with ``classify_topic`` to choose a source list
    from ``category_to_list_map``; articles are then fetched with
    ``get_news``, summarised, and labelled with a political bias.

    Args:
        topic: Free-text user query.  Spaces are replaced with ``+`` before
            the query is handed to ``get_news``.

    Returns:
        A ``(parsed_articles, full_texts)`` tuple.  ``parsed_articles`` is
        the mapping returned by ``get_news`` with every value replaced by
        ``[item0, item1, summary, bias, item3]`` (items taken from the
        original value); ``full_texts`` holds item 2 of every original
        value, in iteration order.
    """
    default_sources = category_to_list_map["Society"]

    # Classification is best-effort: any failure means "no category".
    try:
        topic_category = classify_topic({"user-query": topic})["user-query"]
    except Exception:
        topic_category = ""

    # A missing category *or* one that has no entry in the map falls back to
    # the default list (a plain index lookup raised KeyError for the latter).
    sources = category_to_list_map.get(topic_category, default_sources)

    # Format topic for the API query.
    query = topic.replace(" ", "+")

    parsed_articles = get_news(dict.fromkeys(sources, False), query)
    if "No Source Found" in parsed_articles:
        # Nothing usable from the category-specific sources: retry with the
        # default list.
        parsed_articles = get_news(dict.fromkeys(default_sources, False), query)

    full_texts = []
    # Only values of existing keys are replaced below, so updating the dict
    # while iterating over it is safe.
    for source_id, parsed_arr in parsed_articles.items():
        article_text = parsed_arr[2]
        full_texts.append(article_text)

        # NOTE(review): the fallback label is "centrist" while ``biases``
        # uses "center" -- confirm which spelling consumers expect.
        bias = biases.get(source_id, "centrist")

        summary = summarize_text(article_text)
        if parsed_arr[1] != "Article summary forbidden":
            # parsed_arr[0] is handed to ``Article`` as-is (presumably the
            # article URL -- verify against get_news).  This second summary
            # is optional: on any failure keep the one computed above.
            try:
                article_nlp = Article(parsed_arr[0])
                article_nlp.download()
                article_nlp.parse()
                article_nlp.nlp()
            except Exception:
                logging.getLogger(__name__).debug(
                    "Alternative summary failed for %r", parsed_arr[0], exc_info=True
                )
            else:
                # Switch only when the alternative summary is less than a
                # quarter of the length of the current one.
                if 4 * len(article_nlp.summary) < len(summary):
                    summary = article_nlp.summary

        parsed_articles[source_id] = [
            parsed_arr[0],
            parsed_arr[1],
            summary,
            bias,
            parsed_arr[3],
        ]

    process_batch(full_texts)
    # NOTE(review): this prints every full article text to stdout; looks like
    # leftover debugging output -- confirm before removing.
    print(full_texts)
    return parsed_articles, full_texts
def analyze_article(url, nlp):
    """Download and parse the article at ``url`` and return its details.

    Args:
        url: Passed to ``Article`` together with a ``Config`` whose
            ``browser_user_agent`` is set to a fixed desktop-browser string.
        nlp: When falsy, the NLP step is skipped.

    Returns:
        ``(title, top_image, text)`` when ``nlp`` is falsy; otherwise
        ``(title, top_image, keywords, summary, sentiment)`` where
        ``sentiment`` is the ``"article_analysis"`` entry of the
        ``sentiment_analysis`` result for the article summary.
    """
    request_config = Config()
    request_config.browser_user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"

    article = Article(url, config=request_config)
    article.download()
    article.parse()

    headline, lead_image = article.title, article.top_image

    if nlp:
        article.nlp()
        analysis = sentiment_analysis({"article_analysis": article.summary})
        sentiment = analysis["article_analysis"]
        return headline, lead_image, article.keywords, article.summary, sentiment

    return headline, lead_image, article.text