-
Notifications
You must be signed in to change notification settings - Fork 46
/
Copy pathinshorts.py
89 lines (72 loc) · 2.38 KB
/
inshorts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# pylint: disable=C0103, C0111, R0914
'''
Make the request to the inshorts url according to category with requests module.
Parse using beautiful soup and lxml to form the newsDictionary.
'''
import requests
from bs4 import BeautifulSoup
def _parseCard(card):
    '''
    Build the news object for a single ``news-card`` element.

    Every field is extracted independently; a field whose markup is missing
    from the card is set to None instead of aborting the whole card.
    '''
    def attempt(extractor):
        # AttributeError: find() returned None and was then dereferenced.
        # TypeError: find() returned None and was then subscripted.
        # KeyError: the tag exists but lacks the attribute (e.g. 'style').
        # IndexError: the style value holds no quoted url to split out.
        try:
            return extractor()
        except (AttributeError, TypeError, KeyError, IndexError):
            return None

    def titleLink():
        return card.find(class_='news-card-title').find('a')

    href = attempt(lambda: titleLink().get('href'))
    return {
        'title': attempt(lambda: titleLink().text),
        'imageUrl': attempt(
            lambda: card.find(class_='news-card-image')['style'].split("'")[1]),
        # Only prefix the site root when an href was actually found;
        # concatenating None to a str would raise TypeError.
        'url': None if href is None else 'https://www.inshorts.com' + href,
        'content': attempt(
            lambda: card.find(class_='news-card-content').find('div').text),
        'author': attempt(lambda: card.find(class_='author').text),
        # The keyword must be class_ to filter on the CSS class.
        'date': attempt(lambda: card.find(class_='date').text),
        'time': attempt(lambda: card.find(class_='time').text),
        'readMoreUrl': attempt(
            lambda: card.find(class_='read-more').find('a').get('href')),
    }


def getNews(category, timeout=10):
    '''
    Fetch the inshorts page for ``category`` and parse its news cards.

    category: string appended to 'https://www.inshorts.com/en/read/'.
    timeout: value passed to requests.get as its ``timeout`` argument, so a
        stalled connection fails instead of blocking forever.

    Returns a dict with the keys 'success', 'category' and 'data'. 'data' is
    a list with one dict per news card holding 'title', 'imageUrl', 'url',
    'content', 'author', 'date', 'time' and 'readMoreUrl'; any of these is
    None when it could not be found in the card. When the request raises, or
    when the page contains no news cards, 'success' is False, 'data' is empty
    and 'errorMessage' describes the problem.
    '''
    newsDictionary = {
        'success': True,
        'category': category,
        'data': []
    }

    try:
        htmlBody = requests.get(
            'https://www.inshorts.com/en/read/' + category, timeout=timeout)
    except requests.exceptions.RequestException as e:
        newsDictionary['success'] = False
        # Exceptions have no .message attribute on Python 3; str(e) works
        # on both Python 2 and 3.
        newsDictionary['errorMessage'] = str(e)
        return newsDictionary

    soup = BeautifulSoup(htmlBody.text, 'lxml')
    newsCards = soup.find_all(class_='news-card')
    if not newsCards:
        # A page without any news-card element is reported as an unknown
        # category.
        newsDictionary['success'] = False
        newsDictionary['errorMessage'] = 'Invalid Category'
        return newsDictionary

    newsDictionary['data'] = [_parseCard(card) for card in newsCards]
    return newsDictionary