Skip to content

Commit 0c33ec4

Browse files
authored
Added the Times of India API
1 parent a73428e commit 0c33ec4

File tree

1 file changed

+124
-0
lines changed

1 file changed

+124
-0
lines changed

Times of India.py

+124
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
import feedparser
2+
import itertools
3+
import requests
4+
import schedule
5+
import re
6+
7+
try:
8+
from bs4 import BeautifulSoup
9+
except ImportError:
10+
from BeautifulSoup import BeautifulSoup
11+
import pandas as pd
12+
13+
# Times of India RSS feeds, keyed by section name.
#
# Keys must be unique: a repeated key in a dict literal silently discards the
# earlier entry, so that feed would never be fetched.
feeds = {
    'Home': "https://timesofindia.indiatimes.com/rss.cms",
    'Top stories': "https://timesofindia.indiatimes.com/rssfeedstopstories.cms",
    # This entry previously repeated the 'India' URL, which scraped the India
    # feed twice and duplicated every one of its rows in the output.
    # NOTE(review): confirm this URL against the TOI RSS index page.
    'Most Recent Stroies': "https://timesofindia.indiatimes.com/rssfeedmostrecent.cms",
    'India': "https://timesofindia.indiatimes.com/rssfeeds/-2128936835.cms",
    # This entry was previously keyed 'NRI', colliding with the 'NRI' entry
    # further down and therefore being dropped from the dict.
    # NOTE(review): 'World' is believed to be the section this feed id
    # belongs to -- confirm against the TOI RSS index page.
    'World': "https://timesofindia.indiatimes.com/rssfeeds/296589292.cms",
    'Business': "https://timesofindia.indiatimes.com/rssfeeds/1898055.cms",
    'Cricket': "https://timesofindia.indiatimes.com/rssfeeds/4719161.cms",
    'Sports': "https://timesofindia.indiatimes.com/rssfeeds/4719148.cms",
    'Health': "https://timesofindia.indiatimes.com/rssfeeds/3908999.cms",
    'Science': "https://timesofindia.indiatimes.com/rssfeeds/-2128672765.cms",
    'Environment': "https://timesofindia.indiatimes.com/rssfeeds/2647163.cms",
    'Tech': "https://timesofindia.indiatimes.com/rssfeeds/5880659.cms",
    'Education': "https://timesofindia.indiatimes.com/rssfeeds/913168846.cms",
    'Mumbai': "https://timesofindia.indiatimes.com/rssfeeds/-2128838597.cms",
    'Delhi': "https://timesofindia.indiatimes.com/rssfeeds/-2128839596.cms",
    'Bangalore': "https://timesofindia.indiatimes.com/rssfeeds/-2128833038.cms",
    'Hyderabad': "https://timesofindia.indiatimes.com/rssfeeds/-2128816011.cms",
    'Chennai': "https://timesofindia.indiatimes.com/rssfeeds/2950623.cms",
    'Ahemdabad': "https://timesofindia.indiatimes.com/rssfeeds/-2128821153.cms",
    'Allahabad': "https://timesofindia.indiatimes.com/rssfeeds/3947060.cms",
    'Bhubaneswar': "https://timesofindia.indiatimes.com/rssfeeds/4118235.cms",
    'Coimbatore': "https://timesofindia.indiatimes.com/rssfeeds/7503091.cms",
    'Gurgaon': "https://timesofindia.indiatimes.com/rssfeeds/6547154.cms",
    'Guwahati': "https://timesofindia.indiatimes.com/rssfeeds/4118215.cms",
    'Hubli': "https://timesofindia.indiatimes.com/rssfeeds/3942695.cms",
    'Kanpur': "https://timesofindia.indiatimes.com/rssfeeds/3947067.cms",
    'Kolkata': "https://timesofindia.indiatimes.com/rssfeeds/-2128830821.cms",
    'Ludhiana': "https://timesofindia.indiatimes.com/rssfeeds/3947051.cms",
    'Mangalore': "https://timesofindia.indiatimes.com/rssfeeds/3942690.cms",
    'Mysore': "https://timesofindia.indiatimes.com/rssfeeds/3942693.cms",
    'Noida': "https://timesofindia.indiatimes.com/rssfeeds/8021716.cms",
    'Pune': "https://timesofindia.indiatimes.com/rssfeeds/-2128821991.cms",
    'Goa': "https://timesofindia.indiatimes.com/rssfeeds/3012535.cms",
    'Chandigarh': "https://timesofindia.indiatimes.com/rssfeeds/-2128816762.cms",
    'Lucknow': "https://timesofindia.indiatimes.com/rssfeeds/-2128819658.cms",
    'Patna': "https://timesofindia.indiatimes.com/rssfeeds/-2128817995.cms",
    'Jaipur': "https://timesofindia.indiatimes.com/rssfeeds/3012544.cms",
    'Nagpur': "https://timesofindia.indiatimes.com/rssfeeds/442002.cms",
    'Rajkot': "https://timesofindia.indiatimes.com/rssfeeds/3942663.cms",
    'Ranchi': "https://timesofindia.indiatimes.com/rssfeeds/4118245.cms",
    'Surat': "https://timesofindia.indiatimes.com/rssfeeds/3942660.cms",
    'Vadodara': "https://timesofindia.indiatimes.com/rssfeeds/3942666.cms",
    'Varanasi': "https://timesofindia.indiatimes.com/rssfeeds/3947071.cms",
    'Thane': "https://timesofindia.indiatimes.com/rssfeeds/3831863.cms",
    'Thiruvananthapuram': "https://timesofindia.indiatimes.com/rssfeeds/878156304.cms",
    'US': "https://timesofindia.indiatimes.com/rssfeeds/30359486.cms",
    'NRI': "https://timesofindia.indiatimes.com/rssfeeds/7098551.cms",
    'Pakistan': "https://timesofindia.indiatimes.com/rssfeeds/30359534.cms",
    'South Asia': "https://timesofindia.indiatimes.com/rssfeeds/3907412.cms",
    'UK': "https://timesofindia.indiatimes.com/rssfeeds/2177298.cms",
    'Europe': "https://timesofindia.indiatimes.com/rssfeeds/1898274.cms",
    'China': "https://timesofindia.indiatimes.com/rssfeeds/1898184.cms",
    'Middle East': "https://timesofindia.indiatimes.com/rssfeeds/1898272.cms",
    'Rest of World': "https://timesofindia.indiatimes.com/rssfeeds/671314.cms",
}
69+
70+
# Module-level accumulators filled in while the script runs:
#   all_links    - article URLs taken from the feed items
#   all_category - article titles taken from the feed items
#   all_labels   - one label string per collected article
all_links, all_category, all_labels = [], [], []
73+
74+
def parseRSS(rss_url):
    """Fetch the feed at ``rss_url`` and parse it with feedparser.

    Returns the object produced by ``feedparser.parse`` for that URL.
    """
    parsed_feed = feedparser.parse(rss_url)
    return parsed_feed
77+
78+
def get(rss_url):
    """Record the link and title of every item in the feed at ``rss_url``.

    Nothing is returned. Each item's link is appended to the module-level
    ``all_links`` list and its title to ``all_category``, at matching
    positions.

    Items without a link are skipped, since there would be nothing to
    download for them later. A missing title is stored as an empty string.
    Previously either omission raised ``KeyError`` and aborted the run.
    """
    # The lists are only mutated in place, so no ``global`` statement is needed.
    feed = parseRSS(rss_url)
    for newsitem in feed['items']:
        link = newsitem.get('link')
        if not link:
            continue
        all_links.append(link)
        all_category.append(newsitem.get('title', ''))
86+
87+
88+
# Collect links and titles from every configured feed. Only the URLs are
# needed here, so iterate the dict's values rather than its items.
for url in feeds.values():
    get(url)

# Everything gathered by this script is labelled "REAL": one label per
# collected title.
all_labels.extend("REAL" for _ in all_category)
94+
95+
96+
def _extract_body(soup):
    """Return the concatenated article text found in ``soup``, or ''.

    The first ``div`` class is tried first; the second is the fallback used
    only when the first yields no text. Empty and non-breaking-space-only
    blocks are ignored.
    """
    # NOTE(review): the first class string ends with a space; verify that it
    # still matches the current page markup.
    for css_class in ("_3WlLe clearfix ", "Normal"):
        texts = (div.get_text() for div in soup.findAll("div", {"class": css_class}))
        parts = [text for text in texts if text not in ('', '\xa0')]
        if parts:
            return ''.join(parts)
    return ''


# Download every article and extract its text. ``content`` must end up with
# exactly one entry per link, in the same order, because the lists are zipped
# together into rows afterwards. A failed article therefore contributes an
# empty string rather than being skipped, which would shift every later text
# onto the wrong link and title.
content = []
for x in all_links:
    print(x)
    try:
        # SECURITY: verify=False disables TLS certificate checks; kept because
        # some of the sites do not present a valid certificate.
        # The timeout stops a single unresponsive server from hanging the run.
        r = requests.get(x, verify=False, timeout=30)
        body = _extract_body(BeautifulSoup(r.content, 'lxml'))
    except Exception as err:
        # Best-effort per article: report the failure and keep the row aligned.
        print("Failed to fetch or parse {}: {}".format(x, err))
        body = ''
    content.append(body)
120+
121+
# Build one row per article (link, text, title, label) and save the table as
# an Excel workbook without the index column.
column_names = ['all_links', 'text', 'title', 'label']
output_path = "times of india.xlsx"

list_of_tuples = list(zip(all_links, content, all_category, all_labels))
df = pd.DataFrame(list_of_tuples, columns=column_names)
df.to_excel(output_path, index=False)

0 commit comments

Comments
 (0)