Skip to content

Commit

Permalink
first for only downloading
Browse files Browse the repository at this point in the history
  • Loading branch information
Sachin committed Jul 2, 2016
0 parents commit d03e4da
Showing 1 changed file with 24 additions and 0 deletions.
24 changes: 24 additions & 0 deletions down.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import os
from bs4 import BeautifulSoup
from urllib.request import urlopen, urlretrieve

# Page whose links are scanned, and the directory that receives the downloads.
URL = 'https://www.rbi.org.in/Scripts/bs_viewcontent.aspx?Id=2009'
OUTPUT_DIR = '.'

# Fetch the page and decode the body as UTF-8. The `with` block closes the
# response even if reading or decoding raises.
with urlopen(URL) as open_url:
    html = open_url.read().decode('utf-8')

# File extensions that count as downloadable data files. They are compared
# against a lower-cased copy of the link so ".XLS" matches as well as ".xls".
data_suffixes = ('.csv', '.xls', '.xlsx')

soup = BeautifulSoup(html, "html.parser")
# The selector keeps only anchors whose href starts with "http://", so every
# href handled below is a string carrying that exact prefix.
for link in soup.select('a[href^="http://"]'):
    href = link.get('href')
    # str.endswith accepts a tuple, so one call covers every extension.
    if not href.lower().endswith(data_suffixes):
        continue

    # Save the file under the last path segment of the link.
    filename = os.path.join(OUTPUT_DIR, href.rsplit('/', 1)[-1])
    # Upgrade only the leading scheme: count=1 leaves any later "http://"
    # inside the path or query string untouched.
    href = href.replace('http://', 'https://', 1)
    urlretrieve(href, filename)
    # Printed once per downloaded file.
    print("Download Complete")

0 comments on commit d03e4da

Please sign in to comment.