diff --git a/scripts/process.py b/scripts/process.py index 6ce4792..482b5d7 100644 --- a/scripts/process.py +++ b/scripts/process.py @@ -1,4 +1,4 @@ -import urllib.request +import requests import os import csv import tempfile @@ -13,43 +13,41 @@ def download(): source = 'http://api.worldbank.org/countries/all/indicators/SP.POP.TOTL?downloadformat=csv' - with urllib.request.urlopen(source) as response: - with open(tmpfile.name, mode="wb") as d: - d.write(response.read()) - tmpfile.close() + response = requests.get(source) + with open(tmpfile.name, 'wb') as d: + d.write(response.content) with zipfile.ZipFile(tmpfile.name, 'r') as zip_ref: zip_ref.extractall(tmpdir.name) - tmpfile.close() - os.unlink(tmpfile.name) + + os.unlink(tmpfile.name) for path in os.scandir(tmpdir.name): if path.is_file(): - #print(path.name) if path.name.startswith('API_SP.POP.TOTL_DS2_EN'): - filename = tmpdir.name + '/' + path.name - + filename = os.path.join(tmpdir.name, path.name) + def process(): - # un-pivot the table global filename - fo = open(filename) - lines = [ row for row in csv.reader(fo) ] + with open(filename) as fo: + lines = [row for row in csv.reader(fo)] headings = lines[4] lines = lines[5:] - outheadings = [ 'Country Name', 'Country Code', 'Year', 'Value' ] + outheadings = ['Country Name', 'Country Code', 'Year', 'Value'] outlines = [] for row in lines: for idx, year in enumerate(headings[4:]): if row[idx+4]: - # do not convert to float as we end up with scientific notation value = row[idx+4] outlines.append(row[:2] + [int(year), value]) - writer = csv.writer(open('data/population.csv', 'w')) - writer.writerow(outheadings) - writer.writerows(outlines) + with open('data/population.csv', 'w', newline='') as f: + writer = csv.writer(f) + writer.writerow(outheadings) + writer.writerows(outlines) -download() -process() +if __name__ == '__main__': + download() + process() diff --git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 0000000..ef487e0 --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1 @@ +requests==2.32.3 \ No newline at end of file