Skip to content

Commit 4d2d690

Browse files
Remove pandas-datareader (#1033)
* Remove pandas-datareader Use JSON/pandas solution for downloading World Bank indicator data. * Add function `download_world_bank_indicator`. * Add unit test. * Update requirements. * Update CHANGELOG.md * Remove stray print and fix comments * Switch to compatible Petals target branch for testing REVERT THIS! * Fix linter warnings - Add timeout parameter to requests call - Remove unused import * #168 is merged Co-authored-by: Lukas Riedel <[email protected]> * Apply suggestions from code review Use single list instead of nested lists Co-authored-by: Emanuel Schmid <[email protected]> * Update reading WB data * Fall back to parsing dates if conversion to ints fails. * Throw a ValueError if no data is available. --------- Co-authored-by: emanuel-schmid <[email protected]> Co-authored-by: Emanuel Schmid <[email protected]>
1 parent a2d2297 commit 4d2d690

File tree

5 files changed

+112
-13
lines changed

5 files changed

+112
-13
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,17 @@ Code freeze date: YYYY-MM-DD
1010

1111
### Dependency Changes
1212

13+
Removed:
14+
15+
- `pandas-datareader`
16+
1317
### Added
1418
- Added instructions to install Climada petals on Euler cluster in `doc.guide.Guide_Euler.ipynb` [#1029](https://github.com/CLIMADA-project/climada_python/pull/1029)
1519

1620
### Changed
1721
- `Hazard.local_exceedance_intensity`, `Hazard.local_return_period` and `Impact.local_exceedance_impact`, `Impact.local_return_period`, using the `climada.util.interpolation` module: New default (no binning), binning on decimals, and faster implementation [#1012](https://github.com/CLIMADA-project/climada_python/pull/1012)
22+
- World Bank indicator data is now downloaded directly from their API via the function `download_world_bank_indicator`, instead of relying on the `pandas-datareader` package [#1033](https://github.com/CLIMADA-project/climada_python/pull/1033)
23+
1824
### Fixed
1925
- NaN plotting issues in `geo_im_from_array` [#1038](https://github.com/CLIMADA-project/climada_python/pull/1038)
2026

climada/util/finance.py

Lines changed: 77 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,16 @@
2121

2222
__all__ = ["net_present_value", "income_group", "gdp"]
2323

24+
import json
2425
import logging
2526
import shutil
26-
import warnings
2727
import zipfile
2828
from pathlib import Path
2929

3030
import numpy as np
3131
import pandas as pd
3232
import requests
3333
from cartopy.io import shapereader
34-
from pandas_datareader import wb
3534

3635
from climada.util.constants import SYSTEM_DIR
3736
from climada.util.files_handler import download_file
@@ -181,6 +180,77 @@ def gdp(cntry_iso, ref_year, shp_file=None, per_capita=False):
181180
return close_year, close_val
182181

183182

183+
def download_world_bank_indicator(
    country_code: str, indicator: str, parse_dates: bool = False
) -> pd.Series:
    """Download indicator data from the World Bank API for all years or dates on record

    Parameters
    ----------
    country_code : str
        The country code in ISO alpha 3
    indicator : str
        The ID of the indicator in the World Bank API
    parse_dates : bool, optional
        Whether the dates of the indicator data should be parsed as datetime objects.
        If ``False`` (default), this will first try to parse them as ``int`` (this only
        works for yearly data), and then parse as datetime objects if that fails.

    Returns
    -------
    pd.Series
        A series with the values of the indicator for all dates (years) on record.
        The series is indexed by date and named after the indicator's description
        as reported by the API.

    Raises
    ------
    RuntimeError
        If the API answers with the error message ID ``"120"``.
    ValueError
        If the API reports zero pages of data for the requested country and indicator.
    """
    # Download data from API, one page per request. The total number of pages is only
    # known once the first response arrived, hence the unbounded initial value.
    raw_data = []
    pages = np.inf
    page = 1
    while page <= pages:
        response = requests.get(
            f"https://api.worldbank.org/v2/countries/{country_code}/indicators/"
            f"{indicator}?format=json&page={page}",
            timeout=30,
        )
        json_data = json.loads(response.text)

        # Check if we received an error message
        try:
            if json_data[0]["message"][0]["id"] == "120":
                raise RuntimeError(
                    "Error requesting data from the World Bank API. Did you use the "
                    "correct country code and indicator ID?"
                )
        # If no "message" entry exists, we should be fine
        except KeyError:
            pass

        # Check if there is no data available
        pages = json_data[0]["pages"]
        if pages == 0:
            raise ValueError(
                f"No data available for country {country_code}, indicator {indicator}"
            )

        # Update the data
        page = page + 1
        raw_data.extend(json_data[1])

    # Create dataframe
    data = pd.DataFrame.from_records(raw_data)

    # Maybe parse dates
    if parse_dates:
        data["date"] = pd.DatetimeIndex(data["date"])
    else:
        try:
            data["date"] = data["date"].astype("int")
        # Casting non-numeric date strings to int raises ValueError (not TypeError),
        # so both must be caught for the documented datetime fallback to kick in.
        except (TypeError, ValueError):
            data["date"] = pd.DatetimeIndex(data["date"])

    # Only return indicator data (with a proper name)
    return data.set_index("date")["value"].rename(data["indicator"].iloc[0]["value"])
252+
253+
184254
def world_bank(cntry_iso, ref_year, info_ind):
185255
"""Get country's GDP from World Bank's data at a given year, or
186256
closest year value. If no data, get the natural earth's approximation.
@@ -204,18 +274,14 @@ def world_bank(cntry_iso, ref_year, info_ind):
204274
IOError, KeyError, IndexError
205275
"""
206276
if info_ind != "INC_GRP":
207-
with warnings.catch_warnings():
208-
warnings.simplefilter("ignore")
209-
cntry_gdp = wb.download(
210-
indicator=info_ind, country=cntry_iso, start=1960, end=2030
211-
)
212-
years = np.array(
213-
[int(year) for year in cntry_gdp.index.get_level_values("year")]
277+
cntry_gdp = download_world_bank_indicator(
278+
indicator=info_ind, country_code=cntry_iso, parse_dates=False
214279
)
280+
years = cntry_gdp.index
215281
sort_years = np.abs(years - ref_year).argsort()
216282
close_val = cntry_gdp.iloc[sort_years].dropna()
217-
close_year = int(close_val.iloc[0].name[1])
218-
close_val = float(close_val.iloc[0].values)
283+
close_year = close_val.index[0]
284+
close_val = float(close_val.iloc[0])
219285
else: # income group level
220286
fn_ig = SYSTEM_DIR.joinpath("OGHIST.xls")
221287
dfr_wb = pd.DataFrame()

climada/util/test/test_finance.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
from climada.util.finance import (
2828
_gdp_twn,
29+
download_world_bank_indicator,
2930
gdp,
3031
income_group,
3132
nat_earth_adm0,
@@ -137,6 +138,34 @@ def test_wb_esp_1950_pass(self):
137138
self.assertEqual(wb_year, ref_year)
138139
self.assertAlmostEqual(wb_val, ref_val)
139140

141+
def test_download_wb_data(self):
    """Check data download from the World Bank API, date parsing, and raised errors"""
    gdp_indicator = "NY.GDP.MKTP.CD"

    # Reference values for Spain; these depend on what the API currently serves
    yearly = download_world_bank_indicator("ESP", gdp_indicator)
    self.assertAlmostEqual(yearly[1960], 12424514013.7604)
    self.assertEqual(yearly.name, "GDP (current US$)")

    # With date parsing enabled, the index holds datetime objects
    dated = download_world_bank_indicator("ESP", gdp_indicator, parse_dates=True)
    self.assertEqual(dated.index[-1], np.datetime64("1960-01-01"))

    # An invalid country code or indicator ID triggers the same RuntimeError
    for bad_country, bad_indicator in (
        ("Spain", gdp_indicator),
        ("ESP", "BogusIndicator"),
    ):
        with self.assertRaisesRegex(
            RuntimeError,
            "Did you use the correct country code",
        ):
            download_world_bank_indicator(bad_country, bad_indicator)

    # A valid request without any data on record raises a ValueError
    with self.assertRaisesRegex(
        ValueError,
        "No data available for country AIA, indicator NY.GDP.MKTP.CD",
    ):
        download_world_bank_indicator("AIA", gdp_indicator)
168+
140169

141170
class TestWealth2GDP(unittest.TestCase):
142171
"""Test Wealth to GDP factor extraction"""

requirements/env_climada.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ dependencies:
2020
- openpyxl>=3.1
2121
- osm-flex>=1.1
2222
- pandas>=2.1,<2.2 # 2.2 is not compatible with the default pytables=3.7 and yields a very high deprecation warning number through geopandas
23-
- pandas-datareader>=0.10
2423
- pathos>=0.3
2524
- pint>=0.24
2625
- pip

setup.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@
7777
"openpyxl",
7878
"overpy",
7979
"pandas",
80-
"pandas-datareader",
8180
"pathos",
8281
"peewee",
8382
"pillow",

0 commit comments

Comments
 (0)