Skip to content

Commit

Permalink
Updated examples for STATA and Python. Partially linted code
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexGherardelli committed May 22, 2024
1 parent 038c7e6 commit 2b1985d
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 96 deletions.
8 changes: 7 additions & 1 deletion data_bridges_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,8 @@
"""
Wrapper for DataBridges client, making it easier to load data in Python, R and STATA.
"""

from .get_data import DataBridgesShapes
from .load_stata import load_stata
from .load_stata import load_stata

__all__ = ['DataBridgesShapes', 'load_stata']
119 changes: 53 additions & 66 deletions data_bridges_utils/get_data.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,39 @@
import time
import data_bridges_client
import logging
from datetime import timedelta, date
import pandas as pd
import yaml

from data_bridges_client.rest import ApiException
from pprint import pprint
from data_bridges_client.token import WfpApiToken
import yaml
import os
from datetime import timedelta
from datetime import date
import pandas as pd
import logging
import data_bridges_client

logger = logging.getLogger(__name__)

class DataBridgesShapes:
"""
Retrieves survey data using the specified configuration and access type.
Args:
survey_id (str): The ID of the survey to retrieve.
access_type (str): The type of access to use for retrieving the survey data.
Can be one of '', 'full', 'draft', 'official', or 'public'.
page_size (int, optional): The number of items to retrieve per page. Defaults to 200.
Returns:
pandas.DataFrame: A DataFrame containing the retrieved survey data.
"""


def __init__(self, yaml_config_path):
self.configuration = self.setup_configuration_and_authentication(yaml_config_path)

def __repr__(self):
return f"DataBridgesShapes(yamlpath='{self.configuration.host}')"
return "DataBridgesShapes(yamlpath='%s')" % self.configuration.host

def __str__(self):
return f"DataBridgesShapes(yamlpath='{self.configuration.host}') \n\n Brought to you with <3 by SHAPES \n\n"
return ("DataBridgesShapes(yamlpath='%s') \n\n Brought to you with <3 by SHAPES \n\n"
% self.configuration.host)

def setup_configuration_and_authentication(self, yaml_config_path):
"""Loads configuration from a YAML file and sets up authentication."""
Expand All @@ -39,25 +52,24 @@ def setup_configuration_and_authentication(self, yaml_config_path):

token = WfpApiToken(api_key=key, api_secret=secret)
configuration = data_bridges_client.Configuration(
host=host,
access_token=token.refresh(scopes=scopes)
host=host, access_token=token.refresh(scopes=scopes)
)
return configuration

def get_household_survey(self, survey_id, access_type, page_size=200):
"""Retrieves survey data using the specified configuration and access type.
Args:
survey_id (str): The ID of the survey to retrieve.
access_type (str): The type of access to use for retrieving the survey data. Can be one of '', 'full', 'draft', 'official', or 'public'.
access_type (str): The type of access to use for retrieving the survey data.
Can be one of '', 'full', 'draft', 'official', or 'public'.
page_size (int, optional): The number of items to retrieve per page. Defaults to 200.
Returns:
pandas.DataFrame: A DataFrame containing the retrieved survey data.
"""

responses = []
page_size = page_size
total_items = 1
max_item = 0
page = 0
Expand All @@ -77,17 +89,15 @@ def get_household_survey(self, survey_id, access_type, page_size=200):
'official': api_instance.household_official_use_base_data_get,
'public': api_instance.household_public_base_data_get
}.get(access_type)(survey_id=survey_id, page=page, env=env)
logger.info(f"Fetching page {page}")
logger.info(f"Items: {len(api_survey.items)}")
responses.extend(
[item for item in api_survey.items]
)
logger.info("Fetching page %s", page)
logger.info("Items: %s", len(api_survey.items))
responses.extend(api_survey.items)
total_items = api_survey.total_items
max_item = len(api_survey.items) + max_item
time.sleep(1)

except ApiException as e:
logger.error(f"Exception when calling Household data-> {access_type}{e}\n")
logger.error("Exception when calling Household data-> %s%s\n", access_type, e)
exit()

df = pd.DataFrame(responses)
Expand All @@ -99,7 +109,8 @@ def get_prices(self, country_iso3, survey_date, page_size=1000):
Args:
country_iso3 (str): The ISO 3-letter country code.
survey_date (str): The survey date in ISO format (e.g. '2022-01-01'). If an empty string is provided, the function will fetch data starting from January 1, 1990.
survey_date (str): The survey date in ISO format (e.g. '2022-01-01').
If an empty string is provided, the function will fetch data starting from Jan 1, 1990.
page_size (int, optional): The number of items to fetch per page. Defaults to 1000.
Returns:
Expand All @@ -111,34 +122,26 @@ def get_prices(self, country_iso3, survey_date, page_size=1000):
else:
start_date = date.fromisocalendar(1990, 1, 1)
responses = []
page_size = page_size
total_items = 20
max_item = 0
page = 0
while total_items > max_item:
page = page + 1 # int | page number for paged results (optional) (default to 1)
page += 1
with data_bridges_client.ApiClient(self.configuration) as api_client:
# Create an instance of the API class
api_instance = data_bridges_client.MarketPricesApi(api_client)
env = 'prod' # str | Environment. * `prod` - api.vam.wfp.org * `dev` - dev.api.vam.wfp.org (optional)
env = 'prod'

try:
# Provides the list of categories.
api_prices = api_instance.market_prices_price_monthly_get(
country_code=country_iso3,
format='JSON',
page=page,
env=env,
start_date=start_date)
responses.extend(
[item.to_dict() for item in api_prices.items]
country_code=country_iso3, format='JSON', page=page, env=env, start_date=start_date
)
responses.extend(item.to_dict() for item in api_prices.items)
total_items = api_prices.total_items
logger.info("Fetching page %s" % page)
logger.info("Fetching page %s", page)
max_item = page * page_size
time.sleep(1)
except ApiException as e:
logger.error("Exception when calling Market price data->market_prices_price_monthly_get: %s\n" % e)
logger.error("Exception when calling Market price data->market_prices_price_monthly_get: %s\n", e)
exit()
df = pd.DataFrame(responses)
return df
Expand All @@ -156,35 +159,26 @@ def get_exchange_rates(self, country_iso3, page_size=1000):
"""

responses = []

# Enter a context with an instance of the API client
page_size = page_size
total_items = 20
max_item = 0
page = 0
while total_items > max_item:
page = page + 1 # int | page number for paged results (optional) (default to 1)
page += 1
with data_bridges_client.ApiClient(self.configuration) as api_client:
# Create an instance of the API class
api_instance = data_bridges_client.CurrencyApi(api_client)
env = 'prod' # str | Environment. * `prod` - api.vam.wfp.org * `dev` - dev.api.vam.wfp.org (optional)
env = 'prod'

try:
# Provides the list of categories.
api_exchange_rates = api_instance.currency_usd_indirect_quotation_get(
country_iso3=country_iso3,
format='JSON',
page=page,
env=env)
responses.extend(
[item.to_dict() for item in api_exchange_rates.items]
country_iso3=country_iso3, format='JSON', page=page, env=env
)
responses.extend(item.to_dict() for item in api_exchange_rates.items)
total_items = api_exchange_rates.total_items
logger.info("Fetching page %s" % page)
logger.info("Fetching page %s", page)
max_item = page * page_size
time.sleep(1)
except ApiException as e:
logger.error("Exception when calling Exchange rates data->household_full_data_get: %s\n" % e)
logger.error("Exception when calling Exchange rates data->household_full_data_get: %s\n", e)
exit()
df = pd.DataFrame(responses)
return df
Expand All @@ -194,36 +188,29 @@ def get_ipc_equivalent(self, country_iso3: str, year: int, page_size=1000):
Retrieves food security data for a given country ISO3 code from the Data Bridges API.
"""
responses = []
page_size = page_size
total_items = 20
max_item = 0
page = 0
while total_items > max_item:
page = page + 1 # int | page number for paged results (optional) (default to 1)
page += 1
with data_bridges_client.ApiClient(self.configuration) as api_client:
# Create an instance of the API class
api_instance = data_bridges_client.FoodSecurityApi(api_client)
env = 'prod' # str | Environment. * `prod` - api.vam.wfp.org * `dev` - dev.api.vam.wfp.org (optional)
env = 'prod'

try:
# Provides the list of categories.
api_food_security = api_instance.food_security_list_get(
country_iso3=country_iso3,
year = year,
page=page,
env=env,
)
responses.extend(
[item.to_dict() for item in api_food_security.items]
country_iso3=country_iso3, year=year, page=page, env=env,
)
responses.extend(item.to_dict() for item in api_food_security.items)
total_items = api_food_security.total_items
logger.info("Fetching page %s" % page)
logger.info("Fetching page %s", page)
max_item = page * page_size
time.sleep(1)
except ApiException as e:
logger.error("Exception when calling Food security data->food_security_list_get: %s\n" % e)
logger.error("Exception when calling Food security data->food_security_list_get: %s\n", e)
df = pd.DataFrame(responses)
return df


if __name__ == "__main__":
pass
22 changes: 1 addition & 21 deletions sample_R_load.R
Original file line number Diff line number Diff line change
@@ -1,21 +1 @@
# Load the reticulate package
library(reticulate)

# Set the path to your Python script
py_script <- "path/to/sample_python_load.py"

# Source the Python script
source_python(py_script)

# Call the Python functions
config_path <- "path/to/data_bridges_api_config.yaml"
survey_id <- 3329
access_type <- "full"
country_iso3 <- "AFG"
survey_date <- "2022-01-01"

# Get survey data
survey_data <- get_survey_data(config_path, survey_id, access_type)

# Get price data
price_data <- get_price_data(config_path, country_iso3, survey_date)
# TODO: Load into R dataframe
2 changes: 0 additions & 2 deletions sample_python_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,3 @@
# TODO: other API calls, including GORP and IPC
# food_sec = client.get_ipc_equivalent("AFG", 2023)
# print(food_sec.head())


13 changes: 7 additions & 6 deletions sample_stata_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
Only works if user has STATA 18+ installed and added to PATH.
"""

from data_bridges_utils.get_data import shapes_get_survey
from data_bridges_utils.load_stata import test_load_stata
from data_bridges_utils import DataBridgesShapes, load_stata

config_path = r"data_bridges_api_config.yaml"
CONFIG_PATH = r"data_bridges_api_config.yaml"

survey_data = shapes_get_survey(surveyid=3329, yamlpath=config_path, access_type='full')
price_data = shapes_get_prices(country_iso3='AFG', yamlpath=config_path, survey_date='2024-12-01')
exchange_rates_data = shapes_get_exchangerates(country_iso3='AFG', yamlpath=config_path)
# Initialize DataBridges client with credentials from YAML file
client = DataBridgesShapes(CONFIG_PATH)

# Get houhold data for survey id
survey_data = client.get_household_survey(survey_id=3329, access_type='full')

# Load into STATA dataframe
ds = load_stata(survey_data)

0 comments on commit 2b1985d

Please sign in to comment.