Skip to content

Commit

Permalink
Upgrade package to use pymc4 (#63)
Browse files Browse the repository at this point in the history
* Replaced all theano calls with the respective aesara calls. First steps
to upgrade to pymc4

* Replaced last occurrences of theano in the docstrings

* Updated example script: one_bundesland

* Further work on example script, also moved plotting routines into folder.

* Moved timeseries and distribution plots into their own files. Also refactored to use InferenceData

* Rerun one_bundesland example script.

* Added watermark to example_one_bundesland notebook

* Updated example bundeslaender script

* Rerun what if notebook

* Rerun paper scenario

* Fixed docs

Co-authored-by: semohr <[email protected]>
  • Loading branch information
semohr and semohr authored Jul 12, 2022
1 parent faffb69 commit 700aaab
Show file tree
Hide file tree
Showing 41 changed files with 3,294 additions and 2,487 deletions.
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ python:
- "3.6"
- "3.7"
- "3.8"
- "3.9"
- "3.10"

install:
- pip install --upgrade pip
Expand Down
2 changes: 1 addition & 1 deletion covid19_inference/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

# from .data_retrieval import GOOGLE
from . import data_retrieval
from . import plot
from .plot import *
from . import model
from .model import Cov19Model
from .sampling import robust_sample
Expand Down
27 changes: 11 additions & 16 deletions covid19_inference/_dev_helper.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
# ------------------------------------------------------------------------------ #
# @Author: F. Paul Spitzner
# @Email: [email protected]
# @Created: 2020-04-21 08:57:53
# @Last Modified: 2020-05-20 10:48:23
# ------------------------------------------------------------------------------ #
# Let's have a dummy instance of model and trace so we can play around with the
# interface and plotting.
# Analog to example_bundeslaender.ipynb
Expand All @@ -12,7 +7,7 @@
import logging

import numpy as np
import pymc3 as pm
import pymc as pm
import datetime

from .model import *
Expand All @@ -23,14 +18,14 @@

def create_example_instance(num_change_points=3):
"""
Parameters
----------
num_change_points : int
the standard change points to inlcude, at most 3
Retruns
-------
(model, trace) with example data
Parameters
----------
num_change_points : int
the standard change points to inlcude, at most 3
Retruns
-------
(model, trace) with example data
"""

jhu = data_retrieval.JHU()
Expand Down Expand Up @@ -98,6 +93,6 @@ def create_example_instance(num_change_points=3):
student_t_likelihood(new_cases_inferred)

# make it fast
trace = pm.sample(model=model, tune=1, draws=1)
idata = pm.sample(model=model, tune=1, draws=1)

return model, trace
return model, idata
2 changes: 1 addition & 1 deletion covid19_inference/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.3.6"
__version__ = "0.4.0"
98 changes: 49 additions & 49 deletions covid19_inference/data_retrieval/_OWD.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,39 +132,39 @@ def _to_iso(self):

def get_possible_countries(self):
"""
Can be used to obtain all different possible countries in the dataset.
Can be used to obtain all different possible countries in the dataset.
Returns
-------
: pandas.DataFrame
Returns
-------
: pandas.DataFrame
"""
return self.data["country"].unique()

def get_total(self, value="tests", country=None, data_begin=None, data_end=None):
"""
Retrieves all new cases from the Our World in Data dataset as a DataFrame with datetime index.
Can be filtered by value, country and state, if only a country is given all available states get summed up.
Parameters
----------
value: str
Which data to return, possible values are
- "confirmed",
- "tests",
- "deaths",
- "vacination"
(default: "confirmed")
country : str
name of the country
begin_date : datetime.datetime, optional
intial date for the returned data, if no value is given the first date in the dataset is used
end_date : datetime.datetime, optional
last date for the returned data, if no value is given the most recent date in the dataset is used
Returns
-------
: pandas.DataFrame
table with new cases and the date as index
Retrieves all new cases from the Our World in Data dataset as a DataFrame with datetime index.
Can be filtered by value, country and state, if only a country is given all available states get summed up.
Parameters
----------
value: str
Which data to return, possible values are
- "confirmed",
- "tests",
- "deaths",
- "vacination"
(default: "confirmed")
country : str
name of the country
begin_date : datetime.datetime, optional
intial date for the returned data, if no value is given the first date in the dataset is used
end_date : datetime.datetime, optional
last date for the returned data, if no value is given the most recent date in the dataset is used
Returns
-------
: pandas.DataFrame
table with new cases and the date as index
"""
if value not in ["confirmed", "deaths", "tests", "vacinations"]:
log.warning(
Expand All @@ -188,28 +188,28 @@ def get_total(self, value="tests", country=None, data_begin=None, data_end=None)

def get_new(self, value="tests", country=None, data_begin=None, data_end=None):
"""
Retrieves all new cases from the Our World in Data dataset as a DataFrame with datetime index.
casesn be filtered by value, country and state, if only a country is given all available states get summed up.
Parameters
----------
value: str
Which data to return, possible values are
- "confirmed",
- "tests",
- "deaths"
(default: "confirmed")
country : str
name of the country
begin_date : datetime.datetime, optional
intial date for the returned data, if no value is given the first date in the dataset is used
end_date : datetime.datetime, optional
last date for the returned data, if no value is given the most recent date in the dataset is used
Returns
-------
: pandas.DataFrame
table with new cases and the date as index
Retrieves all new cases from the Our World in Data dataset as a DataFrame with datetime index.
casesn be filtered by value, country and state, if only a country is given all available states get summed up.
Parameters
----------
value: str
Which data to return, possible values are
- "confirmed",
- "tests",
- "deaths"
(default: "confirmed")
country : str
name of the country
begin_date : datetime.datetime, optional
intial date for the returned data, if no value is given the first date in the dataset is used
end_date : datetime.datetime, optional
last date for the returned data, if no value is given the most recent date in the dataset is used
Returns
-------
: pandas.DataFrame
table with new cases and the date as index
"""
if value not in ["confirmed", "deaths", "tests", "vacinations"]:
log.warning(
Expand Down
70 changes: 35 additions & 35 deletions covid19_inference/data_retrieval/_OxCGRT.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,21 +134,21 @@ def _to_iso(self):

def get_possible_countries(self):
"""
Can be used to obtain all different possible countries in the dataset.
Can be used to obtain all different possible countries in the dataset.
Returns
-------
: pandas.DataFrame
Returns
-------
: pandas.DataFrame
"""
return self.data["country"].unique()

def get_possible_policies(self):
"""
Can be used to obtain all policies in there corresponding categories possible countries in the dataset.
Can be used to obtain all policies in there corresponding categories possible countries in the dataset.
Returns
-------
: dict
Returns
-------
: dict
"""

ret = dict()
Expand Down Expand Up @@ -190,20 +190,20 @@ def get_possible_policies(self):

def get_change_points(self, policies, country):
"""
Returns a list of change points, depending on the selected measure and country.
Returns a list of change points, depending on the selected measure and country.
Parameters
----------
policies : str, array of str
The wanted policies. Can be an array of strings, use get_possible_policies() to get
a dict of possible policies.
Parameters
----------
policies : str, array of str
The wanted policies. Can be an array of strings, use get_possible_policies() to get
a dict of possible policies.
country : str
Filter for country, use get_possible_countries() to get a list of possible ones.
country : str
Filter for country, use get_possible_countries() to get a list of possible ones.
Returns
-------
:array of dicts
Returns
-------
:array of dicts
"""

if isinstance(policies, str):
Expand Down Expand Up @@ -236,22 +236,22 @@ def get_change_points(self, policies, country):

def get_time_data(self, policy, country, data_begin=None, data_end=None):
"""
Parameters
----------
policy : str
The wanted policy.
country : str
Filter for country, use get_possible_countries() to get a list of possible ones.
data_begin : datetime.datetime, optional
intial date for the returned data, if no value is given the first date in the dataset is used,
if none is given could yield errors
data_end : datetime.datetime, optional
last date for the returned data, if no value is given the most recent date in the dataset is used
Returns
-------
:
Pandas dataframe with policy
Parameters
----------
policy : str
The wanted policy.
country : str
Filter for country, use get_possible_countries() to get a list of possible ones.
data_begin : datetime.datetime, optional
intial date for the returned data, if no value is given the first date in the dataset is used,
if none is given could yield errors
data_end : datetime.datetime, optional
last date for the returned data, if no value is given the most recent date in the dataset is used
Returns
-------
:
Pandas dataframe with policy
"""
if data_begin is None:
data_begin = self.__get_first_date()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,11 @@ def _to_iso(self):

def helper(df):
try:
df = df.rename(columns={"DATE": "date",})
df = df.rename(
columns={
"DATE": "date",
}
)
df["date"] = pd.to_datetime(df["date"])
df = df.set_index("date")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,11 @@ def _to_iso(self):

def helper(df):
try:
df = df.rename(columns={"datum": "date",})
df = df.rename(
columns={
"datum": "date",
}
)
df["date"] = pd.to_datetime(df["date"])
df = df.set_index("date")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def get_total(
last date for the returned data, if no value is given the most recent date in the dataset is used
age_group : str, optional
Possible are '0-4', '5-14', '15-24', '25-34', '35-44', '45-54', '55-64', '65-'
Returns
-------
: pandas.DataFrame
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def get_total(
last date for the returned data, if no value is given the most recent date in the dataset is used
age_group : str, optional
Possible are '0-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80-'
Returns
-------
: pandas.DataFrame
Expand Down Expand Up @@ -199,7 +199,7 @@ def get_new(
last date for the returned data, if no value is given the most recent date in the dataset is used
age_group : str, optional
Possible are '0-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80-'
Returns
-------
: pandas.DataFrame
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,12 @@ def download_all_available_data(self, force_local=False, force_download=False):
def _to_iso(self):
df = self.data

df = df.rename(columns={"Date_file": "date_sub", "Date_statistics": "date",})
df = df.rename(
columns={
"Date_file": "date_sub",
"Date_statistics": "date",
}
)
df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d %H:%M:%S")
df["date_sub"] = pd.to_datetime(df["date_sub"], format="%Y-%m-%d")
df = df.set_index("date")
Expand Down Expand Up @@ -146,7 +151,7 @@ def get_new(
last date for the returned data, if no value is given the most recent date in the dataset is used
age_group : str, optional
Possible are '0-9','10-19','20-29','30-39','40-49', '50-59','60-69','70-79', '80-89','90+','<50', 'Unknown'
Returns
-------
: pandas.DataFrame
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def get_total(
last date for the returned data, if no value is given the most recent date in the dataset is used
age_group : str, optional
Possible are '0-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80-'
Returns
-------
: pandas.DataFrame
Expand Down
Loading

0 comments on commit 700aaab

Please sign in to comment.