Upgrade package to use pymc4 (#63)

* Replaced all theano calls with the respective aesara calls. First steps to upgrade to pymc4
* Replaced last occurrences of theano in the docstrings
* Updated example script: one_bundesland
* Further work on example script, also moved plotting routines into folder.
* Moved timeseries and distribution plot into own files. Also refactored to use InferenceData
* Rerun one_bundesland example script.
* Added watermark to example_one_bundesland notebook
* Updated example bundeslaender script
* Rerun what if notebook
* Rerun paper scenario
* Fixed docs

Co-authored-by: semohr <[email protected]>
Priesemann-Group · Jul 12, 2022 · 700aaab · 700aaab
1 parent faffb69
commit 700aaab
Show file tree

Hide file tree

Showing 41 changed files with 3,294 additions and 2,487 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -21,6 +21,8 @@ python:
   - "3.6"
   - "3.7"
   - "3.8"
+  - "3.9"
+  - "3.10"
 
 install:
   - pip install --upgrade pip

diff --git a/covid19_inference/__init__.py b/covid19_inference/__init__.py
@@ -12,7 +12,7 @@
 
 # from .data_retrieval import GOOGLE
 from . import data_retrieval
-from . import plot
+from .plot import *
 from . import model
 from .model import Cov19Model
 from .sampling import robust_sample

diff --git a/covid19_inference/_dev_helper.py b/covid19_inference/_dev_helper.py
@@ -1,9 +1,4 @@
 # ------------------------------------------------------------------------------ #
-# @Author:        F. Paul Spitzner
-# @Email:         [email protected]
-# @Created:       2020-04-21 08:57:53
-# @Last Modified: 2020-05-20 10:48:23
-# ------------------------------------------------------------------------------ #
 # Let's have a dummy instance of model and trace so we can play around with the
 # interface and plotting.
 # Analog to example_bundeslaender.ipynb
@@ -12,7 +7,7 @@
 import logging
 
 import numpy as np
-import pymc3 as pm
+import pymc as pm
 import datetime
 
 from .model import *
@@ -23,14 +18,14 @@
 
 def create_example_instance(num_change_points=3):
     """
-        Parameters
-        ----------
-            num_change_points : int
-                the standard change points to inlcude, at most 3
-
-        Retruns
-        -------
-            (model, trace) with example data
+    Parameters
+    ----------
+        num_change_points : int
+            the standard change points to include, at most 3
+
+    Returns
+    -------
+        (model, idata) with example data
     """
 
     jhu = data_retrieval.JHU()
@@ -98,6 +93,6 @@ def create_example_instance(num_change_points=3):
         student_t_likelihood(new_cases_inferred)
 
     # make it fast
-    trace = pm.sample(model=model, tune=1, draws=1)
+    idata = pm.sample(model=model, tune=1, draws=1)
 
-    return model, trace
+    return model, idata
diff --git a/covid19_inference/_version.py b/covid19_inference/_version.py
@@ -1 +1 @@
-__version__ = "0.3.6" 
+__version__ = "0.4.0"
diff --git a/covid19_inference/data_retrieval/_OWD.py b/covid19_inference/data_retrieval/_OWD.py
@@ -132,39 +132,39 @@ def _to_iso(self):
 
     def get_possible_countries(self):
         """
-            Can be used to obtain all different possible countries in the dataset.
+        Can be used to obtain all different possible countries in the dataset.
 
-            Returns
-            -------
-            : pandas.DataFrame
+        Returns
+        -------
+        : pandas.DataFrame
         """
         return self.data["country"].unique()
 
     def get_total(self, value="tests", country=None, data_begin=None, data_end=None):
         """
-            Retrieves all new cases from the Our World in Data dataset as a DataFrame with datetime index.
-            Can be filtered by value, country and state, if only a country is given all available states get summed up.
-
-            Parameters
-            ----------
-            value: str
-                Which data to return, possible values are
-                - "confirmed",
-                - "tests",
-                - "deaths",
-                - "vacination"
-                (default: "confirmed")
-            country : str
-                name of the country
-            begin_date : datetime.datetime, optional
-                intial date for the returned data, if no value is given the first date in the dataset is used
-            end_date : datetime.datetime, optional
-                last date for the returned data, if no value is given the most recent date in the dataset is used
-
-            Returns
-            -------
-            : pandas.DataFrame
-                table with new cases and the date as index
+        Retrieves all new cases from the Our World in Data dataset as a DataFrame with datetime index.
+        Can be filtered by value, country and state, if only a country is given all available states get summed up.
+
+        Parameters
+        ----------
+        value: str
+            Which data to return, possible values are
+            - "confirmed",
+            - "tests",
+            - "deaths",
+            - "vacinations"
+            (default: "tests")
+        country : str
+            name of the country
+        data_begin : datetime.datetime, optional
+            initial date for the returned data, if no value is given the first date in the dataset is used
+        data_end : datetime.datetime, optional
+            last date for the returned data, if no value is given the most recent date in the dataset is used
+
+        Returns
+        -------
+        : pandas.DataFrame
+            table with new cases and the date as index
         """
         if value not in ["confirmed", "deaths", "tests", "vacinations"]:
             log.warning(
@@ -188,28 +188,28 @@ def get_total(self, value="tests", country=None, data_begin=None, data_end=None)
 
     def get_new(self, value="tests", country=None, data_begin=None, data_end=None):
         """
-            Retrieves all new cases from the Our World in Data dataset as a DataFrame with datetime index.
-            casesn be filtered by value, country and state, if only a country is given all available states get summed up.
-
-            Parameters
-            ----------
-            value: str
-                Which data to return, possible values are
-                - "confirmed",
-                - "tests",
-                - "deaths"
-                (default: "confirmed")
-            country : str
-                name of the country
-            begin_date : datetime.datetime, optional
-                intial date for the returned data, if no value is given the first date in the dataset is used
-            end_date : datetime.datetime, optional
-                last date for the returned data, if no value is given the most recent date in the dataset is used
-
-            Returns
-            -------
-            : pandas.DataFrame
-                table with new cases and the date as index
+        Retrieves all new cases from the Our World in Data dataset as a DataFrame with datetime index.
+        Can be filtered by value, country and state, if only a country is given all available states get summed up.
+
+        Parameters
+        ----------
+        value: str
+            Which data to return, possible values are
+            - "confirmed",
+            - "tests",
+            - "deaths"
+            (default: "tests")
+        country : str
+            name of the country
+        data_begin : datetime.datetime, optional
+            initial date for the returned data, if no value is given the first date in the dataset is used
+        data_end : datetime.datetime, optional
+            last date for the returned data, if no value is given the most recent date in the dataset is used
+
+        Returns
+        -------
+        : pandas.DataFrame
+            table with new cases and the date as index
         """
         if value not in ["confirmed", "deaths", "tests", "vacinations"]:
             log.warning(

diff --git a/covid19_inference/data_retrieval/_OxCGRT.py b/covid19_inference/data_retrieval/_OxCGRT.py
@@ -134,21 +134,21 @@ def _to_iso(self):
 
     def get_possible_countries(self):
         """
-            Can be used to obtain all different possible countries in the dataset.
+        Can be used to obtain all different possible countries in the dataset.
 
-            Returns
-            -------
-            : pandas.DataFrame
+        Returns
+        -------
+        : pandas.DataFrame
         """
         return self.data["country"].unique()
 
     def get_possible_policies(self):
         """
-            Can be used to obtain all policies in there corresponding categories possible countries in the dataset.
+        Can be used to obtain all policies in there corresponding categories possible countries in the dataset.
 
-            Returns
-            -------
-            : dict
+        Returns
+        -------
+        : dict
         """
 
         ret = dict()
@@ -190,20 +190,20 @@ def get_possible_policies(self):
 
     def get_change_points(self, policies, country):
         """
-            Returns a list of change points, depending on the selected measure and country.
+        Returns a list of change points, depending on the selected measure and country.
 
-            Parameters
-            ----------
-            policies : str, array of str
-                The wanted policies. Can be an array of strings, use get_possible_policies() to get
-                a dict of possible policies.
+        Parameters
+        ----------
+        policies : str, array of str
+            The wanted policies. Can be an array of strings, use get_possible_policies() to get
+            a dict of possible policies.
 
-            country : str
-                Filter for country, use get_possible_countries() to get a list of possible ones.
+        country : str
+            Filter for country, use get_possible_countries() to get a list of possible ones.
 
-            Returns
-            -------
-            :array of dicts
+        Returns
+        -------
+        :array of dicts
         """
 
         if isinstance(policies, str):
@@ -236,22 +236,22 @@ def get_change_points(self, policies, country):
 
     def get_time_data(self, policy, country, data_begin=None, data_end=None):
         """
-            Parameters
-            ----------
-            policy : str
-                The wanted policy.
-            country : str
-                Filter for country, use get_possible_countries() to get a list of possible ones.
-            data_begin : datetime.datetime, optional
-                intial date for the returned data, if no value is given the first date in the dataset is used,
-                if none is given could yield errors
-            data_end : datetime.datetime, optional
-                last date for the returned data, if no value is given the most recent date in the dataset is used
-
-            Returns
-            -------
-            :
-                Pandas dataframe with policy
+        Parameters
+        ----------
+        policy : str
+            The wanted policy.
+        country : str
+            Filter for country, use get_possible_countries() to get a list of possible ones.
+        data_begin : datetime.datetime, optional
+            intial date for the returned data, if no value is given the first date in the dataset is used,
+            if none is given could yield errors
+        data_end : datetime.datetime, optional
+            last date for the returned data, if no value is given the most recent date in the dataset is used
+
+        Returns
+        -------
+        :
+            Pandas dataframe with policy
         """
         if data_begin is None:
             data_begin = self.__get_first_date()

diff --git a/covid19_inference/data_retrieval/countries/_belgium_epistat_wiv_isp.py b/covid19_inference/data_retrieval/countries/_belgium_epistat_wiv_isp.py
@@ -363,7 +363,11 @@ def _to_iso(self):
 
         def helper(df):
             try:
-                df = df.rename(columns={"DATE": "date",})
+                df = df.rename(
+                    columns={
+                        "DATE": "date",
+                    }
+                )
                 df["date"] = pd.to_datetime(df["date"])
                 df = df.set_index("date")
 

diff --git a/covid19_inference/data_retrieval/countries/_czechia_onemoceni.py b/covid19_inference/data_retrieval/countries/_czechia_onemoceni.py
@@ -336,7 +336,11 @@ def _to_iso(self):
 
         def helper(df):
             try:
-                df = df.rename(columns={"datum": "date",})
+                df = df.rename(
+                    columns={
+                        "datum": "date",
+                    }
+                )
                 df["date"] = pd.to_datetime(df["date"])
                 df = df.set_index("date")
 

diff --git a/covid19_inference/data_retrieval/countries/_ireland_data_gov_ie.py b/covid19_inference/data_retrieval/countries/_ireland_data_gov_ie.py
@@ -162,7 +162,7 @@ def get_total(
             last date for the returned data, if no value is given the most recent date in the dataset is used
         age_group : str, optional
             Possible are '0-4', '5-14', '15-24', '25-34', '35-44', '45-54', '55-64', '65-'
-            
+
         Returns
         -------
         : pandas.DataFrame

diff --git a/covid19_inference/data_retrieval/countries/_latvia_data_gov_lv.py b/covid19_inference/data_retrieval/countries/_latvia_data_gov_lv.py
@@ -163,7 +163,7 @@ def get_total(
             last date for the returned data, if no value is given the most recent date in the dataset is used
         age_group : str, optional
             Possible are '0-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80-'
-            
+
         Returns
         -------
         : pandas.DataFrame
@@ -199,7 +199,7 @@ def get_new(
             last date for the returned data, if no value is given the most recent date in the dataset is used
         age_group : str, optional
             Possible are '0-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80-'
-            
+
         Returns
         -------
         : pandas.DataFrame

diff --git a/covid19_inference/data_retrieval/countries/_netherlands_data_rvim_nl.py b/covid19_inference/data_retrieval/countries/_netherlands_data_rvim_nl.py
@@ -114,7 +114,12 @@ def download_all_available_data(self, force_local=False, force_download=False):
     def _to_iso(self):
         df = self.data
 
-        df = df.rename(columns={"Date_file": "date_sub", "Date_statistics": "date",})
+        df = df.rename(
+            columns={
+                "Date_file": "date_sub",
+                "Date_statistics": "date",
+            }
+        )
         df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d %H:%M:%S")
         df["date_sub"] = pd.to_datetime(df["date_sub"], format="%Y-%m-%d")
         df = df.set_index("date")
@@ -146,7 +151,7 @@ def get_new(
             last date for the returned data, if no value is given the most recent date in the dataset is used
         age_group : str, optional
             Possible are '0-9','10-19','20-29','30-39','40-49', '50-59','60-69','70-79', '80-89','90+','<50', 'Unknown'
-            
+
         Returns
         -------
         : pandas.DataFrame

diff --git a/covid19_inference/data_retrieval/countries/_portugal_dgs.py b/covid19_inference/data_retrieval/countries/_portugal_dgs.py
@@ -157,7 +157,7 @@ def get_total(
             last date for the returned data, if no value is given the most recent date in the dataset is used
         age_group : str, optional
             Possible are '0-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80-'
-            
+
         Returns
         -------
         : pandas.DataFrame