From 9ef243038883bf58ae897fb204346bd51392c2a1 Mon Sep 17 00:00:00 2001
From: Knut Rand
Date: Thu, 19 Sep 2024 13:30:41 +0200
Subject: [PATCH] synch

---
 .gitignore                                    |  1 +
 climate_health/assessment/forecast.py         | 28 +++-
 .../assessment/prediction_evaluator.py        | 85 ++++++++++++--
 climate_health/climate_data/gridded_data.py   | 40 ++++++++
 climate_health/climate_predictor.py           |  8 +-
 .../data/gluonts_adaptor/dataset.py           |  1 +
 .../external/models/flax_models/flax_model.py |  1 +
 climate_health/rest_api.py                    | 23 +----
 climate_health/rest_api_src/data_models.py    | 27 +++++
 .../rest_api_src/generate_rest_api.py         | 56 ++++++++++
 climate_health/runners/docker_runner.py       |  4 +-
 .../temporal_dataclass.py                     |  5 +-
 .../time_period/date_util_wrapper.py          | 15 ++-
 scripts/analyze_open_dengue.py                | 11 +++
 scripts/clean_laos_data2.py                   | 99 +++++++++++++++
 scripts/explorations/xarray_gee.py            | 28 +++++
 scripts/external_model_example.py             | 37 +++---
 scripts/prediction_example.py                 | 15 +++
 tests/test_climate_predictor.py               |  6 +-
 tests/test_forecast.py                        | 15 ++-
 tests/test_gridded_data.py                    | 18 ++++
 21 files changed, 466 insertions(+), 57 deletions(-)
 create mode 100644 climate_health/climate_data/gridded_data.py
 create mode 100644 climate_health/rest_api_src/data_models.py
 create mode 100644 climate_health/rest_api_src/generate_rest_api.py
 create mode 100644 scripts/analyze_open_dengue.py
 create mode 100644 scripts/clean_laos_data2.py
 create mode 100644 scripts/explorations/xarray_gee.py
 create mode 100644 scripts/prediction_example.py
 create mode 100644 tests/test_gridded_data.py

diff --git a/.gitignore b/.gitignore
index e83d6279..e30b3906 100644
--- a/.gitignore
+++ b/.gitignore
@@ -130,3 +130,4 @@
 climate_health/web_interface/yarn-error.log*
 climate_health/web_interface/node_modules/
+/scripts/runs/

diff --git a/climate_health/assessment/forecast.py b/climate_health/assessment/forecast.py
index 996d95a5..ef162900 100644
--- a/climate_health/assessment/forecast.py
+++ b/climate_health/assessment/forecast.py
@@ -1,7 +1,13 @@
+import pandas as pd
+from matplotlib import pyplot as plt
+
 from climate_health.assessment.dataset_splitting import train_test_split_with_weather
+from climate_health.assessment.prediction_evaluator import Estimator, Predictor
+from climate_health.climate_predictor import MonthlyClimatePredictor
+from climate_health.data.gluonts_adaptor.dataset import ForecastAdaptor
 from climate_health.plotting.prediction_plot import plot_forecast_from_summaries
 from climate_health.spatio_temporal_data.temporal_dataclass import DataSet
-from climate_health.time_period.date_util_wrapper import TimeDelta, Month
+from climate_health.time_period.date_util_wrapper import TimeDelta, Month, PeriodRange
 import logging

 logger = logging.getLogger(__name__)
@@ -41,3 +47,23 @@ def multi_forecast(model, dataset: DataSet, prediction_lenght: TimeDelta, pre_tr
         cur_dataset, _, _ = train_test_split_with_weather(cur_dataset, split_period)
     logger.info(f'Forecasting {prediction_lenght} months into the future on {len(datasets)} datasets')
     return (forecast(model, dataset, prediction_lenght) for dataset in datasets[::-1])
+
+
+def forecast_ahead(estimator: Estimator, dataset: DataSet, prediction_length: int):
+    '''
+    Train the estimator on the full dataset, then forecast prediction_length periods past its end
+    '''
+    logger.info(f'Forecasting {prediction_length} months into the future')
+    train_data = dataset
+    predictor = estimator.train(train_data)
+    return forecast_with_predicted_weather(predictor, train_data, prediction_length)
+
+
+def forecast_with_predicted_weather(predictor: Predictor, historic_data: DataSet, prediction_length: int):
+    prediction_range = PeriodRange.from_start_and_n_periods(
+        Month(historic_data.end_timestamp).to_string(), prediction_length)
+    climate_predictor = MonthlyClimatePredictor()
+    climate_predictor.train(historic_data)
+    future_weather = climate_predictor.predict(prediction_range)
+    predictions = predictor.predict(historic_data, future_weather)
+    return predictions
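Usage sketch for the new entry point (the estimator and dataset are borrowed from the tests added later in this patch, purely illustrative):

    from climate_health.assessment.forecast import forecast_ahead
    from climate_health.data.datasets import ISIMIP_dengue_harmonized
    from climate_health.predictor.naive_estimator import NaiveEstimator

    dataset = ISIMIP_dengue_harmonized['vietnam']
    # Trains on the full dataset, then predicts 3 periods past its end,
    # with future weather covariates from MonthlyClimatePredictor.
    predictions = forecast_ahead(NaiveEstimator(), dataset, prediction_length=3)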
diff --git a/climate_health/assessment/prediction_evaluator.py b/climate_health/assessment/prediction_evaluator.py
index 8e27cf35..e9dcc74f 100644
--- a/climate_health/assessment/prediction_evaluator.py
+++ b/climate_health/assessment/prediction_evaluator.py
@@ -1,6 +1,6 @@
 from collections import defaultdict
 from dataclasses import dataclass
-from typing import Protocol, TypeVar
+from typing import Protocol, TypeVar, Iterable, Dict

 from gluonts.evaluation import Evaluator
 from gluonts.model import Forecast
@@ -111,6 +111,26 @@ def train(self, data: DataSet) -> Predictor:


 def evaluate_model(estimator: Estimator, data: DataSet, prediction_length=3, n_test_sets=4, report_filename=None):
+    '''
+    Evaluate an estimator on a dataset by training it once and making predictions
+    on multiple held-out test sets with the same trained model
+
+    Parameters
+    ----------
+    estimator : Estimator
+        The estimator to train and evaluate
+    data : DataSet
+        The data to train and evaluate on
+    prediction_length : int
+        The number of periods to predict ahead
+    n_test_sets : int
+        The number of test sets to evaluate on
+    report_filename : str, optional
+        If given, forecast plots are written to this PDF file
+
+    Returns
+    -------
+    tuple
+        Summary and individual evaluation results
+    '''
     train, test_generator = train_test_generator(data, prediction_length, n_test_sets)
     predictor = estimator.train(data)
     truth_data = {
@@ -123,10 +143,42 @@ def evaluate_model(estimator: Estimator, data: DataSet, prediction_length=3, n_t
     evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
     results = evaluator(tss, forecast_list)
     return results
+
+
+def evaluate_multi_model(estimator: Estimator, data: list[DataSet], prediction_length=3, n_test_sets=4,
+                         report_base_name=None):
+    trains, test_generators = zip(*[train_test_generator(d, prediction_length, n_test_sets) for d in data])
+    predictor = estimator.multi_train(trains)
+    result_list = []
+    for i, (dataset, test_generator) in enumerate(zip(data, test_generators)):
+        truth_data = {
+            location: pd.DataFrame(dataset[location].disease_cases, index=dataset[location].time_period.to_period_index())
+            for location in dataset.keys()}
+        if report_base_name is not None:
+            _, plot_test_generator = train_test_generator(dataset, prediction_length, n_test_sets)
+            plot_forecasts(predictor, plot_test_generator, truth_data, f'{report_base_name}_{i}.pdf')
+        forecast_list, tss = _get_forecast_generators(predictor, test_generator, truth_data)
+        evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
+        results = evaluator(tss, forecast_list)
+        result_list.append(results)
+    return result_list
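The results tuple propagated by evaluate_model is the standard gluonts Evaluator pair of aggregate metrics and per-series metrics; a small consumption sketch (estimator and dataset names are illustrative):

    agg_metrics, item_metrics = evaluate_model(estimator, dataset,
                                               prediction_length=3, n_test_sets=4)
    print(agg_metrics['MSE'])     # dict of aggregate scores
    print(item_metrics.head())    # pandas DataFrame, one row per series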
 # forecasts = ((predictor.predict(*test_pair[:2]), test_pair[2]) for test_pair in test_generator)


-def _get_forecast_generators(predictor, test_generator, truth_data) -> tuple[list[Forecast], list[pd.DataFrame]]:
+def _get_forecast_generators(predictor: Predictor, test_generator: Iterable[tuple[DataSet, DataSet, DataSet]],
+                             truth_data: Dict[str, pd.DataFrame]) -> tuple[list[Forecast], list[pd.DataFrame]]:
+    '''
+    Get the forecast and truth data for a predictor and test generator.
+    One entry is a combination of prediction start period and location.
+
+    Parameters
+    ----------
+    predictor : Predictor
+        The predictor to evaluate
+    test_generator : Iterable[tuple[DataSet, DataSet, DataSet]]
+        The test generator to generate test data
+    truth_data : dict[str, pd.DataFrame]
+        The truth data for the locations
+
+    Returns
+    -------
+    tuple[list[Forecast], list[pd.DataFrame]]
+        Parallel lists of forecasts and the matching truth series
+    '''
     tss = []
     forecast_list = []
     for historic_data, future_data, _ in test_generator:
@@ -144,11 +196,14 @@ def _get_forecast_dict(predictor: Predictor, test_generator) -> dict[str, list[F

     forecast_dict = defaultdict(list)
     for historic_data, future_data, _ in test_generator:
+        assert len(future_data.period_range) > 0, \
+            f'Future data must have at least one period: {historic_data.period_range}, {future_data.period_range}'
         forecasts = predictor.predict(historic_data, future_data)
         for location, samples in forecasts.items():
             forecast_dict[location].append(ForecastAdaptor.from_samples(samples))
     return forecast_dict

+
 def get_forecast_df(predictor: Predictor, test_generator) -> pd.DataFrame:
     forecast_dict = _get_forecast_dict(predictor, test_generator)
     dfs = []
@@ -158,20 +213,17 @@ def get_forecast_df(predictor: Predictor, test_generator) -> pd.DataFrame:

     return forecast_df

+
 def plot_forecasts(predictors: list[Predictor], test_instance, truth, pdf_filename):
     forecast_dicts = [_get_forecast_dict(predictor, test_instance) for predictor in predictors]
     with PdfPages(pdf_filename) as pdf:
         for location in forecast_dicts[0].keys():
             _t = truth[location]
             for forecast_dict in forecast_dicts:
-                fig = plt.subplots(figsize=(8, 4),ncols=len(forecast_dict))
+                fig = plt.subplots(figsize=(8, 4), ncols=len(forecast_dict))
                 for i in range(len(forecast_dict[location])):
                     forecast = forecast_dict[location][i]
-
-
-
                     # plt.figure(figsize=(8, 4))  # Set the figure size
                     # t = _t[_t.index <= forecast.index[-1]]
                     # forecast.plot(show_label=True)
@@ -182,7 +234,6 @@ def plot_forecasts(predictors: list[Predictor], test_instance, truth, pdf_filena
     #         plt.close()  # Close the figure


-
 def plot_forecasts(predictor, test_instance, truth, pdf_filename):
     forecast_dict = _get_forecast_dict(predictor, test_instance)
     with PdfPages(pdf_filename) as pdf:
@@ -199,13 +250,29 @@ def plot_forecasts(predictor, test_instance, truth, pdf_filename):
             plt.close()  # Close the figure


+def plot_predictions(predictions: DataSet[Samples], truth: DataSet, pdf_filename):
+    truth_dict = {location: pd.DataFrame(truth[location].disease_cases,
+                                         index=truth[location].time_period.to_period_index())
+                  for location in truth.keys()}
+    with PdfPages(pdf_filename) as pdf:
+        for location, prediction in predictions.items():
+            prediction = ForecastAdaptor.from_samples(prediction)
+            t = truth_dict[location]
+            plt.figure(figsize=(8, 4))  # Set the figure size
+            # t = _t[_t.index <= prediction.index[-1]]
+            prediction.plot(show_label=True)
+            plt.plot(t[-150:].to_timestamp())
+            plt.title(location)
+            plt.legend()
+            pdf.savefig()
+            plt.close()  # Close the figure
+
+
 def plot_forecasts_list(predictor, test_instances, truth, pdf_filename):
     forecasts, tss = _get_forecast_generators(predictor, test_instances, truth)
     with PdfPages(pdf_filename) as pdf:
         for i, (forecast_entry, ts_entry) in enumerate(zip(forecasts, tss)):
             last_period = forecast_entry.index[-1]
             ts_entry = ts_entry[ts_entry.index <= last_period]
-            offset = ts_entry
             plt.figure(figsize=(8, 4))  # Set the figure size
             plt.plot(ts_entry[-150:].to_timestamp())
             forecast_entry.plot(show_label=True)
diff --git a/climate_health/climate_data/gridded_data.py b/climate_health/climate_data/gridded_data.py
new file mode 100644
index 00000000..460b9324
--- /dev/null
+++ b/climate_health/climate_data/gridded_data.py
@@ -0,0 +1,40 @@
+import ee
+import xarray
+from matplotlib import pyplot as plt
+from matplotlib.colors import Normalize
+
+from ..google_earth_engine.gee_raw import load_credentials
+import geopandas as gpd
+
+
+# Load the GeoJSON file using GeoPandas
+def get_gridded_data(polygons_filename):
+    gdf = gpd.read_file(polygons_filename)
+    # Get the bounding box of all polygons in the GeoJSON
+    lon1, lat1, lon2, lat2 = gdf.total_bounds
+    print(lon1, lat1, lon2, lat2)
+    credentials = load_credentials()
+    ee.Initialize(ee.ServiceAccountCredentials(credentials.account, key_data=credentials.private_key))
+    collection = ee.ImageCollection('ECMWF/ERA5_LAND/DAILY_AGGR').filterDate('2024-08-01', '2024-08-03').select('temperature_2m')
+    # lon1 = 28.8
+    # lon2 = 30.9
+    # lat1 = -2.9
+    # lat2 = -1.0
+    country_bounds = ee.Geometry.Rectangle(*gdf.total_bounds)  # lon1, lat1, lon2, lat2
+    projection = collection.first().select(0).projection()  # EPSG:4326
+    dataset = xarray.open_dataset(
+        collection,
+        engine='ee',
+        projection=projection,
+        geometry=country_bounds
+    )
+    ds = dataset
+    first_image = dataset.isel(time=0)
+    temp_d = first_image['temperature_2m']
+    temp_d.plot()
+    temp = temp_d.values
+    # plt.imshow(temp, extent=[ds.lon.min(), ds.lon.max(), ds.lat.min(), ds.lat.max()], origin='lower', cmap='viridis',
+    #            norm=Normalize())
+    # plt.imshow(temp, cmap='viridis')
+    gdf.boundary.plot(ax=plt.gca(), edgecolor='red', linewidth=1)
+    plt.show()
+    return temp
\ No newline at end of file

diff --git a/climate_health/climate_predictor.py b/climate_health/climate_predictor.py
index b725be6d..b76f096b 100644
--- a/climate_health/climate_predictor.py
+++ b/climate_health/climate_predictor.py
@@ -19,23 +19,23 @@ def _feature_matrix(self, time_period: PeriodRange):
         return time_period.month[:,None] == np.arange(1, 13)

     def train(self, train_data: DataSet[ClimateData]):
+        train_data = train_data.remove_field('disease_cases')
         for location, data in train_data.items():
-            data = data.data()
             self._cls = data.__class__
             x = self._feature_matrix(data.time_period)
             for field in dataclasses.fields(data):
-                if field.name == 'time_period':
+                if field.name in ('time_period',):
                     continue
                 y = getattr(data, field.name)
                 model = linear_model.LinearRegression()
-                model.fit(x, y[:,None])
+                model.fit(x, y[:, None])
                 self._models[location][field.name] = model

     def predict(self, time_period: PeriodRange):
         x = self._feature_matrix(time_period)
         prediction_dict = {}
         for location, models in self._models.items():
-            prediction_dict[location] = self._cls(time_period, **{field: model.predict(x) for field, model in models.items()})
+            prediction_dict[location] = self._cls(time_period, **{field: model.predict(x).ravel() for field, model in models.items()})
         return DataSet(prediction_dict)
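The reworked climate predictor fits one linear regression per field on month-of-year indicators; a round-trip sketch mirroring tests/test_climate_predictor.py (values are synthetic):

    import numpy as np
    from climate_health.climate_predictor import MonthlyClimatePredictor
    from climate_health.datatypes import ClimateData
    from climate_health.spatio_temporal_data.temporal_dataclass import DataSet
    from climate_health.time_period.date_util_wrapper import Month, PeriodRange

    time_period = PeriodRange.from_time_periods(Month.parse('2020-01'), Month.parse('2020-12'))
    values = np.arange(len(time_period))
    data = DataSet({'oslo': ClimateData(time_period, values, values * 2, values * 3)})

    predictor = MonthlyClimatePredictor()
    predictor.train(data)
    future = PeriodRange.from_time_periods(Month.parse('2021-01'), Month.parse('2021-03'))
    prediction = predictor.predict(future)  # DataSet of per-month climate values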
diff --git a/climate_health/data/gluonts_adaptor/dataset.py b/climate_health/data/gluonts_adaptor/dataset.py
index da001618..7fd36592 100644
--- a/climate_health/data/gluonts_adaptor/dataset.py
+++ b/climate_health/data/gluonts_adaptor/dataset.py
@@ -1,3 +1,4 @@
+import warnings
 import dataclasses
 from pathlib import Path
 from typing import Iterable, TypeVar

diff --git a/climate_health/external/models/flax_models/flax_model.py b/climate_health/external/models/flax_models/flax_model.py
index f21bc6f8..512079de 100644
--- a/climate_health/external/models/flax_models/flax_model.py
+++ b/climate_health/external/models/flax_models/flax_model.py
@@ -36,6 +36,7 @@ class TrainState(train_state.TrainState):
 class FlaxModel:
     model: nn.Module  # = RNNModel()
     n_iter: int = 3000
+
     def __init__(self, rng_key: jax.random.PRNGKey = jax.random.PRNGKey(100), n_iter: int = None):
         self.rng_key = rng_key
         self._losses = []

diff --git a/climate_health/rest_api.py b/climate_health/rest_api.py
index 657f033b..2f5d9c20 100644
--- a/climate_health/rest_api.py
+++ b/climate_health/rest_api.py
@@ -1,8 +1,6 @@
 import json
-from contextlib import asynccontextmanager
 import logging
-from asyncio import CancelledError
-from typing import List, Union
+from typing import Union

 from fastapi import BackgroundTasks, UploadFile, HTTPException
 from pydantic import BaseModel
@@ -12,18 +10,14 @@
 from fastapi.responses import FileResponse
 from fastapi.middleware.cors import CORSMiddleware

-from climate_health.api import read_zip_folder, train_on_prediction_data
 from climate_health.api_types import RequestV1
-from climate_health.google_earth_engine.gee_era5 import Era5LandGoogleEarthEngine
 from climate_health.internal_state import Control, InternalState
 from climate_health.model_spec import ModelSpec, model_spec_from_model
 from climate_health.predictor import all_models
 from climate_health.predictor.feature_spec import Feature, all_features
+from climate_health.rest_api_src.data_models import FullPredictionResponse
 from climate_health.rest_api_src.worker_functions import train_on_zip_file, train_on_json_data
-from climate_health.training_control import TrainingControl
-from dotenv import load_dotenv, find_dotenv
-from climate_health.worker.background_tasks_worker import BGTaskWorker
 from climate_health.worker.rq_worker import RedisQueue

 logger = logging.getLogger(__name__)
@@ -97,23 +91,10 @@ def is_finished(self):
 # worker = BGTaskWorker(BackgroundTasks(), internal_state, state)
 worker = RedisQueue()

-
 def set_cur_response(response):
     state['response'] = response

-
-class PredictionResponse(BaseModel):
-    value: float
-    orgUnit: str
-    dataElement: str
-    period: str
-
-
-class FullPredictionResponse(BaseModel):
-    diseaseId: str
-    dataValues: List[PredictionResponse]
-
-
 @app.get('favicon.ico')
 async def favicon() -> FileResponse:
     return FileResponse('chap_icon.jpeg')

diff --git a/climate_health/rest_api_src/data_models.py b/climate_health/rest_api_src/data_models.py
new file mode 100644
index 00000000..93486d69
--- /dev/null
+++ b/climate_health/rest_api_src/data_models.py
@@ -0,0 +1,27 @@
+from typing import List
+
+from pydantic import BaseModel
+
+
+class PredictionBase(BaseModel):
+    orgUnit: str
+    dataElement: str
+    period: str
+
+
+class PredictionResponse(PredictionBase):
+    value: float
+
+
+class PredictionSampleResponse(PredictionBase):
+    values: list[float]
+
+
+class FullPredictionResponse(BaseModel):
+    diseaseId: str
+    dataValues: List[PredictionResponse]
+
+
+class FullPredictionSampleResponse(BaseModel):
+    diseaseId: str
+    dataValues: List[PredictionSampleResponse]
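These models nest one level: a response carries a diseaseId plus one entry per org unit and period; an illustrative construction (all field values made up):

    from climate_health.rest_api_src.data_models import FullPredictionResponse, PredictionResponse

    response = FullPredictionResponse(
        diseaseId='dengue',
        dataValues=[PredictionResponse(orgUnit='OU_123', dataElement='disease_cases',
                                       period='202401', value=12.5)])
    print(response)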
diff --git a/climate_health/rest_api_src/generate_rest_api.py b/climate_health/rest_api_src/generate_rest_api.py
new file mode 100644
index 00000000..773e685e
--- /dev/null
+++ b/climate_health/rest_api_src/generate_rest_api.py
@@ -0,0 +1,56 @@
+from typing import Optional
+
+from fastapi import FastAPI
+from starlette.middleware.cors import CORSMiddleware
+
+from climate_health.api_types import RequestV1
+from climate_health.assessment.prediction_evaluator import Predictor
+from climate_health.datatypes import Samples
+from climate_health.rest_api_src.data_models import PredictionSampleResponse
+from climate_health.spatio_temporal_data.temporal_dataclass import DataSet
+
+
+def get_app():
+    app = FastAPI(
+        root_path="/v1"
+    )
+    origins = [
+        '*',  # Allow all origins
+        "http://localhost:3000",
+        "localhost:3000",
+    ]
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=origins,
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"]
+    )
+
+    return app
+
+
+def samples_to_json(samples_dataset: DataSet[Samples]):
+    data_values = []
+    for location, samples in samples_dataset.items():
+        for period, data in zip(samples.time_periods, samples.data):
+            # 'disease_cases' is a placeholder dataElement label
+            data_values.append(
+                PredictionSampleResponse(orgUnit=location, dataElement='disease_cases',
+                                         period=period.id(), values=data.tolist()))
+    return data_values
+
+
+def get_rest_api(estimator):
+    app = get_app()
+    predictors: dict[str, Predictor] = {}
+    predictions: dict[str, DataSet[Samples]] = {}
+
+    @app.post("/train")
+    def train(data: RequestV1, name: Optional[str] = None) -> dict:
+        name = name or f'model_{len(predictors)}'
+        predictors[name] = estimator.train(data)
+        return {'name': name}
+
+    @app.post("/predict")
+    def predict(model_name: str, data: RequestV1):
+        samples: DataSet[Samples] = predictors[model_name].predict(data)
+        return samples_to_json(samples)
+
+    return app
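Serving the generated app is then a one-liner; a sketch assuming the repo's NaiveEstimator and a standard uvicorn setup (uvicorn itself is an assumption here, any ASGI server works):

    import uvicorn
    from climate_health.predictor.naive_estimator import NaiveEstimator
    from climate_health.rest_api_src.generate_rest_api import get_rest_api

    app = get_rest_api(NaiveEstimator())  # /train and /predict are registered on app
    uvicorn.run(app, host='0.0.0.0', port=8000)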
diff --git a/climate_health/runners/docker_runner.py b/climate_health/runners/docker_runner.py
index 6c500e9b..756b90d9 100644
--- a/climate_health/runners/docker_runner.py
+++ b/climate_health/runners/docker_runner.py
@@ -2,7 +2,8 @@
 from ..docker_helper_functions import create_docker_image, run_command_through_docker_container
 from .runner import Runner
-
+import logging
+logger = logging.getLogger(__name__)

 class DockerImageRunner(Runner):
     """A runner based on a docker image (Dockerfile)"""
@@ -30,5 +31,6 @@ def __init__(self, docker_name: str, working_dir: str | Path):
         self._working_dir = working_dir

     def run_command(self, command):
+        logger.info(f'Running command {command} in docker container {self._docker_name} in {self._working_dir}')
         return run_command_through_docker_container(self._docker_name, self._working_dir, command)

diff --git a/climate_health/spatio_temporal_data/temporal_dataclass.py b/climate_health/spatio_temporal_data/temporal_dataclass.py
index 123d7ff9..56de6126 100644
--- a/climate_health/spatio_temporal_data/temporal_dataclass.py
+++ b/climate_health/spatio_temporal_data/temporal_dataclass.py
@@ -97,13 +97,16 @@ def start_timestamp(self) -> pd.Timestamp:
     def end_timestamp(self) -> pd.Timestamp:
         return self._data.time_period[-1].end_timestamp

+class Polygon:
+    pass
+

 class DataSet(Generic[FeaturesT]):
     '''
     Class representing severeal time series at different locations.
     '''

-    def __init__(self, data_dict: dict[str, FeaturesT]):
+    def __init__(self, data_dict: dict[str, FeaturesT], polygon_dict: dict[str, Polygon] = None):
         self._data_dict = {loc: TemporalDataclass(data)
                            if not isinstance(data, TemporalDataclass) else data
                            for loc, data in data_dict.items()}

diff --git a/climate_health/time_period/date_util_wrapper.py b/climate_health/time_period/date_util_wrapper.py
index 94695584..a229e386 100644
--- a/climate_health/time_period/date_util_wrapper.py
+++ b/climate_health/time_period/date_util_wrapper.py
@@ -207,7 +208,7 @@ def id(self):


 class Week(TimePeriod):
-    _used_attributes = ['year']
+    _used_attributes = []  # was ['year']
     _extension = relativedelta(weeks=1)

     @property
@@ -223,10 +224,12 @@ def __init__(self, date, *args, **kwargs):
             week_nr = args[0] if args else kwargs['week']
             self._date = self.__date_from_numbers(year, week_nr)
             self.week = week_nr
+            self.year = self._date.year
         else:
             if isinstance(date, TimeStamp):
                 date = date._date
             self.week = date.isocalendar()[1]
+            self.year = date.isocalendar()[0]
             self._date = date

     def __sub__(self, other: 'TimePeriod'):
@@ -467,8 +470,17 @@ def from_pandas(cls, periods: Iterable[pd.Period]):
         cls._check_consequtive(time_delta, time_periods)
         return cls.from_time_periods(time_periods[0], time_periods[-1])

+    @classmethod
+    def _check_consequtive_weeks(cls, time_periods, fill_missing=False):
+        # Unfinished sketch: should validate that weekly periods are consecutive,
+        # mirroring _check_consequtive below
+        period_range = pd.period_range(start=time_periods[0]._date, end=time_periods[-1]._date, freq='W')
+        raise NotImplementedError
+
     @classmethod
     def _check_consequtive(cls, time_delta, time_periods, fill_missing=False):
+        # if time_delta == delta_week:
+        #     return cls._check_consequtive_weeks(time_periods, fill_missing)
         is_consec = [p2 == p1 + time_delta for p1, p2 in zip(time_periods, time_periods[1:])]
         if not all(is_consec):
             if fill_missing:
@@ -515,6 +527,7 @@ def from_period_list(cls, fill_missing, periods):
         missing = cls._check_consequtive(delta, periods, fill_missing)
         ret = cls.from_time_periods(periods[0], periods[-1])
         if fill_missing:
+            assert len(ret) == len(missing) + len(periods), (len(ret), len(missing), len(periods), periods, missing)
             return ret, missing
         return ret
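With the Week changes above, both constructor branches now populate year; a quick illustration (Week(year, week) mirrors how parse_week builds periods from strings like '2014W05'):

    week = Week(2014, 5)
    assert (week.year, week.week) == (2014, 5)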
diff --git a/scripts/analyze_open_dengue.py b/scripts/analyze_open_dengue.py
new file mode 100644
index 00000000..9645b708
--- /dev/null
+++ b/scripts/analyze_open_dengue.py
@@ -0,0 +1,11 @@
+import pandas as pd
+
+filepath = '~/Downloads/Temporal_extract_V1_2_2.csv'
+df = pd.read_csv(filepath)
+print(df['S_res'].value_counts())
+print(df['T_res'].value_counts())
+
+spatial_filepath = '~/Downloads/Spatial_extract_V1_2_2.csv'
+dfS = pd.read_csv(spatial_filepath)
+print(dfS['S_res'].value_counts())
+print(dfS['T_res'].value_counts())

diff --git a/scripts/clean_laos_data2.py b/scripts/clean_laos_data2.py
new file mode 100644
index 00000000..74bf5dab
--- /dev/null
+++ b/scripts/clean_laos_data2.py
@@ -0,0 +1,99 @@
+import numpy as np
+import pandas as pd
+
+from climate_health.datatypes import ClimateData, ClimateHealthTimeSeries, FullData, ClimateHealthData
+from climate_health.file_io.cleaners import laos_data
+from climate_health.spatio_temporal_data.temporal_dataclass import DataSet
+from climate_health.time_period import TimePeriod
+
+filename = '/home/knut/Downloads/laodenguedata.csv'
+
+
+def parse_week(week):
+    # 'W5 2014' -> '2014W05'
+    week, year = week.split()
+    return f'{year}W{int(week[1:]):02d}'
+
+
+raw_df = pd.read_csv(filename)
+raw_df['Location'] = raw_df['Organisation unit']
+# Make a column for each unique value in the 'Data' column
+df = raw_df.pivot(index=['Period', 'Location'], columns='Data', values='Value')
+df = df.reset_index()
+df['Period'] = [parse_week(week) for week in df.Period]
+colnames = ['Climate-Rainfall', 'Climate-Temperature avg',
+            'NCLE: 7. Dengue cases (any)', 'Location', 'Period']
+true_colnames = ['rainfall', 'mean_temperature', 'disease_cases', 'location', 'time_period']
+df.rename(columns={colname: true_colname for colname, true_colname in zip(colnames, true_colnames)}, inplace=True)
+df = df.sort_values(by=['time_period', 'location'])
+if __name__ == '__main__':
+    dataset = DataSet.from_pandas(df, dataclass=ClimateHealthData, fill_missing=True)
+
+
+if False:
+    mapping = {'rainfall': 'gsiW9SgolNd',
+               'mean_temperature': 'VA05qvanuVs',
+               'max_temperature': 'ZH76qVQl5Mz'}
+
+    health_filename = '/home/knut/Downloads/dengue.csv'
+    df = laos_data(health_filename)
+    # df.to_csv('/home/knut/Downloads/dengue_clean.csv')
+    health = df
+    climate_filename = '/home/knut/Downloads/climate_monthly_perdataelement.csv'
+
+
+    def get_laos_climate(climate_filename):
+        climate_data = pd.read_csv(climate_filename)
+        df = climate_data
+        df = df.sort_values(by=['orgunit', 'year', 'month'])
+        periods = [f'{year}-{month}' for year, month
+                   in zip(climate_data['year'],
+                          climate_data['month'])]
+        climate_data['periodid'] = periods
+        # climate_data = climate_data.sort_values(by=['periodid'])
+        d = {name: df['value.' + mapping[name]].values for name in mapping.keys()}
+        new_df = pd.DataFrame(
+            d | {'time_period': climate_data['periodid'], 'location': climate_data['orgunit']})
+        spatio_temporal_dict = DataSet.from_pandas(
+            new_df, dataclass=ClimateData)
+        return spatio_temporal_dict.interpolate()
+
+
+    spatio_temporal_dict = get_laos_climate(climate_filename)
+    full_dict = {name: ClimateHealthTimeSeries.combine(health.get_location(name).data(),
+                                                       spatio_temporal_dict.get_location(name).data())
+                 for name in health.locations()}
+    data = DataSet(full_dict)
+    data.to_csv('/home/knut/Downloads/laos_data.csv')
+
+    laos_population = '''\
+    Vientiane Capital: ~820,000
+    Phongsali: ~177,000
+    Louangnamtha: ~176,000
+    Oudomxai: ~307,000
+    Bokeo: ~205,000
+    Louangphabang: ~431,000
+    Houaphan: ~294,000
+    Xainyabouli: ~381,000
+    Xiangkhouang: ~252,000
+    Vientiane: ~432,000
+    Bolikhamxai: ~275,000
+    Khammouan: ~415,000
+    Savannakhet: ~939,000
+    Salavan: ~396,000
+    Xekong: ~120,000
+    Champasak: ~694,000
+    Attapu: ~153,000
+    Xaisomboun: ~93,000'''
+    laos_population = {line.split(': ')[0]: int(line.split(': ~')[1].replace(',', '')) for line in
+                       laos_population.split('\n')}
+    data_dict = {name[3:]: data.data() for name, data in data.items()}
+    full_data = {name: FullData(d.time_period, d.rainfall, d.mean_temperature, d.disease_cases,
+                                np.full(len(d), laos_population[name]))
+                 for name, d in data_dict.items()}
+    full_data = DataSet(full_data)
+    full_data.to_csv('/home/knut/Data/laos_full_data.csv')
+    # data = {name: FullData.combine(health.get_location(name).data(), spatio_temporal_dict.get_location(name).data(), laos_population[name])
+    #         for name in health.locations()}
\ No newline at end of file
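A sanity check of the period normalization done by parse_week in clean_laos_data2.py (input strings follow the raw export's 'W<week> <year>' form):

    assert parse_week('W5 2014') == '2014W05'
    assert parse_week('W12 2015') == '2015W12'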
diff --git a/scripts/explorations/xarray_gee.py b/scripts/explorations/xarray_gee.py
new file mode 100644
index 00000000..a0a0f527
--- /dev/null
+++ b/scripts/explorations/xarray_gee.py
@@ -0,0 +1,28 @@
+import ee
+import xarray
+import pyproj  # installed with pip to avoid missing proj database error
+import numpy as np
+
+from climate_health.google_earth_engine.gee_raw import load_credentials
+
+# required: https://github.com/google/Xee
+service_account = 'dhis2-demo@dhis2-gis.iam.gserviceaccount.com'
+# credentials = ee.ServiceAccountCredentials(service_account, '/Users/mastermaps/DHIS2/dhis-google-auth.json')
+credentials = load_credentials()
+ee.Initialize(ee.ServiceAccountCredentials(credentials.account, key_data=credentials.private_key))
+# ee.Initialize(credentials, opt_url='https://earthengine-highvolume.googleapis.com')
+collection = ee.ImageCollection('ECMWF/ERA5_LAND/DAILY_AGGR').filterDate('2024-08-01', '2024-09-01').select('temperature_2m', 'total_precipitation_sum')
+lon1 = 28.8
+lon2 = 30.9
+lat1 = -2.9
+lat2 = -1.0
+rwanda_bounds = ee.Geometry.Rectangle(lon1, lat1, lon2, lat2)
+projection = collection.first().select(0).projection()  # EPSG:4326
+dataset = xarray.open_dataset(
+    collection,
+    engine='ee',
+    projection=projection,
+    geometry=rwanda_bounds
+)
+first_image = dataset.isel(time=0)
+temp = first_image['temperature_2m'].values
\ No newline at end of file

diff --git a/scripts/external_model_example.py b/scripts/external_model_example.py
index 8fc93eab..ffe462b8 100644
--- a/scripts/external_model_example.py
+++ b/scripts/external_model_example.py
@@ -1,19 +1,32 @@
+import pandas as pd
+
 from climate_health.assessment.prediction_evaluator import evaluate_model
 from climate_health.external.external_model import get_model_from_directory_or_github_url
 from climate_health.external.r_models import models_path
 from climate_health.file_io.example_data_set import datasets
+import logging

-model_names = {'deepar': models_path / 'deepar',
-               'naive_model': models_path / 'naive_python_model_with_mlproject_file',
-               'ewars': 'https://github.com/sandvelab/chap_auto_ewars'}

-dataset = datasets['ISIMIP_dengue_harmonized'].load()
-dataset = dataset['brazil']
+if __name__ == '__main__':
+    logging.basicConfig(level=logging.INFO)
+    model_names = {
+        # 'deepar': models_path / 'deepar',
+        'naive_model': models_path / 'naive_python_model_with_mlproject_file',
+        # 'ewars': 'https://github.com/sandvelab/chap_auto_ewars'
+    }

-all_results = {}
-for name, model_name in model_names.items():
-    model = get_model_from_directory_or_github_url(model_name)
-    results = evaluate_model(model, dataset, prediction_length=6, n_test_sets=12, report_filename=f'{name}_report.pdf')
-    all_results[name] = results
+    dataset = datasets['ISIMIP_dengue_harmonized'].load()
+    dataset = dataset['vietnam']
+    n_tests = 7
+    prediction_length = 6
+    all_results = {}
+    for name, model_name in model_names.items():
+        model = get_model_from_directory_or_github_url(model_name)
+        results = evaluate_model(model, dataset,
+                                 prediction_length=prediction_length,
+                                 n_test_sets=n_tests,
+                                 report_filename=f'{name}_{n_tests}_{prediction_length}_report.pdf')
+        all_results[name] = results

-print(all_results)
+    report_file = 'evaluation_report.csv'
+    df = pd.DataFrame([res[0] | {'model': name} for name, res in all_results.items()])
+    df.to_csv(report_file, mode='w', header=True)
diff --git a/scripts/prediction_example.py b/scripts/prediction_example.py
new file mode 100644
index 00000000..024580dc
--- /dev/null
+++ b/scripts/prediction_example.py
@@ -0,0 +1,15 @@
+from climate_health.assessment.forecast import forecast_ahead, forecast_with_predicted_weather
+from climate_health.assessment.prediction_evaluator import plot_predictions
+from climate_health.data.datasets import ISIMIP_dengue_harmonized
+from climate_health.external.external_model import get_model_from_directory_or_github_url
+from climate_health.external.r_models import models_path
+
+if __name__ == '__main__':
+    model_name = 'https://github.com/sandvelab/chap_auto_ewars'
+    # model_name = models_path / 'naive_python_model_with_mlproject_file'
+    estimator = get_model_from_directory_or_github_url(model_name)
+    dataset = ISIMIP_dengue_harmonized['vietnam']
+    predictor = estimator.train(dataset)
+    predictions = forecast_with_predicted_weather(predictor, dataset, 3)
+    plot_predictions(predictions, dataset, 'prediction_example.pdf')

diff --git a/tests/test_climate_predictor.py b/tests/test_climate_predictor.py
index 5bbc855d..fbaa645c 100644
--- a/tests/test_climate_predictor.py
+++ b/tests/test_climate_predictor.py
@@ -12,9 +12,8 @@ def climate_data():
     time_period = PeriodRange.from_time_periods(Month.parse('2020-01'), Month.parse('2020-12'))
     values = np.arange(len(time_period))
     return DataSet(
-        {'oslo': ClimateData(time_period, values, values*2, values*3),
-         'stockholm': ClimateData(time_period, values, values*2, values*3)})
-
+        {'oslo': ClimateData(time_period, values, values * 2, values * 3),
+         'stockholm': ClimateData(time_period, values, values * 2, values * 3)})


 def test_climate_predictor(climate_data):
@@ -24,4 +23,3 @@ def test_climate_predictor(climate_data):
     prediction = predictor.predict(time_period)
-

diff --git a/tests/test_forecast.py b/tests/test_forecast.py
index 6f8d38ec..5d1cc213 100644
--- a/tests/test_forecast.py
+++ b/tests/test_forecast.py
@@ -1,9 +1,11 @@
 import pytest

-from climate_health.assessment.forecast import forecast, multi_forecast
+from climate_health.assessment.forecast import forecast, multi_forecast, forecast_ahead
+from climate_health.data.datasets import ISIMIP_dengue_harmonized
 from climate_health.file_io.example_data_set import datasets
 from climate_health.plotting.prediction_plot import plot_forecast_from_summaries
 from climate_health.predictor import get_model
+from climate_health.predictor.naive_estimator import NaiveEstimator
 from climate_health.time_period.date_util_wrapper import delta_month


@@ -12,7 +14,7 @@ def test_forecast():
     model = get_model('HierarchicalStateModelD2')(num_warmup=20, num_samples=20)
     dataset = datasets['hydromet_5_filtered'].load()
-    predictions = forecast(model, dataset, 12*delta_month)
+    predictions = forecast(model, dataset, 12 * delta_month)
     for location, prediction in predictions.items():
         fig = plot_forecast_from_summaries(prediction.data(), dataset.get_location(location).data())
         fig.show()
@@ -22,8 +24,15 @@ def test_multi_forecast():
     model = get_model('HierarchicalStateModelD2')(num_warmup=20, num_samples=20)
     dataset = datasets['hydromet_5_filtered'].load()
-    predictions_list = list(multi_forecast(model, dataset, 48*delta_month, pre_train_delta=24*delta_month))
+    predictions_list = list(multi_forecast(model, dataset, 48 * delta_month, pre_train_delta=24 * delta_month))
     for location, true_data in dataset.items():
         local_predictions = [pred.get_location(location).data() for pred in predictions_list]
         fig = plot_forecast_from_summaries(local_predictions, true_data.data())
         fig.show()
+
+
+def test_forecast_ahead():
+    model = NaiveEstimator()
+    dataset = ISIMIP_dengue_harmonized['vietnam']
+    prediction_length = 3
+    forecast_ahead(model, dataset, prediction_length)
diff --git a/tests/test_gridded_data.py b/tests/test_gridded_data.py
new file mode 100644
index 00000000..fe4bc6a6
--- /dev/null
+++ b/tests/test_gridded_data.py
@@ -0,0 +1,18 @@
+import pytest
+
+from climate_health.google_earth_engine.gee_raw import load_credentials
+from climate_health.climate_data.gridded_data import get_gridded_data
+
+
+@pytest.fixture
+def credentials():
+    try:
+        return load_credentials()
+    except Exception:
+        pytest.skip("Credentials not found")
+
+
+@pytest.fixture
+def polygons_filename(data_path):
+    return data_path / 'philippines_polygons.json'
+
+
+def test_get_gridded_data(polygons_filename, credentials):
+    get_gridded_data(polygons_filename)
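Outside pytest, the gridded-data helper can be exercised directly; a sketch under assumptions (the GeoJSON path is hypothetical, and Earth Engine credentials must be configured for load_credentials to succeed):

    from climate_health.climate_data.gridded_data import get_gridded_data

    temp = get_gridded_data('example_data/philippines_polygons.json')  # hypothetical path
    print(temp.shape)  # 2D grid of temperature_2m values over the bounding box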