Skip to content

Commit

Permalink
docing and refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
knutdrand committed Aug 30, 2024
1 parent b42cdc8 commit 0581ed8
Show file tree
Hide file tree
Showing 24 changed files with 92 additions and 37 deletions.
2 changes: 1 addition & 1 deletion climate_health/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
__version__ = '0.0.1'

from . import fetch

from . import data
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def __init__(self, time_index, data):
self._data = data

def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
    """NumPy ufunc-override hook.

    Hands back the ``ufunc`` object it was given, unchanged. ``method``,
    ``inputs`` and ``kwargs`` are accepted but not used.
    """
    # NOTE(review): returning the ufunc itself rather than a computed result
    # looks like a placeholder -- confirm the intended behaviour.
    return ufunc


class IsSpatialDataSet(Protocol[FeaturesT]):
Expand Down
21 changes: 12 additions & 9 deletions climate_health/assessment/dataset_splitting.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from typing import Iterable, Tuple, Protocol, Optional, Type

from climate_health.dataset import IsSpatioTemporalDataSet
from climate_health._legacy_dataset import IsSpatioTemporalDataSet
from climate_health.datatypes import ClimateHealthData, ClimateData, HealthData
from climate_health.spatio_temporal_data.temporal_dataclass import DataSet
from climate_health.time_period import Year, Month, TimePeriod
from climate_health.time_period.relationships import previous
import dataclasses


def split_period_on_resolution(param, param1, resolution) -> Iterable[Month]:
    """Placeholder that is not implemented yet.

    All three arguments are ignored and ``None`` is returned, despite the
    ``Iterable[Month]`` return annotation.
    """
    return None

Expand All @@ -25,7 +26,8 @@ def split_test_train_on_period(data_set: IsSpatioTemporalDataSet, split_points:
func = train_test_split_with_weather if include_future_weather else train_test_split

if include_future_weather:
return (train_test_split_with_weather(data_set, period, future_length, future_weather_class) for period in split_points)
return (train_test_split_with_weather(data_set, period, future_length, future_weather_class) for period in
split_points)
return (func(data_set, period, future_length) for period in split_points)


Expand Down Expand Up @@ -56,22 +58,23 @@ def train_test_split_with_weather(data_set: DataSet, prediction_start_period: Ti
train_set, test_set = train_test_split(data_set, prediction_start_period, extension)
tmp_values: Iterable[Tuple[str, ClimateHealthData]] = ((loc, temporal_data.data()) for loc, temporal_data in
test_set.items())
future_weather = test_set.remove_field('disease_cases') #SpatioTemporalDict(
#{loc: future_weather_class(
# *[getattr(values, field.name) if hasattr(values, field.name) else values.mean_temperature for field in dataclasses.fields(future_weather_class)])
# for loc, values in tmp_values})
future_weather = test_set.remove_field('disease_cases') # SpatioTemporalDict(
# {loc: future_weather_class(
# *[getattr(values, field.name) if hasattr(values, field.name) else values.mean_temperature for field in dataclasses.fields(future_weather_class)])
# for loc, values in tmp_values})
train_periods = {str(period) for data in train_set.data() for period in data.data().time_period}
future_periods = {str(period) for data in future_weather.data() for period in data.data().time_period}
assert train_periods & future_periods == set(), f"Train and future weather data overlap: {train_periods & future_periods}"
return train_set, test_set, future_weather


def get_split_points_for_data_set(data_set: IsSpatioTemporalDataSet, max_splits: int, start_offset=1) -> list[
    TimePeriod]:
    """Pick split points from the time axis of ``data_set``.

    The periods are read from the first location only; every other location
    is assumed to share the same time axis. The selection itself is delegated
    to ``get_split_points_for_period_range``.
    """
    first_location = next(iter(data_set.data()))
    time_axis = first_location.data().time_period
    return get_split_points_for_period_range(max_splits, time_axis, start_offset)


def get_split_points_for_period_range(max_splits, periods, start_offset):
    """Return up to ``max_splits`` evenly spaced split points from ``periods``.

    The spacing is ``(len(periods) - 1 - start_offset) // (max_splits + 1)``.
    The first split point sits one spacing past index ``start_offset`` and
    the following ones are taken at that same spacing.

    Args:
        max_splits: Maximum number of split points to return.
        periods: Sized, iterable collection of periods to choose from.
        start_offset: Index from which the spacing is counted.

    Returns:
        A list holding at most ``max_splits`` elements of ``periods``.

    Raises:
        ValueError: If ``periods`` is too short to give a spacing of at
            least one element.
    """
    delta = (len(periods) - 1 - start_offset) // (max_splits + 1)
    if delta <= 0:
        # A zero step makes the slice below fail with an opaque
        # "slice step cannot be zero"; a negative step would silently walk
        # backwards through the periods. Report the real cause instead.
        raise ValueError(
            f"Too few periods ({len(periods)}) for max_splits={max_splits} "
            f"and start_offset={start_offset}")
    return list(periods)[start_offset + delta::delta][:max_splits]
2 changes: 1 addition & 1 deletion climate_health/assessment/multi_location_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
from sklearn.metrics import mean_absolute_error

from climate_health.dataset import IsSpatioTemporalDataSet
from climate_health._legacy_dataset import IsSpatioTemporalDataSet
from climate_health.datatypes import HealthData, ResultType, SummaryStatistics


Expand Down
4 changes: 4 additions & 0 deletions climate_health/data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from ..spatio_temporal_data.temporal_dataclass import DataSet
from ..api_types import PeriodObservation

# Names exported by ``from climate_health.data import *``.
__all__ = ['DataSet', 'PeriodObservation']
2 changes: 2 additions & 0 deletions climate_health/data/adaptors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .gluonts_adaptor.dataset import DataSetAdaptor
# Module-level adaptor instance, created once at import time and reachable
# as ``adaptors.gluonts``.
gluonts = DataSetAdaptor()
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
from pathlib import Path
from typing import Iterable
from ..assessment.dataset_splitting import train_test_split
from ..file_io.example_data_set import datasets
from climate_health.assessment.dataset_splitting import train_test_split
from climate_health.file_io.example_data_set import datasets
from climate_health.datatypes import TimeSeriesData, remove_field
from climate_health.spatio_temporal_data.temporal_dataclass import DataSet
from ..spatio_temporal_data.multi_country_dataset import MultiCountryDataSet
from ..time_period import delta_month
from climate_health.spatio_temporal_data.multi_country_dataset import MultiCountryDataSet
from climate_health.time_period import delta_month

GlunTSDataSet = Iterable[dict]


class DataSetAdaptor:


@staticmethod
def from_gluonts(gluonts_dataset: GlunTSDataSet, dataclass: type[TimeSeriesData]) -> DataSet:
    """Convert a GluonTS-style dataset into a ``DataSet`` (not implemented).

    Declared without ``self``: under ``@staticmethod`` no instance is passed
    in, so a leading ``self`` parameter would capture the first real argument
    and a normal two-argument call would fail with ``TypeError``.

    Args:
        gluonts_dataset: Iterable of dict entries to convert.
        dataclass: ``TimeSeriesData`` subclass to build the result with.

    Raises:
        NotImplementedError: Always; the conversion is not written yet.
    """
    raise NotImplementedError

to_dataset = from_gluonts

@staticmethod
def to_gluonts(dataset: DataSet, start_index=0, static=None, real=None) -> GlunTSDataSet:
if isinstance(dataset, MultiCountryDataSet):
Expand All @@ -31,6 +35,8 @@ def to_gluonts(dataset: DataSet, start_index=0, static=None, real=None) -> GlunT
'feat_static_cat': [i]+static,
}

from_dataset = to_gluonts

@staticmethod
def to_gluonts_multicountry(dataset: MultiCountryDataSet) -> GlunTSDataSet:
offset = 0
Expand Down
2 changes: 1 addition & 1 deletion climate_health/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def to_csv(self, csv_file: str, **kwargs):
@classmethod
def create_class_from_basemodel(cls, dataclass: type[PeriodObservation]):
    """Build a ``TimeSeriesData`` dataclass that mirrors a ``PeriodObservation`` model.

    The new dataclass takes its name from ``dataclass.__name__`` and gets one
    field per entry in ``dataclass.model_fields``. Each field keeps the
    model's annotation, except ``time_period``, which is typed as ``Period``.

    Args:
        dataclass: The ``PeriodObservation`` subclass to mirror.

    Returns:
        A new dataclass type with ``TimeSeriesData`` as its base.
    """
    # Keep the mapping and the derived list under separate names: rebinding
    # one name to the list and then calling ``.items()`` on it again fails.
    model_fields = dataclass.model_fields
    field_specs = [(name, Period) if name == 'time_period' else (name, field.annotation)
                   for name, field in model_fields.items()]
    return dataclasses.make_dataclass(dataclass.__name__, field_specs, bases=(TimeSeriesData,))

@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion climate_health/external/external_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import yaml
import json

from climate_health.dataset import IsSpatioTemporalDataSet
from climate_health._legacy_dataset import IsSpatioTemporalDataSet
from climate_health.datatypes import ClimateHealthTimeSeries, ClimateData, HealthData, SummaryStatistics
from climate_health.docker_helper_functions import create_docker_image, run_command_through_docker_container
from climate_health.geojson import NeighbourGraph
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import numpy as np

from climate_health.dataset import ClimateData
from climate_health._legacy_dataset import ClimateData
from climate_health.datatypes import HealthData
from .hmc import sample
from climate_health.spatio_temporal_data.temporal_dataclass import DataSet
Expand Down
2 changes: 1 addition & 1 deletion climate_health/external/python_model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .external_model import run_command
from ..datatypes import ClimateHealthTimeSeries, HealthData, ClimateData
from ..dataset import IsSpatioTemporalDataSet
from .._legacy_dataset import IsSpatioTemporalDataSet

from climate_health.time_period import Month
import tempfile
Expand Down
2 changes: 1 addition & 1 deletion climate_health/external/r_model.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pathlib import Path
from typing import TypeVar, Generic
from climate_health.assessment.dataset_splitting import IsTimeDelta
from climate_health.dataset import IsSpatioTemporalDataSet
from climate_health._legacy_dataset import IsSpatioTemporalDataSet
from climate_health.datatypes import ClimateHealthTimeSeries, HealthData, ClimateData
from climate_health.time_period import Month

Expand Down
2 changes: 1 addition & 1 deletion climate_health/file_io/load.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .external_file import fetch_and_clean
from ..dataset import IsSpatioTemporalDataSet
from .._legacy_dataset import IsSpatioTemporalDataSet


def load_data_set(data_set_filename: str) -> IsSpatioTemporalDataSet:
Expand Down
2 changes: 1 addition & 1 deletion climate_health/predictor/naive_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
from sklearn import linear_model

from climate_health.dataset import IsSpatioTemporalDataSet
from climate_health._legacy_dataset import IsSpatioTemporalDataSet
from climate_health.spatio_temporal_data.temporal_dataclass import DataSet, TemporalDataclass
from climate_health.datatypes import HealthData, ClimateHealthTimeSeries, ClimateData
from climate_health.time_period.dataclasses import Period
Expand Down
2 changes: 1 addition & 1 deletion climate_health/predictor/protocol.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Protocol

from ..dataset import IsSpatioTemporalDataSet
from .._legacy_dataset import IsSpatioTemporalDataSet
from ..datatypes import ClimateData, ClimateHealthTimeSeries, HealthData


Expand Down
2 changes: 1 addition & 1 deletion climate_health/spatio_temporal_data/temporal_dataclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pandas as pd

from ..api_types import PeriodObservation
from ..dataset import TemporalIndexType, FeaturesT
from .._legacy_dataset import TemporalIndexType, FeaturesT
from ..datatypes import Location, add_field, remove_field, TimeSeriesArray, TimeSeriesData
from ..time_period import PeriodRange
from ..time_period.date_util_wrapper import TimeStamp
Expand Down
9 changes: 9 additions & 0 deletions docs_source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,12 @@ Functionality for fetching data
.. currentmodule:: climate_health.fetch

.. autofunction:: gee_era5

.. currentmodule:: climate_health.data

.. autoclass:: DataSet
:members: from_period_observations, from_pandas, to_pandas

.. autoclass:: PeriodObservation


2 changes: 1 addition & 1 deletion scripts/ext_model_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from climate_health.reports import HTMLReport
from climate_health.spatio_temporal_data.temporal_dataclass import DataSet
from tests.test_external_model_evaluation_acceptance import ExternalModelMock
from climate_health.dataset import IsSpatioTemporalDataSet
from climate_health._legacy_dataset import IsSpatioTemporalDataSet


def load_data_set(data_set_filename: str) -> IsSpatioTemporalDataSet:
Expand Down
31 changes: 31 additions & 0 deletions scripts/gluonts_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from climate_health.data import DataSet, PeriodObservation, adaptors

# Observation record used by this example; one instance is created per
# location and period further down.
# NOTE(review): ``time_period`` is not declared here but is passed as a
# keyword when instances are built -- presumably it is inherited from
# PeriodObservation; confirm against api_types.
class Obs(PeriodObservation):
    disease_cases: int  # integer count
    rainfall: float
    temperature: float


# Create observations: a dict mapping each location name to the list of Obs
# records for that location (three periods, '2020-01' to '2020-03', each).
observation_dict = {'Oslo': [
    Obs(time_period='2020-01', disease_cases=10, rainfall=0.1, temperature=20),
    Obs(time_period='2020-02', disease_cases=11, rainfall=0.2, temperature=22),
    Obs(time_period='2020-03', disease_cases=12, rainfall=0.3, temperature=21)],
    'Troms': [
    Obs(time_period='2020-01', disease_cases=2, rainfall=1.1, temperature=10),
    Obs(time_period='2020-02', disease_cases=2, rainfall=2.2, temperature=11),
    Obs(time_period='2020-03', disease_cases=2, rainfall=0.3, temperature=12)]}

# Create a climate health dataset from the per-location observations
dataset = DataSet.from_period_observations(observation_dict)

# Convert to a gluonts dataset
gluonts_dataset = adaptors.gluonts.from_dataset(dataset)

# Materialise the converted dataset with list() so its entries are printed
print(list(gluonts_dataset))






2 changes: 1 addition & 1 deletion tests/data_wrangling/test_standardize_laos_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def messy_standardization_function(filename: Path, geolocator):
print(month[0].month, month[-1].month)
time_period = period_range(month[0], month[-1], exclusive_end=False)
data_dict = {get_city_name(c): HealthData(time_period, data[c]) for c in data.columns[1:]}
from climate_health.dataset import SpatioTemporalDict
from climate_health._legacy_dataset import SpatioTemporalDict
return SpatioTemporalDict(data_dict)


Expand Down
2 changes: 1 addition & 1 deletion tests/test_external_model_evaluation_acceptance.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pytest

from climate_health.assessment.prediction_evaluator import evaluate_model
from climate_health.dataset import IsSpatioTemporalDataSet
from climate_health._legacy_dataset import IsSpatioTemporalDataSet
from climate_health.datatypes import ClimateHealthTimeSeries, HealthData
from climate_health.external.external_model import ExternalCommandLineModel
from climate_health.external.models import SSM
Expand Down
16 changes: 8 additions & 8 deletions tests/test_gluonts_adaptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,10 @@

import pytest

from climate_health.gluonts_adaptor.dataset import DataSetAdaptor, get_dataset, get_split_dataset
from climate_health.data.gluonts_adaptor.dataset import DataSetAdaptor, get_dataset, get_split_dataset
from climate_health.spatio_temporal_data.multi_country_dataset import MultiCountryDataSet
from .data_fixtures import train_data_pop, full_data
from climate_health.file_io.example_data_set import datasets

from .data_fixtures import train_data_pop, full_data

@pytest.fixture
def full_dataset():
Expand All @@ -16,6 +15,7 @@ def full_dataset():
dataset = MultiCountryDataSet.from_folder(foldername)
return dataset


def test_to_gluonts(train_data_pop):
dataset = DataSetAdaptor().to_gluonts(train_data_pop)
dataset = list(dataset)
Expand All @@ -25,29 +25,29 @@ def test_to_gluonts(train_data_pop):
for i, data in enumerate(dataset):
assert data['feat_static_cat'] == [i]


@pytest.fixture()
def laos_full_data():
    """Yield the dataset name 'laos_full_data', skipping when its file is absent."""
    name = 'laos_full_data'
    if not datasets[name].filepath().exists():
        pytest.skip()
    return name


def test_get_dataset(laos_full_data):
    """Smoke test: the dataset can be fully loaded and each entry printed."""
    entries = list(get_dataset(laos_full_data))
    for entry in entries:
        print(entry)


def test_get_split_dataset(laos_full_data):
    """The first test series is six periods longer than the first train series."""
    train, test = get_split_dataset(laos_full_data, n_periods=6)
    first_train = list(train)[0]
    first_test = list(test)[0]
    for entry in (first_train, first_test):
        print(entry.keys())
    n_train = len(first_train['target'])
    n_test = len(first_test['target'])
    assert n_test == n_train + 6


def test_full_data(full_dataset):
    """Smoke test: the multi-country conversion can be fully materialised."""
    converted = DataSetAdaptor.to_gluonts_multicountry(full_dataset)
    entries = list(converted)
    print(entries)




2 changes: 1 addition & 1 deletion tests/test_model_building_acceptance.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from climate_health.dataset import IsSpatioTemporalDataSet
from climate_health._legacy_dataset import IsSpatioTemporalDataSet
from climate_health.datatypes import HealthData
from climate_health.predictor.naive_predictor import NaiveForecastSampler
from climate_health.time_period import Day
Expand Down

0 comments on commit 0581ed8

Please sign in to comment.