Data sampler update #170

Closed
wants to merge 11 commits
114 changes: 99 additions & 15 deletions pvnet_app/config.py
@@ -16,36 +16,106 @@ def save_yaml_config(config: dict, path: str) -> None:
yaml.dump(config, file, default_flow_style=False)


# def populate_config_with_data_data_filepaths(config: dict, gsp_path: str = "") -> dict:
# """Populate the data source filepaths in the config

# Args:
# config: The data config
# gsp_path: For lagacy usage only
# """
# production_paths = {
# "gsp": gsp_path,
# "nwp": {"ukv": nwp_ukv_path, "ecmwf": nwp_ecmwf_path},
# "satellite": sat_path,
# }

# # Replace data sources - GSP and satellite
# for source in ["gsp", "satellite"]:
# # v0 and v1 schema
# if source in config["input_data"]:
# zarr_path_key = (
# f"{source}_zarr_path"
# if f"{source}_zarr_path" in config["input_data"][source]
# else "zarr_path"
# )

# if config["input_data"][source][zarr_path_key] != "":
# assert source in production_paths, f"Missing production path: {source}"
# config["input_data"][source][zarr_path_key] = production_paths[source]

# # Handle NWP separately - nested
# if "nwp" in config["input_data"]:
# nwp_config = config["input_data"]["nwp"]
# for nwp_source in nwp_config.keys():
# # v0 and v1 schema
# zarr_path_key = (
# "nwp_zarr_path"
# if "nwp_zarr_path" in nwp_config[nwp_source]
# else "zarr_path"
# )
# provider_key = (
# "nwp_provider"
# if "nwp_provider" in nwp_config[nwp_source]
# else "provider"
# )

# if zarr_path_key in nwp_config[nwp_source] and nwp_config[nwp_source][zarr_path_key] != "":
# provider = nwp_config[nwp_source][provider_key].lower()
# assert provider in production_paths["nwp"], f"Missing NWP path: {provider}"
# nwp_config[nwp_source][zarr_path_key] = production_paths["nwp"][provider]

# return config


def populate_config_with_data_data_filepaths(config: dict, gsp_path: str = "") -> dict:
"""Populate the data source filepaths in the config
"""Populate the data source filepaths in the config with backwards compatibility

Args:
config: The data config
gsp_path: For lagacy usage only
gsp_path: For legacy usage only
"""

production_paths = {
"gsp": gsp_path,
"nwp": {"ukv": nwp_ukv_path, "ecmwf": nwp_ecmwf_path},
"satellite": sat_path,
}

# Replace data sources
# Backward compatibility for attribute handling
for source in ["gsp", "satellite"]:
if source in config["input_data"]:
if config["input_data"][source][f"{source}_zarr_path"] != "":
source_config = config["input_data"][source]
path_key = f"{source}_zarr_path"

if source_config.get(path_key, ""):
assert source in production_paths, f"Missing production path: {source}"
config["input_data"][source][f"{source}_zarr_path"] = production_paths[source]
source_config[path_key] = production_paths[source]

if source == "gsp":
source_config["handle_legacy_attrs"] = True

# NWP is nested so much be treated separately
# NWP is nested - treated separately
if "nwp" in config["input_data"]:
nwp_config = config["input_data"]["nwp"]
for nwp_source in nwp_config.keys():
if nwp_config[nwp_source]["nwp_zarr_path"] != "":
zarr_path = nwp_config[nwp_source].get("nwp_zarr_path", "")
if zarr_path:
assert "nwp" in production_paths, "Missing production path: nwp"
assert nwp_source in production_paths["nwp"], f"Missing NWP path: {nwp_source}"
nwp_config[nwp_source]["nwp_zarr_path"] = production_paths["nwp"][nwp_source]

# v0 and v1 schema
old_keys = [
("zarr_path", "nwp_zarr_path"),
("channels", "nwp_channels"),
("image_size_pixels_height", "nwp_image_size_pixels_height"),
("image_size_pixels_width", "nwp_image_size_pixels_width"),
("provider", "nwp_provider")
]

for old_key, new_key in old_keys:
if old_key in nwp_config[nwp_source]:
nwp_config[nwp_source][new_key] = nwp_config[nwp_source].pop(old_key)

return config
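
For illustration only (not part of this diff), a minimal sketch of how the updated function behaves on a v1-style config. The example_config dict and the placeholder paths are invented here; nwp_ukv_path, nwp_ecmwf_path and sat_path are assumed to be the module-level constants defined elsewhere in pvnet_app/config.py.

example_config = {
    "input_data": {
        "gsp": {"gsp_zarr_path": "placeholder.zarr"},
        "satellite": {"satellite_zarr_path": "placeholder.zarr"},
        "nwp": {"ukv": {"nwp_zarr_path": "placeholder.zarr", "nwp_provider": "ukv"}},
    }
}

populated = populate_config_with_data_data_filepaths(example_config, gsp_path="gsp.zarr")
# populated["input_data"]["gsp"]["gsp_zarr_path"]              -> "gsp.zarr"
# populated["input_data"]["gsp"]["handle_legacy_attrs"]        -> True
# populated["input_data"]["satellite"]["satellite_zarr_path"]  -> sat_path
# populated["input_data"]["nwp"]["ukv"]["nwp_zarr_path"]       -> nwp_ukv_path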


@@ -56,18 +126,32 @@ def overwrite_config_dropouts(config: dict) -> dict:
config: The data config
"""

# Replace data sources
# Replace data source - satellite
for source in ["satellite"]:

# v0 and v1 schema
if source in config["input_data"]:
if config["input_data"][source][f"{source}_zarr_path"] != "":
config["input_data"][source][f"dropout_timedeltas_minutes"] = None

# NWP is nested so much be treated separately
zarr_path_key = (
f"{source}_zarr_path"
if f"{source}_zarr_path" in config["input_data"][source]
else "zarr_path"
)
if config["input_data"][source][zarr_path_key] != "":
config["input_data"][source]["dropout_timedeltas_minutes"] = None

# Handle NWP separately - nested
if "nwp" in config["input_data"]:
nwp_config = config["input_data"]["nwp"]
for nwp_source in nwp_config.keys():
if nwp_config[nwp_source]["nwp_zarr_path"] != "":
nwp_config[nwp_source]["dropout_timedeltas_minutes"] = None

# v0 and v1 schema
zarr_path_key = (
"nwp_zarr_path"
if "nwp_zarr_path" in nwp_config[nwp_source]
else "zarr_path"
)
if zarr_path_key in nwp_config[nwp_source] and nwp_config[nwp_source][zarr_path_key] != "":
config["input_data"]["nwp"][nwp_source]["dropout_timedeltas_minutes"] = None

return config
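
For illustration only (not part of this diff), a sketch of the dropout overwrite on a v0-style config that still uses the un-prefixed zarr_path key; the example dict is invented here.

v0_config = {
    "input_data": {
        "satellite": {"zarr_path": "sat.zarr", "dropout_timedeltas_minutes": [-30]},
        "nwp": {"ukv": {"zarr_path": "ukv.zarr", "dropout_timedeltas_minutes": [-60]}},
    }
}

v0_config = overwrite_config_dropouts(v0_config)
# v0_config["input_data"]["satellite"]["dropout_timedeltas_minutes"]  -> None
# v0_config["input_data"]["nwp"]["ukv"]["dropout_timedeltas_minutes"] -> None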

30 changes: 26 additions & 4 deletions pvnet_app/model_configs/all_models.yaml
@@ -1,7 +1,9 @@
# Here we define all the models that are available in the app
# Batches are prepared only once, so the extra models must be able to run on the batches created
# to run the pvnet_v2 model

models:

- name: pvnet_v2
pvnet:
repo: openclimatefix/pvnet_uk_region
@@ -13,15 +15,17 @@ models:
save_gsp_sum: False
verbose: True
save_gsp_to_recent: true

- name: pvnet_v2-sat0-samples-v1
pvnet:
repo: openclimatefix/pvnet_uk_region
version: 8a7cc21b64d25ce1add7a8547674be3143b2e650
summation:
repo: openclimatefix/pvnet_v2_summation
version: dcfdc17fda8e48c387122614bec8b284eaa868b9

# single source models
- name: pvnet_v2-sat0-only-samples-v1"
- name: pvnet_v2-sat0-only-samples-v1
pvnet:
repo: openclimatefix/pvnet_uk_region
version: d7ab648942c85b6788adcdbed44c91c4e1c5604a
@@ -47,6 +51,7 @@ models:
version: 4fe6b1441b6dd549292c201ed85eee156ecc220c
ecmwf_only: True
uses_satellite_data: False

# This is the old model for pvnet and pvnet_ecmwf
- name: pvnet_v2
pvnet:
@@ -60,6 +65,7 @@ models:
verbose: True
save_gsp_to_recent: True
uses_ocf_data_sampler: False

- name: pvnet_ecmwf # this name is important as it used for blending
pvnet:
repo: openclimatefix/pvnet_uk_region
@@ -70,9 +76,9 @@ models:
ecmwf_only: True
uses_satellite_data: False
uses_ocf_data_sampler: False
# The day ahead model has not yet been re-trained with data-sampler.
# It will be run with the legacy dataloader using ocf_datapipes
- name: pvnet_day_ahead

# Legacy day ahead model without data-sampler
- name: pvnet_day_ahead_legacy
pvnet:
repo: openclimatefix/pvnet_uk_region_day_ahead
version: d87565731692a6003e43caac4feaed0f69e79272
@@ -85,4 +91,20 @@ models:
save_gsp_to_recent: True
day_ahead: True
uses_ocf_data_sampler: False
config_schema_version: "v0"

# Day ahead model that utilises data-sampler
- name: pvnet_day_ahead
pvnet:
repo: openclimatefix/pvnet_uk_region_day_ahead
version: bc5bfcfedfbe9c722befc52d195177720582b4f7
summation:
repo: openclimatefix/pvnet_summation_uk_national_day_ahead
version: ed60c5d32a020242ca4739dcc6dbc8864f783a08
use_adjuster: True
save_gsp_sum: True
verbose: True
save_gsp_to_recent: True
day_ahead: True
uses_ocf_data_sampler: True
config_schema_version: "v1"
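
As a rough sketch (an assumption for illustration, not code in this PR) of how these entries tie back to the pydantic definitions below: each item under models: is parsed into the Model class, so flags such as uses_ocf_data_sampler and config_schema_version become typed attributes. The models field name on the container is inferred from the YAML layout.

import yaml
from pvnet_app.model_configs.pydantic_models import Models

with open("pvnet_app/model_configs/all_models.yaml") as f:
    all_models = Models(**yaml.safe_load(f))

for model in all_models.models:  # assumes the container exposes a `models` list
    print(model.name, model.uses_ocf_data_sampler, model.config_schema_version)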
6 changes: 6 additions & 0 deletions pvnet_app/model_configs/pydantic_models.py
@@ -55,6 +55,12 @@ class Model(BaseModel):
description="If this model uses data sampler, old one uses ocf_datapipes",
)

config_schema_version: Optional[str] = Field(
"v1",
title="Config Schema Version",
description="Schema version - 'v0' for legacy ocf_datapipes format or 'v1' for data-sampler"
)
Review comment (Contributor):
Ah, if it's this, can't you use uses_ocf_data_sampler?
Or is it for when the model was trained using an older ocf-data-sampler?
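
One possible reading of the distinction, sketched as an assumption rather than as code from this PR: uses_ocf_data_sampler selects which dataloader builds the batches, while config_schema_version records which key layout the model's data config was written against, so the two flags can in principle vary independently.

def select_pipeline(model):
    # Hypothetical helper for illustration only
    loader = "ocf-data-sampler" if model.uses_ocf_data_sampler else "ocf_datapipes"
    schema = model.config_schema_version  # "v0" legacy keys vs "v1" data-sampler keys
    return loader, schema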



class Models(BaseModel):
"""A group of ml models"""