Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Regrid NWP inputs #21

Merged
merged 23 commits into from
Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions .github/workflows/conda-pytest.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# CI workflow: run the test suite with pytest inside a conda environment.
# Conda is required because xesmf/esmpy are only reliably installable from
# conda-forge, not from PyPI.
name: Conda pytest
on: push

jobs:
  # NOTE: this job was previously misnamed "push_to_registry" / "Build docker
  # file for testing" (copy-paste leftover) — it only installs deps and tests.
  conda_pytest:
    name: Run pytest in a conda environment
    runs-on: ubuntu-latest
    steps:
      - name: Check out the repo
        uses: actions/checkout@v2

      - uses: conda-incubator/setup-miniconda@v2
        with:
          miniconda-version: "latest"
          channels: bioconda, conda-forge, defaults
          auto-update-conda: true
          auto-activate-base: true

      # xesmf and its ESMF backend (esmpy) must come from conda-forge
      - name: Install conda dependencies
        shell: bash -l {0}
        run: |
          conda install -c conda-forge xesmf esmpy -y

      - name: Install the repo
        shell: bash -l {0}
        run: |
          pip install -e .

      - name: Run pytest
        shell: bash -l {0}
        run: |
          python -m pytest
18 changes: 0 additions & 18 deletions .github/workflows/workflows.yaml

This file was deleted.

34 changes: 15 additions & 19 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,40 +1,36 @@
FROM python:3.10-slim
FROM continuumio/miniconda3

ARG TESTING=0

# make sure it doesn't fail if the Docker build doesn't know the git commit
ARG GIT_PYTHON_REFRESH=quiet
SHELL ["/bin/bash", "-l", "-c"]

RUN apt-get update
RUN apt-get install git -y
RUN apt-get install g++ gcc libgeos++-dev libproj-dev proj-data proj-bin -y

# copy files
# Copy files
COPY setup.py app/setup.py
COPY README.md app/README.md
COPY requirements.txt app/requirements.txt
RUN pip install git+https://github.com/SheffieldSolar/PV_Live-API#pvlive_api


# install requirements
RUN pip install torch --index-url https://download.pytorch.org/whl/cpu
RUN pip install -r app/requirements.txt

# copy library files
COPY pvnet_app/ app/pvnet_app/
COPY tests/ app/tests/
COPY scripts/ app/scripts/
COPY data/ app/data/

# change to app folder
# Install requirements
RUN conda install python=3.10
RUN conda install -c conda-forge xesmf esmpy -y
RUN echo "export ESMFMKFILE='/opt/conda/lib/esmf.mk'" >> ~/.bashrc
RUN pip install torch --index-url https://download.pytorch.org/whl/cpu
RUN pip install -r app/requirements.txt
RUN pip install git+https://github.com/SheffieldSolar/PV_Live-API#pvlive_api

# Change to app folder
WORKDIR /app

# install library
# Install library
RUN pip install -e .

# download models so the app can use the cached copies
RUN python scripts/cache_default_models.py


RUN if [ "$TESTING" = 1 ]; then pip install pytest pytest-cov coverage; fi

CMD ["python", "-u","pvnet_app/app.py"]
CMD ["python", "-u","pvnet_app/app.py"]
Binary file added data/nwp_target_coords.nc
Binary file not shown.
40 changes: 40 additions & 0 deletions pvnet_app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import torch
import typer
import xarray as xr
import xesmf as xe
from nowcasting_datamodel.connection import DatabaseConnection
from nowcasting_datamodel.models import (
ForecastSQL,
Expand Down Expand Up @@ -102,6 +103,42 @@
# ---------------------------------------------------------------------------
# HELPER FUNCTIONS

def regrid_nwp_data(nwp_path):
    """Regrid the NWP data in-place if it is not on the expected grid.

    Loads the zarr store at `nwp_path`, compares its latitude/longitude
    coordinates against the stored target grid, and — only if they differ —
    bilinearly regrids the data and re-saves it (rechunked) over the original
    store.

    Args:
        nwp_path: Path to the zarr store holding the raw NWP data. Overwritten
            in place when regridding is required.
    """
    import shutil

    ds_raw = xr.open_zarr(nwp_path)

    # These are the coords we are aiming for
    ds_target_coords = xr.load_dataset(f"{this_dir}/../data/nwp_target_coords.nc")

    # Check if the regridding step needs to be done at all
    needs_regridding = not (
        ds_raw.latitude.equals(ds_target_coords.latitude)
        and ds_raw.longitude.equals(ds_target_coords.longitude)
    )

    if not needs_regridding:
        logger.info("No NWP regridding required - skipping this step")
        return

    logger.info("Regridding NWP to expected grid")
    # It's more efficient to regrid eagerly, so load the data into memory first
    ds_raw = ds_raw.compute()

    # Regrid onto the target coordinates
    regridder = xe.Regridder(ds_raw, ds_target_coords, method="bilinear")
    ds_regridded = regridder(ds_raw)

    # Re-save - including rechunking. Use shutil.rmtree rather than
    # os.system("rm -fr ..."): no shell interpolation of the path, portable,
    # and failures are not silently ignored.
    shutil.rmtree(nwp_path, ignore_errors=True)
    ds_regridded["variable"] = ds_regridded["variable"].astype(str)
    ds_regridded.chunk(dict(step=12, x=100, y=100)).to_zarr(nwp_path)


def populate_data_config_sources(input_path, output_path):
"""Resave the data config and replace the source filepaths

Expand Down Expand Up @@ -319,6 +356,9 @@ def app(
fs = fsspec.open(os.environ["NWP_ZARR_PATH"]).fs
fs.get(os.environ["NWP_ZARR_PATH"], "nwp.zarr", recursive=True)

# Regrid the nwp data if needed
regrid_nwp_data("nwp.zarr")

# ---------------------------------------------------------------------------
# 2. Set up data loader
logger.info("Creating DataLoader")
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ fsspec[s3]
xarray
zarr
numpy
pandas==2.0
pandas
sqlalchemy
torchdata
pytest
pytest-cov
typer
testcontainers
xesmf
9 changes: 4 additions & 5 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def nwp_data():
f"{os.path.dirname(os.path.abspath(__file__))}/test_data/nwp_shell.zarr"
)

# Last init time was at least 2 hours ago and hour to 3-hour interval
# Last init time was at least 2 hours ago and floor to 3-hour interval
t0_datetime_utc = time_before_present(timedelta(hours=2)).floor(timedelta(hours=3))
ds.init_time.values[:] = pd.date_range(
t0_datetime_utc - timedelta(hours=3 * (len(ds.init_time) - 1)),
Expand All @@ -104,17 +104,16 @@ def nwp_data():
for v in list(ds.variables.keys()):
if ds[v].dtype == object:
ds[v].encoding.clear()

# Add data to dataset
ds["UKV"] = xr.DataArray(
np.zeros([len(ds[c]) for c in ds.coords]),
coords=ds.coords,
np.zeros([len(ds[c]) for c in ds.xindexes]),
coords=[ds[c] for c in ds.xindexes],
)

# Add stored attributes to DataArray
ds.UKV.attrs = ds.attrs["_data_attrs"]
del ds.attrs["_data_attrs"]

return ds


Expand Down
8 changes: 5 additions & 3 deletions tests/test_data/nwp_shell.zarr/.zattrs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
"GRIB_centre": "egrr",
"GRIB_centreDescription": "U.K. Met Office - Exeter",
"GRIB_edition": 2,
"GRIB_subCentre": 0,
"GRIB_subCentre": 5,
"history": "2023-10-26T12:56 GRIB to CDM+CF via cfgrib-0.9.10.3/ecCodes-2.31.0 with {\"source\": \"../tmp/nwpc/ground_snow-depth-water-equivalent_2023102609.grib\", \"filter_by_keys\": {}, \"encode_cf\": [\"parameter\", \"time\", \"geography\", \"vertical\"]}",
"institution": "U.K. Met Office - Exeter"
}
}
},
"coordinates": "latitude longitude"
}
2 changes: 1 addition & 1 deletion tests/test_data/nwp_shell.zarr/.zgroup
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"zarr_format": 2
}
}
Loading
Loading