diff --git a/.github/workflows/conda-pytest.yaml b/.github/workflows/conda-pytest.yaml
new file mode 100644
index 0000000..60fe551
--- /dev/null
+++ b/.github/workflows/conda-pytest.yaml
@@ -0,0 +1,32 @@
+name: Conda pytest
+on: push
+
+jobs:
+  conda_pytest:
+    name: Run pytest in a conda environment
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out the repo
+        uses: actions/checkout@v4
+
+      - uses: conda-incubator/setup-miniconda@v3
+        with:
+          miniconda-version: "latest"
+          channels: bioconda, conda-forge, defaults
+          auto-update-conda: true
+          auto-activate-base: true
+
+      - name: Install the conda dependencies
+        shell: bash -l {0}
+        run: |
+          conda install -c conda-forge xesmf esmpy -y
+
+      - name: Install the repo
+        shell: bash -l {0}
+        run: |
+          pip install -e .
+
+      - name: Run pytest
+        shell: bash -l {0}
+        run: |
+          python -m pytest
diff --git a/.github/workflows/workflows.yaml b/.github/workflows/workflows.yaml
deleted file mode 100644
index c412050..0000000
--- a/.github/workflows/workflows.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-name: Python package tests
-
-on:
-  push:
-  schedule:
-    - cron: "0 12 * * 1"
-jobs:
-  call-run-python-tests:
-    uses: openclimatefix/.github/.github/workflows/python-test.yml@main
-    with:
-      # 0 means don't use pytest-xdist
-      pytest_numcpus: "4"
-      # pytest-cov looks at this folder
-      pytest_cov_dir: "pvnet_app"
-      # extra things to install
-      sudo_apt_install: "libgeos++-dev libproj-dev proj-data proj-bin"
-      # brew_install: "proj geos librttopo"
-      os_list: '["ubuntu-latest"]'
diff --git a/Dockerfile b/Dockerfile
index cb6a6c4..3f39349 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,40 +1,36 @@
-FROM python:3.10-slim
+FROM continuumio/miniconda3
 
 ARG TESTING=0
 
-# make sure it doesnt fail if the docker file doesnt know the git commit
-ARG GIT_PYTHON_REFRESH=quiet
+SHELL ["/bin/bash", "-l", "-c"]
 
 RUN apt-get update
 RUN apt-get install git -y
 RUN apt-get install g++ gcc libgeos++-dev libproj-dev proj-data proj-bin -y
 
-# copy files
+# Copy files
 COPY setup.py
app/setup.py
 COPY README.md app/README.md
 COPY requirements.txt app/requirements.txt
-RUN pip install git+https://github.com/SheffieldSolar/PV_Live-API#pvlive_api
-
-
-# install requirements
-RUN pip install torch --index-url https://download.pytorch.org/whl/cpu
-RUN pip install -r app/requirements.txt
-
-# copy library files
 COPY pvnet_app/ app/pvnet_app/
 COPY tests/ app/tests/
 COPY scripts/ app/scripts/
+COPY data/ app/data/
 
-# change to app folder
+# Install requirements
+RUN conda install python=3.10 -y
+RUN conda install -c conda-forge xesmf esmpy -y
+ENV ESMFMKFILE=/opt/conda/lib/esmf.mk
+RUN pip install torch --index-url https://download.pytorch.org/whl/cpu
+RUN pip install -r app/requirements.txt
+RUN pip install git+https://github.com/SheffieldSolar/PV_Live-API#pvlive_api
+
+# Change to app folder
 WORKDIR /app
 
-# install library
+# Install library
 RUN pip install -e .
 
-# download models so app can used cached
-RUN python scripts/cache_default_models.py
-
-
 RUN if [ "$TESTING" = 1 ]; then pip install pytest pytest-cov coverage; fi
 
-CMD ["python", "-u","pvnet_app/app.py"]
+CMD ["python", "-u","pvnet_app/app.py"]
\ No newline at end of file
diff --git a/data/nwp_target_coords.nc b/data/nwp_target_coords.nc
new file mode 100644
index 0000000..e629e2a
Binary files /dev/null and b/data/nwp_target_coords.nc differ
diff --git a/pvnet_app/app.py b/pvnet_app/app.py
index faa297e..3f9f35e 100644
--- a/pvnet_app/app.py
+++ b/pvnet_app/app.py
@@ -19,6 +19,7 @@
 import torch
 import typer
 import xarray as xr
+import xesmf as xe
 from nowcasting_datamodel.connection import DatabaseConnection
 from nowcasting_datamodel.models import (
     ForecastSQL,
@@ -102,6 +103,53 @@
 # ---------------------------------------------------------------------------
 # HELPER FUNCTIONS
 
+def regrid_nwp_data(nwp_path):
+    """Regrid the NWP zarr at `nwp_path` onto the expected grid, re-saving it in-place.
+
+    The latitude and longitude coordinates of the store are compared with those saved in
+    `data/nwp_target_coords.nc`. If both already match, nothing is written. Otherwise the data
+    is bilinearly regridded onto the target coordinates and the store at `nwp_path` is replaced
+    with the rechunked result.
+
+    Args:
+        nwp_path: Path to the local NWP zarr store which is checked and, if needed, overwritten
+    """
+    # Imported locally since it is only needed for removing the old store below
+    import shutil
+
+    ds_raw = xr.open_zarr(nwp_path)
+
+    # These are the coords we are aiming for
+    ds_target_coords = xr.load_dataset(f"{this_dir}/../data/nwp_target_coords.nc")
+
+    # Check if regridding step needs to be done
+    needs_regridding = not (
+        ds_raw.latitude.equals(ds_target_coords.latitude)
+        and ds_raw.longitude.equals(ds_target_coords.longitude)
+    )
+
+    if not needs_regridding:
+        logger.info("No NWP regridding required - skipping this step")
+        return
+
+    logger.info("Regridding NWP to expected grid")
+    # It's more efficient to regrid eagerly. Loading the data into memory also means it can
+    # still be written out after the original store has been removed below
+    ds_raw = ds_raw.compute()
+
+    # Regrid
+    regridder = xe.Regridder(ds_raw, ds_target_coords, method="bilinear")
+    ds_regridded = regridder(ds_raw)
+    ds_regridded["variable"] = ds_regridded["variable"].astype(str)
+
+    # Remove the old store directly rather than via a shell command, so that the path is never
+    # interpreted by a shell and a failed removal raises an error rather than passing silently
+    shutil.rmtree(nwp_path)
+
+    # Re-save - including rechunking
+    ds_regridded.chunk(dict(step=12, x=100, y=100)).to_zarr(nwp_path)
+
+
 def populate_data_config_sources(input_path, output_path):
     """Resave the data config and replace the source filepaths
 
@@ -319,6 +367,9 @@ def app(
     fs = fsspec.open(os.environ["NWP_ZARR_PATH"]).fs
     fs.get(os.environ["NWP_ZARR_PATH"], "nwp.zarr", recursive=True)
 
+    # Regrid the nwp data if needed
+    regrid_nwp_data("nwp.zarr")
+
     # ---------------------------------------------------------------------------
     # 2.
Set up data loader logger.info("Creating DataLoader") diff --git a/requirements.txt b/requirements.txt index a6879ec..e0e253a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,10 +7,11 @@ fsspec[s3] xarray zarr numpy -pandas==2.0 +pandas sqlalchemy torchdata pytest pytest-cov typer testcontainers +xesmf diff --git a/tests/conftest.py b/tests/conftest.py index 52508fe..23f1fe2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -88,7 +88,7 @@ def nwp_data(): f"{os.path.dirname(os.path.abspath(__file__))}/test_data/nwp_shell.zarr" ) - # Last init time was at least 2 hours ago and hour to 3-hour interval + # Last init time was at least 2 hours ago and floor to 3-hour interval t0_datetime_utc = time_before_present(timedelta(hours=2)).floor(timedelta(hours=3)) ds.init_time.values[:] = pd.date_range( t0_datetime_utc - timedelta(hours=3 * (len(ds.init_time) - 1)), @@ -104,17 +104,16 @@ def nwp_data(): for v in list(ds.variables.keys()): if ds[v].dtype == object: ds[v].encoding.clear() - + # Add data to dataset ds["UKV"] = xr.DataArray( - np.zeros([len(ds[c]) for c in ds.coords]), - coords=ds.coords, + np.zeros([len(ds[c]) for c in ds.xindexes]), + coords=[ds[c] for c in ds.xindexes], ) # Add stored attributes to DataArray ds.UKV.attrs = ds.attrs["_data_attrs"] del ds.attrs["_data_attrs"] - return ds diff --git a/tests/test_data/nwp_shell.zarr/.zattrs b/tests/test_data/nwp_shell.zarr/.zattrs index 87da0a2..91d151a 100644 --- a/tests/test_data/nwp_shell.zarr/.zattrs +++ b/tests/test_data/nwp_shell.zarr/.zattrs @@ -4,7 +4,9 @@ "GRIB_centre": "egrr", "GRIB_centreDescription": "U.K. Met Office - Exeter", "GRIB_edition": 2, - "GRIB_subCentre": 0, + "GRIB_subCentre": 5, + "history": "2023-10-26T12:56 GRIB to CDM+CF via cfgrib-0.9.10.3/ecCodes-2.31.0 with {\"source\": \"../tmp/nwpc/ground_snow-depth-water-equivalent_2023102609.grib\", \"filter_by_keys\": {}, \"encode_cf\": [\"parameter\", \"time\", \"geography\", \"vertical\"]}", "institution": "U.K. 
Met Office - Exeter" - } -} + }, + "coordinates": "latitude longitude" +} \ No newline at end of file diff --git a/tests/test_data/nwp_shell.zarr/.zgroup b/tests/test_data/nwp_shell.zarr/.zgroup index 3f3fad2..3b7daf2 100644 --- a/tests/test_data/nwp_shell.zarr/.zgroup +++ b/tests/test_data/nwp_shell.zarr/.zgroup @@ -1,3 +1,3 @@ { "zarr_format": 2 -} +} \ No newline at end of file diff --git a/tests/test_data/nwp_shell.zarr/.zmetadata b/tests/test_data/nwp_shell.zarr/.zmetadata index 1409a58..27f0a35 100644 --- a/tests/test_data/nwp_shell.zarr/.zmetadata +++ b/tests/test_data/nwp_shell.zarr/.zmetadata @@ -6,9 +6,11 @@ "GRIB_centre": "egrr", "GRIB_centreDescription": "U.K. Met Office - Exeter", "GRIB_edition": 2, - "GRIB_subCentre": 0, + "GRIB_subCentre": 5, + "history": "2023-10-26T12:56 GRIB to CDM+CF via cfgrib-0.9.10.3/ecCodes-2.31.0 with {\"source\": \"../tmp/nwpc/ground_snow-depth-water-equivalent_2023102609.grib\", \"filter_by_keys\": {}, \"encode_cf\": [\"parameter\", \"time\", \"geography\", \"vertical\"]}", "institution": "U.K. Met Office - Exeter" - } + }, + "coordinates": "latitude longitude" }, ".zgroup": { "zarr_format": 2 @@ -29,7 +31,7 @@ "filters": null, "order": "C", "shape": [ - 2 + 1 ], "zarr_format": 2 }, @@ -42,9 +44,71 @@ "standard_name": "forecast_reference_time", "units": "nanoseconds since 1970-01-01" }, + "latitude/.zarray": { + "chunks": [ + 319, + 227 + ], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1 + }, + "dtype": "