From c8d3553003923b9cd64bfb9a3f7f5ceb1b2c4133 Mon Sep 17 00:00:00 2001 From: Joel Oskarsson Date: Tue, 4 Jun 2024 21:33:52 +0200 Subject: [PATCH 1/3] Fix swapped x and y dimensions in comments and variable names for MEPS data (#52) The x- and y-dimensions for the MEPS data are swapped in comments describing tensor shapes, and also in some variable names. This change swaps from (x, y) ordering to the correct (y, x) ordering. This fixes #46. See the issue for a more clear description. --- CHANGELOG.md | 5 +++++ create_grid_features.py | 6 +++--- neural_lam/weather_dataset.py | 28 ++++++++++++++-------------- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3544b299..f4680c37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -76,6 +76,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [\#47](https://github.com/mllam/neural-lam/pull/47) @joeloskarsson +- Fix incorrect ordering of x- and y-dimensions in comments describing tensor + shapes for MEPS data + [\#52](https://github.com/mllam/neural-lam/pull/52) + @joeloskarsson + ## [v0.1.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.1.0) First tagged release of `neural-lam`, matching Oskarsson et al 2023 publication diff --git a/create_grid_features.py b/create_grid_features.py index c3714368..4f058e17 100644 --- a/create_grid_features.py +++ b/create_grid_features.py @@ -29,14 +29,14 @@ def main(): # -- Static grid node features -- grid_xy = torch.tensor( np.load(os.path.join(static_dir_path, "nwp_xy.npy")) - ) # (2, N_x, N_y) + ) # (2, N_y, N_x) grid_xy = grid_xy.flatten(1, 2).T # (N_grid, 2) pos_max = torch.max(torch.abs(grid_xy)) grid_xy = grid_xy / pos_max # Divide by maximum coordinate geopotential = torch.tensor( np.load(os.path.join(static_dir_path, "surface_geopotential.npy")) - ) # (N_x, N_y) + ) # (N_y, N_x) geopotential = geopotential.flatten(0, 1).unsqueeze(1) # (N_grid,1) gp_min = torch.min(geopotential) gp_max = torch.max(geopotential) @@ -46,7 +46,7 @@ def main(): grid_border_mask = torch.tensor( np.load(os.path.join(static_dir_path, "border_mask.npy")), dtype=torch.int64, - ) # (N_x, N_y) + ) # (N_y, N_x) grid_border_mask = ( grid_border_mask.flatten(0, 1).to(torch.float).unsqueeze(1) ) # (N_grid, 1) diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index a782806b..3288ed67 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -16,8 +16,8 @@ class WeatherDataset(torch.utils.data.Dataset): For our dataset: N_t' = 65 N_t = 65//subsample_step (= 21 for 3h steps) - dim_x = 268 - dim_y = 238 + dim_y = 268 + dim_x = 238 N_grid = 268x238 = 63784 d_features = 17 (d_features' = 18) d_forcing = 5 @@ -87,7 +87,7 @@ def __getitem__(self, idx): try: full_sample = torch.tensor( np.load(sample_path), dtype=torch.float32 - ) # (N_t', dim_x, dim_y, d_features') + ) # (N_t', dim_y, dim_x, d_features') except ValueError: print(f"Failed to load {sample_path}") @@ -101,40 +101,40 @@ def __getitem__(self, idx): sample = full_sample[ subsample_index : subsample_end_index : self.subsample_step ] - # (N_t, dim_x, dim_y, d_features') + # (N_t, dim_y, dim_x, d_features') # Remove feature 15, "z_height_above_ground" sample = torch.cat( (sample[:, :, :, :15], sample[:, :, :, 16:]), dim=3 - ) # (N_t, dim_x, dim_y, d_features) + ) # (N_t, dim_y, dim_x, d_features) # Accumulate solar radiation instead of just subsampling - rad_features = full_sample[:, :, :, 2:4] # (N_t', dim_x, dim_y, 2) + rad_features = full_sample[:, :, :, 2:4] # (N_t', dim_y, dim_x, 2) # Accumulate for first time step init_accum_rad = torch.sum( rad_features[: (subsample_index + 1)], dim=0, keepdim=True - ) # (1, dim_x, dim_y, 2) + ) # (1, dim_y, dim_x, 2) # Accumulate for rest of subsampled sequence in_subsample_len = ( subsample_end_index - self.subsample_step + subsample_index + 1 ) rad_features_in_subsample = rad_features[ (subsample_index + 1) : in_subsample_len - ] # (N_t*, dim_x, dim_y, 2), N_t* = (N_t-1)*ss_step - _, dim_x, dim_y, _ = sample.shape + ] # (N_t*, dim_y, dim_x, 2), N_t* = (N_t-1)*ss_step + _, dim_y, dim_x, _ = sample.shape rest_accum_rad = torch.sum( rad_features_in_subsample.view( self.original_sample_length - 1, self.subsample_step, - dim_x, dim_y, + dim_x, 2, ), dim=1, - ) # (N_t-1, dim_x, dim_y, 2) + ) # (N_t-1, dim_y, dim_x, 2) accum_rad = torch.cat( (init_accum_rad, rest_accum_rad), dim=0 - ) # (N_t, dim_x, dim_y, 2) + ) # (N_t, dim_y, dim_x, 2) # Replace in sample sample[:, :, :, 2:4] = accum_rad @@ -168,7 +168,7 @@ def __getitem__(self, idx): np.load(water_path), dtype=torch.float32 ).unsqueeze( -1 - ) # (dim_x, dim_y, 1) + ) # (dim_y, dim_x, 1) # Flatten water_cover_features = water_cover_features.flatten(0, 1) # (N_grid, 1) # Expand over temporal dimension @@ -183,7 +183,7 @@ def __getitem__(self, idx): ) flux = torch.tensor(np.load(flux_path), dtype=torch.float32).unsqueeze( -1 - ) # (N_t', dim_x, dim_y, 1) + ) # (N_t', dim_y, dim_x, 1) if self.standardize: flux = (flux - self.flux_mean) / self.flux_std From 96f193e0ba61438cdf8033f16d8d4d03a2c6aad7 Mon Sep 17 00:00:00 2001 From: SimonKamuk <43374850+SimonKamuk@users.noreply.github.com> Date: Mon, 10 Jun 2024 11:24:58 +0200 Subject: [PATCH 2/3] Run ci/cd tests on push to main (#55) This is a minor change for ci/cd to also run on pushes to main (which then includes the push created when a branch is merged). Also changed ci/cd badges to only look at the main branch. --- .github/workflows/pre-commit.yml | 4 +--- .github/workflows/run_tests.yml | 4 +--- CHANGELOG.md | 4 ++-- README.md | 4 ++-- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index dadac50d..ad2b1a9c 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -1,10 +1,8 @@ name: Linting on: - # trigger on pushes to any branch, but not main + # trigger on pushes to any branch push: - branches-ignore: - - main # and also on PRs to main pull_request: branches: diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 71bff3d3..4c677908 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -1,10 +1,8 @@ name: Unit Tests on: - # trigger on pushes to any branch, but not main + # trigger on pushes to any branch push: - branches-ignore: - - main # and also on PRs to main pull_request: branches: diff --git a/CHANGELOG.md b/CHANGELOG.md index f4680c37..d109dcb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [unreleased](https://github.com/joeloskarsson/neural-lam/compare/v0.1.0...HEAD) ### Added -- Added tests for loading dataset, creating graph, and training model based on reduced MEPS dataset stored on AWS S3, along with automatic running of tests on push/PR to GitHub. Added caching of test data tp speed up running tests. - [/#38](https://github.com/mllam/neural-lam/pull/38) +- Added tests for loading dataset, creating graph, and training model based on reduced MEPS dataset stored on AWS S3, along with automatic running of tests on push/PR to GitHub, including push to main branch. Added caching of test data to speed up running tests. + [\#38](https://github.com/mllam/neural-lam/pull/38) [\#55](https://github.com/mllam/neural-lam/pull/55) @SimonKamuk - Replaced `constants.py` with `data_config.yaml` for data configuration management diff --git a/README.md b/README.md index 1bdc6602..26d844f7 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -![Linting](https://github.com/mllam/neural-lam/actions/workflows/pre-commit.yml/badge.svg) -![Automatic tests](https://github.com/mllam/neural-lam/actions/workflows/run_tests.yml/badge.svg) +![Linting](https://github.com/mllam/neural-lam/actions/workflows/pre-commit.yml/badge.svg?branch=main) +![Automatic tests](https://github.com/mllam/neural-lam/actions/workflows/run_tests.yml/badge.svg?branch=main)

From 066efe0a77f1ed741d9eb2f303ee920927d79ad2 Mon Sep 17 00:00:00 2001 From: sadamov <45732287+sadamov@users.noreply.github.com> Date: Thu, 13 Jun 2024 08:47:56 +0200 Subject: [PATCH 3/3] Add entry for PR #22 (previously forgotten) (#56) Changelog updated with missing entry for #22 --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d109dcb9..34a8e0e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed + Optional multi-core/GPU support for statistics calculation in `create_parameter_weights.py` + [\#22](https://github.com/mllam/neural-lam/pull/22) + @sadamov + - Robust restoration of optimizer and scheduler using `ckpt_path` [\#17](https://github.com/mllam/neural-lam/pull/17) @sadamov