diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index dadac50d..ad2b1a9c 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -1,10 +1,8 @@
 name: Linting

 on:
-  # trigger on pushes to any branch, but not main
+  # trigger on pushes to any branch
   push:
-    branches-ignore:
-      - main
   # and also on PRs to main
   pull_request:
     branches:
diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
index 71bff3d3..4c677908 100644
--- a/.github/workflows/run_tests.yml
+++ b/.github/workflows/run_tests.yml
@@ -1,10 +1,8 @@
 name: Unit Tests

 on:
-  # trigger on pushes to any branch, but not main
+  # trigger on pushes to any branch
   push:
-    branches-ignore:
-      - main
   # and also on PRs to main
   pull_request:
     branches:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3544b299..34a8e0e4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,8 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [unreleased](https://github.com/joeloskarsson/neural-lam/compare/v0.1.0...HEAD)

 ### Added
-- Added tests for loading dataset, creating graph, and training model based on reduced MEPS dataset stored on AWS S3, along with automatic running of tests on push/PR to GitHub. Added caching of test data tp speed up running tests.
-  [/#38](https://github.com/mllam/neural-lam/pull/38)
+- Added tests for loading dataset, creating graph, and training model based on reduced MEPS dataset stored on AWS S3, along with automatic running of tests on push/PR to GitHub, including push to main branch. Added caching of test data to speed up running tests.
+  [\#38](https://github.com/mllam/neural-lam/pull/38) [\#55](https://github.com/mllam/neural-lam/pull/55)
   @SimonKamuk

 - Replaced `constants.py` with `data_config.yaml` for data configuration management
@@ -30,6 +30,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Changed

+- Optional multi-core/GPU support for statistics calculation in `create_parameter_weights.py`
+  [\#22](https://github.com/mllam/neural-lam/pull/22)
+  @sadamov
+
 - Robust restoration of optimizer and scheduler using `ckpt_path`
   [\#17](https://github.com/mllam/neural-lam/pull/17)
   @sadamov
@@ -76,6 +80,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   [\#47](https://github.com/mllam/neural-lam/pull/47)
   @joeloskarsson

+- Fix incorrect ordering of x- and y-dimensions in comments describing tensor
+  shapes for MEPS data
+  [\#52](https://github.com/mllam/neural-lam/pull/52)
+  @joeloskarsson
+
 ## [v0.1.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.1.0)

 First tagged release of `neural-lam`, matching Oskarsson et al 2023 publication
diff --git a/README.md b/README.md
index 1bdc6602..26d844f7 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
-![Linting](https://github.com/mllam/neural-lam/actions/workflows/pre-commit.yml/badge.svg)
-![Automatic tests](https://github.com/mllam/neural-lam/actions/workflows/run_tests.yml/badge.svg)
+![Linting](https://github.com/mllam/neural-lam/actions/workflows/pre-commit.yml/badge.svg?branch=main)
+![Automatic tests](https://github.com/mllam/neural-lam/actions/workflows/run_tests.yml/badge.svg?branch=main)

diff --git a/create_grid_features.py b/create_grid_features.py
index c3714368..4f058e17 100644
--- a/create_grid_features.py
+++ b/create_grid_features.py
@@ -29,14 +29,14 @@ def main():
     # -- Static grid node features --
     grid_xy = torch.tensor(
         np.load(os.path.join(static_dir_path, "nwp_xy.npy"))
-    )  # (2, N_x, N_y)
+    )  # (2, N_y, N_x)
     grid_xy = grid_xy.flatten(1, 2).T  # (N_grid, 2)
     pos_max = torch.max(torch.abs(grid_xy))
    grid_xy = grid_xy / pos_max  # Divide by maximum coordinate

     geopotential = torch.tensor(
         np.load(os.path.join(static_dir_path, "surface_geopotential.npy"))
-    )  # (N_x, N_y)
+    )  # (N_y, N_x)
     geopotential = geopotential.flatten(0, 1).unsqueeze(1)  # (N_grid,1)
     gp_min = torch.min(geopotential)
     gp_max = torch.max(geopotential)
@@ -46,7 +46,7 @@ def main():
     grid_border_mask = torch.tensor(
         np.load(os.path.join(static_dir_path, "border_mask.npy")),
         dtype=torch.int64,
-    )  # (N_x, N_y)
+    )  # (N_y, N_x)
     grid_border_mask = (
         grid_border_mask.flatten(0, 1).to(torch.float).unsqueeze(1)
     )  # (N_grid, 1)
diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py
index a782806b..3288ed67 100644
--- a/neural_lam/weather_dataset.py
+++ b/neural_lam/weather_dataset.py
@@ -16,8 +16,8 @@ class WeatherDataset(torch.utils.data.Dataset):
     For our dataset:
     N_t' = 65
     N_t = 65//subsample_step (= 21 for 3h steps)
-    dim_x = 268
-    dim_y = 238
+    dim_y = 268
+    dim_x = 238
     N_grid = 268x238 = 63784
     d_features = 17 (d_features' = 18)
     d_forcing = 5
@@ -87,7 +87,7 @@ def __getitem__(self, idx):
         try:
             full_sample = torch.tensor(
                 np.load(sample_path), dtype=torch.float32
-            )  # (N_t', dim_x, dim_y, d_features')
+            )  # (N_t', dim_y, dim_x, d_features')
         except ValueError:
             print(f"Failed to load {sample_path}")

@@ -101,40 +101,40 @@ def __getitem__(self, idx):
         sample = full_sample[
             subsample_index : subsample_end_index : self.subsample_step
         ]
-        # (N_t, dim_x, dim_y, d_features')
+        # (N_t, dim_y, dim_x, d_features')

         # Remove feature 15, "z_height_above_ground"
         sample = torch.cat(
             (sample[:, :, :, :15], sample[:, :, :, 16:]), dim=3
-        )  # (N_t, dim_x, dim_y, d_features)
+        )  # (N_t, dim_y, dim_x, d_features)

         # Accumulate solar radiation instead of just subsampling
-        rad_features = full_sample[:, :, :, 2:4]  # (N_t', dim_x, dim_y, 2)
+        rad_features = full_sample[:, :, :, 2:4]  # (N_t', dim_y, dim_x, 2)
         # Accumulate for first time step
         init_accum_rad = torch.sum(
             rad_features[: (subsample_index + 1)], dim=0, keepdim=True
-        )  # (1, dim_x, dim_y, 2)
+        )  # (1, dim_y, dim_x, 2)
         # Accumulate for rest of subsampled sequence
         in_subsample_len = (
             subsample_end_index - self.subsample_step + subsample_index + 1
         )
         rad_features_in_subsample = rad_features[
             (subsample_index + 1) : in_subsample_len
-        ]  # (N_t*, dim_x, dim_y, 2), N_t* = (N_t-1)*ss_step
-        _, dim_x, dim_y, _ = sample.shape
+        ]  # (N_t*, dim_y, dim_x, 2), N_t* = (N_t-1)*ss_step
+        _, dim_y, dim_x, _ = sample.shape
         rest_accum_rad = torch.sum(
             rad_features_in_subsample.view(
                 self.original_sample_length - 1,
                 self.subsample_step,
-                dim_x,
                 dim_y,
+                dim_x,
                 2,
             ),
             dim=1,
-        )  # (N_t-1, dim_x, dim_y, 2)
+        )  # (N_t-1, dim_y, dim_x, 2)
         accum_rad = torch.cat(
             (init_accum_rad, rest_accum_rad), dim=0
-        )  # (N_t, dim_x, dim_y, 2)
+        )  # (N_t, dim_y, dim_x, 2)
         # Replace in sample
         sample[:, :, :, 2:4] = accum_rad

@@ -168,7 +168,7 @@ def __getitem__(self, idx):
             np.load(water_path), dtype=torch.float32
         ).unsqueeze(
             -1
-        )  # (dim_x, dim_y, 1)
+        )  # (dim_y, dim_x, 1)
         # Flatten
         water_cover_features = water_cover_features.flatten(0, 1)  # (N_grid, 1)
         # Expand over temporal dimension
@@ -183,7 +183,7 @@ def __getitem__(self, idx):
         )
         flux = torch.tensor(np.load(flux_path), dtype=torch.float32).unsqueeze(
             -1
-        )  # (N_t', dim_x, dim_y, 1)
+        )  # (N_t', dim_y, dim_x, 1)
         if self.standardize:
             flux = (flux - self.flux_mean) / self.flux_std
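All of the comment fixes above encode one convention: the MEPS arrays are stored row-major with the y-dimension before the x-dimension, so flattening the two spatial dimensions maps grid node `k` to `(y, x) = (k // N_x, k % N_x)`. A minimal standalone sketch (toy tensors only, with the sizes from the `WeatherDataset` docstring; not code from the repo) of why `flatten(1, 2).T` in `create_grid_features.py` then yields `(N_grid, 2)` in that order:

```python
import torch

# Toy coordinate field in the corrected comment convention: (2, N_y, N_x)
N_y, N_x = 268, 238
grid_xy = torch.randn(2, N_y, N_x)

# As in create_grid_features.py: merge the spatial dims, then transpose
grid_flat = grid_xy.flatten(1, 2).T  # (N_grid, 2)
assert grid_flat.shape == (N_y * N_x, 2)  # N_grid = 268 * 238 = 63784

# Row-major flattening: node k corresponds to (y, x) = (k // N_x, k % N_x),
# which is why the shape comments must list dim_y before dim_x
k = 1000
assert torch.equal(grid_flat[k], grid_xy[:, k // N_x, k % N_x])
```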
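The most involved change sits in the solar-radiation accumulation of `WeatherDataset.__getitem__`, where `dim_x` and `dim_y` also had to swap places inside `view()`: grouping the intermediate time steps into blocks only works if the view matches the tensor's actual memory layout. A runnable sketch with dummy data; `subsample_end_index` is an assumption here (chosen so the slice length comes out to `(N_t - 1) * subsample_step`), since its definition lies outside the hunk:

```python
import torch

# Dummy sizes mirroring the docstring: N_t' = 65, 3 h steps -> N_t = 21
dim_y, dim_x = 268, 238
subsample_step, subsample_index = 3, 0
original_sample_length = 21  # N_t
subsample_end_index = original_sample_length * subsample_step  # assumed: 63

rad_features = torch.randn(65, dim_y, dim_x, 2)  # (N_t', dim_y, dim_x, 2)

# First subsampled step: accumulate everything up to and including it
init_accum_rad = torch.sum(
    rad_features[: subsample_index + 1], dim=0, keepdim=True
)  # (1, dim_y, dim_x, 2)

# Remaining steps: group intermediate steps into blocks of subsample_step
# and sum within each block; the view must list dim_y before dim_x
in_subsample_len = subsample_end_index - subsample_step + subsample_index + 1
rest_accum_rad = torch.sum(
    rad_features[subsample_index + 1 : in_subsample_len].view(
        original_sample_length - 1, subsample_step, dim_y, dim_x, 2
    ),
    dim=1,
)  # (N_t - 1, dim_y, dim_x, 2)

accum_rad = torch.cat((init_accum_rad, rest_accum_rad), dim=0)
assert accum_rad.shape == (original_sample_length, dim_y, dim_x, 2)
```

With the old `(dim_x, dim_y)` order the `view()` would still reshape without error (the element count matches), but each block would mix values from different grid rows, which is exactly why the ordering fix matters beyond the comments.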