mllam · leifdenby · Aug 20, 2024 · May 22, 2024 · May 22, 2024 · May 22, 2024
diff --git a/.cirun.yml b/.cirun.yml
@@ -0,0 +1,16 @@
+# setup for using github runners via https://cirun.io/
+runners:
+  - name: "aws-runner"
+    # Cloud Provider: AWS
+    cloud: "aws"
+    # https://aws.amazon.com/ec2/instance-types/g4/
+    instance_type: "g4ad.xlarge"
+    # Deep Learning Base OSS Nvidia Driver GPU AMI (Ubuntu 22.04), Frankfurt region
+    machine_image: "ami-0ba41b554b28d24a4"
+    # use Frankfurt region
+    region: "eu-central-1"
+    preemptible: false
+    # Add this label in the "runs-on" param in .github/workflows/<workflow-name>.yml
+    # So that this runner is created for running the workflow
+    labels:
+      - "cirun-aws-runner"
diff --git a/.github/workflows/ci-pdm-install-and-test-cpu.yml b/.github/workflows/ci-pdm-install-and-test-cpu.yml
@@ -0,0 +1,55 @@
+# cicd workflow for running tests with pytest
+# needs to first install pdm, then install torch cpu manually and then install the package
+# then run the tests
+
+name: test (pdm install, cpu)
+
+on: [push, pull_request]
+
+jobs:
+  tests:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Install pdm
+        run: |
+          python -m pip install pdm
+
+      - name: Create venv
+        run: |
+          pdm venv create --with-pip
+          pdm use --venv in-project
+
+      - name: Install torch (CPU)
+        run: |
+          pdm run python -m pip install torch  --index-url https://download.pytorch.org/whl/cpu
+          # check that the CPU version is installed
+
+      - name: Install package (including dev dependencies)
+        run: |
+          pdm install --group :all
+
+      - name: Print and check torch version
+        run: |
+          pdm run python -c "import torch; print(torch.__version__)"
+          pdm run python -c "import torch; assert torch.__version__.endswith('+cpu')"
+
+      - name: Load cache data
+        uses: actions/cache/restore@v4
+        with:
+          path: data
+          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
+          restore-keys: |
+            ${{ runner.os }}-meps-reduced-example-data-v0.1.0
+
+      - name: Run tests
+        run: |
+          pdm run pytest
+
+      - name: Save cache data
+        uses: actions/cache/save@v4
+        with:
+          path: data
+          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
diff --git a/.github/workflows/ci-pdm-install-and-test-gpu.yml b/.github/workflows/ci-pdm-install-and-test-gpu.yml
@@ -0,0 +1,60 @@
+# cicd workflow for running tests with pytest
+# needs to first install pdm, then install torch cpu manually and then install the package
+# then run the tests
+
+name: test (pdm install, gpu)
+
+on: [push, pull_request]
+
+jobs:
+  tests:
+    runs-on: "cirun-aws-runner--${{ github.run_id }}"
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
+
+      - name: Install pdm
+        run: |
+          python -m pip install pdm
+
+      - name: Create venv
+        run: |
+          pdm config venv.in_project False
+          pdm config venv.location /opt/dlami/nvme/venv
+          pdm venv create --with-pip
+
+      - name: Install torch (GPU CUDA 12.1)
+        run: |
+          pdm run python -m pip install torch  --index-url https://download.pytorch.org/whl/cu121
+
+      - name: Print and check torch version
+        run: |
+          pdm run python -c "import torch; print(torch.__version__)"
+          pdm run python -c "import torch; assert not torch.__version__.endswith('+cpu')"
+
+      - name: Install package (including dev dependencies)
+        run: |
+          pdm install --group :all
+
+      - name: Load cache data
+        uses: actions/cache/restore@v4
+        with:
+          path: data
+          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
+          restore-keys: |
+            ${{ runner.os }}-meps-reduced-example-data-v0.1.0
+
+      - name: Run tests
+        run: |
+          pdm run pytest
+
+      - name: Save cache data
+        uses: actions/cache/save@v4
+        with:
+          path: data
+          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
diff --git a/.github/workflows/ci-pip-install-and-test-cpu.yml b/.github/workflows/ci-pip-install-and-test-cpu.yml
@@ -0,0 +1,45 @@
+# cicd workflow for running tests with pytest
+# needs to first install pdm, then install torch cpu manually and then install the package
+# then run the tests
+
+name: test (pip install, cpu)
+
+on: [push, pull_request]
+
+jobs:
+  tests:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Install torch (CPU)
+        run: |
+          python -m pip install torch  --index-url https://download.pytorch.org/whl/cpu
+
+      - name: Install package (including dev dependencies)
+        run: |
+          python -m pip install ".[dev]"
+
+      - name: Print and check torch version
+        run: |
+          python -c "import torch; print(torch.__version__)"
+          python -c "import torch; assert torch.__version__.endswith('+cpu')"
+
+      - name: Load cache data
+        uses: actions/cache/restore@v4
+        with:
+          path: data
+          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
+          restore-keys: |
+            ${{ runner.os }}-meps-reduced-example-data-v0.1.0
+
+      - name: Run tests
+        run: |
+          python -m pytest
+
+      - name: Save cache data
+        uses: actions/cache/save@v4
+        with:
+          path: data
+          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
diff --git a/.github/workflows/ci-pip-install-and-test-gpu.yml b/.github/workflows/ci-pip-install-and-test-gpu.yml
@@ -0,0 +1,50 @@
+# cicd workflow for running tests with pytest
+# needs to first install pdm, then install torch cpu manually and then install the package
+# then run the tests
+
+name: test (pip install, gpu)
+
+on: [push, pull_request]
+
+jobs:
+  tests:
+    runs-on: "cirun-aws-runner--${{ github.run_id }}"
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
+
+      - name: Install torch (GPU CUDA 12.1)
+        run: |
+          python -m pip install torch  --index-url https://download.pytorch.org/whl/cu121
+
+      - name: Install package (including dev dependencies)
+        run: |
+          python -m pip install ".[dev]"
+
+      - name: Print and check torch version
+        run: |
+          python -c "import torch; print(torch.__version__)"
+          python -c "import torch; assert not torch.__version__.endswith('+cpu')"
+
+      - name: Load cache data
+        uses: actions/cache/restore@v4
+        with:
+          path: data
+          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
+          restore-keys: |
+            ${{ runner.os }}-meps-reduced-example-data-v0.1.0
+
+      - name: Run tests
+        run: |
+          python -m pytest
+
+      - name: Save cache data
+        uses: actions/cache/save@v4
+        with:
+          path: data
+          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
diff --git a/.gitignore b/.gitignore
@@ -75,3 +75,8 @@ tags
 
 # Coc configuration directory
 .vim
+
+# pdm (https://pdm-project.org/en/stable/)
+.pdm-python
+# exclude pdm.lock file so that both cpu and gpu versions of torch will be accepted by pdm
+pdm.lock
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -31,6 +31,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - added github pull-request template to ease contribution and review process
   [\#53](https://github.com/mllam/neural-lam/pull/53), @leifdenby
 
+- ci/cd setup for running both CPU and GPU-based testing both with pdm and pip based installs [\#37](https://github.com/mllam/neural-lam/pull/37), @khintz, @leifdenby
+
 ### Changed
 
   Optional multi-core/GPU support for statistics calculation in `create_parameter_weights.py`
@@ -88,17 +90,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   [\#52](https://github.com/mllam/neural-lam/pull/52)
   @joeloskarsson
 
-- Cap numpy version to < 2.0.0
+- Cap numpy version to < 2.0.0 (this cap was removed in #37, see below)
   [\#68](https://github.com/mllam/neural-lam/pull/68)
   @joeloskarsson
 
+- Remove numpy < 2.0.0 version cap
+  [\#37](https://github.com/mllam/neural-lam/pull/37)
+  @leifdenby
+
 - turn `neural-lam` into a python package by moving all `*.py`-files into the
   `neural_lam/` source directory and updating imports accordingly. This means
   all cli functions are now invoke through the package name, e.g. `python -m
   neural_lam.train_model` instead of `python train_model.py` (and can be done
   anywhere once the package has been installed).
   [\#32](https://github.com/mllam/neural-lam/pull/32), @leifdenby
 
+- move from `requirements.txt` to `pyproject.toml` for defining package dependencies.
+  [\#37](https://github.com/mllam/neural-lam/pull/37), @leifdenby
+
 ## [v0.1.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.1.0)
 
 First tagged release of `neural-lam`, matching Oskarsson et al 2023 publication

diff --git a/README.md b/README.md
@@ -1,5 +1,6 @@
 ![Linting](https://github.com/mllam/neural-lam/actions/workflows/pre-commit.yml/badge.svg?branch=main)
-![Automatic tests](https://github.com/mllam/neural-lam/actions/workflows/run_tests.yml/badge.svg?branch=main)
+[![test (pdm install, gpu)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-gpu.yml/badge.svg)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-gpu.yml)
+[![test (pdm install, cpu)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-cpu.yml/badge.svg)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-cpu.yml)
 
 <p align="middle">
     <img src="figures/neural_lam_header.png" width="700">
@@ -57,15 +58,38 @@ See the issues https://github.com/joeloskarsson/neural-lam/issues/2, https://git
 Below follows instructions on how to use Neural-LAM to train and evaluate models.
 
 ## Installation
-Follow the steps below to create the necessary python environment.
 
-1. Install GEOS for your system. For example with `sudo apt-get install libgeos-dev`. This is necessary for the Cartopy requirement.
-2. Use python 3.9.
-3. Install version 2.0.1 of PyTorch. Follow instructions on the [PyTorch webpage](https://pytorch.org/get-started/previous-versions/) for how to set this up with GPU support on your system.
-4. Install `neural-lam` with pip:
-```
-pip install -e .
-```
+When installing `neural-lam` you have a choice of either installing with
+directly `pip` or using the `pdm` package manager.
+We recommend using `pdm` as it makes it easy to add/remove packages while
+keeping versions consistent (it automatically updates the `pyproject.toml`
+file), makes it easy to handle virtual environments and includes the
+development toolchain packages installation too.
+
+**regarding `torch` installation**: because `torch` creates different package
+variants for different CUDA versions and cpu-only support you will need to install
+`torch` separately if you don't want the most recent GPU variant that also
+expects the most recent version of CUDA on your system.
+
+We cover all the installation options in our [github actions ci/cd
+setup](.github/workflows/) which you can use as a reference.
+
+### Using `pdm`
+
+1. Clone this repository and navigate to the root directory.
+2. Install `pdm` if you don't have it installed on your system (either with `pip install pdm` or [following the install instructions](https://pdm-project.org/latest/#installation)).
+> If you are happy using the latest version of `torch` with GPU support (expecting the latest version of CUDA is installed on your system) you can skip to step 5.
+3. Create a virtual environment for pdm to use with `pdm venv create --with-pip`.
+4. Install a specific version of `torch` with `pdm run python -m pip install torch --index-url https://download.pytorch.org/whl/cpu` for a CPU-only version or `pdm run python -m pip install torch --index-url https://download.pytorch.org/whl/cu111` for CUDA 11.1 support (you can find the correct URL for the variant you want on [PyTorch webpage](https://pytorch.org/get-started/locally/)).
+5. Install the dependencies with `pdm install` (by default this in include the). If you will be developing `neural-lam` we recommend to install the development dependencies with `pdm install --group dev`. By default `pdm` installs the `neural-lam` package in editable mode, so you can make changes to the code and see the effects immediately.
+
+### Using `pip`
+
+1. Clone this repository and navigate to the root directory.
+> If you are happy using the latest version of `torch` with GPU support (expecting the latest version of CUDA is installed on your system) you can skip to step 3.
+2. Install a specific version of `torch` with `python -m pip install torch --index-url https://download.pytorch.org/whl/cpu` for a CPU-only version or `python -m pip install torch --index-url https://download.pytorch.org/whl/cu111` for CUDA 11.1 support (you can find the correct URL for the variant you want on [PyTorch webpage](https://pytorch.org/get-started/locally/)).
+3. Install the dependencies with `python -m pip install .`. If you will be developing `neural-lam` we recommend to install in editable mode and install the development dependencies with `python -m pip install -e ".[dev]"` so you can make changes to the code and see the effects immediately.
+
 
 ## Data
 Datasets should be stored in a directory called `data`.