Move deps to pyproject toml and setup ci/cd cpu+gpu install testing (#37)

Move dependencies from `requirements.txt` to `pyproject.toml`, introduce pdm/pip tests for both CPU and GPU based testing to ensure this new way of defining dependencies works for both CPU and GPU installs (GPU tests running on AWS EC2), and remove the cap on the numpy version (to make it clear we support `numpy >= 2.0.0`).
leifdenby authored Aug 20, 2024
1 parent a54c45f commit 4969f92
Showing 12 changed files with 319 additions and 70 deletions.
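
The commit moves the runtime dependencies into `[project]` in `pyproject.toml` and the development tooling into an optional `dev` group, which is what the `pip install ".[dev]"` and `pdm install --group :all` commands in the new workflows rely on. Since the `pyproject.toml` diff itself is not expanded in this view, here is a minimal, hypothetical sketch of that layout; the package names and version bounds are illustrative, not the actual file contents:

```
[project]
name = "neural-lam"
requires-python = ">=3.9"
dependencies = [
    # runtime dependencies previously listed in requirements.txt;
    # note: no upper cap on numpy, so numpy >= 2.0.0 is allowed
    "numpy>=1.24",
    "cartopy>=0.22",
]

[project.optional-dependencies]
dev = [
    # development tooling pulled in by `pip install ".[dev]"`
    # and `pdm install --group :all` in the CI workflows below
    "pytest>=8",
    "pre-commit>=3",
]
```
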
16 changes: 16 additions & 0 deletions .cirun.yml
@@ -0,0 +1,16 @@
# setup for using github runners via https://cirun.io/
runners:
- name: "aws-runner"
# Cloud Provider: AWS
cloud: "aws"
# https://aws.amazon.com/ec2/instance-types/g4/
instance_type: "g4ad.xlarge"
# Deep Learning Base OSS Nvidia Driver GPU AMI (Ubuntu 22.04), Frankfurt region
machine_image: "ami-0ba41b554b28d24a4"
# use Frankfurt region
region: "eu-central-1"
preemptible: false
# Add this label in the "runs-on" param in .github/workflows/<workflow-name>.yml
# So that this runner is created for running the workflow
labels:
- "cirun-aws-runner"
55 changes: 55 additions & 0 deletions .github/workflows/ci-pdm-install-and-test-cpu.yml
@@ -0,0 +1,55 @@
# cicd workflow for running tests with pytest
# needs to first install pdm, then install torch cpu manually and then install the package
# then run the tests

name: test (pdm install, cpu)

on: [push, pull_request]

jobs:
  tests:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v2

      - name: Install pdm
        run: |
          python -m pip install pdm
      - name: Create venv
        run: |
          pdm venv create --with-pip
          pdm use --venv in-project
      - name: Install torch (CPU)
        run: |
          pdm run python -m pip install torch --index-url https://download.pytorch.org/whl/cpu
        # check that the CPU version is installed
      - name: Install package (including dev dependencies)
        run: |
          pdm install --group :all
      - name: Print and check torch version
        run: |
          pdm run python -c "import torch; print(torch.__version__)"
          pdm run python -c "import torch; assert torch.__version__.endswith('+cpu')"
      - name: Load cache data
        uses: actions/cache/restore@v4
        with:
          path: data
          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
          restore-keys: |
            ${{ runner.os }}-meps-reduced-example-data-v0.1.0
      - name: Run tests
        run: |
          pdm run pytest
      - name: Save cache data
        uses: actions/cache/save@v4
        with:
          path: data
          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
60 changes: 60 additions & 0 deletions .github/workflows/ci-pdm-install-and-test-gpu.yml
@@ -0,0 +1,60 @@
# cicd workflow for running tests with pytest
# needs to first install pdm, then install torch (gpu) manually and then install the package
# then run the tests

name: test (pdm install, gpu)

on: [push, pull_request]

jobs:
  tests:
    runs-on: "cirun-aws-runner--${{ github.run_id }}"
    steps:
      - name: Checkout
        uses: actions/checkout@v2

      - name: Set up Python 3.9
        uses: actions/setup-python@v2
        with:
          python-version: 3.9

      - name: Install pdm
        run: |
          python -m pip install pdm
      - name: Create venv
        run: |
          pdm config venv.in_project False
          pdm config venv.location /opt/dlami/nvme/venv
          pdm venv create --with-pip
      - name: Install torch (GPU CUDA 12.1)
        run: |
          pdm run python -m pip install torch --index-url https://download.pytorch.org/whl/cu121
      - name: Print and check torch version
        run: |
          pdm run python -c "import torch; print(torch.__version__)"
          pdm run python -c "import torch; assert not torch.__version__.endswith('+cpu')"
      - name: Install package (including dev dependencies)
        run: |
          pdm install --group :all
      - name: Load cache data
        uses: actions/cache/restore@v4
        with:
          path: data
          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
          restore-keys: |
            ${{ runner.os }}-meps-reduced-example-data-v0.1.0
      - name: Run tests
        run: |
          pdm run pytest
      - name: Save cache data
        uses: actions/cache/save@v4
        with:
          path: data
          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
45 changes: 45 additions & 0 deletions .github/workflows/ci-pip-install-and-test-cpu.yml
@@ -0,0 +1,45 @@
# cicd workflow for running tests with pytest
# needs to first install torch (cpu) manually and then install the package
# then run the tests

name: test (pip install, cpu)

on: [push, pull_request]

jobs:
  tests:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v2

      - name: Install torch (CPU)
        run: |
          python -m pip install torch --index-url https://download.pytorch.org/whl/cpu
      - name: Install package (including dev dependencies)
        run: |
          python -m pip install ".[dev]"
      - name: Print and check torch version
        run: |
          python -c "import torch; print(torch.__version__)"
          python -c "import torch; assert torch.__version__.endswith('+cpu')"
      - name: Load cache data
        uses: actions/cache/restore@v4
        with:
          path: data
          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
          restore-keys: |
            ${{ runner.os }}-meps-reduced-example-data-v0.1.0
      - name: Run tests
        run: |
          python -m pytest
      - name: Save cache data
        uses: actions/cache/save@v4
        with:
          path: data
          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
50 changes: 50 additions & 0 deletions .github/workflows/ci-pip-install-and-test-gpu.yml
@@ -0,0 +1,50 @@
# cicd workflow for running tests with pytest
# needs to first install torch (gpu) manually and then install the package
# then run the tests

name: test (pip install, gpu)

on: [push, pull_request]

jobs:
  tests:
    runs-on: "cirun-aws-runner--${{ github.run_id }}"
    steps:
      - name: Checkout
        uses: actions/checkout@v2

      - name: Set up Python 3.9
        uses: actions/setup-python@v2
        with:
          python-version: 3.9

      - name: Install torch (GPU CUDA 12.1)
        run: |
          python -m pip install torch --index-url https://download.pytorch.org/whl/cu121
      - name: Install package (including dev dependencies)
        run: |
          python -m pip install ".[dev]"
      - name: Print and check torch version
        run: |
          python -c "import torch; print(torch.__version__)"
          python -c "import torch; assert not torch.__version__.endswith('+cpu')"
      - name: Load cache data
        uses: actions/cache/restore@v4
        with:
          path: data
          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
          restore-keys: |
            ${{ runner.os }}-meps-reduced-example-data-v0.1.0
      - name: Run tests
        run: |
          python -m pytest
      - name: Save cache data
        uses: actions/cache/save@v4
        with:
          path: data
          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
43 changes: 0 additions & 43 deletions .github/workflows/run_tests.yml

This file was deleted.

5 changes: 5 additions & 0 deletions .gitignore
@@ -75,3 +75,8 @@ tags

# Coc configuration directory
.vim

# pdm (https://pdm-project.org/en/stable/)
.pdm-python
# exclude pdm.lock file so that both cpu and gpu versions of torch will be accepted by pdm
pdm.lock
11 changes: 10 additions & 1 deletion CHANGELOG.md
@@ -31,6 +31,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- added github pull-request template to ease contribution and review process
[\#53](https://github.com/mllam/neural-lam/pull/53), @leifdenby

- ci/cd setup for running both CPU and GPU-based testing, with both pdm and pip based installs [\#37](https://github.com/mllam/neural-lam/pull/37), @khintz, @leifdenby

### Changed

Optional multi-core/GPU support for statistics calculation in `create_parameter_weights.py`
@@ -88,17 +90,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
[\#52](https://github.com/mllam/neural-lam/pull/52)
@joeloskarsson

- Cap numpy version to < 2.0.0
- Cap numpy version to < 2.0.0 (this cap was removed in #37, see below)
[\#68](https://github.com/mllam/neural-lam/pull/68)
@joeloskarsson

- Remove numpy < 2.0.0 version cap
[\#37](https://github.com/mllam/neural-lam/pull/37)
@leifdenby

- turn `neural-lam` into a python package by moving all `*.py`-files into the
`neural_lam/` source directory and updating imports accordingly. This means
all cli functions are now invoked through the package name, e.g. `python -m
neural_lam.train_model` instead of `python train_model.py` (and can be done
anywhere once the package has been installed).
[\#32](https://github.com/mllam/neural-lam/pull/32), @leifdenby

- move from `requirements.txt` to `pyproject.toml` for defining package dependencies.
[\#37](https://github.com/mllam/neural-lam/pull/37), @leifdenby

## [v0.1.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.1.0)

First tagged release of `neural-lam`, matching Oskarsson et al 2023 publication
42 changes: 33 additions & 9 deletions README.md
@@ -1,5 +1,6 @@
![Linting](https://github.com/mllam/neural-lam/actions/workflows/pre-commit.yml/badge.svg?branch=main)
![Automatic tests](https://github.com/mllam/neural-lam/actions/workflows/run_tests.yml/badge.svg?branch=main)
[![test (pdm install, gpu)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-gpu.yml/badge.svg)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-gpu.yml)
[![test (pdm install, cpu)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-cpu.yml/badge.svg)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-cpu.yml)

<p align="middle">
<img src="figures/neural_lam_header.png" width="700">
@@ -57,15 +58,38 @@ See the issues https://github.com/joeloskarsson/neural-lam/issues/2, https://git
Below follows instructions on how to use Neural-LAM to train and evaluate models.

## Installation
Follow the steps below to create the necessary python environment.

1. Install GEOS for your system. For example with `sudo apt-get install libgeos-dev`. This is necessary for the Cartopy requirement.
2. Use python 3.9.
3. Install version 2.0.1 of PyTorch. Follow instructions on the [PyTorch webpage](https://pytorch.org/get-started/previous-versions/) for how to set this up with GPU support on your system.
4. Install `neural-lam` with pip:
```
pip install -e .
```
When installing `neural-lam` you can either install directly with `pip` or use
the `pdm` package manager.
We recommend using `pdm` as it makes it easy to add/remove packages while
keeping versions consistent (it automatically updates the `pyproject.toml`
file), makes it easy to handle virtual environments, and also installs the
development toolchain packages.

**Regarding `torch` installation**: because `torch` provides different package
variants for different CUDA versions and for CPU-only support, you will need to
install `torch` separately if you don't want the most recent GPU variant (which
also expects the most recent version of CUDA on your system).

We cover all the installation options in our [github actions ci/cd
setup](.github/workflows/) which you can use as a reference.
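
The CI workflows referenced above check which `torch` variant actually got installed by inspecting the version string; you can run the same check locally after installing (a small sketch mirroring those workflow steps, not a required installation step):

```
python -c "import torch; print(torch.__version__)"
# CPU-only wheels report a version ending in '+cpu';
# wheels from the cu121 index carry a '+cu121' suffix instead
python -c "import torch; assert torch.__version__.endswith('+cpu')"
```
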

### Using `pdm`

1. Clone this repository and navigate to the root directory.
2. Install `pdm` if you don't have it installed on your system (either with `pip install pdm` or [following the install instructions](https://pdm-project.org/latest/#installation)).
> If you are happy using the latest version of `torch` with GPU support (which expects the latest version of CUDA to be installed on your system) you can skip to step 5.
3. Create a virtual environment for pdm to use with `pdm venv create --with-pip`.
4. Install a specific version of `torch` with `pdm run python -m pip install torch --index-url https://download.pytorch.org/whl/cpu` for a CPU-only version or `pdm run python -m pip install torch --index-url https://download.pytorch.org/whl/cu111` for CUDA 11.1 support (you can find the correct URL for the variant you want on the [PyTorch webpage](https://pytorch.org/get-started/locally/)).
5. Install the dependencies with `pdm install` (by default this installs only the base dependencies). If you will be developing `neural-lam` we recommend installing the development dependencies with `pdm install --group dev`. By default `pdm` installs the `neural-lam` package in editable mode, so you can make changes to the code and see the effects immediately. The full command sequence is summarised below.
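
Put together, a typical CPU-only setup with `pdm` looks like the following (a summary of the steps above; swap the `--index-url` for the CUDA variant you need):

```
git clone https://github.com/mllam/neural-lam.git && cd neural-lam
python -m pip install pdm
pdm venv create --with-pip
# install the torch variant you want before the remaining dependencies
pdm run python -m pip install torch --index-url https://download.pytorch.org/whl/cpu
# editable install of neural-lam, here including the development dependencies
pdm install --group dev
```
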

### Using `pip`

1. Clone this repository and navigate to the root directory.
> If you are happy using the latest version of `torch` with GPU support (which expects the latest version of CUDA to be installed on your system) you can skip to step 3.
2. Install a specific version of `torch` with `python -m pip install torch --index-url https://download.pytorch.org/whl/cpu` for a CPU-only version or `python -m pip install torch --index-url https://download.pytorch.org/whl/cu111` for CUDA 11.1 support (you can find the correct URL for the variant you want on the [PyTorch webpage](https://pytorch.org/get-started/locally/)).
3. Install the dependencies with `python -m pip install .`. If you will be developing `neural-lam` we recommend installing in editable mode together with the development dependencies, `python -m pip install -e ".[dev]"`, so you can make changes to the code and see the effects immediately. The full command sequence is summarised below.
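
The equivalent `pip`-only sequence (again a summary of the steps above, using the CPU-only `torch` variant as the example):

```
git clone https://github.com/mllam/neural-lam.git && cd neural-lam
# install the torch variant you want first
python -m pip install torch --index-url https://download.pytorch.org/whl/cpu
# editable install of neural-lam including the development dependencies
python -m pip install -e ".[dev]"
```
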


## Data
Datasets should be stored in a directory called `data`.
The diffs for the remaining changed files (including `pyproject.toml` and `requirements.txt`) are not shown.
