From 4969f92ad974f136089d15e7e2e2e9d73a43590d Mon Sep 17 00:00:00 2001
From: Leif Denby <leif@denby.eu>
Date: Tue, 20 Aug 2024 14:32:09 +0200
Subject: [PATCH] Move deps to pyproject toml and setup ci/cd cpu+gpu install
 testing (#37)

Move dependencies from `requirements.txt` to `pyproject.toml`, introduce pdm- and pip-based CI workflows for both CPU and GPU testing to ensure this new way of defining dependencies works for both CPU and GPU installs (GPU tests run on AWS EC2), and remove the cap on the numpy version (to make it clear we support `numpy >= 2.0.0`).
---
 .cirun.yml                                    | 16 +++++
 .../workflows/ci-pdm-install-and-test-cpu.yml | 55 +++++++++++++++++
 .../workflows/ci-pdm-install-and-test-gpu.yml | 60 +++++++++++++++++++
 .../workflows/ci-pip-install-and-test-cpu.yml | 45 ++++++++++++++
 .../workflows/ci-pip-install-and-test-gpu.yml | 50 ++++++++++++++++
 .github/workflows/run_tests.yml               | 43 -------------
 .gitignore                                    |  5 ++
 CHANGELOG.md                                  | 11 +++-
 README.md                                     | 42 ++++++++++---
 pyproject.toml                                | 37 ++++++++++++
 requirements.txt                              | 17 ------
 tests/test_imports.py                         |  8 +++
 12 files changed, 319 insertions(+), 70 deletions(-)
 create mode 100644 .cirun.yml
 create mode 100644 .github/workflows/ci-pdm-install-and-test-cpu.yml
 create mode 100644 .github/workflows/ci-pdm-install-and-test-gpu.yml
 create mode 100644 .github/workflows/ci-pip-install-and-test-cpu.yml
 create mode 100644 .github/workflows/ci-pip-install-and-test-gpu.yml
 delete mode 100644 .github/workflows/run_tests.yml
 delete mode 100644 requirements.txt
 create mode 100644 tests/test_imports.py
diff --git a/.cirun.yml b/.cirun.yml
new file mode 100644
index 00000000..21b03ab4
--- /dev/null
+++ b/.cirun.yml
@@ -0,0 +1,16 @@
+# setup for using github runners via https://cirun.io/
+runners:
+  - name: "aws-runner"
+    # Cloud Provider: AWS
+    cloud: "aws"
+    # G4dn = NVIDIA T4 (needed for CUDA wheels + Nvidia AMI); G4ad is AMD. https://aws.amazon.com/ec2/instance-types/g4/
+    instance_type: "g4dn.xlarge"
+    # Deep Learning Base OSS Nvidia Driver GPU AMI (Ubuntu 22.04), Frankfurt region
+    machine_image: "ami-0ba41b554b28d24a4"
+    # use Frankfurt region
+    region: "eu-central-1"
+    preemptible: false
+    # Add this label in the "runs-on" param in .github/workflows/<workflow-name>.yml
+    # So that this runner is created for running the workflow
+    labels:
+      - "cirun-aws-runner"
diff --git a/.github/workflows/ci-pdm-install-and-test-cpu.yml b/.github/workflows/ci-pdm-install-and-test-cpu.yml
new file mode 100644
index 00000000..c5da88cc
--- /dev/null
+++ b/.github/workflows/ci-pdm-install-and-test-cpu.yml
@@ -0,0 +1,55 @@
+# cicd workflow for running tests with pytest
+# needs to first install pdm, then install torch cpu manually and then install the package
+# then run the tests
+
+name: test (pdm install, cpu)
+
+on: [push, pull_request]
+
+jobs:
+  tests:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Install pdm
+        run: |
+          python -m pip install pdm
+
+      - name: Create venv
+        run: |
+          pdm venv create --with-pip
+          pdm use --venv in-project
+
+      - name: Install torch (CPU)
+        run: |
+          pdm run python -m pip install torch  --index-url https://download.pytorch.org/whl/cpu
+          # the CPU variant is verified in the "Print and check torch version" step below
+
+      - name: Install package (including dev dependencies)
+        run: |
+          pdm install --group :all
+
+      - name: Print and check torch version
+        run: |
+          pdm run python -c "import torch; print(torch.__version__)"
+          pdm run python -c "import torch; assert torch.__version__.endswith('+cpu')"
+
+      - name: Load cache data
+        uses: actions/cache/restore@v4
+        with:
+          path: data
+          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
+          restore-keys: |
+            ${{ runner.os }}-meps-reduced-example-data-v0.1.0
+
+      - name: Run tests
+        run: |
+          pdm run pytest
+
+      - name: Save cache data
+        uses: actions/cache/save@v4
+        with:
+          path: data
+          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
diff --git a/.github/workflows/ci-pdm-install-and-test-gpu.yml b/.github/workflows/ci-pdm-install-and-test-gpu.yml
new file mode 100644
index 00000000..9ab4f379
--- /dev/null
+++ b/.github/workflows/ci-pdm-install-and-test-gpu.yml
@@ -0,0 +1,60 @@
+# cicd workflow for running tests with pytest
+# needs to first install pdm, then install torch (GPU, CUDA 12.1) manually and then install the package
+# then run the tests
+
+name: test (pdm install, gpu)
+
+on: [push, pull_request]
+
+jobs:
+  tests:
+    runs-on: "cirun-aws-runner--${{ github.run_id }}"
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
+
+      - name: Install pdm
+        run: |
+          python -m pip install pdm
+
+      - name: Create venv
+        run: |
+          pdm config venv.in_project False
+          pdm config venv.location /opt/dlami/nvme/venv
+          pdm venv create --with-pip
+
+      - name: Install torch (GPU CUDA 12.1)
+        run: |
+          pdm run python -m pip install torch  --index-url https://download.pytorch.org/whl/cu121
+
+      - name: Print and check torch version
+        run: |
+          pdm run python -c "import torch; print(torch.__version__)"
+          pdm run python -c "import torch; assert not torch.__version__.endswith('+cpu')"
+
+      - name: Install package (including dev dependencies)
+        run: |
+          pdm install --group :all
+
+      - name: Load cache data
+        uses: actions/cache/restore@v4
+        with:
+          path: data
+          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
+          restore-keys: |
+            ${{ runner.os }}-meps-reduced-example-data-v0.1.0
+
+      - name: Run tests
+        run: |
+          pdm run pytest
+
+      - name: Save cache data
+        uses: actions/cache/save@v4
+        with:
+          path: data
+          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
diff --git a/.github/workflows/ci-pip-install-and-test-cpu.yml b/.github/workflows/ci-pip-install-and-test-cpu.yml
new file mode 100644
index 00000000..81e402c5
--- /dev/null
+++ b/.github/workflows/ci-pip-install-and-test-cpu.yml
@@ -0,0 +1,45 @@
+# cicd workflow for running tests with pytest
+# needs to first install torch cpu manually and then install the package with pip
+# then run the tests
+
+name: test (pip install, cpu)
+
+on: [push, pull_request]
+
+jobs:
+  tests:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Install torch (CPU)
+        run: |
+          python -m pip install torch  --index-url https://download.pytorch.org/whl/cpu
+
+      - name: Install package (including dev dependencies)
+        run: |
+          python -m pip install ".[dev]"
+
+      - name: Print and check torch version
+        run: |
+          python -c "import torch; print(torch.__version__)"
+          python -c "import torch; assert torch.__version__.endswith('+cpu')"
+
+      - name: Load cache data
+        uses: actions/cache/restore@v4
+        with:
+          path: data
+          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
+          restore-keys: |
+            ${{ runner.os }}-meps-reduced-example-data-v0.1.0
+
+      - name: Run tests
+        run: |
+          python -m pytest
+
+      - name: Save cache data
+        uses: actions/cache/save@v4
+        with:
+          path: data
+          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
diff --git a/.github/workflows/ci-pip-install-and-test-gpu.yml b/.github/workflows/ci-pip-install-and-test-gpu.yml
new file mode 100644
index 00000000..ce68946a
--- /dev/null
+++ b/.github/workflows/ci-pip-install-and-test-gpu.yml
@@ -0,0 +1,50 @@
+# cicd workflow for running tests with pytest
+# needs to first install torch (GPU, CUDA 12.1) manually and then install the package with pip
+# then run the tests
+
+name: test (pip install, gpu)
+
+on: [push, pull_request]
+
+jobs:
+  tests:
+    runs-on: "cirun-aws-runner--${{ github.run_id }}"
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
+
+      - name: Install torch (GPU CUDA 12.1)
+        run: |
+          python -m pip install torch  --index-url https://download.pytorch.org/whl/cu121
+
+      - name: Install package (including dev dependencies)
+        run: |
+          python -m pip install ".[dev]"
+
+      - name: Print and check torch version
+        run: |
+          python -c "import torch; print(torch.__version__)"
+          python -c "import torch; assert not torch.__version__.endswith('+cpu')"
+
+      - name: Load cache data
+        uses: actions/cache/restore@v4
+        with:
+          path: data
+          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
+          restore-keys: |
+            ${{ runner.os }}-meps-reduced-example-data-v0.1.0
+
+      - name: Run tests
+        run: |
+          python -m pytest
+
+      - name: Save cache data
+        uses: actions/cache/save@v4
+        with:
+          path: data
+          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
deleted file mode 100644
index 810f2b2c..00000000
--- a/.github/workflows/run_tests.yml
+++ /dev/null
@@ -1,43 +0,0 @@
-name: Unit Tests
-
-on:
-  # trigger on pushes to any branch
-  push:
-  # and also on PRs to main
-  pull_request:
-    branches:
-      - main
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
-
-    steps:
-      - uses: actions/checkout@v3
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-          pip install torch-geometric>=2.5.2
-      - name: Load cache data
-        uses: actions/cache/restore@v4
-        with:
-          path: data
-          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
-          restore-keys: |
-            ${{ runner.os }}-meps-reduced-example-data-v0.1.0
-      - name: Test with pytest
-        run: |
-          python -m pytest -v -s tests/
-      - name: Save cache data
-        uses: actions/cache/save@v4
-        with:
-          path: data
-          key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
diff --git a/.gitignore b/.gitignore
index 65e9f6f8..022206f5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -75,3 +75,8 @@ tags
 
 # Coc configuration directory
 .vim
+
+# pdm (https://pdm-project.org/en/stable/)
+.pdm-python
+# exclude pdm.lock file so that both cpu and gpu versions of torch will be accepted by pdm
+pdm.lock
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c183888e..f7c5cd63 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -31,6 +31,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - added github pull-request template to ease contribution and review process
   [\#53](https://github.com/mllam/neural-lam/pull/53), @leifdenby
 
+- ci/cd setup for running both CPU and GPU-based testing both with pdm and pip based installs [\#37](https://github.com/mllam/neural-lam/pull/37), @khintz, @leifdenby
+
 ### Changed
 
   Optional multi-core/GPU support for statistics calculation in `create_parameter_weights.py`
@@ -88,10 +90,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   [\#52](https://github.com/mllam/neural-lam/pull/52)
   @joeloskarsson
 
-- Cap numpy version to < 2.0.0
+- Cap numpy version to < 2.0.0 (this cap was removed in #37, see below)
   [\#68](https://github.com/mllam/neural-lam/pull/68)
   @joeloskarsson
 
+- Remove numpy < 2.0.0 version cap
+  [\#37](https://github.com/mllam/neural-lam/pull/37)
+  @leifdenby
+
 - turn `neural-lam` into a python package by moving all `*.py`-files into the
   `neural_lam/` source directory and updating imports accordingly. This means
   all cli functions are now invoke through the package name, e.g. `python -m
@@ -99,6 +105,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   anywhere once the package has been installed).
   [\#32](https://github.com/mllam/neural-lam/pull/32), @leifdenby
 
+- move from `requirements.txt` to `pyproject.toml` for defining package dependencies.
+  [\#37](https://github.com/mllam/neural-lam/pull/37), @leifdenby
+
 ## [v0.1.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.1.0)
 
 First tagged release of `neural-lam`, matching Oskarsson et al 2023 publication
diff --git a/README.md b/README.md
index ce8daf69..7dc6c7ab 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
 ![Linting](https://github.com/mllam/neural-lam/actions/workflows/pre-commit.yml/badge.svg?branch=main)
-![Automatic tests](https://github.com/mllam/neural-lam/actions/workflows/run_tests.yml/badge.svg?branch=main)
+[![test (pdm install, gpu)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-gpu.yml/badge.svg)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-gpu.yml)
+[![test (pdm install, cpu)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-cpu.yml/badge.svg)](https://github.com/mllam/neural-lam/actions/workflows/ci-pdm-install-and-test-cpu.yml)
 
 <p align="middle">
     <img src="figures/neural_lam_header.png" width="700">
@@ -57,15 +58,38 @@ See the issues https://github.com/joeloskarsson/neural-lam/issues/2, https://git
 Below follows instructions on how to use Neural-LAM to train and evaluate models.
 
 ## Installation
-Follow the steps below to create the necessary python environment.
 
-1. Install GEOS for your system. For example with `sudo apt-get install libgeos-dev`. This is necessary for the Cartopy requirement.
-2. Use python 3.9.
-3. Install version 2.0.1 of PyTorch. Follow instructions on the [PyTorch webpage](https://pytorch.org/get-started/previous-versions/) for how to set this up with GPU support on your system.
-4. Install `neural-lam` with pip:
-```
-pip install -e .
-```
+When installing `neural-lam` you have a choice of either installing directly
+with `pip` or using the `pdm` package manager.
+We recommend using `pdm` as it makes it easy to add/remove packages while
+keeping versions consistent (it automatically updates the `pyproject.toml`
+file), makes it easy to handle virtual environments and includes the
+development toolchain packages installation too.
+
+**regarding `torch` installation**: because `torch` creates different package
+variants for different CUDA versions and cpu-only support you will need to install
+`torch` separately if you don't want the most recent GPU variant that also
+expects the most recent version of CUDA on your system.
+
+We cover all the installation options in our [github actions ci/cd
+setup](.github/workflows/) which you can use as a reference.
+
+### Using `pdm`
+
+1. Clone this repository and navigate to the root directory.
+2. Install `pdm` if you don't have it installed on your system (either with `pip install pdm` or [following the install instructions](https://pdm-project.org/latest/#installation)).
+> If you are happy using the latest version of `torch` with GPU support (expecting the latest version of CUDA is installed on your system) you can skip to step 5.
+3. Create a virtual environment for pdm to use with `pdm venv create --with-pip`.
+4. Install a specific version of `torch` with `pdm run python -m pip install torch --index-url https://download.pytorch.org/whl/cpu` for a CPU-only version or `pdm run python -m pip install torch --index-url https://download.pytorch.org/whl/cu121` for CUDA 12.1 support (you can find the correct URL for the variant you want on the [PyTorch webpage](https://pytorch.org/get-started/locally/)).
+5. Install the dependencies with `pdm install` (by default this includes only the core dependencies listed in `pyproject.toml`). If you will be developing `neural-lam` we recommend to install the development dependencies with `pdm install --group dev`. By default `pdm` installs the `neural-lam` package in editable mode, so you can make changes to the code and see the effects immediately.
+
+### Using `pip`
+
+1. Clone this repository and navigate to the root directory.
+> If you are happy using the latest version of `torch` with GPU support (expecting the latest version of CUDA is installed on your system) you can skip to step 3.
+2. Install a specific version of `torch` with `python -m pip install torch --index-url https://download.pytorch.org/whl/cpu` for a CPU-only version or `python -m pip install torch --index-url https://download.pytorch.org/whl/cu121` for CUDA 12.1 support (you can find the correct URL for the variant you want on the [PyTorch webpage](https://pytorch.org/get-started/locally/)).
+3. Install the dependencies with `python -m pip install .`. If you will be developing `neural-lam` we recommend to install in editable mode and install the development dependencies with `python -m pip install -e ".[dev]"` so you can make changes to the code and see the effects immediately.
+
 
 ## Data
 Datasets should be stored in a directory called `data`.
diff --git a/pyproject.toml b/pyproject.toml
index c482abc9..d66c0087 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,38 @@
 [project]
 name = "neural-lam"
 version = "0.1.0"
+description = "LAM-based data-driven forecasting"
+authors = [
+    {name = "Joel Oskarsson", email = "joel.oskarsson@liu.se"},
+    {name = "Simon Adamov", email = "Simon.Adamov@meteoswiss.ch"},
+    {name = "Leif Denby", email = "lcd@dmi.dk"},
+]
+
+# PEP 621 project metadata
+# See https://www.python.org/dev/peps/pep-0621/
+dependencies = [
+    "numpy>=1.24.2",
+    "wandb>=0.13.10",
+    "scipy>=1.10.0",
+    "pytorch-lightning>=2.0.3",
+    "shapely>=2.0.1",
+    "networkx>=3.0",
+    "Cartopy>=0.22.0",
+    "pyproj>=3.4.1",
+    "tueplots>=0.0.8",
+    "matplotlib>=3.7.0",
+    "plotly>=5.15.0",
+    "torch>=2.3.0",
+    "torch-geometric==2.3.1",
+]
+requires-python = ">=3.9"
 
+[project.optional-dependencies]
+dev = [
+    "pre-commit>=3.8.0",
+    "pytest>=8.3.2",
+    "pooch>=1.8.2",
+]
 [tool.setuptools]
 py-modules = ["neural_lam"]
 
@@ -70,3 +101,9 @@ max-statements=100 # Allow for some more involved functions
 allow-any-import-level="neural_lam"
 [tool.pylint.SIMILARITIES]
 min-similarity-lines=10
+
+
+[tool.pdm]
+[build-system]
+requires = ["pdm-backend"]
+build-backend = "pdm.backend"
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 6bcf304d..00000000
--- a/requirements.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-# for all
-numpy>=1.24.2, <2.0.0
-wandb>=0.13.10
-matplotlib>=3.7.0
-scipy>=1.10.0
-pytorch-lightning>=2.0.3
-shapely>=2.0.1
-networkx>=3.0
-Cartopy>=0.22.0
-pyproj>=3.4.1
-tueplots>=0.0.8
-plotly>=5.15.0
-
-# for dev
-pre-commit>=2.15.0
-pytest>=8.1.1
-pooch>=1.8.1
diff --git a/tests/test_imports.py b/tests/test_imports.py
new file mode 100644
index 00000000..e7bbd356
--- /dev/null
+++ b/tests/test_imports.py
@@ -0,0 +1,8 @@
+# First-party
+import neural_lam
+import neural_lam.vis
+
+
+def test_import():
+    assert neural_lam is not None
+    assert neural_lam.vis is not None