Skip to content

Commit

Permalink
Add basic template files (#2)
Browse files Browse the repository at this point in the history
* Add template files

* add gitignore

* update submod

* Update readme

* add common files

* relocate scripts

* add component scripts

* add placeholder components

* Update Readme

* Add CI

* update to viash 0.9.0

* Add shebang

* remove .functionality

* Update _viash

* Update readme

* fix configs for viash 0.9

* WIP instructions

* update readme

* Update readme with alert syntax

* remove config mods

* update CI test

* generalise docs

* Update component files

* add test

* update _viash

* relocate github actions

* fix CI fail

* fix config error

* test fix
  • Loading branch information
KaiWaldrant authored Jun 21, 2024
1 parent 1bb4888 commit e2075ce
Show file tree
Hide file tree
Showing 29 changed files with 996 additions and 3 deletions.
122 changes: 122 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
name: build

# Triggered by pushes to main, or manually with optional overrides for the
# deployment branch and the version label.
on:
  push:
    branches:
      - main
  workflow_dispatch:
    inputs:
      target_branch:
        description: >-
          Branch to deploy to. If not specified,
          `build-${BRANCH_NAME}` will be used.
        required: false
      version:
        description: >-
          Version name to use for the build. If not specified,
          `build-${BRANCH_NAME}` will be used.
        required: false

jobs:
  # phase 1: resolve the version and target branch, build the target folder,
  # publish it to the target branch, and emit the component matrix that the
  # `build` job fans out over.
  list:
    runs-on: ubuntu-latest

    outputs:
      target_branch: ${{ steps.defaults.outputs.target_branch }}
      version: ${{ steps.defaults.outputs.version }}
      component_matrix: ${{ steps.set_matrix.outputs.matrix }}

    steps:
      - uses: actions/checkout@v4
        with:
          submodules: 'recursive'

      - uses: viash-io/viash-actions/setup@v5

      - name: Determine version tag from branch name
        id: defaults
        # The workflow_dispatch inputs are passed through the environment
        # instead of being spliced into the script text, so a value containing
        # spaces or shell metacharacters is treated as data, not as code.
        env:
          REQUESTED_VERSION: ${{ github.event.inputs.version }}
          REQUESTED_TARGET_BRANCH: ${{ github.event.inputs.target_branch }}
        run: |
          BRANCH_NAME=$(echo "$GITHUB_REF" | sed 's/refs\/heads\///')
          # fall back to build-<branch> when no explicit value was given
          VERSION="$REQUESTED_VERSION"
          if [ -z "$VERSION" ]; then
            VERSION="build-$BRANCH_NAME"
          fi
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
          TARGET_BRANCH="$REQUESTED_TARGET_BRANCH"
          if [ -z "$TARGET_BRANCH" ]; then
            TARGET_BRANCH="build-$BRANCH_NAME"
          fi
          echo "target_branch=$TARGET_BRANCH" >> "$GITHUB_OUTPUT"

      - name: Remove target folder from .gitignore
        run: |
          # allow publishing the target folder; match the entry whether or not
          # it is written with a leading slash
          sed -i -E '/^\/?target.*/d' .gitignore

      - uses: viash-io/viash-actions/ns-build@v5
        with:
          # viash 0.9 configs have no `functionality` layer; `version` is a
          # top-level field
          config_mod: .version := '${{ steps.defaults.outputs.version }}'
          parallel: true

      - name: Deploy to target branch
        uses: peaceiris/actions-gh-pages@v4
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: .
          publish_branch: ${{ steps.defaults.outputs.target_branch }}

      - id: ns_list
        uses: viash-io/viash-actions/ns-list@v5
        with:
          # same input name as the ns-list call in the test workflow
          engine: docker
          src: src
          format: json

      # One matrix entry per listed component: its "<namespace>/<name>" label
      # and the directory that holds its config file.
      - id: set_matrix
        run: |
          echo "matrix=$(jq -c '[ .[] |
            {
              "name": (.namespace + "/" + .name),
              "dir": .info.config | capture("^(?<dir>.*\/)").dir
            }
          ]' ${{ steps.ns_list.outputs.output_file }} )" >> "$GITHUB_OUTPUT"
# phase 2: for every component emitted by the `list` job, build its container
# and push it to the container registry.
  build:
    needs: list

    runs-on: ubuntu-latest

    strategy:
      # keep building the remaining components when one of them fails
      fail-fast: false
      matrix:
        component: ${{ fromJson(needs.list.outputs.component_matrix) }}

    steps:
      # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.'
      - uses: data-intuitive/reclaim-the-bytes@v2

      - uses: actions/checkout@v4

      - uses: viash-io/viash-actions/setup@v5

      - name: Build container
        uses: viash-io/viash-actions/ns-build@v5
        with:
          # viash 0.9 configs have no `functionality` layer; `version` is a
          # top-level field
          config_mod: .version := '${{ needs.list.outputs.version }}'
          # NOTE(review): the test workflow passes `engine: docker` to ns-list;
          # confirm whether ns-build@v5 expects `engine` here as well
          platform: docker
          src: ${{ matrix.component.dir }}
          setup: build

      - name: Login to container registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ secrets.GTHB_USER }}
          password: ${{ secrets.GTHB_PAT }}

      # Same ns-build invocation as the build step, with `setup: push`.
      - name: Push container
        uses: viash-io/viash-actions/ns-build@v5
        with:
          config_mod: .version := '${{ needs.list.outputs.version }}'
          platform: docker
          src: ${{ matrix.component.dir }}
          setup: push
113 changes: 113 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
name: test

# Runs for every pull request and for pushes to any branch; the first job
# decides whether a push-triggered run should be skipped.
on:
  pull_request:
  push:
    branches:
      - '**'

jobs:
  # Gate job: sets `run_ci` to "false" when this run was triggered by a push
  # to a non-main branch that already has an open pull request and the head
  # commit message does not contain 'ci force'; otherwise sets it to "true".
  run_ci_check_job:
    runs-on: ubuntu-latest
    outputs:
      run_ci: ${{ steps.github_cli.outputs.check }}
    steps:
      - name: 'Check if branch has an existing pull request and the trigger was a push'
        id: github_cli
        # The repository and branch names are read from the runner-provided
        # environment variables (quoted) rather than interpolated into the
        # script text, so a branch name containing shell metacharacters cannot
        # alter the command.
        run: |
          pull_request=$(gh pr list -R "$GITHUB_REPOSITORY" -H "$GITHUB_REF_NAME" --json url --state open --limit 1 | jq '.[0].url')
          # If the branch has a PR and this run was triggered by a push event, do not run
          if [[ "$pull_request" != "null" && "$GITHUB_REF_NAME" != "main" && "${{ github.event_name == 'push' }}" == "true" && "${{ !contains(github.event.head_commit.message, 'ci force') }}" == "true" ]]; then
            echo "check=false" >> "$GITHUB_OUTPUT"
          else
            echo "check=true" >> "$GITHUB_OUTPUT"
          fi
        env:
          GITHUB_TOKEN: ${{ secrets.GTHB_PAT }}

# phase 1: sync the test resources and work out which components to test.
  list:
    needs: run_ci_check_job
    # skipped entirely when the gate job reported run_ci != 'true'
    if: needs.run_ci_check_job.outputs.run_ci == 'true'
    runs-on: ubuntu-latest

    env:
      s3_bucket: s3://openproblems-data/resources_test

    outputs:
      matrix: ${{ steps.set_matrix.outputs.matrix }}
      cache_key: ${{ steps.cache.outputs.cache_key }}

    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
          fetch-depth: 0

      # Reads _viash.yaml.
      # NOTE(review): presumably this is what makes $NAME available to the
      # sync step below; confirm against the action's documentation.
      - uses: christian-ci/action-yaml-github-output@v2
        with:
          file_path: _viash.yaml

      - uses: viash-io/viash-actions/setup@v5

      - id: cache
        uses: viash-io/viash-actions/project/sync-and-cache-s3@v5
        with:
          # passed to the action as the literal text "$s3_bucket/$NAME"
          s3_bucket: $s3_bucket/$NAME
          dest_path: resources
          cache_key_prefix: resources__

      - id: ns_list
        uses: viash-io/viash-actions/ns-list@v5
        with:
          format: json
          engine: docker

      # Takes the full component listing as input; its output file is what
      # the matrix below is built from.
      - id: ns_list_filtered
        uses: viash-io/viash-actions/project/detect-changed-components@v5
        with:
          input_file: ${{ steps.ns_list.outputs.output_file }}

      # One matrix entry per component: "<namespace>/<name>" plus the path of
      # its config file.
      - id: set_matrix
        run: |
          echo "matrix=$(jq -c '[ .[] |
            {
              "name": (.namespace + "/" + .name),
              "config": .info.config
            }
          ]' ${{ steps.ns_list_filtered.outputs.output_file }} )" >> $GITHUB_OUTPUT
# phase 2: run `viash test` once per component in the matrix from phase 1.
  viash_test:
    needs: list
    # nothing to do when the matrix is empty or was never set
    if: >-
      needs.list.outputs.matrix != '[]' &&
      needs.list.outputs.matrix != ''
    runs-on: ubuntu-latest

    strategy:
      matrix:
        component: ${{ fromJson(needs.list.outputs.matrix) }}
      # one failing component does not cancel the others
      fail-fast: false

    steps:
      # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.'
      - uses: data-intuitive/reclaim-the-bytes@v2

      - uses: actions/checkout@v4
        with:
          submodules: recursive

      - uses: viash-io/viash-actions/setup@v5

      # use cache: restore the `resources` folder under the key produced by
      # the `list` job
      - name: Cache resources data
        uses: actions/cache@v4
        timeout-minutes: 10
        with:
          key: ${{ needs.list.outputs.cache_key }}
          path: resources

      - name: Run test
        timeout-minutes: 30
        run: |
          VIASH_TEMP=$RUNNER_TEMP/viash viash test \
            "${{ matrix.component.config }}" \
            --cpus 2 \
            --memory "16gb"
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Paths excluded from version control.
resources
work
.nextflow*
# The build workflow has a step meant to strip the target entry from this
# file so that the target folder can be published.
target
.vscode
.DS_Store
output
trace-*
.ipynb_checkpoints
73 changes: 73 additions & 0 deletions INSTRUCTIONS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Instructions

This is a guide on what to do after you have created a new task repository from the template. More in-depth information about how to create a new task can be found in the [OpenProblems Documentation](https://openproblems.bio/documentation/create_task/).

## First things first

* Update the `_viash.yaml` file with the correct task information.
* Update the `src/api/task_info.yaml` file with the information you have provided in the task issue.

## Resources

The OpenProblems team has provided some test resources that can be used to test the task. These resources are stored in the `resources` folder. The `scripts/download_resources.sh` script can be used to download these resources.

If these resources are not sufficient, you can add more resources to the `resources` folder. The `scripts/download_resources.sh` script can be updated to download these resources.





<!-- Add to readme
* update _viash.yaml
* update src/api/task_info.yaml
* update scripts/download_resources
-->

```bash
#!/bin/bash

# Step-by-step example commands. The guard below stops the script when it is
# executed as a whole; copy and run the steps one at a time instead.
echo "This script is not supposed to be run directly."
echo "Please run the script step-by-step."
exit 1

# sync resources
scripts/download_resources.sh

# create a new component
method_id="my_metric"
method_lang="python" # change this to "r" if need be

common/create_component/create_component -- \
  --language "$method_lang" \
  --name "$method_id"

# TODO: fill in required fields in src/task/methods/$method_id/config.vsh.yaml
# TODO: edit src/task/methods/$method_id/script.py/R

# test the component
viash test src/task/methods/$method_id/config.vsh.yaml

# rebuild the container (only if you change something to the docker platform)
# You can reduce the memory and cpu allotted to jobs in _viash.yaml by modifying .platforms[.type == "nextflow"].config.labels
viash run src/task/methods/$method_id/config.vsh.yaml -- \
  ---setup cachedbuild ---verbose

# run the method (using parquet as input)
viash run src/task/methods/$method_id/config.vsh.yaml -- \
  --de_train "resources/neurips-2023-kaggle/de_train.parquet" \
  --id_map "resources/neurips-2023-kaggle/id_map.csv" \
  --output "output/prediction.parquet"

# run the method (using h5ad as input)
viash run src/task/methods/$method_id/config.vsh.yaml -- \
  --de_train_h5ad "resources/neurips-2023-kaggle/2023-09-12_de_by_cell_type_train.h5ad" \
  --id_map "resources/neurips-2023-kaggle/id_map.csv" \
  --output "output/prediction.parquet"

# run evaluation metric
viash run src/task/metrics/mean_rowwise_error/config.vsh.yaml -- \
  --de_test "resources/neurips-2023-kaggle/de_test.parquet" \
  --prediction "output/prediction.parquet" \
  --output "output/score.h5ad"

# print score on kaggle test dataset
python -c 'import anndata; print(anndata.read_h5ad("output/score.h5ad").uns)'
```
29 changes: 27 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,27 @@
# task-template
This repo is a template to create a new task that has the correct files and structure needed to start a new task.
# Task Template

This repo is a template to create a new task for the OpenProblems v2. This repo contains several example files and components that can be used when updated with the task info.

> [!WARNING]
> This README will be overwritten when performing the `create_task_readme` script.
## Create a repository from this template

> [!IMPORTANT]
> Before creating a new repository, make sure you are part of the OpenProblems task team. You will be added to the team once you have created an issue for the task and have been given the go-ahead to create it.
> For more information on how to create a new task, check out the [Create a new task](https://openproblems.bio/documentation/create_task/) documentation.

The instructions below will guide you through creating a new repository from this template ([creating-a-repository-from-a-template](https://docs.github.com/en/repositories/creating-and-managing-repositories/creating-a-repository-from-a-template#creating-a-repository-from-a-template)).


* Click the "Use this template" button on the top right of the repository.
* Use the Owner dropdown menu to select the `openproblems-bio` account.
* Type a name for your repository (task_...), and a description.
* Set the repository visibility to public.
* Click "Create repository from template".

## What to do next

Check out the [instructions](INSTRUCTIONS.md) for more information on how to update the example files and components. These instructions also contain information on how to build out the task and basic commands.

For more information on the OpenProblems v2, check out the [Documentation](https://openproblems.bio/documentation/) on the Open Problems website.
16 changes: 16 additions & 0 deletions _viash.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Project-wide viash settings for this task repository.
viash_version: '0.9.0-RC6'

name: task_template
description: |
  An OpenProblems benchmark task.
license: MIT
keywords:
  - single-cell
  - openproblems
  - benchmark
# Replace the <task_name> placeholders with the real repository name.
links:
  issue_tracker: https://github.com/openproblems-bio/task_<task_name>/issues
  repository: https://github.com/openproblems-bio/task_<task_name>
  docker_registry: ghcr.io/openproblems-bio

version: dev

# Sets the memory / cpu / time labels on every nextflow runner.
config_mods: |
  .runners[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h" }
2 changes: 1 addition & 1 deletion common
3 changes: 3 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Placeholder entry workflow: it only prints a notice pointing users to the
// other nf files.
workflow {
  def notice = "This is a dummy placeholder for pipeline execution. Please use the corresponding nf files for running pipelines."
  print(notice)
}
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// Default container image for all processes.
process {
    container = 'nextflow/bash:latest'
}
12 changes: 12 additions & 0 deletions scripts/add_a_control_method.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash

# Scaffolds a new control-method component by calling the create_component
# helper under common/ with the settings below.
# All paths are relative to the current working directory.
# NOTE(review): presumably meant to be run from the repository root; confirm.

# stop on the first failing command
set -e

task_name="task_template"
component_name="my_control_method"
component_lang="python" # change this to "r" if need be

common/create_component/create_component \
  --task "$task_name" \
  --language "$component_lang" \
  --name "$component_name" \
  --api_file src/api/comp_control_method.yaml \
  --output "src/control_methods/$component_name"
Loading

0 comments on commit e2075ce

Please sign in to comment.