diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..c0b31e1 --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,122 @@ +name: build + +on: + push: + branches: [ 'main' ] + workflow_dispatch: + inputs: + target_branch: + description: 'Branch to deploy to. If not specified, `build-${BRANCH_NAME}` will be used.' + required: false + version: + description: 'Version name to use for the build. If not specified, `build-${BRANCH_NAME}` will be used.' + required: false + +jobs: + # phase 1 + list: + runs-on: ubuntu-latest + + outputs: + target_branch: ${{ steps.defaults.outputs.target_branch }} + version: ${{ steps.defaults.outputs.version }} + component_matrix: ${{ steps.set_matrix.outputs.matrix }} + + steps: + - uses: actions/checkout@v4 + with: + submodules: 'recursive' + + - uses: viash-io/viash-actions/setup@v5 + + - name: Determine version tag from branch name + id: defaults + run: | + BRANCH_NAME=$(echo $GITHUB_REF | sed 's/refs\/heads\///') + + VERSION=${{ github.event.inputs.version }} + if [ -z "$VERSION" ]; then + VERSION="build-$BRANCH_NAME" + fi + echo "version=$VERSION" >> $GITHUB_OUTPUT + + TARGET_BRANCH=${{ github.event.inputs.target_branch }} + if [ -z "$TARGET_BRANCH" ]; then + TARGET_BRANCH="build-$BRANCH_NAME" + fi + echo "target_branch=$TARGET_BRANCH" >> $GITHUB_OUTPUT + + - name: Remove target folder from .gitignore + run: | + # allow publishing the target folder + sed -i '/^\/target.*/d' .gitignore + + - uses: viash-io/viash-actions/ns-build@v5 + with: + config_mod: .functionality.version := '${{ steps.defaults.outputs.version }}' + parallel: true + + - name: Deploy to target branch + uses: peaceiris/actions-gh-pages@v4 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: . 
+ publish_branch: ${{ steps.defaults.outputs.target_branch }} + + - id: ns_list + uses: viash-io/viash-actions/ns-list@v5 + with: + platform: docker + src: src + format: json + + - id: set_matrix + run: | + echo "matrix=$(jq -c '[ .[] | + { + "name": (.functionality.namespace + "/" + .functionality.name), + "dir": .info.config | capture("^(?<dir>.*\/)").dir + } + ]' ${{ steps.ns_list.outputs.output_file }} )" >> $GITHUB_OUTPUT + + # phase 2 + build: + needs: list + + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + component: ${{ fromJson(needs.list.outputs.component_matrix) }} + + steps: + # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.' + - uses: data-intuitive/reclaim-the-bytes@v2 + + - uses: actions/checkout@v4 + + - uses: viash-io/viash-actions/setup@v5 + + - name: Build container + uses: viash-io/viash-actions/ns-build@v5 + with: + config_mod: .functionality.version := '${{ needs.list.outputs.version }}' + platform: docker + src: ${{ matrix.component.dir }} + setup: build + + - name: Login to container registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ secrets.GTHB_USER }} + password: ${{ secrets.GTHB_PAT }} + + - name: Push container + uses: viash-io/viash-actions/ns-build@v5 + with: + config_mod: .functionality.version := '${{ needs.list.outputs.version }}' + platform: docker + src: ${{ matrix.component.dir }} + setup: push \ No newline at end of file diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 0000000..0abad5c --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,113 @@ +name: test + +on: + pull_request: + push: + branches: [ '**' ] + +jobs: + run_ci_check_job: + runs-on: ubuntu-latest + outputs: + run_ci: ${{ steps.github_cli.outputs.check }} + steps: + - name: 'Check if branch has an existing pull request and the trigger was a push' + id: github_cli + run: | + pull_request=$(gh pr list -R ${{ github.repository 
}} -H ${{ github.ref_name }} --json url --state open --limit 1 | jq '.[0].url') + # If the branch has a PR and this run was triggered by a push event, do not run + if [[ "$pull_request" != "null" && "$GITHUB_REF_NAME" != "main" && "${{ github.event_name == 'push' }}" == "true" && "${{ !contains(github.event.head_commit.message, 'ci force') }}" == "true" ]]; then + echo "check=false" >> $GITHUB_OUTPUT + else + echo "check=true" >> $GITHUB_OUTPUT + fi + env: + GITHUB_TOKEN: ${{ secrets.GTHB_PAT }} + + # phase 1 + list: + needs: run_ci_check_job + env: + s3_bucket: s3://openproblems-data/resources_test + runs-on: ubuntu-latest + if: ${{ needs.run_ci_check_job.outputs.run_ci == 'true' }} + + outputs: + matrix: ${{ steps.set_matrix.outputs.matrix }} + cache_key: ${{ steps.cache.outputs.cache_key }} + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: 'recursive' + + - uses: christian-ci/action-yaml-github-output@v2 + with: + file_path: _viash.yaml + + - uses: viash-io/viash-actions/setup@v5 + + - uses: viash-io/viash-actions/project/sync-and-cache-s3@v5 + id: cache + with: + s3_bucket: $s3_bucket/$NAME + dest_path: resources + cache_key_prefix: resources__ + + - id: ns_list + uses: viash-io/viash-actions/ns-list@v5 + with: + engine: docker + format: json + + - id: ns_list_filtered + uses: viash-io/viash-actions/project/detect-changed-components@v5 + with: + input_file: "${{ steps.ns_list.outputs.output_file }}" + + - id: set_matrix + run: | + echo "matrix=$(jq -c '[ .[] | + { + "name": (.namespace + "/" + .name), + "config": .info.config + } + ]' ${{ steps.ns_list_filtered.outputs.output_file }} )" >> $GITHUB_OUTPUT + + # phase 2 + viash_test: + needs: list + if: ${{ needs.list.outputs.matrix != '[]' && needs.list.outputs.matrix != '' }} + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + component: ${{ fromJson(needs.list.outputs.matrix) }} + + steps: + # Remove unnecessary files to free up space. 
Otherwise, we get 'no space left on device.' + - uses: data-intuitive/reclaim-the-bytes@v2 + + - uses: actions/checkout@v4 + with: + submodules: 'recursive' + + - uses: viash-io/viash-actions/setup@v5 + + # use cache + - name: Cache resources data + uses: actions/cache@v4 + timeout-minutes: 10 + with: + path: resources + key: ${{ needs.list.outputs.cache_key }} + + - name: Run test + timeout-minutes: 30 + run: | + VIASH_TEMP=$RUNNER_TEMP/viash viash test \ + "${{ matrix.component.config }}" \ + --cpus 2 \ + --memory "16gb" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fe80ce0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +resources +work +.nextflow* +target +.vscode +.DS_Store +output +trace-* +.ipynb_checkpoints \ No newline at end of file diff --git a/INSTRUCTIONS.md b/INSTRUCTIONS.md new file mode 100644 index 0000000..74287af --- /dev/null +++ b/INSTRUCTIONS.md @@ -0,0 +1,73 @@ +# Instructions + +This is a guide on what to do after you have created a new task repository from the template. More in-depth information about how to create a new task can be found in the [OpenProblems Documentation](https://openproblems.bio/documentation/create_task/). + +## First things first + +* Update the `_viash.yaml` file with the correct task information. +* Update the `src/api/task_info.yaml` file with the information you have provided in the task issue. + +## Resources + +The OpenProblems team has provided some test resources that can be used to test the task. These resources are stored in the `resources` folder. The `scripts/download_resources.sh` script can be used to download these resources. + +If these resources are not sufficient, you can add more resources to the `resources` folder. The `scripts/download_resources.sh` script can be updated to download these resources. + + + + + + + +#!/bin/bash + +echo "This script is not supposed to be run directly." +echo "Please run the script step-by-step." 
+exit 1 + +# sync resources +scripts/download_resources.sh + +# create a new component +method_id="my_metric" +method_lang="python" # change this to "r" if need be + +common/create_component/create_component -- \ + --language "$method_lang" \ + --name "$method_id" + +# TODO: fill in required fields in src/task/methods/foo/config.vsh.yaml +# TODO: edit src/task/methods/foo/script.py/R + +# test the component +viash test src/task/methods/$method_id/config.vsh.yaml + +# rebuild the container (only if you change something to the docker platform) +# You can reduce the memory and cpu allotted to jobs in _viash.yaml by modifying .platforms[.type == "nextflow"].config.labels +viash run src/task/methods/$method_id/config.vsh.yaml -- \ + ---setup cachedbuild ---verbose + +# run the method (using parquet as input) +viash run src/task/methods/$method_id/config.vsh.yaml -- \ + --de_train "resources/neurips-2023-kaggle/de_train.parquet" \ + --id_map "resources/neurips-2023-kaggle/id_map.csv" \ + --output "output/prediction.parquet" + +# run the method (using h5ad as input) +viash run src/task/methods/$method_id/config.vsh.yaml -- \ + --de_train_h5ad "resources/neurips-2023-kaggle/2023-09-12_de_by_cell_type_train.h5ad" \ + --id_map "resources/neurips-2023-kaggle/id_map.csv" \ + --output "output/prediction.parquet" + +# run evaluation metric +viash run src/task/metrics/mean_rowwise_error/config.vsh.yaml -- \ + --de_test "resources/neurips-2023-kaggle/de_test.parquet" \ + --prediction "output/prediction.parquet" \ + --output "output/score.h5ad" + +# print score on kaggle test dataset +python -c 'import anndata; print(anndata.read_h5ad("output/score.h5ad").uns)' \ No newline at end of file diff --git a/README.md b/README.md index b4a593c..0c87796 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,27 @@ -# task-template -This repo is a template to create a new task that has the correct files and structure needed to start a new task. 
+# Task Template + +This repo is a template to create a new task for the OpenProblems v2. This repo contains several example files and components that can be used when updated with the task info. + +> [!WARNING] +> This README will be overwritten when running the `create_task_readme` script. + +## Create a repository from this template + +> [!IMPORTANT] +> Before creating a new repository, make sure you are part of the OpenProblems task team. This will be done when you create an issue for the task and have received the go-ahead to create the task. +> For more information on how to create a new task, check out the [Create a new task](https://openproblems.bio/documentation/create_task/) documentation. + +The instructions below will guide you through creating a new repository from this template ([creating-a-repository-from-a-template](https://docs.github.com/en/repositories/creating-and-managing-repositories/creating-a-repository-from-a-template#creating-a-repository-from-a-template)). + + +* Click the "Use this template" button on the top right of the repository. +* Use the Owner dropdown menu to select the `openproblems-bio` account. +* Type a name for your repository (task_...), and a description. +* Set the repository visibility to public. +* Click "Create repository from template". + +## What to do next + +Check out the [instructions](INSTRUCTIONS.md) for more information on how to update the example files and components. These instructions also contain information on how to build out the task and basic commands. + +For more information on the OpenProblems v2, check out the [Documentation](https://openproblems.bio/documentation/) on the Open Problems website. diff --git a/_viash.yaml b/_viash.yaml new file mode 100644 index 0000000..923e418 --- /dev/null +++ b/_viash.yaml @@ -0,0 +1,16 @@ +viash_version: 0.9.0-RC6 + +name: task_template +description: | + An OpenProblems benchmark task. 
+license: MIT +keywords: [single-cell, openproblems, benchmark] +links: + issue_tracker: https://github.com/openproblems-bio/task_/issues + repository: https://github.com/openproblems-bio/task_ + docker_registry: ghcr.io/openproblems-bio + +version: dev + +config_mods: | + .runners[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h" } \ No newline at end of file diff --git a/common b/common index b31caac..ecbb47c 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit b31caac1ccf9ff2a35d4a295bc498023660007e4 +Subproject commit ecbb47ca0cb36e9350760cf126d5c7e3125f26de diff --git a/main.nf b/main.nf new file mode 100644 index 0000000..62f0140 --- /dev/null +++ b/main.nf @@ -0,0 +1,3 @@ +workflow { + print("This is a dummy placeholder for pipeline execution. 
Please use the corresponding nf files for running pipelines.") +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config new file mode 100644 index 0000000..8fc6c4e --- /dev/null +++ b/nextflow.config @@ -0,0 +1 @@ +process.container = 'nextflow/bash:latest' \ No newline at end of file diff --git a/scripts/add_a_control_method.sh b/scripts/add_a_control_method.sh new file mode 100644 index 0000000..d853907 --- /dev/null +++ b/scripts/add_a_control_method.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +task_name="task_template" +component_name="my_control_method" +component_lang="python" # change this to "r" if need be + +common/create_component/create_component \ + --task $task_name \ + --language "$component_lang" \ + --name "$component_name" \ + --api_file src/api/comp_control_method.yaml \ + --output "src/control_methods/$component_name" \ No newline at end of file diff --git a/scripts/add_a_method.sh b/scripts/add_a_method.sh new file mode 100644 index 0000000..8812644 --- /dev/null +++ b/scripts/add_a_method.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +task_name="task_template" +component_name="my_method" +component_lang="python" # change this to "r" if need be + +common/create_component/create_component \ + --task $task_name \ + --language "$component_lang" \ + --name "$component_name" \ + --api_file src/api/comp_method.yaml \ + --output "src/methods/$component_name" \ No newline at end of file diff --git a/scripts/add_a_metric.sh b/scripts/add_a_metric.sh new file mode 100644 index 0000000..71d6067 --- /dev/null +++ b/scripts/add_a_metric.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +task_name="task_template" +component_name="my_metric" +component_lang="python" # change this to "r" if need be + +common/create_component/create_component \ + --task $task_name \ + --language "$component_lang" \ + --name "$component_name" \ + --api_file src/api/comp_metric.yaml \ + --output "src/metrics/$component_name" \ No newline at end of file diff --git a/scripts/create_readme.sh 
b/scripts/create_readme.sh new file mode 100644 index 0000000..0857cb6 --- /dev/null +++ b/scripts/create_readme.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +common/create_task_readme/create_task_readme \ + --task_dir src \ + --github_url https://github.com/openproblems-bio/task-template \ + --output README.md diff --git a/scripts/download_resources.sh b/scripts/download_resources.sh new file mode 100644 index 0000000..945c47e --- /dev/null +++ b/scripts/download_resources.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -e + +echo ">> Downloading resources" + +viash run common/src/sync_resources/config.vsh.yaml -- \ + --input "s3://openproblems-data/resources_test/common/" \ + --output "resources_test" \ + --delete \ No newline at end of file diff --git a/scripts/test_all_components.sh b/scripts/test_all_components.sh new file mode 100644 index 0000000..cd016e9 --- /dev/null +++ b/scripts/test_all_components.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +# Test all components in a namespace (refer https://viash.io/reference/cli/ns_test.html) +viash ns test --parallel \ No newline at end of file diff --git a/src/api/comp_control_method.yaml b/src/api/comp_control_method.yaml new file mode 100644 index 0000000..fd3ac29 --- /dev/null +++ b/src/api/comp_control_method.yaml @@ -0,0 +1,29 @@ +namespace: control_methods +info: + type: control_method + type_info: + label: Control Method + summary: A control method. + description: | + A control method to predict effects. +arguments: + - name: --train_h5ad + __merge__: file_train_h5ad.yaml + required: false + direction: input + - name: --test_h5ad + __merge__: file_test_h5ad.yaml + required: true + direction: input + - name: --output + __merge__: file_prediction.yaml + required: true + direction: output +test_resources: + - type: python_script + path: /common/component_tests/run_and_check_output.py + - type: python_script + path: /common/component_tests/check_method_config.py + - path: /common/library.bib + #TODO: - path: fill in e.g. 
/resources/denoising/pancreas + #TODO: dest: fill in e.g. resources/denoising/pancreas \ No newline at end of file diff --git a/src/api/comp_method.yaml b/src/api/comp_method.yaml new file mode 100644 index 0000000..10316b5 --- /dev/null +++ b/src/api/comp_method.yaml @@ -0,0 +1,25 @@ +namespace: "methods" +info: + type: method + type_info: + label: Method + summary: A method. + description: | + A method to predict the task effects. +arguments: + - name: --train_h5ad + __merge__: file_train_h5ad.yaml + required: false + direction: input + - name: --output + __merge__: file_prediction.yaml + required: true + direction: output +test_resources: + - type: python_script + path: /common/component_tests/run_and_check_output.py + - type: python_script + path: /common/component_tests/check_method_config.py + - path: /common/library.bib + #TODO: - path: fill in e.g. /resources/denoising/pancreas + #TODO: dest: fill in e.g. resources/denoising/pancreas \ No newline at end of file diff --git a/src/api/comp_metric.yaml b/src/api/comp_metric.yaml new file mode 100644 index 0000000..9dc8c29 --- /dev/null +++ b/src/api/comp_metric.yaml @@ -0,0 +1,29 @@ +namespace: "metrics" +info: + type: metric + type_info: + label: Metric + summary: A metric. + description: | + A metric for evaluating method predictions. +arguments: + - name: "--input_test" + __merge__: file_test_h5ad.yaml + direction: input + required: true + - name: "--input_prediction" + __merge__: file_prediction.yaml + direction: input + required: true + - name: "--output" + __merge__: file_score.yaml + direction: output + required: true +test_resources: + - type: python_script + path: /common/component_tests/check_metric_config.py + - type: python_script + path: /common/component_tests/run_and_check_adata.py + - path: /common/library.bib + #TODO: - path: fill in e.g. /resources/denoising/pancreas + #TODO: dest: fill in e.g. 
resources/denoising/pancreas diff --git a/src/api/file_prediction.yaml b/src/api/file_prediction.yaml new file mode 100644 index 0000000..b473e75 --- /dev/null +++ b/src/api/file_prediction.yaml @@ -0,0 +1,21 @@ +#TODO: Change to the required and/or optional fields of the anndata +type: file +example: "resources_test/denoising/pancreas/denoised.h5ad" +info: + label: "Predicted data" + summary: A predicted dataset as output by a method. + slots: + layers: + - type: integer + name: prediction + description: predicted data + required: true + uns: + - type: string + name: dataset_id + description: "A unique identifier for the dataset" + required: true + - type: string + name: method_id + description: "A unique identifier for the method" + required: true \ No newline at end of file diff --git a/src/api/file_score.yaml b/src/api/file_score.yaml new file mode 100644 index 0000000..8aefeba --- /dev/null +++ b/src/api/file_score.yaml @@ -0,0 +1,26 @@ +type: file +example: resources/score.h5ad +info: + label: Score + summary: "File indicating the score of a metric." + file_type: h5ad + slots: + uns: + - type: string + name: dataset_id + description: "A unique identifier for the dataset" + required: true + - type: string + name: method_id + description: "A unique identifier for the method" + required: true + - type: string + name: metric_ids + description: "One or more unique metric identifiers" + multiple: true + required: true + - type: double + name: metric_values + description: "The metric values obtained for the given prediction. Must be of same length as 'metric_ids'." 
+ multiple: true + required: true \ No newline at end of file diff --git a/src/api/file_test_h5ad.yaml b/src/api/file_test_h5ad.yaml new file mode 100644 index 0000000..d373b84 --- /dev/null +++ b/src/api/file_test_h5ad.yaml @@ -0,0 +1,45 @@ +#TODO: Change to the required and/or optional fields of the anndata +type: file +example: "resources_test/denoising/pancreas/test.h5ad" +info: + label: "Test data" + summary: The subset of molecules used for the test dataset + slots: + layers: + - type: integer + name: counts + description: Raw counts + required: true + uns: + - type: string + name: dataset_id + description: "A unique identifier for the dataset" + required: true + - name: dataset_name + type: string + description: Nicely formatted name. + required: true + - type: string + name: dataset_url + description: Link to the original source of the dataset. + required: false + - name: dataset_reference + type: string + description: Bibtex reference of the paper in which the dataset was published. + required: false + - name: dataset_summary + type: string + description: Short description of the dataset. + required: true + - name: dataset_description + type: string + description: Long description of the dataset. + required: true + - name: dataset_organism + type: string + description: The organism of the sample in the dataset. + required: false + - name: train_sum + type: integer + description: The total number of counts in the training dataset. 
+ required: true \ No newline at end of file diff --git a/src/api/file_train_h5ad.yaml b/src/api/file_train_h5ad.yaml new file mode 100644 index 0000000..9ec0e86 --- /dev/null +++ b/src/api/file_train_h5ad.yaml @@ -0,0 +1,19 @@ +#TODO: Change to the required and/or optional fields of the anndata +type: file +example: "resources_test/denoising/pancreas/train.h5ad" +info: + label: "Training data" + summary: The subset of molecules used for the training dataset + slots: + layers: + - type: integer + name: counts + description: Raw counts + required: true + uns: + - type: string + name: dataset_id + description: "A unique identifier for the dataset" + required: true + # obs: + # ... \ No newline at end of file diff --git a/src/api/task_info.yaml b/src/api/task_info.yaml new file mode 100644 index 0000000..5166700 --- /dev/null +++ b/src/api/task_info.yaml @@ -0,0 +1,73 @@ +name: A unique identifier. Can only contain lowercase letters, numbers or underscores. +label: A unique, human-readable, short label. Used for creating summary tables and visualisations. +summary: A one sentence summary of purpose and methodology. Used for creating an overview tables. +image: The name of the image file to use for the component on the website. +readme: | + ## Installation + + You need to have Docker, Java, and Viash installed. Follow + [these instructions](https://openproblems.bio/documentation/fundamentals/requirements) + to install the required dependencies. + + ## Add a method + + To add a method to the repository, follow the instructions in the `scripts/add_a_method.sh` script. + + ## Frequently used commands + + To get started, you can run the following commands: + + ```bash + git clone git@github.com:openproblems-bio/.git + + cd + + # download resources + scripts/download_resources.sh + ``` + + To run the benchmark, you first need to build the components. 
Afterwards, you can run the benchmark: + + ```bash + viash ns build --parallel --setup cachedbuild + + scripts/run_benchmark.sh + ``` + + After adding a component, it is recommended to run the tests to ensure that the component is working correctly: + + ```bash + viash ns test --parallel + ``` + + Optionally, you can provide the `--query` argument to test only a subset of components: + + ```bash + viash ns test --parallel --query "component_name" + ``` +motivation: | + Explain the motivation behind your proposed task. Describe the biological or computational + problem you aim to address and why it’s important. Discuss the current state of research in + this area and any gaps or challenges that your task could help address. This section + should convince readers of the significance and relevance of your task. +description: | + Provide a clear and concise description of your task, detailing the specific problem it aims + to solve. Outline the input data types, the expected output, and any assumptions or constraints. + Be sure to explain any terminology or concepts that are essential for understanding the task. + +authors: + # Full name of the author, usually in the name of FirstName MiddleName LastName. + - name: ... + # Role of the author. Possible values: + # + # * `"author"`: Authors who have made substantial contributions to the component. + # * `"maintainer"`: The maintainer of the component. + # * `"contributor"`: Authors who have made smaller contributions (such as code patches etc.). + roles: [ ... ] + # Additional information on the author + info: + github: ... + orcid: ... + email: ... + twitter: ... + linkedin: ... \ No newline at end of file diff --git a/src/control_methods/my_control_method/config.vsh.yaml b/src/control_methods/my_control_method/config.vsh.yaml new file mode 100644 index 0000000..bba79d9 --- /dev/null +++ b/src/control_methods/my_control_method/config.vsh.yaml @@ -0,0 +1,59 @@ +# The API specifies which type of component this is. 
+# It contains specifications for: +# - The input/output files +# - Common parameters +# - A unit test +__merge__: ../../api/comp_control_method.yaml + + +# A unique identifier for your component (required). +# Can contain only lowercase letters or underscores. +name: my_control_method + +# Metadata for your component +info: + # A relatively short label, used when rendering visualisations (required) + label: My Control Method + # A one sentence summary of how this method works (required). Used when + # rendering summary tables. + summary: "FILL IN: A one sentence summary of this method." + # A multi-line description of how this component works (required). Used + # when rendering reference documentation. + description: | + FILL IN: A (multi-line) description of how this method works. + # Which normalisation method this component prefers to use (required). + preferred_normalization: log_cp10k + +# Component-specific parameters (optional) +# arguments: +# - name: "--n_neighbors" +# type: "integer" +# default: 5 +# description: Number of neighbors to use. + +# Resources required to run the component +resources: + # The script of your component (required) + - type: python_script + path: script.py + # Additional resources your script needs (optional) + # - type: file + # path: weights.pt + +engines: + # Specifications for the Docker image for this component. + - type: docker + image: ghcr.io/openproblems-bio/base_python:1.0.4 + # Add custom dependencies here (optional). For more information, see + # https://viash.io/reference/config/engines/docker/#setup . + # setup: + # - type: python + # packages: scib==1.1.5 + +runners: + # This platform allows running the component natively + - type: executable + # Allows turning the component into a Nextflow module / pipeline. 
+ - type: nextflow + directives: + label: [midtime,midmem,midcpu] diff --git a/src/control_methods/my_control_method/script.py b/src/control_methods/my_control_method/script.py new file mode 100644 index 0000000..f97215f --- /dev/null +++ b/src/control_methods/my_control_method/script.py @@ -0,0 +1,39 @@ +import anndata as ad + +## VIASH START +# Note: this section is auto-generated by viash at runtime. To edit it, make changes +# in config.vsh.yaml and then run `viash config inject config.vsh.yaml`. +par = { + 'train_h5ad': 'resources_test/task_template/pancreas/train_h5ad.h5ad', + 'test_h5ad': 'resources_test/task_template/pancreas/test_h5ad.h5ad', + 'output': 'output.h5ad' +} +meta = { + 'name': 'my_control_method' +} +## VIASH END + +print('Reading input files', flush=True) +train_h5ad = ad.read_h5ad(par['train_h5ad']) +test_h5ad = ad.read_h5ad(par['test_h5ad']) + +print('Preprocess data', flush=True) +# ... preprocessing ... + +print('Train model', flush=True) +# ... train model ... + +print('Generate predictions', flush=True) +# ... generate predictions ... + +print("Write output AnnData to file", flush=True) +output = ad.AnnData( + uns={ + 'dataset_id': train_h5ad.uns['dataset_id'], + 'method_id': meta['name'] + }, + layers={ + 'prediction': layers_prediction + } +) +output.write_h5ad(par['output'], compression='gzip') diff --git a/src/methods/my_method/config.vsh.yaml b/src/methods/my_method/config.vsh.yaml new file mode 100644 index 0000000..743101f --- /dev/null +++ b/src/methods/my_method/config.vsh.yaml @@ -0,0 +1,65 @@ +# The API specifies which type of component this is. +# It contains specifications for: +# - The input/output files +# - Common parameters +# - A unit test +__merge__: ../../api/comp_method.yaml + + +# A unique identifier for your component (required). +# Can contain only lowercase letters or underscores. 
+name: my_method + +# Metadata for your component +info: + # A relatively short label, used when rendering visualisations (required) + label: My Method + # A one sentence summary of how this method works (required). Used when + # rendering summary tables. + summary: "FILL IN: A one sentence summary of this method." + # A multi-line description of how this component works (required). Used + # when rendering reference documentation. + description: | + FILL IN: A (multi-line) description of how this method works. + # Which normalisation method this component prefers to use (required). + preferred_normalization: log_cp10k + # A reference key from the bibtex library at src/common/library.bib (required). + reference: bibtex_reference_key + # URL to the documentation for this method (required). + documentation_url: https://url.to/the/documentation + # URL to the code repository for this method (required). + repository_url: https://github.com/organisation/repository + +# Component-specific parameters (optional) +# arguments: +# - name: "--n_neighbors" +# type: "integer" +# default: 5 +# description: Number of neighbors to use. + +# Resources required to run the component +resources: + # The script of your component (required) + - type: python_script + path: script.py + # Additional resources your script needs (optional) + # - type: file + # path: weights.pt + +engines: + # Specifications for the Docker image for this component. + - type: docker + image: ghcr.io/openproblems-bio/base_python:1.0.4 + # Add custom dependencies here (optional). For more information, see + # https://viash.io/reference/config/engines/docker/#setup . + # setup: + # - type: python + # packages: scib==1.1.5 + +runners: + # This platform allows running the component natively + - type: executable + # Allows turning the component into a Nextflow module / pipeline. 
+ - type: nextflow + directives: + label: [midtime,midmem,midcpu] diff --git a/src/methods/my_method/script.py b/src/methods/my_method/script.py new file mode 100644 index 0000000..b0ed7f1 --- /dev/null +++ b/src/methods/my_method/script.py @@ -0,0 +1,37 @@ +import anndata as ad + +## VIASH START +# Note: this section is auto-generated by viash at runtime. To edit it, make changes +# in config.vsh.yaml and then run `viash config inject config.vsh.yaml`. +par = { + 'train_h5ad': 'resources_test/task_template/pancreas/train_h5ad.h5ad', + 'output': 'output.h5ad' +} +meta = { + 'name': 'my_method' +} +## VIASH END + +print('Reading input files', flush=True) +train_h5ad = ad.read_h5ad(par['train_h5ad']) + +print('Preprocess data', flush=True) +# ... preprocessing ... + +print('Train model', flush=True) +# ... train model ... + +print('Generate predictions', flush=True) +# ... generate predictions ... + +print("Write output AnnData to file", flush=True) +output = ad.AnnData( + uns={ + 'dataset_id': train_h5ad.uns['dataset_id'], + 'method_id': meta['name'] + }, + layers={ + 'prediction': layers_prediction + } +) +output.write_h5ad(par['output'], compression='gzip') diff --git a/src/metrics/my_metric/config.vsh.yaml b/src/metrics/my_metric/config.vsh.yaml new file mode 100644 index 0000000..d998cf0 --- /dev/null +++ b/src/metrics/my_metric/config.vsh.yaml @@ -0,0 +1,73 @@ +# The API specifies which type of component this is. +# It contains specifications for: +# - The input/output files +# - Common parameters +# - A unit test +__merge__: ../../api/comp_metric.yaml + + +# A unique identifier for your component (required). +# Can contain only lowercase letters or underscores. +name: my_metric + +# Metadata for your component +info: + metrics: + # A unique identifier for your metric (required). + # Can contain only lowercase letters or underscores. 
+ name: my_metric + # A relatively short label, used when rendering visualisations (required) + label: My Metric + # A one sentence summary of how this metric works (required). Used when + # rendering summary tables. + summary: "FILL IN: A one sentence summary of this metric." + # A multi-line description of how this component works (required). Used + # when rendering reference documentation. + description: | + FILL IN: A (multi-line) description of how this metric works. + # A reference key from the bibtex library at src/common/library.bib (required). + reference: bibtex_reference_key + # URL to the documentation for this metric (required). + documentation_url: https://url.to/the/documentation + # URL to the code repository for this metric (required). + repository_url: https://github.com/organisation/repository + # The minimum possible value for this metric (required) + min: 0 + # The maximum possible value for this metric (required) + max: 1 + # Whether a higher value represents a 'better' solution (required) + maximize: true + +# Component-specific parameters (optional) +# arguments: +# - name: "--n_neighbors" +# type: "integer" +# default: 5 +# description: Number of neighbors to use. + +# Resources required to run the component +resources: + # The script of your component (required) + - type: python_script + path: script.py + # Additional resources your script needs (optional) + # - type: file + # path: weights.pt + +engines: + # Specifications for the Docker image for this component. + - type: docker + image: ghcr.io/openproblems-bio/base_python:1.0.4 + # Add custom dependencies here (optional). For more information, see + # https://viash.io/reference/config/engines/docker/#setup . + # setup: + # - type: python + # packages: scib==1.1.5 + +runners: + # This runner allows running the component natively + - type: executable + # Allows turning the component into a Nextflow module / pipeline.
+ - type: nextflow + directives: + label: [midtime,midmem,midcpu] diff --git a/src/metrics/my_metric/script.py b/src/metrics/my_metric/script.py new file mode 100644 index 0000000..08dc74d --- /dev/null +++ b/src/metrics/my_metric/script.py @@ -0,0 +1,35 @@ +import anndata as ad + +## VIASH START +# Note: this section is auto-generated by viash at runtime. To edit it, make changes +# in config.vsh.yaml and then run `viash config inject config.vsh.yaml`. +par = { + 'input_test': 'resources_test/task_template/pancreas/test.h5ad', + 'input_prediction': 'resources_test/task_template/pancreas/prediction.h5ad', + 'output': 'output.h5ad' +} +meta = { + 'name': 'my_metric' +} +## VIASH END + +print('Reading input files', flush=True) +input_test = ad.read_h5ad(par['input_test']) +input_prediction = ad.read_h5ad(par['input_prediction']) + +print('Compute metrics', flush=True) +# uns_metric_ids and uns_metric_values can have length > 1 +# but should be of equal length; 0.5 is a hard-coded example value, not computed from the inputs +uns_metric_ids = [ 'my_metric' ] +uns_metric_values = [ 0.5 ] + +print("Write output AnnData to file", flush=True) +output = ad.AnnData( + uns={ + 'dataset_id': input_prediction.uns['dataset_id'], + 'method_id': input_prediction.uns['method_id'], + 'metric_ids': uns_metric_ids, + 'metric_values': uns_metric_values + } +) +output.write_h5ad(par['output'], compression='gzip')