generated from openproblems-bio/task_template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path_viash.yaml
87 lines (78 loc) · 3.36 KB
/
_viash.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Viash version this project configuration is written for.
viash_version: "0.9.0"

# Project identity.
name: task_label_projection
organization: openproblems-bio
version: dev

license: MIT
keywords:
  - single-cell
  - label projection
  - cell type annotation
# Project locations: issue tracker, source repository, and container registry
# host. These keys must be nested under `links`; at column 0 they would be
# parsed as unrelated top-level keys and `links` itself would be null.
links:
  issue_tracker: https://github.com/openproblems-bio/task_label_projection/issues
  repository: https://github.com/openproblems-bio/task_label_projection
  docker_registry: ghcr.io
# Human-readable task title and one-line summary.
label: Label projection
summary: Automated cell type annotation from rich, labeled reference data
# Long-form task description (Markdown links and emphasis are used in the
# text). Literal block scalar: line breaks are kept exactly as written, and
# the body must be indented deeper than the `description` key.
description: |
  A major challenge for integrating single cell datasets is creating matching
  cell type annotations for each cell. One of the most common strategies for
  annotating cell types is referred to as
  ["cluster-then-annotate"](https://www.nature.com/articles/s41576-018-0088-9)
  whereby cells are aggregated into clusters based on feature similarity and
  then manually characterized based on differential gene expression or previously
  identified marker genes. Recently, methods have emerged to build on this
  strategy and annotate cells using
  [known marker genes](https://www.nature.com/articles/s41592-019-0535-3).
  However, these strategies pose a difficulty for integrating atlas-scale
  datasets as the particular annotations may not match.
  To ensure that the cell type labels in newly generated datasets match
  existing reference datasets, some methods align cells to a previously
  annotated [reference dataset](https://academic.oup.com/bioinformatics/article/35/22/4688/54802990)
  and then _project_ labels from the reference to the new dataset.
  Here, we compare methods for annotation based on a reference dataset.
  The datasets consist of two or more samples of single cell profiles that
  have been manually annotated with matching labels. These datasets are then
  split into training and test batches, and the task of each method is to
  train a cell type classifier on the training set and project those labels
  onto the test set.
# references:
#   doi:
#     - 10.21203/rs.3.rs-4181617/v1
#   bibtex:
#     - |
#       @article{doe_2021_template,
#         doi = {10.21203/rs.3.rs-4181617/v1},
#         url = {https://doi.org/10.21203/rs.3.rs-4181617/v1},
#         author = {Doe, John},
#         title = {A template for creating new tasks},
#         publisher = {Research Square},
#         year = {2021},
#       }
# Task-level metadata: thumbnail image and the test-resource locations.
info:
  image: thumbnail.svg
  # Each entry pairs an S3 source `path` with a local `dest` directory
  # (presumably synced from path to dest before tests run — confirm against
  # the project's resource-sync tooling).
  test_resources:
    - type: s3
      path: s3://openproblems-data/resources_test/common/cxg_immune_cell_atlas/
      dest: resources_test/common/cxg_immune_cell_atlas
    - type: s3
      path: s3://openproblems-data/resources_test/task_label_projection/
      dest: resources_test/task_label_projection
# Task authors. Each entry carries a name, a list of roles, and an `info`
# mapping with the author's GitHub handle and, where given, ORCID iD.
# The per-author `info` must be nested inside its list item so it does not
# collide with the top-level `info` key.
authors:
  - name: "Nikolay Markov"
    roles: [ author, maintainer ]
    info:
      github: mxposed
  - name: "Scott Gigante"
    roles: [ author ]
    info:
      github: scottgigante
      orcid: "0000-0002-4544-2764"
  - name: Robrecht Cannoodt
    roles: [ author ]
    info:
      github: rcannood
      orcid: "0000-0003-3641-729X"
# Config modification applied to runners whose type is "nextflow": sets
# `config.labels` to the map below, which defines memory tiers
# (lowmem/midmem/highmem), CPU tiers (lowcpu/midcpu/highcpu) and time tiers
# (lowtime/midtime/hightime/veryhightime).
# Kept on a single line inside a literal block scalar so the expression text
# is passed through unchanged; the body must be indented under the key.
config_mods: |
  .runners[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h" }
# External repositories referenced by this project. One entry: the
# openproblems-bio/openproblems GitHub repo, pinned to the `build/main` tag.
repositories:
  - name: openproblems
    type: github
    repo: openproblems-bio/openproblems
    tag: build/main