From dadc9d38bb7d5ae1d07193d6659fe62cf8c0f620 Mon Sep 17 00:00:00 2001 From: sreichl Date: Fri, 13 Sep 2024 17:08:06 +0200 Subject: [PATCH] adapt to Snakemake 8; move env, config, annot export into the result folder to be self-contained --- README.md | 2 +- config/README.md | 4 +++- config/config.yaml | 1 - workflow/Snakefile | 16 +++++++++------- workflow/envs/global.yaml | 7 +++++++ workflow/profiles/default/config.yaml | 3 +++ workflow/rules/dea.smk | 2 -- workflow/rules/envs_export.smk | 20 ++++++-------------- workflow/rules/visualize.smk | 2 -- 9 files changed, 29 insertions(+), 28 deletions(-) create mode 100644 workflow/envs/global.yaml create mode 100644 workflow/profiles/default/config.yaml diff --git a/README.md b/README.md index 1c1357e..fe3a1c8 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ [![DOI](https://zenodo.org/badge/483638364.svg)](https://zenodo.org/doi/10.5281/zenodo.10689139) # Single-cell RNA sequencing (scRNA-seq) Differential Expression Analysis & Visualization Snakemake Workflow -A [Snakemake](https://snakemake.readthedocs.io/en/stable/) workflow for performing differential expression analyses (DEA) of processed (multimodal) scRNA-seq data powered by the R package [Seurat's](https://satijalab.org/seurat/index.html) functions [FindMarkers](https://satijalab.org/seurat/reference/findmarkers) and [FindAllMarkers](https://satijalab.org/seurat/reference/findallmarkers). +A [Snakemake 8](https://snakemake.readthedocs.io/en/stable/) workflow for performing differential expression analyses (DEA) of processed (multimodal) scRNA-seq data powered by the R package [Seurat's](https://satijalab.org/seurat/index.html) functions [FindMarkers](https://satijalab.org/seurat/reference/findmarkers) and [FindAllMarkers](https://satijalab.org/seurat/reference/findallmarkers). This workflow adheres to the module specifications of [MR.PARETO](https://github.com/epigen/mr.pareto), an effort to augment research by modularizing (biomedical) data science. 
For more details, instructions and modules check out the project's repository. Please consider starring and sharing modules that are useful to you, this helps me in prioritizing my efforts! diff --git a/config/README.md b/config/README.md index d3895f5..5398fe8 100644 --- a/config/README.md +++ b/config/README.md @@ -2,10 +2,12 @@ You need one configuration file and one annotation file to run the complete workflow. You can use the provided example as starting point. If in doubt read the comments in the config and/or try the default values. -- project configuration (config/config.yaml): different for every project/dataset and configures the analyses to be performed. +- project configuration (`config/config.yaml`): different for every project/dataset and configures the analyses to be performed. - sample annotation (sample_annotation): CSV file consisting of five columns - name: name of the dataset/analysis (tip: keep it short, but descriptive and distinctive). - data: path to the input Seurat object as .rds. - assay: the Seurat assay to be used (e.g., SCT or RNA). - metadata: column name of the metadata that should be used to group cells for comparison (e.g., condition or cell_type). - control: name of the class/level that should be used as control in the comparison (e.g., untreated) or "ALL" to compare every class against the rest (e.g., useful to find cluster markers; one vs all) + +Set workflow-specific `resources` or command line arguments (CLI) in the workflow profile `workflow/profiles/default/config.yaml`, which supersedes global Snakemake profiles. 
\ No newline at end of file diff --git a/config/config.yaml b/config/config.yaml index 6ca5efc..802c619 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -2,7 +2,6 @@ ##### RESOURCES ##### mem: '32000' threads: 1 # only DEA rule is multicore and gets 8*threads -partition: 'shortq' ##### GENERAL ##### annotation: /path/to/MyData_dea_seurat_annotation.csv diff --git a/workflow/Snakefile b/workflow/Snakefile index bb8aab2..2337b12 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -1,4 +1,7 @@ +##### global workflow dependencies ##### +conda: "envs/global.yaml" + ##### libraries ##### import os import sys @@ -6,7 +9,8 @@ import pandas as pd import yaml from snakemake.utils import min_version -min_version("7.15.2") +##### set minimum snakemake version ##### +min_version("8.20.1") ##### module name ##### module_name = "dea_seurat" @@ -48,17 +52,15 @@ rule all: analysis = analyses, feature_list = feature_lists + ['FILTERED'], ), - envs = expand(os.path.join(config["result_path"],'envs',module_name,'{env}.yaml'),env=['seurat','volcanos','ggplot','heatmap']), - configs = os.path.join(config["result_path"],'configs',module_name,'{}_config.yaml'.format(config["project_name"])), - annotations = os.path.join(config["result_path"],'configs',module_name,'{}_annot.csv'.format(config["project_name"])), - feature_lists = expand(os.path.join(config["result_path"],'configs',module_name,'{feature_list}.txt'), feature_list = feature_lists), + envs = expand(os.path.join(result_path,'envs','{env}.yaml'),env=['seurat','volcanos','ggplot','heatmap']), + configs = os.path.join(result_path,'configs','{}_config.yaml'.format(config["project_name"])), + annotations = os.path.join(result_path,'configs','{}_annot.csv'.format(config["project_name"])), + feature_lists = expand(os.path.join(result_path,'configs','{feature_list}.txt'), feature_list = feature_lists), resources: mem_mb=config.get("mem", "16000"), threads: config.get("threads", 1) log: 
os.path.join("logs","rules","all.log"), - params: - partition=config.get("partition"), ##### load rules ##### diff --git a/workflow/envs/global.yaml b/workflow/envs/global.yaml new file mode 100644 index 0000000..482aa8e --- /dev/null +++ b/workflow/envs/global.yaml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - nodefaults +dependencies: + - numpy=2.0.1 + - pandas=2.2.2 \ No newline at end of file diff --git a/workflow/profiles/default/config.yaml b/workflow/profiles/default/config.yaml new file mode 100644 index 0000000..29bebdd --- /dev/null +++ b/workflow/profiles/default/config.yaml @@ -0,0 +1,3 @@ +default-resources: + slurm_partition: shortq + slurm_extra: "'--qos=shortq'" \ No newline at end of file diff --git a/workflow/rules/dea.smk b/workflow/rules/dea.smk index 33ba5bc..4e6abad 100644 --- a/workflow/rules/dea.smk +++ b/workflow/rules/dea.smk @@ -14,7 +14,6 @@ rule dea: log: os.path.join("logs","rules","dea_{analysis}.log"), params: - partition=config.get("partition"), assay = lambda w: annot_dict["{}".format(w.analysis)]["assay"], metadata = lambda w: annot_dict["{}".format(w.analysis)]["metadata"], control = lambda w: annot_dict["{}".format(w.analysis)]["control"], @@ -52,7 +51,6 @@ rule aggregate: log: os.path.join("logs","rules","aggregate_{analysis}.log"), params: - partition=config.get("partition"), assay = lambda w: annot_dict["{}".format(w.analysis)]["assay"], metadata = lambda w: annot_dict["{}".format(w.analysis)]["metadata"], control = lambda w: annot_dict["{}".format(w.analysis)]["control"], diff --git a/workflow/rules/envs_export.smk b/workflow/rules/envs_export.smk index 3f64e08..3be212d 100644 --- a/workflow/rules/envs_export.smk +++ b/workflow/rules/envs_export.smk @@ -1,7 +1,7 @@ # one rule per used conda environment to document the exact versions and builds of the used software rule env_export: output: - report(os.path.join(config["result_path"],'envs','dea_seurat','{env}.yaml'), + 
report(os.path.join(result_path,'envs','{env}.yaml'), caption="../report/software.rst", category="Software", subcategory="{}_{}".format(config["project_name"], module_name) @@ -13,8 +13,6 @@ rule env_export: threads: config.get("threads", 1) log: os.path.join("logs","rules","env_{env}.log"), - params: - partition=config.get("partition"), shell: """ conda env export > {output} @@ -23,7 +21,7 @@ rule env_export: # add configuration files to report rule config_export: output: - configs = report(os.path.join(config["result_path"],'configs','dea_seurat','{}_config.yaml'.format(config["project_name"])), + configs = report(os.path.join(result_path,'configs','{}_config.yaml'.format(config["project_name"])), caption="../report/configs.rst", category="Configuration", subcategory="{}_{}".format(config["project_name"], module_name) @@ -33,8 +31,6 @@ rule config_export: threads: config.get("threads", 1) log: os.path.join("logs","rules","config_export.log"), - params: - partition=config.get("partition"), run: with open(output["configs"], 'w') as outfile: yaml.dump(config, outfile, sort_keys=False, width=1000, indent=2) @@ -44,18 +40,16 @@ rule annot_export: input: config["annotation"], output: - annot = report(os.path.join(config["result_path"],'configs','dea_seurat','{}_annot.csv'.format(config["project_name"])), + annot = report(os.path.join(result_path,'configs','{}_annot.csv'.format(config["project_name"])), caption="../report/configs.rst", category="Configuration", subcategory="{}_{}".format(config["project_name"], module_name) ) resources: - mem_mb=1000, #config.get("mem_small", "16000"), + mem_mb=1000, threads: config.get("threads", 1) log: os.path.join("logs","rules","annot_export.log"), - params: - partition=config.get("partition"), shell: """ cp {input} {output} @@ -66,18 +60,16 @@ rule feature_list_export: input: get_feature_list_path, output: - feature_lists = report(os.path.join(config["result_path"],'configs',module_name,'{feature_list}.txt'), + feature_lists = 
report(os.path.join(result_path,'configs','{feature_list}.txt'), caption="../report/feature_lists.rst", category="Configuration", subcategory="{}_{}".format(config["project_name"], module_name) ), resources: - mem_mb=1000, #config.get("mem_small", "16000"),config.get("mem", "16000"), + mem_mb=1000, threads: config.get("threads", 1) log: os.path.join("logs","rules","feature_list_export_{feature_list}.log"), - params: - partition=config.get("partition"), shell: """ cp {input} {output} diff --git a/workflow/rules/visualize.smk b/workflow/rules/visualize.smk index a6324c1..c2f5022 100644 --- a/workflow/rules/visualize.smk +++ b/workflow/rules/visualize.smk @@ -22,7 +22,6 @@ rule volcanos: log: os.path.join("logs","rules","volcanos_{analysis}_{feature_list}.log"), params: - partition=config.get("partition"), assay = lambda w: annot_dict["{}".format(w.analysis)]["assay"], metadata = lambda w: annot_dict["{}".format(w.analysis)]["metadata"], control = lambda w: annot_dict["{}".format(w.analysis)]["control"], @@ -51,7 +50,6 @@ rule heatmap: log: os.path.join("logs","rules","lfc_heatmap_{analysis}_{feature_list}.log"), params: - partition=config.get("partition"), assay = lambda w: annot_dict["{}".format(w.analysis)]["assay"], metadata = lambda w: annot_dict["{}".format(w.analysis)]["metadata"], control = lambda w: annot_dict["{}".format(w.analysis)]["control"],