From dadc9d38bb7d5ae1d07193d6659fe62cf8c0f620 Mon Sep 17 00:00:00 2001
From: sreichl <reichl.stephan@gmail.com>
Date: Fri, 13 Sep 2024 17:08:06 +0200
Subject: [PATCH] adapt to Snakemake 8; move env, config, annot export into the
 result folder to be self-contained

---
 README.md                             |  2 +-
 config/README.md                      |  4 +++-
 config/config.yaml                    |  1 -
 workflow/Snakefile                    | 16 +++++++++-------
 workflow/envs/global.yaml             |  7 +++++++
 workflow/profiles/default/config.yaml |  3 +++
 workflow/rules/dea.smk                |  2 --
 workflow/rules/envs_export.smk        | 20 ++++++--------------
 workflow/rules/visualize.smk          |  2 --
 9 files changed, 29 insertions(+), 28 deletions(-)
 create mode 100644 workflow/envs/global.yaml
 create mode 100644 workflow/profiles/default/config.yaml

diff --git a/README.md b/README.md
index 1c1357e..fe3a1c8 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 [![DOI](https://zenodo.org/badge/483638364.svg)](https://zenodo.org/doi/10.5281/zenodo.10689139)
 
 # Single-cell RNA sequencing (scRNA-seq) Differential Expression Analysis & Visualization Snakemake Workflow
-A [Snakemake](https://snakemake.readthedocs.io/en/stable/) workflow for performing differential expression analyses (DEA) of processed (multimodal) scRNA-seq data powered by the R package [Seurat's](https://satijalab.org/seurat/index.html) functions [FindMarkers](https://satijalab.org/seurat/reference/findmarkers) and [FindAllMarkers](https://satijalab.org/seurat/reference/findallmarkers).
+A [Snakemake 8](https://snakemake.readthedocs.io/en/stable/) workflow for performing differential expression analyses (DEA) of processed (multimodal) scRNA-seq data powered by the R package [Seurat's](https://satijalab.org/seurat/index.html) functions [FindMarkers](https://satijalab.org/seurat/reference/findmarkers) and [FindAllMarkers](https://satijalab.org/seurat/reference/findallmarkers).
 
 This workflow adheres to the module specifications of [MR.PARETO](https://github.com/epigen/mr.pareto), an effort to augment research by modularizing (biomedical) data science. For more details, instructions and modules check out the project's repository. Please consider starring and sharing modules that are useful to you, this helps me in prioritizing my efforts!
 
diff --git a/config/README.md b/config/README.md
index d3895f5..5398fe8 100644
--- a/config/README.md
+++ b/config/README.md
@@ -2,10 +2,12 @@
 
 You need one configuration file and one annotation file to run the complete workflow. You can use the provided example as starting point. If in doubt read the comments in the config and/or try the default values.
 
-- project configuration (config/config.yaml): different for every project/dataset and configures the analyses to be performed.
+- project configuration (`config/config.yaml`): different for every project/dataset and configures the analyses to be performed.
 - sample annotation (sample_annotation): CSV file consisting of five columns
     -  name: name of the dataset/analysis (tip: keep it short, but descriptive and distinctive).
     -  data: path to the input Seurat object as .rds.
     -  assay: the Seurat assay to be used (e.g., SCT or RNA).
     -  metadata: column name of the metadata that should be used to group cells for comparison (e.g., condition or cell_type).
     -  control: name of the class/level that should be used as control in the comparison (e.g., untreated) or "ALL" to compare every class against the rest (e.g., useful to find cluster markers; one vs all)
+
+Set workflow-specific `resources` or command-line interface (CLI) arguments in the workflow profile `workflow/profiles/default/config.yaml`, which supersedes global Snakemake profiles.
\ No newline at end of file
diff --git a/config/config.yaml b/config/config.yaml
index 6ca5efc..802c619 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -2,7 +2,6 @@
 ##### RESOURCES #####
 mem: '32000'
 threads: 1 # only DEA rule is multicore and gets 8*threads
-partition: 'shortq'
 
 ##### GENERAL #####
 annotation: /path/to/MyData_dea_seurat_annotation.csv
diff --git a/workflow/Snakefile b/workflow/Snakefile
index bb8aab2..2337b12 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -1,4 +1,7 @@
 
+##### global workflow dependencies #####
+conda: "envs/global.yaml"
+
 ##### libraries #####
 import os
 import sys
@@ -6,7 +9,8 @@ import pandas as pd
 import yaml
 from snakemake.utils import min_version
 
-min_version("7.15.2")
+##### set minimum snakemake version #####
+min_version("8.20.1")
 
 ##### module name #####
 module_name = "dea_seurat"
@@ -48,17 +52,15 @@ rule all:
                                  analysis = analyses,
                                  feature_list = feature_lists + ['FILTERED'],
                          ),
-        envs = expand(os.path.join(config["result_path"],'envs',module_name,'{env}.yaml'),env=['seurat','volcanos','ggplot','heatmap']),
-        configs = os.path.join(config["result_path"],'configs',module_name,'{}_config.yaml'.format(config["project_name"])),
-        annotations = os.path.join(config["result_path"],'configs',module_name,'{}_annot.csv'.format(config["project_name"])),
-        feature_lists = expand(os.path.join(config["result_path"],'configs',module_name,'{feature_list}.txt'), feature_list = feature_lists),
+        envs = expand(os.path.join(result_path,'envs','{env}.yaml'),env=['seurat','volcanos','ggplot','heatmap']),
+        configs = os.path.join(result_path,'configs','{}_config.yaml'.format(config["project_name"])),
+        annotations = os.path.join(result_path,'configs','{}_annot.csv'.format(config["project_name"])),
+        feature_lists = expand(os.path.join(result_path,'configs','{feature_list}.txt'), feature_list = feature_lists),
     resources:
         mem_mb=config.get("mem", "16000"),
     threads: config.get("threads", 1)
     log:
         os.path.join("logs","rules","all.log"),
-    params:
-        partition=config.get("partition"),
 
         
 ##### load rules #####
diff --git a/workflow/envs/global.yaml b/workflow/envs/global.yaml
new file mode 100644
index 0000000..482aa8e
--- /dev/null
+++ b/workflow/envs/global.yaml
@@ -0,0 +1,7 @@
+channels:
+  - conda-forge
+  - bioconda
+  - nodefaults
+dependencies:
+  - numpy=2.0.1
+  - pandas=2.2.2
\ No newline at end of file
diff --git a/workflow/profiles/default/config.yaml b/workflow/profiles/default/config.yaml
new file mode 100644
index 0000000..29bebdd
--- /dev/null
+++ b/workflow/profiles/default/config.yaml
@@ -0,0 +1,3 @@
+default-resources:
+    slurm_partition: shortq
+    slurm_extra: "'--qos=shortq'"
\ No newline at end of file
diff --git a/workflow/rules/dea.smk b/workflow/rules/dea.smk
index 33ba5bc..4e6abad 100644
--- a/workflow/rules/dea.smk
+++ b/workflow/rules/dea.smk
@@ -14,7 +14,6 @@ rule dea:
     log:
         os.path.join("logs","rules","dea_{analysis}.log"),
     params:
-        partition=config.get("partition"),
         assay = lambda w: annot_dict["{}".format(w.analysis)]["assay"],
         metadata = lambda w: annot_dict["{}".format(w.analysis)]["metadata"],
         control = lambda w: annot_dict["{}".format(w.analysis)]["control"],
@@ -52,7 +51,6 @@ rule aggregate:
     log:
         os.path.join("logs","rules","aggregate_{analysis}.log"),
     params:
-        partition=config.get("partition"),
         assay = lambda w: annot_dict["{}".format(w.analysis)]["assay"],
         metadata = lambda w: annot_dict["{}".format(w.analysis)]["metadata"],
         control = lambda w: annot_dict["{}".format(w.analysis)]["control"],
diff --git a/workflow/rules/envs_export.smk b/workflow/rules/envs_export.smk
index 3f64e08..3be212d 100644
--- a/workflow/rules/envs_export.smk
+++ b/workflow/rules/envs_export.smk
@@ -1,7 +1,7 @@
 # one rule per used conda environment to document the exact versions and builds of the used software        
 rule env_export:
     output:
-        report(os.path.join(config["result_path"],'envs','dea_seurat','{env}.yaml'),
+        report(os.path.join(result_path,'envs','{env}.yaml'),
                       caption="../report/software.rst", 
                       category="Software", 
                       subcategory="{}_{}".format(config["project_name"], module_name)
@@ -13,8 +13,6 @@ rule env_export:
     threads: config.get("threads", 1)
     log:
         os.path.join("logs","rules","env_{env}.log"),
-    params:
-        partition=config.get("partition"),
     shell:
         """
         conda env export > {output}
@@ -23,7 +21,7 @@ rule env_export:
 # add configuration files to report        
 rule config_export:
     output:
-        configs = report(os.path.join(config["result_path"],'configs','dea_seurat','{}_config.yaml'.format(config["project_name"])), 
+        configs = report(os.path.join(result_path,'configs','{}_config.yaml'.format(config["project_name"])), 
                          caption="../report/configs.rst", 
                          category="Configuration", 
                          subcategory="{}_{}".format(config["project_name"], module_name)
@@ -33,8 +31,6 @@ rule config_export:
     threads: config.get("threads", 1)
     log:
         os.path.join("logs","rules","config_export.log"),
-    params:
-        partition=config.get("partition"),
     run:
         with open(output["configs"], 'w') as outfile:
             yaml.dump(config, outfile, sort_keys=False, width=1000, indent=2)
@@ -44,18 +40,16 @@ rule annot_export:
     input:
         config["annotation"],
     output:
-        annot = report(os.path.join(config["result_path"],'configs','dea_seurat','{}_annot.csv'.format(config["project_name"])), 
+        annot = report(os.path.join(result_path,'configs','{}_annot.csv'.format(config["project_name"])), 
                          caption="../report/configs.rst", 
                          category="Configuration", 
                          subcategory="{}_{}".format(config["project_name"], module_name)
                         )
     resources:
-        mem_mb=1000, #config.get("mem_small", "16000"),
+        mem_mb=1000,
     threads: config.get("threads", 1)
     log:
         os.path.join("logs","rules","annot_export.log"),
-    params:
-        partition=config.get("partition"),
     shell:
         """
         cp {input} {output}
@@ -66,18 +60,16 @@ rule feature_list_export:
     input:
         get_feature_list_path,
     output:
-        feature_lists = report(os.path.join(config["result_path"],'configs',module_name,'{feature_list}.txt'), 
+        feature_lists = report(os.path.join(result_path,'configs','{feature_list}.txt'), 
                             caption="../report/feature_lists.rst", 
                             category="Configuration", 
                             subcategory="{}_{}".format(config["project_name"], module_name)
                            ),
     resources:
-        mem_mb=1000, #config.get("mem_small", "16000"),config.get("mem", "16000"),
+        mem_mb=1000,
     threads: config.get("threads", 1)
     log:
         os.path.join("logs","rules","feature_list_export_{feature_list}.log"),
-    params:
-        partition=config.get("partition"),
     shell:
         """
         cp {input} {output}
diff --git a/workflow/rules/visualize.smk b/workflow/rules/visualize.smk
index a6324c1..c2f5022 100644
--- a/workflow/rules/visualize.smk
+++ b/workflow/rules/visualize.smk
@@ -22,7 +22,6 @@ rule volcanos:
     log:
         os.path.join("logs","rules","volcanos_{analysis}_{feature_list}.log"),
     params:
-        partition=config.get("partition"),
         assay = lambda w: annot_dict["{}".format(w.analysis)]["assay"],
         metadata = lambda w: annot_dict["{}".format(w.analysis)]["metadata"],
         control = lambda w: annot_dict["{}".format(w.analysis)]["control"],
@@ -51,7 +50,6 @@ rule heatmap:
     log:
         os.path.join("logs","rules","lfc_heatmap_{analysis}_{feature_list}.log"),
     params:
-        partition=config.get("partition"),
         assay = lambda w: annot_dict["{}".format(w.analysis)]["assay"],
         metadata = lambda w: annot_dict["{}".format(w.analysis)]["metadata"],
         control = lambda w: annot_dict["{}".format(w.analysis)]["control"],