feat(pipeline): Bugfixes to ATAC and ChIP-seq pipelines

* Fix slurm preset (#118) * require snakemake<8 * fix escape character and whitespace errors * Delete setup.cfg file * fix: split peak call rules * tests: added all peak call methods to atac test * Update output file paths in alignment_post_processing.smk * Update file paths in hub.smk * fix: updated wildcards for bigBed files * Refactor test_seqnado_config_creation function and add missing options to config_atac.yml * Update config file for chip sequencing * fix: seqnado-design * chore: removed commented code * Fix file path in lanceotron_no_input rule * Fix metadata and experiment creation in DesignIP class * Fix symlink_files function to handle both paired and single-end assays * Add log and wrapper for fastqc_raw_single rule * update config if ucsc is null * Fix config (#119) * remove split fastq from config and all rules * clean up config and fix spelling of indices * remove test config files * update default heatmap options * return to config but with small changes * refactor config.py * fix typo in config.py * use indices consistently for genome indices * update config process in docs * add entrypoint and chmod profile (#122) * Feature add config rerun (#126) * add option to rerun config * update config docs with rerun * move singularity fix to faq in docs (#127) * feat(pipeline): handle failed peak calls (#131) * fix: add validate peaks rule * Add get_peak_files function to retrieve peak files based on assay type * fix(pipeline): inputs not used for peak call (#132) * Develop (#128) * Fix slurm preset (#118) * require snakemake<8 * fix escape character and whitespace errors * Delete setup.cfg file * fix: split peak call rules * tests: added all peak call methods to atac test * Update output file paths in alignment_post_processing.smk * Update file paths in hub.smk * fix: updated wildcards for bigBed files * Refactor test_seqnado_config_creation function and add missing options to config_atac.yml * Update config file for chip sequencing * fix: seqnado-design 
* chore: removed commented code * Fix file path in lanceotron_no_input rule * Fix metadata and experiment creation in DesignIP class * Fix symlink_files function to handle both paired and single-end assays * Add log and wrapper for fastqc_raw_single rule * update config if ucsc is null * Fix config (#119) * remove split fastq from config and all rules * clean up config and fix spelling of indices * remove test config files * update default heatmap options * return to config but with small changes * refactor config.py * fix typo in config.py * use indices consistently for genome indices * update config process in docs * add entrypoint and chmod profile (#122) * Feature add config rerun (#126) * add option to rerun config * update config docs with rerun * move singularity fix to faq in docs (#127) --------- Co-authored-by: alsmith <[email protected]> * fix: "f" missing for all get_control_X functions. Whoops... * fix: multiple errors with get_control_X files * fix: removed touch sentinel * fix: && removed at end of lines * fix: make blank file if peak calls fail * fix: validate peaks wrong param supplied --------- Co-authored-by: Catherine Chahrour <[email protected]> --------- Co-authored-by: alsmith <[email protected]> Co-authored-by: Alastair Smith <[email protected]>
alsmith151 · Feb 5, 2024 · 3597518 · 3597518
1 parent 03fa79a
commit 3597518
Show file tree

Hide file tree

Showing 2 changed files with 66 additions and 6 deletions.
diff --git a/seqnado/workflow/rules/hub.smk b/seqnado/workflow/rules/hub.smk
@@ -93,6 +93,30 @@ def get_hub_input(wildcards):
     return input_files
 
 
+def get_peak_files(wildcards):
+    peak_files = []
+
+    if config["call_peaks"]:
+        if ASSAY == "ChIP":
+            peak_files.extend(
+                expand(
+                    "seqnado_output/peaks/{method}/{sample}.bed",
+                    method=config["peak_calling_method"],
+                    sample=SAMPLE_NAMES_IP,
+                )
+            )
+        elif ASSAY == "ATAC":
+            peak_files.extend(
+                expand(
+                    "seqnado_output/peaks/{method}/{sample}.bed",
+                    method=config["peak_calling_method"],
+                    sample=SAMPLE_NAMES,
+                )
+            )
+
+    return peak_files
+
+
 rule save_design:
     output:
         "seqnado_output/design.csv",
@@ -102,9 +126,33 @@ rule save_design:
         DESIGN.to_dataframe().to_csv("seqnado_output/design.csv", index=False)
 
 
+# Pad every empty peak file with a single placeholder interval, then write a
+# sentinel file. Per-file errors are logged rather than raised, so the
+# sentinel is written whether or not every file could be processed.
+rule validate_peaks:
+    input:
+        peaks=get_peak_files,
+    output:
+        sentinel="seqnado_output/peaks/.validated",
+    container:
+        None
+    log:
+        "seqnado_output/logs/validate_peaks.log",
+    run:
+        from loguru import logger
+
+        # Register the declared log file as a loguru sink; without it nothing
+        # is ever written to the path given under `log:`.
+        sink_id = logger.add(log[0])
+        try:
+            for peak_file in input.peaks:
+                # Catch per file so an error on one file is logged without
+                # preventing the remaining files from being checked.
+                with logger.catch():
+                    with open(peak_file, "r+") as p:
+                        # An empty file gets one placeholder BED record.
+                        if not p.readlines():
+                            p.write("chr21\t1\t2\n")
+        finally:
+            # Detach the sink so it does not leak into later jobs that run in
+            # the same process.
+            logger.remove(sink_id)
+
+        with open(output.sentinel, "w") as s:
+            s.write("validated")
+
+
 rule bed_to_bigbed:
     input:
         bed="seqnado_output/peaks/{directory}/{sample}.bed",
+        sentinel="seqnado_output/peaks/.validated",
     output:
         bigbed="seqnado_output/peaks/{directory}/{sample}.bigBed",
     params:
@@ -149,3 +197,4 @@ rule generate_hub:
 
 localrules:
     generate_hub,
+    validate_peaks,
diff --git a/seqnado/workflow/rules/peak_call_chip.smk b/seqnado/workflow/rules/peak_call_chip.smk
@@ -12,17 +12,26 @@ def get_lanceotron_threshold(wildcards):
 
 def get_control_bam(wildcards):
     exp = DESIGN.query(sample_name=wildcards.sample, ip=wildcards.treatment)
-    return "seqnado_output/aligned/{sample}_{exp.control}.bam"
+    control = f"seqnado_output/aligned/{wildcards.sample}_{exp.control}.bam".replace(
+        " ", ""
+    )
+    return control
 
 
 def get_control_tag(wildcards):
     exp = DESIGN.query(sample_name=wildcards.sample, ip=wildcards.treatment)
-    return "seqnado_output/tag_dirs/{sample}_{exp.control}"
+    control = f"seqnado_output/tag_dirs/{wildcards.sample}_{exp.control}".replace(
+        " ", ""
+    )
+    return control
 
 
 def get_control_bigwig(wildcards):
     exp = DESIGN.query(sample_name=wildcards.sample, ip=wildcards.treatment)
-    return "seqnado_output/bigwigs/deeptools/{sample}_{exp.control}.bigWig"
+    control = f"seqnado_output/bigwigs/deeptools/{wildcards.sample}_{exp.control}.bigWig".replace(
+        " ", ""
+    )
+    return control
 
 
 rule macs2_with_input:
@@ -43,7 +52,7 @@ rule macs2_with_input:
     shell:
         """
         macs2 callpeak -t {input.treatment} -c {input.control} -n seqnado_output/peaks/macs/{wildcards.treatment} -f BAMPE {params.options} > {log} 2>&1 &&
-        cat {params.narrow} | cut -f 1-3 > {output.peaks}
+        cat {params.narrow} | cut -f 1-3 > {output.peaks} || touch {output.peaks}
         """
 
 
@@ -132,7 +141,7 @@ rule lanceotron_with_input:
     shell:
         """
         lanceotron callPeaksInput {input.treatment} -i {input.control} -f {params.outdir} --skipheader > {log} 2>&1 &&
-        cat {params.outdir}/{wildcards.treatment}_L-tron.bed | awk 'BEGIN{{OFS="\\t"}} $4 >= {params.threshold} {{print $1, $2, $3}}' > {output.peaks}
+        cat {params.outdir}/{wildcards.treatment}_L-tron.bed | awk 'BEGIN{{OFS="\\t"}} $4 >= {params.threshold} {{print $1, $2, $3}}' > {output.peaks} || touch {output.peaks}
         """
 
 
@@ -159,4 +168,6 @@ rule lanceotron_no_input:
         """
 
 
-ruleorder: lanceotron_with_input > lanceotron_no_input > homer_with_input > homer_no_input > macs2_with_input > macs2_no_input
+ruleorder: lanceotron_with_input > lanceotron_no_input
+ruleorder: homer_with_input > homer_no_input
+ruleorder: macs2_with_input > macs2_no_input