From f9f1da503855a2b2f48421b2c11ee9d493ee263d Mon Sep 17 00:00:00 2001
From: Catherine Chahrour <74187550+CChahrour@users.noreply.github.com>
Date: Wed, 7 Feb 2024 17:42:06 +0000
Subject: [PATCH 1/2] fix: chip input issue (#136)

* Fix slurm preset (#118)

* require snakemake<8

* fix escape character and whitespace errors

* Delete setup.cfg file

* fix: split peak call rules

* tests: added all peak call methods to atac test

* Update output file paths in
alignment_post_processing.smk

* Update file paths in hub.smk

* fix: updated wildcards for bigBed files

* Refactor test_seqnado_config_creation function and
add missing options to config_atac.yml

* Update config file for chip sequencing

* fix: seqnado-design

* chore: removed commented code

* Fix file path in lanceotron_no_input rule

* Fix metadata and experiment creation in DesignIP
class

* Fix symlink_files function to handle both paired
and single-end assays

* Add log and wrapper for fastqc_raw_single rule

* update config if ucsc is null

* Fix config (#119)

* remove split fastq from config and all rules

* clean up config and fix spelling of indices

* remove test config files

* update default heatmap options

* return to config but with small changes

* refactor config.py

* fix typo in config.py

* use indices consistently for genome indices

* update config process in docs

* add entrypoint and chmod profile (#122)

* Feature add config rerun (#126)

* add option to rerun config

* update config docs with rerun

* move singularity fix to faq in docs (#127)

* feat(pipeline): handle failed peak calls (#131)

* fix: add validate peaks rule

* Add get_peak_files function to retrieve peak files based on assay type

* fix(pipeline): inputs not used for peak call (#132)

* Develop (#128)

* Fix slurm preset (#118)

* require snakemake<8

* fix escape character and whitespace errors

* Delete setup.cfg file

* fix: split peak call rules

* tests: added all peak call methods to atac test

* Update output file paths in
alignment_post_processing.smk

* Update file paths in hub.smk

* fix: updated wildcards for bigBed files

* Refactor test_seqnado_config_creation function and
add missing options to config_atac.yml

* Update config file for chip sequencing

* fix: seqnado-design

* chore: removed commented code

* Fix file path in lanceotron_no_input rule

* Fix metadata and experiment creation in DesignIP
class

* Fix symlink_files function to handle both paired
and single-end assays

* Add log and wrapper for fastqc_raw_single rule

* update config if ucsc is null

* Fix config (#119)

* remove split fastq from config and all rules

* clean up config and fix spelling of indices

* remove test config files

* update default heatmap options

* return to config but with small changes

* refactor config.py

* fix typo in config.py

* use indices consistently for genome indices

* update config process in docs

* add entrypoint and chmod profile (#122)

* Feature add config rerun (#126)

* add option to rerun config

* update config docs with rerun

* move singularity fix to faq in docs (#127)

---------

Co-authored-by: alsmith <asmith151@outlook.com>

* fix: "f" missing for all get_control_X functions.

Whoops...

* fix: multiple errors with get_control_X files

* fix: removed touch sentinel

* fix: && removed at end of lines

* fix: make blank file if peak calls fail

* fix: validate peaks wrong param supplied

---------

Co-authored-by: Catherine Chahrour <74187550+CChahrour@users.noreply.github.com>

* Fix: chip input issue (#135)

* fix symlinking issue for input fastq files

---------

Co-authored-by: alsmith <asmith151@outlook.com>
Co-authored-by: Alastair Smith <49727900+alsmith151@users.noreply.github.com>
---
 seqnado/utils.py | 72 ++++++++++++++++--------------------------------
 1 file changed, 23 insertions(+), 49 deletions(-)

diff --git a/seqnado/utils.py b/seqnado/utils.py
index 88acabee..5ae4e194 100644
--- a/seqnado/utils.py
+++ b/seqnado/utils.py
@@ -683,71 +683,45 @@ def from_dataframe(cls, df: pd.DataFrame, simplified: bool = True, **kwargs):
 
         return cls(assays=experiments, **kwargs)
 
-
-def symlink_files_paired(
-    output_dir: pathlib.Path, assay: Union[AssayNonIP, AssayIP], assay_name: str
-):
-    r1_path_new = pathlib.Path(f"{output_dir}/{assay_name}_1.fastq.gz")
-    r2_path_new = pathlib.Path(f"{output_dir}/{assay_name}_2.fastq.gz")
-
-    if not r1_path_new.exists():
-        try:
-            r1_path_new.symlink_to(assay.r1.path.resolve())
-        except FileExistsError:
-            logger.warning(f"Symlink for {r1_path_new} already exists.")
-
-    if assay.r2 and not r2_path_new.exists():
-        try:
-            r2_path_new.symlink_to(assay.r2.path.resolve())
-        except FileExistsError:
-            logger.warning(f"Symlink for {r2_path_new} already exists.")
-
-
-def symlink_files_single(
-    output_dir: pathlib.Path, assay: Union[AssayNonIP, AssayIP], assay_name: str
-):
-    r1_path_new = pathlib.Path(f"{output_dir}/{assay_name}.fastq.gz")
-
-    if not r1_path_new.exists():
+def symlink_file(output_dir: pathlib.Path, source_path: pathlib.Path, new_file_name: str):
+    """
+    Create a symlink in the output directory with the new file name.
+    """
+    new_path = output_dir / new_file_name
+    if not new_path.exists():
         try:
-            r1_path_new.symlink_to(assay.r1.path.resolve())
+            new_path.symlink_to(source_path.resolve())
         except FileExistsError:
-            logger.warning(f"Symlink for {r1_path_new} already exists.")
-
+            logger.warning(f"Symlink for {new_path} already exists.")
 
-def symlink_fastq_files(
-    design: Union[Design, DesignIP], output_dir: str = "seqnado_output/fastqs/"
-) -> None:
+def symlink_fastq_files(design: Union[Design, DesignIP], output_dir: str = "seqnado_output/fastqs/") -> None:
     """
     Symlink the fastq files to the output directory.
     """
     output_dir = pathlib.Path(output_dir)
     output_dir.mkdir(parents=True, exist_ok=True)
-
+    
     if isinstance(design, Design):
         for assay_name, assay in design.assays.items():
+            symlink_file(output_dir, assay.r1.path, f"{assay_name}_1.fastq.gz")
             if assay.is_paired:
-                symlink_files_paired(output_dir, assay, assay_name)
-            else:
-                symlink_files_single(output_dir, assay, assay_name)
+                symlink_file(output_dir, assay.r2.path, f"{assay_name}_2.fastq.gz")
 
     elif isinstance(design, DesignIP):
         for experiment_name, experiment in design.assays.items():
-            assay = experiment.ip_files
-            assay_name = assay.name
-
-            if assay.is_paired:
-                symlink_files_paired(output_dir, assay, assay_name)
-            else:
-                symlink_files_single(output_dir, assay, assay_name)
+            # IP files
+            ip_assay = experiment.ip_files
+            symlink_file(output_dir, ip_assay.r1.path, f"{ip_assay.name}_1.fastq.gz")
+            if ip_assay.is_paired:
+                symlink_file(output_dir, ip_assay.r2.path, f"{ip_assay.name}_2.fastq.gz")
 
             if experiment.control_files:
-                assay = experiment.control_files
-                assay_name = assay.name
-                if assay.is_paired:
-                    symlink_files_paired(output_dir, assay, assay_name)
-                else:
-                    symlink_files_single(output_dir, assay, assay_name)
+                control_assay = experiment.control_files
+                control_r1_name = control_assay.r1.path.name 
+                symlink_file(output_dir, control_assay.r1.path, control_r1_name)
+                if control_assay.is_paired:
+                    control_r2_name = control_assay.r2.path.name 
+                    symlink_file(output_dir, control_assay.r2.path, control_r2_name)
 
 
 def define_output_files(

From 07f84208d5e96785af86f0b4892c2b2a7f890659 Mon Sep 17 00:00:00 2001
From: Catherine Chahrour <74187550+CChahrour@users.noreply.github.com>
Date: Thu, 8 Feb 2024 12:18:27 +0000
Subject: [PATCH 2/2] bug fixes for conda release (#139)

---
 seqnado/config.py                         | 211 +++++++++++++++-------
 seqnado/workflow/rules/peak_call_chip.smk |  21 ++-
 seqnado/workflow/scripts/split_bam.py     | 109 -----------
 seqnado/workflow/scripts/split_bam2.py    |  88 ---------
 4 files changed, 155 insertions(+), 274 deletions(-)
 delete mode 100644 seqnado/workflow/scripts/split_bam.py
 delete mode 100644 seqnado/workflow/scripts/split_bam2.py

diff --git a/seqnado/config.py b/seqnado/config.py
index a36d0d6a..0093d11b 100644
--- a/seqnado/config.py
+++ b/seqnado/config.py
@@ -6,12 +6,16 @@
 package_dir = os.path.dirname(os.path.abspath(__file__))
 template_dir = os.path.join(package_dir, "workflow/config")
 
+
 # Helper Functions
 def get_user_input(prompt, default=None, is_boolean=False, choices=None):
     while True:
-        user_input = input(f"{prompt} [{'/'.join(choices) if choices else default}]: ") or default
+        user_input = (
+            input(f"{prompt} [{'/'.join(choices) if choices else default}]: ")
+            or default
+        )
         if is_boolean:
-            return user_input.lower() == 'yes'
+            return user_input.lower() == "yes"
         if choices and user_input not in choices:
             print(f"Invalid choice. Please choose from {', '.join(choices)}.")
             continue
@@ -19,20 +23,23 @@ def get_user_input(prompt, default=None, is_boolean=False, choices=None):
 
 
 def setup_configuration(assay, genome, template_data):
-    username = os.getenv('USER', 'unknown_user')
-    today = datetime.datetime.now().strftime('%Y-%m-%d')
-    project_name = get_user_input("What is your project name?", default=f"{username}_project")
+    username = os.getenv("USER", "unknown_user")
+    today = datetime.datetime.now().strftime("%Y-%m-%d")
+    project_name = get_user_input(
+        "What is your project name?", default=f"{username}_project"
+    )
+    project_name = project_name.replace(" ", "_")
 
     common_config = {
-        'username': username,
-        'project_date': today,
-        'project_name': project_name,
-        'genome': genome
+        "username": username,
+        "project_date": today,
+        "project_name": project_name,
+        "genome": genome,
     }
-    
+
     template_data.update(common_config)
 
-    with open(os.path.join(template_dir, 'preset_genomes.json'), 'r') as f:
+    with open(os.path.join(template_dir, "preset_genomes.json"), "r") as f:
         genome_values = json.load(f)
 
     genome_dict = {}
@@ -41,72 +48,135 @@ def setup_configuration(assay, genome, template_data):
         genome = get_user_input("What is your genome name?", default="other")
         genome_dict = {
             genome: {
-                "indices": get_user_input("Path to Bowtie2 genome indices:") if assay in ["chip", "atac"] else get_user_input("Path to STAR v2.7.10b genome indices:"),
+                "indices": (
+                    get_user_input("Path to Bowtie2 genome indices:")
+                    if assay in ["chip", "atac"]
+                    else get_user_input("Path to STAR v2.7.10b genome indices:")
+                ),
                 "chromosome_sizes": get_user_input("Path to chromosome sizes file:"),
                 "gtf": get_user_input("Path to GTF file:"),
-                "blacklist": get_user_input("Path to blacklist bed file:")
+                "blacklist": get_user_input("Path to blacklist bed file:"),
             }
         }
     else:
         if genome in genome_values:
             genome_dict[genome] = {
-                "indices": genome_values[genome].get('bt2_indices' if assay in ["chip", "atac"] else 'star_indices', ''),
-                "chromosome_sizes": genome_values[genome].get('chromosome_sizes', ''),
-                "gtf": genome_values[genome].get('gtf', ''),
-                "blacklist": genome_values[genome].get('blacklist', '')
+                "indices": genome_values[genome].get(
+                    "bt2_indices" if assay in ["chip", "atac"] else "star_indices", ""
+                ),
+                "chromosome_sizes": genome_values[genome].get("chromosome_sizes", ""),
+                "gtf": genome_values[genome].get("gtf", ""),
+                "blacklist": genome_values[genome].get("blacklist", ""),
             }
 
-    
     genome_config = {
-        'genome': genome,
-        'indices': genome_dict[genome]['indices'],
-        'chromosome_sizes': genome_dict[genome]['chromosome_sizes'],
-        'gtf': genome_dict[genome]['gtf'],
+        "genome": genome,
+        "indices": genome_dict[genome]["indices"],
+        "chromosome_sizes": genome_dict[genome]["chromosome_sizes"],
+        "gtf": genome_dict[genome]["gtf"],
     }
     template_data.update(genome_config)
 
-
-    template_data['remove_blacklist'] = get_user_input("Do you want to remove blacklist regions? (yes/no)", default="yes", is_boolean=True)
-    if template_data['remove_blacklist']:
-        template_data['blacklist'] = genome_dict[genome]['blacklist']
-
-    template_data['remove_pcr_duplicates'] = get_user_input("Remove PCR duplicates? (yes/no)", default= "yes" if assay in ["chip", "atac"] else "no", is_boolean=True)
-    if template_data['remove_pcr_duplicates']:
-        template_data['remove_pcr_duplicates_method'] = get_user_input("Remove PCR duplicates method:", default="picard", choices=["picard"])
+    template_data["remove_blacklist"] = get_user_input(
+        "Do you want to remove blacklist regions? (yes/no)",
+        default="yes",
+        is_boolean=True,
+    )
+    if template_data["remove_blacklist"]:
+        template_data["blacklist"] = genome_dict[genome]["blacklist"]
+
+    template_data["remove_pcr_duplicates"] = get_user_input(
+        "Remove PCR duplicates? (yes/no)",
+        default="yes" if assay in ["chip", "atac"] else "no",
+        is_boolean=True,
+    )
+    if template_data["remove_pcr_duplicates"]:
+        template_data["remove_pcr_duplicates_method"] = get_user_input(
+            "Remove PCR duplicates method:", default="picard", choices=["picard"]
+        )
 
     else:
-        template_data['remove_pcr_duplicates_method'] = "False"
-        
+        template_data["remove_pcr_duplicates_method"] = "False"
+
     if assay == "atac":
-        template_data['shift_atac_reads'] = get_user_input("Shift ATAC-seq reads? (yes/no)", default="yes", is_boolean=True) if assay == "atac" else "False"
+        template_data["shift_atac_reads"] = (
+            get_user_input(
+                "Shift ATAC-seq reads? (yes/no)", default="yes", is_boolean=True
+            )
+            if assay == "atac"
+            else "False"
+        )
 
     if assay == "chip":
-        template_data['spikein'] = get_user_input("Do you have spikein? (yes/no)", default="no", is_boolean=True) 
-        if template_data['spikein']:
-                template_data['normalisation_method'] = get_user_input("Normalisation method:", default="orlando", choices=["orlando", "with_input"])
-                template_data['reference_genome'] = get_user_input("Reference genome:", default="hg38")
-                template_data['spikein_genome'] = get_user_input("Spikein genome:", default="dm6")
-                template_data['fastq_screen_config'] = get_user_input("Path to fastqscreen config:", default="/ceph/project/milne_group/shared/seqnado_reference/fastqscreen_reference/fastq_screen.conf")
-        
-    template_data['make_bigwigs'] = get_user_input("Do you want to make bigwigs? (yes/no)", default="no", is_boolean=True)
-    if template_data['make_bigwigs']:
-        template_data['pileup_method'] = get_user_input("Pileup method:", default="deeptools", choices=["deeptools", "homer"])
-        template_data['make_heatmaps'] = get_user_input("Do you want to make heatmaps? (yes/no)", default="no", is_boolean=True)
-    
-    if assay in ["chip", "atac"]:
-        template_data['call_peaks'] = get_user_input("Do you want to call peaks? (yes/no)", default="no", is_boolean=True)
-        if template_data['call_peaks']:
-            template_data['peak_calling_method'] = get_user_input("Peak caller:", default="lanceotron", choices=["lanceotron", "macs", "homer"])
-
-    template_data['run_deseq2'] = get_user_input("Run DESeq2? (yes/no)", default="no", is_boolean=True) if assay == "rna" else "False"
+        template_data["spikein"] = get_user_input(
+            "Do you have spikein? (yes/no)", default="no", is_boolean=True
+        )
+        if template_data["spikein"]:
+            template_data["normalisation_method"] = get_user_input(
+                "Normalisation method:",
+                default="orlando",
+                choices=["orlando", "with_input"],
+            )
+            template_data["reference_genome"] = get_user_input(
+                "Reference genome:", default="hg38"
+            )
+            template_data["spikein_genome"] = get_user_input(
+                "Spikein genome:", default="dm6"
+            )
+            template_data["fastq_screen_config"] = get_user_input(
+                "Path to fastqscreen config:",
+                default="/ceph/project/milne_group/shared/seqnado_reference/fastqscreen_reference/fastq_screen.conf",
+            )
+
+    template_data["make_bigwigs"] = get_user_input(
+        "Do you want to make bigwigs? (yes/no)", default="no", is_boolean=True
+    )
+    if template_data["make_bigwigs"]:
+        template_data["pileup_method"] = get_user_input(
+            "Pileup method:", default="deeptools", choices=["deeptools", "homer"]
+        )
+        template_data["make_heatmaps"] = get_user_input(
+            "Do you want to make heatmaps? (yes/no)", default="no", is_boolean=True
+        )
 
-    template_data['make_ucsc_hub'] = get_user_input("Do you want to make a UCSC hub? (yes/no)", default="no", is_boolean=True)
-    
-    template_data['UCSC_hub_directory'] = get_user_input("UCSC hub directory:", default="/path/to/ucsc_hub/") if template_data['make_ucsc_hub'] else "."
-    template_data['email'] = get_user_input("What is your email address?", default=f"{username}@example.com") if template_data['make_ucsc_hub'] else f"{username}@example.com"
-    template_data['color_by'] = get_user_input("Color by (for UCSC hub):", default="samplename") if template_data['make_ucsc_hub'] else "samplename"
-    
-    template_data['options'] = TOOL_OPTIONS_RNA if assay == "rna" else TOOL_OPTIONS
+    if assay in ["chip", "atac"]:
+        template_data["call_peaks"] = get_user_input(
+            "Do you want to call peaks? (yes/no)", default="no", is_boolean=True
+        )
+        if template_data["call_peaks"]:
+            template_data["peak_calling_method"] = get_user_input(
+                "Peak caller:",
+                default="lanceotron",
+                choices=["lanceotron", "macs", "homer"],
+            )
+
+    template_data["run_deseq2"] = (
+        get_user_input("Run DESeq2? (yes/no)", default="no", is_boolean=True)
+        if assay == "rna"
+        else "False"
+    )
+
+    template_data["make_ucsc_hub"] = get_user_input(
+        "Do you want to make a UCSC hub? (yes/no)", default="no", is_boolean=True
+    )
+
+    template_data["UCSC_hub_directory"] = (
+        get_user_input("UCSC hub directory:", default="/path/to/ucsc_hub/")
+        if template_data["make_ucsc_hub"]
+        else "."
+    )
+    template_data["email"] = (
+        get_user_input("What is your email address?", default=f"{username}@example.com")
+        if template_data["make_ucsc_hub"]
+        else f"{username}@example.com"
+    )
+    template_data["color_by"] = (
+        get_user_input("Color by (for UCSC hub):", default="samplename")
+        if template_data["make_ucsc_hub"]
+        else "samplename"
+    )
+
+    template_data["options"] = TOOL_OPTIONS_RNA if assay == "rna" else TOOL_OPTIONS
 
 
 # Tool Specific Options
@@ -178,35 +248,40 @@ def setup_configuration(assay, genome, template_data):
     colormap: RdYlBu_r
 """
 
+
 def create_config(assay, genome, rerun):
     env = Environment(loader=FileSystemLoader(template_dir), auto_reload=False)
 
-    template = env.get_template("config.yaml.jinja")        
+    template = env.get_template("config.yaml.jinja")
     template_deseq2 = env.get_template("deseq2.qmd.jinja")
-    
+
     # Initialize template data
-    template_data = {'assay': assay, 'genome': genome}
+    template_data = {"assay": assay, "genome": genome}
 
     # Setup configuration
     setup_configuration(assay, genome, template_data)
-    
+
     # Create directory and render template
     if rerun:
         dir_name = os.getcwd()
-        with open(os.path.join(dir_name, f"config_{assay}.yml"), 'w') as file:
+        with open(os.path.join(dir_name, f"config_{assay}.yml"), "w") as file:
             file.write(template.render(template_data))
     else:
         dir_name = f"{template_data['project_date']}_{template_data['assay']}_{template_data['project_name']}"
         os.makedirs(dir_name, exist_ok=True)
         fastq_dir = os.path.join(dir_name, "fastq")
         os.makedirs(fastq_dir, exist_ok=True)
-        
-        with open(os.path.join(dir_name, f"config_{assay}.yml"), 'w') as file:
+
+        with open(os.path.join(dir_name, f"config_{assay}.yml"), "w") as file:
             file.write(template.render(template_data))
 
     # add deseq2 qmd file if rna
     if assay == "rna":
-        with open(os.path.join(dir_name, f"deseq2_{template_data['project_name']}.qmd"), 'w') as file:
+        with open(
+            os.path.join(dir_name, f"deseq2_{template_data['project_name']}.qmd"), "w"
+        ) as file:
             file.write(template_deseq2.render(template_data))
-            
-    print(f"Directory '{dir_name}' has been created with the 'config_{assay}.yml' file.")
+
+    print(
+        f"Directory '{dir_name}' has been created with the 'config_{assay}.yml' file."
+    )
diff --git a/seqnado/workflow/rules/peak_call_chip.smk b/seqnado/workflow/rules/peak_call_chip.smk
index 88a7313d..32c75d2e 100644
--- a/seqnado/workflow/rules/peak_call_chip.smk
+++ b/seqnado/workflow/rules/peak_call_chip.smk
@@ -43,16 +43,17 @@ rule macs2_with_input:
     params:
         options=seqnado.utils.check_options(config["macs"]["callpeak"]),
         narrow=lambda wc, output: output.peaks.replace(".bed", "_peaks.narrowPeak"),
+        basename=lambda wc, output: output.peaks.replace(".bed", ""),
     threads: 1
     resources:
         mem_mb=2000,
         time="0-02:00:00",
     log:
-        "seqnado_output/logs/macs/{sample}_{treatment}.bed",
+        "seqnado_output/logs/macs/{sample}_{treatment}.log",
     shell:
         """
-        macs2 callpeak -t {input.treatment} -c {input.control} -n seqnado_output/peaks/macs/{wildcards.treatment} -f BAMPE {params.options} > {log} 2>&1 &&
-        cat {params.narrow} | cut -f 1-3 > {output.peaks} || touch {output.peaks}
+        macs2 callpeak -t {input.treatment} -c {input.control} -n {params.basename} -f BAMPE {params.options} > {log} 2>&1 &&
+        cat {params.narrow} | cut -f 1-3 > {output.peaks}
         """
 
 
@@ -70,7 +71,7 @@ rule macs2_no_input:
         mem_mb=2000,
         time="0-02:00:00",
     log:
-        "seqnado_output/logs/macs/{sample}_{treatment}.bed",
+        "seqnado_output/logs/macs/{sample}_{treatment}.log",
     shell:
         """
         macs2 callpeak -t {input.treatment} -n {params.basename} -f BAMPE {params.options} > {log} 2>&1 &&
@@ -85,7 +86,7 @@ rule homer_with_input:
     output:
         peaks="seqnado_output/peaks/homer/{sample}_{treatment}.bed",
     log:
-        "seqnado_output/logs/homer/{sample}_{treatment}.bed",
+        "seqnado_output/logs/homer/{sample}_{treatment}.log",
     params:
         options=seqnado.utils.check_options(config["homer"]["findpeaks"]),
     threads: 1
@@ -106,7 +107,7 @@ rule homer_no_input:
     output:
         peaks="seqnado_output/peaks/homer/{sample}_{treatment}.bed",
     log:
-        "seqnado_output/logs/homer/{sample}_{treatment}.bed",
+        "seqnado_output/logs/homer/{sample}_{treatment}.log",
     params:
         options=seqnado.utils.check_options(config["homer"]["findpeaks"]),
     threads: 1
@@ -128,10 +129,11 @@ rule lanceotron_with_input:
     output:
         peaks="seqnado_output/peaks/lanceotron/{sample}_{treatment}.bed",
     log:
-        "seqnado_output/logs/lanceotron/{sample}_{treatment}.bed",
+        "seqnado_output/logs/lanceotron/{sample}_{treatment}.log",
     params:
         threshold=get_lanceotron_threshold,
         outdir=lambda wc, output: os.path.dirname(output.peaks),
+        basename=lambda wc, output: output.peaks.replace(".bed", ""),
     container:
         "library://asmith151/seqnado/seqnado_extra:latest"
     threads: 1
@@ -141,7 +143,7 @@ rule lanceotron_with_input:
     shell:
         """
         lanceotron callPeaksInput {input.treatment} -i {input.control} -f {params.outdir} --skipheader > {log} 2>&1 &&
-        cat {params.outdir}/{wildcards.treatment}_L-tron.bed | awk 'BEGIN{{OFS="\\t"}} $4 >= {params.threshold} {{print $1, $2, $3}}' > {output.peaks} || touch {output.peaks}
+        cat {params.basename}_L-tron.bed | awk 'BEGIN{{OFS="\\t"}} $4 >= {params.threshold} {{print $1, $2, $3}}' > {output.peaks} 
         """
 
 
@@ -155,6 +157,7 @@ rule lanceotron_no_input:
     params:
         options=seqnado.utils.check_options(config["lanceotron"]["callpeak"]),
         outdir=lambda wc, output: os.path.dirname(output.peaks),
+        basename=lambda wc, output: output.peaks.replace(".bed", ""),
     threads: 1
     container:
         "library://asmith151/seqnado/seqnado_extra:latest"
@@ -164,7 +167,7 @@ rule lanceotron_no_input:
     shell:
         """
         lanceotron callPeaks {input.treatment} -f {params.outdir} --skipheader  {params.options} > {log} 2>&1 &&
-        cat {params.outdir}/{wildcards.sample}_{wildcards.treatment}_L-tron.bed | cut -f 1-3 > {output.peaks}
+        cat {params.basename}_L-tron.bed | cut -f 1-3 > {output.peaks}
         """
 
 
diff --git a/seqnado/workflow/scripts/split_bam.py b/seqnado/workflow/scripts/split_bam.py
deleted file mode 100644
index 23a7f686..00000000
--- a/seqnado/workflow/scripts/split_bam.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import logging
-import os
-import pysam
-import shutil
-import subprocess
-import sys
-from optparse import OptionParser
-from loguru import logger
-
-# Set up logging
-logger.add(snakemake.log[0], level="INFO")
-
-    __version__ = "1.0.5"
-
-    def create_headers(bamfile, ex_chr_prefix):
-        """Create BAM headers for sample and exogenous genomes."""
-        bam_header = bamfile.header
-        sample_header, exo_header = {}, {}
-        sample_header.update(bam_header)
-        exo_header.update(bam_header)
-
-        sample_header["SQ"] = [sq for sq in bam_header["SQ"] if sq["SN"].startswith("chr")]
-        exo_header["SQ"] = [sq for sq in bam_header["SQ"] if sq["SN"].startswith(ex_chr_prefix)]
-
-        for header in [sample_header, exo_header]:
-            header.setdefault("CO", []).extend([])
-
-        return sample_header, exo_header
-
-
-    def create_report(output_prefix, stats):
-        """
-        Create a report file with statistics from the BAM processing in TSV format.
-
-        Parameters:
-        output_prefix (str): Prefix used for output files.
-        stats (dict): A dictionary containing the statistics to report.
-        """
-        report_file = output_prefix + "_report.tsv"
-        with open(report_file, "w") as report:
-            # Writing the headers
-            headers = stats.keys()
-            report.write("\t".join(headers) + "\n")
-            
-            # Writing the values
-            values = [str(stats[key]) for key in headers]
-            report.write("\t".join(values) + "\n")
-
-    def process_bam(bam_file, output_prefix, ex_chr_prefix, sample_genome, map_qual_threshold):
-        """Process the BAM file and collect statistics."""
-        stats = {
-            "bam_file": os.path.basename(bam_file),
-            sample_genome + "_reads": 0,
-            ex_chr_prefix + "_reads": 0,
-            "unmapped_reads": 0,
-            "qcfail_reads": 0,
-            "duplicate_reads": 0,
-            "secondary_reads": 0,
-            "low_mapq_reads": 0,
-        }
-
-        samfile = pysam.AlignmentFile(bam_file, "rb")
-        sample_header, ex_header = create_headers(samfile, ex_chr_prefix)
-
-        with pysam.AlignmentFile(output_prefix + "_" + sample_genome +".bam", "wb", header=sample_header) as sample_out, \
-            pysam.AlignmentFile(output_prefix + "_" + ex_chr_prefix + ".bam", "wb", header=ex_header) as exo_out:
-
-            for read in samfile:
-                if read.is_unmapped:
-                    stats["unmapped_reads"] += 1
-                elif read.is_qcfail:
-                    stats["qcfail_reads"] += 1
-                elif read.is_duplicate:
-                    stats["duplicate_reads"] += 1
-                elif read.is_secondary:
-                    stats["secondary_reads"] += 1
-                elif read.mapq < map_qual_threshold:
-                    stats["low_mapq_reads"] += 1
-                elif read.reference_name.startswith(ex_chr_prefix):
-                    stats[ex_chr_prefix + "_reads"] += 1
-                    exo_out.write(read)
-                else:
-                    stats[sample_genome + "_reads"] += 1
-                    sample_out.write(read)
-
-        return stats
-
-
-    def main():
-        parser = OptionParser(usage="%prog [options]", version="%prog " + __version__)
-        parser.add_option("-i", dest="bam_file", help="BAM file of the composite genome")
-        parser.add_option("-o", "--output", dest="out_prefix", help="Output prefix")
-        parser.add_option("-g", "--sample-prefix", dest="sample_prefix", default="hg38", help="Prefix for exogenous chromosome IDs")
-        parser.add_option("-p", "--exo-prefix", dest="chr_prefix", default="dm6", help="Prefix for exogenous chromosome IDs")
-        parser.add_option("-q", "--mapq", dest="map_qual", type="int", default=30, help="Mapping quality threshold")
-
-
-        (options, args) = parser.parse_args()
-        if not (options.bam_file and options.out_prefix):
-            parser.print_help()
-            sys.exit()
-
-        process_bam(options.bam_file, options.out_prefix, options.chr_prefix, options.sample_prefix, options.map_qual)
-        stats = process_bam(options.bam_file, options.out_prefix, options.chr_prefix, options.sample_prefix, options.map_qual)
-        create_report(options.out_prefix, stats)
-
-with logger.catch():
-    if __name__ == "__main__":
-        main()
diff --git a/seqnado/workflow/scripts/split_bam2.py b/seqnado/workflow/scripts/split_bam2.py
deleted file mode 100644
index d2d1afcc..00000000
--- a/seqnado/workflow/scripts/split_bam2.py
+++ /dev/null
@@ -1,88 +0,0 @@
-import os
-import pathlib
-import pysam
-import shutil
-import subprocess
-from optparse import OptionParser
-from loguru import logger
-
-# Set up logging
-logger.add(snakemake.log[0], level="INFO")
-
-def create_headers(bamfile, ex_chr_prefix):
-    """Create BAM headers for sample and exogenous genomes."""
-    bam_header = bamfile.header
-    sample_header, exo_header = {}, {}
-    sample_header.update(bam_header)
-    exo_header.update(bam_header)
-
-    sample_header["SQ"] = [sq for sq in bam_header["SQ"] if sq["SN"].startswith("chr")]
-    exo_header["SQ"] = [sq for sq in bam_header["SQ"] if sq["SN"].startswith(ex_chr_prefix)]
-
-    for header in [sample_header, exo_header]:
-        header.setdefault("CO", []).extend([])
-
-    return sample_header, exo_header
-
-
-
-def process_bam(bam_file, output_prefix, ex_chr_prefix, sample_genome, map_qual_threshold):
-    """Process the BAM file and collect statistics."""
-    stats = {
-        "bam_file": os.path.basename(bam_file),
-        sample_genome + "_reads": 0,
-        ex_chr_prefix + "_reads": 0,
-        "unmapped_reads": 0,
-        "qcfail_reads": 0,
-        "duplicate_reads": 0,
-        "secondary_reads": 0,
-        "low_mapq_reads": 0,
-    }
-
-    samfile = pysam.AlignmentFile(bam_file, "rb")
-    sample_header, ex_header = create_headers(samfile, ex_chr_prefix)
-
-    with pysam.AlignmentFile(output_prefix + "_" + sample_genome +".bam", "wb", header=sample_header) as sample_out, \
-        pysam.AlignmentFile(output_prefix + "_" + ex_chr_prefix + ".bam", "wb", header=ex_header) as exo_out:
-
-        for read in samfile:
-            if read.is_unmapped:
-                stats["unmapped_reads"] += 1
-            elif read.is_qcfail:
-                stats["qcfail_reads"] += 1
-            elif read.is_duplicate:
-                stats["duplicate_reads"] += 1
-            elif read.is_secondary:
-                stats["secondary_reads"] += 1
-            elif read.mapq < map_qual_threshold:
-                stats["low_mapq_reads"] += 1
-            elif read.reference_name.startswith(ex_chr_prefix):
-                stats[ex_chr_prefix + "_reads"] += 1
-                exo_out.write(read)
-            else:
-                stats[sample_genome + "_reads"] += 1
-                sample_out.write(read)
-
-    return stats
-
-    report_file = output_prefix + "_report.tsv"
-    with open(report_file, "w") as report:
-        # Writing the headers
-        headers = stats.keys()
-        report.write("\t".join(headers) + "\n")
-        
-        # Writing the values
-        values = [str(stats[key]) for key in headers]
-        report.write("\t".join(values) + "\n")
-
-
-with logger.catch():
-    logger.info("Split bam files")
-    
-    bam_file = snakemake.input.bam
-    out_prefix = snakemake.params.prefix
-    sample_prefix = snakemake.params.genome_prefix
-    chr_prefix = snakemake.params.exo_prefix
-    map_qual = snakemake.params.map_qual
-
-    process_bam(bam_file, out_prefix, chr_prefix, sample_prefix, map_qual)