From 5130adaf60cca472b8363280009de05ac1856538 Mon Sep 17 00:00:00 2001 From: Ulthran Date: Wed, 5 Jun 2024 10:42:20 -0400 Subject: [PATCH 1/5] Run tests on push to main --- .github/workflows/pr.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 57447e0..ed35a2b 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -4,9 +4,6 @@ on: pull_request: branches: - main - push: - branches: - - main jobs: run-tests: From 83de856b39f74e10804e849faf9f8c964b2ba3bc Mon Sep 17 00:00:00 2001 From: Ulthran Date: Wed, 5 Jun 2024 10:45:46 -0400 Subject: [PATCH 2/5] Trigger pr workflow on all proper events --- .github/workflows/pr.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index ed35a2b..aaed1b4 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -2,8 +2,9 @@ name: Tests on: pull_request: - branches: - - main + branches: [ master, main ] + push: + branches: [ master, main ] jobs: run-tests: From e588c26df3345870eb8eef386214418f3fbd9be8 Mon Sep 17 00:00:00 2001 From: Ulthran Date: Wed, 5 Jun 2024 10:45:57 -0400 Subject: [PATCH 3/5] Include .fa and .fasta genomes --- sbx_mapping.smk | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sbx_mapping.smk b/sbx_mapping.smk index 41bb351..1d62181 100644 --- a/sbx_mapping.smk +++ b/sbx_mapping.smk @@ -28,7 +28,9 @@ except NameError: GenomeFiles = [] GenomeSegments = {} else: - GenomeFiles = [f for f in Cfg["sbx_mapping"]["genomes_fp"].glob("*.fasta")] + GenomeFiles = [f for f in Cfg["sbx_mapping"]["genomes_fp"].glob("*.fasta")] + [ + f for f in Cfg["sbx_mapping"]["genomes_fp"].glob("*.fa") + ] GenomeSegments = { PurePath(g.name).stem: read_seq_ids(Cfg["sbx_mapping"]["genomes_fp"] / g) for g in GenomeFiles From 684336d4e3c35a8eee77a10893c8a463ceac6b62 Mon Sep 17 00:00:00 2001 From: Ulthran Date: Wed, 5 Jun 2024 13:55:31 -0400 Subject: [PATCH 4/5] Allow .fasta OR .fa (not both) --- sbx_mapping.smk | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sbx_mapping.smk b/sbx_mapping.smk index 1d62181..6e9249b 100644 --- a/sbx_mapping.smk +++ b/sbx_mapping.smk @@ -28,13 +28,14 @@ except NameError: GenomeFiles = [] GenomeSegments = {} else: - GenomeFiles = [f for f in Cfg["sbx_mapping"]["genomes_fp"].glob("*.fasta")] + [ - f for f in Cfg["sbx_mapping"]["genomes_fp"].glob("*.fa") - ] + GenomeFiles = [f for f in Cfg["sbx_mapping"]["genomes_fp"].glob("*.fasta")] + if not GenomeFiles: + GenomeFiles = [f for f in Cfg["sbx_mapping"]["genomes_fp"].glob("*.fa")] GenomeSegments = { PurePath(g.name).stem: read_seq_ids(Cfg["sbx_mapping"]["genomes_fp"] / g) for g in GenomeFiles } + GenomeFiles = {PurePath(g.name).stem: g for g in GenomeFiles} sys.stderr.write("done.\n") sys.stderr.write(f"sbx_mapping::INFO Genome files found: {str(GenomeFiles)}\n") @@ -102,8 +103,8 @@ rule build_genome_index: rule align_to_genome: input: + *rules.build_genome_index.output, reads=expand(QC_FP / "decontam" / "{{sample}}_{rp}.fastq.gz", rp=Pairs), - index=Cfg["sbx_mapping"]["genomes_fp"] / "{genome}.fasta.amb", output: temp(MAPPING_FP / "intermediates" / "{genome}" / "{sample}.sam"), benchmark: From 179092b8044bb5e02749d016e69b586ff8d6f033 Mon Sep 17 00:00:00 2001 From: Ulthran Date: Wed, 5 Jun 2024 14:00:03 -0400 Subject: [PATCH 5/5] Track new host file ext --- sbx_mapping.smk | 50 ++++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/sbx_mapping.smk b/sbx_mapping.smk index 6e9249b..423cab0 100644 --- a/sbx_mapping.smk +++ b/sbx_mapping.smk @@ -15,29 +15,26 @@ def get_mapping_path() -> Path: SBX_MAPPING_VERSION = open(get_mapping_path() / "VERSION").read().strip() - -try: - GenomeFiles - GenomeSegments -except NameError: - sys.stderr.write("sbx_mapping::INFO Collecting target genomes... ") - if ( - Cfg["sbx_mapping"]["genomes_fp"] == Cfg["all"]["root"] - or not Cfg["sbx_mapping"]["genomes_fp"] - ): - GenomeFiles = [] - GenomeSegments = {} - else: - GenomeFiles = [f for f in Cfg["sbx_mapping"]["genomes_fp"].glob("*.fasta")] - if not GenomeFiles: - GenomeFiles = [f for f in Cfg["sbx_mapping"]["genomes_fp"].glob("*.fa")] - GenomeSegments = { - PurePath(g.name).stem: read_seq_ids(Cfg["sbx_mapping"]["genomes_fp"] / g) - for g in GenomeFiles - } - GenomeFiles = {PurePath(g.name).stem: g for g in GenomeFiles} - sys.stderr.write("done.\n") - sys.stderr.write(f"sbx_mapping::INFO Genome files found: {str(GenomeFiles)}\n") +HOST_FILE_EXT = ".fasta" +sys.stderr.write("sbx_mapping::INFO Collecting target genomes... ") +if ( + Cfg["sbx_mapping"]["genomes_fp"] == Cfg["all"]["root"] + or not Cfg["sbx_mapping"]["genomes_fp"] +): + GenomeFiles = [] + GenomeSegments = {} +else: + GenomeFiles = [f for f in Cfg["sbx_mapping"]["genomes_fp"].glob("*.fasta")] + if not GenomeFiles: + GenomeFiles = [f for f in Cfg["sbx_mapping"]["genomes_fp"].glob("*.fa")] + HOST_FILE_EXT = ".fa" + GenomeSegments = { + PurePath(g.name).stem: read_seq_ids(Cfg["sbx_mapping"]["genomes_fp"] / g) + for g in GenomeFiles + } + GenomeFiles = {PurePath(g.name).stem: g for g in GenomeFiles} +sys.stderr.write("done.\n") +sys.stderr.write(f"sbx_mapping::INFO Genome files found: {str(GenomeFiles)}\n") try: @@ -83,10 +80,10 @@ rule all_mapping: rule build_genome_index: input: - Cfg["sbx_mapping"]["genomes_fp"] / "{genome}.fasta", + Cfg["sbx_mapping"]["genomes_fp"] / ("{genome}" + HOST_FILE_EXT), output: [ - Cfg["sbx_mapping"]["genomes_fp"] / ("{genome}.fasta." + ext) + Cfg["sbx_mapping"]["genomes_fp"] / ("{genome}" + HOST_FILE_EXT + "." + ext) for ext in ["amb", "ann", "bwt", "pac", "sa"] ], benchmark: @@ -113,6 +110,7 @@ rule align_to_genome: LOG_FP / "align_to_genome_{genome}_{sample}.log", params: index_fp=Cfg["sbx_mapping"]["genomes_fp"], + host_file_ext=HOST_FILE_EXT, threads: 4 conda: "envs/sbx_mapping_env.yml" @@ -121,7 +119,7 @@ rule align_to_genome: shell: """ bwa mem -M -t {threads} \ - {params.index_fp}/{wildcards.genome}.fasta \ + {params.index_fp}/{wildcards.genome}{params.host_file_ext} \ {input.reads} -o {output} \ 2>&1 | tee {log} """