Skip to content

Commit

Permalink
Merge pull request #23 from sunbeam-labs/22-allow-for-fa-and-fasta-ex…
Browse files Browse the repository at this point in the history
…tensions-in-host-dir

22 allow for fa and fasta extensions in host dir
  • Loading branch information
Ulthran authored Jun 5, 2024
2 parents f3fb0ac + 179092b commit 9e3c8fd
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 28 deletions.
6 changes: 2 additions & 4 deletions .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,9 @@ name: Tests

on:
pull_request:
branches:
- main
branches: [ master, main ]
push:
branches:
- main
branches: [ master, main ]

jobs:
run-tests:
Expand Down
49 changes: 25 additions & 24 deletions sbx_mapping.smk
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,26 @@ def get_mapping_path() -> Path:


SBX_MAPPING_VERSION = open(get_mapping_path() / "VERSION").read().strip()

try:
GenomeFiles
GenomeSegments
except NameError:
sys.stderr.write("sbx_mapping::INFO Collecting target genomes... ")
if (
Cfg["sbx_mapping"]["genomes_fp"] == Cfg["all"]["root"]
or not Cfg["sbx_mapping"]["genomes_fp"]
):
GenomeFiles = []
GenomeSegments = {}
else:
GenomeFiles = [f for f in Cfg["sbx_mapping"]["genomes_fp"].glob("*.fasta")]
GenomeSegments = {
PurePath(g.name).stem: read_seq_ids(Cfg["sbx_mapping"]["genomes_fp"] / g)
for g in GenomeFiles
}
sys.stderr.write("done.\n")
sys.stderr.write(f"sbx_mapping::INFO Genome files found: {str(GenomeFiles)}\n")
HOST_FILE_EXT = ".fasta"
sys.stderr.write("sbx_mapping::INFO Collecting target genomes... ")
if (
Cfg["sbx_mapping"]["genomes_fp"] == Cfg["all"]["root"]
or not Cfg["sbx_mapping"]["genomes_fp"]
):
GenomeFiles = []
GenomeSegments = {}
else:
GenomeFiles = [f for f in Cfg["sbx_mapping"]["genomes_fp"].glob("*.fasta")]
if not GenomeFiles:
GenomeFiles = [f for f in Cfg["sbx_mapping"]["genomes_fp"].glob("*.fa")]
HOST_FILE_EXT = ".fa"
GenomeSegments = {
PurePath(g.name).stem: read_seq_ids(Cfg["sbx_mapping"]["genomes_fp"] / g)
for g in GenomeFiles
}
GenomeFiles = {PurePath(g.name).stem: g for g in GenomeFiles}
sys.stderr.write("done.\n")
sys.stderr.write(f"sbx_mapping::INFO Genome files found: {str(GenomeFiles)}\n")


try:
Expand Down Expand Up @@ -80,10 +80,10 @@ rule all_mapping:

rule build_genome_index:
input:
Cfg["sbx_mapping"]["genomes_fp"] / "{genome}.fasta",
Cfg["sbx_mapping"]["genomes_fp"] / ("{genome}" + HOST_FILE_EXT),
output:
[
Cfg["sbx_mapping"]["genomes_fp"] / ("{genome}.fasta." + ext)
Cfg["sbx_mapping"]["genomes_fp"] / ("{genome}" + HOST_FILE_EXT + "." + ext)
for ext in ["amb", "ann", "bwt", "pac", "sa"]
],
benchmark:
Expand All @@ -100,8 +100,8 @@ rule build_genome_index:

rule align_to_genome:
input:
*rules.build_genome_index.output,
reads=expand(QC_FP / "decontam" / "{{sample}}_{rp}.fastq.gz", rp=Pairs),
index=Cfg["sbx_mapping"]["genomes_fp"] / "{genome}.fasta.amb",
output:
temp(MAPPING_FP / "intermediates" / "{genome}" / "{sample}.sam"),
benchmark:
Expand All @@ -110,6 +110,7 @@ rule align_to_genome:
LOG_FP / "align_to_genome_{genome}_{sample}.log",
params:
index_fp=Cfg["sbx_mapping"]["genomes_fp"],
host_file_ext=HOST_FILE_EXT,
threads: 4
conda:
"envs/sbx_mapping_env.yml"
Expand All @@ -118,7 +119,7 @@ rule align_to_genome:
shell:
"""
bwa mem -M -t {threads} \
{params.index_fp}/{wildcards.genome}.fasta \
{params.index_fp}/{wildcards.genome}{params.host_file_ext} \
{input.reads} -o {output} \
2>&1 | tee {log}
"""
Expand Down

0 comments on commit 9e3c8fd

Please sign in to comment.