Skip to content

Commit

Permalink
feat: remove 'hack'
Browse files Browse the repository at this point in the history
  • Loading branch information
clintval committed Sep 20, 2024
1 parent e69b48f commit 72abb30
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 18 deletions.
33 changes: 16 additions & 17 deletions prymer/offtarget/bwa.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@
from typing import cast

import pysam
from fgpyo import sam
from fgpyo import sequence
from fgpyo.sam import Cigar
from pysam import AlignedSegment
from pysam import AlignmentHeader

from prymer.api import coordmath
from prymer.util.executable_runner import ExecutableRunner
Expand Down Expand Up @@ -261,7 +261,7 @@ def __init__(
else:
message = "BWA index file does not exist:\n\t"
message += "\t\n".join(f"{p}" for p in missing_aux_paths)
raise FileNotFoundError(f"{message}\nPlease index with: `{executable_path} index {ref}`")
raise FileNotFoundError(f"{message}\nIndex with: `{executable_path} index {ref}`")

# -N = non-iterative mode: search for all n-difference hits (slooow)
# -S = output SAM (run samse)
Expand Down Expand Up @@ -293,20 +293,18 @@ def __init__(

super().__init__(command=command)

# HACK ALERT
# This is a hack. By trial and error, pysam.AlignmentFile() will block reading unless
# there's at least a few records due to htslib wanting to read a few records for format
# auto-detection. Lame. So a hundred queries are sent to the aligner to align enable the
# htslib auto-detection to complete, and for us to be able to read using pysam.
num_warmup: int = 100
for i in range(num_warmup):
query = Query(id=f"ignoreme:{i}", bases="A" * 100)
fastq_str = query.to_fastq(reverse_complement=self.reverse_complement)
self._subprocess.stdin.write(fastq_str)
# Send in a single record to be aligned so we get bwa to output a SAM header.
self._subprocess.stdin.write(Query(id="ignore", bases="A").to_fastq())
self.__signal_bwa() # forces the input to be sent to the underlying process.
self._reader = sam.reader(path=self._subprocess.stdout, file_type=sam.SamFileType.SAM)
for _ in range(num_warmup):
next(self._reader)

header = []
for line in self._subprocess.stdout:
if line.startswith("@"):
header.append(line)
if line.startswith("ignore"):
break

self._header = AlignmentHeader.from_text("".join(header))

def __signal_bwa(self) -> None:
"""Signals BWA to process the queries"""
Expand Down Expand Up @@ -349,7 +347,9 @@ def map_all(self, queries: list[Query]) -> list[BwaResult]:
results: list[BwaResult] = []
for query in queries:
# get the next alignment and convert to a result
results.append(self._to_result(query=query, rec=next(self._reader)))
line: str = next(self._subprocess.stdout).strip()
alignment = AlignedSegment.fromstring(line, self._header)
results.append(self._to_result(query=query, rec=alignment))

return results

Expand Down Expand Up @@ -423,5 +423,4 @@ def to_hits(self, rec: AlignedSegment) -> list[BwaHit]:
return hits

def close(self) -> None:
self._reader.close()
super().close()
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -154,5 +154,4 @@ ignore = ["E203", "E701"]
unfixable = ["B"]

[tool.ruff.lint.isort]

force-single-line = true

0 comments on commit 72abb30

Please sign in to comment.