Skip to content

Commit

Permalink
feat: Primer3 takes a list of VCFs instead of a VariantLookup object
Browse files Browse the repository at this point in the history
  • Loading branch information
emmcauley committed Oct 3, 2024
1 parent c434b1d commit 0a83439
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 14 deletions.
44 changes: 35 additions & 9 deletions prymer/primer3/primer3.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@
>>> from prymer.api.variant_lookup import VariantLookup, VariantOverlapDetector
>>> genome_fasta = Path("./tests/primer3/data/miniref.fa")
>>> genome_vcf = Path("./tests/primer3/data/miniref.variants.vcf.gz")
>>> variant_lookup: VariantLookup = VariantOverlapDetector(vcf_paths=[genome_vcf], min_maf=0.01, include_missing_mafs=False)
>>> designer = Primer3(genome_fasta=genome_fasta, variant_lookup=variant_lookup)
>>> designer = Primer3(genome_fasta=genome_fasta, list_of_vcfs=[genome_vcf], use_file_based_lookup=True)
```
Expand Down Expand Up @@ -144,8 +143,11 @@
from prymer.api.primer_pair import PrimerPair
from prymer.api.span import Span
from prymer.api.span import Strand
from prymer.api.variant_lookup import FileBasedVariantLookup
from prymer.api.variant_lookup import SimpleVariant
from prymer.api.variant_lookup import VariantLookup
from prymer.api.variant_lookup import VariantOverlapDetector
from prymer.api.variant_lookup import cached
from prymer.api.variant_lookup import disk_based
from prymer.primer3.primer3_failure_reason import Primer3FailureReason
from prymer.primer3.primer3_input import Primer3Input
from prymer.primer3.primer3_input_tag import Primer3InputTag
Expand Down Expand Up @@ -228,13 +230,22 @@ def __init__(
self,
genome_fasta: Path,
executable: Optional[str] = None,
variant_lookup: Optional[VariantLookup] = None,
list_of_vcfs: Optional[list[Path]] = None,
use_file_based_lookup: bool = True,
min_maf: float = 0.0,
include_missing_mafs: bool = False,
) -> None:
"""
Args:
genome_fasta: Path to reference genome .fasta file
executable: string representation of the path to primer3_core
variant_lookup: VariantLookup object to facilitate hard-masking variants
list_of_vcfs: an optional list of VCF files with which to hard-mask variants; when
`None`, no hard-masking is performed and the soft-masked sequence is returned as-is
use_file_based_lookup: `True` to build the variant lookup with `disk_based`, `False` to
build it with `cached`
min_maf: an optional minimum Minor Allele Frequency with which to filter a list of
VCF files (return only variants with at least this minor allele frequency)
include_missing_mafs: when filtering variants with a minor allele frequency,
`True` to include variants with no annotated minor allele frequency, otherwise
`False`. If no minor allele frequency is given, then this parameter does nothing.
Assumes the sequence dictionary is located adjacent to the .fasta file and has the same
base name with a .dict suffix.
Expand All @@ -245,7 +256,10 @@ def __init__(
)
command: list[str] = [f"{executable_path}"]

self.variant_lookup = variant_lookup
self.list_of_vcfs = list_of_vcfs
self.use_file_based_lookup = use_file_based_lookup
self.min_maf = min_maf
self.include_missing_mafs = include_missing_mafs
self._fasta = pysam.FastaFile(filename=f"{genome_fasta}")

dict_path = genome_fasta.with_suffix(".dict")
Expand Down Expand Up @@ -285,12 +299,24 @@ def get_design_sequences(self, region: Span) -> tuple[str, str]:
soft_masked = self._fasta.fetch(
reference=region.refname, start=region.start - 1, end=region.end
)

if self.variant_lookup is None:
if self.list_of_vcfs is None:
hard_masked = soft_masked
return soft_masked, hard_masked
variant_lookup: Union[FileBasedVariantLookup, VariantOverlapDetector]
if self.use_file_based_lookup is True:
variant_lookup = disk_based(
vcf_paths=self.list_of_vcfs,
min_maf=self.min_maf,
include_missing_mafs=self.include_missing_mafs,
)
else:
variant_lookup = cached(

Check warning on line 313 in prymer/primer3/primer3.py

View check run for this annotation

Codecov / codecov/patch

prymer/primer3/primer3.py#L313

Added line #L313 was not covered by tests
vcf_paths=self.list_of_vcfs,
min_maf=self.min_maf,
include_missing_mafs=self.include_missing_mafs,
)

overlapping_variants: list[SimpleVariant] = self.variant_lookup.query(
overlapping_variants: list[SimpleVariant] = variant_lookup.query(
refname=region.refname, start=region.start, end=region.end
)
positions: list[int] = []
Expand Down
7 changes: 2 additions & 5 deletions tests/primer3/test_primer3.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from prymer.api import PrimerPair
from prymer.api import Span
from prymer.api import Strand
from prymer.api import cached
from prymer.primer3 import DesignLeftPrimersTask
from prymer.primer3 import DesignPrimerPairsTask
from prymer.primer3 import DesignRightPrimersTask
Expand Down Expand Up @@ -381,15 +380,13 @@ def test_variant_lookup(
expected_soft_masked: str,
) -> None:
"""Test that MAF filtering and masking are working as expected."""
with Primer3(
genome_fasta=genome_ref, variant_lookup=cached([vcf_path], min_maf=0.01)
) as designer:
with Primer3(genome_fasta=genome_ref, list_of_vcfs=[vcf_path], min_maf=0.01) as designer:
actual_soft_masked, actual_hard_masked = designer.get_design_sequences(region=region)
assert actual_hard_masked == expected_hard_masked
assert actual_soft_masked == expected_soft_masked

# with no VCFs provided, the hard-masked sequence should be identical to the soft-masked one
with Primer3(genome_fasta=genome_ref, variant_lookup=None) as designer:
with Primer3(genome_fasta=genome_ref) as designer:
actual_soft_masked, actual_hard_masked = designer.get_design_sequences(region=region)
assert actual_hard_masked == expected_soft_masked
assert actual_soft_masked == expected_soft_masked
Expand Down

0 comments on commit 0a83439

Please sign in to comment.