diff --git a/prymer/primer3/primer3.py b/prymer/primer3/primer3.py index a00051e..6543699 100644 --- a/prymer/primer3/primer3.py +++ b/prymer/primer3/primer3.py @@ -63,7 +63,7 @@ primer_and_amplicon_params=params, \ task=DesignLeftPrimersTask(), \ ) ->>> left_result = designer.design_primers(design_input=design_input) +>>> left_result = designer.design_oligos(design_input=design_input) ``` @@ -140,6 +140,7 @@ from fgpyo.util.metric import Metric from prymer.api.primer import Primer +from prymer.api.probe import Probe from prymer.api.primer_like import PrimerLike from prymer.api.primer_pair import PrimerPair from prymer.api.span import Span @@ -152,6 +153,7 @@ from prymer.primer3.primer3_task import DesignLeftPrimersTask from prymer.primer3.primer3_task import DesignPrimerPairsTask from prymer.primer3.primer3_task import DesignRightPrimersTask +from prymer.primer3.primer3_task import PickHybProbeOnly from prymer.util.executable_runner import ExecutableRunner @@ -308,13 +310,6 @@ def get_design_sequences(self, region: Span) -> tuple[str, str]: hard_masked = "".join(soft_masked_list) return soft_masked, hard_masked - @staticmethod - def _is_valid_primer(design_input: Primer3Input, primer_design: Primer) -> bool: - return ( - primer_design.longest_dinucleotide_run_length() - <= design_input.primer_and_amplicon_params.primer_max_dinuc_bases - ) - @staticmethod def _screen_pair_results( design_input: Primer3Input, designed_primer_pairs: list[PrimerPair] @@ -349,8 +344,8 @@ def _screen_pair_results( valid_primer_pair_designs.append(primer_pair) return valid_primer_pair_designs, dinuc_pair_failures - def design_primers(self, design_input: Primer3Input) -> Primer3Result: # noqa: C901 - """Designs primers or primer pairs given a target region. + def design_oligos(self, design_input: Primer3Input) -> Primer3Result: # noqa: C901 + """Designs primers, primer pairs, and/or internal probes given a target region. Args: design_input: encapsulates the target region, design task, specifications, and scoring @@ -371,12 +366,15 @@ def design_primers(self, design_input: Primer3Input) -> Primer3Result: # noqa: f"Error, trying to use a subprocess that has already been " f"terminated, return code {self._subprocess.returncode}" ) - - design_region: Span = self._create_design_region( - target_region=design_input.target, - max_amplicon_length=design_input.primer_and_amplicon_params.max_amplicon_length, - min_primer_length=design_input.primer_and_amplicon_params.min_primer_length, - ) + match design_input.task: + case PickHybProbeOnly(): + design_region: Span = design_input.target + case _: + design_region: Span = self._create_design_region( + target_region=design_input.target, + max_amplicon_length=design_input.primer_and_amplicon_params.max_amplicon_length, + min_primer_length=design_input.primer_and_amplicon_params.min_primer_length, + ) soft_masked, hard_masked = self.get_design_sequences(design_region) global_primer3_params = { @@ -389,7 +387,6 @@ def design_primers(self, design_input: Primer3Input) -> Primer3Result: # noqa: **global_primer3_params, **design_input.to_input_tags(design_region=design_region), } - # Submit inputs to primer3 for tag, value in assembled_primer3_tags.items(): self._subprocess.stdin.write(f"{tag}={value}") @@ -441,6 +438,19 @@ def primer3_error(message: str) -> None: primer3_error("Primer3 failed") match design_input.task: + case PickHybProbeOnly(): # Probe design + all_probe_results: list[Probe] = Primer3._build_probes( + design_input=design_input, + design_results=primer3_results, + design_region=design_region, + unmasked_design_seq=soft_masked) + + return Primer3._assemble_single_designs( + design_input=design_input, + design_results=primer3_results, + unfiltered_designs=all_probe_results, + ) + case DesignPrimerPairsTask(): # Primer pair design all_pair_results: list[PrimerPair] = Primer3._build_primer_pairs( design_input=design_input, @@ -462,7 +472,7 @@ def primer3_error(message: str) -> None: design_task=design_input.task, unmasked_design_seq=soft_masked, ) - return Primer3._assemble_primers( + return Primer3._assemble_single_designs( design_input=design_input, design_results=primer3_results, unfiltered_designs=all_single_results, @@ -471,6 +481,46 @@ def primer3_error(message: str) -> None: case _ as unreachable: assert_never(unreachable) + @staticmethod + def _build_probes( + design_input: Primer3Input, + design_results: dict[str, str], + design_region: Span, + unmasked_design_seq: str, + ) -> list[Probe]: + count: int = _check_design_results(design_input, design_results) + task_key = design_input.task.task_type + probes: list[Probe] = [] + for idx in range(count): + key = f"PRIMER_{task_key}_{idx}" + str_position, str_length = design_results[key].split(",", maxsplit=1) + position, length = int(str_position), int(str_length) # position is 1-based + + span = design_region.get_subspan( + offset=position - 1, subspan_length=length, strand=design_region.strand + ) + + slice_offset = design_region.get_offset(span.start) + slice_end = design_region.get_offset(span.end) + 1 + + # remake the primer sequence from the un-masked genome sequence just in case + bases = unmasked_design_seq[slice_offset:slice_end] + if span.strand == Strand.NEGATIVE: + bases = reverse_complement(bases) + + probes.append( + Probe( + bases=bases, + tm=float(design_results[f"{key}_TM"]), + penalty=float(design_results[f"{key}_PENALTY"]), + span=span, + self_any_th=float(design_results[f"{key}_SELF_ANY_TH"]), + self_end_th=float(design_results[f"{key}_SELF_END_TH"]), + hairpin_th=float(design_results[f"{key}_HAIRPIN_TH"]), + ) + ) + return probes + @staticmethod def _build_primers( design_input: Primer3Input, @@ -495,18 +545,9 @@ def _build_primers( Raises: ValueError: if Primer3 does not return primer designs """ - count_tag = design_input.task.count_tag - - maybe_count: Optional[str] = design_results.get(count_tag) - if maybe_count is None: # no count tag was found - if "PRIMER_ERROR" in design_results: - primer_error = design_results["PRIMER_ERROR"] - raise ValueError(f"Primer3 returned an error: {primer_error}") - else: - raise ValueError(f"Primer3 did not return the count tag: {count_tag}") - count: int = int(maybe_count) - - primers = [] + count: int = _check_design_results(design_input, design_results) + + primers: list[Primer] = [] for idx in range(count): key = f"PRIMER_{design_task.task_type}_{idx}" str_position, str_length = design_results[key].split(",", maxsplit=1) @@ -544,41 +585,28 @@ def _build_primers( return primers @staticmethod - def _assemble_primers( - design_input: Primer3Input, design_results: dict[str, str], unfiltered_designs: list[Primer] + def _assemble_single_designs(design_input: Primer3Input, design_results: dict[str, str], unfiltered_designs: Union[list[Primer], list[Probe]] ) -> Primer3Result: - """Helper function to organize primer designs into valid and failed designs. + """Screens oligo designs (primers or probes) emitted by Primer3 for acceptable dinucleotide runs and extracts failure reasons for failed designs.""" - Wraps `Primer3._is_valid_primer()` and `Primer3._build_failures()` to filter out designs - with dinucleotide runs that are too long and extract additional failure reasons emitted by - Primer3. - - Args: - design_input: encapsulates the target region, design task, specifications, - and scoring penalties - unfiltered_designs: list of primers emitted from Primer3 - design_results: key-value pairs of results reported by Primer3 - - Returns: - primer_designs: a `Primer3Result` that encapsulates valid and failed designs - """ - valid_primer_designs = [ + valid_oligo_designs = [ design for design in unfiltered_designs - if Primer3._is_valid_primer(primer_design=design, design_input=design_input) + if _has_acceptable_dinuc_run(oligo_design=design, design_input=design_input) ] dinuc_failures = [ design for design in unfiltered_designs - if not Primer3._is_valid_primer(primer_design=design, design_input=design_input) + if not _has_acceptable_dinuc_run(oligo_design=design, design_input=design_input) ] failure_strings = [design_results[f"PRIMER_{design_input.task.task_type}_EXPLAIN"]] failures = Primer3._build_failures(dinuc_failures, failure_strings) - primer_designs: Primer3Result = Primer3Result( - filtered_designs=valid_primer_designs, failures=failures + design_candidates: Primer3Result = Primer3Result( + filtered_designs=valid_oligo_designs, failures=failures ) - return primer_designs + return design_candidates + @staticmethod def _build_primer_pairs( @@ -684,7 +712,7 @@ def _assemble_primer_pairs( @staticmethod def _build_failures( - dinuc_failures: list[Primer], + dinuc_failures: Union[list[Primer], list[Probe]], failure_strings: list[str], ) -> list[Primer3Failure]: """Extracts the reasons why designs that were considered by Primer3 failed @@ -760,3 +788,28 @@ def _create_design_region( ) return design_region + +def _check_design_results(design_input: Primer3Input, design_results: dict[str, str]) -> int: + """Checks for any additional Primer3 errors and reports out the count of designs emitted by Primer3.""" + count_tag = design_input.task.count_tag + maybe_count: Optional[str] = design_results.get(count_tag) + if maybe_count is None: # no count tag was found + if "PRIMER_ERROR" in design_results: + primer_error = design_results["PRIMER_ERROR"] + raise ValueError(f"Primer3 returned an error: {primer_error}") + else: + raise ValueError(f"Primer3 did not return the count tag: {count_tag}") + count: int = int(maybe_count) + + return count + +def _has_acceptable_dinuc_run(design_input: Primer3Input, oligo_design: Union[Primer, Probe]) -> bool: + if type(oligo_design) is Primer: + max_dinuc_bases: int = design_input.primer_and_amplicon_params.primer_max_dinuc_bases + elif type(oligo_design) is Probe: + max_dinuc_bases: int = design_input.probe_params.probe_max_dinuc_bases + + return ( + oligo_design.longest_dinucleotide_run_length() + <= max_dinuc_bases + ) diff --git a/prymer/primer3/primer3_input.py b/prymer/primer3/primer3_input.py index e9fb3ac..f7bc110 100644 --- a/prymer/primer3/primer3_input.py +++ b/prymer/primer3/primer3_input.py @@ -122,12 +122,18 @@ def __post_init__(self) -> None: "Primer3 requires at least one set of parameters" " for either primer or probe design" ) - - if self.primer_and_amplicon_params is not None and self.primer_weights is None: - object.__setattr__(self, "primer_weights", PrimerAndAmpliconWeights()) - - if self.probe_params is not None and self.probe_weights is None: - object.__setattr__(self, "probe_weights", ProbeWeights()) + elif self.task.requires_primer_amplicon_params: + if self.primer_and_amplicon_params is None: + raise ValueError(f"Primer3 task {self.task} requires `PrimerAndAmpliconParams`") + else: + if self.primer_weights is None: + object.__setattr__(self, "primer_weights", PrimerAndAmpliconWeights()) + elif self.task.requires_probe_params: + if self.probe_params is None: + raise ValueError(f"Primer3 task {self.task} requires `ProbeParameters`") + else: + if self.probe_weights is None: + object.__setattr__(self, "probe_weights", ProbeWeights()) def to_input_tags(self, design_region: Span) -> dict[Primer3InputTag, Any]: """Assembles `Primer3InputTag` and values for input to `Primer3` @@ -154,5 +160,4 @@ def to_input_tags(self, design_region: Span) -> dict[Primer3InputTag, Any]: for settings in optional_attributes.values(): if settings is not None: assembled_tags.update(settings.to_input_tags()) - return assembled_tags diff --git a/prymer/primer3/primer3_task.py b/prymer/primer3/primer3_task.py index e09a769..461d77c 100644 --- a/prymer/primer3/primer3_task.py +++ b/prymer/primer3/primer3_task.py @@ -8,7 +8,7 @@ The design task "type" dictates which type of primers to pick and informs the design region. These parameters are aligned to the correct Primer3 settings and fed directly into Primer3. -Three types of tasks are available: +Four types of tasks are available: 1. [`DesignPrimerPairsTask`][prymer.primer3.primer3_task.DesignPrimerPairsTask] -- task for designing _primer pairs_. @@ -16,6 +16,8 @@ for designing primers to the _left_ (5') of the design region on the top/positive strand. 3. [`DesignRightPrimersTask`][prymer.primer3.primer3_task.DesignRightPrimersTask] -- task for designing primers to the _right_ (3') of the design region on the bottom/negative strand. +4. [`PickHybProbeOnly`][prymer.primer3.primer3_task.PickHybProbeOnly] -- task for designing an + internal probe for hybridization-based technologies The main purpose of these classes are to generate the [`Primer3InputTag`s][prymer.primer3.primer3_input_tag.Primer3InputTag]s required by @@ -103,15 +105,15 @@ from prymer.primer3.primer3_input_tag import Primer3InputTag Primer3TaskType: TypeAlias = Union[ - "DesignPrimerPairsTask", "DesignLeftPrimersTask", "DesignRightPrimersTask" + "DesignPrimerPairsTask", "DesignLeftPrimersTask", "DesignRightPrimersTask", "PickHybProbeOnly" ] """Type alias for all `Primer3Task`s, to enable exhaustiveness checking.""" @unique class TaskType(UppercaseStrEnum): - """Represents the type of design task, either design primer pairs, or individual primers - (left or right).""" + """Represents the type of design task: design primer pairs, individual primers + (left or right), or an internal hybridization probe.""" # Developer Note: the names of this enum are important, as they are used as-is for the # count_tag in `Primer3Task`. @@ -119,6 +121,7 @@ class TaskType(UppercaseStrEnum): PAIR = auto() LEFT = auto() RIGHT = auto() + INTERNAL = auto() class Primer3Task(ABC): @@ -191,9 +194,17 @@ def _to_input_tags(cls, target: Span, design_region: Span) -> dict[Primer3InputT Primer3InputTag.PRIMER_PICK_RIGHT_PRIMER: 1, Primer3InputTag.PRIMER_PICK_INTERNAL_OLIGO: 0, Primer3InputTag.SEQUENCE_TARGET: f"{target.start - design_region.start + 1}," - f"{target.length}", + f"{target.length}", } + @property + def requires_primer_amplicon_params(self) -> bool: + return True + + @property + def requires_probe_params(self) -> bool: + return False + class DesignLeftPrimersTask(Primer3Task, task_type=TaskType.LEFT): """Stores task-specific characteristics for designing left primers.""" @@ -208,6 +219,14 @@ def _to_input_tags(cls, target: Span, design_region: Span) -> dict[Primer3InputT Primer3InputTag.SEQUENCE_INCLUDED_REGION: f"1,{target.start - design_region.start}", } + @property + def requires_primer_amplicon_params(self) -> bool: + return True + + @property + def requires_probe_params(self) -> bool: + return False + class DesignRightPrimersTask(Primer3Task, task_type=TaskType.RIGHT): """Stores task-specific characteristics for designing right primers""" @@ -223,3 +242,32 @@ def _to_input_tags(cls, target: Span, design_region: Span) -> dict[Primer3InputT Primer3InputTag.PRIMER_PICK_INTERNAL_OLIGO: 0, Primer3InputTag.SEQUENCE_INCLUDED_REGION: f"{start},{length}", } + + @property + def requires_primer_amplicon_params(self) -> bool: + return True + + @property + def requires_probe_params(self) -> bool: + return False + + +class PickHybProbeOnly(Primer3Task, task_type=TaskType.INTERNAL): + """Stores task-specific characteristics for designing an internal hybridization probe.""" + + @classmethod + def _to_input_tags(cls, target: Span, design_region: Span) -> dict[Primer3InputTag, Any]: + return { + Primer3InputTag.PRIMER_TASK: "generic", + Primer3InputTag.PRIMER_PICK_LEFT_PRIMER: 0, + Primer3InputTag.PRIMER_PICK_RIGHT_PRIMER: 0, + Primer3InputTag.PRIMER_PICK_INTERNAL_OLIGO: 1, + } + + @property + def requires_primer_amplicon_params(self) -> bool: + return False + + @property + def requires_probe_params(self) -> bool: + return True diff --git a/tests/primer3/test_primer3.py b/tests/primer3/test_primer3.py index ca3b145..223ccba 100644 --- a/tests/primer3/test_primer3.py +++ b/tests/primer3/test_primer3.py @@ -12,14 +12,16 @@ from prymer.api.span import Span from prymer.api.span import Strand from prymer.api.variant_lookup import cached -from prymer.primer3.primer3 import Primer3 +from prymer.primer3.primer3 import Primer3, _has_acceptable_dinuc_run from prymer.primer3.primer3 import Primer3Failure from prymer.primer3.primer3 import Primer3Result from prymer.primer3.primer3_input import Primer3Input from prymer.primer3.primer3_parameters import PrimerAndAmpliconParameters +from prymer.primer3.primer3_parameters import ProbeParameters from prymer.primer3.primer3_task import DesignLeftPrimersTask from prymer.primer3.primer3_task import DesignPrimerPairsTask from prymer.primer3.primer3_task import DesignRightPrimersTask +from prymer.primer3.primer3_task import PickHybProbeOnly @pytest.fixture(scope="session") @@ -32,6 +34,15 @@ def vcf_path() -> Path: return Path(__file__).parent / "data" / "miniref.variants.vcf.gz" +@pytest.fixture +def valid_probe_params_no_exclude() -> ProbeParameters: + return ProbeParameters( + probe_sizes=MinOptMax(min=18, max=30, opt=22), + probe_tms=MinOptMax(min=55.0, max=100.0, opt=70.0), + probe_gcs=MinOptMax(min=30.0, max=65.0, opt=45.0), + ) + + @pytest.fixture def single_primer_params() -> PrimerAndAmpliconParameters: return PrimerAndAmpliconParameters( @@ -148,10 +159,27 @@ def test_design_primers_raises( task=DesignLeftPrimersTask(), ) with pytest.raises(ValueError, match="Primer3 failed"): - Primer3(genome_fasta=genome_ref).design_primers(design_input=invalid_design_input) + Primer3(genome_fasta=genome_ref).design_oligos(design_input=invalid_design_input) # TODO: add other Value Errors +def test_internal_probe_valid_designs( + genome_ref: Path, + valid_probe_params_no_exclude: ProbeParameters, +) -> None: + """Test that left primer designs are within the specified design specifications.""" + target = Span(refname="chr1", start=201, end=250, strand=Strand.POSITIVE) + assert valid_probe_params_no_exclude is not None + design_input = Primer3Input( + target=target, + probe_params=valid_probe_params_no_exclude, + task=PickHybProbeOnly(), + ) + with Primer3(genome_fasta=genome_ref) as designer: + print(designer.get_design_sequences(target)) + valid_probes = designer.design_oligos(design_input=design_input) + print(valid_probes) + def test_left_primer_valid_designs( genome_ref: Path, single_primer_params: PrimerAndAmpliconParameters, @@ -166,7 +194,7 @@ def test_left_primer_valid_designs( with Primer3(genome_fasta=genome_ref) as designer: for _ in range(10): # run many times to ensure we can re-use primer3 - left_result = designer.design_primers(design_input=design_input) + left_result = designer.design_oligos(design_input=design_input) designed_lefts: list[Primer] = left_result.primers() assert all(isinstance(design, Primer) for design in designed_lefts) for actual_design in designed_lefts: @@ -213,7 +241,7 @@ def test_right_primer_valid_designs( ) with Primer3(genome_fasta=genome_ref) as designer: for _ in range(10): # run many times to ensure we can re-use primer3 - right_result: Primer3Result = designer.design_primers(design_input=design_input) + right_result: Primer3Result = designer.design_oligos(design_input=design_input) designed_rights: list[Primer] = right_result.primers() assert all(isinstance(design, Primer) for design in designed_rights) @@ -261,7 +289,7 @@ def test_primer_pair_design( task=DesignPrimerPairsTask(), ) with Primer3(genome_fasta=genome_ref) as designer: - pair_result: Primer3Result = designer.design_primers(design_input=design_input) + pair_result: Primer3Result = designer.design_oligos(design_input=design_input) designed_pairs: list[PrimerPair] = pair_result.primer_pairs() assert all(isinstance(design, PrimerPair) for design in designed_pairs) lefts = [primer_pair.left_primer for primer_pair in designed_pairs] @@ -351,7 +379,7 @@ def test_fasta_close_valid( with pytest.raises( RuntimeError, match="Error, trying to use a subprocess that has already been terminated" ): - designer.design_primers(design_input=design_input) + designer.design_oligos(design_input=design_input) @pytest.mark.parametrize( @@ -406,7 +434,7 @@ def test_screen_pair_results( genome_ref: Path, pair_primer_params: PrimerAndAmpliconParameters, ) -> None: - """Test that `_is_valid_primer()` and `_screen_pair_results()` use + """Test that `_has_acceptable_dinuc_run()` and `_screen_pair_results()` use `Primer3Parameters.primer_max_dinuc_bases` to disqualify primers when applicable. Create 2 sets of design input, the only difference being the length of allowable dinucleotide run in a primer (high_threshold = 6, low_threshold = 2). @@ -419,8 +447,10 @@ def test_screen_pair_results( task=DesignPrimerPairsTask(), ) - lower_dinuc_thresh = replace(pair_primer_params, primer_max_dinuc_bases=2) # lower from 6 to 2 - altered_design_input = Primer3Input( + lower_dinuc_thresh: PrimerAndAmpliconParameters = replace( + pair_primer_params, primer_max_dinuc_bases=2 + ) # lower from 6 to 2 + altered_design_input: Primer3Input = Primer3Input( target=target, primer_and_amplicon_params=lower_dinuc_thresh, task=DesignPrimerPairsTask(), @@ -431,6 +461,7 @@ def test_screen_pair_results( design_input=design_input, designed_primer_pairs=valid_primer_pairs ) assert len(base_dinuc_pair_failures) == 0 + assert design_input.primer_and_amplicon_params.primer_max_dinuc_bases is not None for primer_pair in base_primer_pair_designs: assert ( primer_pair.left_primer.longest_dinucleotide_run_length() @@ -440,11 +471,11 @@ def test_screen_pair_results( primer_pair.right_primer.longest_dinucleotide_run_length() <= design_input.primer_and_amplicon_params.primer_max_dinuc_bases ) - assert Primer3._is_valid_primer( - design_input=design_input, primer_design=primer_pair.left_primer + assert _has_acceptable_dinuc_run( + design_input=design_input, oligo_design=primer_pair.left_primer ) - assert Primer3._is_valid_primer( - design_input=design_input, primer_design=primer_pair.right_primer + assert _has_acceptable_dinuc_run( + design_input=design_input, oligo_design=primer_pair.right_primer ) # 1 primer from every pair will fail lowered dinuc threshold of 2 @@ -452,6 +483,8 @@ def test_screen_pair_results( altered_designs, altered_dinuc_failures = designer._screen_pair_results( design_input=altered_design_input, designed_primer_pairs=valid_primer_pairs ) + assert altered_design_input.primer_and_amplicon_params is not None + assert [ design.longest_dinucleotide_run_length() > altered_design_input.primer_and_amplicon_params.primer_max_dinuc_bases diff --git a/tests/primer3/test_primer3_weights.py b/tests/primer3/test_primer3_weights.py index 64534e5..3351b74 100644 --- a/tests/primer3/test_primer3_weights.py +++ b/tests/primer3/test_primer3_weights.py @@ -20,15 +20,7 @@ def test_primer_weights_valid() -> None: assert test_dict[Primer3InputTag.PRIMER_WT_SIZE_GT] == 0.1 assert test_dict[Primer3InputTag.PRIMER_WT_TM_LT] == 1.0 assert test_dict[Primer3InputTag.PRIMER_WT_TM_GT] == 1.0 - assert test_dict[Primer3InputTag.PRIMER_INTERNAL_WT_SIZE_LT] == 0.25 - assert test_dict[Primer3InputTag.PRIMER_INTERNAL_WT_SIZE_GT] == 0.25 - assert test_dict[Primer3InputTag.PRIMER_INTERNAL_WT_TM_LT] == 1.0 - assert test_dict[Primer3InputTag.PRIMER_INTERNAL_WT_TM_GT] == 1.0 - assert test_dict[Primer3InputTag.PRIMER_INTERNAL_WT_GC_PERCENT_LT] == 0.5 - assert test_dict[Primer3InputTag.PRIMER_INTERNAL_WT_GC_PERCENT_GT] == 0.5 - assert test_dict[Primer3InputTag.PRIMER_INTERNAL_WT_SELF_ANY] == 1.0 - assert test_dict[Primer3InputTag.PRIMER_INTERNAL_WT_SELF_END] == 1.0 - assert len((test_dict.values())) == 21 + assert len((test_dict.values())) == 13 def test_probe_weights_valid() -> None: