Skip to content

Commit

Permalink
perf(mi): massive increase in performance for STRkit MI calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
davidlougheed committed Oct 27, 2023
1 parent a6af2e1 commit 8a37651
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 17 deletions.
4 changes: 3 additions & 1 deletion strkit/mi/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Optional
from typing import Any, Optional

from strkit.logger import logger as logger_
from .result import MIContigResult, MIResult
Expand Down Expand Up @@ -70,6 +70,8 @@ def __init__(
self._debug: bool = debug
self._logger: logging.Logger = logger

self._cache: dict[str, Any] = {}

@property
def test_to_perform(self) -> str:
return self._test_to_perform
Expand Down
33 changes: 17 additions & 16 deletions strkit/mi/strkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,20 +119,26 @@ def calculate_contig(self, contig: str) -> MIContigResult:


class StrKitJSONCalculator(BaseCalculator):
def __init__(self, *args, **kwargs):
    """Initialise the calculator and preload the three call files.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser. The mother, father and child call files are then
    each parsed as JSON exactly once and stored in ``self._cache`` under
    the keys ``"mother_data"``, ``"father_data"`` and ``"child_data"``.

    Raises:
        OSError: If a call file cannot be opened.
        ValueError: If a call file is not valid UTF-8 or not valid JSON
            (``UnicodeDecodeError`` and ``json.JSONDecodeError`` are both
            subclasses of ``ValueError``).
    """
    super().__init__(*args, **kwargs)

    # Cache key -> path of the call file whose parsed contents it holds.
    call_files = (
        ("mother_data", self._mother_call_file),
        ("father_data", self._father_call_file),
        ("child_data", self._child_call_file),
    )

    for cache_key, call_file in call_files:
        # JSON interchange text is UTF-8; be explicit so parsing does not
        # depend on the platform's default locale encoding.
        with open(call_file, "r", encoding="utf-8") as fh:
            self._cache[cache_key] = json.load(fh)

@staticmethod
def get_contigs_from_fh(fh) -> set:
report = json.loads(fh.read())
def get_contigs_from_data(report) -> set:
    """Return the set of contig names found in a parsed call report.

    A top-level ``"contigs"`` entry is used whenever it is present and not
    ``None`` (so an empty listing yields an empty set). Otherwise the
    ``"contig"`` value of every item in ``report["results"]`` is collected.
    """
    declared = report.get("contigs")
    if declared is None:
        # No explicit listing: derive the contigs from the per-result records.
        return set(entry["contig"] for entry in report["results"])
    return set(declared)

def _get_sample_contigs(self, include_sex_chromosomes: bool = False) -> tuple[set, set, set]:
with open(self._mother_call_file, "r") as mvf:
mc = self.get_contigs_from_fh(mvf)
with open(self._father_call_file, "r") as fvf:
fc = self.get_contigs_from_fh(fvf)
with open(self._child_call_file, "r") as cvf:
cc = self.get_contigs_from_fh(cvf)
mc = self.get_contigs_from_data(self._cache["mother_data"])
fc = self.get_contigs_from_data(self._cache["father_data"])
cc = self.get_contigs_from_data(self._cache["child_data"])
return mc, fc, cc

@staticmethod
Expand Down Expand Up @@ -182,8 +188,7 @@ def make_calls_dict(report: dict, contig: str):
}

def calculate_contig(self, contig: str) -> MIContigResult:
with open(self._child_call_file, "r") as ch:
c_report = json.loads(ch.read())
c_report = self._cache["child_data"]

fractional = c_report["parameters"]["fractional"]

Expand All @@ -192,14 +197,10 @@ def calculate_contig(self, contig: str) -> MIContigResult:

cr = MIContigResult(includes_95_ci=True)

with open(self._mother_call_file) as mh:
mother_data = self.make_calls_dict(json.loads(mh.read()), contig)

mother_data = self.make_calls_dict(self._cache["mother_data"], contig)
logger.debug(f"loaded materal calls for {contig}")

with open(self._father_call_file) as fh:
father_data = self.make_calls_dict(json.loads(fh.read()), contig)

father_data = self.make_calls_dict(self._cache["father_data"], contig)
logger.debug(f"loaded paternal calls for {contig}")

for res in c_report["results"]:
Expand Down

0 comments on commit 8a37651

Please sign in to comment.