Skip to content

Commit

Permalink
Merge pull request #187 from aryarm/ref/GetLengthGenotypes
Browse the repository at this point in the history
perf: speed up `TRRecord.GetLengthGenotypes()`
  • Loading branch information
aryarm authored Dec 21, 2023
2 parents 1527df6 + 4f840d1 commit 835ced3
Showing 1 changed file with 7 additions and 12 deletions.
19 changes: 7 additions & 12 deletions trtools/utils/tr_harmonizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -829,7 +829,7 @@ def GetGenotypeIndicies(self) -> Optional[np.ndarray]:
"""
if self.vcfrecord.genotype is None:
return None
return self.vcfrecord.genotype.array()
return self.vcfrecord.genotype.array().astype(int)

def GetCalledSamples(self, strict: bool = True) -> Optional[np.ndarray]:
"""
Expand Down Expand Up @@ -1092,18 +1092,13 @@ def GetLengthGenotypes(self) -> Optional[np.ndarray]:
if idx_gts is None:
return None

len_gts = np.empty(idx_gts.shape)
len_gts[:, -1] = idx_gts[:, -1]

allele_lens = []
allele_lens.append(self.ref_allele_length)
allele_lens.extend(self.alt_allele_lengths)
# store allele lengths in a numpy array
# and append fake alleles for the -2 and -1 missing values, so that those
# negative indices wrap around to the last two entries (-2 and -1)
allele_lens = np.array([self.ref_allele_length, *self.alt_allele_lengths, -2, -1])

for idx, allele_len in enumerate(allele_lens):
len_gts[:, :-1][idx_gts[:, :-1] == idx] = allele_len

len_gts[idx_gts == -1] = -1
len_gts[idx_gts == -2] = -2
# copy repeat lengths and phasing for each sample
len_gts = allele_lens[idx_gts]
len_gts[:, -1] = idx_gts[:, -1]

return len_gts

Expand Down

0 comments on commit 835ced3

Please sign in to comment.