Skip to content

Commit

Permalink
Rework compute_allele_frequency
Browse files Browse the repository at this point in the history
  • Loading branch information
szhan committed Feb 26, 2024
1 parent 468e35b commit 1789f21
Showing 1 changed file with 16 additions and 10 deletions.
26 changes: 16 additions & 10 deletions python/tests/beagle_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,7 +708,7 @@ def compute_individual_scores(alleles_1, allele_probs_1, alleles_2, allele_probs
"""
n = len(alleles_1) # Number of individuals
assert len(alleles_2) == n, "Lengths of alleles differ."
assert n > 1, "There must be at least one individual."
assert n > 0, "There must be at least one individual."
assert len(allele_probs_1) == n, "Lengths of alleles and probabilities differ."
assert len(allele_probs_2) == n, "Lengths of alleles and probabilities differ."
gt_probs = np.zeros((n, 3), dtype=np.float32)
Expand Down Expand Up @@ -780,7 +780,9 @@ def compute_dosage_r_squared(gt_probs):
pass


def compute_allele_frequency(
    alleles_1, allele_probs_1, alleles_2, allele_probs_2, *, allele=1
):
    """
    Estimate the frequency of an allele at a site from the imputed alleles
    and imputed allele probabilities of both haplotypes of each individual.

    The estimate is the sum of the probabilities of every haplotype whose
    imputed allele equals `allele`, divided by the number of haplotypes
    (two per individual).

    See Am J Hum Genet. 84(2): 210–223. doi: 10.1016/j.ajhg.2009.01.005.

    In BEAGLE 4.1, AF: "Estimated ALT Allele Frequencies".
    See `printInfo` in `VcfRecBuilder.java` of the BEAGLE 4.1 source code.

    :param numpy.ndarray alleles_1: Imputed alleles for haplotype 1.
    :param numpy.ndarray allele_probs_1: Imputed allele probabilities for haplotype 1.
    :param numpy.ndarray alleles_2: Imputed alleles for haplotype 2.
    :param numpy.ndarray allele_probs_2: Imputed allele probabilities for haplotype 2.
    :param int allele: Allele (default = 1).
    :return: Estimated allele frequency.
    :rtype: float
    """
    n = len(alleles_1)  # Number of individuals
    assert len(alleles_2) == n, "Lengths of alleles differ."
    assert n > 0, "There must be at least one individual."
    assert len(allele_probs_1) == n, "Lengths of alleles and probabilities differ."
    assert len(allele_probs_2) == n, "Lengths of alleles and probabilities differ."
    assert allele in [0, 1], f"Allele {allele} is not recognized."
    # Coerce to arrays so that `== allele` yields an element-wise boolean mask
    # even when plain sequences are passed in.
    alleles_1 = np.asarray(alleles_1)
    alleles_2 = np.asarray(alleles_2)
    allele_probs_1 = np.asarray(allele_probs_1)
    allele_probs_2 = np.asarray(allele_probs_2)
    # The masked selections must be reduced to scalars before being combined:
    # the two masks generally select different numbers of entries, so adding
    # the unsummed selections either fails to broadcast or returns an array.
    cum_allele_prob_1 = np.sum(allele_probs_1[alleles_1 == allele])
    cum_allele_prob_2 = np.sum(allele_probs_2[alleles_2 == allele])
    return float((cum_allele_prob_1 + cum_allele_prob_2) / (2 * n))


def write_vcf(impdata, out_file, *, chr_name="1"):
Expand Down

0 comments on commit 1789f21

Please sign in to comment.