diff --git a/python/tests/beagle_numba.py b/python/tests/beagle_numba.py
index 36f74654e1..214df669c3 100644
--- a/python/tests/beagle_numba.py
+++ b/python/tests/beagle_numba.py
@@ -708,7 +708,7 @@ def compute_individual_scores(alleles_1, allele_probs_1, alleles_2, allele_probs
     """
     n = len(alleles_1)  # Number of individuals
     assert len(alleles_2) == n, "Lengths of alleles differ."
-    assert n > 1, "There must be at least one individual."
+    assert n > 0, "There must be at least one individual."
     assert len(allele_probs_1) == n, "Lengths of alleles and probabilities differ."
     assert len(allele_probs_2) == n, "Lengths of alleles and probabilities differ."
     gt_probs = np.zeros((n, 3), dtype=np.float32)
@@ -780,7 +780,9 @@ def compute_dosage_r_squared(gt_probs):
     pass
 
 
-def compute_allele_frequency(gt_probs, *, allele=1):
+def compute_allele_frequency(
+    alleles_1, allele_probs_1, alleles_2, allele_probs_2, *, allele=1
+):
     """
     Estimate the frequency of an allele at a site from posterior genotype probabilities.
 
@@ -794,21 +796,27 @@ def compute_allele_frequency(gt_probs, *, allele=1):
     Am J Hum Genet. 84(2): 210–223. doi: 10.1016/j.ajhg.2009.01.005.
 
     In BEAGLE 4.1, AF: "Estimated ALT Allele Frequencies".
+    See `printInfo` in `VcfRecBuilder.java` of the BEAGLE 4.1 source code.
 
-    :param np.ndarray gt_probs: Genotype probabilities at a site.
+    :param numpy.ndarray alleles_1: Imputed alleles for haplotype 1.
+    :param numpy.ndarray allele_probs_1: Imputed allele probabilities for haplotype 1.
+    :param numpy.ndarray alleles_2: Imputed alleles for haplotype 2.
+    :param numpy.ndarray allele_probs_2: Imputed allele probabilities for haplotype 2.
     :param int allele: Allele (default = 1).
     :return: Estimated allele frequency.
     :rtype: float
     """
-    n = len(gt_probs)  # Number of individuals
+    n = len(alleles_1)  # Number of individuals
+    assert len(alleles_2) == n, "Lengths of alleles differ."
     assert n > 0, "There must be at least one individual."
+    assert len(allele_probs_1) == n, "Lengths of alleles and probabilities differ."
+    assert len(allele_probs_2) == n, "Lengths of alleles and probabilities differ."
     assert allele in [0, 1], f"Allele {allele} is not recognized."
-    if allele == 0:
-        est_allele_count_0 = 2 * np.sum(gt_probs[:, 0]) + np.sum(gt_probs[:, 1])
-        return est_allele_count_0 / (2 * n)
-    else:
-        est_allele_count_1 = np.sum(gt_probs[:, 1]) + 2 * np.sum(gt_probs[:, 2])
-        return est_allele_count_1 / (2 * n)
+    # Boolean-mask indexing returns arrays whose lengths can differ between the
+    # two haplotypes, so reduce each selection to a scalar before combining.
+    cum_allele_freq_1 = np.sum(allele_probs_1[alleles_1 == allele])
+    cum_allele_freq_2 = np.sum(allele_probs_2[alleles_2 == allele])
+    return (cum_allele_freq_1 + cum_allele_freq_2) / (2 * n)
 
 
 def write_vcf(impdata, out_file, *, chr_name="1"):