Skip to content

Commit

Permalink
add more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jdidion committed Apr 4, 2024
1 parent 8f7a4eb commit 81e39d1
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 11 deletions.
29 changes: 18 additions & 11 deletions src/main/scala/com/fulcrumgenomics/vcf/DownsampleVcf.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import com.fulcrumgenomics.vcf.DownsampleVcf.{downsampleAndRegenotype, winnowVar

import scala.math.log10
import scala.util.Random
import scala.tools.nsc.doc.html.HtmlTags

object DownsampleVcf extends LazyLogging {
/** Removes variants that are within a specified distance from a previous variant.
Expand Down Expand Up @@ -103,6 +104,16 @@ object DownsampleVcf extends LazyLogging {
gt.copy(attrs=Map("PL" -> pls, "AD" -> newAds, "DP" -> newAds.sum), calls=calls)
}

/**Converts a sequence of log-likelihoods to phred-scale by 1) multiplying each by -10, 2)
* subtracting from each the min value so the smallest value is 0, and 3) rounding to the
* nearest integer.
*/
def logToPhredLikelihoods(logLikelihoods: IndexedSeq[Double]): IndexedSeq[Int] = {
val rawPL = logLikelihoods.map(gl => gl * -10)
val minPL = rawPL.min
rawPL.map(pl => (pl - minPL).round.toInt)
}

object Likelihoods {
/** Computes the likelihoods for each possible biallelic genotype.
* @param alleleDepthA the reference allele depth
Expand All @@ -122,13 +133,14 @@ object DownsampleVcf extends LazyLogging {
Likelihoods(2, IndexedSeq(rawGlAA, rawGlAB, rawGlBB))

Check warning on line 133 in src/main/scala/com/fulcrumgenomics/vcf/DownsampleVcf.scala

View check run for this annotation

Codecov / codecov/patch

src/main/scala/com/fulcrumgenomics/vcf/DownsampleVcf.scala#L133

Added line #L133 was not covered by tests
}

/** Computes the likelihoods for each possible multiallelic genotype.
/** Computes the likelihoods for each possible genotype given a sequence of read depths for any
* number of alleles.
* @param alleleDepths the sequence of allele depths in the order specified in the VCF
* @param epsilon the error rate for genotyping
* @return a new `Likelihood` that has the likelihoods of all possible genotypes in the order
* specified in VFC spec for the GL/PL tags.
* @return a new `Likelihood` that has the log likelihoods of all possible genotypes in the
* order specified in VFC spec for the GL/PL tags.
*/
def multiallelic(alleleDepths: IndexedSeq[Int], epsilon: Double = 0.01): Likelihoods = {
def generalized(alleleDepths: IndexedSeq[Int], epsilon: Double = 0.01): Likelihoods = {
val numAlleles = alleleDepths.length
// probabilities associated with each possible genotype for a pair of alleles
val probs: Array[Double] = Array(
Expand All @@ -151,8 +163,7 @@ object DownsampleVcf extends LazyLogging {

def apply(alleleDepths: IndexedSeq[Int], epsilon: Double = 0.01): Likelihoods = {
require(alleleDepths.length >= 2, "at least two alleles are required to calculate genotype likelihoods")
if (alleleDepths.length > 2) multiallelic(alleleDepths, epsilon)
else biallelic(alleleDepths(0), alleleDepths(1), epsilon)
generalized(alleleDepths, epsilon)
}
}

Expand All @@ -166,11 +177,7 @@ object DownsampleVcf extends LazyLogging {
* @return a list of phred-scaled likelihooodS for AA, AB, BB.
*/
def pls: IndexedSeq[Int] = {
// subtract the min value so the smallest GL is 0, then multiply by -10 and convert to
// Int to make it PHRED-scale
val rawPL = genotypeLikelihoods.map(gl => gl * -10)
val minPL = rawPL.min
rawPL.map(pl => (pl - minPL).round.toInt)
logToPhredLikelihoods(genotypeLikelihoods)
}

def mostLikelyGenotype: Option[(Int, Int)] = {
Expand Down
17 changes: 17 additions & 0 deletions src/test/scala/com/fulcrumgenomics/vcf/DownsampleVcfTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,23 @@ class DownsampleVcfTest extends UnitSpec {
likelihood.pls should contain theSameElementsInOrderAs expected
}

it should "return correct results for basic cases" in {
val e = 0.01
val cases: IndexedSeq[(IndexedSeq[Int], IndexedSeq[Double])] = IndexedSeq(
(IndexedSeq(1, 0), IndexedSeq(1 - e, 0.5, e)),
(IndexedSeq(0, 1), IndexedSeq(e, 0.5, 1 - e)),
(IndexedSeq(1, 1), IndexedSeq((1 - e) * e, 0.25, (1 - e) * e)),
(IndexedSeq(2, 0), IndexedSeq(math.pow((1 - e), 2), 0.25, math.pow(e, 2))),
(IndexedSeq(0, 0, 1), IndexedSeq(e, e, e, 0.5, 0.5, 1 - e)),
)
cases.foreach { case (input, output) =>
val likelihood = Likelihoods(input, e)
val logOutput = output.map(p => math.log10(p))
likelihood.pls.length shouldBe logOutput.length
likelihood.pls should contain theSameElementsInOrderAs DownsampleVcf.logToPhredLikelihoods(logOutput)
}
}

it should "return a likelihood of 0 for AA if there are only ref alleles observed" in {
val likelihood = Likelihoods(IndexedSeq(10, 0))
val expected = IndexedSeq[Int](0, 30, 200)
Expand Down

0 comments on commit 81e39d1

Please sign in to comment.