From 2f50599cd3cda260b14a2d18b9ccb053204f8e28 Mon Sep 17 00:00:00 2001 From: Kari Stromhaug Date: Mon, 28 Mar 2022 11:41:36 -0600 Subject: [PATCH 01/17] Remove use of VariantContextSetBuilder from ErrorRateByReadPositionTest.scala --- .../bam/ErrorRateByReadPositionTest.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/bam/ErrorRateByReadPositionTest.scala b/src/test/scala/com/fulcrumgenomics/bam/ErrorRateByReadPositionTest.scala index 9524bb49d..52adbf2ba 100644 --- a/src/test/scala/com/fulcrumgenomics/bam/ErrorRateByReadPositionTest.scala +++ b/src/test/scala/com/fulcrumgenomics/bam/ErrorRateByReadPositionTest.scala @@ -26,13 +26,13 @@ package com.fulcrumgenomics.bam import java.nio.file.{Files, Path} - import com.fulcrumgenomics.bam.api.SamOrder import com.fulcrumgenomics.commons.io.PathUtil import com.fulcrumgenomics.fasta.Converters.ToSAMSequenceDictionary import com.fulcrumgenomics.fasta.SequenceDictionary import com.fulcrumgenomics.testing.SamBuilder.{Minus, Plus} -import com.fulcrumgenomics.testing.{ReferenceSetBuilder, SamBuilder, UnitSpec, VariantContextSetBuilder} +import com.fulcrumgenomics.testing.VcfBuilder.Gt +import com.fulcrumgenomics.testing.{ReferenceSetBuilder, SamBuilder, UnitSpec, VcfBuilder} import com.fulcrumgenomics.util.{Metric, Rscript} import htsjdk.samtools.util.{Interval, IntervalList} import com.fulcrumgenomics.util.Metric.Count @@ -74,11 +74,11 @@ class ErrorRateByReadPositionTest extends UnitSpec with OptionValues { private val dict = SequenceDictionary.extract(ref) private val vcf = { - val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) - builder.addVariant(1, 500, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C")) - builder.addVariant(2, 500, variantAlleles=List("C", "T"), genotypeAlleles=List("C", "T")) - builder.addVariant(3, 500, variantAlleles=List("G", "A"), genotypeAlleles=List("G", "A")) - builder.addVariant(4, 500, variantAlleles=List("T", "C"), genotypeAlleles=List("T", "C")) + val builder = VcfBuilder(samples=List("sample1")) + builder.add("1", 500, alleles=List("A", "C"), gts=Seq(Gt(sample="sample1", gt="0/1"))) + builder.add("2", 500, alleles=List("C", "T"), gts=Seq(Gt(sample="sample1", gt="0/1"))) + builder.add("3", 500, alleles=List("G", "A"), gts=Seq(Gt(sample="sample1", gt="0/1"))) + builder.add("4", 500, alleles=List("T", "C"), gts=Seq(Gt(sample="sample1", gt="0/1"))) builder.toTempFile() } From 9ffb390260fcbe0235e3029b49855bd48fd4e216 Mon Sep 17 00:00:00 2001 From: Kari Stromhaug Date: Mon, 28 Mar 2022 15:06:16 -0600 Subject: [PATCH 02/17] Remove VariantContextSetBuilder from ReviewConsensusVariantsTest.scala --- .../umi/ReviewConsensusVariantsTest.scala | 34 +++++++++++++------ 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/umi/ReviewConsensusVariantsTest.scala b/src/test/scala/com/fulcrumgenomics/umi/ReviewConsensusVariantsTest.scala index 016fabc60..037732888 100644 --- a/src/test/scala/com/fulcrumgenomics/umi/ReviewConsensusVariantsTest.scala +++ b/src/test/scala/com/fulcrumgenomics/umi/ReviewConsensusVariantsTest.scala @@ -28,9 +28,11 @@ import com.fulcrumgenomics.FgBioDef._ import com.fulcrumgenomics.bam.api.SamOrder import com.fulcrumgenomics.commons.io.PathUtil import com.fulcrumgenomics.testing.SamBuilder.{Minus, Plus} -import com.fulcrumgenomics.testing.{SamBuilder, UnitSpec, VariantContextSetBuilder} +import com.fulcrumgenomics.testing.VcfBuilder.Gt +import com.fulcrumgenomics.testing.{SamBuilder, UnitSpec, VcfBuilder} import com.fulcrumgenomics.umi.ReviewConsensusVariants.ConsensusVariantReviewInfo import com.fulcrumgenomics.util.{Io, Metric} +import com.fulcrumgenomics.vcf.api._ import htsjdk.samtools.SAMFileHeader import htsjdk.samtools.reference.{ReferenceSequenceFile, ReferenceSequenceFileFactory} import htsjdk.samtools.util.IntervalList @@ -79,12 +81,19 @@ class ReviewConsensusVariantsTest extends UnitSpec { lazy val ref: ReferenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFasta) - lazy val header = { + lazy val intervalListHeader = { val h = new SAMFileHeader h.setSequenceDictionary(ref.getSequenceDictionary) h } + lazy val vcfDefaultHeader: VcfHeader = VcfBuilder.DefaultHeader.copy( + contigs = ref.getSequenceDictionary.getSequences.iterator().map { r => + VcfContigHeader(r.getSequenceIndex, r.getSequenceName, Some(r.getSequenceLength)) } .toIndexedSeq + ) + +// lazy val vcfHeader = VcfHeader(ref.) + // We're going to simulate raw reads and consensuses as if there were variants at: // chr1:10 // chr1:20 @@ -180,7 +189,7 @@ class ReviewConsensusVariantsTest extends UnitSpec { val rawOut = outBase.getParent.resolve(s"${outBase.getFileName}.grouped.bam") val txtOut = outBase.getParent.resolve(s"${outBase.getFileName}.txt") val intervals = makeTempFile("empty.", ".interval_list") - new IntervalList(header).write(intervals.toFile) + new IntervalList(intervalListHeader).write(intervals.toFile) new ReviewConsensusVariants(input=intervals, consensusBam=consensusBam, groupedBam=rawBam, ref=refFasta, output=outBase).execute() conOut.toFile.exists() shouldBe true @@ -199,9 +208,9 @@ class ReviewConsensusVariantsTest extends UnitSpec { val txtOut = outBase.getParent.resolve(s"${outBase.getFileName}.txt") val intervals = makeTempFile("empty.", ".vcf") - val vcfBuilder = VariantContextSetBuilder("s1") - vcfBuilder.header.setSequenceDictionary(header.getSequenceDictionary) + val vcfBuilder = VcfBuilder(vcfDefaultHeader.copy(samples=IndexedSeq("s1"))) vcfBuilder.write(intervals) + new ReviewConsensusVariants(input=intervals, consensusBam=consensusBam, groupedBam=rawBam, ref=refFasta, output=outBase).execute() conOut.toFile.exists() shouldBe true @@ -219,12 +228,15 @@ class ReviewConsensusVariantsTest extends UnitSpec { val rawOut = outBase.getParent.resolve(s"${outBase.getFileName}.grouped.bam") val txtOut = outBase.getParent.resolve(s"${outBase.getFileName}.txt") - val vcfBuilder = new VariantContextSetBuilder(sampleNames=List("tumor")) - vcfBuilder.header.setSequenceDictionary(this.header.getSequenceDictionary) - vcfBuilder.addVariant(refIdx=0, start=10, variantAlleles=List("A","T"), genotypeAlleles=List("A","T"), genotypeAttributes=Map("AF" -> 0.01)) - vcfBuilder.addVariant(refIdx=0, start=20, variantAlleles=List("A","C"), genotypeAlleles=List("A","C"), genotypeAttributes=Map("AF" -> 0.01)) - vcfBuilder.addVariant(refIdx=0, start=30, variantAlleles=List("A","G"), genotypeAlleles=List("A","G"), genotypeAttributes=Map("AF" -> 0.01)) - vcfBuilder.addVariant(refIdx=1, start=20, variantAlleles=List("C","T"), genotypeAlleles=List("C","T"), genotypeAttributes=Map("AD" -> Array(100,2))) + val vcfBuilder = VcfBuilder(vcfDefaultHeader.copy( + samples=IndexedSeq("tumor"), + infos = Seq(VcfInfoHeader(id="AF", count=VcfCount.OnePerAllele, kind=VcfFieldType("Float"), description="Allele Frequency"), + VcfInfoHeader(id="AD", count=VcfCount.OnePerAllele, kind=VcfFieldType("Integer"), description="Allele Depth)"))) + ) + vcfBuilder.add(chrom="chr1", pos=10, alleles=Seq("A", "T"), gts=Seq(Gt(sample="tumor", gt="0/1")),info=Map("AF" -> 0.01)) + vcfBuilder.add(chrom="chr1", pos=20, alleles=Seq("A", "C"), gts=Seq(Gt(sample="tumor", gt="0/1")),info=Map("AF" -> 0.01)) + vcfBuilder.add(chrom="chr1", pos=30, alleles=Seq("A", "G"), gts=Seq(Gt(sample="tumor", gt="0/1")),info=Map("AF" -> 0.01)) + vcfBuilder.add(chrom="chr2", pos=20, alleles=Seq("C", "T"), gts=Seq(Gt(sample="tumor", gt="0/1")),info=Map("AD" -> Seq(100, 2))) new ReviewConsensusVariants(input=vcfBuilder.toTempFile(), consensusBam=consensusBam, groupedBam=rawBam, ref=refFasta, output=outBase).execute() From e4da320c7e67949ae1d6163cd659252f5b3b9408 Mon Sep 17 00:00:00 2001 From: Kari Stromhaug Date: Mon, 28 Mar 2022 16:15:20 -0600 Subject: [PATCH 03/17] Remove VariantContextBuilder from VariantMaskTest.scala --- .../fulcrumgenomics/vcf/VariantMaskTest.scala | 88 ++++++++++++++----- 1 file changed, 64 insertions(+), 24 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala index 2ea685f98..2cf582751 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala @@ -25,7 +25,12 @@ package com.fulcrumgenomics.vcf import com.fulcrumgenomics.fasta.SequenceDictionary -import com.fulcrumgenomics.testing.{ReferenceSetBuilder, UnitSpec, VariantContextSetBuilder} +import com.fulcrumgenomics.testing.VcfBuilder.Gt +import com.fulcrumgenomics.testing.{ReferenceSetBuilder, UnitSpec, VariantContextSetBuilder, VcfBuilder} +import com.fulcrumgenomics.vcf.api.{VcfContigHeader, VcfHeader} +import htsjdk.variant.vcf.VCFFileReader + +import scala.jdk.CollectionConverters.IteratorHasAsScala class VariantMaskTest extends UnitSpec { val ref = { @@ -41,11 +46,19 @@ class VariantMaskTest extends UnitSpec { val dict = SequenceDictionary.extract(ref) + lazy val vcfDefaultHeader: VcfHeader = VcfBuilder.DefaultHeader.copy( + contigs = dict.map { r => + VcfContigHeader(r.index, r.name, Some(r.length)) } .toIndexedSeq + ) + "VariantMask" should "mask SNPs as individual bases" in { - val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) - builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","T")) - builder.addVariant(refIdx=1, start=102, variantAlleles=List("A","T")) - val mask = VariantMask(builder.iterator, dict) + val builder = VcfBuilder(samples=Seq("S1")) + builder.add(chrom="chr1", pos=100, alleles=Seq("A", "T")) + builder.add(chrom="chr1", pos=102, alleles=Seq("A", "T")) + val testVcf = new VCFFileReader(builder.toTempFile(), false) + val testIterator = testVcf.iterator().asScala.toSeq + testVcf.close() + val mask = VariantMask(testIterator.iterator, dict) mask.isVariant(1, 99) shouldBe false mask.isVariant(1, 100) shouldBe true @@ -55,9 +68,10 @@ class VariantMaskTest extends UnitSpec { } it should "mask all deleted bases for deletions, plus the upstream base" in { - val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) - builder.addVariant(refIdx=1, start=100, variantAlleles=List("AA","A")) - val mask = VariantMask(builder.iterator, dict) + val builder = VcfBuilder(samples = Seq("S1")) + builder.add(chrom="chr1", pos=100, alleles=Seq("AA", "A")) + val testIterator = new VCFFileReader(builder.toTempFile(), false).iterator().asScala.iterator + val mask = VariantMask(testIterator, dict) mask.isVariant(1, 99) shouldBe false mask.isVariant(1, 100) shouldBe true @@ -66,9 +80,14 @@ class VariantMaskTest extends UnitSpec { } it should "mask just the upstream base for insertions" in { - val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) - builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","AA")) - val mask = VariantMask(builder.iterator, dict) +// val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) +// builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","AA")) + + val builder = VcfBuilder(samples = Seq("S1")) + builder.add(chrom="chr1", pos=100, alleles=Seq("A", "AA")) + val testIterator = new VCFFileReader(builder.toTempFile(), false).iterator().asScala.iterator + + val mask = VariantMask(testIterator, dict) mask.isVariant(1, 99) shouldBe false mask.isVariant(1, 100) shouldBe true @@ -77,9 +96,14 @@ class VariantMaskTest extends UnitSpec { } it should "allow querying be sequence name as well as ref index" in { - val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) - builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","T")) - val mask = VariantMask(builder.iterator, dict) +// val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) +// builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","T")) + + val builder = VcfBuilder(samples = Seq("S1")) + builder.add(chrom="chr1", pos=100, alleles=Seq("A", "T")) + val testIterator = new VCFFileReader(builder.toTempFile(), false).iterator().asScala.iterator + + val mask = VariantMask(testIterator, dict) mask.isVariant("chr0", 100) shouldBe false mask.isVariant("chr1", 100) shouldBe true @@ -91,23 +115,36 @@ class VariantMaskTest extends UnitSpec { it should "construct a mask ok from a VCF path" in { val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","C"), genotypeAlleles=List("A", "C")) +// val builder = VcfBuilder(samples = Seq("S1")) +// builder.add(chrom="chr1", pos=100, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0/1"))) + val mask = VariantMask(builder.toTempFile()) mask.isVariant(1, 100) shouldBe true } it should "throw an exception if a VCF doesn't have a sequence dictionary in it" in { - val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) - builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","C"), genotypeAlleles=List("A", "C")) - builder.setSequenceDictionary(SequenceDictionary()) + // TODO figure out how to test a vcf that doesn't have a header with VcfBuilder +// val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) +// builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","C"), genotypeAlleles=List("A", "C")) +// builder.setSequenceDictionary(SequenceDictionary()) + val builder = VcfBuilder(VcfHeader(contigs=IndexedSeq(), infos=Seq(), formats=Seq(), filters=Seq(), others=Seq(), samples=IndexedSeq("S1"))) + builder.add(chrom="chr1", pos=100, alleles=Seq("A", "C")) + val vcf = builder.toTempFile() an[Exception] shouldBe thrownBy { VariantMask(vcf) } } it should "throw an exception if requested to traverse backwards to an earlier reference" in { - val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) - builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","T")) - builder.addVariant(refIdx=2, start=200, variantAlleles=List("A","T")) - val mask = VariantMask(builder.iterator, dict) +// val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) +// builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","T")) +// builder.addVariant(refIdx=2, start=200, variantAlleles=List("A","T")) + + val builder = VcfBuilder(samples = Seq("S1")) + builder.add(chrom="chr1", pos=100, alleles=Seq("A", "T")) + builder.add(chrom="chr2", pos=200, alleles=Seq("A", "T")) + val testIterator = new VCFFileReader(builder.toTempFile(), false).iterator().asScala.iterator + + val mask = VariantMask(testIterator, dict) mask.isVariant(1, 100) shouldBe true mask.isVariant(2, 200) shouldBe true @@ -115,9 +152,12 @@ class VariantMaskTest extends UnitSpec { } it should "throw an exception if invalid reference sequences or positions are requested" in { - val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) - builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","T")) - val mask = VariantMask(builder.iterator, dict) +// val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) +// builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","T")) + val builder = VcfBuilder(header=vcfDefaultHeader) + builder.add(chrom="chr1", pos=100, alleles=Seq("A", "T")) + val testIterator = new VCFFileReader(builder.toTempFile(), false).iterator().asScala.iterator + val mask = VariantMask(testIterator, dict) an[Exception] shouldBe thrownBy { mask.isVariant(-1, 100) } // invalid index (low) an[Exception] shouldBe thrownBy { mask.isVariant("chrNope", 100) } // invalid ref name From 81e53960c9d5649f6331cfc28e32403ccd6797aa Mon Sep 17 00:00:00 2001 From: Kari Stromhaug Date: Tue, 29 Mar 2022 11:43:05 -0600 Subject: [PATCH 04/17] Remove a test that is no longer valid and fix another --- .../bam/ErrorRateByReadPositionTest.scala | 8 +++---- .../fulcrumgenomics/vcf/VariantMaskTest.scala | 22 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/bam/ErrorRateByReadPositionTest.scala b/src/test/scala/com/fulcrumgenomics/bam/ErrorRateByReadPositionTest.scala index 52adbf2ba..9bd96bfc4 100644 --- a/src/test/scala/com/fulcrumgenomics/bam/ErrorRateByReadPositionTest.scala +++ b/src/test/scala/com/fulcrumgenomics/bam/ErrorRateByReadPositionTest.scala @@ -75,10 +75,10 @@ class ErrorRateByReadPositionTest extends UnitSpec with OptionValues { private val vcf = { val builder = VcfBuilder(samples=List("sample1")) - builder.add("1", 500, alleles=List("A", "C"), gts=Seq(Gt(sample="sample1", gt="0/1"))) - builder.add("2", 500, alleles=List("C", "T"), gts=Seq(Gt(sample="sample1", gt="0/1"))) - builder.add("3", 500, alleles=List("G", "A"), gts=Seq(Gt(sample="sample1", gt="0/1"))) - builder.add("4", 500, alleles=List("T", "C"), gts=Seq(Gt(sample="sample1", gt="0/1"))) + builder.add("chr1", 500, alleles=List("A", "C"), gts=Seq(Gt(sample="sample1", gt="0/1"))) + builder.add("chr2", 500, alleles=List("C", "T"), gts=Seq(Gt(sample="sample1", gt="0/1"))) + builder.add("chr3", 500, alleles=List("G", "A"), gts=Seq(Gt(sample="sample1", gt="0/1"))) + builder.add("chr4", 500, alleles=List("T", "C"), gts=Seq(Gt(sample="sample1", gt="0/1"))) builder.toTempFile() } diff --git a/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala index 2cf582751..b599f90c5 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala @@ -122,17 +122,17 @@ class VariantMaskTest extends UnitSpec { mask.isVariant(1, 100) shouldBe true } - it should "throw an exception if a VCF doesn't have a sequence dictionary in it" in { - // TODO figure out how to test a vcf that doesn't have a header with VcfBuilder -// val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) -// builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","C"), genotypeAlleles=List("A", "C")) -// builder.setSequenceDictionary(SequenceDictionary()) - val builder = VcfBuilder(VcfHeader(contigs=IndexedSeq(), infos=Seq(), formats=Seq(), filters=Seq(), others=Seq(), samples=IndexedSeq("S1"))) - builder.add(chrom="chr1", pos=100, alleles=Seq("A", "C")) - - val vcf = builder.toTempFile() - an[Exception] shouldBe thrownBy { VariantMask(vcf) } - } +// it should "throw an exception if a VCF doesn't have a sequence dictionary in it" in { +// // TODO figure out how to test a vcf that doesn't have a header with VcfBuilder +//// val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) +//// builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","C"), genotypeAlleles=List("A", "C")) +//// builder.setSequenceDictionary(SequenceDictionary()) +// val builder = VcfBuilder(VcfHeader(contigs=IndexedSeq(), infos=Seq(), formats=Seq(), filters=Seq(), others=Seq(), samples=IndexedSeq("S1"))) +// builder.add(chrom="chr1", pos=100, alleles=Seq("A", "C")) +// +// val vcf = builder.toTempFile() +// an[Exception] shouldBe thrownBy { VariantMask(vcf) } +// } it should "throw an exception if requested to traverse backwards to an earlier reference" in { // val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) From 4137b4472f1073236190e35ed3ef9850fc6b73c6 Mon Sep 17 00:00:00 2001 From: Kari Stromhaug Date: Tue, 29 Mar 2022 11:50:26 -0600 Subject: [PATCH 05/17] Fix one test and remove commented out code --- .../fulcrumgenomics/vcf/VariantMaskTest.scala | 22 ++++--------------- 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala index b599f90c5..ebce2d2df 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala @@ -26,7 +26,7 @@ package com.fulcrumgenomics.vcf import com.fulcrumgenomics.fasta.SequenceDictionary import com.fulcrumgenomics.testing.VcfBuilder.Gt -import com.fulcrumgenomics.testing.{ReferenceSetBuilder, UnitSpec, VariantContextSetBuilder, VcfBuilder} +import com.fulcrumgenomics.testing.{ReferenceSetBuilder, UnitSpec, VcfBuilder} import com.fulcrumgenomics.vcf.api.{VcfContigHeader, VcfHeader} import htsjdk.variant.vcf.VCFFileReader @@ -44,7 +44,7 @@ class VariantMaskTest extends UnitSpec { builder.toTempFile() } - val dict = SequenceDictionary.extract(ref) + val dict: SequenceDictionary = SequenceDictionary.extract(ref) lazy val vcfDefaultHeader: VcfHeader = VcfBuilder.DefaultHeader.copy( contigs = dict.map { r => @@ -80,9 +80,6 @@ class VariantMaskTest extends UnitSpec { } it should "mask just the upstream base for insertions" in { -// val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) -// builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","AA")) - val builder = VcfBuilder(samples = Seq("S1")) builder.add(chrom="chr1", pos=100, alleles=Seq("A", "AA")) val testIterator = new VCFFileReader(builder.toTempFile(), false).iterator().asScala.iterator @@ -96,9 +93,6 @@ class VariantMaskTest extends UnitSpec { } it should "allow querying be sequence name as well as ref index" in { -// val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) -// builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","T")) - val builder = VcfBuilder(samples = Seq("S1")) builder.add(chrom="chr1", pos=100, alleles=Seq("A", "T")) val testIterator = new VCFFileReader(builder.toTempFile(), false).iterator().asScala.iterator @@ -113,10 +107,8 @@ class VariantMaskTest extends UnitSpec { } it should "construct a mask ok from a VCF path" in { - val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) - builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","C"), genotypeAlleles=List("A", "C")) -// val builder = VcfBuilder(samples = Seq("S1")) -// builder.add(chrom="chr1", pos=100, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0/1"))) + val builder = VcfBuilder(vcfDefaultHeader.copy(samples = IndexedSeq("S1"))) + builder.add(chrom="chr1", pos=100, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0/1"))) val mask = VariantMask(builder.toTempFile()) mask.isVariant(1, 100) shouldBe true @@ -135,10 +127,6 @@ class VariantMaskTest extends UnitSpec { // } it should "throw an exception if requested to traverse backwards to an earlier reference" in { -// val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) -// builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","T")) -// builder.addVariant(refIdx=2, start=200, variantAlleles=List("A","T")) - val builder = VcfBuilder(samples = Seq("S1")) builder.add(chrom="chr1", pos=100, alleles=Seq("A", "T")) builder.add(chrom="chr2", pos=200, alleles=Seq("A", "T")) @@ -152,8 +140,6 @@ class VariantMaskTest extends UnitSpec { } it should "throw an exception if invalid reference sequences or positions are requested" in { -// val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) -// builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","T")) val builder = VcfBuilder(header=vcfDefaultHeader) builder.add(chrom="chr1", pos=100, alleles=Seq("A", "T")) val testIterator = new VCFFileReader(builder.toTempFile(), false).iterator().asScala.iterator From 2a92e69bae57fa29dd46deed08230874bf8d3e4e Mon Sep 17 00:00:00 2001 From: Kari Stromhaug Date: Wed, 30 Mar 2022 13:03:59 -0600 Subject: [PATCH 06/17] A few more changes to VariantMaskTest --- .../fulcrumgenomics/vcf/VariantMaskTest.scala | 43 ++++++++----------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala index ebce2d2df..2034e790f 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala @@ -55,11 +55,7 @@ class VariantMaskTest extends UnitSpec { val builder = VcfBuilder(samples=Seq("S1")) builder.add(chrom="chr1", pos=100, alleles=Seq("A", "T")) builder.add(chrom="chr1", pos=102, alleles=Seq("A", "T")) - val testVcf = new VCFFileReader(builder.toTempFile(), false) - val testIterator = testVcf.iterator().asScala.toSeq - testVcf.close() - val mask = VariantMask(testIterator.iterator, dict) - + val mask = VariantMask(builder.iterator, dict) mask.isVariant(1, 99) shouldBe false mask.isVariant(1, 100) shouldBe true mask.isVariant(1, 101) shouldBe false @@ -70,8 +66,7 @@ class VariantMaskTest extends UnitSpec { it should "mask all deleted bases for deletions, plus the upstream base" in { val builder = VcfBuilder(samples = Seq("S1")) builder.add(chrom="chr1", pos=100, alleles=Seq("AA", "A")) - val testIterator = new VCFFileReader(builder.toTempFile(), false).iterator().asScala.iterator - val mask = VariantMask(testIterator, dict) + val mask = VariantMask(builder.iterator, dict) mask.isVariant(1, 99) shouldBe false mask.isVariant(1, 100) shouldBe true @@ -82,9 +77,8 @@ class VariantMaskTest extends UnitSpec { it should "mask just the upstream base for insertions" in { val builder = VcfBuilder(samples = Seq("S1")) builder.add(chrom="chr1", pos=100, alleles=Seq("A", "AA")) - val testIterator = new VCFFileReader(builder.toTempFile(), false).iterator().asScala.iterator - val mask = VariantMask(testIterator, dict) + val mask = VariantMask(builder.iterator, dict) mask.isVariant(1, 99) shouldBe false mask.isVariant(1, 100) shouldBe true @@ -95,9 +89,8 @@ class VariantMaskTest extends UnitSpec { it should "allow querying be sequence name as well as ref index" in { val builder = VcfBuilder(samples = Seq("S1")) builder.add(chrom="chr1", pos=100, alleles=Seq("A", "T")) - val testIterator = new VCFFileReader(builder.toTempFile(), false).iterator().asScala.iterator - val mask = VariantMask(testIterator, dict) + val mask = VariantMask(builder.iterator, dict) mask.isVariant("chr0", 100) shouldBe false mask.isVariant("chr1", 100) shouldBe true @@ -113,26 +106,25 @@ class VariantMaskTest extends UnitSpec { val mask = VariantMask(builder.toTempFile()) mask.isVariant(1, 100) shouldBe true } - -// it should "throw an exception if a VCF doesn't have a sequence dictionary in it" in { -// // TODO figure out how to test a vcf that doesn't have a header with VcfBuilder -//// val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) -//// builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","C"), genotypeAlleles=List("A", "C")) -//// builder.setSequenceDictionary(SequenceDictionary()) -// val builder = VcfBuilder(VcfHeader(contigs=IndexedSeq(), infos=Seq(), formats=Seq(), filters=Seq(), others=Seq(), samples=IndexedSeq("S1"))) -// builder.add(chrom="chr1", pos=100, alleles=Seq("A", "C")) // -// val vcf = builder.toTempFile() -// an[Exception] shouldBe thrownBy { VariantMask(vcf) } -// } +//// it should "throw an exception if a VCF doesn't have a sequence dictionary in it" in { +//// // TODO figure out how to test a vcf that doesn't have a header with VcfBuilder +////// val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) +////// builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","C"), genotypeAlleles=List("A", "C")) +////// builder.setSequenceDictionary(SequenceDictionary()) +//// val builder = VcfBuilder(VcfHeader(contigs=IndexedSeq(), infos=Seq(), formats=Seq(), filters=Seq(), others=Seq(), samples=IndexedSeq("S1"))) +//// builder.add(chrom="chr1", pos=100, alleles=Seq("A", "C")) +//// +//// val vcf = builder.toTempFile() +//// an[Exception] shouldBe thrownBy { VariantMask(vcf) } +//// } it should "throw an exception if requested to traverse backwards to an earlier reference" in { val builder = VcfBuilder(samples = Seq("S1")) builder.add(chrom="chr1", pos=100, alleles=Seq("A", "T")) builder.add(chrom="chr2", pos=200, alleles=Seq("A", "T")) - val testIterator = new VCFFileReader(builder.toTempFile(), false).iterator().asScala.iterator - val mask = VariantMask(testIterator, dict) + val mask = VariantMask(builder.iterator, dict) mask.isVariant(1, 100) shouldBe true mask.isVariant(2, 200) shouldBe true @@ -142,8 +134,7 @@ class VariantMaskTest extends UnitSpec { it should "throw an exception if invalid reference sequences or positions are requested" in { val builder = VcfBuilder(header=vcfDefaultHeader) builder.add(chrom="chr1", pos=100, alleles=Seq("A", "T")) - val testIterator = new VCFFileReader(builder.toTempFile(), false).iterator().asScala.iterator - val mask = VariantMask(testIterator, dict) + val mask = VariantMask(builder.iterator, dict) an[Exception] shouldBe thrownBy { mask.isVariant(-1, 100) } // invalid index (low) an[Exception] shouldBe thrownBy { mask.isVariant("chrNope", 100) } // invalid ref name From 9f1b6e7e843d8858b26154800a68bf8a7533b9b5 Mon Sep 17 00:00:00 2001 From: Kari Stromhaug Date: Wed, 30 Mar 2022 14:21:20 -0600 Subject: [PATCH 07/17] Fix VariantMask tests --- .../fulcrumgenomics/vcf/VariantMaskTest.scala | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala index 2034e790f..33bf5ed15 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala @@ -55,7 +55,8 @@ class VariantMaskTest extends UnitSpec { val builder = VcfBuilder(samples=Seq("S1")) builder.add(chrom="chr1", pos=100, alleles=Seq("A", "T")) builder.add(chrom="chr1", pos=102, alleles=Seq("A", "T")) - val mask = VariantMask(builder.iterator, dict) + val iterator = new VCFFileReader(builder.toTempFile()) + val mask = VariantMask(iterator.iterator().asScala, dict=dict) mask.isVariant(1, 99) shouldBe false mask.isVariant(1, 100) shouldBe true mask.isVariant(1, 101) shouldBe false @@ -66,7 +67,8 @@ class VariantMaskTest extends UnitSpec { it should "mask all deleted bases for deletions, plus the upstream base" in { val builder = VcfBuilder(samples = Seq("S1")) builder.add(chrom="chr1", pos=100, alleles=Seq("AA", "A")) - val mask = VariantMask(builder.iterator, dict) + val iterator = new VCFFileReader(builder.toTempFile()) + val mask = VariantMask(iterator.iterator().asScala, dict=dict) mask.isVariant(1, 99) shouldBe false mask.isVariant(1, 100) shouldBe true @@ -77,8 +79,8 @@ class VariantMaskTest extends UnitSpec { it should "mask just the upstream base for insertions" in { val builder = VcfBuilder(samples = Seq("S1")) builder.add(chrom="chr1", pos=100, alleles=Seq("A", "AA")) - - val mask = VariantMask(builder.iterator, dict) + val iterator = new VCFFileReader(builder.toTempFile()) + val mask = VariantMask(iterator.iterator().asScala, dict=dict) mask.isVariant(1, 99) shouldBe false mask.isVariant(1, 100) shouldBe true @@ -89,8 +91,8 @@ class VariantMaskTest extends UnitSpec { it should "allow querying be sequence name as well as ref index" in { val builder = VcfBuilder(samples = Seq("S1")) builder.add(chrom="chr1", pos=100, alleles=Seq("A", "T")) - - val mask = VariantMask(builder.iterator, dict) + val iterator = new VCFFileReader(builder.toTempFile()) + val mask = VariantMask(iterator.iterator().asScala, dict=dict) mask.isVariant("chr0", 100) shouldBe false mask.isVariant("chr1", 100) shouldBe true @@ -102,7 +104,6 @@ class VariantMaskTest extends UnitSpec { it should "construct a mask ok from a VCF path" in { val builder = VcfBuilder(vcfDefaultHeader.copy(samples = IndexedSeq("S1"))) builder.add(chrom="chr1", pos=100, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0/1"))) - val mask = VariantMask(builder.toTempFile()) mask.isVariant(1, 100) shouldBe true } @@ -123,8 +124,8 @@ class VariantMaskTest extends UnitSpec { val builder = VcfBuilder(samples = Seq("S1")) builder.add(chrom="chr1", pos=100, alleles=Seq("A", "T")) builder.add(chrom="chr2", pos=200, alleles=Seq("A", "T")) - - val mask = VariantMask(builder.iterator, dict) + val iterator = new VCFFileReader(builder.toTempFile()) + val mask = VariantMask(iterator.iterator().asScala, dict=dict) mask.isVariant(1, 100) shouldBe true mask.isVariant(2, 200) shouldBe true @@ -134,7 +135,8 @@ class VariantMaskTest extends UnitSpec { it should "throw an exception if invalid reference sequences or positions are requested" in { val builder = VcfBuilder(header=vcfDefaultHeader) builder.add(chrom="chr1", pos=100, alleles=Seq("A", "T")) - val mask = VariantMask(builder.iterator, dict) + val iterator = new VCFFileReader(builder.toTempFile()) + val mask = VariantMask(iterator.iterator().asScala, dict=dict) an[Exception] shouldBe thrownBy { mask.isVariant(-1, 100) } // invalid index (low) an[Exception] shouldBe thrownBy { mask.isVariant("chrNope", 100) } // invalid ref name From f0a08f15d3be022f081a5115ffd4997a582133cc Mon Sep 17 00:00:00 2001 From: Kari Stromhaug Date: Wed, 30 Mar 2022 14:38:32 -0600 Subject: [PATCH 08/17] Most tests for AssessPhasingTest changed --- .../vcf/AssessPhasingTest.scala | 333 ++++++++++-------- 1 file changed, 193 insertions(+), 140 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala index 025f7ae82..a2046976b 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala @@ -25,14 +25,15 @@ package com.fulcrumgenomics.vcf import java.nio.file.{Files, Paths} - import com.fulcrumgenomics.FgBioDef._ import com.fulcrumgenomics.commons.io.PathUtil import com.fulcrumgenomics.commons.util.NumericCounter import com.fulcrumgenomics.fasta.Converters.FromSAMSequenceDictionary -import com.fulcrumgenomics.testing.{ErrorLogLevel, UnitSpec, VariantContextSetBuilder} +import com.fulcrumgenomics.testing.VcfBuilder.Gt +import com.fulcrumgenomics.testing.{ErrorLogLevel, UnitSpec, VariantContextSetBuilder, VcfBuilder} import com.fulcrumgenomics.util.Metric import com.fulcrumgenomics.vcf.PhaseCigar.IlluminaSwitchErrors +import com.fulcrumgenomics.vcf.api.Genotype import htsjdk.samtools.SAMFileHeader import htsjdk.samtools.util.{Interval, IntervalList} import htsjdk.variant.variantcontext.writer.{Options, VariantContextWriterBuilder} @@ -298,19 +299,21 @@ class PhaseBlockTest extends ErrorLogLevel { import AssessPhasingTest.withPhasingSetId "PhaseBlock.toOverlapDetector" should "create an empty detector if no variants are given" in { - val builder = new VariantContextSetBuilder() - PhaseBlock.buildOverlapDetector(iterator=builder.iterator, dict=builder.dict).getAll.isEmpty shouldBe true + val builder = new VCFFileReader(VcfBuilder(samples=Seq("s1")).toTempFile()) + PhaseBlock.buildOverlapDetector(iterator=builder.iterator, dict=builder.getFileHeader.getSequenceDictionary.fromSam).getAll.isEmpty shouldBe true } it should "create an empty detector when variants do not have the phase set tag" in { - val builder = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - PhaseBlock.buildOverlapDetector(iterator=builder.iterator, dict=builder.dict).getAll.isEmpty shouldBe true + val builder = VcfBuilder(samples=Seq("s1")).add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0/1"))) + val contextBuilder = new VCFFileReader(builder.toTempFile()) + PhaseBlock.buildOverlapDetector(iterator=contextBuilder.iterator, dict=contextBuilder.getFileHeader.getSequenceDictionary.fromSam).getAll.isEmpty shouldBe true } it should "create a detector for a single variant" in { - val builder = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) + val vcfBuilder = VcfBuilder(samples=Seq("s1")).add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0/1"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) val iterator = builder.iterator.map { ctx => withPhasingSetId(ctx, 1) } - val detector = PhaseBlock.buildOverlapDetector(iterator=iterator, dict=builder.dict) + val detector = PhaseBlock.buildOverlapDetector(iterator=iterator, dict=builder.getFileHeader.getSequenceDictionary.fromSam) detector.getAll should have size 1 val interval = detector.getAll.toSeq.head interval.getStart shouldBe 1 @@ -318,13 +321,13 @@ class PhaseBlockTest extends ErrorLogLevel { } it should "create a detector from multiple variants within one block" in { - val builder = new VariantContextSetBuilder() - builder.addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - builder.addVariant(start=2, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - builder.addVariant(start=3, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - - val iterator = builder.iterator.map { ctx => withPhasingSetId(ctx, 1) } - val detector = PhaseBlock.buildOverlapDetector(iterator=iterator, dict=builder.dict) + val vcfBuilder = VcfBuilder(samples=Seq("s1")) + vcfBuilder.add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilder.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilder.add(pos=3, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) + val iterator = builder.iterator().map { ctx => withPhasingSetId(ctx, 1) } + val detector = PhaseBlock.buildOverlapDetector(iterator=iterator, dict=builder.getFileHeader.getSequenceDictionary.fromSam) detector.getAll should have size 1 val interval = detector.getAll.toSeq.head interval.getStart shouldBe 1 @@ -332,19 +335,20 @@ class PhaseBlockTest extends ErrorLogLevel { } it should "create a detector from multiple variants across multiple blocks" in { - val builderBlockOne = new VariantContextSetBuilder() - builderBlockOne.addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - builderBlockOne.addVariant(start=2, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - builderBlockOne.addVariant(start=3, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - - val builderBlockTwo = new VariantContextSetBuilder() - builderBlockTwo.addVariant(start=4, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - builderBlockTwo.addVariant(start=5, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - builderBlockTwo.addVariant(start=6, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - - val iterator = builderBlockOne.iterator.map { ctx => withPhasingSetId(ctx, 1) } ++ builderBlockTwo.iterator.map { ctx => withPhasingSetId(ctx, 4) } - - val detector = PhaseBlock.buildOverlapDetector(iterator=iterator, dict=builderBlockOne.dict) + val vcfBuilderBlockOne = VcfBuilder(samples=Seq("s1")) + vcfBuilderBlockOne.add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockOne.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockOne.add(pos=3, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val vcfBuilderBlockTwo = VcfBuilder(samples=Seq("s1")) + vcfBuilderBlockTwo.add(pos=4, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockTwo.add(pos=5, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockTwo.add(pos=6, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + + val builderBlockOne = new VCFFileReader(vcfBuilderBlockOne.toTempFile()) + val builderBlockTwo = new VCFFileReader(vcfBuilderBlockTwo.toTempFile()) + val iterator = builderBlockOne.iterator().map { ctx => withPhasingSetId(ctx, 1) } ++ builderBlockTwo.iterator.map { ctx => withPhasingSetId(ctx, 4) } + + val detector = PhaseBlock.buildOverlapDetector(iterator=iterator, dict=builderBlockOne.getFileHeader.getSequenceDictionary.fromSam) detector.getAll should have size 2 val intervals = detector.getAll.toSeq.sortBy(_.getStart) val head = intervals.head @@ -356,22 +360,22 @@ class PhaseBlockTest extends ErrorLogLevel { } it should "keep the larger block when one block encloses/contains another" in { - val builderBlockOne = new VariantContextSetBuilder() - builderBlockOne.addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - builderBlockOne.addVariant(start=2, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - builderBlockOne.addVariant(start=3, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) + val vcfBuilderBlockOne = VcfBuilder(samples=Seq("s1")) + vcfBuilderBlockOne.add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockOne.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockOne.add(pos=3, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builderBlockOne = new VCFFileReader(vcfBuilderBlockOne.toTempFile()) // second fully contained in the first { - val builderBlockTwo = new VariantContextSetBuilder() - builderBlockTwo.addVariant(start = 2, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) - - val contexts = (builderBlockOne.iterator.map { ctx => withPhasingSetId(ctx, 1) } ++ builderBlockTwo.iterator.map { ctx => withPhasingSetId(ctx, 2) }).toSeq + val vcfBuilderBlockTwo = VcfBuilder(samples=Seq("S1")).add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builderBlockTwo = new VCFFileReader(vcfBuilderBlockTwo.toTempFile()) + val contexts = (builderBlockOne.iterator().map { ctx => withPhasingSetId(ctx, 1) } ++ builderBlockTwo.iterator().map { ctx => withPhasingSetId(ctx, 2) }).toSeq // Check that if we do not want to modify the blocks we get an exception - an[Exception] should be thrownBy PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.dict, modifyBlocks = false) + an[Exception] should be thrownBy PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.getFileHeader.getSequenceDictionary.fromSam, modifyBlocks = false) - val detector = PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.dict) + val detector = PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.getFileHeader.getSequenceDictionary.fromSam) detector.getAll should have size 1 val intervals = detector.getAll.toSeq.sortBy(_.getStart) val head = intervals.head @@ -381,15 +385,15 @@ class PhaseBlockTest extends ErrorLogLevel { // first fully contained in the second { - val builderBlockTwo = new VariantContextSetBuilder() - builderBlockTwo.addVariant(start = 2, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) + val vcfBuilderBlockTwo = VcfBuilder(samples=Seq("s1")).add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builderBlockTwo = new VCFFileReader(vcfBuilderBlockTwo.toTempFile()) - val contexts = (builderBlockTwo.iterator.map { ctx => withPhasingSetId(ctx, 2) } ++ builderBlockOne.iterator.map { ctx => withPhasingSetId(ctx, 1) }).toSeq + val contexts = (builderBlockTwo.iterator().map { ctx => withPhasingSetId(ctx, 2) } ++ builderBlockOne.iterator().map { ctx => withPhasingSetId(ctx, 1) }).toSeq // Check that if we do not want to modify the blocks we get an exception - an[Exception] should be thrownBy PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.dict, modifyBlocks = false) + an[Exception] should be thrownBy PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.getFileHeader.getSequenceDictionary.fromSam, modifyBlocks = false) - val detector = PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.dict) + val detector = PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.getFileHeader.getSequenceDictionary.fromSam) detector.getAll should have size 1 val intervals = detector.getAll.toSeq.sortBy(_.getStart) val head = intervals.head @@ -401,7 +405,7 @@ class PhaseBlockTest extends ErrorLogLevel { { val contexts = builderBlockOne.iterator.map { ctx => withPhasingSetId(ctx, 1) }.toSeq - val detector = PhaseBlock.buildOverlapDetector(iterator = (contexts ++ contexts).iterator, dict = builderBlockOne.dict) + val detector = PhaseBlock.buildOverlapDetector(iterator = (contexts ++ contexts).iterator, dict = builderBlockOne.getFileHeader.getSequenceDictionary.fromSam) detector.getAll should have size 1 val intervals = detector.getAll.toSeq.sortBy(_.getStart) val head = intervals.head @@ -411,24 +415,26 @@ class PhaseBlockTest extends ErrorLogLevel { } it should "truncate the smaller block when too blocks overlap" in { - val builderBlockOne = new VariantContextSetBuilder() - builderBlockOne.addVariant(start=2, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - builderBlockOne.addVariant(start=3, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - builderBlockOne.addVariant(start=4, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) + val vcfBuilderBlockOne = VcfBuilder(samples=Seq("s1")) + vcfBuilderBlockOne.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockOne.add(pos=3, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockOne.add(pos=4, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builderBlockOne = new VCFFileReader(vcfBuilderBlockOne.toTempFile()) // first block is extended { - val builderBlockTwo = new VariantContextSetBuilder() - builderBlockTwo.addVariant(start = 3, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) - builderBlockTwo.addVariant(start = 4, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) - builderBlockTwo.addVariant(start = 5, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) + val vcfBuilderBlockTwo = VcfBuilder(samples=Seq("s1")) + vcfBuilderBlockTwo.add(pos=3, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockTwo.add(pos=4, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockTwo.add(pos=5, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builderBlockTwo = new VCFFileReader(vcfBuilderBlockTwo.toTempFile()) - val contexts = (builderBlockOne.iterator.map { ctx => withPhasingSetId(ctx, 2) } ++ builderBlockTwo.iterator.map { ctx => withPhasingSetId(ctx, 3) }).toSeq + val contexts = (builderBlockOne.iterator().map { ctx => withPhasingSetId(ctx, 2) } ++ builderBlockTwo.iterator().map { ctx => withPhasingSetId(ctx, 3) }).toSeq // Check that if we do not want to modify the blocks we get an exception - an[Exception] should be thrownBy PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.dict, modifyBlocks = false) + an[Exception] should be thrownBy PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.getFileHeader.getSequenceDictionary.fromSam, modifyBlocks = false) - val detector = PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.dict) + val detector = PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.getFileHeader.getSequenceDictionary.fromSam) detector.getAll should have size 2 val intervals = detector.getAll.toSeq.sortBy(_.getStart) val head = intervals.head @@ -441,18 +447,19 @@ class PhaseBlockTest extends ErrorLogLevel { // second block is extended { - val builderBlockTwo = new VariantContextSetBuilder() - builderBlockTwo.addVariant(start = 3, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) - builderBlockTwo.addVariant(start = 4, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) - builderBlockTwo.addVariant(start = 5, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) - builderBlockTwo.addVariant(start = 6, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) + val vcfBuilderBlockTwo = VcfBuilder(samples=Seq("s1")) + vcfBuilderBlockTwo.add(pos=3, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockTwo.add(pos=4, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockTwo.add(pos=5, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockTwo.add(pos=6, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builderBlockTwo = new VCFFileReader(vcfBuilderBlockTwo.toTempFile()) - val contexts = (builderBlockOne.iterator.map { ctx => withPhasingSetId(ctx, 2) } ++ builderBlockTwo.iterator.map { ctx => withPhasingSetId(ctx, 3) }).toSeq + val contexts = (builderBlockOne.iterator().map { ctx => withPhasingSetId(ctx, 2) } ++ builderBlockTwo.iterator().map { ctx => withPhasingSetId(ctx, 3) }).toSeq // Check that if we do not want to modify the blocks we get an exception - an[Exception] should be thrownBy PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.dict, modifyBlocks = false) + an[Exception] should be thrownBy PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.getFileHeader.getSequenceDictionary.fromSam, modifyBlocks = false) - val detector = PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.dict) + val detector = PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.getFileHeader.getSequenceDictionary.fromSam) detector.getAll should have size 2 val intervals = detector.getAll.toSeq.sortBy(_.getStart) val head = intervals.head @@ -465,27 +472,30 @@ class PhaseBlockTest extends ErrorLogLevel { } it should "resolve three overlapping blocks, such that when the middle one is truncated and now starts after the third, it is resolved" in { - val builderBlockOne = new VariantContextSetBuilder() - builderBlockOne.addVariant(start=2, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - builderBlockOne.addVariant(start=10, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - val oneIter = builderBlockOne.iterator.map { ctx => withPhasingSetId(ctx, 2) } - - val builderBlockTwo = new VariantContextSetBuilder() - builderBlockTwo.addVariant(start = 8, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) - builderBlockTwo.addVariant(start = 14, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) - val twoIter = builderBlockTwo.iterator.map { ctx => withPhasingSetId(ctx, 3) } - - val builderBlockThree = new VariantContextSetBuilder() - builderBlockThree.addVariant(start = 9, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) - builderBlockThree.addVariant(start = 13, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) - val threeIter = builderBlockThree.iterator.map { ctx => withPhasingSetId(ctx, 4) } + val vcfBuilderBlockOne = VcfBuilder(samples=Seq("s1")) + vcfBuilderBlockOne.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockOne.add(pos=10, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builderBlockOne = new VCFFileReader(vcfBuilderBlockOne.toTempFile()) + val oneIter = builderBlockOne.iterator().map { ctx => withPhasingSetId(ctx, 2) } + + val vcfBuilderBlockTwo = VcfBuilder(samples=Seq("s1")) + vcfBuilderBlockTwo.add(pos=8, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockTwo.add(pos=14, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builderBlockTwo = new VCFFileReader(vcfBuilderBlockTwo.toTempFile()) + val twoIter = builderBlockTwo.iterator().map { ctx => withPhasingSetId(ctx, 3) } + + val vcfBuilderBlockThree = VcfBuilder(samples=Seq("s1")) + vcfBuilderBlockThree.add(pos=9, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockThree.add(pos=13, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builderBlockThree = new VCFFileReader(vcfBuilderBlockThree.toTempFile()) + val threeIter = builderBlockThree.iterator().map { ctx => withPhasingSetId(ctx, 4) } val contexts = (oneIter ++ twoIter ++ threeIter).toSeq // Check that if we do not want to modify the blocks we get an exception - an[Exception] should be thrownBy PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.dict, modifyBlocks = false) + an[Exception] should be thrownBy PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.getFileHeader.getSequenceDictionary.fromSam, modifyBlocks = false) - val detector = PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.dict) + val detector = PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.getFileHeader.getSequenceDictionary.fromSam) detector.getAll should have size 3 val intervals = detector.getAll.toSeq.sortBy(_.getStart) @@ -505,27 +515,30 @@ class PhaseBlockTest extends ErrorLogLevel { } it should "resolve three overlapping blocks, such that when the middle one is truncated and now is enclosed in the third, we get two blocks" in { - val builderBlockOne = new VariantContextSetBuilder() - builderBlockOne.addVariant(start=2, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - builderBlockOne.addVariant(start=10, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - val oneIter = builderBlockOne.iterator.map { ctx => withPhasingSetId(ctx, 2) } - - val builderBlockTwo = new VariantContextSetBuilder() - builderBlockTwo.addVariant(start = 8, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) - builderBlockTwo.addVariant(start = 13, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) - val twoIter = builderBlockTwo.iterator.map { ctx => withPhasingSetId(ctx, 3) } - - val builderBlockThree = new VariantContextSetBuilder() - builderBlockThree.addVariant(start = 9, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) - builderBlockThree.addVariant(start = 13, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) - val threeIter = builderBlockThree.iterator.map { ctx => withPhasingSetId(ctx, 4) } + val vcfBuilderBlockOne = VcfBuilder(samples=Seq("s1")) + vcfBuilderBlockOne.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockOne.add(pos=10, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builderBlockOne = new VCFFileReader(vcfBuilderBlockOne.toTempFile()) + val oneIter = builderBlockOne.iterator().map { ctx => withPhasingSetId(ctx, 2) } + + val vcfBuilderBlockTwo = VcfBuilder(samples=Seq("s1")) + vcfBuilderBlockTwo.add(pos=8, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockTwo.add(pos=13, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builderBlockTwo = new VCFFileReader(vcfBuilderBlockTwo.toTempFile()) + val twoIter = builderBlockTwo.iterator().map { ctx => withPhasingSetId(ctx, 3) } + + val vcfBuilderBlockThree = VcfBuilder(samples=Seq("s1")) + vcfBuilderBlockThree.add(pos=9, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderBlockThree.add(pos=13, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builderBlockThree = new VCFFileReader(vcfBuilderBlockThree.toTempFile()) + val threeIter = builderBlockThree.iterator().map { ctx => withPhasingSetId(ctx, 4) } val contexts = (oneIter ++ twoIter ++ threeIter).toSeq // Check that if we do not want to modify the blocks we get an exception - an[Exception] should be thrownBy PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.dict, modifyBlocks = false) + an[Exception] should be thrownBy PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.getFileHeader.getSequenceDictionary.fromSam, modifyBlocks = false) - val detector = PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.dict) + val detector = PhaseBlock.buildOverlapDetector(iterator = contexts.iterator, dict = builderBlockOne.getFileHeader.getSequenceDictionary.fromSam) detector.getAll should have size 2 val intervals = detector.getAll.toSeq.sortBy(_.getStart) @@ -574,12 +587,14 @@ class PhaseCigarTest extends ErrorLogLevel { } "Cigar.toCigar" should "create an empty cigar if no variants have a phasing set" in { - val builder = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - val ctx = builder.head +// val builder = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) + val vcfBuilder = VcfBuilder(samples=Seq("s1")).add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) + val ctx = builder.iterator().next() // truth variant only { - val (cigar, metric) = toCigar(truth = Seq(ctx), call = Seq.empty, header = builder.header, skipMismatchingAlleles = true) + val (cigar, metric) = toCigar(truth = Seq(ctx), call = Seq.empty, header = builder.getFileHeader, skipMismatchingAlleles = true) cigar should contain theSameElementsInOrderAs Seq(BothEnd, BothEnd) metric.num_called shouldBe 0 metric.num_truth shouldBe 1 @@ -588,7 +603,7 @@ class PhaseCigarTest extends ErrorLogLevel { // call variant only { - val (cigar, metric) = toCigar(truth = Seq.empty, call = Seq(ctx), header = builder.header, skipMismatchingAlleles = true) + val (cigar, metric) = toCigar(truth = Seq.empty, call = Seq(ctx), header = builder.getFileHeader, skipMismatchingAlleles = true) cigar should contain theSameElementsInOrderAs Seq(BothEnd, BothEnd) metric.num_called shouldBe 1 metric.num_phased shouldBe 0 @@ -597,12 +612,14 @@ class PhaseCigarTest extends ErrorLogLevel { } it should "create a cigar from either a single truth or call variant" in { - val builder = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - val ctx = withPhasingSetId(builder.head, 1) +// val builder = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) + val vcfBuilder = VcfBuilder(samples=Seq("s1")).add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) + val ctx = withPhasingSetId(builder.iterator().next(), 1) // truth variant only { - val (cigar, metric) = toCigar(truth = Seq(ctx), call = Seq.empty, header = builder.header, skipMismatchingAlleles = true) + val (cigar, metric) = toCigar(truth = Seq(ctx), call = Seq.empty, header = builder.getFileHeader, skipMismatchingAlleles = true) cigar should contain theSameElementsInOrderAs Seq(BothEnd, TruthOnly, BothEnd) metric.num_called shouldBe 0 metric.num_truth shouldBe 1 @@ -611,7 +628,7 @@ class PhaseCigarTest extends ErrorLogLevel { // call variant only { - val (cigar, metric) = toCigar(truth = Seq.empty, call = Seq(ctx), header = builder.header, skipMismatchingAlleles = true) + val (cigar, metric) = toCigar(truth = Seq.empty, call = Seq(ctx), header = builder.getFileHeader, skipMismatchingAlleles = true) cigar should contain theSameElementsInOrderAs Seq(BothEnd, CallOnly, BothEnd) metric.num_called shouldBe 1 metric.num_phased shouldBe 1 @@ -620,13 +637,14 @@ class PhaseCigarTest extends ErrorLogLevel { } it should "create a cigar when both truth and call variants are present but only of the two are phased" in { - val builder = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - val ctxPhased = withPhasingSetId(builder.head, 1) - val ctxNoPhase = builder.head + val vcfBuilder = VcfBuilder(samples=Seq("s1")).add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) + val ctxPhased = withPhasingSetId(builder.iterator().next(), 1) + val ctxNoPhase = builder.iterator().next() // truth variant phased only { - val (cigar, metric) = toCigar(truth = Seq(ctxPhased), call = Seq(ctxNoPhase), header = builder.header, skipMismatchingAlleles = true) + val (cigar, metric) = toCigar(truth = Seq(ctxPhased), call = Seq(ctxNoPhase), header = builder.getFileHeader, skipMismatchingAlleles = true) cigar should contain theSameElementsInOrderAs Seq(BothEnd, TruthOnly, BothEnd) metric.num_called shouldBe 1 metric.num_phased shouldBe 0 @@ -636,7 +654,7 @@ class PhaseCigarTest extends ErrorLogLevel { // call variant phased only { - val (cigar, metric) = toCigar(truth = Seq(ctxNoPhase), call = Seq(ctxPhased), header = builder.header, skipMismatchingAlleles = true) + val (cigar, metric) = toCigar(truth = Seq(ctxNoPhase), call = Seq(ctxPhased), header = builder.getFileHeader, skipMismatchingAlleles = true) cigar should contain theSameElementsInOrderAs Seq(BothEnd, CallOnly, BothEnd) metric.num_called shouldBe 1 metric.num_phased shouldBe 1 @@ -646,12 +664,14 @@ class PhaseCigarTest extends ErrorLogLevel { } it should "create a cigar when both truth and call variants are present and both are phased" in { - val builder = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - val ctx = withPhasingSetId(builder.head, 1) +// val builder = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) + val vcfBuilder = VcfBuilder(samples=Seq("s1")).add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) + val ctx = withPhasingSetId(builder.iterator().next(), 1) // both variants are phased { - val (cigar, metric) = toCigar(truth = Seq(ctx), call = Seq(ctx), header = builder.header, skipMismatchingAlleles = true) + val (cigar, metric) = toCigar(truth = Seq(ctx), call = Seq(ctx), header = builder.getFileHeader, skipMismatchingAlleles = true) cigar should contain theSameElementsInOrderAs Seq(BothEnd, Match, BothEnd) metric.num_called shouldBe 1 metric.num_phased shouldBe 1 @@ -662,7 +682,13 @@ class PhaseCigarTest extends ErrorLogLevel { it should "create a cigar when both truth and call variants are present and both are phased but mismatch alleles" in { val builderTruth = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) +// val vcfBuilderTruth = VcfBuilder(samples=Seq("s1")).add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) +// val builderTruth = new VCFFileReader(vcfBuilderTruth.toTempFile()) + val builderCall = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("C", "A"), phased=true) +// val vcfBuilderCall = VcfBuilder(samples=Seq("s1")).add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) +// val builderCall = new VCFFileReader(vcfBuilderCall.toTempFile()) + val truth = withPhasingSetId(builderTruth.head, 1) val call = withPhasingSetId(builderCall.head, 1) @@ -688,6 +714,10 @@ class PhaseCigarTest extends ErrorLogLevel { { builderTruth.addVariant(start=2, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) builderCall.addVariant(start=2, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) +// vcfBuilderTruth.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) +// vcfBuilderCall.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) +// val builderTruth = new VCFFileReader(vcfBuilderTruth.toTempFile()) +// val builderCall = new VCFFileReader(vcfBuilderCall.toTempFile()) val truthTwo = withPhasingSetId(builderTruth.last, 1) val callTwo = withPhasingSetId(builderCall.last, 1) @@ -701,14 +731,18 @@ class PhaseCigarTest extends ErrorLogLevel { } it should "skip sites where alleles mismatch if specified" in { - val builderTruth = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - val builderCall = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "G"), genotypeAlleles=List("A", "G"), phased=true) - val truth = withPhasingSetId(builderTruth.head, 1) - val call = withPhasingSetId(builderCall.head, 1) + val vcfBuilderTruth = VcfBuilder(samples=Seq("s1")).add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builderTruth = new VCFFileReader(vcfBuilderTruth.toTempFile()) + + val vcfBuilderCall = VcfBuilder(samples=Seq("s1")).add(pos=1, alleles=Seq("A", "G"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builderCall = new VCFFileReader(vcfBuilderCall.toTempFile()) + + val truth = withPhasingSetId(builderTruth.iterator().next(), 1) + val call = withPhasingSetId(builderCall.iterator().next(), 1) // skip the sites that have mismatching alleles { - val (cigar, metric) = toCigar(truth = Seq(truth), call = Seq(call), header = builderTruth.header, skipMismatchingAlleles = true) + val (cigar, metric) = toCigar(truth = Seq(truth), call = Seq(call), header = builderTruth.getFileHeader, skipMismatchingAlleles = true) cigar should contain theSameElementsInOrderAs Seq(BothEnd, BothEnd) metric.num_called shouldBe 0 metric.num_truth shouldBe 0 @@ -716,7 +750,7 @@ class PhaseCigarTest extends ErrorLogLevel { // include the sites that have mismatching alleles { - val (cigar, metric) = toCigar(truth = Seq(truth), call = Seq(call), header = builderTruth.header, skipMismatchingAlleles = false) + val (cigar, metric) = toCigar(truth = Seq(truth), call = Seq(call), header = builderTruth.getFileHeader, skipMismatchingAlleles = false) cigar should contain theSameElementsInOrderAs Seq(BothEnd, Mismatch, BothEnd) metric.num_called shouldBe 1 metric.num_phased shouldBe 1 @@ -936,11 +970,12 @@ class PhaseCigarTest extends ErrorLogLevel { } "Cigar.contextsToBlockEndOperator/contextsToMatchingOperator" should "should return the cigar operator for two variant contexts" in { - val builder = new VariantContextSetBuilder() - builder.addVariant(start = 1, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) - builder.addVariant(start = 2, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) - val ctxStart = withPhasingSetId(builder.head, 1) - val ctxNoStart = withPhasingSetId(builder.last, 1) + val vcfBuilder = VcfBuilder(samples=Seq("s1")) + vcfBuilder.add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilder.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) + val ctxStart = withPhasingSetId(builder.iterator().toSeq.head, 1) + val ctxNoStart = withPhasingSetId(builder.iterator().toSeq.last, 1) // No truth, call is start of a phase block PhaseCigar.contextsToBlockEndOperator(truth=None, call=Some(ctxStart)) shouldBe Some(CallEnd) @@ -976,9 +1011,10 @@ class PhaseCigarTest extends ErrorLogLevel { } it should "should return the cigar operator for two variant contexts that disagree on phase" in { - val builder = new VariantContextSetBuilder() - builder.addVariant(start = 1, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C"), phased = true) - builder.addVariant(start = 2, variantAlleles = List("A", "C"), genotypeAlleles = List("C", "A"), phased = true) + val vcfBuilder = VcfBuilder(samples=Seq("s1")) + vcfBuilder.add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilder.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="1|0"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()).iterator().toSeq val ctx = withPhasingSetId(builder.head, 1) val ctxMismatch = withPhasingSetId(builder.last, 2) @@ -992,25 +1028,34 @@ class PhaseCigarTest extends ErrorLogLevel { } "Cigar.cigarForVariantContexts" should "return a match if two variant contexts share the same alleles in the same order" in { - val builder = new VariantContextSetBuilder() - builder.addVariant(start = 1, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C")) - builder.addVariant(start = 2, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C")) + val vcfBuilder = VcfBuilder(samples=Seq("s1")) + vcfBuilder.add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0/1"))) + vcfBuilder.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0/1"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()).iterator().toSeq PhaseCigar.cigarTypeForVariantContexts(builder.head, builder.last) shouldBe Match PhaseCigar.cigarTypeForVariantContexts(builder.last, builder.head) shouldBe Match } it should "return a mismatch if two variant contexts share the same alleles but in the different order" in { - val builder = new VariantContextSetBuilder() - builder.addVariant(start = 1, variantAlleles = List("A", "C"), genotypeAlleles = List("C", "A")) - builder.addVariant(start = 2, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C")) +// val builder = new VariantContextSetBuilder() +// builder.addVariant(start = 1, variantAlleles = List("A", "C"), genotypeAlleles = List("C", "A")) +// builder.addVariant(start = 2, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C")) + val vcfBuilder = VcfBuilder(samples=Seq("s1")) + vcfBuilder.add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0/1"))) + vcfBuilder.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="1/0"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()).iterator().toSeq PhaseCigar.cigarTypeForVariantContexts(builder.head, builder.last) shouldBe Mismatch PhaseCigar.cigarTypeForVariantContexts(builder.last, builder.head) shouldBe Mismatch } it should "return a mismatch if two variant contexts share the different alleles" in { - val builder = new VariantContextSetBuilder() - builder.addVariant(start = 1, variantAlleles = List("A", "G"), genotypeAlleles = List("A", "G")) - builder.addVariant(start = 2, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C")) +// val builder = new VariantContextSetBuilder() +// builder.addVariant(start = 1, variantAlleles = List("A", "G"), genotypeAlleles = List("A", "G")) +// builder.addVariant(start = 2, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C")) + val vcfBuilder = VcfBuilder(samples=Seq("s1")) + vcfBuilder.add(pos=1, alleles=Seq("A", "G"), gts=Seq(Gt(sample="s1", gt="0/1"))) + vcfBuilder.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0/1"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()).iterator().toSeq PhaseCigar.cigarTypeForVariantContexts(builder.head, builder.last) shouldBe Mismatch PhaseCigar.cigarTypeForVariantContexts(builder.last, builder.head) shouldBe Mismatch } @@ -1018,17 +1063,25 @@ class PhaseCigarTest extends ErrorLogLevel { it should "throw an exception if the variant contexts do not have exactly two alleles" in { // One variant alleles, one genotype allele { - val builder = new VariantContextSetBuilder() - builder.addVariant(start = 1, variantAlleles = List("A"), genotypeAlleles = List("A")) - builder.addVariant(start = 2, variantAlleles = List("A"), genotypeAlleles = List("A")) +// val builder = new VariantContextSetBuilder() +// builder.addVariant(start = 1, variantAlleles = List("A"), genotypeAlleles = List("A")) +// builder.addVariant(start = 2, variantAlleles = List("A"), genotypeAlleles = List("A")) + val vcfBuilder = VcfBuilder(samples=Seq("s1")) + vcfBuilder.add(pos=1, alleles=Seq("A"), gts=Seq(Gt(sample="s1", gt="0"))) + vcfBuilder.add(pos=2, alleles=Seq("A"), gts=Seq(Gt(sample="s1", gt="0"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()).iterator().toSeq an[Exception] should be thrownBy PhaseCigar.cigarTypeForVariantContexts(builder.head, builder.last) } // Two variant alleles, one genotype allele { - val builder = new VariantContextSetBuilder() - builder.addVariant(start = 1, variantAlleles = List("A", "C"), genotypeAlleles = List("C")) - builder.addVariant(start = 2, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C")) +// val builder = new VariantContextSetBuilder() +// builder.addVariant(start = 1, variantAlleles = List("A", "C"), genotypeAlleles = List("C")) +// builder.addVariant(start = 2, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C")) + val vcfBuilder = VcfBuilder(samples=Seq("s1")) + vcfBuilder.add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="1"))) + vcfBuilder.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0/1"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()).iterator().toSeq an[Exception] should be thrownBy PhaseCigar.cigarTypeForVariantContexts(builder.head, builder.last) an[Exception] should be thrownBy PhaseCigar.cigarTypeForVariantContexts(builder.last, builder.head) } From ff1578a1b2d6dc56a10e037a467e254dbe259b83 Mon Sep 17 00:00:00 2001 From: Kari Stromhaug Date: Wed, 30 Mar 2022 14:41:15 -0600 Subject: [PATCH 09/17] Start on next round of changes on AssessPhasingTest --- .../vcf/AssessPhasingTest.scala | 93 +++++++++++++------ 1 file changed, 64 insertions(+), 29 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala index a2046976b..3c3b009d8 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala @@ -33,7 +33,7 @@ import com.fulcrumgenomics.testing.VcfBuilder.Gt import com.fulcrumgenomics.testing.{ErrorLogLevel, UnitSpec, VariantContextSetBuilder, VcfBuilder} import com.fulcrumgenomics.util.Metric import com.fulcrumgenomics.vcf.PhaseCigar.IlluminaSwitchErrors -import com.fulcrumgenomics.vcf.api.Genotype +import com.fulcrumgenomics.vcf.api.{Genotype, Variant} import htsjdk.samtools.SAMFileHeader import htsjdk.samtools.util.{Interval, IntervalList} import htsjdk.variant.variantcontext.writer.{Options, VariantContextWriterBuilder} @@ -41,7 +41,7 @@ import htsjdk.variant.variantcontext.{GenotypeBuilder, VariantContext, VariantCo import htsjdk.variant.vcf.{VCFFileReader, VCFHeader} object AssessPhasingTest { - def withPhasingSetId(ctx: VariantContext, id: Int): VariantContext = { + def withPhasingSetId(ctx: Variant, id: Int): Variant = { val gBuilder = new GenotypeBuilder(ctx.getGenotype(0)) gBuilder.attribute("PS", id) val ctxBuilder = new VariantContextBuilder(ctx) @@ -49,49 +49,84 @@ object AssessPhasingTest { ctxBuilder.make() } - private val builderTruth = new VariantContextSetBuilder() - private val builderCall = new VariantContextSetBuilder() + private val builderTruth = VcfBuilder(samples=Seq("S1")) + private val builderCall = VcfBuilder(samples=Seq("S1")) + // private val builderTruth = new VariantContextSetBuilder() + // private val builderCall = new VariantContextSetBuilder() // init builderTruth and builderCall { // BLOCK #1: positions 1 - 4 (call 1-4, truth 1-4) - builderTruth.addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Match - builderCall.addVariant( start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // - with previous - builderTruth.addVariant(start=2, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // TruthOnly - builderCall.addVariant( start=3, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // CallOnly - builderTruth.addVariant(start=4, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Match - builderCall.addVariant( start=4, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // - with previous + builderTruth.add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // Match + builderCall.add( pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // - with previous + builderTruth.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // TruthOnly + builderCall.add( pos=3, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // CallOnly + builderTruth.add(pos=4, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // Match + builderCall.add( pos=4, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // - with previous + + + // builderTruth.addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Match + // builderCall.addVariant( start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // - with previous + // builderTruth.addVariant(start=2, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // TruthOnly + // builderCall.addVariant( start=3, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // CallOnly + // builderTruth.addVariant(start=4, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Match + // builderCall.addVariant( start=4, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // - with previous // BLOCK #2: positions 11-16 (call 11-15, truth 11-16) - builderTruth.addVariant(start=11, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Match - builderCall.addVariant( start=11, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // - with previous - builderTruth.addVariant(start=12, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Mismatch **** POINT ERROR **** - builderCall.addVariant( start=12, variantAlleles=List("A", "C"), genotypeAlleles=List("C", "A"), phased=true) // - with previous - builderTruth.addVariant(start=13, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // NA - builderCall.addVariant( start=13, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // CallOnly - builderTruth.addVariant(start=14, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // TruthOnly - builderCall.addVariant( start=14, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // NA - builderTruth.addVariant(start=15, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Match - builderCall.addVariant( start=15, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // - with Previous - builderTruth.addVariant(start=16, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // TruthOnly + builderTruth.add(pos=11, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // Match + builderCall.add( pos=11, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // - with previous + builderTruth.add(pos=12, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // Mismatch **** POINT ERROR **** + builderCall.add( pos=12, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="1|0"))) // - with previous + builderTruth.add(pos=13, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // NA + builderCall.add( pos=13, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // CallOnly + builderCall.add( pos=14, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // TruthOnly + builderCall.add( pos=14, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // NA + builderCall.add( pos=15, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // Match + builderCall.add( pos=15, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // - with previous + builderCall.add( pos=16, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // TruthOnly + + + // builderTruth.addVariant(start=11, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Match + // builderCall.addVariant( start=11, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // - with previous + // builderTruth.addVariant(start=12, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Mismatch **** POINT ERROR **** + // builderCall.addVariant( start=12, variantAlleles=List("A", "C"), genotypeAlleles=List("C", "A"), phased=true) // - with previous + // builderTruth.addVariant(start=13, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // NA + // builderCall.addVariant( start=13, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // CallOnly + // builderTruth.addVariant(start=14, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // TruthOnly + // builderCall.addVariant( start=14, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // NA + // builderTruth.addVariant(start=15, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Match + // builderCall.addVariant( start=15, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // - with Previous + // builderTruth.addVariant(start=16, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // TruthOnly // BLOCK #3: position 21 (call 21-21) - builderCall.addVariant( start=21, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // CallOnly + builderCall.add( pos=21, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // CallOnly + + // builderCall.addVariant( start=21, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // CallOnly // BLOCK #4: position 30-42 Range(30, 37).foreach { start => - builderTruth.addVariant(start=start, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Match - builderCall.addVariant( start=start, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // - with previous + builderTruth.add( pos=start, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // Match + builderCall.add( pos=start, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // - with previous } + + // Range(30, 37).foreach { start => + // builderTruth.addVariant(start=start, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Match + // builderCall.addVariant( start=start, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // - with previous + // } + Range(37, 43).foreach { start => - builderTruth.addVariant(start=start, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Mismatch - builderCall.addVariant( start=start, variantAlleles=List("A", "C"), genotypeAlleles=List("C", "A"), phased=true) // - with previous + builderTruth.add( pos=start, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // Match + builderCall.add( pos=start, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="1|0"))) // - with previous } + // Range(37, 43).foreach { start => + // builderTruth.addVariant(start=start, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Mismatch + // builderCall.addVariant( start=start, variantAlleles=List("A", "C"), genotypeAlleles=List("C", "A"), phased=true) // - with previous + // } // NB: call has blocks lengths 4, 5, 1, and 13; truth has block lengths 4, 6, and 13. } - private def addPhaseSetId(ctx: VariantContext): VariantContext = { + private def addPhaseSetId(ctx: Variant): Variant = { if (ctx.getStart <= 10) withPhasingSetId(ctx, 1) else if (ctx.getStart <= 20) withPhasingSetId(ctx, 11) else if (ctx.getStart <= 29) withPhasingSetId(ctx, 21) @@ -99,7 +134,7 @@ object AssessPhasingTest { else unreachable("Not defined") } - lazy val TruthVariants: Seq[VariantContext] = { + lazy val TruthVariants: Seq[Variant] = { // Keep the truth variant position 13 without a phase set builderTruth.map { ctx => if (ctx.getStart == 13) ctx @@ -107,7 +142,7 @@ object AssessPhasingTest { }.toSeq } - lazy val CallVariants: Seq[VariantContext] = { + lazy val CallVariants: Seq[Variant] = { // Keep the call variant position 14 without a phase set builderCall.map { ctx => if (ctx.getStart == 14) ctx From df69e84a59ac0efc4b32dc8dc2e5560650dea823 Mon Sep 17 00:00:00 2001 From: Kari Stromhaug Date: Wed, 30 Mar 2022 15:01:05 -0600 Subject: [PATCH 10/17] Just one test remaining in AssessPhasingTest --- .../vcf/AssessPhasingTest.scala | 75 ++++--------------- 1 file changed, 15 insertions(+), 60 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala index 3c3b009d8..916aa01e9 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala @@ -24,7 +24,6 @@ package com.fulcrumgenomics.vcf -import java.nio.file.{Files, Paths} import com.fulcrumgenomics.FgBioDef._ import com.fulcrumgenomics.commons.io.PathUtil import com.fulcrumgenomics.commons.util.NumericCounter @@ -33,15 +32,16 @@ import com.fulcrumgenomics.testing.VcfBuilder.Gt import com.fulcrumgenomics.testing.{ErrorLogLevel, UnitSpec, VariantContextSetBuilder, VcfBuilder} import com.fulcrumgenomics.util.Metric import com.fulcrumgenomics.vcf.PhaseCigar.IlluminaSwitchErrors -import com.fulcrumgenomics.vcf.api.{Genotype, Variant} import htsjdk.samtools.SAMFileHeader import htsjdk.samtools.util.{Interval, IntervalList} import htsjdk.variant.variantcontext.writer.{Options, VariantContextWriterBuilder} import htsjdk.variant.variantcontext.{GenotypeBuilder, VariantContext, VariantContextBuilder} import htsjdk.variant.vcf.{VCFFileReader, VCFHeader} +import java.nio.file.{Files, Paths} + object AssessPhasingTest { - def withPhasingSetId(ctx: Variant, id: Int): Variant = { + def withPhasingSetId(ctx: VariantContext, id: Int): VariantContext = { val gBuilder = new GenotypeBuilder(ctx.getGenotype(0)) gBuilder.attribute("PS", id) val ctxBuilder = new VariantContextBuilder(ctx) @@ -51,10 +51,7 @@ object AssessPhasingTest { private val builderTruth = VcfBuilder(samples=Seq("S1")) private val builderCall = VcfBuilder(samples=Seq("S1")) - // private val builderTruth = new VariantContextSetBuilder() - // private val builderCall = new VariantContextSetBuilder() - // init builderTruth and builderCall { // BLOCK #1: positions 1 - 4 (call 1-4, truth 1-4) builderTruth.add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // Match @@ -64,14 +61,6 @@ object AssessPhasingTest { builderTruth.add(pos=4, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // Match builderCall.add( pos=4, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // - with previous - - // builderTruth.addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Match - // builderCall.addVariant( start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // - with previous - // builderTruth.addVariant(start=2, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // TruthOnly - // builderCall.addVariant( start=3, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // CallOnly - // builderTruth.addVariant(start=4, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Match - // builderCall.addVariant( start=4, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // - with previous - // BLOCK #2: positions 11-16 (call 11-15, truth 11-16) builderTruth.add(pos=11, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // Match builderCall.add( pos=11, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // - with previous @@ -79,54 +68,32 @@ object AssessPhasingTest { builderCall.add( pos=12, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="1|0"))) // - with previous builderTruth.add(pos=13, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // NA builderCall.add( pos=13, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // CallOnly - builderCall.add( pos=14, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // TruthOnly + builderTruth.add( pos=14, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // TruthOnly builderCall.add( pos=14, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // NA - builderCall.add( pos=15, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // Match + builderTruth.add( pos=15, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // Match builderCall.add( pos=15, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // - with previous - builderCall.add( pos=16, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // TruthOnly - - - // builderTruth.addVariant(start=11, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Match - // builderCall.addVariant( start=11, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // - with previous - // builderTruth.addVariant(start=12, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Mismatch **** POINT ERROR **** - // builderCall.addVariant( start=12, variantAlleles=List("A", "C"), genotypeAlleles=List("C", "A"), phased=true) // - with previous - // builderTruth.addVariant(start=13, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // NA - // builderCall.addVariant( start=13, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // CallOnly - // builderTruth.addVariant(start=14, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // TruthOnly - // builderCall.addVariant( start=14, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // NA - // builderTruth.addVariant(start=15, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Match - // builderCall.addVariant( start=15, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // - with Previous - // builderTruth.addVariant(start=16, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // TruthOnly + builderTruth.add( pos=16, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // TruthOnly // BLOCK #3: position 21 (call 21-21) builderCall.add( pos=21, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // CallOnly - // builderCall.addVariant( start=21, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // CallOnly - // BLOCK #4: position 30-42 Range(30, 37).foreach { start => builderTruth.add( pos=start, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // Match builderCall.add( pos=start, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // - with previous } - // Range(30, 37).foreach { start => - // builderTruth.addVariant(start=start, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Match - // builderCall.addVariant( start=start, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // - with previous - // } - Range(37, 43).foreach { start => builderTruth.add( pos=start, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="0|1"))) // Match builderCall.add( pos=start, alleles=Seq("A", "C"), gts=Seq(Gt(sample="S1", gt="1|0"))) // - with previous } - - // Range(37, 43).foreach { start => - // builderTruth.addVariant(start=start, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // Mismatch - // builderCall.addVariant( start=start, variantAlleles=List("A", "C"), genotypeAlleles=List("C", "A"), phased=true) // - with previous - // } // NB: call has blocks lengths 4, 5, 1, and 13; truth has block lengths 4, 6, and 13. } - private def addPhaseSetId(ctx: Variant): Variant = { + val readBuilderCall: VCFFileReader = new VCFFileReader(builderCall.toTempFile()) + val readBuilderTruth: VCFFileReader = new VCFFileReader(builderTruth.toTempFile()) + + private def addPhaseSetId(ctx: VariantContext): VariantContext = { if (ctx.getStart <= 10) withPhasingSetId(ctx, 1) else if (ctx.getStart <= 20) withPhasingSetId(ctx, 11) else if (ctx.getStart <= 29) withPhasingSetId(ctx, 21) @@ -134,23 +101,23 @@ object AssessPhasingTest { else unreachable("Not defined") } - lazy val TruthVariants: Seq[Variant] = { + lazy val TruthVariants: Seq[VariantContext] = { // Keep the truth variant position 13 without a phase set - builderTruth.map { ctx => + readBuilderTruth.iterator().map { ctx => if (ctx.getStart == 13) ctx else addPhaseSetId(ctx) }.toSeq } - lazy val CallVariants: Seq[Variant] = { + lazy val CallVariants: Seq[VariantContext] = { // Keep the call variant position 14 without a phase set - builderCall.map { ctx => + readBuilderCall.iterator().map { ctx => if (ctx.getStart == 14) ctx else addPhaseSetId(ctx) }.toSeq } - val Header = builderCall.header + val Header = readBuilderCall.getFileHeader } /** @@ -1072,9 +1039,6 @@ class PhaseCigarTest extends ErrorLogLevel { } it should "return a mismatch if two variant contexts share the same alleles but in the different order" in { -// val builder = new VariantContextSetBuilder() -// builder.addVariant(start = 1, variantAlleles = List("A", "C"), genotypeAlleles = List("C", "A")) -// builder.addVariant(start = 2, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C")) val vcfBuilder = VcfBuilder(samples=Seq("s1")) vcfBuilder.add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0/1"))) vcfBuilder.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="1/0"))) @@ -1084,9 +1048,6 @@ class PhaseCigarTest extends ErrorLogLevel { } it should "return a mismatch if two variant contexts share the different alleles" in { -// val builder = new VariantContextSetBuilder() -// builder.addVariant(start = 1, variantAlleles = List("A", "G"), genotypeAlleles = List("A", "G")) -// builder.addVariant(start = 2, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C")) val vcfBuilder = VcfBuilder(samples=Seq("s1")) vcfBuilder.add(pos=1, alleles=Seq("A", "G"), gts=Seq(Gt(sample="s1", gt="0/1"))) vcfBuilder.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0/1"))) @@ -1098,9 +1059,6 @@ class PhaseCigarTest extends ErrorLogLevel { it should "throw an exception if the variant contexts do not have exactly two alleles" in { // One variant alleles, one genotype allele { -// val builder = new VariantContextSetBuilder() -// builder.addVariant(start = 1, variantAlleles = List("A"), genotypeAlleles = List("A")) -// builder.addVariant(start = 2, variantAlleles = List("A"), genotypeAlleles = List("A")) val vcfBuilder = VcfBuilder(samples=Seq("s1")) vcfBuilder.add(pos=1, alleles=Seq("A"), gts=Seq(Gt(sample="s1", gt="0"))) vcfBuilder.add(pos=2, alleles=Seq("A"), gts=Seq(Gt(sample="s1", gt="0"))) @@ -1110,9 +1068,6 @@ class PhaseCigarTest extends ErrorLogLevel { // Two variant alleles, one genotype allele { -// val builder = new VariantContextSetBuilder() -// builder.addVariant(start = 1, variantAlleles = List("A", "C"), genotypeAlleles = List("C")) -// builder.addVariant(start = 2, variantAlleles = List("A", "C"), genotypeAlleles = List("A", "C")) val vcfBuilder = VcfBuilder(samples=Seq("s1")) vcfBuilder.add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="1"))) vcfBuilder.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0/1"))) From 319887f4745719bdde4792af4ab66c061c6473a4 Mon Sep 17 00:00:00 2001 From: Kari Stromhaug Date: Wed, 30 Mar 2022 15:52:51 -0600 Subject: [PATCH 11/17] Remove some comments in AssessPhasingTest and remove VariantContestSetBuilder in MakeMixtureVcfTest --- .../vcf/AssessPhasingTest.scala | 4 +- .../vcf/MakeMixtureVcfTest.scala | 137 +++++++++--------- 2 files changed, 68 insertions(+), 73 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala index 916aa01e9..bc9a214bc 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala @@ -589,7 +589,6 @@ class PhaseCigarTest extends ErrorLogLevel { } "Cigar.toCigar" should "create an empty cigar if no variants have a phasing set" in { -// val builder = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) val vcfBuilder = VcfBuilder(samples=Seq("s1")).add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) val builder = new VCFFileReader(vcfBuilder.toTempFile()) val ctx = builder.iterator().next() @@ -614,7 +613,6 @@ class PhaseCigarTest extends ErrorLogLevel { } it should "create a cigar from either a single truth or call variant" in { -// val builder = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) val vcfBuilder = VcfBuilder(samples=Seq("s1")).add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) val builder = new VCFFileReader(vcfBuilder.toTempFile()) val ctx = withPhasingSetId(builder.iterator().next(), 1) @@ -666,7 +664,6 @@ class PhaseCigarTest extends ErrorLogLevel { } it should "create a cigar when both truth and call variants are present and both are phased" in { -// val builder = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) val vcfBuilder = VcfBuilder(samples=Seq("s1")).add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) val builder = new VCFFileReader(vcfBuilder.toTempFile()) val ctx = withPhasingSetId(builder.iterator().next(), 1) @@ -683,6 +680,7 @@ class PhaseCigarTest extends ErrorLogLevel { } it should "create a cigar when both truth and call variants are present and both are phased but mismatch alleles" in { + // TODO fix this test val builderTruth = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) // val vcfBuilderTruth = VcfBuilder(samples=Seq("s1")).add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) // val builderTruth = new VCFFileReader(vcfBuilderTruth.toTempFile()) diff --git a/src/test/scala/com/fulcrumgenomics/vcf/MakeMixtureVcfTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/MakeMixtureVcfTest.scala index d24a5dcad..19ae0e893 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/MakeMixtureVcfTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/MakeMixtureVcfTest.scala @@ -25,89 +25,91 @@ package com.fulcrumgenomics.vcf import com.fulcrumgenomics.FgBioDef._ -import com.fulcrumgenomics.testing.{UnitSpec, VariantContextSetBuilder} +import com.fulcrumgenomics.testing.VcfBuilder.Gt +import com.fulcrumgenomics.testing.{SamBuilder, UnitSpec, VariantContextSetBuilder, VcfBuilder} import com.fulcrumgenomics.vcf.MakeMixtureVcf.Sample import htsjdk.variant.variantcontext.Allele -import htsjdk.variant.vcf.VCFFileReader +import htsjdk.variant.vcf.{VCFFileReader, VCFHeader, VCFHeaderLine} +import java.util +import java.util.Collections import scala.collection.mutable class MakeMixtureVcfTest extends UnitSpec { private val dummyVcf = makeTempFile("dummy.", ".vcf") + val header: VCFHeader = { + val jSamples = new util.ArrayList[String](util.Arrays.asList("s1", "s2", "s3", "s4")) + val h = new VCFHeader(Collections.emptySet[VCFHeaderLine](), jSamples) + h.setSequenceDictionary(new SamBuilder().header.getSequenceDictionary) + h + } + "MakeMixtureVcf.determineSamples" should "auto-populate samples from the header" in { val samples = Seq("s1", "s2", "s3", "s4") - val builder = new VariantContextSetBuilder(samples) + val mixer = new MakeMixtureVcf(input=dummyVcf, output=dummyVcf, outputSampleName="mix") - val ss = mixer.determineSamples(builder.header) + val ss = mixer.determineSamples(header) ss shouldBe samples.map(s => Sample(s, 0.25)) } it should "work when only a subset of samples are specified" in { - val samples = Seq("s1", "s2", "s3", "s4") - val builder = new VariantContextSetBuilder(samples) val mixer = new MakeMixtureVcf(input=dummyVcf, output=dummyVcf, samples=Seq("s2", "s3"), outputSampleName="mix") - val ss = mixer.determineSamples(builder.header) + val ss = mixer.determineSamples(header) ss shouldBe Seq(Sample("s2", 0.5), Sample("s3", 0.5)) } it should "work when specific sample proportions are specified" in { - val samples = Seq("s1", "s2", "s3", "s4") - val builder = new VariantContextSetBuilder(samples) val mixer = new MakeMixtureVcf(input=dummyVcf, output=dummyVcf, samples=Seq("s2@0.9", "s3@0.1"), outputSampleName="mix") - val ss = mixer.determineSamples(builder.header) + val ss = mixer.determineSamples(header) ss shouldBe Seq(Sample("s2", 0.9), Sample("s3", 0.1)) } it should "divide the remaining proportion equally between unannotated samples" in { - val samples = Seq("s1", "s2", "s3", "s4") - val builder = new VariantContextSetBuilder(samples) val mixer = new MakeMixtureVcf(input=dummyVcf, output=dummyVcf, samples=Seq("s1@0.5", "s2", "s3"), outputSampleName="mix") - val ss = mixer.determineSamples(builder.header) + val ss = mixer.determineSamples(header) ss shouldBe Seq(Sample("s1", 0.5), Sample("s2", 0.25), Sample("s3", 0.25)) } it should "throw an exception if a sample is specified that's not in the VCF" in { val samples = Seq("s1", "s2", "s3", "s4") - val builder = new VariantContextSetBuilder(samples) val mixer = new MakeMixtureVcf(input=dummyVcf, output=dummyVcf, samples=Seq("s1", "s2", "s7"), outputSampleName="mix") - an[Exception] should be thrownBy { mixer.determineSamples(builder.header) } + an[Exception] should be thrownBy { mixer.determineSamples(header) } } it should "throw an exception if the proportions add up to less than 1" in { val samples = Seq("s1", "s2", "s3", "s4") - val builder = new VariantContextSetBuilder(samples) val mixer = new MakeMixtureVcf(input=dummyVcf, output=dummyVcf, samples=Seq("s1@0.25", "s2@0.25"), outputSampleName="mix") - an[Exception] should be thrownBy { mixer.determineSamples(builder.header) } + an[Exception] should be thrownBy { mixer.determineSamples(header) } } it should "throw an exception if the proportions add up to more than 1" in { val samples = Seq("s1", "s2", "s3", "s4") - val builder = new VariantContextSetBuilder(samples) val mixer = new MakeMixtureVcf(input=dummyVcf, output=dummyVcf, samples=Seq("s1@0.75", "s2@0.75"), outputSampleName="mix") - an[Exception] should be thrownBy { mixer.determineSamples(builder.header) } + an[Exception] should be thrownBy { mixer.determineSamples(header) } } it should "throw an exception if a sample proportion is negative" in { - val samples = Seq("s1", "s2", "s3") - val builder = new VariantContextSetBuilder(samples) + val jSamples = new util.ArrayList[String](util.Arrays.asList("s1", "s2", "s3")) + val tmpHeader = new VCFHeader(Collections.emptySet[VCFHeaderLine](), jSamples) + tmpHeader.setSequenceDictionary(new SamBuilder().header.getSequenceDictionary) + val mixer = new MakeMixtureVcf(input=dummyVcf, output=dummyVcf, samples=Seq("s1@0.75", "s2@0.75", "s3@-0.5"), outputSampleName="mix") - an[Exception] should be thrownBy { mixer.determineSamples(builder.header) } + an[Exception] should be thrownBy { mixer.determineSamples(tmpHeader) } } it should "throw an exception if the same sample is specified more than once" in { val samples = Seq("s1", "s2", "s3", "s4") - val builder = new VariantContextSetBuilder(samples) val mixer = new MakeMixtureVcf(input=dummyVcf, output=dummyVcf, samples=Seq("s1@0.5", "s1@0.5"), outputSampleName="mix") - an[Exception] should be thrownBy { mixer.determineSamples(builder.header) } + an[Exception] should be thrownBy { mixer.determineSamples(header) } } "MakeMixtureVcf.updateAlleleFractionsForSample" should "extract simple fractions when no AF field is used" in { - val samples = Seq("s1", "s2") - val builder = new VariantContextSetBuilder(samples) - builder.addVariant(start=10, sampleName=Some("s1"), variantAlleles=List("A", "C"), genotypeAlleles=List("A")) - builder.addVariant(start=10, sampleName=Some("s2"), variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C")) - val ctx = builder.iterator.next() + val vcfBuilder = VcfBuilder(samples=Seq("s1", "s2")) + vcfBuilder.add(pos=10, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0"), Gt(sample="s2", gt="0/1"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) + + val ctx = builder.iterator().next() val mixer = new MakeMixtureVcf(input=dummyVcf, output=dummyVcf, outputSampleName="mix") val fractions = mutable.Map[Allele,Double](ctx.getAlleles.map(a => a -> 0d).toSeq:_*) @@ -123,11 +125,10 @@ class MakeMixtureVcfTest extends UnitSpec { it should "extract a single-valued AF from a field" in { val samples = Seq("s1", "s2") - val builder = new VariantContextSetBuilder(samples) - MakeMixtureVcf.HeaderLines.foreach(builder.addMetaDataLine) - builder.addVariant(start=10, sampleName=Some("s1"), variantAlleles=List("A", "C"), genotypeAlleles=List("A")) - builder.addVariant(start=10, sampleName=Some("s2"), variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), genotypeAttributes=Map("AF" -> 0.4)) - val vcf = builder.toTempFile() + val vcfBuilder = VcfBuilder(samples=samples) + vcfBuilder.add(pos=10, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0"), Gt(sample="s2", gt="0/1", attrs=Map("AF" -> 0.4)))) + + val vcf = vcfBuilder.toTempFile() val in = new VCFFileReader(vcf.toFile, false) val ctx = in.iterator().next() val mixer = new MakeMixtureVcf(input=vcf, output=dummyVcf, outputSampleName="mix", alleleFractionField=Some("AF")) @@ -144,13 +145,10 @@ class MakeMixtureVcfTest extends UnitSpec { it should "extract a multi-valued AF from a field" in { val samples = Seq("s1", "s2") - val builder = new VariantContextSetBuilder(samples) - MakeMixtureVcf.HeaderLines.foreach(builder.addMetaDataLine) - builder.addVariant(start=10, sampleName=Some("s1"), variantAlleles=List("A","C","T"), genotypeAlleles=List("A")) - builder.addVariant(start=10, sampleName=Some("s2"), variantAlleles=List("A","C","T"), genotypeAlleles=List("A", "C", "T"), - genotypeAttributes=Map("AF" -> Array[Double](0.4, 0.1))) + val vcfBuilder = VcfBuilder(samples=samples) + vcfBuilder.add(pos=10, alleles=Seq("A", "C", "T"), gts=Seq(Gt(sample="s1", gt="0"), Gt(sample="s2", gt="0/1/2", attrs=Map("AF" -> IndexedSeq(0.4, 0.1))))) - val vcf = builder.toTempFile() + val vcf = vcfBuilder.toTempFile() val in = new VCFFileReader(vcf.toFile, false) val ctx = in.iterator().next() val mixer = new MakeMixtureVcf(input=vcf, output=dummyVcf, outputSampleName="mix", alleleFractionField=Some("AF")) @@ -169,12 +167,16 @@ class MakeMixtureVcfTest extends UnitSpec { it should "fail if a sample with non-hom-ref genotypes is missing it's AF attribute" in { val samples = Seq("s1", "s2") - val builder = new VariantContextSetBuilder(samples) - MakeMixtureVcf.HeaderLines.foreach(builder.addMetaDataLine) - builder.addVariant(start=10, sampleName=Some("s1"), variantAlleles=List("A","C","T"), genotypeAlleles=List("A")) - builder.addVariant(start=10, sampleName=Some("s2"), variantAlleles=List("A","C","T"), genotypeAlleles=List("A", "C")) +// val builder = new VariantContextSetBuilder(samples) +// MakeMixtureVcf.HeaderLines.foreach(builder.addMetaDataLine) +// builder.addVariant(start=10, sampleName=Some("s1"), variantAlleles=List("A","C","T"), genotypeAlleles=List("A")) +// builder.addVariant(start=10, sampleName=Some("s2"), variantAlleles=List("A","C","T"), genotypeAlleles=List("A", "C")) + + val vcfBuilder = VcfBuilder(samples=samples) + vcfBuilder.add(pos=10, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0"), Gt(sample="s2", gt="0/1"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) - val ctx = builder.iterator.next() + val ctx = builder.iterator().next() val mixer = new MakeMixtureVcf(input=dummyVcf, output=dummyVcf, outputSampleName="mix", alleleFractionField=Some("AF")) val fractions = mutable.Map[Allele,Double](ctx.getAlleles.map(a => a -> 0d).toSeq:_*) @@ -184,31 +186,26 @@ class MakeMixtureVcfTest extends UnitSpec { Seq(true, false).foreach { noCallsAreHomRef => "MakeMixtureVcf" should s"run end to end and create a valid output VCF with no-calls-are-hom-ref=${noCallsAreHomRef}" in { val samples = Seq("s1", "s2", "s3") // s1=0.5, s2=0.25, s3=0.25 - val builder = new VariantContextSetBuilder(samples) - MakeMixtureVcf.HeaderLines.foreach(builder.addMetaDataLine) - - // First variant should come back as A/C with AF = 0.1 - builder.addVariant(start=10, sampleName=Some("s1"), variantAlleles=List("A","C"), genotypeAlleles=List("A")) - builder.addVariant(start=10, sampleName=Some("s2"), variantAlleles=List("A","C"), genotypeAlleles=List("A")) - builder.addVariant(start=10, sampleName=Some("s3"), variantAlleles=List("A","C"), genotypeAlleles=List("A", "C"), - genotypeAttributes=Map("AF" -> 0.4)) - - // Second variant should come back as C/C with AF = 1 - builder.addVariant(start=20, sampleName=Some("s1"), variantAlleles=List("A","C"), genotypeAlleles=List("C"), genotypeAttributes=Map("AF" -> 1.0)) - builder.addVariant(start=20, sampleName=Some("s2"), variantAlleles=List("A","C"), genotypeAlleles=List("C"), genotypeAttributes=Map("AF" -> 1.0)) - builder.addVariant(start=20, sampleName=Some("s3"), variantAlleles=List("A","C"), genotypeAlleles=List("C"), genotypeAttributes=Map("AF" -> 1.0)) - - // Third variant should come back as A/C/T with AFs = 0.0625, 0.0625 - builder.addVariant(start=30, sampleName=Some("s1"), variantAlleles=List("A","C","T"), genotypeAlleles=List("A")) - builder.addVariant(start=30, sampleName=Some("s2"), variantAlleles=List("A","C","T"), genotypeAlleles=List("A","T"), genotypeAttributes=Map("AF" -> 0.25)) - builder.addVariant(start=30, sampleName=Some("s3"), variantAlleles=List("A","C","T"), genotypeAlleles=List("A","C"), genotypeAttributes=Map("AF" -> 0.25)) - - // Fourth variant should come back as A/C/T with AFs = 0.0625, 0.0625 when noCallsAreHomRef, and a no-call otherwise - builder.addVariant(start=40, sampleName=Some("s1"), variantAlleles=List("A","C","T"), genotypeAlleles=List(".", ".")) - builder.addVariant(start=40, sampleName=Some("s2"), variantAlleles=List("A","C","T"), genotypeAlleles=List("A","T"), genotypeAttributes=Map("AF" -> 0.25)) - builder.addVariant(start=40, sampleName=Some("s3"), variantAlleles=List("A","C","T"), genotypeAlleles=List("A","C"), genotypeAttributes=Map("AF" -> 0.25)) - - val in = builder.toTempFile() + + val vcfBuilder = VcfBuilder(samples=samples) + vcfBuilder.add(pos=10, alleles=Seq("A", "C"), gts=Seq( + Gt(sample="s1", gt="0/0"), + Gt(sample="s2", gt="0/0"), + Gt(sample="s3", gt="0/1", attrs=Map("AF" -> 0.4)))) + vcfBuilder.add(pos=20, alleles=Seq("A", "C"), gts=Seq( + Gt(sample="s1", gt="1/1", attrs=Map("AF" -> 1.0)), + Gt(sample="s2", gt="1/1", attrs=Map("AF" -> 1.0)), + Gt(sample="s3", gt="1/1", attrs=Map("AF" -> 1.0)))) + vcfBuilder.add(pos=30, alleles=Seq("A", "C", "T"), gts=Seq( + Gt(sample="s1", gt="0/0"), + Gt(sample="s2", gt="0/2", attrs=Map("AF" -> 0.25)), + Gt(sample="s3", gt="0/1", attrs=Map("AF" -> 0.25)))) + vcfBuilder.add(pos=40, alleles=Seq("A", "C", "T"), gts=Seq( + Gt(sample="s1", gt="./."), + Gt(sample="s2", gt="0/2", attrs=Map("AF" -> 0.25)), + Gt(sample="s3", gt="0/1", attrs=Map("AF" -> 0.25)))) + + val in = vcfBuilder.toTempFile() val out = makeTempFile("mixture.", ".vcf") val mixer = new MakeMixtureVcf(input=in, output=out, samples=Seq("s1@0.5", "s2@0.25", "s3@0.25"), From dd80c2fcb57746d66978d3ac812d019db2ea40b7 Mon Sep 17 00:00:00 2001 From: Kari Stromhaug Date: Wed, 30 Mar 2022 16:02:26 -0600 Subject: [PATCH 12/17] Remove some comments, remove VariantContextSetBuilder from MakeTwoSampleMixtureVcfTest --- .../vcf/MakeMixtureVcfTest.scala | 7 +----- .../vcf/MakeTwoSampleMixtureVcfTest.scala | 10 ++++----- .../fulcrumgenomics/vcf/VariantMaskTest.scala | 22 +++++++++---------- 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/vcf/MakeMixtureVcfTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/MakeMixtureVcfTest.scala index 19ae0e893..007edbaae 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/MakeMixtureVcfTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/MakeMixtureVcfTest.scala @@ -26,7 +26,7 @@ package com.fulcrumgenomics.vcf import com.fulcrumgenomics.FgBioDef._ import com.fulcrumgenomics.testing.VcfBuilder.Gt -import com.fulcrumgenomics.testing.{SamBuilder, UnitSpec, VariantContextSetBuilder, VcfBuilder} +import com.fulcrumgenomics.testing.{SamBuilder, UnitSpec, VcfBuilder} import com.fulcrumgenomics.vcf.MakeMixtureVcf.Sample import htsjdk.variant.variantcontext.Allele import htsjdk.variant.vcf.{VCFFileReader, VCFHeader, VCFHeaderLine} @@ -167,11 +167,6 @@ class MakeMixtureVcfTest extends UnitSpec { it should "fail if a sample with non-hom-ref genotypes is missing it's AF attribute" in { val samples = Seq("s1", "s2") -// val builder = new VariantContextSetBuilder(samples) -// MakeMixtureVcf.HeaderLines.foreach(builder.addMetaDataLine) -// builder.addVariant(start=10, sampleName=Some("s1"), variantAlleles=List("A","C","T"), genotypeAlleles=List("A")) -// builder.addVariant(start=10, sampleName=Some("s2"), variantAlleles=List("A","C","T"), genotypeAlleles=List("A", "C")) - val vcfBuilder = VcfBuilder(samples=samples) vcfBuilder.add(pos=10, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0"), Gt(sample="s2", gt="0/1"))) val builder = new VCFFileReader(vcfBuilder.toTempFile()) diff --git a/src/test/scala/com/fulcrumgenomics/vcf/MakeTwoSampleMixtureVcfTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/MakeTwoSampleMixtureVcfTest.scala index 5969044c5..41a1e2e74 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/MakeTwoSampleMixtureVcfTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/MakeTwoSampleMixtureVcfTest.scala @@ -24,19 +24,19 @@ package com.fulcrumgenomics.vcf -import com.fulcrumgenomics.testing.{UnitSpec, VariantContextSetBuilder} -import htsjdk.variant.variantcontext.Allele import com.fulcrumgenomics.FgBioDef._ +import com.fulcrumgenomics.testing.VcfBuilder.Gt +import com.fulcrumgenomics.testing.{UnitSpec, VcfBuilder} +import htsjdk.variant.variantcontext.Allele import htsjdk.variant.vcf.VCFFileReader class MakeTwoSampleMixtureVcfTest extends UnitSpec { - private val builder = new VariantContextSetBuilder(sampleNames = List("s1", "s2")) + private val builder = VcfBuilder(samples=Seq("s1", "s2")) private val (_A, _C, _G, _T, _N) = ("A", "C", "G", "T", Allele.NO_CALL_STRING) def addVariant(pos: Int, refAllele: String, s1Allele1: String, s1Allele2: String, s2Allele1: String, s2Allele2: String) = { val alleles = List(refAllele) ++ Set(s1Allele1, s1Allele2, s2Allele1, s2Allele2).filterNot(_ == refAllele).filterNot(_ == _N) - builder.addVariant(start=pos, variantAlleles=alleles, sampleName=Some("s1"), genotypeAlleles=List(s1Allele1, s1Allele2)) - builder.addVariant(start=pos, variantAlleles=alleles, sampleName=Some("s2"), genotypeAlleles=List(s2Allele1, s2Allele2)) + builder.add(pos=pos, alleles=alleles, gts=Seq(Gt(sample="s1", gt=f"$s1Allele1/$s1Allele2"), Gt(sample="s2", gt=f"$s2Allele1/$s2Allele2"))) } addVariant(10, _A, _A, _A, _A, _A) // Monomorphic, should not come out diff --git a/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala index 33bf5ed15..8e24d668f 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala @@ -107,18 +107,18 @@ class VariantMaskTest extends UnitSpec { val mask = VariantMask(builder.toTempFile()) mask.isVariant(1, 100) shouldBe true } + +// it should "throw an exception if a VCF doesn't have a sequence dictionary in it" in { +// // TODO figure out how to test a vcf that doesn't have a header with VcfBuilder +//// val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) +//// builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","C"), genotypeAlleles=List("A", "C")) +//// builder.setSequenceDictionary(SequenceDictionary()) +// val builder = VcfBuilder(VcfHeader(contigs=IndexedSeq(), infos=Seq(), formats=Seq(), filters=Seq(), others=Seq(), samples=IndexedSeq("S1"))) +// builder.add(chrom="chr1", pos=100, alleles=Seq("A", "C")) // -//// it should "throw an exception if a VCF doesn't have a sequence dictionary in it" in { -//// // TODO figure out how to test a vcf that doesn't have a header with VcfBuilder -////// val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) -////// builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","C"), genotypeAlleles=List("A", "C")) -////// builder.setSequenceDictionary(SequenceDictionary()) -//// val builder = VcfBuilder(VcfHeader(contigs=IndexedSeq(), infos=Seq(), formats=Seq(), filters=Seq(), others=Seq(), samples=IndexedSeq("S1"))) -//// builder.add(chrom="chr1", pos=100, alleles=Seq("A", "C")) -//// -//// val vcf = builder.toTempFile() -//// an[Exception] shouldBe thrownBy { VariantMask(vcf) } -//// } +// val vcf = builder.toTempFile() +// an[Exception] shouldBe thrownBy { VariantMask(vcf) } +// } it should "throw an exception if requested to traverse backwards to an earlier reference" in { val builder = VcfBuilder(samples = Seq("S1")) From e0e7db24d1e1e44a54f1361dbec196da1222f845 Mon Sep 17 00:00:00 2001 From: Kari Stromhaug Date: Wed, 30 Mar 2022 16:12:05 -0600 Subject: [PATCH 13/17] Remove VariantContextSetBuilder from JoinVariantContextIteratorTest --- .../vcf/JointVariantContextIteratorTest.scala | 50 ++++++++++++------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/vcf/JointVariantContextIteratorTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/JointVariantContextIteratorTest.scala index 861bd2acc..a34deffb4 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/JointVariantContextIteratorTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/JointVariantContextIteratorTest.scala @@ -24,15 +24,17 @@ package com.fulcrumgenomics.vcf -import com.fulcrumgenomics.testing.{UnitSpec, VariantContextSetBuilder} +import com.fulcrumgenomics.testing.{UnitSpec, VariantContextSetBuilder, VcfBuilder} import htsjdk.variant.variantcontext.VariantContext +import htsjdk.variant.vcf.VCFFileReader + +import scala.jdk.CollectionConverters.IteratorHasAsScala /** * Tests for JointVariantContextIterator. */ class JointVariantContextIteratorTest extends UnitSpec { - import com.fulcrumgenomics.fasta.Converters.FromSAMSequenceDictionary - private val dict = new VariantContextSetBuilder().header.getSequenceDictionary.fromSam + private val dict = VcfBuilder(samples=Seq("s1")).header.dict private def compareVariantContexts(actual: VariantContext, expected: VariantContext): Unit = { actual.getContig shouldBe expected.getContig @@ -41,34 +43,44 @@ class JointVariantContextIteratorTest extends UnitSpec { } "JointVariantContextIterator" should "iterate variant contexts given a single iterator" in { - val builder = new VariantContextSetBuilder().addVariant(refIdx=0, start=1, variantAlleles=List("A")) - val iterator = JointVariantContextIterator(iters=Seq(builder.iterator), dict=dict) - compareVariantContexts(actual=iterator.next().head.get, expected=builder.head) + val vcfBuilder = VcfBuilder(samples=Seq("s1")).add(chrom="chr1", pos=1, alleles=Seq("A")) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) + + val iterator = JointVariantContextIterator(iters=Seq(builder.iterator().asScala), dict=dict) + compareVariantContexts(actual=iterator.next().head.get, expected=builder.iterator().next()) } it should "not return a variant context if all the iterators are empty" in { - val builder = new VariantContextSetBuilder() - val iterator = JointVariantContextIterator(iters=Seq(builder.iterator, builder.iterator), dict=dict) + val vcfBuilder = VcfBuilder(samples=Seq("s1")) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) + + val iterator = JointVariantContextIterator(iters=Seq(builder.iterator().asScala, builder.iterator().asScala), dict=dict) iterator.hasNext shouldBe false an[NoSuchElementException] should be thrownBy iterator.next() } it should "return a pair of variant contexts at the same position" in { - val builder = new VariantContextSetBuilder().addVariant(refIdx=0, start=1, variantAlleles=List("A")) - val iterator = JointVariantContextIterator(iters=Seq(builder.iterator, builder.iterator), dict=dict) + val vcfBuilder = VcfBuilder(samples=Seq("s1")).add(chrom="chr1", pos=1, alleles=Seq("A")) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) + + val iterator = JointVariantContextIterator(iters=Seq(builder.iterator().asScala, builder.iterator().asScala), dict=dict) iterator.hasNext shouldBe true val Seq(left, right) = iterator.next().flatten compareVariantContexts(left, right) } it should "return a None for an iterator that doesn't have a variant context for a given covered site" in { - val builderLeft = new VariantContextSetBuilder() - .addVariant(refIdx=0, start=10, variantAlleles=List("A")) - .addVariant(refIdx=0, start=30, variantAlleles=List("A")) - val builderRight = new VariantContextSetBuilder() - .addVariant(refIdx=0, start=10, variantAlleles=List("A")) - .addVariant(refIdx=0, start=20, variantAlleles=List("A")) - val iterator = JointVariantContextIterator(iters=Seq(builderLeft.iterator, builderRight.iterator), dict=dict) + val vcfBuilderLeft = VcfBuilder(samples=Seq("s1")) + .add(chrom="chr1", pos=10, alleles=Seq("A")) + .add(chrom="chr1", pos=30, alleles=Seq("A")) + val builderLeft = new VCFFileReader(vcfBuilderLeft.toTempFile()) + + val vcfBuilderRight = VcfBuilder(samples=Seq("s1")) + .add(chrom="chr1", pos=10, alleles=Seq("A")) + .add(chrom="chr1", pos=20, alleles=Seq("A")) + val builderRight = new VCFFileReader(vcfBuilderRight.toTempFile()) + + val iterator = JointVariantContextIterator(iters=Seq(builderLeft.iterator().asScala, builderRight.iterator().asScala), dict=dict) // pos: 10 status: both iterator.hasNext shouldBe true iterator.next().flatten match { @@ -77,12 +89,12 @@ class JointVariantContextIteratorTest extends UnitSpec { // pos: 20 status: right iterator.hasNext shouldBe true iterator.next() match { - case Seq(None, Some(right)) => compareVariantContexts(right, builderRight.last) + case Seq(None, Some(right)) => compareVariantContexts(right, builderRight.iterator().asScala.toSeq.last) } // pos: 30 status: left iterator.hasNext shouldBe true iterator.next() match { - case Seq(Some(left), None) => compareVariantContexts(left, builderLeft.last) + case Seq(Some(left), None) => compareVariantContexts(left, builderLeft.iterator().asScala.toSeq.last) } } } From 7f63cf1e281df030767cfb168fa5477331049c2e Mon Sep 17 00:00:00 2001 From: Kari Stromhaug Date: Wed, 30 Mar 2022 16:12:51 -0600 Subject: [PATCH 14/17] Remove and import --- .../fulcrumgenomics/vcf/JointVariantContextIteratorTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/scala/com/fulcrumgenomics/vcf/JointVariantContextIteratorTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/JointVariantContextIteratorTest.scala index a34deffb4..9f53e7193 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/JointVariantContextIteratorTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/JointVariantContextIteratorTest.scala @@ -24,7 +24,7 @@ package com.fulcrumgenomics.vcf -import com.fulcrumgenomics.testing.{UnitSpec, VariantContextSetBuilder, VcfBuilder} +import com.fulcrumgenomics.testing.{UnitSpec, VcfBuilder} import htsjdk.variant.variantcontext.VariantContext import htsjdk.variant.vcf.VCFFileReader From c67871d067b3a20f37fc928bea317e589ed48303 Mon Sep 17 00:00:00 2001 From: Kari Stromhaug Date: Wed, 30 Mar 2022 16:24:10 -0600 Subject: [PATCH 15/17] Remove VariantContextSetBuider from ByIntervalListContextIteratorTest --- ...tervalListVariantContextIteratorTest.scala | 66 +++++++++++-------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/vcf/ByIntervalListVariantContextIteratorTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/ByIntervalListVariantContextIteratorTest.scala index d379a30ab..8a625229e 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/ByIntervalListVariantContextIteratorTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/ByIntervalListVariantContextIteratorTest.scala @@ -25,7 +25,8 @@ package com.fulcrumgenomics.vcf import com.fulcrumgenomics.FgBioDef._ -import com.fulcrumgenomics.testing.{UnitSpec, VariantContextSetBuilder} +import com.fulcrumgenomics.testing.VcfBuilder.Gt +import com.fulcrumgenomics.testing.{UnitSpec, VcfBuilder} import htsjdk.samtools.SAMFileHeader import htsjdk.samtools.util.{Interval, IntervalList} import htsjdk.variant.variantcontext.VariantContext @@ -36,8 +37,7 @@ import htsjdk.variant.vcf.VCFFileReader */ class ByIntervalListVariantContextIteratorTest extends UnitSpec { - private val dict = new VariantContextSetBuilder().header.getSequenceDictionary - + private val dict = VcfBuilder(samples=Seq("s1")).header.dict.toSam private def emtpyIntervalList(): IntervalList = { val header = new SAMFileHeader header.setSequenceDictionary(this.dict) @@ -59,31 +59,34 @@ class ByIntervalListVariantContextIteratorTest extends UnitSpec { "ByIntervalListVariantContextIterator" should "return no variants if the interval list is empty" in { Iterator(true, false).foreach { useIndex => - val builder = new VariantContextSetBuilder().addVariant(refIdx=0, start=1, variantAlleles=List("A"), genotypeAlleles=List("A")) - val iterator = toIterator(reader=builder.toVcfFileReader(), intervalList=emtpyIntervalList(), useIndex=useIndex) + val vcfBuilder = VcfBuilder(samples=Seq("s1")).add(chrom="chr1", pos=1, alleles=Seq("A"), gts=Seq(Gt(sample="s1", gt="0/0"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) + val iterator = toIterator(reader=builder, intervalList=emtpyIntervalList(), useIndex=useIndex) iterator.isEmpty shouldBe true } } it should "return no variants if the variants are empty" in { Iterator(true, false).foreach { useIndex => - val builder = new VariantContextSetBuilder() + val vcfBuilder = VcfBuilder(samples=Seq("s1")) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) val intervalList = emtpyIntervalList() intervalList.add(new Interval(dict.getSequence(0).getSequenceName, 1, 1000, false, "foo")) - val iterator = toIterator(reader=builder.toVcfFileReader(), intervalList=emtpyIntervalList(), useIndex=useIndex) + val iterator = toIterator(reader=builder, intervalList=emtpyIntervalList(), useIndex=useIndex) iterator.isEmpty shouldBe true } } it should "return a variant context if it overlaps an interval" in { Iterator(true, false).foreach { useIndex => - val builder = new VariantContextSetBuilder().addVariant(refIdx=0, start=500, variantAlleles=List("A"), genotypeAlleles=List("A")) + val vcfBuilder = VcfBuilder(samples=Seq("s1")).add(chrom="chr1", pos=500, alleles=Seq("A"), gts=Seq(Gt(sample="s1", gt="0/0"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) val intervalList = emtpyIntervalList() intervalList.add(new Interval(dict.getSequence(0).getSequenceName, 1, 1000, false, "foo")) - val iterator = toIterator(reader=builder.toVcfFileReader(), intervalList=intervalList, useIndex=useIndex) + val iterator = toIterator(reader=builder, intervalList=intervalList, useIndex=useIndex) iterator.isEmpty shouldBe false val actual = iterator.next() - val expected = builder.head + val expected = builder.iterator().next() actual.getContig shouldBe expected.getContig actual.getStart shouldBe expected.getStart actual.getEnd shouldBe expected.getEnd @@ -93,28 +96,31 @@ class ByIntervalListVariantContextIteratorTest extends UnitSpec { it should "not return a variant context if it doesn't overlap an interval (same chromosome)" in { Iterator(true, false).foreach { useIndex => - val builder = new VariantContextSetBuilder().addVariant(refIdx=0, start=500, variantAlleles=List("A"), genotypeAlleles=List("A")) + val vcfBuilder = VcfBuilder(samples=Seq("s1")).add(chrom="chr1", pos=500, alleles=Seq("A"), gts=Seq(Gt(sample="s1", gt="0/0"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) val intervalList = emtpyIntervalList() intervalList.add(new Interval(dict.getSequence(0).getSequenceName, 750, 1000, false, "foo")) - val iterator = toIterator(reader=builder.toVcfFileReader(), intervalList=intervalList, useIndex=useIndex) + val iterator = toIterator(reader=builder, intervalList=intervalList, useIndex=useIndex) iterator.isEmpty shouldBe true } } it should "not return a variant context if it doesn't overlap an interval (different chromosome)" in { Iterator(true, false).foreach { useIndex => - val builder = new VariantContextSetBuilder().addVariant(refIdx=0, start=500, variantAlleles=List("A"), genotypeAlleles=List("A")) + val vcfBuilder = VcfBuilder(samples=Seq("s1")).add(chrom="chr1", pos=500, alleles=Seq("A"), gts=Seq(Gt(sample="s1", gt="0/0"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) val intervalList = emtpyIntervalList() intervalList.add(new Interval(dict.getSequence(1).getSequenceName, 1, 1000, false, "foo")) - val iterator = toIterator(reader=builder.toVcfFileReader(), intervalList=intervalList, useIndex=useIndex) + val iterator = toIterator(reader=builder, intervalList=intervalList, useIndex=useIndex) iterator.isEmpty shouldBe true } } it should "throw an exception when next() is call but hasNext() is false" in { Iterator(true, false).foreach { useIndex => - val builder = new VariantContextSetBuilder().addVariant(refIdx=0, start=1, variantAlleles=List("A"), genotypeAlleles=List("A")) - val iterator = toIterator(reader=builder.toVcfFileReader(), intervalList=emtpyIntervalList(), useIndex=useIndex) + val vcfBuilder = VcfBuilder(samples=Seq("s1")).add(chrom="chr1", pos=1, alleles=Seq("A"), gts=Seq(Gt(sample="s1", gt="0/0"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) + val iterator = toIterator(reader=builder, intervalList=emtpyIntervalList(), useIndex=useIndex) iterator.hasNext shouldBe false an[Exception] should be thrownBy iterator.next() } @@ -122,13 +128,14 @@ class ByIntervalListVariantContextIteratorTest extends UnitSpec { it should "return a variant context if it encloses an interval" in { Iterator(true, false).foreach { useIndex => - val builder = new VariantContextSetBuilder().addVariant(refIdx=0, start=495, variantAlleles=List("AAAAA", "A"), genotypeAlleles=List("A")) + val vcfBuilder = VcfBuilder(samples=Seq("s1")).add(chrom="chr1", pos=495, alleles=Seq("AAAAA", "A"), gts=Seq(Gt(sample="s1", gt="1/1"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) val intervalList = emtpyIntervalList() intervalList.add(new Interval(dict.getSequence(0).getSequenceName, 496, 496, false, "foo")) - val iterator = toIterator(reader=builder.toVcfFileReader(), intervalList=intervalList, useIndex=useIndex) + val iterator = toIterator(reader=builder, intervalList=intervalList, useIndex=useIndex) iterator.isEmpty shouldBe false val actual = iterator.next() - val expected = builder.head + val expected = builder.iterator().next() actual.getContig shouldBe expected.getContig actual.getStart shouldBe expected.getStart actual.getEnd shouldBe expected.getEnd @@ -138,14 +145,15 @@ class ByIntervalListVariantContextIteratorTest extends UnitSpec { it should "return a variant context only once if it overlaps multiple intervals" in { Iterator(true, false).foreach { useIndex => - val builder = new VariantContextSetBuilder().addVariant(refIdx=0, start=495, variantAlleles=List("AAAAA", "A"), genotypeAlleles=List("A")) + val vcfBuilder = VcfBuilder(samples=Seq("s1")).add(chrom="chr1", pos=495, alleles=Seq("AAAAA", "A"), gts=Seq(Gt(sample="s1", gt="1/1"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) val intervalList = emtpyIntervalList() intervalList.add(new Interval(dict.getSequence(0).getSequenceName, 496, 496, false, "foo")) intervalList.add(new Interval(dict.getSequence(0).getSequenceName, 500, 500, false, "foo")) - val iterator = toIterator(reader=builder.toVcfFileReader(), intervalList=intervalList, useIndex=useIndex) + val iterator = toIterator(reader=builder, intervalList=intervalList, useIndex=useIndex) iterator.isEmpty shouldBe false val actual = iterator.next() - val expected = builder.head + val expected = builder.iterator().next() actual.getContig shouldBe expected.getContig actual.getStart shouldBe expected.getStart actual.getEnd shouldBe expected.getEnd @@ -154,13 +162,14 @@ class ByIntervalListVariantContextIteratorTest extends UnitSpec { } it should "throw an exception when intervals are given out of order when using the VCF index" in { - val builder = new VariantContextSetBuilder() - .addVariant(refIdx=0, start=495, variantAlleles=List("AAAAA", "A"), genotypeAlleles=List("A")) - .addVariant(refIdx=0, start=595, variantAlleles=List("AAAAA", "A"), genotypeAlleles=List("A")) + val vcfBuilder = VcfBuilder(samples=Seq("s1")) + .add(chrom="chr1", pos=495, alleles=Seq("AAAAA", "A"), gts=Seq(Gt(sample="s1", gt="1/1"))) + .add(chrom="chr1", pos=595, alleles=Seq("AAAAA", "A"), gts=Seq(Gt(sample="s1", gt="1/1"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) val intervalList = emtpyIntervalList() intervalList.add(new Interval(dict.getSequence(0).getSequenceName, 494, 500, false, "foo")) intervalList.add(new Interval(dict.getSequence(0).getSequenceName, 500, 500, false, "foo")) - val iterator = toIterator(reader=builder.toVcfFileReader(), intervalList=intervalList, useIndex=true) + val iterator = toIterator(reader=builder, intervalList=intervalList, useIndex=true) // OK, since we are overlapping the first interval iterator.isEmpty shouldBe false // NOK, since the intervals were overlapping when we pre-fetch the second variant context @@ -169,10 +178,11 @@ class ByIntervalListVariantContextIteratorTest extends UnitSpec { it should "ignore a variant context if does not overlap an interval" in { Iterator(true, false).foreach { useIndex => - val builder = new VariantContextSetBuilder().addVariant(refIdx=0, start=495, variantAlleles=List("A", "C"), genotypeAlleles=List("C")) + val vcfBuilder = VcfBuilder(samples=Seq("s1")).add(chrom="chr1", pos=495, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="1/1"))) + val builder = new VCFFileReader(vcfBuilder.toTempFile()) val intervalList = emtpyIntervalList() intervalList.add(new Interval(dict.getSequence(0).getSequenceName, 500, 500, false, "foo")) - val iterator = toIterator(reader=builder.toVcfFileReader(), intervalList=intervalList, useIndex=useIndex) + val iterator = toIterator(reader=builder, intervalList=intervalList, useIndex=useIndex) iterator.isEmpty shouldBe true } } From e554bdda173dc0263ce1aa6e0e312d31d700b6bd Mon Sep 17 00:00:00 2001 From: Kari Stromhaug Date: Thu, 31 Mar 2022 17:53:51 -0600 Subject: [PATCH 16/17] Fix UpdateVcfContigNames tests and remove one test in VariantMaskTest --- .../vcf/UpdateVcfContigNamesTest.scala | 45 ++++++++++++------- .../fulcrumgenomics/vcf/VariantMaskTest.scala | 12 ----- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/vcf/UpdateVcfContigNamesTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/UpdateVcfContigNamesTest.scala index a0bfd0ce8..a1c080b7b 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/UpdateVcfContigNamesTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/UpdateVcfContigNamesTest.scala @@ -28,7 +28,8 @@ import com.fulcrumgenomics.FgBioDef.javaIterableToIterator import com.fulcrumgenomics.commons.io.PathUtil import com.fulcrumgenomics.fasta.Converters.FromSAMSequenceDictionary import com.fulcrumgenomics.fasta.SequenceDictionary -import com.fulcrumgenomics.testing.{UnitSpec, VariantContextSetBuilder} +import com.fulcrumgenomics.testing.{UnitSpec, VcfBuilder} +import com.fulcrumgenomics.vcf.api.{VcfContigHeader, VcfHeader} import htsjdk.variant.vcf.VCFFileReader class UpdateVcfContigNamesTest extends UnitSpec { @@ -40,13 +41,16 @@ class UpdateVcfContigNamesTest extends UnitSpec { private val targetDict = SequenceDictionary.extract(this.targetDictPath) "UpdateVcfContigNames" should "update the contig names" in { - - - val builder = new VariantContextSetBuilder() - builder.setSequenceDictionary(sourceDict) - builder.addVariant(refIdx = 0, start = 1, variantAlleles = List("A", "C")) // chr1 - builder.addVariant(refIdx = 10, start = 10, variantAlleles = List("A", "C")) // chr2 - + val builder = VcfBuilder(header=VcfHeader( + contigs=IndexedSeq(VcfContigHeader(0, "chr1"), VcfContigHeader(1, "chr2"), VcfContigHeader(3, "NC_000002.12")), + infos=Seq(), + formats=Seq(), + filters=Seq(), + samples=IndexedSeq("S1"), + others=Seq()) + ) + builder.add(chrom="chr1", pos=1, alleles=Seq("A", "C")) // chr1 + builder.add(chrom="NC_000002.12", pos=10, alleles=Seq("A", "C")) // chr2 val output = makeTempFile("output", ".vcf.gz") val tool = new UpdateVcfContigNames(input = builder.toTempFile(), output = output, dict = this.targetDictPath, skipMissing = false) executeFgbioTool(tool) @@ -71,10 +75,15 @@ class UpdateVcfContigNamesTest extends UnitSpec { } it should "fail when a contig cannot be updated" in { - val builder = new VariantContextSetBuilder() - builder.setSequenceDictionary(sourceDict) - builder.addVariant(refIdx = 594, start = 1, variantAlleles = List("A", "C")) // dummy - + val builder = VcfBuilder(header=VcfHeader( + contigs=sourceDict.map(s => VcfContigHeader(s.index, s.name)).toIndexedSeq, + infos=Seq(), + formats=Seq(), + filters=Seq(), + samples=IndexedSeq("S1"), + others=Seq()) + ) + builder.add(chrom="dummy", pos=1, alleles=Seq("A", "C")) val output = makeTempFile("output", ".vcf.gz") val tool = new UpdateVcfContigNames(input = builder.toTempFile(), output = output, dict = this.targetDictPath, skipMissing = false) @@ -83,9 +92,15 @@ class UpdateVcfContigNamesTest extends UnitSpec { } it should "skip contigs that cannot be updated when --skip-missing is used" in { - val builder = new VariantContextSetBuilder() - builder.setSequenceDictionary(sourceDict) - builder.addVariant(refIdx = 594, start = 1, variantAlleles = List("A", "C")) // dummy + val builder = VcfBuilder(header=VcfHeader( + contigs=sourceDict.map(s => VcfContigHeader(s.index, s.name)).toIndexedSeq, + infos=Seq(), + formats=Seq(), + filters=Seq(), + samples=IndexedSeq("S1"), + others=Seq()) + ) + builder.add(chrom="dummy", pos=1, alleles=Seq("A", "C")) val output = makeTempFile("output", ".vcf.gz") val tool = new UpdateVcfContigNames(input = builder.toTempFile(), output = output, dict = this.targetDictPath, skipMissing = true) diff --git a/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala index 8e24d668f..461650823 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/VariantMaskTest.scala @@ -108,18 +108,6 @@ class VariantMaskTest extends UnitSpec { mask.isVariant(1, 100) shouldBe true } -// it should "throw an exception if a VCF doesn't have a sequence dictionary in it" in { -// // TODO figure out how to test a vcf that doesn't have a header with VcfBuilder -//// val builder = new VariantContextSetBuilder().setSequenceDictionary(dict) -//// builder.addVariant(refIdx=1, start=100, variantAlleles=List("A","C"), genotypeAlleles=List("A", "C")) -//// builder.setSequenceDictionary(SequenceDictionary()) -// val builder = VcfBuilder(VcfHeader(contigs=IndexedSeq(), infos=Seq(), formats=Seq(), filters=Seq(), others=Seq(), samples=IndexedSeq("S1"))) -// builder.add(chrom="chr1", pos=100, alleles=Seq("A", "C")) -// -// val vcf = builder.toTempFile() -// an[Exception] shouldBe thrownBy { VariantMask(vcf) } -// } - it should "throw an exception if requested to traverse backwards to an earlier reference" in { val builder = VcfBuilder(samples = Seq("S1")) builder.add(chrom="chr1", pos=100, alleles=Seq("A", "T")) From 35a121c13cd359d2d8f45acd297627a2530b7199 Mon Sep 17 00:00:00 2001 From: Kari Stromhaug Date: Fri, 8 Apr 2022 14:52:57 -0600 Subject: [PATCH 17/17] Fixed last few tests for contextsetbuilder --- .../vcf/AssessPhasingTest.scala | 39 ++++++++----------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala b/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala index bc9a214bc..a896f805d 100644 --- a/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala +++ b/src/test/scala/com/fulcrumgenomics/vcf/AssessPhasingTest.scala @@ -678,23 +678,20 @@ class PhaseCigarTest extends ErrorLogLevel { metric.num_truth_phased shouldBe 1 } } - + // split into three different unit tests can put 683-693 into different function, and put three subtests in separate tests it should "create a cigar when both truth and call variants are present and both are phased but mismatch alleles" in { - // TODO fix this test - val builderTruth = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) -// val vcfBuilderTruth = VcfBuilder(samples=Seq("s1")).add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) -// val builderTruth = new VCFFileReader(vcfBuilderTruth.toTempFile()) + val vcfBuilderTruth = VcfBuilder(samples=Seq("s1")).add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builderTruth = new VCFFileReader(vcfBuilderTruth.toTempFile()) - val builderCall = new VariantContextSetBuilder().addVariant(start=1, variantAlleles=List("A", "C"), genotypeAlleles=List("C", "A"), phased=true) -// val vcfBuilderCall = VcfBuilder(samples=Seq("s1")).add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) -// val builderCall = new VCFFileReader(vcfBuilderCall.toTempFile()) + val vcfBuilderCall = VcfBuilder(samples=Seq("s1")).add(pos=1, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="1|0"))) + val builderCall = new VCFFileReader(vcfBuilderCall.toTempFile()) - val truth = withPhasingSetId(builderTruth.head, 1) - val call = withPhasingSetId(builderCall.head, 1) + val truth = withPhasingSetId(builderTruth.iterator().next(), 1) + val call = withPhasingSetId(builderCall.iterator().next(), 1) // both variants are phased, phase is inverted, and we assume a fixed order, so a mismatch { - val (cigar, metric) = toCigar(truth = Seq(truth), call = Seq(call), header = builderTruth.header, skipMismatchingAlleles = true, assumeFixedAlleleOrder = true) + val (cigar, metric) = toCigar(truth = Seq(truth), call = Seq(call), header = builderTruth.getFileHeader, skipMismatchingAlleles = true, assumeFixedAlleleOrder = true) cigar should contain theSameElementsInOrderAs Seq(BothEnd, Mismatch, BothEnd) metric.num_called shouldBe 1 metric.num_phased shouldBe 1 @@ -703,7 +700,7 @@ class PhaseCigarTest extends ErrorLogLevel { } // both variants are phased, phase is inverted, and we don't assume a fixed order, so a match { - val (cigar, metric) = toCigar(truth = Seq(truth), call = Seq(call), header = builderTruth.header, skipMismatchingAlleles = true, assumeFixedAlleleOrder = false) + val (cigar, metric) = toCigar(truth = Seq(truth), call = Seq(call), header = builderTruth.getFileHeader, skipMismatchingAlleles = true, assumeFixedAlleleOrder = false) cigar should contain theSameElementsInOrderAs Seq(BothEnd, Match, BothEnd) metric.num_called shouldBe 1 metric.num_phased shouldBe 1 @@ -712,16 +709,14 @@ class PhaseCigarTest extends ErrorLogLevel { } // first site is a match, second is a mismatch since we inverted after the first { - builderTruth.addVariant(start=2, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) - builderCall.addVariant(start=2, variantAlleles=List("A", "C"), genotypeAlleles=List("A", "C"), phased=true) -// vcfBuilderTruth.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) -// vcfBuilderCall.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) -// val builderTruth = new VCFFileReader(vcfBuilderTruth.toTempFile()) -// val builderCall = new VCFFileReader(vcfBuilderCall.toTempFile()) - val truthTwo = withPhasingSetId(builderTruth.last, 1) - val callTwo = withPhasingSetId(builderCall.last, 1) - - val (cigar, metric) = toCigar(truth = Seq(truth, truthTwo), call = Seq(call, callTwo), header = builderTruth.header, skipMismatchingAlleles = true, assumeFixedAlleleOrder = false) + vcfBuilderTruth.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + vcfBuilderCall.add(pos=2, alleles=Seq("A", "C"), gts=Seq(Gt(sample="s1", gt="0|1"))) + val builderTruth = new VCFFileReader(vcfBuilderTruth.toTempFile()) + val builderCall = new VCFFileReader(vcfBuilderCall.toTempFile()) + val truthTwo = withPhasingSetId(builderTruth.iterator().toSeq.last, 1) + val callTwo = withPhasingSetId(builderCall.iterator().toSeq.last, 1) + + val (cigar, metric) = toCigar(truth = Seq(truth, truthTwo), call = Seq(call, callTwo), header = builderTruth.getFileHeader, skipMismatchingAlleles = true, assumeFixedAlleleOrder = false) cigar should contain theSameElementsInOrderAs Seq(BothEnd, Match, Mismatch, BothEnd) metric.num_called shouldBe 2 metric.num_phased shouldBe 2