From 65d71ee7699a7ac2f0cb343f0774e9a45e8fafda Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Wed, 9 Mar 2022 17:08:00 -0700 Subject: [PATCH 1/2] Add scala implementations for some SamRecord methods * refPosAtReadPos * readPosAtRefPos --- .../bam/SamRecordClipper.scala | 17 ++-- .../fulcrumgenomics/bam/api/SamRecord.scala | 80 ++++++++++++++++++- .../bam/pileup/PileupBuilder.scala | 18 +++-- .../umi/ReviewConsensusVariants.scala | 4 +- .../bam/api/SamRecordTest.scala | 44 ++++++++++ 5 files changed, 143 insertions(+), 20 deletions(-) diff --git a/src/main/scala/com/fulcrumgenomics/bam/SamRecordClipper.scala b/src/main/scala/com/fulcrumgenomics/bam/SamRecordClipper.scala index 151d87ff0..0df84075b 100644 --- a/src/main/scala/com/fulcrumgenomics/bam/SamRecordClipper.scala +++ b/src/main/scala/com/fulcrumgenomics/bam/SamRecordClipper.scala @@ -309,10 +309,15 @@ class SamRecordClipper(val mode: ClippingMode, val autoClipAttributes: Boolean) // mid point, or if the mid point is in a deletion, the base prior to the deletion. The mate ends at the mid // point, or if the mid point is in a deletion, the base after the deletion. val midPoint = (rec.start + mate.end) / 2 - val readEnd = rec.readPosAtRefPos(pos=midPoint, returnLastBaseIfDeleted=true) - val mateStart = { // NB: need to be careful if the midpoint falls in a deletion - val retval = mate.readPosAtRefPos(pos=midPoint + 1, returnLastBaseIfDeleted=false) - if (retval != 0) retval else mate.readPosAtRefPos(pos=midPoint + 1, returnLastBaseIfDeleted=true) + 1 + val readEnd = rec.readPosAtRefPos(pos=midPoint, returnLastBaseIfDeleted=true).getOrElse( + unreachable("Expected to find reference position in read") + ) + val mateStart: Int = { // NB: need to be careful if the midpoint falls in a deletion + mate.readPosAtRefPos(pos=midPoint + 1, returnLastBaseIfDeleted=false).getOrElse( + 1 + mate.readPosAtRefPos(pos=midPoint + 1, returnLastBaseIfDeleted=true).getOrElse( + unreachable("Expected to find reference position in a deletion in mate") + ) + ) } val numOverlappingBasesRead = this.clip3PrimeEndOfRead(rec, rec.cigar.trailingClippedBases + rec.length - readEnd) val numOverlappingBasesMate = this.clip3PrimeEndOfRead(mate, mate.cigar.leadingClippedBases + mateStart - 1) @@ -342,11 +347,11 @@ class SamRecordClipper(val mode: ClippingMode, val autoClipAttributes: Boolean) else { if (rec.positiveStrand && rec.end >= mateEnd) { // clip from where last read base of where the mate ends - Math.max(0, rec.length - rec.readPosAtRefPos(pos=mateEnd, returnLastBaseIfDeleted=false)) + Math.max(0, rec.length - rec.readPosAtRefPos(pos=mateEnd, returnLastBaseIfDeleted=false).getOrElse(0)) } else if (rec.negativeStrand && rec.start <= rec.mateStart) { // clip up to and including one base before where the mate starts - Math.max(0, rec.readPosAtRefPos(pos=rec.mateStart, returnLastBaseIfDeleted=false) - 1) + Math.max(0, rec.readPosAtRefPos(pos=rec.mateStart, returnLastBaseIfDeleted=false).getOrElse(0) - 1) } else { // no bases extend past 0 diff --git a/src/main/scala/com/fulcrumgenomics/bam/api/SamRecord.scala b/src/main/scala/com/fulcrumgenomics/bam/api/SamRecord.scala index cf05b54c6..7a07d2150 100644 --- a/src/main/scala/com/fulcrumgenomics/bam/api/SamRecord.scala +++ b/src/main/scala/com/fulcrumgenomics/bam/api/SamRecord.scala @@ -236,9 +236,83 @@ trait SamRecord { // transient attributes @inline final val transientAttrs: TransientAttrs = new TransientAttrs(this) - // TODO long-term: replace these two methods with methods on [[Cigar]] to save creating alignment blocks in memory - @inline final def refPosAtReadPos(pos: Int) = getReferencePositionAtReadPosition(pos) - @inline final def readPosAtRefPos(pos: Int, returnLastBaseIfDeleted: Boolean) = getReadPositionAtReferencePosition(pos, returnLastBaseIfDeleted) + /** Returns the 1-based reference position for the 1-based position in the read, or [[None]] if the reference does + * not map at the read position (e.g. insertion). + * + * @param pos the 1-based read position to query + * @param returnLastBaseIfInserted if the reference is an insertion, true to returned the previous reference base, + * false to return None + * */ + @inline final def refPosAtReadPos(pos: Int, returnLastBaseIfInserted: Boolean = false): Option[Int] = if (pos <= 0) None else { + var readPos = 1 + var refPos = this.start + var elementIndex = 0 + val elems = this.cigar.elems + + def continue(): Boolean = if (elementIndex >= elems.length) false else { + val elem = elems(elementIndex) + if (pos > CoordMath.getEnd(readPos, elem.lengthOnQuery)) true // current cigar element is before the desired position + else { + if (pos < readPos) readPos = 0 // past the point in the read + else if (elem.operator == CigarOperator.INSERTION) { + if (!returnLastBaseIfInserted) refPos = 0 + else refPos -= 1 + } // in an insertion, no reference position + else refPos += pos - readPos // get the offset + false + } + } + + while (continue()) { + val elem = elems(elementIndex) + refPos += elem.lengthOnTarget + readPos += elem.lengthOnQuery + elementIndex += 1 + } + + if (this.start <= refPos && readPos <= this.end) Some(refPos) else None + } + + + /** Returns the 1-based position into the read's bases where the position is mapped either as a match or mismatch, + * [[None]] if the read does not map the position. + * + * @param pos the 1-based reference position to query + * @param returnLastBaseIfDeleted if the reference is a deletion, true to returned the previous read base, false to + * return None + * */ + @inline final def readPosAtRefPos(pos: Int, returnLastBaseIfDeleted: Boolean = false): Option[Int] = { + if (this.unmapped || pos < this.start || this.end < pos) None + else { // overlaps + var readPos = 0 + var refPos = this.start + var elementIndex = 0 + val elems = this.cigar.elems + + def continue(): Boolean = if (elementIndex >= elems.length) false else { + val elem = elems(elementIndex) + if (pos > CoordMath.getEnd(refPos, elem.lengthOnTarget)) true // current cigar element is before the desired position + else { // overlaps! + if (elem.operator == CigarOperator.DELETION) { // deletion + if (!returnLastBaseIfDeleted) readPos = 0 // don't return a read position, so zero out the current read position + } else { + readPos += pos - refPos + 1 // get the offset + } + false + } + } + + while (continue()) { + val elem = elems(elementIndex) + refPos += elem.lengthOnTarget + readPos += elem.lengthOnQuery + elementIndex += 1 + } + + if (0 < readPos && readPos <= this.length) Some(readPos) else None + } + } + //////////////////////////////////////////////////////////////////////////// // Non-wrapper methods. diff --git a/src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala b/src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala index 89963cf96..a4d761036 100644 --- a/src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala +++ b/src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala @@ -209,14 +209,16 @@ trait PileupBuilder extends PileupParameters { if (rec.start == pos + 1) { // This site must be an insertion in the read that is at the start of the read. testAndAdd(InsertionEntry(rec, 0)) } else { - val offset = rec.readPosAtRefPos(pos, returnLastBaseIfDeleted = false) - if (offset == 0) { // This site must be deleted within the read. - val deletionPosition = rec.readPosAtRefPos(pos, returnLastBaseIfDeleted = true) - testAndAdd(DeletionEntry(rec, deletionPosition - 1)) - } else { // This site must be a matched site within the read. - testAndAdd(BaseEntry(rec, offset - 1)) - // Also check to see if the subsequent base represents an insertion. - if (offset < rec.length - 1 && rec.refPosAtReadPos(offset + 1) == 0) testAndAdd(InsertionEntry(rec, offset)) + rec.readPosAtRefPos(pos, returnLastBaseIfDeleted = false) match { + case None => // This site must be deleted within the read. + val deletionPosition = rec.readPosAtRefPos(pos, returnLastBaseIfDeleted=true).getOrElse( + unreachable("Bug: expected to get the read position within a deletion") + ) + testAndAdd(DeletionEntry(rec, deletionPosition - 1)) + case Some(offset) => // This site must be a matched site within the read. + testAndAdd(BaseEntry(rec, offset - 1)) + // Also check to see if the subsequent base represents an insertion. + if (offset < rec.length - 1 && rec.refPosAtReadPos(offset + 1).isEmpty) testAndAdd(InsertionEntry(rec, offset)) } } } diff --git a/src/main/scala/com/fulcrumgenomics/umi/ReviewConsensusVariants.scala b/src/main/scala/com/fulcrumgenomics/umi/ReviewConsensusVariants.scala index e33e3fe0e..850779feb 100644 --- a/src/main/scala/com/fulcrumgenomics/umi/ReviewConsensusVariants.scala +++ b/src/main/scala/com/fulcrumgenomics/umi/ReviewConsensusVariants.scala @@ -338,9 +338,7 @@ class ReviewConsensusVariants private def nonReferenceAtAnyVariant(rec: SamRecord, variantsByChromAndPos: Map[String, Seq[Variant]]): Boolean = { rec.mapped && variantsByChromAndPos(rec.refName).exists { v => if (v.start >= rec.start && v.start <= rec.end) { - val readPos = rec.readPosAtRefPos(v.start, false) - if (readPos == 0) true - else { + rec.readPosAtRefPos(v.start, false).forall { readPos => val base = rec.bases(readPos - 1) !SequenceUtil.basesEqual(base, v.refBase.toByte) && (!this.ignoreNsInConsensusReads || !SequenceUtil.isNoCall(base)) } diff --git a/src/test/scala/com/fulcrumgenomics/bam/api/SamRecordTest.scala b/src/test/scala/com/fulcrumgenomics/bam/api/SamRecordTest.scala index 38251e0c4..bfc833137 100644 --- a/src/test/scala/com/fulcrumgenomics/bam/api/SamRecordTest.scala +++ b/src/test/scala/com/fulcrumgenomics/bam/api/SamRecordTest.scala @@ -289,4 +289,48 @@ class SamRecordTest extends UnitSpec with OptionValues { rec1.matesOverlap shouldBe None // Mate's start is not enclosed by rec, and mate's end cannot be determined rec2.matesOverlap.value shouldBe true // Mate's start is enclosed by rec, regardless of where mate end is } + + "SamRecord.readPosAtRefPos" should "return the correct value" in { + case class ReadPosAtRefPos(cigar: String, refPos: Int, readPos: Option[Int], returnLastBaseIfDeleted: Boolean) + val testCases = Seq( + ReadPosAtRefPos("3S9M", 7, Some(10), false), + ReadPosAtRefPos("3S9M", 0, None, false), + ReadPosAtRefPos("3S9M", -1, None, false), + ReadPosAtRefPos("3S9M", 13, None, false), + ReadPosAtRefPos("4M1D6M", 4, Some(4), false), + ReadPosAtRefPos("4M1D6M", 4, Some(4), true), + ReadPosAtRefPos("4M1D6M", 5, None, false), + ReadPosAtRefPos("4M1D6M", 5, Some(4), true), + ReadPosAtRefPos("4M1I6M", 5, Some(6), false), + ReadPosAtRefPos("4M1I6M", 11, None, false) + ) + testCases.foreach { testCase => + val frag = new SamBuilder(readLength=Cigar(testCase.cigar).lengthOnQuery).addFrag(start=1, cigar=testCase.cigar).value + val readPos = frag.readPosAtRefPos(pos=testCase.refPos, returnLastBaseIfDeleted=testCase.returnLastBaseIfDeleted) + readPos shouldBe testCase.readPos + readPos.getOrElse(0) shouldBe frag.asSam.getReadPositionAtReferencePosition(testCase.refPos, testCase.returnLastBaseIfDeleted) + } + } + + "SamRecord.refPosAtReadPos" should "return the correct value" in { + case class RefPosAtReadPos(cigar: String, refPos: Option[Int], readPos: Int, returnLastBaseIfInserted: Boolean) + val testCases = Seq( + RefPosAtReadPos("3S9M", Some(7), 10, false), + RefPosAtReadPos("3S9M", None, 0, false), + RefPosAtReadPos("3S9M", None, 13, false), + RefPosAtReadPos("4M1D6M", Some(4), 4, false), + RefPosAtReadPos("4M1D6M", Some(6), 5, false), + RefPosAtReadPos("4M1I6M", None, 5, false), + RefPosAtReadPos("4M1I6M", Some(4), 5, true), + RefPosAtReadPos("4M1I6M", Some(5), 6, false), + ) + testCases.foreach { testCase => + val frag = new SamBuilder(readLength=Cigar(testCase.cigar).lengthOnQuery).addFrag(start=1, cigar=testCase.cigar).value + val readPos = frag.refPosAtReadPos(pos=testCase.readPos, returnLastBaseIfInserted=testCase.returnLastBaseIfInserted) + readPos shouldBe testCase.refPos + if (!testCase.returnLastBaseIfInserted) { + readPos.getOrElse(0) shouldBe frag.asSam.getReferencePositionAtReadPosition(testCase.readPos) + } + } + } } From 5232ac9b8bce8440862844ac53230f2a094e1be0 Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Fri, 25 Mar 2022 23:51:39 -0700 Subject: [PATCH 2/2] add lots o tests --- .../fulcrumgenomics/bam/api/SamRecord.scala | 116 ++++++----- .../bam/api/SamRecordTest.scala | 180 ++++++++++++++---- 2 files changed, 214 insertions(+), 82 deletions(-) diff --git a/src/main/scala/com/fulcrumgenomics/bam/api/SamRecord.scala b/src/main/scala/com/fulcrumgenomics/bam/api/SamRecord.scala index 7a07d2150..9fb0a3ea4 100644 --- a/src/main/scala/com/fulcrumgenomics/bam/api/SamRecord.scala +++ b/src/main/scala/com/fulcrumgenomics/bam/api/SamRecord.scala @@ -237,66 +237,46 @@ trait SamRecord { @inline final val transientAttrs: TransientAttrs = new TransientAttrs(this) /** Returns the 1-based reference position for the 1-based position in the read, or [[None]] if the reference does - * not map at the read position (e.g. insertion). + * not map at the read position (i.e. insertion or soft-clipping). + * + * If the requested read position is less than or equal to zero, or greater than the read length [[length()]], then + * an exception will be thrown. + * + * Hard-clipping (`H`) is ignored and not counted as part of the read. * * @param pos the 1-based read position to query * @param returnLastBaseIfInserted if the reference is an insertion, true to returned the previous reference base, * false to return None * */ - @inline final def refPosAtReadPos(pos: Int, returnLastBaseIfInserted: Boolean = false): Option[Int] = if (pos <= 0) None else { + @inline final def refPosAtReadPos(pos: Int, returnLastBaseIfInserted: Boolean = false): Option[Int] = { + require(this.mapped, s"read was not mapped: ${this}") + require(1 <= pos, s"position given '$pos' was less than one: ${this}") + require(pos <= cigar.lengthOnQuery , s"position given '$pos' was longer than the # of read bases '${cigar.lengthOnQuery}': ${this}") + var readPos = 1 var refPos = this.start var elementIndex = 0 val elems = this.cigar.elems - def continue(): Boolean = if (elementIndex >= elems.length) false else { + // skip leading clipping, ignoring hard-clipping + while (elems(elementIndex).operator.isClipping && readPos <= pos) { val elem = elems(elementIndex) - if (pos > CoordMath.getEnd(readPos, elem.lengthOnQuery)) true // current cigar element is before the desired position - else { - if (pos < readPos) readPos = 0 // past the point in the read - else if (elem.operator == CigarOperator.INSERTION) { - if (!returnLastBaseIfInserted) refPos = 0 - else refPos -= 1 - } // in an insertion, no reference position - else refPos += pos - readPos // get the offset - false - } - } - - while (continue()) { - val elem = elems(elementIndex) - refPos += elem.lengthOnTarget - readPos += elem.lengthOnQuery + if (elem.operator == CigarOperator.SOFT_CLIP) readPos += elem.lengthOnQuery elementIndex += 1 } - if (this.start <= refPos && readPos <= this.end) Some(refPos) else None - } - - - /** Returns the 1-based position into the read's bases where the position is mapped either as a match or mismatch, - * [[None]] if the read does not map the position. - * - * @param pos the 1-based reference position to query - * @param returnLastBaseIfDeleted if the reference is a deletion, true to returned the previous read base, false to - * return None - * */ - @inline final def readPosAtRefPos(pos: Int, returnLastBaseIfDeleted: Boolean = false): Option[Int] = { - if (this.unmapped || pos < this.start || this.end < pos) None - else { // overlaps - var readPos = 0 - var refPos = this.start - var elementIndex = 0 - val elems = this.cigar.elems - + // return None if the read was in a leading soft-clip + if (pos < readPos) None else { def continue(): Boolean = if (elementIndex >= elems.length) false else { val elem = elems(elementIndex) - if (pos > CoordMath.getEnd(refPos, elem.lengthOnTarget)) true // current cigar element is before the desired position - else { // overlaps! - if (elem.operator == CigarOperator.DELETION) { // deletion - if (!returnLastBaseIfDeleted) readPos = 0 // don't return a read position, so zero out the current read position - } else { - readPos += pos - refPos + 1 // get the offset + if (pos > CoordMath.getEnd(readPos, elem.lengthOnQuery)) true // current cigar element is before the desired position + else { + if (elem.operator == CigarOperator.INSERTION) { + if (!returnLastBaseIfInserted) refPos = 0 // this cause us to return None later for the position + else refPos -= 1 // in an insertion, no reference position, so use the previous reference position + } + else { + refPos += pos - readPos // get the offset, for soft-clipping this will move refPos past this.end, and so return None } false } @@ -309,8 +289,54 @@ trait SamRecord { elementIndex += 1 } - if (0 < readPos && readPos <= this.length) Some(readPos) else None + if (this.start <= refPos && refPos <= this.end) Some(refPos) else None + } + } + + /** Returns the 1-based position into the read's bases where the position is mapped either as a match or mismatch, + * [[None]] if the read does not map the position. + * + * @param pos the 1-based reference position to query + * @param returnLastBaseIfDeleted if the reference is a deletion, true to returned the previous read base, false to + * return None + * @param returnLastBaseIfSkipped if the reference is a skip, true to returned the previous read base, false to + * return None + * */ + @inline final def readPosAtRefPos(pos: Int, + returnLastBaseIfDeleted: Boolean = false, + returnLastBaseIfSkipped: Boolean = false): Option[Int] = { + require(this.mapped, s"read was not mapped: ${this}") + require(this.start <= pos, s"position given '$pos' was before the start of the alignment '${this.start}': ${this}") + require(pos <= this.end , s"position given '$pos' was past the end of the alignment '${this.end}': ${this}") + + var readPos = 0 + var refPos = this.start + var elementIndex = 0 + val elems = this.cigar.elems + + def continue(): Boolean = if (elementIndex >= elems.length) false else { + val elem = elems(elementIndex) + if (pos > CoordMath.getEnd(refPos, elem.lengthOnTarget)) true // current cigar element is before the desired position + else { // overlaps! + if (elem.operator == CigarOperator.DELETION) { // deletion + if (!returnLastBaseIfDeleted) readPos = 0 // don't return a read position, so zero out the current read position + } else if (elem.operator == CigarOperator.SKIPPED_REGION) { // skip region + if (!returnLastBaseIfSkipped) readPos = 0 // don't return a read position, so zero out the current read position + } else { + readPos += pos - refPos + 1 // get the offset + } + false + } + } + + while (continue()) { + val elem = elems(elementIndex) + refPos += elem.lengthOnTarget + readPos += elem.lengthOnQuery + elementIndex += 1 } + + if (0 < readPos && readPos <= this.length) Some(readPos) else None } diff --git a/src/test/scala/com/fulcrumgenomics/bam/api/SamRecordTest.scala b/src/test/scala/com/fulcrumgenomics/bam/api/SamRecordTest.scala index bfc833137..79cc22e95 100644 --- a/src/test/scala/com/fulcrumgenomics/bam/api/SamRecordTest.scala +++ b/src/test/scala/com/fulcrumgenomics/bam/api/SamRecordTest.scala @@ -290,47 +290,153 @@ class SamRecordTest extends UnitSpec with OptionValues { rec2.matesOverlap.value shouldBe true // Mate's start is enclosed by rec, regardless of where mate end is } - "SamRecord.readPosAtRefPos" should "return the correct value" in { - case class ReadPosAtRefPos(cigar: String, refPos: Int, readPos: Option[Int], returnLastBaseIfDeleted: Boolean) - val testCases = Seq( - ReadPosAtRefPos("3S9M", 7, Some(10), false), - ReadPosAtRefPos("3S9M", 0, None, false), - ReadPosAtRefPos("3S9M", -1, None, false), - ReadPosAtRefPos("3S9M", 13, None, false), - ReadPosAtRefPos("4M1D6M", 4, Some(4), false), - ReadPosAtRefPos("4M1D6M", 4, Some(4), true), - ReadPosAtRefPos("4M1D6M", 5, None, false), - ReadPosAtRefPos("4M1D6M", 5, Some(4), true), - ReadPosAtRefPos("4M1I6M", 5, Some(6), false), - ReadPosAtRefPos("4M1I6M", 11, None, false) - ) - testCases.foreach { testCase => - val frag = new SamBuilder(readLength=Cigar(testCase.cigar).lengthOnQuery).addFrag(start=1, cigar=testCase.cigar).value - val readPos = frag.readPosAtRefPos(pos=testCase.refPos, returnLastBaseIfDeleted=testCase.returnLastBaseIfDeleted) - readPos shouldBe testCase.readPos - readPos.getOrElse(0) shouldBe frag.asSam.getReadPositionAtReferencePosition(testCase.refPos, testCase.returnLastBaseIfDeleted) + /** A single test case for [[SamRecord.readPosAtRefPos()]] and/or [[SamRecord.refPosAtReadPos()]]. + * + * If `returnLastBaseIfInserted` is `true`, then [[SamRecord.refPosAtReadPos()]] should not be tested. Similarly, + * if either `returnLastBaseIfDeleted` or `returnLastBaseIfSkipped` is `true`, then [[SamRecord.readPosAtRefPos()]] + * should not be tested. + * + * @param cigar the cigar + * @param readPos the read position; empty if the reference position does not map to a read base + * @param refPos the reference position; epty if the read position doesnot map to a reference base + * @param returnLastBaseIfInserted passed to [[SamRecord.readPosAtRefPos()]] + * @param returnLastBaseIfDeleted passed to [[SamRecord.refPosAtReadPos()]] + * @param returnLastBaseIfSkipped passed to [[SamRecord.refPosAtReadPos()]] + */ + case class TestCase(cigar: String, readPos: Option[Int], refPos: Option[Int], + returnLastBaseIfInserted: Boolean = false, + returnLastBaseIfDeleted: Boolean = false, + returnLastBaseIfSkipped: Boolean = false) { + require(readPos.isDefined || refPos.isDefined) + val doRefPosAtReadPos: Boolean = !returnLastBaseIfDeleted && !returnLastBaseIfSkipped + val doReadPosAtRefPos: Boolean = !returnLastBaseIfInserted + require(doRefPosAtReadPos || doReadPosAtRefPos) + } + object TestCase { + def apply(cigar: String, readPos: Int, refPos: Int): TestCase = { + TestCase(cigar=cigar, readPos=Some(readPos), refPos=Some(refPos)) } } - "SamRecord.refPosAtReadPos" should "return the correct value" in { - case class RefPosAtReadPos(cigar: String, refPos: Option[Int], readPos: Int, returnLastBaseIfInserted: Boolean) - val testCases = Seq( - RefPosAtReadPos("3S9M", Some(7), 10, false), - RefPosAtReadPos("3S9M", None, 0, false), - RefPosAtReadPos("3S9M", None, 13, false), - RefPosAtReadPos("4M1D6M", Some(4), 4, false), - RefPosAtReadPos("4M1D6M", Some(6), 5, false), - RefPosAtReadPos("4M1I6M", None, 5, false), - RefPosAtReadPos("4M1I6M", Some(4), 5, true), - RefPosAtReadPos("4M1I6M", Some(5), 6, false), - ) - testCases.foreach { testCase => - val frag = new SamBuilder(readLength=Cigar(testCase.cigar).lengthOnQuery).addFrag(start=1, cigar=testCase.cigar).value - val readPos = frag.refPosAtReadPos(pos=testCase.readPos, returnLastBaseIfInserted=testCase.returnLastBaseIfInserted) - readPos shouldBe testCase.refPos - if (!testCase.returnLastBaseIfInserted) { - readPos.getOrElse(0) shouldBe frag.asSam.getReferencePositionAtReadPosition(testCase.readPos) + /** Test cases for [[SamRecord.readPosAtRefPos()]] and [[SamRecord.refPosAtReadPos()]]. */ + private val testCases = Seq( + // all mapped bases + TestCase("10M", 10, 10), + TestCase("10M", 1, 1), + // leading soft-clipping + TestCase("3S9M", Some(1), None), // no reference base, start of soft-clipping + TestCase("3S9M", Some(3), None), // no reference base, end of soft-clipping + TestCase("3S9M", 4, 1), // start of aligned bases + TestCase("3S9M", 10, 7), // end of aligned bases + // leading hard-clipping + TestCase("3H9M", 1, 1), // start of aligned bases + TestCase("3H3S9M", 4, 1), // start of aligned bases + TestCase("3H3S9M", Some(3), None), // no reference base, end of soft-clipping + // leading padding + TestCase("3P9M", 1, 1), // start of aligned bases + TestCase("3P9M", 9, 9), // end of aligned bases + // leading skip + TestCase("3N9M", None, Some(1)), // no reference base, start of padding + TestCase("3N9M", None, Some(3)), // no reference base, end of padding + TestCase("3N9M", 1, 4), // start of aligned bases + TestCase("3N9M", 9, 12), // end of aligned bases + // deletions + TestCase("4M1D6M", 4, 4), // before the deletion + TestCase("4M1D6M", None, Some(5)), // in a deletion + TestCase("4M1D6M", 5, 6), // after the deletion + TestCase("4M1D6M", Some(4), Some(5), returnLastBaseIfDeleted=true), // returns the previous mapped base + TestCase("4M2D6M", 4, 4), // before the deletion + TestCase("4M2D6M", None, Some(5)), // in a deletion + TestCase("4M2D6M", None, Some(6)), // in a deletion + TestCase("4M2D6M", Some(4), Some(5), returnLastBaseIfDeleted=true), // returns the previous mapped base + TestCase("4M2D6M", Some(4), Some(6), returnLastBaseIfDeleted=true), // returns the previous mapped base + TestCase("4M2D6M", 5, 7), // after the deletion + TestCase("20M10D20M", 21, 31), // first base of the deletion + // insertions + TestCase("4M1I6M", 4, 4), // before the insertion + TestCase("4M1I6M", Some(5), None), // in an insertion + TestCase("4M1I6M", Some(5), Some(4), returnLastBaseIfInserted=true), // returns the previous mapped base + TestCase("4M1I6M", 6, 5), // after the insertion + TestCase("4M2I6M", 4, 4), // before the insertion + TestCase("4M2I6M", Some(5), None), // in an insertion + TestCase("4M2I6M", Some(6), None), // in an insertion + TestCase("4M2I6M", Some(5), Some(4), returnLastBaseIfInserted=true), // returns the previous mapped base + TestCase("4M2I6M", Some(6), Some(4), returnLastBaseIfInserted=true), // returns the previous mapped base + TestCase("4M2I6M", 7, 5), // after the insertion + TestCase("20M10I20M", 31, 21), // first base of the insertion + // trailing soft-clipping + TestCase("9M3S", 1, 1), // first aligned base + TestCase("9M3S", 9, 9), // last aligned base + TestCase("9M3S", Some(10), None), // no reference base, first base of soft-clipping + TestCase("9M3S", Some(12), None), // no reference base, last base of soft-clipping + // trailing hard-clipping + TestCase("9M3H", 1, 1), // start of aligned bases + TestCase("9M3S3H", 9, 9), // end of aligned bases + TestCase("9M3S3H", Some(10), None), /// no reference base, first base of soft-clipping + TestCase("9M3S3H", Some(12), None), // no reference base, last base of soft-clipping + // trailing padding + TestCase("9M3P", 1, 1), // start of aligned bases + TestCase("9M3P", 9, 9), // end of aligned bases + // trailing skip + TestCase("9M3N", None, Some(10)), // no reference base, start of padding + TestCase("9M3N", None, Some(12)), // no reference base, end of padding + TestCase("9M3N", 9, 9), // end of aligned bases + ) + + "SamRecord.readPosAtRefPos" should "throw an exception when the read position is OOB" in { + val frag1 = new SamBuilder(readLength=10).addFrag(start=1, cigar="10M").value + an[Exception] should be thrownBy frag1.readPosAtRefPos(pos=0) + an[Exception] should be thrownBy frag1.readPosAtRefPos(pos=11) + + // request a base in the trailing hard-clipping should throw an exception + val frag2 = new SamBuilder(readLength=10).addFrag(start=1, cigar="10M10H").value + an[Exception] should be thrownBy frag2.readPosAtRefPos(pos=11) + + // request a base in the trailing soft-clipping should throw an exception + val frag3 = new SamBuilder(readLength=20).addFrag(start=1, cigar="10M10S").value + an[Exception] should be thrownBy frag3.readPosAtRefPos(pos=11) + } + + // now run the test cases + testCases.filter(_.doReadPosAtRefPos).foreach { testCase => + testCase.refPos.foreach { pos => + it should s"return read position ${testCase.readPos} for reference position ${testCase.refPos} with cigar ${testCase.cigar}" in { + val frag = new SamBuilder(readLength=Cigar(testCase.cigar).lengthOnQuery).addFrag(start=1, cigar=testCase.cigar).value + val readPos = frag.readPosAtRefPos(pos=pos, returnLastBaseIfDeleted=testCase.returnLastBaseIfDeleted, testCase.returnLastBaseIfSkipped) + withClue(testCase) { + readPos shouldBe testCase.readPos + if (!testCase.returnLastBaseIfSkipped) { + readPos.getOrElse(0) shouldBe frag.asSam.getReadPositionAtReferencePosition(pos, testCase.returnLastBaseIfDeleted) + } + } } } } + + "SamRecord.refPosAtReadPos" should "throw an exception when the read position is OOB" in { + val frag1 = new SamBuilder(readLength=10).addFrag(start=1, cigar="10M").value + an[Exception] should be thrownBy frag1.refPosAtReadPos(pos=0) + an[Exception] should be thrownBy frag1.refPosAtReadPos(pos=11) + + // request a base in the trailing hard-clipping should throw an exception + val frag2 = new SamBuilder(readLength=10).addFrag(start=1, cigar="10M10H").value + an[Exception] should be thrownBy frag2.refPosAtReadPos(pos=11) + } + + // now run the test cases + testCases.filter(_.doRefPosAtReadPos).foreach { testCase => + testCase.readPos.foreach { pos: Int => + it should s"return ref position ${testCase.refPos} for read position ${testCase.readPos} with cigar ${testCase.cigar}" in { + val frag = new SamBuilder(readLength=Cigar(testCase.cigar).lengthOnQuery).addFrag(start=1, cigar=testCase.cigar).value + val refPos = frag.refPosAtReadPos(pos=pos, returnLastBaseIfInserted=testCase.returnLastBaseIfInserted) + withClue(testCase) { + refPos shouldBe testCase.refPos + if (!testCase.returnLastBaseIfInserted) { + refPos.getOrElse(0) shouldBe frag.asSam.getReferencePositionAtReadPosition(pos) + } + } + } + } + } + }