-
-
Notifications
You must be signed in to change notification settings - Fork 69
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add scala implementations for some SamRecord methods #809
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -236,9 +236,109 @@ trait SamRecord { | |
// transient attributes | ||
@inline final val transientAttrs: TransientAttrs = new TransientAttrs(this) | ||
|
||
// TODO long-term: replace these two methods with methods on [[Cigar]] to save creating alignment blocks in memory | ||
@inline final def refPosAtReadPos(pos: Int) = getReferencePositionAtReadPosition(pos) | ||
@inline final def readPosAtRefPos(pos: Int, returnLastBaseIfDeleted: Boolean) = getReadPositionAtReferencePosition(pos, returnLastBaseIfDeleted) | ||
/** Returns the 1-based reference position for the 1-based position in the read, or [[None]] if the reference does | ||
* not map at the read position (i.e. insertion or soft-clipping). | ||
* | ||
* If the requested read position is less than or equal to zero, or greater than the read length [[length()]], then | ||
* an exception will be thrown. | ||
* | ||
* Hard-clipping (`H`) is ignored and not counted as part of the read. | ||
* | ||
* @param pos the 1-based read position to query | ||
* @param returnLastBaseIfInserted if the reference is an insertion, true to returned the previous reference base, | ||
* false to return None | ||
nh13 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
* */ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we need to think carefully about, and document here, what the behaviour should be if: For (b) I would generally say to return For (c) I suspect we just want to ignore hard clipping? But make sure we do the right thing if it's there? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similarly you return |
||
@inline final def refPosAtReadPos(pos: Int, returnLastBaseIfInserted: Boolean = false): Option[Int] = { | ||
require(this.mapped, s"read was not mapped: ${this}") | ||
require(1 <= pos, s"position given '$pos' was less than one: ${this}") | ||
require(pos <= cigar.lengthOnQuery , s"position given '$pos' was longer than the # of read bases '${cigar.lengthOnQuery}': ${this}") | ||
|
||
var readPos = 1 | ||
nh13 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
var refPos = this.start | ||
var elementIndex = 0 | ||
val elems = this.cigar.elems | ||
|
||
// skip leading clipping, ignoring hard-clipping | ||
while (elems(elementIndex).operator.isClipping && readPos <= pos) { | ||
val elem = elems(elementIndex) | ||
if (elem.operator == CigarOperator.SOFT_CLIP) readPos += elem.lengthOnQuery | ||
elementIndex += 1 | ||
} | ||
|
||
// return None if the read was in a leading soft-clip | ||
if (pos < readPos) None else { | ||
def continue(): Boolean = if (elementIndex >= elems.length) false else { | ||
val elem = elems(elementIndex) | ||
if (pos > CoordMath.getEnd(readPos, elem.lengthOnQuery)) true // current cigar element is before the desired position | ||
else { | ||
if (elem.operator == CigarOperator.INSERTION) { | ||
if (!returnLastBaseIfInserted) refPos = 0 // this cause us to return None later for the position | ||
else refPos -= 1 // in an insertion, no reference position, so use the previous reference position | ||
} | ||
else { | ||
refPos += pos - readPos // get the offset, for soft-clipping this will move refPos past this.end, and so return None | ||
} | ||
false | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this implementation has the same problem with clipping. If I have a read that has There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It wont since we check There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added tests |
||
|
||
while (continue()) { | ||
val elem = elems(elementIndex) | ||
refPos += elem.lengthOnTarget | ||
readPos += elem.lengthOnQuery | ||
elementIndex += 1 | ||
} | ||
|
||
if (this.start <= refPos && refPos <= this.end) Some(refPos) else None | ||
} | ||
} | ||
|
||
/** Returns the 1-based position into the read's bases where the position is mapped either as a match or mismatch, | ||
* [[None]] if the read does not map the position. | ||
* | ||
* @param pos the 1-based reference position to query | ||
* @param returnLastBaseIfDeleted if the reference is a deletion, true to returned the previous read base, false to | ||
* return None | ||
* @param returnLastBaseIfSkipped if the reference is a skip, true to returned the previous read base, false to | ||
* return None | ||
* */ | ||
@inline final def readPosAtRefPos(pos: Int, | ||
returnLastBaseIfDeleted: Boolean = false, | ||
returnLastBaseIfSkipped: Boolean = false): Option[Int] = { | ||
require(this.mapped, s"read was not mapped: ${this}") | ||
require(this.start <= pos, s"position given '$pos' was before the start of the alignment '${this.start}': ${this}") | ||
require(pos <= this.end , s"position given '$pos' was past the end of the alignment '${this.end}': ${this}") | ||
|
||
var readPos = 0 | ||
var refPos = this.start | ||
var elementIndex = 0 | ||
val elems = this.cigar.elems | ||
|
||
def continue(): Boolean = if (elementIndex >= elems.length) false else { | ||
val elem = elems(elementIndex) | ||
if (pos > CoordMath.getEnd(refPos, elem.lengthOnTarget)) true // current cigar element is before the desired position | ||
else { // overlaps! | ||
if (elem.operator == CigarOperator.DELETION) { // deletion | ||
if (!returnLastBaseIfDeleted) readPos = 0 // don't return a read position, so zero out the current read position | ||
} else if (elem.operator == CigarOperator.SKIPPED_REGION) { // skip region | ||
if (!returnLastBaseIfSkipped) readPos = 0 // don't return a read position, so zero out the current read position | ||
} else { | ||
readPos += pos - refPos + 1 // get the offset | ||
} | ||
false | ||
} | ||
} | ||
|
||
while (continue()) { | ||
val elem = elems(elementIndex) | ||
refPos += elem.lengthOnTarget | ||
readPos += elem.lengthOnQuery | ||
elementIndex += 1 | ||
} | ||
|
||
if (0 < readPos && readPos <= this.length) Some(readPos) else None | ||
} | ||
|
||
|
||
//////////////////////////////////////////////////////////////////////////// | ||
// Non-wrapper methods. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -289,4 +289,154 @@ class SamRecordTest extends UnitSpec with OptionValues { | |
rec1.matesOverlap shouldBe None // Mate's start is not enclosed by rec, and mate's end cannot be determined | ||
rec2.matesOverlap.value shouldBe true // Mate's start is enclosed by rec, regardless of where mate end is | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tests copied from htsjdk, with a few added |
||
/** A single test case for [[SamRecord.readPosAtRefPos()]] and/or [[SamRecord.refPosAtReadPos()]]. | ||
* | ||
* If `returnLastBaseIfInserted` is `true`, then [[SamRecord.refPosAtReadPos()]] should not be tested. Similarly, | ||
* if either `returnLastBaseIfDeleted` or `returnLastBaseIfSkipped` is `true`, then [[SamRecord.readPosAtRefPos()]] | ||
* should not be tested. | ||
* | ||
* @param cigar the cigar | ||
* @param readPos the read position; empty if the reference position does not map to a read base | ||
* @param refPos the reference position; epty if the read position doesnot map to a reference base | ||
* @param returnLastBaseIfInserted passed to [[SamRecord.readPosAtRefPos()]] | ||
* @param returnLastBaseIfDeleted passed to [[SamRecord.refPosAtReadPos()]] | ||
* @param returnLastBaseIfSkipped passed to [[SamRecord.refPosAtReadPos()]] | ||
*/ | ||
case class TestCase(cigar: String, readPos: Option[Int], refPos: Option[Int], | ||
returnLastBaseIfInserted: Boolean = false, | ||
returnLastBaseIfDeleted: Boolean = false, | ||
returnLastBaseIfSkipped: Boolean = false) { | ||
require(readPos.isDefined || refPos.isDefined) | ||
val doRefPosAtReadPos: Boolean = !returnLastBaseIfDeleted && !returnLastBaseIfSkipped | ||
val doReadPosAtRefPos: Boolean = !returnLastBaseIfInserted | ||
require(doRefPosAtReadPos || doReadPosAtRefPos) | ||
} | ||
object TestCase { | ||
def apply(cigar: String, readPos: Int, refPos: Int): TestCase = { | ||
TestCase(cigar=cigar, readPos=Some(readPos), refPos=Some(refPos)) | ||
} | ||
} | ||
|
||
/** Test cases for [[SamRecord.readPosAtRefPos()]] and [[SamRecord.refPosAtReadPos()]]. */ | ||
private val testCases = Seq( | ||
// all mapped bases | ||
TestCase("10M", 10, 10), | ||
TestCase("10M", 1, 1), | ||
// leading soft-clipping | ||
TestCase("3S9M", Some(1), None), // no reference base, start of soft-clipping | ||
TestCase("3S9M", Some(3), None), // no reference base, end of soft-clipping | ||
TestCase("3S9M", 4, 1), // start of aligned bases | ||
TestCase("3S9M", 10, 7), // end of aligned bases | ||
// leading hard-clipping | ||
TestCase("3H9M", 1, 1), // start of aligned bases | ||
TestCase("3H3S9M", 4, 1), // start of aligned bases | ||
TestCase("3H3S9M", Some(3), None), // no reference base, end of soft-clipping | ||
// leading padding | ||
TestCase("3P9M", 1, 1), // start of aligned bases | ||
TestCase("3P9M", 9, 9), // end of aligned bases | ||
// leading skip | ||
TestCase("3N9M", None, Some(1)), // no reference base, start of padding | ||
TestCase("3N9M", None, Some(3)), // no reference base, end of padding | ||
TestCase("3N9M", 1, 4), // start of aligned bases | ||
TestCase("3N9M", 9, 12), // end of aligned bases | ||
// deletions | ||
TestCase("4M1D6M", 4, 4), // before the deletion | ||
TestCase("4M1D6M", None, Some(5)), // in a deletion | ||
TestCase("4M1D6M", 5, 6), // after the deletion | ||
TestCase("4M1D6M", Some(4), Some(5), returnLastBaseIfDeleted=true), // returns the previous mapped base | ||
TestCase("4M2D6M", 4, 4), // before the deletion | ||
TestCase("4M2D6M", None, Some(5)), // in a deletion | ||
TestCase("4M2D6M", None, Some(6)), // in a deletion | ||
TestCase("4M2D6M", Some(4), Some(5), returnLastBaseIfDeleted=true), // returns the previous mapped base | ||
TestCase("4M2D6M", Some(4), Some(6), returnLastBaseIfDeleted=true), // returns the previous mapped base | ||
TestCase("4M2D6M", 5, 7), // after the deletion | ||
TestCase("20M10D20M", 21, 31), // first base of the deletion | ||
// insertions | ||
TestCase("4M1I6M", 4, 4), // before the insertion | ||
TestCase("4M1I6M", Some(5), None), // in an insertion | ||
TestCase("4M1I6M", Some(5), Some(4), returnLastBaseIfInserted=true), // returns the previous mapped base | ||
TestCase("4M1I6M", 6, 5), // after the insertion | ||
TestCase("4M2I6M", 4, 4), // before the insertion | ||
TestCase("4M2I6M", Some(5), None), // in an insertion | ||
TestCase("4M2I6M", Some(6), None), // in an insertion | ||
TestCase("4M2I6M", Some(5), Some(4), returnLastBaseIfInserted=true), // returns the previous mapped base | ||
TestCase("4M2I6M", Some(6), Some(4), returnLastBaseIfInserted=true), // returns the previous mapped base | ||
TestCase("4M2I6M", 7, 5), // after the insertion | ||
TestCase("20M10I20M", 31, 21), // first base of the insertion | ||
// trailing soft-clipping | ||
TestCase("9M3S", 1, 1), // first aligned base | ||
TestCase("9M3S", 9, 9), // last aligned base | ||
TestCase("9M3S", Some(10), None), // no reference base, first base of soft-clipping | ||
TestCase("9M3S", Some(12), None), // no reference base, last base of soft-clipping | ||
// trailing hard-clipping | ||
TestCase("9M3H", 1, 1), // start of aligned bases | ||
TestCase("9M3S3H", 9, 9), // end of aligned bases | ||
TestCase("9M3S3H", Some(10), None), /// no reference base, first base of soft-clipping | ||
TestCase("9M3S3H", Some(12), None), // no reference base, last base of soft-clipping | ||
// trailing padding | ||
TestCase("9M3P", 1, 1), // start of aligned bases | ||
TestCase("9M3P", 9, 9), // end of aligned bases | ||
// trailing skip | ||
TestCase("9M3N", None, Some(10)), // no reference base, start of padding | ||
TestCase("9M3N", None, Some(12)), // no reference base, end of padding | ||
TestCase("9M3N", 9, 9), // end of aligned bases | ||
) | ||
|
||
"SamRecord.readPosAtRefPos" should "throw an exception when the read position is OOB" in { | ||
val frag1 = new SamBuilder(readLength=10).addFrag(start=1, cigar="10M").value | ||
an[Exception] should be thrownBy frag1.readPosAtRefPos(pos=0) | ||
an[Exception] should be thrownBy frag1.readPosAtRefPos(pos=11) | ||
|
||
// request a base in the trailing hard-clipping should throw an exception | ||
val frag2 = new SamBuilder(readLength=10).addFrag(start=1, cigar="10M10H").value | ||
an[Exception] should be thrownBy frag2.readPosAtRefPos(pos=11) | ||
|
||
// request a base in the trailing soft-clipping should throw an exception | ||
val frag3 = new SamBuilder(readLength=20).addFrag(start=1, cigar="10M10S").value | ||
an[Exception] should be thrownBy frag3.readPosAtRefPos(pos=11) | ||
} | ||
|
||
// now run the test cases | ||
testCases.filter(_.doReadPosAtRefPos).foreach { testCase => | ||
testCase.refPos.foreach { pos => | ||
it should s"return read position ${testCase.readPos} for reference position ${testCase.refPos} with cigar ${testCase.cigar}" in { | ||
val frag = new SamBuilder(readLength=Cigar(testCase.cigar).lengthOnQuery).addFrag(start=1, cigar=testCase.cigar).value | ||
val readPos = frag.readPosAtRefPos(pos=pos, returnLastBaseIfDeleted=testCase.returnLastBaseIfDeleted, testCase.returnLastBaseIfSkipped) | ||
withClue(testCase) { | ||
readPos shouldBe testCase.readPos | ||
if (!testCase.returnLastBaseIfSkipped) { | ||
readPos.getOrElse(0) shouldBe frag.asSam.getReadPositionAtReferencePosition(pos, testCase.returnLastBaseIfDeleted) | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
"SamRecord.refPosAtReadPos" should "throw an exception when the read position is OOB" in { | ||
val frag1 = new SamBuilder(readLength=10).addFrag(start=1, cigar="10M").value | ||
an[Exception] should be thrownBy frag1.refPosAtReadPos(pos=0) | ||
an[Exception] should be thrownBy frag1.refPosAtReadPos(pos=11) | ||
|
||
// request a base in the trailing hard-clipping should throw an exception | ||
val frag2 = new SamBuilder(readLength=10).addFrag(start=1, cigar="10M10H").value | ||
an[Exception] should be thrownBy frag2.refPosAtReadPos(pos=11) | ||
} | ||
|
||
// now run the test cases | ||
testCases.filter(_.doRefPosAtReadPos).foreach { testCase => | ||
testCase.readPos.foreach { pos: Int => | ||
it should s"return ref position ${testCase.refPos} for read position ${testCase.readPos} with cigar ${testCase.cigar}" in { | ||
val frag = new SamBuilder(readLength=Cigar(testCase.cigar).lengthOnQuery).addFrag(start=1, cigar=testCase.cigar).value | ||
val refPos = frag.refPosAtReadPos(pos=pos, returnLastBaseIfInserted=testCase.returnLastBaseIfInserted) | ||
withClue(testCase) { | ||
refPos shouldBe testCase.refPos | ||
if (!testCase.returnLastBaseIfInserted) { | ||
refPos.getOrElse(0) shouldBe frag.asSam.getReferencePositionAtReadPosition(pos) | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
these are breaking changes to the API. I like returning
Option[Int]
better, so I can returnNone
instead of0