Skip to content

Commit

Permalink
POC of a tool to re-pick primary alignments.
Browse files Browse the repository at this point in the history
  • Loading branch information
tfenne committed Sep 23, 2024
1 parent ba61d26 commit e39452f
Showing 1 changed file with 100 additions and 0 deletions.
100 changes: 100 additions & 0 deletions src/main/scala/com/fulcrumgenomics/bam/PickPrimaryAlignment.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* The MIT License
*
* Copyright (c) 2024 Fulcrum Genomics LLC
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package com.fulcrumgenomics.bam

import com.fulcrumgenomics.FgBioDef._
import com.fulcrumgenomics.bam.api.{SamRecord, SamSource, SamWriter}
import com.fulcrumgenomics.cmdline.{ClpGroups, FgBioTool}
import com.fulcrumgenomics.sopt.{arg, clp}
import com.fulcrumgenomics.util.ProgressLogger

@clp(group=ClpGroups.SamOrBam, description="""
Resets which record is marked as the primary alignment per read.
""")
class PickPrimaryAlignment(
  @arg(flag='i', doc="Input SAM or BAM file.") val input: PathToBam,
  @arg(flag='o', doc="Output SAM or BAM file.") val output: PathToBam
) extends FgBioTool {

  /**
    * Reads the input template-by-template, re-picks the primary alignment of each read that has
    * supplementary alignments, and writes all records of every template to the output.
    *
    * Mate information is only re-computed for templates where the primary alignment actually changed.
    */
  override def execute(): Unit = {
    val in = SamSource(input)
    // Nested try/finally so that both the reader and the writer are closed even when processing fails.
    try {
      val out = SamWriter(output, in.header)
      try {
        val progress = ProgressLogger(this.logger, noun="templates")

        Bams.templateIterator(in).foreach { template =>
          val fixed = pickPrimaries(template)
          // pickPrimaries returns the very same instance when nothing changed
          if (!(fixed eq template)) {
            fixed.fixMateInfo()
          }

          out ++= fixed.allReads
          progress.record()
        }
      }
      finally {
        out.close()
      }
    }
    finally {
      in.safelyClose()
    }
  }

  /**
    * Re-picks the primary mappings for any reads that have supplementary reads.
    *
    * For each of R1 and R2 that has at least one supplementary record, the record chosen by
    * [[pickPrimary]] becomes the primary and all other (non-secondary) records for that read become
    * supplementary. Secondary records are passed through untouched.
    *
    * NOTE(review): only the `supplementary` flag is changed on the records here; bases/quals of a
    * hard-clipped record that is promoted to primary, and any tags that describe the other
    * alignments, are not rewritten by this method.
    *
    * @param t the template to examine
    * @return the same `Template` instance if no primary changed, otherwise a new `Template` whose
    *         records have had their `supplementary` flags updated in place
    */
  private[bam] def pickPrimaries(t: Template): Template = {
    if (t.r1Supplementals.isEmpty && t.r2Supplementals.isEmpty) {
      t
    }
    else {
      val r1Candidates = (t.r1 ++ t.r1Supplementals).toSeq
      val r2Candidates = (t.r2 ++ t.r2Supplementals).toSeq
      val r1Primary    = if (t.r1Supplementals.isEmpty) t.r1 else Some(pickPrimary(r1Candidates))
      val r2Primary    = if (t.r2Supplementals.isEmpty) t.r2 else Some(pickPrimary(r2Candidates))

      // Compare the records themselves: a freshly built `Some(...)` is never `eq` to the template's
      // own Option, so comparing the Options by reference would always report a change.
      if (sameRecord(r1Primary, t.r1) && sameRecord(r2Primary, t.r2)) t else {
        r1Primary.foreach(_.supplementary = false)
        r2Primary.foreach(_.supplementary = false)

        Template(
          r1              = r1Primary,
          r2              = r2Primary,
          r1Supplementals = demoteToSupplementary(r1Candidates, r1Primary),
          r2Supplementals = demoteToSupplementary(r2Candidates, r2Primary),
          r1Secondaries   = t.r1Secondaries,
          r2Secondaries   = t.r2Secondaries
        )
      }
    }
  }

  /** True if both options are empty, or both hold the very same record instance. */
  private def sameRecord(a: Option[SamRecord], b: Option[SamRecord]): Boolean = (a, b) match {
    case (Some(x), Some(y)) => x eq y
    case (None, None)       => true
    case _                  => false
  }

  /** Returns every record except the chosen primary, after flagging each one as supplementary. */
  private def demoteToSupplementary(recs: Seq[SamRecord], primary: Option[SamRecord]): Seq[SamRecord] = {
    val supplementals = recs.filterNot(r => primary.exists(_ eq r))
    supplementals.foreach(_.supplementary = true)
    supplementals
  }

  /**
    * Selects the record, among mappings of the same read, with the earliest query base aligned.
    * When several records tie, the first one in `recs` is returned.
    *
    * @param recs the non-empty set of mapped records for a single read
    */
  private [bam] def pickPrimary(recs: Iterable[SamRecord]): SamRecord = {
    recs.minBy(firstMappedBaseInQueryOrder)
  }

  /**
    * Returns the 1-based position of the first query base that is aligned to the reference, counted
    * in the order the bases were sequenced (i.e. from the end of the cigar for negative strand records).
    *
    * Clipped bases, including hard-clipped bases that are no longer present in the record, count
    * towards the position so that records of the same read with different clipping are comparable.
    *
    * @param rec a mapped record
    * @throws IllegalArgumentException if the record is unmapped
    */
  private [bam] def firstMappedBaseInQueryOrder(rec: SamRecord): Int = {
    require(rec.mapped, s"Record is unmapped: ${rec}")
    val elems = if (rec.negativeStrand) rec.cigar.elems.reverseIterator else rec.cigar.elems.iterator
    val basesBeforeFirstAligned = elems
      .takeWhile(!_.operator.isAlignment)
      // Clips are counted by their full length, since hard clips consume no bases in the record
      .map(e => if (e.operator.isClipping) e.length else e.lengthOnQuery)
      .sum

    basesBeforeFirstAligned + 1
  }
}

0 comments on commit e39452f

Please sign in to comment.