Skip to content

Commit

Permalink
Make PileupBuilder.includeMapPositionsOutsideFrInsert intuitively c…
Browse files Browse the repository at this point in the history
…orrect (#981)

* Make PileupBuilder.includeMapPositionsOutsideFrInsert intuitively correct

* Revert back to the original isFrPair filtering logic and document better

* chore: update test name

* fix: codecov try specifying the environment (#982)

---------

Co-authored-by: Nils Homer <[email protected]>
  • Loading branch information
clintval and nh13 authored Apr 25, 2024
1 parent 3a74fd2 commit a82cfe0
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 14 deletions.
1 change: 1 addition & 0 deletions .github/workflows/unittests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on: [push, pull_request]
jobs:
test:
runs-on: ubuntu-latest
environment: github-actions
steps:
- name: Checkout
uses: actions/checkout@v2
Expand Down
23 changes: 14 additions & 9 deletions src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,15 @@ object PileupBuilder {
/** Allow records flagged as supplementary alignments to contribute to a pileup by default. */
val includeSupplementalAlignments: Boolean = false

/** Exclude any record of an FR pair where the site requested is outside the insert by default. */
/** For FR pairs only, determine if we should keep the pair of reads if the site requested is outside of the
* insert. By default this filter is set to <false> which means for FR pairs where R1 and R2 overlap each other
* and *extend* past the start coordinates of their mate, the pair will be filtered out if the position requested
* overlaps the end of either read in the span that is beyond the start coordinate of the read's mate.
*
* See the following GitHub issue comment for a visualization of when this filter applies:
*
* - https://github.com/fulcrumgenomics/fgbio/issues/980#issuecomment-2075049301
*/
val includeMapPositionsOutsideFrInsert: Boolean = false
}

Expand Down Expand Up @@ -116,14 +124,11 @@ object PileupBuilder {
)
}

/** Returns true if <rec> is in a mapped FR pair but the position <pos> is outside the insert coordinates of <rec>.
* Returns false if <rec> is in a mapped FR pair and the position <pos> is inside the insert coordinates of <rec> or
* <rec> is not in a mapped FR pair.
*/
private def positionIsOutsideFrInsert(rec: SamRecord, refIndex: Int, pos: Int): Boolean = {
rec.isFrPair && {
/** Returns true if <rec> is in a mapped FR pair and the position <pos> is inside the insert coordinates of <rec>. */
private def positionIsInsideFrInsert(rec: SamRecord, refIndex: Int, pos: Int): Boolean = {
refIndex == rec.refIndex && rec.isFrPair && {
val (start, end) = Bams.insertCoordinates(rec)
rec.refIndex == refIndex && pos >= start && pos <= end
pos >= start && pos <= end
}
}

Expand Down Expand Up @@ -201,7 +206,7 @@ trait PileupBuilder extends PileupParameters {
if (compare) compare = rec.end >= pos
if (compare) compare = rec.start <= pos || PileupBuilder.startsWithInsertion(rec)
if (compare) compare = if (!includeMapPositionsOutsideFrInsert && rec.isFrPair) {
PileupBuilder.positionIsOutsideFrInsert(rec, refIndex = refIndex, pos = pos)
PileupBuilder.positionIsInsideFrInsert(rec, refIndex = refIndex, pos = pos)
} else { compare }
compare
}
Expand Down
12 changes: 7 additions & 5 deletions src/main/scala/com/fulcrumgenomics/testing/SamBuilder.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,19 @@

package com.fulcrumgenomics.testing

import java.nio.file.Files
import java.text.DecimalFormat
import java.util.concurrent.atomic.AtomicLong

import com.fulcrumgenomics.FgBioDef._
import com.fulcrumgenomics.alignment.Cigar
import com.fulcrumgenomics.bam.api.SamRecord.{MissingBases, MissingQuals}
import com.fulcrumgenomics.bam.api.{SamOrder, SamRecord, SamSource, SamWriter}
import com.fulcrumgenomics.fasta.{SequenceDictionary, SequenceMetadata}
import com.fulcrumgenomics.testing.SamBuilder._
import htsjdk.samtools.SamPairUtil.PairOrientation
import htsjdk.samtools._

import java.nio.file.Files
import java.text.DecimalFormat
import java.util
import java.util.concurrent.atomic.AtomicLong
import scala.collection.mutable.ArrayBuffer
import scala.util.Random

Expand Down Expand Up @@ -173,7 +174,8 @@ class SamBuilder(val readLength: Int=100,
r2("RG") = this.rg.getId
attrs.foreach { case (key, value) => r2(key) = value }

SamPairUtil.setMateInfo(r1.asSam, r2.asSam, true)
val orientations = util.Arrays.asList(PairOrientation.values(): _*)
SamPairUtil.setProperPairAndMateInfo(r1.asSam, r2.asSam, orientations, true)
val recs = Seq(r1, r2)
this.records ++= recs
recs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,26 @@ class PileupBuilderTest extends UnitSpec {
piler.safelyClose()
}

it should "not filter out single-end records when we are not filter for mapped pairs only and we are not removing records where the position is outside the insert of FR pairs" in {
val builder = new SamBuilder(readLength = ReadLength, sd = Some(TestSequenceDictionary), sort = Some(Coordinate))

builder.addFrag(name = "q1", contig = Chr1Index, start = 100)

val source = builder.toSource
val piler = PileupBuilder(
source,
accessPattern = accessPattern,
mappedPairsOnly = false,
includeMapPositionsOutsideFrInsert = true
)

piler.pileup(Chr1, 100).depth shouldBe 1
piler.pileup(Chr1, 100 + ReadLength - 1).depth shouldBe 1

source.safelyClose()
piler.safelyClose()
}

it should "filter out records where a position is outside the insert for an FR pair" in {
val builder = new SamBuilder(readLength = ReadLength, sd = Some(TestSequenceDictionary), sort = Some(Coordinate))

Expand Down

0 comments on commit a82cfe0

Please sign in to comment.