From 05bafc566a781b6ceaec416b7c7307ae2027fe43 Mon Sep 17 00:00:00 2001 From: jdidion Date: Fri, 29 Sep 2023 10:32:56 -0700 Subject: [PATCH 1/8] when evaluating intra-contig evidence, calculate segment distance differently depending on whether the contig is curcular --- .gitignore | 3 + .../fulcrumgenomics/sv/tools/SvPileup.scala | 82 ++++++++++++------- 2 files changed, 56 insertions(+), 29 deletions(-) diff --git a/.gitignore b/.gitignore index 50bbbee..2441103 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,6 @@ out jars project target +.bloop +.metals +.vscode diff --git a/src/main/scala/com/fulcrumgenomics/sv/tools/SvPileup.scala b/src/main/scala/com/fulcrumgenomics/sv/tools/SvPileup.scala index c036311..d2d6665 100644 --- a/src/main/scala/com/fulcrumgenomics/sv/tools/SvPileup.scala +++ b/src/main/scala/com/fulcrumgenomics/sv/tools/SvPileup.scala @@ -5,7 +5,7 @@ import com.fulcrumgenomics.bam.api.{SamSource, SamWriter} import com.fulcrumgenomics.bam.{Bams, Template} import com.fulcrumgenomics.commons.io.PathUtil import com.fulcrumgenomics.commons.util.LazyLogging -import com.fulcrumgenomics.fasta.SequenceDictionary +import com.fulcrumgenomics.fasta.{SequenceDictionary, Topology} import com.fulcrumgenomics.sopt.{arg, clp} import com.fulcrumgenomics.sv.EvidenceType._ import com.fulcrumgenomics.sv._ @@ -52,11 +52,11 @@ object TargetBedRequirement extends FgBioEnum[TargetBedRequirement] { | tag. | |The `be` SAM tag contains a comma-delimited list of breakpoints to which a given read belongs. Each element is - |a semi-colon delimited, with four fields: + |semi-colon delimited, with four fields: | |1. The unique breakpoint identifier (same identifier found in the tab-delimited output). - |2. Either "left" or "right, corresponding to if the read shows evidence of the genomic left or right side of the - | breakpoint as found in the breakpoint file (i.e. `left_pos` or `right_pos`). + |2. Either "left" or "right, corresponding to whether the read shows evidence of the genomic left or right side of + | the breakpoint as found in the breakpoint file (i.e. `left_pos` or `right_pos`). |3. Either "from" or "into", such that when traversing the breakpoint would read through "from" and then into | "into" in the sequencing order of the read pair. For a split-read alignment, the "from" contains the aligned | portion of the read that comes from earlier in the read in sequencing order. For an alignment of a read-pair @@ -148,7 +148,8 @@ class SvPileup maxWithinReadDistance = maxAlignedSegmentInnerDistance, maxReadPairInnerDistance = maxReadPairInnerDistance, minUniqueBasesToAdd = minUniqueBasesToAdd, - slop = slop + slop = slop, + dict = source.dict, ) val filteredEvidences = targets match { @@ -315,12 +316,14 @@ object SvPileup extends LazyLogging { * adding them. * @param slop the number of bases of slop to allow when determining which records to track for the * left or right side of an aligned segment when merging segments + * @param dict the sequence dictionary to use for determining if a contig is circular */ def findBreakpoints(template: Template, maxWithinReadDistance: Int, maxReadPairInnerDistance: Int, minUniqueBasesToAdd: Int, - slop: Int = 0 + slop: Int = 0, + dict: SequenceDictionary ): IndexedSeq[BreakpointEvidence] = { val segments = AlignedSegment.segmentsFrom(template, minUniqueBasesToAdd=minUniqueBasesToAdd, slop=slop) @@ -329,11 +332,11 @@ object SvPileup extends LazyLogging { NoBreakpoints case 2 => // Special case for 2 since most templates will generate two segments and we'd like it to be efficient - val bp = findBreakpoint(segments.head, segments.last, maxWithinReadDistance, maxReadPairInnerDistance) + val bp = findBreakpoint(segments.head, segments.last, maxWithinReadDistance, maxReadPairInnerDistance, dict) if (bp.isEmpty) NoBreakpoints else bp.toIndexedSeq case _ => segments.iterator.sliding(2).flatMap { case Seq(seg1, seg2) => - findBreakpoint(seg1, seg2, maxWithinReadDistance, maxReadPairInnerDistance) + findBreakpoint(seg1, seg2, maxWithinReadDistance, maxReadPairInnerDistance, dict) }.toIndexedSeq } } @@ -342,9 +345,10 @@ object SvPileup extends LazyLogging { private def findBreakpoint(seg1: AlignedSegment, seg2: AlignedSegment, maxWithinReadDistance: Int, - maxReadPairInnerDistance: Int): Option[BreakpointEvidence] = { + maxReadPairInnerDistance: Int, + dict: SequenceDictionary): Option[BreakpointEvidence] = { if (isInterContigBreakpoint(seg1, seg2) || - isIntraContigBreakpoint(seg1, seg2, maxWithinReadDistance, maxReadPairInnerDistance) + isIntraContigBreakpoint(seg1, seg2, maxWithinReadDistance, maxReadPairInnerDistance, dict) ) { val ev = if (seg1.origin.isInterRead(seg2.origin)) EvidenceType.ReadPair else EvidenceType.SplitRead Some(BreakpointEvidence(from=seg1, into=seg2, evidence=ev)) @@ -365,9 +369,10 @@ object SvPileup extends LazyLogging { r1.refIndex != r2.refIndex } - /** Determines if the two segments are provide evidence of a breakpoint joining two different regions from - * the same contig. Returns true if: - * - the two segments overlap (implying some kind of duplication) (note overlapping reads will get a merged seg) + /** Determines if the two segments provide evidence of a breakpoint joining two different regions from + * the same contig. If the contig is circular (i.e. labeled `TP:circular` in the `SQ` header), then + * reads that span the origin are considered contiguous. Returns true if: + * - the two segments overlap (implying some kind of duplication) (note: overlapping reads will get a merged seg) * - the strand of the two segments differ (implying an inversion or other rearrangement) * - the second segment is before the first segment on the genome * - the distance between the two segments is larger than the maximum allowed (likely a deletion) @@ -376,29 +381,48 @@ object SvPileup extends LazyLogging { * @param seg2 the second alignment segment * @param maxWithinReadDistance the maximum distance between segments if they are from the same read * @param maxBetweenReadDistance the maximum distance between segments if they are from different reads + * @param dict the sequence dictionary to use for determining if a contig is circular */ def isIntraContigBreakpoint(seg1: AlignedSegment, seg2: AlignedSegment, maxWithinReadDistance: Int, - maxBetweenReadDistance: Int): Boolean = { + maxBetweenReadDistance: Int, + dict: SequenceDictionary): Boolean = { require(seg1.range.refIndex == seg2.range.refIndex) - + // The way aligned segments are generated for a template, if we have all the reads in the expected orientation - // the segments should all come out on the same strand. Therefore any difference in strand is odd. In addition - // any segment that "moves backwards" down the genome is odd, as genome position and read position should increase - // together. - if (seg1.positiveStrand != seg2.positiveStrand) true - else if (seg1.positiveStrand && seg2.range.start < seg1.range.end) true - else if (!seg1.positiveStrand && seg1.range.start < seg2.range.start) true + // the segments should all come out on the same strand. Therefore any difference in strand is odd. + val positive_strand = seg1.positiveStrand + if (positive_strand != seg2.positiveStrand) { + return true + } + // Otherwise, any segment that "moves backwards" down the genome is odd, as genome position and read position + // should increase together (unless the contig is curcular). + val contig = dict(seg1.range.refIndex) + val is_circular = contig.topology.contains(Topology.Circular) + if (!is_circular && ( + (positive_strand && seg2.range.start < seg1.range.end) || + (!positive_strand && seg1.range.start < seg2.range.end) + )) { + return true + } + // If the contig is curcular and the segments span the origin, treat them as contiguous when + // calculating the distance between them. + val innerDistance = if (is_circular && positive_strand && seg2.range.end <= seg1.range.start) { + require(seg1.range.end <= contig.length) + (contig.length - seg1.range.end) + seg2.range.start + } + else if (is_circular && !positive_strand && seg1.range.end <= seg2.range.start) { + require(seg2.range.end <= contig.length) + (contig.length - seg2.range.end) + seg1.range.start + } + else if (seg1.range.start <= seg2.range.start) { + seg2.range.start - seg1.range.end + } else { - val maxDistance = if (seg1.origin.isInterRead(seg2.origin)) maxBetweenReadDistance else maxWithinReadDistance - - val innerDistance = { - if (seg1.range.start <= seg2.range.start) seg2.range.start - seg1.range.end - else seg1.range.start - seg2.range.end - } - - innerDistance > maxDistance + seg1.range.start - seg2.range.end } + val maxDistance = if (seg1.origin.isInterRead(seg2.origin)) maxBetweenReadDistance else maxWithinReadDistance + innerDistance > maxDistance } } From fbef4b8a443fb52a010c7530a7e0193af69d760c Mon Sep 17 00:00:00 2001 From: jdidion Date: Fri, 29 Sep 2023 10:34:52 -0700 Subject: [PATCH 2/8] fix formatting --- src/main/scala/com/fulcrumgenomics/sv/tools/SvPileup.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/com/fulcrumgenomics/sv/tools/SvPileup.scala b/src/main/scala/com/fulcrumgenomics/sv/tools/SvPileup.scala index d2d6665..7c76f05 100644 --- a/src/main/scala/com/fulcrumgenomics/sv/tools/SvPileup.scala +++ b/src/main/scala/com/fulcrumgenomics/sv/tools/SvPileup.scala @@ -316,7 +316,7 @@ object SvPileup extends LazyLogging { * adding them. * @param slop the number of bases of slop to allow when determining which records to track for the * left or right side of an aligned segment when merging segments - * @param dict the sequence dictionary to use for determining if a contig is circular + * @param dict the sequence dictionary to use for determining if a contig is circular */ def findBreakpoints(template: Template, maxWithinReadDistance: Int, From 4fee506a20c3d56694b7078a81a64f58476c9d01 Mon Sep 17 00:00:00 2001 From: jdidion Date: Fri, 29 Sep 2023 10:46:07 -0700 Subject: [PATCH 3/8] fix tests --- .../sv/tools/SvPileupTest.scala | 61 ++++++++++--------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/sv/tools/SvPileupTest.scala b/src/test/scala/com/fulcrumgenomics/sv/tools/SvPileupTest.scala index 3998a97..e711794 100644 --- a/src/test/scala/com/fulcrumgenomics/sv/tools/SvPileupTest.scala +++ b/src/test/scala/com/fulcrumgenomics/sv/tools/SvPileupTest.scala @@ -47,35 +47,35 @@ class SvPileupTest extends UnitSpec { // any overlapping segments or jumping backwards between segments is indicative of a breakpoint // What you might get from a read pair with a gap between the two reads - SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later.copy(origin=ReadTwo), maxWithinReadDistance=5, maxBetweenReadDistance=150) shouldBe false + SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later.copy(origin=ReadTwo), maxWithinReadDistance=5, maxBetweenReadDistance=150, dict=builder.dict) shouldBe false // same segment but jumping backwards - SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=earlier, maxWithinReadDistance=5, maxBetweenReadDistance=150) shouldBe true + SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=earlier, maxWithinReadDistance=5, maxBetweenReadDistance=150, dict=builder.dict) shouldBe true // overlapping segments but still jumping backwards - SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=overlap, maxWithinReadDistance=5, maxBetweenReadDistance=150) shouldBe true + SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=overlap, maxWithinReadDistance=5, maxBetweenReadDistance=150, dict=builder.dict) shouldBe true // non-overlapping segments from the same read, testing various values for maxWithinReadDistance // Note that the inner distance between two blocks is defined as `later.start - earlier.end`, so for // this case that is 150-100 = 50, so a breakpoint should be called when the maxWithinReadDistance < 50. - SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later, maxWithinReadDistance= 5, maxBetweenReadDistance=150) shouldBe true - SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later, maxWithinReadDistance=25, maxBetweenReadDistance=150) shouldBe true - SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later, maxWithinReadDistance=48, maxBetweenReadDistance=150) shouldBe true - SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later, maxWithinReadDistance=49, maxBetweenReadDistance=150) shouldBe true - SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later, maxWithinReadDistance=50, maxBetweenReadDistance=150) shouldBe false - SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later, maxWithinReadDistance=51, maxBetweenReadDistance=150) shouldBe false + SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later, maxWithinReadDistance= 5, maxBetweenReadDistance=150, dict=builder.dict) shouldBe true + SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later, maxWithinReadDistance=25, maxBetweenReadDistance=150, dict=builder.dict) shouldBe true + SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later, maxWithinReadDistance=48, maxBetweenReadDistance=150, dict=builder.dict) shouldBe true + SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later, maxWithinReadDistance=49, maxBetweenReadDistance=150, dict=builder.dict) shouldBe true + SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later, maxWithinReadDistance=50, maxBetweenReadDistance=150, dict=builder.dict) shouldBe false + SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later, maxWithinReadDistance=51, maxBetweenReadDistance=150, dict=builder.dict) shouldBe false // non-overlapping segments where the later segment is "both" so indicates a split read breakpoint - SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later.copy(origin=Both), maxWithinReadDistance= 5, maxBetweenReadDistance=150) shouldBe true - SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later.copy(origin=Both), maxWithinReadDistance=25, maxBetweenReadDistance=150) shouldBe true - SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later.copy(origin=Both), maxWithinReadDistance=49, maxBetweenReadDistance=150) shouldBe true - SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later.copy(origin=Both), maxWithinReadDistance=75, maxBetweenReadDistance=150) shouldBe false + SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later.copy(origin=Both), maxWithinReadDistance= 5, maxBetweenReadDistance=150, dict=builder.dict) shouldBe true + SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later.copy(origin=Both), maxWithinReadDistance=25, maxBetweenReadDistance=150, dict=builder.dict) shouldBe true + SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later.copy(origin=Both), maxWithinReadDistance=49, maxBetweenReadDistance=150, dict=builder.dict) shouldBe true + SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later.copy(origin=Both), maxWithinReadDistance=75, maxBetweenReadDistance=150, dict=builder.dict) shouldBe false // non-overlapping segments where the later segment is "Read2" so between read distance should be used - SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later.copy(origin=ReadTwo), maxWithinReadDistance=5, maxBetweenReadDistance=5 ) shouldBe true - SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later.copy(origin=ReadTwo), maxWithinReadDistance=5, maxBetweenReadDistance=25) shouldBe true - SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later.copy(origin=ReadTwo), maxWithinReadDistance=5, maxBetweenReadDistance=49) shouldBe true - SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later.copy(origin=ReadTwo), maxWithinReadDistance=5, maxBetweenReadDistance=75) shouldBe false + SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later.copy(origin=ReadTwo), maxWithinReadDistance=5, maxBetweenReadDistance=5, dict=builder.dict) shouldBe true + SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later.copy(origin=ReadTwo), maxWithinReadDistance=5, maxBetweenReadDistance=25, dict=builder.dict) shouldBe true + SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later.copy(origin=ReadTwo), maxWithinReadDistance=5, maxBetweenReadDistance=49, dict=builder.dict) shouldBe true + SvPileup.isIntraContigBreakpoint(seg1=earlier, seg2=later.copy(origin=ReadTwo), maxWithinReadDistance=5, maxBetweenReadDistance=75, dict=builder.dict) shouldBe false } "SvPileup.isIntraContigBreakpoint" should "identify when two segments flip strand" in { @@ -88,22 +88,22 @@ class SvPileupTest extends UnitSpec { val r2 = f2.copy(positiveStrand=false) // Simple tests that should not call breakpoints - SvPileup.isIntraContigBreakpoint(f1, f2, maxWithinReadDistance=500, maxBetweenReadDistance=500) shouldBe false - SvPileup.isIntraContigBreakpoint(r2, r1, maxWithinReadDistance=500, maxBetweenReadDistance=500) shouldBe false + SvPileup.isIntraContigBreakpoint(f1, f2, maxWithinReadDistance=500, maxBetweenReadDistance=500, dict=builder.dict) shouldBe false + SvPileup.isIntraContigBreakpoint(r2, r1, maxWithinReadDistance=500, maxBetweenReadDistance=500, dict=builder.dict) shouldBe false // Now what if we make them different reads - SvPileup.isIntraContigBreakpoint(f1, f2.copy(origin=ReadTwo), maxWithinReadDistance=500, maxBetweenReadDistance=500) shouldBe false - SvPileup.isIntraContigBreakpoint(r2, r1.copy(origin=ReadTwo), maxWithinReadDistance=500, maxBetweenReadDistance=500) shouldBe false + SvPileup.isIntraContigBreakpoint(f1, f2.copy(origin=ReadTwo), maxWithinReadDistance=500, maxBetweenReadDistance=500, dict=builder.dict) shouldBe false + SvPileup.isIntraContigBreakpoint(r2, r1.copy(origin=ReadTwo), maxWithinReadDistance=500, maxBetweenReadDistance=500, dict=builder.dict) shouldBe false // But any combination on different strands should yield a breakpoint - SvPileup.isIntraContigBreakpoint(f1, r1, maxWithinReadDistance=500, maxBetweenReadDistance=500) shouldBe true - SvPileup.isIntraContigBreakpoint(f1, r2, maxWithinReadDistance=500, maxBetweenReadDistance=500) shouldBe true - SvPileup.isIntraContigBreakpoint(f2, r1, maxWithinReadDistance=500, maxBetweenReadDistance=500) shouldBe true - SvPileup.isIntraContigBreakpoint(f2, r2, maxWithinReadDistance=500, maxBetweenReadDistance=500) shouldBe true - SvPileup.isIntraContigBreakpoint(r1, f1, maxWithinReadDistance=500, maxBetweenReadDistance=500) shouldBe true - SvPileup.isIntraContigBreakpoint(r1, f2, maxWithinReadDistance=500, maxBetweenReadDistance=500) shouldBe true - SvPileup.isIntraContigBreakpoint(r2, f1, maxWithinReadDistance=500, maxBetweenReadDistance=500) shouldBe true - SvPileup.isIntraContigBreakpoint(r2, f2, maxWithinReadDistance=500, maxBetweenReadDistance=500) shouldBe true + SvPileup.isIntraContigBreakpoint(f1, r1, maxWithinReadDistance=500, maxBetweenReadDistance=500, dict=builder.dict) shouldBe true + SvPileup.isIntraContigBreakpoint(f1, r2, maxWithinReadDistance=500, maxBetweenReadDistance=500, dict=builder.dict) shouldBe true + SvPileup.isIntraContigBreakpoint(f2, r1, maxWithinReadDistance=500, maxBetweenReadDistance=500, dict=builder.dict) shouldBe true + SvPileup.isIntraContigBreakpoint(f2, r2, maxWithinReadDistance=500, maxBetweenReadDistance=500, dict=builder.dict) shouldBe true + SvPileup.isIntraContigBreakpoint(r1, f1, maxWithinReadDistance=500, maxBetweenReadDistance=500, dict=builder.dict) shouldBe true + SvPileup.isIntraContigBreakpoint(r1, f2, maxWithinReadDistance=500, maxBetweenReadDistance=500, dict=builder.dict) shouldBe true + SvPileup.isIntraContigBreakpoint(r2, f1, maxWithinReadDistance=500, maxBetweenReadDistance=500, dict=builder.dict) shouldBe true + SvPileup.isIntraContigBreakpoint(r2, f2, maxWithinReadDistance=500, maxBetweenReadDistance=500, dict=builder.dict) shouldBe true } ////////////////////////////////////////////////////////////////////////////// @@ -136,7 +136,8 @@ class SvPileupTest extends UnitSpec { template = t, maxWithinReadDistance = 5, maxReadPairInnerDistance = 1000, - minUniqueBasesToAdd = 10 + minUniqueBasesToAdd = 10, + dict = builder.dict ) /** Short hand for constructing a BreakpointEvidence. */ From b755730192abcbb232842b65d98a8fbc57c37e01 Mon Sep 17 00:00:00 2001 From: Nobody Date: Fri, 29 Sep 2023 17:49:26 +0000 Subject: [PATCH 4/8] Generate docs files --- docs/tools/SvPileup.md | 6 +++--- docs/tools/index.md | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/tools/SvPileup.md b/docs/tools/SvPileup.md index fff058c..b3d6f24 100644 --- a/docs/tools/SvPileup.md +++ b/docs/tools/SvPileup.md @@ -20,11 +20,11 @@ Two output files will be created: tag. The `be` SAM tag contains a comma-delimited list of breakpoints to which a given read belongs. Each element is -a semi-colon delimited, with four fields: +semi-colon delimited, with four fields: 1. The unique breakpoint identifier (same identifier found in the tab-delimited output). -2. Either "left" or "right, corresponding to if the read shows evidence of the genomic left or right side of the - breakpoint as found in the breakpoint file (i.e. `left_pos` or `right_pos`). +2. Either "left" or "right, corresponding to whether the read shows evidence of the genomic left or right side of + the breakpoint as found in the breakpoint file (i.e. `left_pos` or `right_pos`). 3. Either "from" or "into", such that when traversing the breakpoint would read through "from" and then into "into" in the sequencing order of the read pair. For a split-read alignment, the "from" contains the aligned portion of the read that comes from earlier in the read in sequencing order. For an alignment of a read-pair diff --git a/docs/tools/index.md b/docs/tools/index.md index be706a9..809f02d 100644 --- a/docs/tools/index.md +++ b/docs/tools/index.md @@ -4,7 +4,7 @@ title: fgsv tools # fgsv tools -The following tools are available in fgsv version 0.0.2-9cbf8a5. +The following tools are available in fgsv version 0.0.3-4fee506. ## All tools All tools. From 9f28c1d9d88bc84b49e472bfc9a5d167d370cb36 Mon Sep 17 00:00:00 2001 From: Nobody Date: Fri, 29 Sep 2023 22:18:12 +0000 Subject: [PATCH 5/8] Generate docs files --- docs/tools/index.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/tools/index.md b/docs/tools/index.md index 2c08704..dc1c6d4 100644 --- a/docs/tools/index.md +++ b/docs/tools/index.md @@ -4,8 +4,7 @@ title: fgsv tools # fgsv tools -The following tools are available in fgsv version 0.0.3-4fee506. - +The following tools are available in fgsv version 0.0.3-5aea888. ## All tools All tools. From 3f469cb2a86706cd6098303144a9c78ac5d736f5 Mon Sep 17 00:00:00 2001 From: jdidion Date: Fri, 29 Sep 2023 15:13:13 -0700 Subject: [PATCH 6/8] add tests --- .../sv/tools/SvPileupTest.scala | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/test/scala/com/fulcrumgenomics/sv/tools/SvPileupTest.scala b/src/test/scala/com/fulcrumgenomics/sv/tools/SvPileupTest.scala index e711794..47bd574 100644 --- a/src/test/scala/com/fulcrumgenomics/sv/tools/SvPileupTest.scala +++ b/src/test/scala/com/fulcrumgenomics/sv/tools/SvPileupTest.scala @@ -5,6 +5,7 @@ import com.fulcrumgenomics.alignment.Cigar import com.fulcrumgenomics.bam.Template import com.fulcrumgenomics.bam.api.{SamRecord, SamWriter} import com.fulcrumgenomics.commons.io.PathUtil +import com.fulcrumgenomics.fasta.{SequenceDictionary, SequenceMetadata, Topology} import com.fulcrumgenomics.sv.EvidenceType.{ReadPair, SplitRead} import com.fulcrumgenomics.sv.SegmentOrigin.{Both, ReadOne, ReadTwo} import com.fulcrumgenomics.sv._ @@ -109,7 +110,12 @@ class SvPileupTest extends UnitSpec { ////////////////////////////////////////////////////////////////////////////// // Objects and functions used in testing findBreakpoint() ////////////////////////////////////////////////////////////////////////////// - private val builder = new SamBuilder(readLength=100) + private val builder = { + val seqs = (Range.inclusive(1, 22) ++ Seq("X", "Y")).map { chr => + SequenceMetadata(name="chr" + chr, length=200e6.toInt) + } ++ Seq(SequenceMetadata(name="chrM", length=16000, topology = Some(Topology.Circular))) + new SamBuilder(readLength=100, sd=Some(SequenceDictionary(seqs:_*))) + } import SamBuilder.{Minus, Plus, Strand} /** Construct a read/rec with the information necessary for breakpoint detection. */ @@ -294,6 +300,24 @@ class SvPileupTest extends UnitSpec { ) } + it should "not call a breakpoint from a read pair on opposite sides of a circular contig origin" in { + val template = t( + r("chrM", 15800, Plus, r=1, cigar="100M", supp=false), + r("chrM", 100, Minus, r=2, cigar="100M", supp=false), + ) + call(template) should contain theSameElementsInOrderAs IndexedSeq.empty + } + + it should "not call a breakpoint from a split read that spans a circular contig origin" in { + val template = t( + r("chrM", 15951, Plus, r=1, cigar="50M50S", supp=false), + r("chrM", 1, Plus, r=1, cigar="50S50M", supp=true), + r("chrM", 300, Minus, r=2, cigar="100M", supp=false), + ) + call(template) should contain theSameElementsInOrderAs IndexedSeq.empty + } + + it should "call a breakpoint from a single-end split read with no mate" in { val r1Half1 = r("chr1", 100, Plus, r=0, cigar="50M50S", supp=false) val r1Half2 = r("chr7", 800, Plus, r=0, cigar="50S50M", supp=true) From 532c9ad72eecbfa85ed8a8c02f6ab31331fc82b8 Mon Sep 17 00:00:00 2001 From: Nobody Date: Fri, 29 Sep 2023 23:39:13 +0000 Subject: [PATCH 7/8] Generate docs files --- docs/tools/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/index.md b/docs/tools/index.md index dc1c6d4..6d863c3 100644 --- a/docs/tools/index.md +++ b/docs/tools/index.md @@ -4,7 +4,7 @@ title: fgsv tools # fgsv tools -The following tools are available in fgsv version 0.0.3-5aea888. +The following tools are available in fgsv version 0.0.3-3f469cb. ## All tools All tools. From c0e2a8a8fb13851665d1300370e8cb5a70b24eca Mon Sep 17 00:00:00 2001 From: jdidion Date: Fri, 6 Oct 2023 10:39:58 -0700 Subject: [PATCH 8/8] cleanup based on PR suggestions --- .../fulcrumgenomics/sv/tools/SvPileup.scala | 60 +++++++++---------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/src/main/scala/com/fulcrumgenomics/sv/tools/SvPileup.scala b/src/main/scala/com/fulcrumgenomics/sv/tools/SvPileup.scala index 7c76f05..51cb43c 100644 --- a/src/main/scala/com/fulcrumgenomics/sv/tools/SvPileup.scala +++ b/src/main/scala/com/fulcrumgenomics/sv/tools/SvPileup.scala @@ -392,37 +392,35 @@ object SvPileup extends LazyLogging { // The way aligned segments are generated for a template, if we have all the reads in the expected orientation // the segments should all come out on the same strand. Therefore any difference in strand is odd. - val positive_strand = seg1.positiveStrand - if (positive_strand != seg2.positiveStrand) { - return true - } - // Otherwise, any segment that "moves backwards" down the genome is odd, as genome position and read position - // should increase together (unless the contig is curcular). - val contig = dict(seg1.range.refIndex) - val is_circular = contig.topology.contains(Topology.Circular) - if (!is_circular && ( - (positive_strand && seg2.range.start < seg1.range.end) || - (!positive_strand && seg1.range.start < seg2.range.end) - )) { - return true - } - // If the contig is curcular and the segments span the origin, treat them as contiguous when - // calculating the distance between them. - val innerDistance = if (is_circular && positive_strand && seg2.range.end <= seg1.range.start) { - require(seg1.range.end <= contig.length) - (contig.length - seg1.range.end) + seg2.range.start - } - else if (is_circular && !positive_strand && seg1.range.end <= seg2.range.start) { - require(seg2.range.end <= contig.length) - (contig.length - seg2.range.end) + seg1.range.start - } - else if (seg1.range.start <= seg2.range.start) { - seg2.range.start - seg1.range.end - } - else { - seg1.range.start - seg2.range.end + val positiveStrand = seg1.positiveStrand + positiveStrand != seg2.positiveStrand || { + // Otherwise, any segment that "moves backwards" down the genome is odd, as genome position and read position + // should increase together (unless the contig is circular). + val contig = dict(seg1.range.refIndex) + val isCircular = contig.topology.contains(Topology.Circular) + (!isCircular && ( + (positiveStrand && seg2.range.start < seg1.range.end) || + (!positiveStrand && seg1.range.start < seg2.range.end) + )) || { + // If the contig is circular and the segments span the origin, treat them as contiguous when + // calculating the distance between them. + val innerDistance = if (isCircular && positiveStrand && seg2.range.end <= seg1.range.start) { + require(seg1.range.end <= contig.length) + (contig.length - seg1.range.end) + seg2.range.start + } + else if (isCircular && !positiveStrand && seg1.range.end <= seg2.range.start) { + require(seg2.range.end <= contig.length) + (contig.length - seg2.range.end) + seg1.range.start + } + else if (seg1.range.start <= seg2.range.start) { + seg2.range.start - seg1.range.end + } + else { + seg1.range.start - seg2.range.end + } + val maxDistance = if (seg1.origin.isInterRead(seg2.origin)) maxBetweenReadDistance else maxWithinReadDistance + innerDistance > maxDistance + } } - val maxDistance = if (seg1.origin.isInterRead(seg2.origin)) maxBetweenReadDistance else maxWithinReadDistance - innerDistance > maxDistance } }