Skip to content

Commit

Permalink
Add an entrypoint to SamSource for bytes of SAM/BAM/CRAM
Browse files Browse the repository at this point in the history
  • Loading branch information
clintval committed Apr 29, 2022
1 parent c2be4c6 commit a3a5933
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 7 deletions.
36 changes: 32 additions & 4 deletions src/main/scala/com/fulcrumgenomics/bam/api/SamSource.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,12 @@

package com.fulcrumgenomics.bam.api

import java.io.Closeable

import com.fulcrumgenomics.FgBioDef._
import com.fulcrumgenomics.bam.api.QueryType.QueryType
import htsjdk.samtools._
import htsjdk.samtools.util.{Interval, Locatable}

import java.io.{Closeable, InputStream}
import scala.collection.compat._

/** Companion to the [[SamSource]] class that provides factory methods for sources. */
Expand Down Expand Up @@ -66,6 +65,29 @@ object SamSource {
index.foreach(i => input.index(i))
new SamSource(fac.open(input))
}

/** Builds a [[SamSource]] that reads SAM/BAM/CRAM records from an already-open input stream.
  *
  * The provided stream is closed when the returned source is closed.
  *
  * @param stream the input stream of SAM/BAM/CRAM bytes
  * @param ref an optional reference sequence for decoding CRAM files
  * @param async if true use extra thread(s) to speed up reading
  * @param stringency the validation stringency to apply when reading the data
  * @param factory a SAMRecordFactory; MUST return classes that mix in [[SamRecord]]
  */
def apply(
  stream: InputStream,
  ref: Option[PathToFasta],
  async: Boolean,
  stringency: ValidationStringency,
  factory: SAMRecordFactory,
): SamSource = {
  // The factory setters are fluent, so the configuration reads as a single chain.
  val readerFactory = SamReaderFactory.make()
    .samRecordFactory(factory)
    .setUseAsyncIo(async)
    .validationStringency(stringency)

  // A reference is only registered when one was supplied.
  ref.map(_.toFile).foreach(fasta => readerFactory.referenceSequence(fasta))

  val reader = readerFactory.open(SamInputResource.of(stream))
  new SamSource(reader, closer = Some(() => stream.close()))
}
}

/** Describes the two types of queries that can be performed. */
Expand All @@ -78,7 +100,9 @@ object QueryType extends Enumeration {
* A source class for reading SAM/BAM/CRAM files and for querying them.
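* @param reader the underlying [[SamReader]]
* @param closer an optional [[Closeable]] that is closed, along with the reader, when this source is closed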
*/
class SamSource private(private val reader: SamReader) extends View[SamRecord] with HeaderHelper with Closeable {
class SamSource private(private val reader: SamReader, private val closer: Option[Closeable] = None)
extends View[SamRecord] with HeaderHelper with Closeable {

/** The [[htsjdk.samtools.SAMFileHeader]] associated with the source, as reported by the underlying reader. */
override val header: SAMFileHeader = reader.getFileHeader

Expand Down Expand Up @@ -109,7 +133,11 @@ class SamSource private(private val reader: SamReader) extends View[SamRecord] w
/** Renders the source as a string that includes the reader's resource description. */
override def toString: String = {
  val description = reader.getResourceDescription
  s"SamReader($description)"
}

override def close(): Unit = this.reader.close()
/** Closes the optional wrapped closeable and then releases the SAM reader.
  *
  * The reader is always released, even when closing the wrapped closeable throws, so that a
  * failure in the first step cannot leak the reader. If both steps throw, the exception from
  * releasing the reader is the one that propagates.
  */
override def close(): Unit = {
  try this.closer.foreach(_.close())
  finally this.reader.close()
}

/**
* Returns the underlying SamReader. This should be avoided as much as possible, and the
Expand Down
31 changes: 28 additions & 3 deletions src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@

package com.fulcrumgenomics.bam.api

import java.nio.file.Files
import java.util.concurrent.{Callable, Executors, TimeUnit}

import com.fulcrumgenomics.FgBioDef._
import com.fulcrumgenomics.bam.api.SamSource.{DefaultUseAsyncIo, DefaultValidationStringency}
import com.fulcrumgenomics.fasta.{SequenceDictionary, SequenceMetadata}
import com.fulcrumgenomics.testing.{SamBuilder, UnitSpec}
import com.fulcrumgenomics.util.Io
import htsjdk.samtools.GenomicIndexUtil

import java.nio.file.Files
import java.util.concurrent.{Callable, Executors, TimeUnit}
import scala.util.Random

class SamIoTest extends UnitSpec {
Expand Down Expand Up @@ -164,4 +164,29 @@ class SamIoTest extends UnitSpec {
filterCount shouldBe 10
mapCount shouldBe 10
}

it should "allow reading from a stream of SAM bytes" in {
  // Four read pairs, given as (name, start of read one, start of read two).
  val pairs = Seq(
    ("q1", 100, 300),
    ("q4", 200, 400),
    ("q3", 300, 500),
    ("q2", 400, 600)
  )
  val builder = new SamBuilder()
  pairs.foreach { case (name, start1, start2) =>
    builder.addPair(name = name, start1 = start1, start2 = start2)
  }

  // Write the records to a temporary SAM file in coordinate order.
  val sam    = makeTempFile(getClass.getSimpleName, ".sam")
  val writer = SamWriter(sam, builder.header, sort = Some(SamOrder.Coordinate))
  builder.foreach(writer.write)
  writer.close()

  // Read the same file back through the stream-based entry point.
  val source = SamSource(
    stream     = Io.toInputStream(sam),
    ref        = None,
    async      = DefaultUseAsyncIo,
    stringency = DefaultValidationStringency,
    factory    = SamRecord.Factory
  )

  source.indexed shouldBe false
  val starts = source.toSeq.map(_.start)
  starts should contain theSameElementsInOrderAs Seq(100, 200, 300, 300, 400, 400, 500, 600)
  source.close()
}
}

0 comments on commit a3a5933

Please sign in to comment.