BioJulia · Marlin-Na · Feb 28, 2021 · Mar 30, 2021 · Mar 30, 2021 · Mar 30, 2021
diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml
@@ -12,7 +12,7 @@ jobs:
       fail-fast: false
       matrix:
         julia-version:
-          - '1.0' # LTS
+          - '1.3' # The Indexes package uses the Artifacts framework, which is first available in Julia 1.3.
           - '1'
         julia-arch: [x64, x86]
         os: [ubuntu-latest, windows-latest, macOS-latest]

diff --git a/Project.toml b/Project.toml
@@ -1,23 +1,24 @@
 name = "Indexes"
 uuid = "4ffb77ac-cb80-11e8-1b35-4b78cc642f6d"
 authors = ["Kenta Sato <[email protected]>", "Ben J. Ward <[email protected]>", "Ciarán O’Mara <[email protected]>"]
-version = "0.1.3"
+version = "0.2.0"
 
 [deps]
-BGZFStreams = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6"
 BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea"
+CodecBGZF = "d9d91ef6-315d-495b-8131-db2ca24339d6"
 GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446"
 TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
 
 [compat]
-BGZFStreams = "0.3"
 BioGenerics = "0.1"
+CodecBGZF = "0.1"
 GenomicFeatures = "2"
 TranscodingStreams = "0.9.5"
-julia = "1"
+julia = "1.3"
 
 [extras]
+FormatSpecimens = "3372ea36-2a1a-11e9-3eb7-996970b6ffbd"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test"]
+test = ["FormatSpecimens", "Test"]
diff --git a/src/Indexes.jl b/src/Indexes.jl
@@ -10,22 +10,10 @@ module Indexes
 
 using TranscodingStreams
 
-import BGZFStreams
+import CodecBGZF
 import BioGenerics
 import GenomicFeatures: Interval
 
-function Base.bytesavailable(stream::BGZFStreams.BGZFStream{IOStream})
-
-    block_index = BGZFStreams.ensure_buffered_data(stream)
-    if block_index == 0
-        return 0
-    end
-    block = stream.blocks[block_index]
-
-    return length(block.position:block.size)
-
-end
-
 include("chunk.jl")
 include("bgzfindex.jl")
 include("tabix.jl")

diff --git a/src/bgzfindex.jl b/src/bgzfindex.jl
@@ -1,7 +1,7 @@
 # BGZF Index
 # ==========
 #
-# An index type for BGZFStream.
+# An index type for BGZF streams read with CodecBGZF.
 #
-# The details of the internal is specified in
+# The details of the internal structure are specified in
 # https://samtools.github.io/hts-specs/SAMv1.pdf.
@@ -13,7 +13,7 @@
 const BinIndex = Dict{UInt32,Vector{Chunk}}
 
 # linear index
-const LinearIndex = Vector{BGZFStreams.VirtualOffset}
+const LinearIndex = Vector{CodecBGZF.VirtualOffset}
 
-# Metadata providing a summary of the number of mappend/unmapped reads.
+# Metadata providing a summary of the number of mapped/unmapped reads.
 struct PseudoBin
@@ -27,7 +27,7 @@ struct PseudoBin
     n_unmapped::Int64
 end
 
-# Index for BGZFStream; used in BAI and Tabix index.
+# Index for BGZF streams read with CodecBGZF; used in BAI and Tabix index.
 struct BGZFIndex
     # indexes of contigs (chromosomes)
     data::Vector{Tuple{BinIndex,LinearIndex,Union{PseudoBin, Nothing}}}

diff --git a/src/chunk.jl b/src/chunk.jl
@@ -8,11 +8,11 @@
 
 # BGZF file chunk [.start, .stop).
 struct Chunk
-    start::BGZFStreams.VirtualOffset
-    stop::BGZFStreams.VirtualOffset
+    start::CodecBGZF.VirtualOffset
+    stop::CodecBGZF.VirtualOffset
 end
 
-function Base.in(voffset::BGZFStreams.VirtualOffset, chunk::Chunk)
+function Base.in(voffset::CodecBGZF.VirtualOffset, chunk::Chunk)
     return chunk.start ≤ voffset < chunk.stop
 end
 
@@ -33,6 +33,6 @@ function Base.isless(chunk1::Chunk, chunk2::Chunk)
     return false
 end
 
-function Base.seek(stream::BGZFStreams.BGZFStream, chunk::Chunk)
+function Base.seek(stream::CodecBGZF.BGZFDecompressorStream, chunk::Chunk)
     return seek(stream, chunk.start)
 end
diff --git a/src/overlap.jl b/src/overlap.jl
@@ -47,7 +47,7 @@ function done(iter::TabixOverlapIterator, state)
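-        The `virtualoffset(source)` is not synchronized with the current reading position because data are buffered in `buffer` for parsing text.
-        So we need to check not only `virtualoffset` but also `nb_available`, which returns the current buffered data size.
+        `VirtualOffset(source)` is not synchronized with the current reading position because data are buffered in `buffer` for parsing text.
+        So we need to check not only the virtual offset but also `bytesavailable(buffer)`, which returns the current buffered data size.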
         =#
-        while bytesavailable(buffer) > 0 || BGZFStreams.virtualoffset(source) < chunk.stop
+        while bytesavailable(buffer) > 0 || CodecBGZF.VirtualOffset(source) < chunk.stop
             read!(iter.reader, state.record)
             c = icmp(state.record, iter.interval)
             if c == 0  # overlapping

diff --git a/src/tabix.jl b/src/tabix.jl
@@ -108,7 +108,7 @@ end
 
 # Read a Tabix object from `input_`.
 function read_tabix(input_::IO)
-    input = BGZFStreams.BGZFStream(input_)
+    input = CodecBGZF.BGZFDecompressorStream(input_)
 
     # check magic bytes
     T = read(input, UInt8)

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,7 +1,69 @@
 using Test
 using Indexes
+using FormatSpecimens
+
 
 @testset "Indexes" begin
-    # TODO
-    # @test GenomicFeatures.Indexes.Tabix === GenomicFeatures.Indexes.Tabix
+
+@testset "Tabix" begin
+
+    # Tabix (.tbi) index specimens from FormatSpecimens; each one is tested.
+    tbi_paths = [
+        joinpath(path_of_format("BED"), "ws245Genes.WBGene.bed.bgz.tbi"),
+        joinpath(path_of_format("GFF3"), "TAIR10.part.gff.bgz.tbi"),
+    ]
+
+    @testset "$(basename(path))" for path in tbi_paths
+
+        # Decompress the index file and read its first four bytes.
+        magic = open(path) do io
+            input = Indexes.CodecBGZF.BGZFDecompressorStream(io)
+            [read(input, UInt8) for _ in 1:4]
+        end
+
+        # Tabix index files start with the magic string "TBI\1".
+        @test magic == [UInt8('T'), UInt8('B'), UInt8('I'), 0x01]
+
+        # The index must also load through the path-based constructor.
+        index = Indexes.Tabix(path)
+        @test index isa Indexes.Tabix
+
+    end
+
+end
+
+@testset "BGZF" begin
+
+# TODO: BAI indexes are not tested yet. Candidate specimens under path_of_format("BAM"):
+#   GSE25840_GSM424320_GM06985_gencode_spliced.head.bam.bai
+#   R_12h_D06.uniq.q40.bam.bai
+#   cigar-64k.bam.bai
+# Disabled draft of the intended test:
+#
+# #
+# Indexes.reg2bin(-1, 0)
+#
+# # The BAI index format for BAM files
+#
+# goodfiles = filter(entry-> hastag(entry, "bai"), list_valid_specimens("BAM"))
+#
+# entry = goodfiles[1]
+#
+# # Get the full path of a file in the entry:
+# path_bam = joinpath(path_of_format("BAM"), filename(entry))
+# path_bai = path_bam * ".bai"
+#
+# stream = open(path_bai)
+#
+# # Read magic bytes
+# str = read(stream, 4)
+#
+# # read contents
+# n_refs = read(stream, Int32)
+#
+#
+# indexes = Indexes.read_bgzfindex(stream, n_refs)
+
+end
+
 end