Skip to content

Fix: CodecBGZF.virtualoffset -> CodecBGZF.VirtualOffset #7

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/UnitTests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
fail-fast: false
matrix:
julia-version:
- '1.0' # LTS
- '1.3' # The Indexes package uses the Artifacts framework, which is first available in Julia 1.3.
- '1'
julia-arch: [x64, x86]
os: [ubuntu-latest, windows-latest, macOS-latest]
Expand Down
11 changes: 6 additions & 5 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,23 +1,24 @@
name = "Indexes"
uuid = "4ffb77ac-cb80-11e8-1b35-4b78cc642f6d"
authors = ["Kenta Sato <[email protected]>", "Ben J. Ward <[email protected]>", "Ciarán O’Mara <[email protected]>"]
version = "0.1.3"
version = "0.2.0"

[deps]
BGZFStreams = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6"
BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea"
CodecBGZF = "d9d91ef6-315d-495b-8131-db2ca24339d6"
GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446"
TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"

[compat]
BGZFStreams = "0.3"
BioGenerics = "0.1"
CodecBGZF = "0.1"
GenomicFeatures = "2"
TranscodingStreams = "0.9.5"
julia = "1"
julia = "1.3"

[extras]
FormatSpecimens = "3372ea36-2a1a-11e9-3eb7-996970b6ffbd"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]
test = ["FormatSpecimens", "Test"]
14 changes: 1 addition & 13 deletions src/Indexes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,10 @@ module Indexes

using TranscodingStreams

import BGZFStreams
import CodecBGZF
import BioGenerics
import GenomicFeatures: Interval

function Base.bytesavailable(stream::BGZFStreams.BGZFStream{IOStream})

block_index = BGZFStreams.ensure_buffered_data(stream)
if block_index == 0
return 0
end
block = stream.blocks[block_index]

return length(block.position:block.size)

end

include("chunk.jl")
include("bgzfindex.jl")
include("tabix.jl")
Expand Down
6 changes: 3 additions & 3 deletions src/bgzfindex.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# BGZF Index
# ==========
#
# An index type for BGZFStream.
# An index type for CodecBGZF.
#
# The details of the internals are specified in
# https://samtools.github.io/hts-specs/SAMv1.pdf.
Expand All @@ -13,7 +13,7 @@
const BinIndex = Dict{UInt32,Vector{Chunk}}

# linear index
const LinearIndex = Vector{BGZFStreams.VirtualOffset}
const LinearIndex = Vector{CodecBGZF.VirtualOffset}

# Metadata providing a summary of the number of mapped/unmapped reads.
struct PseudoBin
Expand All @@ -27,7 +27,7 @@ struct PseudoBin
n_unmapped::Int64
end

# Index for BGZFStream; used in BAI and Tabix index.
# Index for CodecBGZF; used in BAI and Tabix index.
struct BGZFIndex
# indexes of contigs (chromosomes)
data::Vector{Tuple{BinIndex,LinearIndex,Union{PseudoBin, Nothing}}}
Expand Down
8 changes: 4 additions & 4 deletions src/chunk.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@

# BGZF file chunk [.start, .stop).
struct Chunk
start::BGZFStreams.VirtualOffset
stop::BGZFStreams.VirtualOffset
start::CodecBGZF.VirtualOffset
stop::CodecBGZF.VirtualOffset
end

function Base.in(voffset::BGZFStreams.VirtualOffset, chunk::Chunk)
function Base.in(voffset::CodecBGZF.VirtualOffset, chunk::Chunk)
return chunk.start ≤ voffset < chunk.stop
end

Expand All @@ -33,6 +33,6 @@ function Base.isless(chunk1::Chunk, chunk2::Chunk)
return false
end

function Base.seek(stream::BGZFStreams.BGZFStream, chunk::Chunk)
function Base.seek(stream::CodecBGZF.BGZFDecompressorStream, chunk::Chunk)
return seek(stream, chunk.start)
end
2 changes: 1 addition & 1 deletion src/overlap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ function done(iter::TabixOverlapIterator, state)
The `virtualoffset(source)` is not synchronized with the current reading position because data are buffered in `buffer` for parsing text.
So we need to check not only `virtualoffset` but also `bytesavailable`, which returns the current buffered data size.
=#
while bytesavailable(buffer) > 0 || BGZFStreams.virtualoffset(source) < chunk.stop
while bytesavailable(buffer) > 0 || CodecBGZF.VirtualOffset(source) < chunk.stop
read!(iter.reader, state.record)
c = icmp(state.record, iter.interval)
if c == 0 # overlapping
Expand Down
2 changes: 1 addition & 1 deletion src/tabix.jl
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ end

# Read a Tabix object from `input_`.
function read_tabix(input_::IO)
input = BGZFStreams.BGZFStream(input_)
input = CodecBGZF.BGZFDecompressorStream(input_)

# check magic bytes
T = read(input, UInt8)
Expand Down
72 changes: 70 additions & 2 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,75 @@
using Test
using Indexes
using FormatSpecimens


@testset "Indexes" begin
# TODO
# @test GenomicFeatures.Indexes.Tabix === GenomicFeatures.Indexes.Tabix

@testset "Tabix" begin

# BAI
path = joinpath(path_of_format("BAM"), "GSE25840_GSM424320_GM06985_gencode_spliced.head.bam.bai")
path = joinpath(path_of_format("BAM"), "R_12h_D06.uniq.q40.bam.bai")
path = joinpath(path_of_format("BAM"), "cigar-64k.bam.bai")

# TBI
path = joinpath(path_of_format("BED"), "ws245Genes.WBGene.bed.bgz.tbi")
path = joinpath(path_of_format("GFF3"), "TAIR10.part.gff.bgz.tbi")

@info path

open(path) do io

input = Indexes.CodecBGZF.BGZFDecompressorStream(io)
@info input

seekstart(input)

# check magic bytes
T = read(input, UInt8)
@info T

B = read(input, UInt8)
@info B

I = read(input, UInt8)
@info I

x = read(input, UInt8)
@info x

end

index = Indexes.Tabix(path)

end

@testset "BGZF" begin

# #
# Indexes.reg2bin(-1, 0)
#
# # The BAI index format for BAM files
#
# goodfiles = filter(entry-> hastag(entry, "bai"), list_valid_specimens("BAM"))
#
# entry = goodfiles[1]
#
# # Get the full path of a file in the entry:
# path_bam = joinpath(path_of_format("BAM"), filename(entry))
# path_bai = path_bam * ".bai"
#
# stream = open(path_bai)
#
# # Read magic bytes
# str = read(stream, 4)
#
# # read contents
# n_refs = read(stream, Int32)
#
#
# indexes = Indexes.read_bgzfindex(stream, n_refs)

end

end