diff --git a/polbin/Cargo.lock b/polbin/Cargo.lock index 5417194e..b4b3a24f 100644 --- a/polbin/Cargo.lock +++ b/polbin/Cargo.lock @@ -204,6 +204,7 @@ version = "0.1.0" dependencies = [ "bstr 1.9.1", "gfa", + "memmap", "num_enum", "zerocopy", ] diff --git a/polbin/Cargo.toml b/polbin/Cargo.toml index dec5ff12..60aae942 100644 --- a/polbin/Cargo.toml +++ b/polbin/Cargo.toml @@ -6,5 +6,6 @@ edition = "2021" [dependencies] bstr = "1.9.1" gfa = "0.10.1" +memmap = "0.7.0" num_enum = "0.7.2" zerocopy = { version = "0.7.32", features = ["derive"] } diff --git a/polbin/src/file.rs b/polbin/src/file.rs index f228b004..761b402d 100644 --- a/polbin/src/file.rs +++ b/polbin/src/file.rs @@ -1,5 +1,4 @@ use crate::flatgfa; -use bstr::BStr; use zerocopy::{FromBytes, FromZeroes}; const MAGIC_NUMBER: usize = 0x1337_4915; @@ -14,36 +13,48 @@ struct TOC { steps_count: usize, seq_data_len: usize, overlaps_count: usize, - alignment_len: usize, + alignment_count: usize, name_data_len: usize, optional_data_len: usize, line_order_len: usize, } -pub fn load(data: &[u8]) -> flatgfa::FlatGFA { +/// Get the first `len` bytes in a byte slice, and return the rest of the slice. +fn get_prefix(data: &[u8], len: usize) -> (&[u8], &[u8]) { + assert!(data.len() >= len); + (&data[0..len], &data[len..]) +} + +pub fn view(data: &[u8]) -> flatgfa::FlatGFA { // Table of contents. let toc = TOC::ref_from_prefix(data).unwrap(); let rest = &data[std::mem::size_of::<TOC>()..]; assert_eq!(toc.magic, MAGIC_NUMBER); - // Header (version). - let header = BStr::new(&rest[0..toc.header_len]); - let rest = &rest[toc.header_len..]; - - // Segments. + // Get slices for each chunk. 
+ let (header, rest) = get_prefix(rest, toc.header_len); let (segs, rest) = flatgfa::Segment::slice_from_prefix(rest, toc.segs_count).unwrap(); + let (paths, rest) = flatgfa::Path::slice_from_prefix(rest, toc.paths_count).unwrap(); + let (links, rest) = flatgfa::Link::slice_from_prefix(rest, toc.links_count).unwrap(); + let (steps, rest) = flatgfa::Handle::slice_from_prefix(rest, toc.steps_count).unwrap(); + let (seq_data, rest) = get_prefix(rest, toc.seq_data_len); + let (overlaps, rest) = flatgfa::Span::slice_from_prefix(rest, toc.overlaps_count).unwrap(); + let (alignment, rest) = flatgfa::AlignOp::slice_from_prefix(rest, toc.alignment_count).unwrap(); + let (name_data, rest) = get_prefix(rest, toc.name_data_len); + let (optional_data, rest) = get_prefix(rest, toc.optional_data_len); + let (line_order, _) = get_prefix(rest, toc.line_order_len); flatgfa::FlatGFA { - header, + header: header.into(), segs, - paths: todo!(), - links: todo!(), - steps: todo!(), - seq_data: todo!(), - overlaps: todo!(), - alignment: todo!(), - name_data: todo!(), - optional_data: todo!(), - line_order: todo!(), + paths, + links, + steps, + seq_data, + overlaps, + alignment, + name_data: name_data.into(), + optional_data: optional_data.into(), + line_order, } } diff --git a/polbin/src/main.rs b/polbin/src/main.rs index 9cb29307..c4428348 100644 --- a/polbin/src/main.rs +++ b/polbin/src/main.rs @@ -2,10 +2,23 @@ mod file; mod flatgfa; mod parse; mod print; +use memmap::Mmap; + +fn map_file(name: &str) -> Mmap { + let file = std::fs::File::open(name).unwrap(); + unsafe { Mmap::map(&file) }.unwrap() +} fn main() { - let stdin = std::io::stdin(); - let store = parse::Parser::parse(stdin.lock()); - let gfa = store.view(); - print::print(&gfa); + // Read either GFA text from stdin or a binary file from the first argument. 
+ if let Some(name) = std::env::args().nth(1) { + let mmap = map_file(&name); + let gfa = file::view(&mmap); + print::print(&gfa); + } else { + let stdin = std::io::stdin(); + let store = parse::Parser::parse(stdin.lock()); + let gfa = store.view(); + print::print(&gfa); + } }