From 1f7aecfcdabf93e0a5946f2fd941a2dbc5f8e3eb Mon Sep 17 00:00:00 2001
From: susan-garry <sgarry406@gmail.com>
Date: Wed, 25 Sep 2024 01:04:14 -0400
Subject: [PATCH 1/8] move flatgfa data structure definition and internal
 operations to its own module

---
 .gitignore             |   3 +
 flatgfa-py/Cargo.lock  |  11 +-
 flatgfa/Cargo.lock     |  11 +-
 flatgfa/Cargo.toml     |  11 +-
 flatgfa/src/cmds.rs    |   6 +-
 flatgfa/src/file.rs    | 336 --------------------------------
 flatgfa/src/flatgfa.rs | 430 -----------------------------------------
 flatgfa/src/gfaline.rs | 272 --------------------------
 flatgfa/src/lib.rs     |   9 -
 flatgfa/src/main.rs    |  12 +-
 flatgfa/src/parse.rs   | 283 ---------------------------
 flatgfa/src/pool.rs    | 299 ----------------------------
 flatgfa/src/print.rs   | 153 ---------------
 13 files changed, 35 insertions(+), 1801 deletions(-)
 delete mode 100644 flatgfa/src/file.rs
 delete mode 100644 flatgfa/src/flatgfa.rs
 delete mode 100644 flatgfa/src/gfaline.rs
 delete mode 100644 flatgfa/src/lib.rs
 delete mode 100644 flatgfa/src/parse.rs
 delete mode 100644 flatgfa/src/pool.rs
 delete mode 100644 flatgfa/src/print.rs

diff --git a/.gitignore b/.gitignore
index 2c542cdd..f4e54928 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,4 +22,7 @@ pollen/target
 polbin/target
 pollen/*.rlib
 
+flatgfa/target
+flatgfa/**/target
+
 slow_odgi/dist/
diff --git a/flatgfa-py/Cargo.lock b/flatgfa-py/Cargo.lock
index 3b85ab62..dcb76f0d 100644
--- a/flatgfa-py/Cargo.lock
+++ b/flatgfa-py/Cargo.lock
@@ -84,10 +84,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
 
 [[package]]
-name = "flatgfa"
+name = "fgfa_ds"
 version = "0.1.0"
 dependencies = [
- "argh",
  "atoi",
  "bstr",
  "memchr",
@@ -97,6 +96,14 @@ dependencies = [
  "zerocopy",
 ]
 
+[[package]]
+name = "flatgfa"
+version = "0.1.0"
+dependencies = [
+ "argh",
+ "fgfa_ds",
+]
+
 [[package]]
 name = "flatgfa-py"
 version = "0.1.0"
diff --git a/flatgfa/Cargo.lock b/flatgfa/Cargo.lock
index d09b04b8..bf18eb98 100644
--- a/flatgfa/Cargo.lock
+++ b/flatgfa/Cargo.lock
@@ -72,10 +72,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
 
 [[package]]
-name = "flatgfa"
+name = "fgfa_ds"
 version = "0.1.0"
 dependencies = [
- "argh",
  "atoi",
  "bstr",
  "memchr",
@@ -85,6 +84,14 @@ dependencies = [
  "zerocopy",
 ]
 
+[[package]]
+name = "flatgfa"
+version = "0.1.0"
+dependencies = [
+ "argh",
+ "fgfa_ds",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.14.3"
diff --git a/flatgfa/Cargo.toml b/flatgfa/Cargo.toml
index 2426a543..6178f607 100644
--- a/flatgfa/Cargo.toml
+++ b/flatgfa/Cargo.toml
@@ -1,3 +1,6 @@
+[workspace]
+members = ["fgfa_ds"]
+
 [package]
 name = "flatgfa"
 version = "0.1.0"
@@ -9,13 +12,7 @@ path = "src/main.rs"
 
 [dependencies]
 argh = "0.1.12"
-atoi = "2.0.0"
-bstr = "1.9.1"
-memchr = "2.7.1"
-memmap = "0.7.0"
-num_enum = "0.7.2"
-tinyvec = "1.6.0"
-zerocopy = { version = "0.7.32", features = ["derive"] }
+fgfa_ds = { path = "fgfa_ds" }
 
 [profile.profiling]
 inherits = "release"
diff --git a/flatgfa/src/cmds.rs b/flatgfa/src/cmds.rs
index 48437e4c..3e56d721 100644
--- a/flatgfa/src/cmds.rs
+++ b/flatgfa/src/cmds.rs
@@ -1,6 +1,6 @@
-use crate::flatgfa::{self, Handle, Link, Orientation, Path, Segment};
-use crate::pool::{self, Id, Span, Store};
-use crate::{GFAStore, HeapFamily};
+use fgfa_ds::flatgfa::{self, Handle, Link, Orientation, Path, Segment};
+use fgfa_ds::pool::{self, Id, Span, Store};
+use fgfa_ds::{GFAStore, HeapFamily};
 use argh::FromArgs;
 use std::collections::{HashMap, HashSet};
 
diff --git a/flatgfa/src/file.rs b/flatgfa/src/file.rs
deleted file mode 100644
index 77bf7bf0..00000000
--- a/flatgfa/src/file.rs
+++ /dev/null
@@ -1,336 +0,0 @@
-use crate::flatgfa;
-use crate::pool::{FixedStore, Pool, Span, Store};
-use memmap::{Mmap, MmapMut};
-use std::mem::{size_of, size_of_val};
-use tinyvec::SliceVec;
-use zerocopy::{AsBytes, FromBytes, FromZeroes};
-
-const MAGIC_NUMBER: u64 = 0xB101_1054;
-
-/// A table of contents for the FlatGFA file.
-#[derive(FromBytes, FromZeroes, AsBytes, Debug)]
-#[repr(packed)]
-pub struct Toc {
-    magic: u64,
-    header: Size,
-    segs: Size,
-    paths: Size,
-    links: Size,
-    steps: Size,
-    seq_data: Size,
-    overlaps: Size,
-    alignment: Size,
-    name_data: Size,
-    optional_data: Size,
-    line_order: Size,
-}
-
-/// A table-of-contents entry for a pool in the FlatGFA file.
-#[derive(FromBytes, FromZeroes, AsBytes, Clone, Copy, Debug)]
-#[repr(packed)]
-struct Size {
-    /// The number of actual elements in the pool.
-    len: usize,
-
-    // The allocated space for the pool. `capacity - len` slots are "empty."
-    capacity: usize,
-}
-
-impl Size {
-    fn of_pool<T>(pool: Pool<T>) -> Self {
-        Size {
-            len: pool.len(),
-            capacity: pool.len(),
-        }
-    }
-
-    fn of_store<T: Clone>(store: &FixedStore<'_, T>) -> Self {
-        Size {
-            len: store.len(),
-            capacity: store.capacity(),
-        }
-    }
-
-    fn bytes<T>(&self) -> usize {
-        self.capacity * size_of::<T>()
-    }
-
-    fn empty(capacity: usize) -> Self {
-        Size { len: 0, capacity }
-    }
-}
-
-impl Toc {
-    /// Get the total size in bytes of the file described.
-    pub fn size(&self) -> usize {
-        size_of::<Self>()
-            + self.header.bytes::<u8>()
-            + self.segs.bytes::<flatgfa::Segment>()
-            + self.paths.bytes::<flatgfa::Path>()
-            + self.links.bytes::<flatgfa::Link>()
-            + self.steps.bytes::<flatgfa::Handle>()
-            + self.seq_data.bytes::<u8>()
-            + self.overlaps.bytes::<Span<flatgfa::AlignOp>>()
-            + self.alignment.bytes::<flatgfa::AlignOp>()
-            + self.name_data.bytes::<u8>()
-            + self.optional_data.bytes::<u8>()
-            + self.line_order.bytes::<u8>()
-    }
-
-    /// Get a table of contents that fits a FlatGFA with no spare space.
-    fn full(gfa: &flatgfa::FlatGFA) -> Self {
-        Self {
-            magic: MAGIC_NUMBER,
-            header: Size::of_pool(gfa.header),
-            segs: Size::of_pool(gfa.segs),
-            paths: Size::of_pool(gfa.paths),
-            links: Size::of_pool(gfa.links),
-            steps: Size::of_pool(gfa.steps),
-            seq_data: Size::of_pool(gfa.seq_data),
-            overlaps: Size::of_pool(gfa.overlaps),
-            alignment: Size::of_pool(gfa.alignment),
-            name_data: Size::of_pool(gfa.name_data),
-            optional_data: Size::of_pool(gfa.optional_data),
-            line_order: Size::of_pool(gfa.line_order),
-        }
-    }
-
-    pub fn for_fixed_store(store: &flatgfa::FixedGFAStore) -> Self {
-        Self {
-            magic: MAGIC_NUMBER,
-            header: Size::of_store(&store.header),
-            segs: Size::of_store(&store.segs),
-            paths: Size::of_store(&store.paths),
-            links: Size::of_store(&store.links),
-            steps: Size::of_store(&store.steps),
-            seq_data: Size::of_store(&store.seq_data),
-            overlaps: Size::of_store(&store.overlaps),
-            alignment: Size::of_store(&store.alignment),
-            name_data: Size::of_store(&store.name_data),
-            optional_data: Size::of_store(&store.optional_data),
-            line_order: Size::of_store(&store.line_order),
-        }
-    }
-
-    /// Guess a reasonable set of capacities for a fresh file.
-    pub fn guess(factor: usize) -> Self {
-        Self {
-            magic: MAGIC_NUMBER,
-            header: Size::empty(128),
-            segs: Size::empty(32 * factor * factor),
-            paths: Size::empty(factor),
-            links: Size::empty(32 * factor * factor),
-            steps: Size::empty(1024 * factor * factor),
-            seq_data: Size::empty(512 * factor * factor),
-            overlaps: Size::empty(256 * factor),
-            alignment: Size::empty(64 * factor * factor),
-            name_data: Size::empty(64 * factor),
-            optional_data: Size::empty(512 * factor * factor),
-            line_order: Size::empty(64 * factor * factor),
-        }
-    }
-
-    /// Estimate a reasonable set of capacities for a fresh file based on some
-    /// measurements of the GFA text.
-    pub fn estimate(
-        segs: usize,
-        links: usize,
-        paths: usize,
-        header_bytes: usize,
-        seg_bytes: usize,
-        path_bytes: usize,
-    ) -> Self {
-        Self {
-            magic: MAGIC_NUMBER,
-            header: Size::empty(header_bytes),
-            segs: Size::empty(segs),
-            paths: Size::empty(paths),
-            links: Size::empty(links),
-            steps: Size::empty(path_bytes / 3),
-            seq_data: Size::empty(seg_bytes),
-            overlaps: Size::empty((links + paths) * 2),
-            alignment: Size::empty(links * 2 + paths * 4),
-            name_data: Size::empty(paths * 512),
-            optional_data: Size::empty(links * 16),
-            line_order: Size::empty(segs + links + paths + 8),
-        }
-    }
-}
-
-/// Consume `size.len` items from a byte slice, skip the remainder of `size.capacity`
-/// elements, and return the items and the rest of the slice.
-fn slice_prefix<T: FromBytes>(data: &[u8], size: Size) -> (&[T], &[u8]) {
-    let (prefix, rest) = T::slice_from_prefix(data, size.len).unwrap();
-    let pad = size_of::<T>() * (size.capacity - size.len);
-    (prefix, &rest[pad..])
-}
-
-/// Read the table of contents from a prefix of the byte buffer.
-fn read_toc(data: &[u8]) -> (&Toc, &[u8]) {
-    let toc = Toc::ref_from_prefix(data).unwrap();
-    let rest = &data[size_of::<Toc>()..];
-    let magic = toc.magic;
-    assert_eq!(magic, MAGIC_NUMBER);
-    (toc, rest)
-}
-
-fn read_toc_mut(data: &mut [u8]) -> (&mut Toc, &mut [u8]) {
-    let (toc_slice, rest) = Toc::mut_slice_from_prefix(data, 1).unwrap();
-    let toc = &mut toc_slice[0];
-    let magic = toc.magic;
-    assert_eq!(magic, MAGIC_NUMBER);
-    (toc, rest)
-}
-
-/// Get a FlatGFA backed by the data in a byte buffer.
-pub fn view(data: &[u8]) -> flatgfa::FlatGFA {
-    let (toc, rest) = read_toc(data);
-
-    let (header, rest) = slice_prefix(rest, toc.header);
-    let (segs, rest) = slice_prefix(rest, toc.segs);
-    let (paths, rest) = slice_prefix(rest, toc.paths);
-    let (links, rest) = slice_prefix(rest, toc.links);
-    let (steps, rest) = slice_prefix(rest, toc.steps);
-    let (seq_data, rest) = slice_prefix(rest, toc.seq_data);
-    let (overlaps, rest) = slice_prefix(rest, toc.overlaps);
-    let (alignment, rest) = slice_prefix(rest, toc.alignment);
-    let (name_data, rest) = slice_prefix(rest, toc.name_data);
-    let (optional_data, rest) = slice_prefix(rest, toc.optional_data);
-    let (line_order, _) = slice_prefix(rest, toc.line_order);
-
-    flatgfa::FlatGFA {
-        header: header.into(),
-        segs: segs.into(),
-        paths: paths.into(),
-        links: links.into(),
-        steps: steps.into(),
-        seq_data: seq_data.into(),
-        overlaps: overlaps.into(),
-        alignment: alignment.into(),
-        name_data: name_data.into(),
-        optional_data: optional_data.into(),
-        line_order: line_order.into(),
-    }
-}
-
-/// Like `slice_prefix`, but produce a `SliceVec`.
-fn slice_vec_prefix<T: FromBytes + AsBytes>(
-    data: &mut [u8],
-    size: Size,
-) -> (SliceVec<T>, &mut [u8]) {
-    let (prefix, rest) = T::mut_slice_from_prefix(data, size.capacity).unwrap();
-    let vec = SliceVec::from_slice_len(prefix, size.len);
-    (vec, rest)
-}
-
-/// Get a FlatGFA `SliceStore` from the suffix of a file just following the table of contents.
-fn slice_store<'a>(data: &'a mut [u8], toc: &Toc) -> flatgfa::FixedGFAStore<'a> {
-    let (header, rest) = slice_vec_prefix(data, toc.header);
-    let (segs, rest) = slice_vec_prefix(rest, toc.segs);
-    let (paths, rest) = slice_vec_prefix(rest, toc.paths);
-    let (links, rest) = slice_vec_prefix(rest, toc.links);
-    let (steps, rest) = slice_vec_prefix(rest, toc.steps);
-    let (seq_data, rest) = slice_vec_prefix(rest, toc.seq_data);
-    let (overlaps, rest) = slice_vec_prefix(rest, toc.overlaps);
-    let (alignment, rest) = slice_vec_prefix(rest, toc.alignment);
-    let (name_data, rest) = slice_vec_prefix(rest, toc.name_data);
-    let (optional_data, rest) = slice_vec_prefix(rest, toc.optional_data);
-    let (line_order, _) = slice_vec_prefix(rest, toc.line_order);
-
-    flatgfa::FixedGFAStore {
-        header: header.into(),
-        segs: segs.into(),
-        paths: paths.into(),
-        links: links.into(),
-        steps: steps.into(),
-        seq_data: seq_data.into(),
-        overlaps: overlaps.into(),
-        alignment: alignment.into(),
-        name_data: name_data.into(),
-        optional_data: optional_data.into(),
-        line_order: line_order.into(),
-    }
-}
-
-/// Get a mutable FlatGFA `SliceStore` backed by a byte buffer.
-pub fn view_store(data: &mut [u8]) -> flatgfa::FixedGFAStore {
-    let (toc, rest) = read_toc_mut(data);
-    slice_store(rest, toc)
-}
-
-/// Initialize a buffer with an empty FlatGFA store.
-pub fn init(data: &mut [u8], toc: Toc) -> (&mut Toc, flatgfa::FixedGFAStore) {
-    // Write the table of contents.
-    assert!(data.len() == toc.size());
-    toc.write_to_prefix(data).unwrap();
-
-    // Get a mutable reference to the embedded TOC.
-    let (toc_bytes, rest) = data.split_at_mut(size_of::<Toc>());
-    let toc_mut = Toc::mut_from(toc_bytes).unwrap();
-
-    // Extract a store from the remaining bytes.
-    (toc_mut, slice_store(rest, &toc))
-}
-
-fn write_bump<'a, T: AsBytes + ?Sized>(buf: &'a mut [u8], data: &T) -> Option<&'a mut [u8]> {
-    let len = size_of_val(data);
-    data.write_to_prefix(buf)?;
-    Some(&mut buf[len..])
-}
-
-fn write_bytes<'a>(buf: &'a mut [u8], data: &[u8]) -> Option<&'a mut [u8]> {
-    let len = data.len();
-    buf[0..len].copy_from_slice(data);
-    Some(&mut buf[len..])
-}
-
-/// Copy a FlatGFA into a byte buffer.
-pub fn dump(gfa: &flatgfa::FlatGFA, buf: &mut [u8]) {
-    // Table of contents.
-    let toc = Toc::full(gfa);
-    let rest = write_bump(buf, &toc).unwrap();
-
-    // All the slices.
-    let rest = write_bytes(rest, gfa.header.all()).unwrap();
-    let rest = write_bump(rest, gfa.segs.all()).unwrap();
-    let rest = write_bump(rest, gfa.paths.all()).unwrap();
-    let rest = write_bump(rest, gfa.links.all()).unwrap();
-    let rest = write_bump(rest, gfa.steps.all()).unwrap();
-    let rest = write_bytes(rest, gfa.seq_data.all()).unwrap();
-    let rest = write_bump(rest, gfa.overlaps.all()).unwrap();
-    let rest = write_bump(rest, gfa.alignment.all()).unwrap();
-    let rest = write_bytes(rest, gfa.name_data.all()).unwrap();
-    let rest = write_bytes(rest, gfa.optional_data.all()).unwrap();
-    write_bytes(rest, gfa.line_order.all()).unwrap();
-}
-
-/// Get the total size in bytes of a FlatGFA structure. This should result in a big
-/// enough buffer to write the entire FlatGFA into with `dump`.
-pub fn size(gfa: &flatgfa::FlatGFA) -> usize {
-    Toc::full(gfa).size()
-}
-
-pub fn map_file(name: &str) -> Mmap {
-    let file = std::fs::File::open(name).unwrap();
-    unsafe { Mmap::map(&file) }.unwrap()
-}
-
-pub fn map_new_file(name: &str, size: u64) -> MmapMut {
-    let file = std::fs::OpenOptions::new()
-        .read(true)
-        .write(true)
-        .create(true)
-        .open(name)
-        .unwrap();
-    file.set_len(size).unwrap();
-    unsafe { MmapMut::map_mut(&file) }.unwrap()
-}
-
-pub fn map_file_mut(name: &str) -> MmapMut {
-    let file = std::fs::OpenOptions::new()
-        .read(true)
-        .write(true)
-        .open(name)
-        .unwrap();
-    unsafe { MmapMut::map_mut(&file) }.unwrap()
-}
diff --git a/flatgfa/src/flatgfa.rs b/flatgfa/src/flatgfa.rs
deleted file mode 100644
index a7f0e5dd..00000000
--- a/flatgfa/src/flatgfa.rs
+++ /dev/null
@@ -1,430 +0,0 @@
-use std::str::FromStr;
-
-use crate::pool::{self, Id, Pool, Span, Store};
-use bstr::BStr;
-use num_enum::{IntoPrimitive, TryFromPrimitive};
-use zerocopy::{AsBytes, FromBytes, FromZeroes};
-
-/// An efficient flattened representation of a GFA file.
-///
-/// This struct *borrows* the underlying data from some other data store. Namely, the
-/// `GFAStore` structs contain `Vec`s or `Vec`-like arenas as backing stores for each
-/// of the slices in this struct. `FlatGFA` itself provides access to the GFA data
-/// structure that is agnostic to the location of the underlying bytes. However, all
-/// its components have a fixed size; unlike the underlying `GFAStore`, it is not
-/// possible to add new objects.
-pub struct FlatGFA<'a> {
-    /// A GFA may optionally have a single header line, with a version number.
-    /// If this is empty, there is no header line.
-    pub header: Pool<'a, u8>,
-
-    /// The segment (S) lines in the GFA file.
-    pub segs: Pool<'a, Segment>,
-
-    /// The path (P) lines.
-    pub paths: Pool<'a, Path>,
-
-    /// The link (L) lines.
-    pub links: Pool<'a, Link>,
-
-    /// Paths consist of steps. This is a flat pool of steps, chunks of which are
-    /// associated with each path.
-    pub steps: Pool<'a, Handle>,
-
-    /// The actual base-pair sequences for the segments. This is a pool of
-    /// base-pair symbols, chunks of which are associated with each segment.
-    ///
-    /// TODO: This could certainly use a smaller representation than `u8`
-    /// (since we care only about 4 base pairs). If we want to pay the cost
-    /// of bit-packing.
-    pub seq_data: Pool<'a, u8>,
-
-    /// Both paths and links can have overlaps, which are CIGAR sequences. They
-    /// are all stored together here in a flat pool, elements of which point
-    /// to chunks of `alignment`.
-    pub overlaps: Pool<'a, Span<AlignOp>>,
-
-    /// The CIGAR aligment operations that make up the overlaps. `overlaps`
-    /// contains range of indices in this pool.
-    pub alignment: Pool<'a, AlignOp>,
-
-    /// The string names: currenly, just of paths. (We assume segments have integer
-    /// names, so they don't need to be stored separately.)
-    pub name_data: Pool<'a, u8>,
-
-    /// Segments can come with optional extra fields, which we store in a flat pool
-    /// as raw characters because we don't currently care about them.
-    pub optional_data: Pool<'a, u8>,
-
-    /// An "interleaving" order of GFA lines. This is to preserve perfect round-trip
-    /// fidelity: we record the order of lines as we saw them when parsing a GFA file
-    /// so we can emit them again in that order. Elements should be `LineKind` values
-    /// (but they are checked before we use them).
-    pub line_order: Pool<'a, u8>,
-}
-
-/// GFA graphs consist of "segment" nodes, which are fragments of base-pair sequences
-/// that can be strung together into paths.
-#[derive(Debug, FromZeroes, FromBytes, AsBytes, Clone, Copy)]
-#[repr(packed)]
-pub struct Segment {
-    /// The segment's name. We assume all names are just plain numbers.
-    pub name: usize,
-
-    /// The base-pair sequence for the segment. This is a range in the `seq_data` pool.
-    pub seq: Span<u8>,
-
-    /// Segments can have optional fields. This is a range in the `optional_data` pool.
-    pub optional: Span<u8>,
-}
-
-impl Segment {
-    #[allow(clippy::len_without_is_empty)]
-    pub fn len(&self) -> usize {
-        self.seq.len()
-    }
-}
-
-/// A path is a sequence of oriented references to segments.
-#[derive(Debug, FromZeroes, FromBytes, AsBytes, Clone, Copy)]
-#[repr(packed)]
-pub struct Path {
-    /// The path's name. This can be an arbitrary string. It is a range in the
-    /// `name_data` pool.
-    pub name: Span<u8>,
-
-    /// The sequence of path steps. This is a range in the `steps` pool.
-    pub steps: Span<Handle>,
-
-    /// The CIGAR overlaps for each step on the path. This is a range in the
-    /// `overlaps` pool.
-    pub overlaps: Span<Span<AlignOp>>,
-}
-
-impl Path {
-    pub fn step_count(&self) -> usize {
-        self.steps.end.index() - self.steps.start.index()
-    }
-}
-
-/// An allowed edge between two oriented segments.
-#[derive(Debug, FromBytes, FromZeroes, AsBytes, Clone, Copy)]
-#[repr(packed)]
-pub struct Link {
-    /// The source of the edge.
-    pub from: Handle,
-
-    // The destination of the edge.
-    pub to: Handle,
-
-    /// The CIGAR overlap between the segments. This is a range in the
-    /// `alignment` pool.
-    pub overlap: Span<AlignOp>,
-}
-
-impl Link {
-    /// Is either end of the link the given segment? If so, return the other end.
-    pub fn incident_seg(&self, seg_id: Id<Segment>) -> Option<Id<Segment>> {
-        if self.from.segment() == seg_id {
-            Some(self.to.segment())
-        } else if self.to.segment() == seg_id {
-            Some(self.from.segment())
-        } else {
-            None
-        }
-    }
-}
-
-/// A forward or backward direction.
-#[derive(Debug, PartialEq, IntoPrimitive, TryFromPrimitive)]
-#[repr(u8)]
-pub enum Orientation {
-    Forward,  // +
-    Backward, // -
-}
-
-impl FromStr for Orientation {
-    type Err = ();
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        if s == "+" {
-            Ok(Orientation::Forward)
-        } else if s == "-" {
-            Ok(Orientation::Backward)
-        } else {
-            Err(())
-        }
-    }
-}
-
-/// An oriented reference to a segment.
-///
-/// A Handle refers to the forward (+) or backward (-) orientation for a given segment.
-/// So, logically, it consists of a pair of a segment reference (usize) and an
-/// orientation (1 bit). We pack the two values into a single word.
-#[derive(Debug, FromBytes, FromZeroes, AsBytes, Clone, Copy, PartialEq, Eq, Hash)]
-#[repr(packed)]
-pub struct Handle(u32);
-
-impl Handle {
-    /// Create a new handle referring to a segment ID and an orientation.
-    pub fn new(segment: Id<Segment>, orient: Orientation) -> Self {
-        let seg_num: u32 = segment.into();
-        assert!(seg_num & (1 << (u32::BITS - 1)) == 0, "index too large");
-        let orient_bit: u8 = orient.into();
-        assert!(orient_bit & !1 == 0, "invalid orientation");
-        Self(seg_num << 1 | (orient_bit as u32))
-    }
-
-    /// Get the segment ID. This is an index in the `segs` pool.
-    pub fn segment(&self) -> Id<Segment> {
-        (self.0 >> 1).into()
-    }
-
-    /// Get the orientation (+ or -) for the handle.
-    pub fn orient(&self) -> Orientation {
-        ((self.0 & 1) as u8).try_into().unwrap()
-    }
-}
-
-/// The kind of each operation in a CIGAR alignment.
-#[derive(Debug, IntoPrimitive, TryFromPrimitive, Clone, Copy)]
-#[repr(u8)]
-pub enum AlignOpcode {
-    Match,     // M
-    Gap,       // N
-    Insertion, // D
-    Deletion,  // I
-}
-
-/// A single operation in a CIGAR alignment, like "3M" or "1D".
-///
-/// Logically, this is a pair of a number and an `AlignOpcode`. We pack the two
-/// into a single u32.
-#[derive(Debug, FromZeroes, FromBytes, AsBytes, Clone, Copy)]
-#[repr(packed)]
-pub struct AlignOp(u32);
-
-impl AlignOp {
-    /// Create a new alignment operation from an opcode and count.
-    pub fn new(op: AlignOpcode, len: u32) -> Self {
-        let op_byte: u8 = op.into();
-        assert!(len & !0xff == 0, "length too large");
-        Self((len << 8) | (op_byte as u32))
-    }
-
-    /// Get the operation (M, I, etc.) for this operation.
-    pub fn op(&self) -> AlignOpcode {
-        ((self.0 & 0xff) as u8).try_into().unwrap()
-    }
-
-    /// Get the length of the operation.
-    pub fn len(&self) -> u32 {
-        self.0 >> 8
-    }
-
-    /// Check whether there are zero operations in this alignment.
-    pub fn is_empty(&self) -> bool {
-        self.len() == 0
-    }
-}
-
-/// An entire CIGAR alignment string, like "3M1D2M".
-#[derive(Debug)]
-#[repr(transparent)]
-pub struct Alignment<'a> {
-    /// The sequence of operations that make up the alignment.
-    pub ops: &'a [AlignOp],
-}
-
-/// A kind of GFA line. We use this in `line_order` to preserve the textual order
-/// in a GFA file for round-tripping.
-#[derive(Debug, IntoPrimitive, TryFromPrimitive)]
-#[repr(u8)]
-pub enum LineKind {
-    Header,
-    Segment,
-    Path,
-    Link,
-}
-
-impl<'a> FlatGFA<'a> {
-    /// Get the base-pair sequence for a segment.
-    pub fn get_seq(&self, seg: &Segment) -> &BStr {
-        self.seq_data[seg.seq].as_ref()
-    }
-
-    /// Look up a segment by its name.
-    pub fn find_seg(&self, name: usize) -> Option<Id<Segment>> {
-        // TODO Make this more efficient by maintaining the name index? This would not be
-        // too hard; we already have the machinery in `parse.rs`...
-        self.segs.search(|seg| seg.name == name)
-    }
-
-    /// Look up a path by its name.
-    pub fn find_path(&self, name: &BStr) -> Option<Id<Path>> {
-        self.paths.search(|path| self.get_path_name(path) == name)
-    }
-
-    /// Get the string name of a path.
-    pub fn get_path_name(&self, path: &Path) -> &BStr {
-        self.name_data[path.name].as_ref()
-    }
-
-    pub fn get_path_steps(&self, path: &Path) -> impl Iterator<Item = &Handle> {
-        self.steps[path.steps].iter()
-    }
-
-    /// Get a handle's associated segment.
-    pub fn get_handle_seg(&self, handle: Handle) -> &Segment {
-        &self.segs[handle.segment()]
-    }
-
-    /// Get the optional data for a segment, as a tab-separated string.
-    pub fn get_optional_data(&self, seg: &Segment) -> &BStr {
-        self.optional_data[seg.optional].as_ref()
-    }
-
-    /// Look up a CIGAR alignment.
-    pub fn get_alignment(&self, overlap: Span<AlignOp>) -> Alignment {
-        Alignment { 
-            ops: &self.alignment[overlap]
-        }
-    }
-
-    /// Get the recorded order of line kinds.
-    pub fn get_line_order(&self) -> impl Iterator<Item = LineKind> + 'a {
-        self.line_order
-            .all()
-            .iter()
-            .map(|b| (*b).try_into().unwrap())
-    }
-}
-
-/// The data storage pools for a `FlatGFA`.
-#[derive(Default)]
-pub struct GFAStore<'a, P: StoreFamily<'a>> {
-    pub header: P::Store<u8>,
-    pub segs: P::Store<Segment>,
-    pub paths: P::Store<Path>,
-    pub links: P::Store<Link>,
-    pub steps: P::Store<Handle>,
-    pub seq_data: P::Store<u8>,
-    pub overlaps: P::Store<Span<AlignOp>>,
-    pub alignment: P::Store<AlignOp>,
-    pub name_data: P::Store<u8>,
-    pub optional_data: P::Store<u8>,
-    pub line_order: P::Store<u8>,
-}
-
-impl<'a, P: StoreFamily<'a>> GFAStore<'a, P> {
-    /// Add a header line for the GFA file. This may only be added once.
-    pub fn add_header(&mut self, version: &[u8]) {
-        assert!(self.header.as_ref().is_empty());
-        self.header.add_slice(version);
-    }
-
-    /// Add a new segment to the GFA file.
-    pub fn add_seg(&mut self, name: usize, seq: &[u8], optional: &[u8]) -> Id<Segment> {
-        self.segs.add(Segment {
-            name,
-            seq: self.seq_data.add_slice(seq),
-            optional: self.optional_data.add_slice(optional),
-        })
-    }
-
-    /// Add a new path.
-    pub fn add_path(
-        &mut self,
-        name: &[u8],
-        steps: Span<Handle>,
-        overlaps: impl Iterator<Item = Vec<AlignOp>>,
-    ) -> Id<Path> {
-        let overlaps = self.overlaps.add_iter(
-            overlaps
-                .into_iter()
-                .map(|align| self.alignment.add_iter(align)),
-        );
-        let name = self.name_data.add_slice(name);
-        self.paths.add(Path {
-            name,
-            steps,
-            overlaps,
-        })
-    }
-
-    /// Add a sequence of steps.
-    pub fn add_steps(&mut self, steps: impl Iterator<Item = Handle>) -> Span<Handle> {
-        self.steps.add_iter(steps)
-    }
-
-    /// Add a single step.
-    pub fn add_step(&mut self, step: Handle) -> Id<Handle> {
-        self.steps.add(step)
-    }
-
-    /// Add a sequence of links.
-    pub fn add_links(&mut self, links: impl Iterator<Item = Link>) -> Span<Link> {
-        self.links.add_iter(links)
-    }
-
-    /// Add a link between two (oriented) segments.
-    pub fn add_link(&mut self, from: Handle, to: Handle, overlap: Vec<AlignOp>) -> Id<Link> {
-        self.links.add(Link {
-            from,
-            to,
-            overlap: self.alignment.add_iter(overlap),
-        })
-    }
-
-    /// Record a line type to preserve the line order.
-    pub fn record_line(&mut self, kind: LineKind) {
-        self.line_order.add(kind.into());
-    }
-
-    /// Borrow a FlatGFA view of this data store.
-    pub fn as_ref(&self) -> FlatGFA {
-        FlatGFA {
-            header: self.header.as_ref(),
-            segs: self.segs.as_ref(),
-            paths: self.paths.as_ref(),
-            links: self.links.as_ref(),
-            name_data: self.name_data.as_ref(),
-            seq_data: self.seq_data.as_ref(),
-            steps: self.steps.as_ref(),
-            overlaps: self.overlaps.as_ref(),
-            alignment: self.alignment.as_ref(),
-            optional_data: self.optional_data.as_ref(),
-            line_order: self.line_order.as_ref(),
-        }
-    }
-}
-
-pub trait StoreFamily<'a> {
-    type Store<T: Clone + 'a>: pool::Store<T>;
-}
-
-#[derive(Default)]
-pub struct HeapFamily;
-impl<'a> StoreFamily<'a> for HeapFamily {
-    type Store<T: Clone + 'a> = pool::HeapStore<T>;
-}
-
-pub struct FixedFamily;
-impl<'a> StoreFamily<'a> for FixedFamily {
-    type Store<T: Clone + 'a> = pool::FixedStore<'a, T>;
-}
-
-/// A store for `FlatGFA` data backed by fixed-size slices.
-///
-/// This store contains `SliceVec`s, which act like `Vec`s but are allocated within
-/// a fixed region. This means they have a maximum size, but they can directly map
-/// onto the contents of a file.
-pub type FixedGFAStore<'a> = GFAStore<'a, FixedFamily>;
-
-/// A mutable, in-memory data store for `FlatGFA`.
-///
-/// This store contains a bunch of `Vec`s: one per array required to implement a
-/// `FlatGFA`. It exposes an API for building up a GFA data structure, so it is
-/// useful for creating new ones from scratch.
-pub type HeapGFAStore = GFAStore<'static, HeapFamily>;
diff --git a/flatgfa/src/gfaline.rs b/flatgfa/src/gfaline.rs
deleted file mode 100644
index 36408d42..00000000
--- a/flatgfa/src/gfaline.rs
+++ /dev/null
@@ -1,272 +0,0 @@
-use crate::flatgfa::{AlignOp, Orientation};
-use atoi::FromRadix10;
-
-type ParseResult<T> = Result<T, &'static str>;
-type LineResult<'a> = ParseResult<Line<'a>>;
-type PartialParseResult<'a, T> = ParseResult<(T, &'a [u8])>;
-
-/// A parsed GFA file line.
-pub enum Line<'a> {
-    Header(&'a [u8]),
-    Segment(Segment<'a>),
-    Link(Link),
-    Path(Path<'a>),
-}
-
-pub struct Segment<'a> {
-    pub name: usize,
-    pub seq: &'a [u8],
-    pub data: &'a [u8],
-}
-
-pub struct Link {
-    pub from_seg: usize,
-    pub from_orient: Orientation,
-    pub to_seg: usize,
-    pub to_orient: Orientation,
-    pub overlap: Vec<AlignOp>,
-}
-
-pub struct Path<'a> {
-    pub name: &'a [u8],
-    pub steps: &'a [u8],
-    pub overlaps: Vec<Vec<AlignOp>>,
-}
-
-/// Parse a single line of a GFA file.
-pub fn parse_line(line: &[u8]) -> LineResult {
-    if line.len() < 2 || line[1] != b'\t' {
-        return Err("expected marker and tab");
-    }
-    let rest = &line[2..];
-    match line[0] {
-        b'H' => parse_header(rest),
-        b'S' => parse_seg(rest),
-        b'L' => parse_link(rest),
-        b'P' => parse_path(rest),
-        _ => Err("unhandled line kind"),
-    }
-}
-
-/// Parse a header line, which looks like `H <data>`.
-fn parse_header(line: &[u8]) -> LineResult {
-    Ok(Line::Header(line))
-}
-
-/// Parse a segment line, which looks like `S <name> <seq> <data>`.
-fn parse_seg(line: &[u8]) -> LineResult {
-    let (name, rest) = parse_num(line)?;
-    let rest = parse_byte(rest, b'\t')?;
-    let (seq, data) = parse_field(rest)?;
-    Ok(Line::Segment(Segment { name, seq, data }))
-}
-
-/// Parse a link line, which looks like `L <from> <+-> <to> <+-> <CIGAR>`.
-fn parse_link(line: &[u8]) -> LineResult {
-    let (from_seg, rest) = parse_num(line)?;
-    let rest = parse_byte(rest, b'\t')?;
-    let (from_orient, rest) = parse_orient(rest)?;
-    let rest = parse_byte(rest, b'\t')?;
-    let (to_seg, rest) = parse_num(rest)?;
-    let rest = parse_byte(rest, b'\t')?;
-    let (to_orient, rest) = parse_orient(rest)?;
-    let rest = parse_byte(rest, b'\t')?;
-    let (overlap, rest) = parse_align(rest)?;
-    if !rest.is_empty() {
-        return Err("expected end of line");
-    }
-    Ok(Line::Link(Link {
-        from_seg,
-        from_orient,
-        to_seg,
-        to_orient,
-        overlap,
-    }))
-}
-
-/// Parse a path line, which looks like `P <name> <steps> <*|CIGARs>`.
-fn parse_path(line: &[u8]) -> LineResult {
-    let (name, rest) = parse_field(line)?;
-    let (steps, rest) = parse_field(rest)?;
-    let (overlaps, rest) = parse_maybe_overlap_list(rest)?;
-    if !rest.is_empty() {
-        return Err("expected end of line");
-    }
-    Ok(Line::Path(Path {
-        name,
-        steps,
-        overlaps,
-    }))
-}
-
-/// Parse a *possible* overlap list, which may be `*` (empty).
-pub fn parse_maybe_overlap_list(s: &[u8]) -> PartialParseResult<Vec<Vec<AlignOp>>> {
-    if s == b"*" {
-        Ok((vec![], &s[1..]))
-    } else {
-        parse_overlap_list(s)
-    }
-}
-
-/// Parse a comma-separated list of CIGAR strings.
-///
-/// TODO: This could be optimized to avoid accumulating into a vector.
-fn parse_overlap_list(s: &[u8]) -> PartialParseResult<Vec<Vec<AlignOp>>> {
-    let mut rest = s;
-    let mut overlaps = vec![];
-    while !rest.is_empty() {
-        let overlap;
-        (overlap, rest) = parse_align(rest)?;
-        overlaps.push(overlap);
-        if !rest.is_empty() {
-            rest = parse_byte(rest, b',')?;
-        }
-    }
-    Ok((overlaps, rest))
-}
-
-/// Consume a chunk of a string up to a given marker byte.
-fn parse_until(line: &[u8], marker: u8) -> PartialParseResult<&[u8]> {
-    let end = memchr::memchr(marker, line).unwrap_or(line.len());
-    let rest = if end == line.len() {
-        &[]
-    } else {
-        &line[end + 1..]
-    };
-    Ok((&line[..end], rest))
-}
-
-/// Consume a string from the line, until a tab (or the end of the line).
-pub fn parse_field(line: &[u8]) -> PartialParseResult<&[u8]> {
-    parse_until(line, b'\t')
-}
-
-/// Consume a specific byte.
-fn parse_byte(s: &[u8], byte: u8) -> ParseResult<&[u8]> {
-    if s.is_empty() || s[0] != byte {
-        return Err("expected byte");
-    }
-    Ok(&s[1..])
-}
-
-/// Parse a single integer.
-fn parse_num<T: FromRadix10>(s: &[u8]) -> PartialParseResult<T> {
-    match T::from_radix_10(s) {
-        (_, 0) => Err("expected number"),
-        (num, used) => Ok((num, &s[used..])),
-    }
-}
-
-/// Parse a segment orientation (+ or -).
-fn parse_orient(line: &[u8]) -> PartialParseResult<Orientation> {
-    if line.is_empty() {
-        return Err("expected orientation");
-    }
-    let orient = match line[0] {
-        b'+' => Orientation::Forward,
-        b'-' => Orientation::Backward,
-        _ => return Err("expected orient"),
-    };
-    Ok((orient, &line[1..]))
-}
-
-/// Parse a single CIGAR alignment operation (like `4D`).
-fn parse_align_op(s: &[u8]) -> PartialParseResult<AlignOp> {
-    let (len, rest) = parse_num::<u32>(s)?;
-    let op = match rest[0] {
-        b'M' => crate::flatgfa::AlignOpcode::Match,
-        b'N' => crate::flatgfa::AlignOpcode::Gap,
-        b'D' => crate::flatgfa::AlignOpcode::Deletion,
-        b'I' => crate::flatgfa::AlignOpcode::Insertion,
-        _ => return Err("expected align op"),
-    };
-    Ok((AlignOp::new(op, len), &rest[1..]))
-}
-
-/// Parse a complete CIGAR alignment string (like `3M2I`).
-///
-/// TODO This could be optimized to avoid collecting into a vector.
-fn parse_align(s: &[u8]) -> PartialParseResult<Vec<AlignOp>> {
-    let mut rest = s;
-    let mut align = vec![];
-    while !rest.is_empty() && rest[0].is_ascii_digit() {
-        let op;
-        (op, rest) = parse_align_op(rest)?;
-        align.push(op);
-    }
-    Ok((align, rest))
-}
-
-/// Parse GFA paths' segment lists. These look like `1+,2-,3+`.
-pub struct StepsParser<'a> {
-    str: &'a [u8],
-    index: usize,
-    state: StepsParseState,
-    seg: usize,
-}
-
-/// The parser state: we're either looking for a segment name (or a +/- terminator),
-/// or we're expecting a comma (or end of string).
-enum StepsParseState {
-    Seg,
-    Comma,
-}
-
-impl<'a> StepsParser<'a> {
-    pub fn new(str: &'a [u8]) -> Self {
-        StepsParser {
-            str,
-            index: 0,
-            state: StepsParseState::Seg,
-            seg: 0,
-        }
-    }
-
-    pub fn rest(&self) -> &[u8] {
-        &self.str[self.index..]
-    }
-}
-
-impl<'a> Iterator for StepsParser<'a> {
-    type Item = (usize, bool);
-    fn next(&mut self) -> Option<(usize, bool)> {
-        while self.index < self.str.len() {
-            // Consume one byte.
-            let byte = self.str[self.index];
-            self.index += 1;
-
-            match self.state {
-                StepsParseState::Seg => {
-                    if byte == b'+' || byte == b'-' {
-                        self.state = StepsParseState::Comma;
-                        return Some((self.seg, byte == b'+'));
-                    } else if byte.is_ascii_digit() {
-                        self.seg *= 10;
-                        self.seg += (byte - b'0') as usize;
-                    } else {
-                        return None;
-                    }
-                }
-                StepsParseState::Comma => {
-                    if byte == b',' {
-                        self.state = StepsParseState::Seg;
-                        self.seg = 0;
-                    } else {
-                        return None;
-                    }
-                }
-            }
-        }
-
-        None
-    }
-}
-
-#[test]
-fn test_parse_steps() {
-    let s = b"1+,23-,4+ suffix";
-    let mut parser = StepsParser::new(s);
-    let path: Vec<_> = (&mut parser).collect();
-    assert_eq!(path, vec![(1, true), (23, false), (4, true)]);
-    assert_eq!(parser.rest(), b"suffix");
-}
diff --git a/flatgfa/src/lib.rs b/flatgfa/src/lib.rs
deleted file mode 100644
index d6ec729e..00000000
--- a/flatgfa/src/lib.rs
+++ /dev/null
@@ -1,9 +0,0 @@
-pub mod cmds;
-pub mod file;
-pub mod flatgfa;
-pub mod gfaline;
-pub mod parse;
-pub mod pool;
-pub mod print;
-
-pub use flatgfa::*;
diff --git a/flatgfa/src/main.rs b/flatgfa/src/main.rs
index 376404c4..53968e99 100644
--- a/flatgfa/src/main.rs
+++ b/flatgfa/src/main.rs
@@ -1,8 +1,10 @@
 use argh::FromArgs;
-use flatgfa::flatgfa::FlatGFA;
-use flatgfa::parse::Parser;
-use flatgfa::pool::Store;
-use flatgfa::{cmds, file, parse}; // TODO: hopefully remove at some point, this breaks a lot of principles
+use fgfa_ds::flatgfa::FlatGFA;
+use fgfa_ds::parse::Parser;
+use fgfa_ds::pool::Store;
+use fgfa_ds::{file, parse}; // TODO: hopefully remove at some point, this breaks a lot of principles
+
+mod cmds;
 
 #[derive(FromArgs)]
 /// Convert between GFA text and FlatGFA binary formats.
@@ -112,7 +114,7 @@ fn main() -> Result<(), &'static str> {
             // defining here which values from out input `gfa` are needed by our final `flat` gfa.
             // Here we are reference values in two different Stores to create this Flatgfa, and 
             // have not yet found a good rust-safe way to do this
-            let flat = flatgfa::FlatGFA {
+            let flat = FlatGFA {
                 header: gfa.header,
                 seq_data: gfa.seq_data,
                 name_data: gfa.name_data,
diff --git a/flatgfa/src/parse.rs b/flatgfa/src/parse.rs
deleted file mode 100644
index 0685d3f5..00000000
--- a/flatgfa/src/parse.rs
+++ /dev/null
@@ -1,283 +0,0 @@
-use crate::flatgfa::{self, Handle, LineKind, Orientation};
-use crate::gfaline;
-use std::collections::HashMap;
-use std::io::BufRead;
-
-pub struct Parser<'a, P: flatgfa::StoreFamily<'a>> {
-    /// The flat representation we're building.
-    flat: flatgfa::GFAStore<'a, P>,
-
-    /// All segment IDs, indexed by their names, which we need to refer to segments in paths.
-    seg_ids: NameMap,
-}
-
-impl<'a, P: flatgfa::StoreFamily<'a>> Parser<'a, P> {
-    pub fn new(builder: flatgfa::GFAStore<'a, P>) -> Self {
-        Self {
-            flat: builder,
-            seg_ids: NameMap::default(),
-        }
-    }
-
-    /// Parse a GFA text file from an I/O stream.
-    pub fn parse_stream<R: BufRead>(mut self, stream: R) -> flatgfa::GFAStore<'a, P> {
-        // We can parse segments immediately, but we need to defer links and paths until we have all
-        // the segment names that they might refer to.
-        let mut deferred_links = Vec::new();
-        let mut deferred_paths = Vec::new();
-
-        // Parse or defer each line.
-        for line in stream.split(b'\n') {
-            let line = line.unwrap();
-
-            // Avoid parsing paths entirely for now; just preserve the entire line for later.
-            if line[0] == b'P' {
-                self.flat.record_line(LineKind::Path);
-                deferred_paths.push(line);
-                continue;
-            }
-
-            // Parse other kinds of lines.
-            let gfa_line = gfaline::parse_line(line.as_ref()).unwrap();
-            self.record_line(&gfa_line);
-
-            match gfa_line {
-                gfaline::Line::Header(data) => {
-                    self.flat.add_header(data);
-                }
-                gfaline::Line::Segment(seg) => {
-                    self.add_seg(seg);
-                }
-                gfaline::Line::Link(link) => {
-                    deferred_links.push(link);
-                }
-                gfaline::Line::Path(_) => {
-                    unreachable!("paths handled separately")
-                }
-            }
-        }
-
-        // "Unwind" the deferred links and paths.
-        for link in deferred_links {
-            self.add_link(link);
-        }
-        for line in deferred_paths {
-            self.add_path(&line);
-        }
-
-        self.flat
-    }
-
-    /// Parse a GFA text file from an in-memory buffer.
-    pub fn parse_mem(mut self, buf: &[u8]) -> flatgfa::GFAStore<'a, P> {
-        let mut deferred_lines = Vec::new();
-
-        for line in MemchrSplit::new(b'\n', buf) {
-            // When parsing from memory, it's easy to entirely defer parsing of any line: we just keep
-            // pointers to them. So we defer both paths and links.
-            if line[0] == b'P' || line[0] == b'L' {
-                self.flat.record_line(if line[0] == b'P' {
-                    LineKind::Path
-                } else {
-                    LineKind::Link
-                });
-                deferred_lines.push(line);
-                continue;
-            }
-
-            // Actually parse other lines.
-            let gfa_line = gfaline::parse_line(line).unwrap();
-            self.record_line(&gfa_line);
-            match gfa_line {
-                gfaline::Line::Header(data) => {
-                    self.flat.add_header(data);
-                }
-                gfaline::Line::Segment(seg) => {
-                    self.add_seg(seg);
-                }
-                gfaline::Line::Link(_) | gfaline::Line::Path(_) => {
-                    unreachable!("paths and links handled separately")
-                }
-            }
-        }
-
-        // "Unwind" the deferred lines.
-        for line in deferred_lines {
-            if line[0] == b'P' {
-                self.add_path(line);
-            } else {
-                let gfa_line = gfaline::parse_line(line).unwrap();
-                if let gfaline::Line::Link(link) = gfa_line {
-                    self.add_link(link);
-                } else {
-                    unreachable!("unexpected deferred line")
-                }
-            }
-        }
-
-        self.flat
-    }
-
-    /// Record a marker that captures the original GFA line ordering.
-    fn record_line(&mut self, line: &gfaline::Line) {
-        match line {
-            gfaline::Line::Header(_) => self.flat.record_line(LineKind::Header),
-            gfaline::Line::Segment(_) => self.flat.record_line(LineKind::Segment),
-            gfaline::Line::Link(_) => self.flat.record_line(LineKind::Link),
-            gfaline::Line::Path(_) => self.flat.record_line(LineKind::Path),
-        }
-    }
-
-    fn add_seg(&mut self, seg: gfaline::Segment) {
-        let seg_id = self.flat.add_seg(seg.name, seg.seq, seg.data);
-        self.seg_ids.insert(seg.name, seg_id.into());
-    }
-
-    fn add_link(&mut self, link: gfaline::Link) {
-        let from = Handle::new(self.seg_ids.get(link.from_seg).into(), link.from_orient);
-        let to = Handle::new(self.seg_ids.get(link.to_seg).into(), link.to_orient);
-        self.flat.add_link(from, to, link.overlap);
-    }
-
-    fn add_path(&mut self, line: &[u8]) {
-        // This must be a path line.
-        assert_eq!(&line[..2], b"P\t");
-        let line = &line[2..];
-
-        // Parse the name.
-        let (name, rest) = gfaline::parse_field(line).unwrap();
-
-        // Parse the steps.
-        let mut step_parser = gfaline::StepsParser::new(rest);
-        let steps = self.flat.add_steps((&mut step_parser).map(|(name, dir)| {
-            Handle::new(
-                self.seg_ids.get(name).into(),
-                if dir {
-                    Orientation::Forward
-                } else {
-                    Orientation::Backward
-                },
-            )
-        }));
-        let rest = step_parser.rest();
-
-        // Parse the overlaps.
-        let (overlaps, rest) = gfaline::parse_maybe_overlap_list(rest).unwrap();
-
-        assert!(rest.is_empty());
-        self.flat.add_path(name, steps, overlaps.into_iter());
-    }
-}
-
-impl Parser<'static, flatgfa::HeapFamily> {
-    pub fn for_heap() -> Self {
-        Self::new(flatgfa::HeapGFAStore::default())
-    }
-}
-
-impl<'a> Parser<'a, flatgfa::FixedFamily> {
-    pub fn for_slice(store: flatgfa::FixedGFAStore<'a>) -> Self {
-        Self::new(store)
-    }
-}
-
-#[derive(Default)]
-struct NameMap {
-    /// Names at most this are assigned *sequential* IDs, i.e., the ID is just the name
-    /// minus one.
-    sequential_max: usize,
-
-    /// Non-sequential names go here.
-    others: HashMap<usize, u32>,
-}
-
-impl NameMap {
-    fn insert(&mut self, name: usize, id: u32) {
-        // Is this the next sequential name? If so, no need to record it in our hash table;
-        // just bump the number of sequential names we've seen.
-        if (name - 1) == self.sequential_max && (name - 1) == (id as usize) {
-            self.sequential_max += 1;
-        } else {
-            self.others.insert(name, id);
-        }
-    }
-
-    fn get(&self, name: usize) -> u32 {
-        if name <= self.sequential_max {
-            (name - 1) as u32
-        } else {
-            self.others[&name]
-        }
-    }
-}
-
-/// Scan a GFA text file to count the number of each type of line and measure some sizes
-/// that are useful in estimating the final size of the FlatGFA file.
-pub fn estimate_toc(buf: &[u8]) -> crate::file::Toc {
-    let mut segs = 0;
-    let mut links = 0;
-    let mut paths = 0;
-    let mut header_bytes = 0;
-    let mut seg_bytes = 0;
-    let mut path_bytes = 0;
-
-    let mut rest = buf;
-    while !rest.is_empty() {
-        let marker = rest[0];
-        let next = memchr::memchr(b'\n', rest).unwrap_or(rest.len() + 1);
-
-        match marker {
-            b'H' => {
-                header_bytes += next;
-            }
-            b'S' => {
-                segs += 1;
-                seg_bytes += next;
-            }
-            b'L' => {
-                links += 1;
-            }
-            b'P' => {
-                paths += 1;
-                path_bytes += next;
-            }
-            _ => {
-                panic!("unknown line type")
-            }
-        }
-
-        if next >= rest.len() {
-            break;
-        }
-        rest = &rest[next + 1..];
-    }
-
-    crate::file::Toc::estimate(segs, links, paths, header_bytes, seg_bytes, path_bytes)
-}
-
-struct MemchrSplit<'a> {
-    haystack: &'a [u8],
-    memchr: memchr::Memchr<'a>,
-    pos: usize,
-}
-
-impl<'a> Iterator for MemchrSplit<'a> {
-    type Item = &'a [u8];
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let start = self.pos;
-        let end = self.memchr.next()?;
-        self.pos = end + 1;
-        Some(&self.haystack[start..end])
-    }
-}
-
-impl MemchrSplit<'_> {
-    fn new(needle: u8, haystack: &[u8]) -> MemchrSplit {
-        MemchrSplit {
-            haystack,
-            memchr: memchr::memchr_iter(needle, haystack),
-            pos: 0,
-        }
-    }
-}
diff --git a/flatgfa/src/pool.rs b/flatgfa/src/pool.rs
deleted file mode 100644
index 2872388a..00000000
--- a/flatgfa/src/pool.rs
+++ /dev/null
@@ -1,299 +0,0 @@
-use std::ops::{Index, Add, Sub};
-use std::{hash::Hash, marker::PhantomData};
-use tinyvec::SliceVec;
-use zerocopy::{AsBytes, FromBytes, FromZeroes};
-
-/// An index into a pool.
-#[derive(Debug, FromZeroes, FromBytes, AsBytes, Clone, Copy)]
-#[repr(transparent)]
-pub struct Id<T>(u32, PhantomData<T>);
-
-impl<T> PartialEq for Id<T> {
-    fn eq(&self, other: &Self) -> bool {
-        self.0 == other.0
-    }
-}
-
-impl<T> Eq for Id<T> {}
-
-impl<T> Hash for Id<T> {
-    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-        self.0.hash(state)
-    }
-}
-
-impl<T> Add<u32> for Id<T> {
-    type Output = Self;
-
-    #[inline]
-    fn add(self, rhs: u32) -> Self::Output {
-        Self(self.0 + rhs, PhantomData)
-    }
-}
-
-impl<T> Sub<u32> for Id<T> {
-    type Output = Self;
-    #[inline]
-    fn sub(self, rhs:u32) -> Self::Output {
-        Self(self.0 - rhs, PhantomData)
-    }
-}
-
-impl<T> Id<T> {
-    pub fn index(self) -> usize {
-        self.0 as usize
-    }
-
-    pub fn new(index: usize) -> Self {
-        Self(index.try_into().expect("id too large"), PhantomData)
-    }
-}
-
-impl<T> From<u32> for Id<T> {
-    fn from(v: u32) -> Self {
-        Self(v, PhantomData)
-    }
-}
-
-impl<T> From<Id<T>> for u32 {
-    fn from(v: Id<T>) -> Self {
-        v.0
-    }
-}
-
-/// A range of indices into a pool.
-///
-/// TODO: Consider smaller indices for this, and possibly base/offset instead
-/// of start/end.
-#[derive(Debug, FromZeroes, FromBytes, AsBytes, Clone, Copy, PartialEq, Eq, Hash)]
-#[repr(packed)]
-pub struct Span<T> {
-    pub start: Id<T>,
-    pub end: Id<T>,
-    _marker: PhantomData<T>,
-}
-
-impl<T> From<Span<T>> for std::ops::Range<usize> {
-    fn from(span: Span<T>) -> std::ops::Range<usize> {
-        (span.start.index())..(span.end.index())
-    }
-}
-
-impl<T> From<&Span<T>> for std::ops::Range<usize> {
-    fn from(span: &Span<T>) -> std::ops::Range<usize> {
-        (span.start.0 as usize)..(span.end.0 as usize)
-    }
-}
-
-impl<T> Span<T> {
-    pub fn is_empty(&self) -> bool {
-        self.start.0 == self.end.0
-    }
-
-    pub fn len(&self) -> usize {
-        (self.end.0 - self.start.0) as usize
-    }
-
-    pub fn contains(&self, id: Id<T>) -> bool {
-        self.start.0 <= id.0 && id.0 < self.end.0
-    }
-
-    pub fn new(start: Id<T>, end: Id<T>) -> Self {
-        Self {
-            start,
-            end,
-            _marker: PhantomData,
-        }
-    }
-
-    pub fn new_empty() -> Self {
-        Span::new(Id::new(0), Id::new(0))
-    }
-}
-
-/// A simple arena for objects of a single type.
-///
-/// This trait provides convenient accessors for treating Vec and Vec-like objects
-/// as allocation arenas. This trait supports adding to the pool (i.e., growing the
-/// arena). Pools also `Deref` to slices, which are `&Pool`s and support convenient
-/// access to the current set of objects (but not addition of new objects).
-pub trait Store<T: Clone> {
-    /// Get a fixed-size view of the arena.
-    fn as_ref(&self) -> Pool<T>;
-
-    /// Add an item to the pool and get the new id.
-    fn add(&mut self, item: T) -> Id<T>;
-
-    /// Add an entire sequence of items to a "pool" vector and return the
-    /// range of new indices (IDs).
-    fn add_iter(&mut self, iter: impl IntoIterator<Item = T>) -> Span<T>;
-
-    /// Like `add_iter`, but for slices.
-    fn add_slice(&mut self, slice: &[T]) -> Span<T>;
-
-    /// Get the number of items in the pool.
-    fn len(&self) -> usize;
-
-    /// Check whether the pool is empty.
-    fn is_empty(&self) -> bool {
-        self.len() == 0
-    }
-
-    /// Get the next available ID.
-    fn next_id(&self) -> Id<T> {
-        Id::new(self.len())
-    }
-}
-
-/// A store that uses a `Vec` to allocate objects on the heap.
-///
-/// This is a "normal" arena that can freely grow to fill available memory.
-#[repr(transparent)]
-pub struct HeapStore<T>(Vec<T>);
-
-impl<T: Clone> Store<T> for HeapStore<T> {
-    fn as_ref(&self) -> Pool<T> {
-        Pool(&self.0)
-    }
-
-    fn add(&mut self, item: T) -> Id<T> {
-        let id = self.as_ref().next_id();
-        self.0.push(item);
-        id
-    }
-
-    fn add_iter(&mut self, iter: impl IntoIterator<Item = T>) -> Span<T> {
-        let start = self.as_ref().next_id();
-        self.0.extend(iter);
-        Span::new(start, self.as_ref().next_id())
-    }
-
-    fn add_slice(&mut self, slice: &[T]) -> Span<T> {
-        let start = self.as_ref().next_id();
-        self.0.extend_from_slice(slice);
-        Span::new(start, self.as_ref().next_id())
-    }
-
-    fn len(&self) -> usize {
-        self.0.len()
-    }
-}
-
-impl<T> Default for HeapStore<T> {
-    fn default() -> Self {
-        Self(Vec::new())
-    }
-}
-
-/// A store that keeps its data in fixed locations in memory.
-///
-/// This is a funkier kind of arena that uses memory that has already been pre-allocated
-/// somewhere else, such as in a memory-mapped file. A consequence is that there is a
-/// fixed maximum size for the arena; it's possible to add objects only until it fills up.
-#[repr(transparent)]
-pub struct FixedStore<'a, T>(SliceVec<'a, T>);
-
-impl<'a, T: Clone> Store<T> for FixedStore<'a, T> {
-    fn as_ref(&self) -> Pool<T> {
-        Pool(&self.0)
-    }
-
-    fn add(&mut self, item: T) -> Id<T> {
-        let id = self.next_id();
-        self.0.push(item);
-        id
-    }
-
-    fn add_iter(&mut self, iter: impl IntoIterator<Item = T>) -> Span<T> {
-        let start = self.next_id();
-        self.0.extend(iter);
-        Span::new(start, self.next_id())
-    }
-
-    fn add_slice(&mut self, slice: &[T]) -> Span<T> {
-        let start = self.next_id();
-        self.0.extend_from_slice(slice);
-        Span::new(start, self.next_id())
-    }
-
-    fn len(&self) -> usize {
-        self.0.len()
-    }
-}
-
-impl<'a, T> FixedStore<'a, T> {
-    pub fn capacity(&self) -> usize {
-        self.0.capacity()
-    }
-}
-
-impl<'a, T> From<SliceVec<'a, T>> for FixedStore<'a, T> {
-    fn from(slice: SliceVec<'a, T>) -> Self {
-        Self(slice)
-    }
-}
-
-/// A fixed-sized arena.
-///
-/// This trait allows id-based access to a fixed-size chunk of objects reflecting
-/// a `Store`. Unlike `Store`, it does not support adding new objects.
-#[repr(transparent)]
-#[derive(Clone, Copy)]
-pub struct Pool<'a, T>(&'a [T]);
-
-impl<'a, T> Pool<'a, T> {
-    /// Get the number of items in the pool.
-    pub fn len(&self) -> usize {
-        self.0.len()
-    }
-
-    /// Check if the pool is empty.
-    pub fn is_empty(&self) -> bool {
-        self.0.is_empty()
-    }
-
-    /// Get the next available ID.
-    pub fn next_id(&self) -> Id<T> {
-        Id::new(self.len())
-    }
-
-    /// Get the entire pool as a slice.
-    pub fn all(&self) -> &'a [T] {
-        self.0
-    }
-
-    /// Find the first item in the pool that satisfies a predicate.
-    pub fn search(&self, pred: impl Fn(&T) -> bool) -> Option<Id<T>> {
-        self.0.iter().position(pred).map(|i| Id::new(i))
-    }
-
-    /// Iterate over id/item pairs in the pool.
-    pub fn items(&self) -> impl Iterator<Item = (Id<T>, &T)> {
-        self.0
-            .iter()
-            .enumerate()
-            .map(|(i, item)| (Id::new(i), item))
-    }
-}
-
-impl<T> Index<Id<T>> for Pool<'_, T> {
-    type Output = T;
-
-    fn index(&self, id: Id<T>) -> &T {
-        &self.0[id.index()]
-    }
-}
-
-impl<T> Index<Span<T>> for Pool<'_, T> {
-    type Output = [T];
-
-    fn index(&self, span: Span<T>) -> &[T] {
-        &self.0[std::ops::Range::from(span)]
-    }
-}
-
-impl<'a, T> From<&'a [T]> for Pool<'a, T> {
-    fn from(slice: &'a [T]) -> Self {
-        Self(slice)
-    }
-}
diff --git a/flatgfa/src/print.rs b/flatgfa/src/print.rs
deleted file mode 100644
index b6d28502..00000000
--- a/flatgfa/src/print.rs
+++ /dev/null
@@ -1,153 +0,0 @@
-use crate::flatgfa;
-use std::fmt;
-
-impl fmt::Display for flatgfa::Orientation {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            flatgfa::Orientation::Forward => write!(f, "+"),
-            flatgfa::Orientation::Backward => write!(f, "-"),
-        }
-    }
-}
-
-impl fmt::Display for flatgfa::AlignOpcode {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            flatgfa::AlignOpcode::Match => write!(f, "M"),
-            flatgfa::AlignOpcode::Gap => write!(f, "N"),
-            flatgfa::AlignOpcode::Insertion => write!(f, "D"),
-            flatgfa::AlignOpcode::Deletion => write!(f, "I"),
-        }
-    }
-}
-
-impl<'a> fmt::Display for flatgfa::Alignment<'a> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        if self.ops.len() == 0 {
-            write!(f, "0M")?;
-        }
-        for op in self.ops {
-            write!(f, "{}{}", op.len(), op.op())?;
-        }
-        Ok(())
-    }
-}
-
-/// A wrapper for displaying components from FlatGFA.
-pub struct Display<'a, T>(pub &'a flatgfa::FlatGFA<'a>, pub T);
-
-impl<'a> fmt::Display for Display<'a, flatgfa::Handle> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let seg = self.0.get_handle_seg(self.1);
-        let name = seg.name;
-        write!(f, "{}{}", name, self.1.orient())
-    }
-}
-
-impl<'a> fmt::Display for Display<'a, &flatgfa::Path> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "P\t{}\t", self.0.get_path_name(&self.1))?;
-        let steps = &self.0.steps[self.1.steps];
-        write!(f, "{}", Display(self.0, steps[0]))?;
-        for step in steps[1..].iter() {
-            write!(f, ",{}", Display(self.0, *step))?;
-        }
-        write!(f, "\t")?;
-        let overlaps = &self.0.overlaps[self.1.overlaps];
-        if overlaps.is_empty() {
-            write!(f, "*")?;
-        } else {
-            write!(f, "{}", self.0.get_alignment(overlaps[0]))?;
-            for overlap in overlaps[1..].iter() {
-                write!(f, ",{}", self.0.get_alignment(*overlap))?;
-            }
-        }
-        Ok(())
-    }
-}
-
-impl<'a> fmt::Display for Display<'a, &flatgfa::Link> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let from = self.1.from;
-        let from_name = self.0.get_handle_seg(from).name;
-        let to = self.1.to;
-        let to_name = self.0.get_handle_seg(to).name;
-        write!(
-            f,
-            "L\t{}\t{}\t{}\t{}\t{}",
-            from_name,
-            from.orient(),
-            to_name,
-            to.orient(),
-            self.0.get_alignment(self.1.overlap)
-        )
-    }
-}
-
-impl<'a> fmt::Display for Display<'a, &flatgfa::Segment> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let name = self.1.name;
-        write!(f, "S\t{}\t{}", name, self.0.get_seq(self.1))?;
-        if !self.1.optional.is_empty() {
-            write!(f, "\t{}", self.0.get_optional_data(self.1))?;
-        }
-        Ok(())
-    }
-}
-
-/// Print a graph in the order preserved from an original GFA file.
-fn write_preserved(gfa: &flatgfa::FlatGFA, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-    let mut seg_iter = gfa.segs.all().iter();
-    let mut path_iter = gfa.paths.all().iter();
-    let mut link_iter = gfa.links.all().iter();
-    for kind in gfa.get_line_order() {
-        match kind {
-            flatgfa::LineKind::Header => {
-                let version = gfa.header;
-                assert!(!version.is_empty());
-                writeln!(f, "H\t{}", bstr::BStr::new(version.all()))?;
-            }
-            flatgfa::LineKind::Segment => {
-                let seg = seg_iter.next().expect("too few segments");
-                writeln!(f, "{}", Display(gfa, seg))?;
-            }
-            flatgfa::LineKind::Path => {
-                let path = path_iter.next().expect("too few paths");
-                writeln!(f, "{}", Display(gfa, path))?;
-            }
-            flatgfa::LineKind::Link => {
-                let link = link_iter.next().expect("too few links");
-                writeln!(f, "{}", Display(gfa, link))?;
-            }
-        }
-    }
-    Ok(())
-}
-
-/// Print a graph in a normalized order, ignoring the original GFA line order.
-pub fn write_normalized(gfa: &flatgfa::FlatGFA, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-    if !gfa.header.is_empty() {
-        writeln!(f, "H\t{}", bstr::BStr::new(gfa.header.all()))?;
-    }
-    for seg in gfa.segs.all().iter() {
-        writeln!(f, "{}", Display(gfa, seg))?;
-    }
-    for path in gfa.paths.all().iter() {
-        writeln!(f, "{}", Display(gfa, path))?;
-    }
-    for link in gfa.links.all().iter() {
-        writeln!(f, "{}", Display(gfa, link))?;
-    }
-    Ok(())
-}
-
-/// Print our flat representation as in GFA text format.
-impl<'a> fmt::Display for &'a flatgfa::FlatGFA<'a> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        if self.line_order.is_empty() {
-            write_normalized(self, f)
-        } else {
-            write_preserved(self, f)
-        }
-    }
-}

From 9dc781ba792998f47f0c10ad82e1708bce1a0e9c Mon Sep 17 00:00:00 2001
From: susan-garry <sgarry406@gmail.com>
Date: Fri, 27 Sep 2024 12:18:13 -0400
Subject: [PATCH 2/8] move fgfa_ds and commands into submodules in flatgfa/src

---
 flatgfa-py/Cargo.lock              |  65 ++--
 flatgfa-py/src/lib.rs              |   2 +-
 flatgfa/Cargo.lock                 |  79 ++---
 flatgfa/Cargo.toml                 |  17 +-
 flatgfa/src/cmds.rs                | 547 -----------------------------
 flatgfa/src/commands/basic_cmds.rs | 136 +++++++
 flatgfa/src/commands/chop.rs       | 160 +++++++++
 flatgfa/src/commands/depth.rs      |  37 ++
 flatgfa/src/commands/extract.rs    | 224 ++++++++++++
 flatgfa/src/commands/mod.rs        |   4 +
 flatgfa/src/fgfa_ds/file.rs        | 336 ++++++++++++++++++
 flatgfa/src/fgfa_ds/flatgfa.rs     | 430 +++++++++++++++++++++++
 flatgfa/src/fgfa_ds/gfaline.rs     | 272 ++++++++++++++
 flatgfa/src/fgfa_ds/mod.rs         |   7 +
 flatgfa/src/fgfa_ds/parse.rs       | 284 +++++++++++++++
 flatgfa/src/fgfa_ds/pool.rs        | 299 ++++++++++++++++
 flatgfa/src/fgfa_ds/print.rs       | 153 ++++++++
 flatgfa/src/lib.rs                 |   1 +
 flatgfa/src/main.rs                |  37 +-
 19 files changed, 2447 insertions(+), 643 deletions(-)
 delete mode 100644 flatgfa/src/cmds.rs
 create mode 100644 flatgfa/src/commands/basic_cmds.rs
 create mode 100644 flatgfa/src/commands/chop.rs
 create mode 100644 flatgfa/src/commands/depth.rs
 create mode 100644 flatgfa/src/commands/extract.rs
 create mode 100644 flatgfa/src/commands/mod.rs
 create mode 100644 flatgfa/src/fgfa_ds/file.rs
 create mode 100644 flatgfa/src/fgfa_ds/flatgfa.rs
 create mode 100644 flatgfa/src/fgfa_ds/gfaline.rs
 create mode 100644 flatgfa/src/fgfa_ds/mod.rs
 create mode 100644 flatgfa/src/fgfa_ds/parse.rs
 create mode 100644 flatgfa/src/fgfa_ds/pool.rs
 create mode 100644 flatgfa/src/fgfa_ds/print.rs
 create mode 100644 flatgfa/src/lib.rs

diff --git a/flatgfa-py/Cargo.lock b/flatgfa-py/Cargo.lock
index dcb76f0d..f8f3d345 100644
--- a/flatgfa-py/Cargo.lock
+++ b/flatgfa-py/Cargo.lock
@@ -56,9 +56,9 @@ checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
 
 [[package]]
 name = "bstr"
-version = "1.9.1"
+version = "1.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706"
+checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c"
 dependencies = [
  "memchr",
  "regex-automata",
@@ -77,6 +77,14 @@ version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
+[[package]]
+name = "commands"
+version = "0.1.0"
+dependencies = [
+ "argh",
+ "fgfa_ds",
+]
+
 [[package]]
 name = "equivalent"
 version = "1.0.1"
@@ -101,6 +109,7 @@ name = "flatgfa"
 version = "0.1.0"
 dependencies = [
  "argh",
+ "commands",
  "fgfa_ds",
 ]
 
@@ -127,9 +136,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
 
 [[package]]
 name = "indexmap"
-version = "2.2.6"
+version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
+checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5"
 dependencies = [
  "equivalent",
  "hashbrown",
@@ -165,9 +174,9 @@ dependencies = [
 
 [[package]]
 name = "memchr"
-version = "2.7.2"
+version = "2.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
+checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
 
 [[package]]
 name = "memmap"
@@ -190,27 +199,27 @@ dependencies = [
 
 [[package]]
 name = "num-traits"
-version = "0.2.18"
+version = "0.2.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
 dependencies = [
  "autocfg",
 ]
 
 [[package]]
 name = "num_enum"
-version = "0.7.2"
+version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02339744ee7253741199f897151b38e72257d13802d4ee837285cc2990a90845"
+checksum = "4e613fc340b2220f734a8595782c551f1250e969d87d3be1ae0579e8d4065179"
 dependencies = [
  "num_enum_derive",
 ]
 
 [[package]]
 name = "num_enum_derive"
-version = "0.7.2"
+version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "681030a937600a36906c185595136d26abfebb4aa9c65701cefcaf8578bb982b"
+checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56"
 dependencies = [
  "proc-macro-crate",
  "proc-macro2",
@@ -255,9 +264,9 @@ checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0"
 
 [[package]]
 name = "proc-macro-crate"
-version = "3.1.0"
+version = "3.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284"
+checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b"
 dependencies = [
  "toml_edit",
 ]
@@ -355,9 +364,9 @@ dependencies = [
 
 [[package]]
 name = "regex-automata"
-version = "0.4.6"
+version = "0.4.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
+checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
 
 [[package]]
 name = "scopeguard"
@@ -410,21 +419,21 @@ checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f"
 
 [[package]]
 name = "tinyvec"
-version = "1.6.0"
+version = "1.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50"
+checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938"
 
 [[package]]
 name = "toml_datetime"
-version = "0.6.5"
+version = "0.6.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1"
+checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41"
 
 [[package]]
 name = "toml_edit"
-version = "0.21.1"
+version = "0.22.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1"
+checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5"
 dependencies = [
  "indexmap",
  "toml_datetime",
@@ -531,18 +540,18 @@ checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"
 
 [[package]]
 name = "winnow"
-version = "0.5.40"
+version = "0.6.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876"
+checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b"
 dependencies = [
  "memchr",
 ]
 
 [[package]]
 name = "zerocopy"
-version = "0.7.32"
+version = "0.7.35"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be"
+checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
 dependencies = [
  "byteorder",
  "zerocopy-derive",
@@ -550,9 +559,9 @@ dependencies = [
 
 [[package]]
 name = "zerocopy-derive"
-version = "0.7.32"
+version = "0.7.35"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
+checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
 dependencies = [
  "proc-macro2",
  "quote",
diff --git a/flatgfa-py/src/lib.rs b/flatgfa-py/src/lib.rs
index 85452d48..a0593909 100644
--- a/flatgfa-py/src/lib.rs
+++ b/flatgfa-py/src/lib.rs
@@ -1,4 +1,4 @@
-use flatgfa::pool::Id;
+use flatgfa::fgfa_ds::pool::Id;
 use flatgfa::{self, file, print, FlatGFA, HeapGFAStore};
 use pyo3::exceptions::PyIndexError;
 use pyo3::prelude::*;
diff --git a/flatgfa/Cargo.lock b/flatgfa/Cargo.lock
index bf18eb98..8f3f20c6 100644
--- a/flatgfa/Cargo.lock
+++ b/flatgfa/Cargo.lock
@@ -44,15 +44,15 @@ dependencies = [
 
 [[package]]
 name = "autocfg"
-version = "1.1.0"
+version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
 
 [[package]]
 name = "bstr"
-version = "1.9.1"
+version = "1.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706"
+checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c"
 dependencies = [
  "memchr",
  "regex-automata",
@@ -72,9 +72,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
 
 [[package]]
-name = "fgfa_ds"
+name = "fgfa"
 version = "0.1.0"
 dependencies = [
+ "argh",
  "atoi",
  "bstr",
  "memchr",
@@ -84,25 +85,17 @@ dependencies = [
  "zerocopy",
 ]
 
-[[package]]
-name = "flatgfa"
-version = "0.1.0"
-dependencies = [
- "argh",
- "fgfa_ds",
-]
-
 [[package]]
 name = "hashbrown"
-version = "0.14.3"
+version = "0.14.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
+checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
 
 [[package]]
 name = "indexmap"
-version = "2.2.5"
+version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4"
+checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5"
 dependencies = [
  "equivalent",
  "hashbrown",
@@ -110,15 +103,15 @@ dependencies = [
 
 [[package]]
 name = "libc"
-version = "0.2.153"
+version = "0.2.159"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
+checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5"
 
 [[package]]
 name = "memchr"
-version = "2.7.1"
+version = "2.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
+checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
 
 [[package]]
 name = "memmap"
@@ -132,27 +125,27 @@ dependencies = [
 
 [[package]]
 name = "num-traits"
-version = "0.2.18"
+version = "0.2.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
 dependencies = [
  "autocfg",
 ]
 
 [[package]]
 name = "num_enum"
-version = "0.7.2"
+version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02339744ee7253741199f897151b38e72257d13802d4ee837285cc2990a90845"
+checksum = "4e613fc340b2220f734a8595782c551f1250e969d87d3be1ae0579e8d4065179"
 dependencies = [
  "num_enum_derive",
 ]
 
 [[package]]
 name = "num_enum_derive"
-version = "0.7.2"
+version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "681030a937600a36906c185595136d26abfebb4aa9c65701cefcaf8578bb982b"
+checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56"
 dependencies = [
  "proc-macro-crate",
  "proc-macro2",
@@ -162,9 +155,9 @@ dependencies = [
 
 [[package]]
 name = "proc-macro-crate"
-version = "3.1.0"
+version = "3.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284"
+checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b"
 dependencies = [
  "toml_edit",
 ]
@@ -189,9 +182,9 @@ dependencies = [
 
 [[package]]
 name = "regex-automata"
-version = "0.4.6"
+version = "0.4.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
+checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
 
 [[package]]
 name = "serde"
@@ -226,21 +219,21 @@ dependencies = [
 
 [[package]]
 name = "tinyvec"
-version = "1.6.0"
+version = "1.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50"
+checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938"
 
 [[package]]
 name = "toml_datetime"
-version = "0.6.5"
+version = "0.6.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1"
+checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41"
 
 [[package]]
 name = "toml_edit"
-version = "0.21.1"
+version = "0.22.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1"
+checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5"
 dependencies = [
  "indexmap",
  "toml_datetime",
@@ -277,18 +270,18 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
 
 [[package]]
 name = "winnow"
-version = "0.5.40"
+version = "0.6.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876"
+checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b"
 dependencies = [
  "memchr",
 ]
 
 [[package]]
 name = "zerocopy"
-version = "0.7.32"
+version = "0.7.35"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be"
+checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
 dependencies = [
  "byteorder",
  "zerocopy-derive",
@@ -296,9 +289,9 @@ dependencies = [
 
 [[package]]
 name = "zerocopy-derive"
-version = "0.7.32"
+version = "0.7.35"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
+checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
 dependencies = [
  "proc-macro2",
  "quote",
diff --git a/flatgfa/Cargo.toml b/flatgfa/Cargo.toml
index 6178f607..f8bad886 100644
--- a/flatgfa/Cargo.toml
+++ b/flatgfa/Cargo.toml
@@ -1,18 +1,17 @@
-[workspace]
-members = ["fgfa_ds"]
-
 [package]
-name = "flatgfa"
+name = "fgfa"
 version = "0.1.0"
 edition = "2021"
 
-[[bin]]
-name = "fgfa"
-path = "src/main.rs"
-
 [dependencies]
 argh = "0.1.12"
-fgfa_ds = { path = "fgfa_ds" }
+atoi = "2.0.0"
+bstr = "1.9.1"
+memchr = "2.7.1"
+memmap = "0.7.0"
+num_enum = "0.7.2"
+tinyvec = "1.6.0"
+zerocopy = { version = "0.7.32", features = ["derive"] }
 
 [profile.profiling]
 inherits = "release"
diff --git a/flatgfa/src/cmds.rs b/flatgfa/src/cmds.rs
deleted file mode 100644
index 3e56d721..00000000
--- a/flatgfa/src/cmds.rs
+++ /dev/null
@@ -1,547 +0,0 @@
-use fgfa_ds::flatgfa::{self, Handle, Link, Orientation, Path, Segment};
-use fgfa_ds::pool::{self, Id, Span, Store};
-use fgfa_ds::{GFAStore, HeapFamily};
-use argh::FromArgs;
-use std::collections::{HashMap, HashSet};
-
-/// print the FlatGFA table of contents
-#[derive(FromArgs, PartialEq, Debug)]
-#[argh(subcommand, name = "toc")]
-pub struct Toc {}
-
-pub fn toc(gfa: &flatgfa::FlatGFA) {
-    eprintln!("header: {}", gfa.header.len());
-    eprintln!("segs: {}", gfa.segs.len());
-    eprintln!("paths: {}", gfa.paths.len());
-    eprintln!("links: {}", gfa.links.len());
-    eprintln!("steps: {}", gfa.steps.len());
-    eprintln!("seq_data: {}", gfa.seq_data.len());
-    eprintln!("overlaps: {}", gfa.overlaps.len());
-    eprintln!("alignment: {}", gfa.alignment.len());
-    eprintln!("name_data: {}", gfa.name_data.len());
-    eprintln!("optional_data: {}", gfa.optional_data.len());
-    eprintln!("line_order: {}", gfa.line_order.len());
-}
-
-/// list the paths
-#[derive(FromArgs, PartialEq, Debug)]
-#[argh(subcommand, name = "paths")]
-pub struct Paths {}
-
-pub fn paths(gfa: &flatgfa::FlatGFA) {
-    for path in gfa.paths.all().iter() {
-        println!("{}", gfa.get_path_name(path));
-    }
-}
-
-/// calculate graph statistics
-#[derive(FromArgs, PartialEq, Debug)]
-#[argh(subcommand, name = "stats")]
-pub struct Stats {
-    /// show basic metrics
-    #[argh(switch, short = 'S')]
-    summarize: bool,
-
-    /// number of segments with at least one self-loop link
-    #[argh(switch, short = 'L')]
-    self_loops: bool,
-}
-
-pub fn stats(gfa: &flatgfa::FlatGFA, args: Stats) {
-    if args.summarize {
-        println!("#length\tnodes\tedges\tpaths\tsteps");
-        println!(
-            "{}\t{}\t{}\t{}\t{}",
-            gfa.seq_data.len(),
-            gfa.segs.len(),
-            gfa.links.len(),
-            gfa.paths.len(),
-            gfa.steps.len()
-        );
-    } else if args.self_loops {
-        let mut counts: HashMap<Id<Segment>, usize> = HashMap::new();
-        let mut total: usize = 0;
-        for link in gfa.links.all().iter() {
-            if link.from.segment() == link.to.segment() {
-                let count = counts.entry(link.from.segment()).or_insert(0);
-                *count += 1;
-                total += 1;
-            }
-        }
-        println!("#type\tnum");
-        println!("total\t{}", total);
-        println!("unique\t{}", counts.len());
-    }
-}
-
-/// find a nucleotide position within a path
-#[derive(FromArgs, PartialEq, Debug)]
-#[argh(subcommand, name = "position")]
-pub struct Position {
-    /// path_name,offset,orientation
-    #[argh(option, short = 'p')]
-    path_pos: String,
-}
-
-pub fn position(gfa: &flatgfa::FlatGFA, args: Position) -> Result<(), &'static str> {
-    // Parse the position triple, which looks like `path,42,+`.
-    let (path_name, offset, orientation) = {
-        let parts: Vec<_> = args.path_pos.split(',').collect();
-        if parts.len() != 3 {
-            return Err("position must be path_name,offset,orientation");
-        }
-        let off: usize = parts[1].parse().or(Err("offset must be a number"))?;
-        let ori: flatgfa::Orientation = parts[2].parse().or(Err("orientation must be + or -"))?;
-        (parts[0], off, ori)
-    };
-
-    let path_id = gfa.find_path(path_name.into()).ok_or("path not found")?;
-    let path = &gfa.paths[path_id];
-    assert_eq!(
-        orientation,
-        flatgfa::Orientation::Forward,
-        "only + is implemented so far"
-    );
-
-    // Traverse the path until we reach the position.
-    let mut cur_pos = 0;
-    let mut found = None;
-    for step in &gfa.steps[path.steps] {
-        let seg = gfa.get_handle_seg(*step);
-        let end_pos = cur_pos + seg.len();
-        if offset < end_pos {
-            // Found it!
-            found = Some((*step, offset - cur_pos));
-            break;
-        }
-        cur_pos = end_pos;
-    }
-
-    // Print the match.
-    if let Some((handle, seg_off)) = found {
-        let seg = gfa.get_handle_seg(handle);
-        let seg_name = seg.name;
-        println!("#source.path.pos\ttarget.graph.pos");
-        println!(
-            "{},{},{}\t{},{},{}",
-            path_name,
-            offset,
-            orientation,
-            seg_name,
-            seg_off,
-            handle.orient()
-        );
-    }
-
-    Ok(())
-}
-
-/// create a subset graph
-#[derive(FromArgs, PartialEq, Debug)]
-#[argh(subcommand, name = "extract")]
-pub struct Extract {
-    /// segment to extract around
-    #[argh(option, short = 'n')]
-    seg_name: usize,
-
-    /// number of edges "away" from the node to include
-    #[argh(option, short = 'c')]
-    link_distance: usize,
-
-    /// maximum number of basepairs allowed between subpaths s.t. the subpaths are merged together
-    #[argh(option, short = 'd', long = "max-distance-subpaths", default = "300000")]
-    max_distance_subpaths: usize, // TODO: possibly make this bigger
-
-    /// maximum number of iterations before we stop merging subpaths
-    #[argh(option, short = 'e', long = "max-merging-iterations", default = "6")]
-    num_iterations: usize // TODO: probably make this smaller
-}
-
-pub fn extract(
-    gfa: &flatgfa::FlatGFA,
-    args: Extract,
-) -> Result<flatgfa::HeapGFAStore, &'static str> {
-    let origin_seg = gfa.find_seg(args.seg_name).ok_or("segment not found")?;
-
-    let mut subgraph = SubgraphBuilder::new(gfa);
-    subgraph.add_header();
-    subgraph.extract(origin_seg, args.link_distance, args.max_distance_subpaths, args.num_iterations);
-    Ok(subgraph.store)
-}
-
-/// A helper to construct a new graph that includes part of an old graph.
-struct SubgraphBuilder<'a> {
-    old: &'a flatgfa::FlatGFA<'a>,
-    store: flatgfa::HeapGFAStore,
-    seg_map: HashMap<Id<Segment>, Id<Segment>>,
-}
-
-struct SubpathStart {
-    step: Id<Handle>, // The id of the first step in the subpath.
-    pos: usize,       // The bp position at the start of the subpath.
-}
-
-impl<'a> SubgraphBuilder<'a> {
-    fn new(old: &'a flatgfa::FlatGFA) -> Self {
-        Self {
-            old,
-            store: flatgfa::HeapGFAStore::default(),
-            seg_map: HashMap::new(),
-        }
-    }
-
-    /// Include the old graph's header
-    fn add_header(&mut self) {
-            // pub fn add_header(&mut self, version: &[u8]) {
-            //     assert!(self.header.as_ref().is_empty());
-            //     self.header.add_slice(version);
-            // }
-        assert!(self.store.header.as_ref().is_empty());
-        self.store.header.add_slice(self.old.header.all());
-    }
-
-    /// Add a segment from the source graph to this subgraph.
-    fn include_seg(&mut self, seg_id: Id<Segment>) {
-        let seg = &self.old.segs[seg_id];
-        let new_seg_id = self.store.add_seg(
-            seg.name,
-            self.old.get_seq(seg),
-            self.old.get_optional_data(seg),
-        );
-        self.seg_map.insert(seg_id, new_seg_id);
-    }
-
-    /// Add a link from the source graph to the subgraph.
-    fn include_link(&mut self, link: &flatgfa::Link) {
-        let from = self.tr_handle(link.from);
-        let to = self.tr_handle(link.to);
-        let overlap = self.old.get_alignment(link.overlap);
-        self.store.add_link(from, to, overlap.ops.into());
-    }
-
-    /// Add a single subpath from the given path to the subgraph.
-    fn include_subpath(&mut self, path: &flatgfa::Path, start: &SubpathStart, end_pos: usize) {
-        let steps = pool::Span::new(start.step, self.store.steps.next_id()); // why the next id?
-        let name = format!("{}:{}-{}", self.old.get_path_name(path), start.pos, end_pos);
-        self.store
-            .add_path(name.as_bytes(), steps, std::iter::empty());
-    }
-
-    /// Identify all the subpaths in a path from the original graph that cross through
-    /// segments in this subgraph and merge them if possible.
-    fn merge_subpaths(&mut self, path: &flatgfa::Path, max_distance_subpaths: usize) {
-        // these are subpaths which *aren't* already included in the new graph
-        let mut cur_subpath_start: Option<usize> = Some(0);
-        let mut subpath_length = 0;
-        let mut ignore_path = true;
-
-        for (idx, step) in self.old.steps[path.steps].iter().enumerate() {
-            let in_neighb = self.seg_map.contains_key(&step.segment());
-
-            if let (Some(start), true) = (&cur_subpath_start, in_neighb) {
-                // We just entered the subgraph. End the current subpath.
-                if !ignore_path && subpath_length <= max_distance_subpaths {
-                    // TODO: type safety
-                    let subpath_span = Span::new(path.steps.start + *start as u32, path.steps.start + idx as u32);
-                    for step in &self.old.steps[subpath_span] {
-                        if !self.seg_map.contains_key(&step.segment()) {
-                            self.include_seg(step.segment());
-                        }
-                    }
-                }
-                cur_subpath_start = None;
-                ignore_path = false;
-            } else if let (None, false) = (&cur_subpath_start, in_neighb) {
-                // We've exited the current subgraph, start a new subpath
-                cur_subpath_start = Some(idx);
-            } 
-
-            // Track the current bp position in the path.
-            subpath_length += self.old.get_handle_seg(*step).len();
-        }
-    }
-
-    /// Identify all the subpaths in a path from the original graph that cross through
-    /// segments in this subgraph and add them.
-    fn find_subpaths(&mut self, path: &flatgfa::Path) {
-        let mut cur_subpath_start: Option<SubpathStart> = None;
-        let mut path_pos = 0;
-
-        for step in &self.old.steps[path.steps] {
-            let in_neighb = self.seg_map.contains_key(&step.segment());
-
-            if let (Some(start), false) = (&cur_subpath_start, in_neighb) {
-                // End the current subpath.
-                self.include_subpath(path, start, path_pos);
-                cur_subpath_start = None;
-            } else if let (None, true) = (&cur_subpath_start, in_neighb) {
-                // Start a new subpath.
-                cur_subpath_start = Some(SubpathStart {
-                    step: self.store.steps.next_id(),
-                    pos: path_pos,
-                });
-            }
-
-            // Add the (translated) step to the new graph.
-            if in_neighb {
-                self.store.add_step(self.tr_handle(*step));
-            }
-
-            // Track the current bp position in the path.
-            path_pos += self.old.get_handle_seg(*step).len();
-        }
-
-        // Did we reach the end of the path while still in the neighborhood?
-        if let Some(start) = cur_subpath_start {
-            self.include_subpath(path, &start, path_pos);
-        }
-    }
-
-    /// Translate a handle from the source graph to this subgraph.
-    fn tr_handle(&self, old_handle: flatgfa::Handle) -> flatgfa::Handle {
-        // TODO: is this just generating the handle or should we add it to the new graph?
-        flatgfa::Handle::new(self.seg_map[&old_handle.segment()], old_handle.orient())
-    }
-
-    /// Check whether a segment from the old graph is in the subgraph.
-    fn contains(&self, old_seg_id: Id<Segment>) -> bool {
-        self.seg_map.contains_key(&old_seg_id)
-    }
-
-    /// Extract a subgraph consisting of a neighborhood of segments up to `dist` links away
-    /// from the given segment in the original graph.
-    ///
-    /// Include any links between the segments in the neighborhood and subpaths crossing
-    /// through the neighborhood.
-    fn extract(&mut self, origin: Id<Segment>, dist: usize, max_distance_subpaths: usize, num_iterations: usize) {
-        self.include_seg(origin);
-
-        // Find the set of all segments that are c links away.
-        let mut frontier: Vec<Id<Segment>> = Vec::new();
-        let mut next_frontier: Vec<Id<Segment>> = Vec::new();
-        frontier.push(origin);
-        for _ in 0..dist {
-            while let Some(seg_id) = frontier.pop() {
-                for link in self.old.links.all().iter() {
-                    if let Some(other_seg) = link.incident_seg(seg_id) {
-                        // Add other_seg to the frontier set if it is not already in the frontier set or the seg_map
-                        if !self.seg_map.contains_key(&other_seg) {
-                            self.include_seg(other_seg);
-                            next_frontier.push(other_seg);
-                        }
-                    }
-                }
-            }
-            (frontier, next_frontier) = (next_frontier, frontier);
-        }
-
-        // Merge subpaths within max_distance_subpaths bp of each other, num_iterations times
-        for _ in 0..num_iterations {
-            for path in self.old.paths.all().iter() {
-                self.merge_subpaths(path, max_distance_subpaths);
-            }
-        }
-
-        // Include all links within the subgraph.
-        for link in self.old.links.all().iter() {
-            if self.contains(link.from.segment()) && self.contains(link.to.segment()) {
-                self.include_link(link);
-            }
-        }
-
-        // Find subpaths within the subgraph.
-        for path in self.old.paths.all().iter() {
-            self.find_subpaths(path);
-        }
-    }
-}
-
-/// compute node depth, the number of times paths cross a node
-#[derive(FromArgs, PartialEq, Debug)]
-#[argh(subcommand, name = "depth")]
-pub struct Depth {}
-
-pub fn depth(gfa: &flatgfa::FlatGFA) {
-    // Initialize node depth
-    let mut depths = vec![0; gfa.segs.len()];
-    // Initialize uniq_paths
-    let mut uniq_paths = Vec::<HashSet<usize>>::new();
-    uniq_paths.resize(gfa.segs.len(), HashSet::new());
-    // do not assume that each handle in `gfa.steps()` is unique
-    for (idx, path) in gfa.paths.all().iter().enumerate() {
-        for step in &gfa.steps[path.steps] {
-            let seg_id = step.segment().index();
-            // Increment depths
-            depths[seg_id] += 1;
-            // Update uniq_paths
-            uniq_paths[seg_id].insert(idx);
-        }
-    }
-    // print out depth and depth.uniq
-    println!("#node.id\tdepth\tdepth.uniq");
-    for (id, seg) in gfa.segs.items() {
-        let name: u32 = seg.name as u32;
-        println!(
-            "{}\t{}\t{}",
-            name,
-            depths[id.index()],
-            uniq_paths[id.index()].len()
-        );
-    }
-}
-
-/// chop the segments in a graph into sizes of N or smaller
-#[derive(FromArgs, PartialEq, Debug)]
-#[argh(subcommand, name = "chop")]
-pub struct Chop {
-    /// maximimum segment size.
-    // Use c in keeping with odgi convention
-    #[argh(option, short = 'c')]
-    c: usize,
-
-    /// compute new links
-    #[argh(switch, short = 'l')]
-    l: bool,
-}
-
-/// Chop a graph into segments of size no larger than c
-/// By default, compact node ids
-/// CIGAR strings, links, and optional Segment data are invalidated by chop
-/// Generates a new graph, rather than modifying the old one in place
-pub fn chop<'a>(
-    gfa: &'a flatgfa::FlatGFA<'a>,
-    args: Chop,
-) -> Result<flatgfa::HeapGFAStore, &'static str> {
-
-    let mut flat = flatgfa::HeapGFAStore::default();        
-
-    // when segment S is chopped into segments S1 through S2 (exclusive), 
-    // seg_map[S.name] = Span(Id(S1.name), Id(S2.name)). If S is not chopped: S=S1, S2.name = S1.name+1
-    let mut seg_map: Vec<Span<Segment>> = Vec::new();
-    // The smallest id (>0) which does not already belong to a segment in `flat`
-    let mut max_node_id = 1;
-
-    fn link_forward(flat: &mut GFAStore<'static, HeapFamily>, span: &Span<Segment>) {
-        // Link segments spanned by `span` from head to tail
-        let overlap = Span::new_empty();
-        flat.add_links((span.start.index()..span.end.index() - 1).map(|idx| Link {
-            from: Handle::new(Id::new(idx), Orientation::Forward),
-            to: Handle::new(Id::new(idx + 1), Orientation::Forward),
-            overlap,
-        }));
-    }
-
-    // Add new, chopped segments
-    for seg in gfa.segs.all().iter() {
-        let len = seg.len();
-        if len <= args.c {
-            // Leave the segment as is
-            let id = flat.segs.add(Segment {
-                name: max_node_id,
-                seq: seg.seq,
-                optional: Span::new_empty(), // TODO: Optional data may stay valid when seg not chopped?
-            });
-            max_node_id += 1;
-            seg_map.push(Span::new(id, flat.segs.next_id()));
-        } else {
-            let seq_end = seg.seq.end;
-            let mut offset = seg.seq.start.index();
-            let segs_start = flat.segs.next_id();
-            // Could also generate end_id by setting it equal to the start_id and
-            // updating it for each segment that is added - only benefits us if we 
-            // don't unroll the last iteration of this loop
-            while offset < seq_end.index() - args.c {
-                // Generate a new segment of length c
-                flat.segs.add(Segment {
-                    name: max_node_id,
-                    seq: Span::new(Id::new(offset), Id::new(offset + args.c)),
-                    optional: Span::new_empty()
-                });
-                offset += args.c;
-                max_node_id += 1;
-            }
-            // Generate the last segment
-            flat.segs.add(Segment {
-                name: max_node_id,
-                seq: Span::new(Id::new(offset), seq_end),
-                optional: Span::new_empty(),
-            });
-            max_node_id += 1;
-            let new_seg_span = Span::new(segs_start, flat.segs.next_id());
-            seg_map.push(new_seg_span);
-            if args.l {
-                link_forward(&mut flat, &new_seg_span);
-            }
-        }
-    }
-
-    // For each path, add updated handles. Then add the updated path
-    for path in gfa.paths.all().iter() {
-        let path_start = flat.steps.next_id();
-        let mut path_end = flat.steps.next_id();
-        // Generate the new handles
-        // Tentative to-do: see if it is faster to read Id from segs than to re-generate it?
-        for step in gfa.get_path_steps(path) {
-            let range = {
-                let span = seg_map[step.segment().index()];
-                std::ops::Range::from(span)
-            };
-            match step.orient() {
-                Orientation::Forward => {
-                    // In this builder, Id.index() == seg.name - 1 for all seg
-                    path_end = flat
-                        .add_steps(range.map(|idx| Handle::new(Id::new(idx), Orientation::Forward)))
-                        .end;
-                }
-                Orientation::Backward => {
-                    path_end = flat
-                        .add_steps(
-                            range
-                                .rev()
-                                .map(|idx| Handle::new(Id::new(idx), Orientation::Backward)),
-                        )
-                        .end;
-                }
-            }
-        }
-
-        // Add the updated path
-        flat.paths.add(Path {
-            name: path.name,
-            steps: Span::new(path_start, path_end),
-            overlaps: Span::new_empty(),
-        });
-    }
-
-    // If the 'l' flag is specified, compute the links in the new graph
-    if args.l {
-        // For each link in the old graph, from handle A -> B:
-        //      Add a link from
-        //          (A.forward ? (A.end, forward) : (A.begin, backwards))
-        //          -> (B.forward ? (B.begin, forward) : (B.end ? backwards))
-
-        for link in gfa.links.all().iter() {
-            let new_from = {
-                let old_from = link.from;
-                let chopped_segs = seg_map[old_from.segment().index()];
-                let seg_id = match old_from.orient() {
-                    Orientation::Forward => chopped_segs.end - 1,
-                    Orientation::Backward => chopped_segs.start,
-                };
-                Handle::new(seg_id, old_from.orient())
-            };
-            let new_to = {
-                let old_to = link.to;
-                let chopped_segs = seg_map[old_to.segment().index()];
-                let seg_id = match old_to.orient() {
-                    Orientation::Forward => chopped_segs.start,
-                    Orientation::Backward => chopped_segs.end - 1,
-                };
-                Handle::new(seg_id, old_to.orient())
-            };
-            flat.add_link(new_from, new_to, vec![]);
-        }
-    }
-
-    Ok(flat)
-}
diff --git a/flatgfa/src/commands/basic_cmds.rs b/flatgfa/src/commands/basic_cmds.rs
new file mode 100644
index 00000000..537b49e8
--- /dev/null
+++ b/flatgfa/src/commands/basic_cmds.rs
@@ -0,0 +1,136 @@
+use crate::fgfa_ds::flatgfa::{self, Orientation, Segment};
+use crate::fgfa_ds::pool::Id;
+use argh::FromArgs;
+use std::collections::HashMap;
+
+/// print the FlatGFA table of contents
+#[derive(FromArgs, PartialEq, Debug)]
+#[argh(subcommand, name = "toc")]
+pub struct Toc {}
+/// Print the element count of each pool in `gfa` to stderr, one `name: count` line per pool.
+pub fn toc(gfa: &flatgfa::FlatGFA) {
+    eprintln!("header: {}", gfa.header.len());
+    eprintln!("segs: {}", gfa.segs.len());
+    eprintln!("paths: {}", gfa.paths.len());
+    eprintln!("links: {}", gfa.links.len());
+    eprintln!("steps: {}", gfa.steps.len());
+    eprintln!("seq_data: {}", gfa.seq_data.len());
+    eprintln!("overlaps: {}", gfa.overlaps.len());
+    eprintln!("alignment: {}", gfa.alignment.len());
+    eprintln!("name_data: {}", gfa.name_data.len());
+    eprintln!("optional_data: {}", gfa.optional_data.len());
+    eprintln!("line_order: {}", gfa.line_order.len());
+}
+
+/// list the paths
+#[derive(FromArgs, PartialEq, Debug)]
+#[argh(subcommand, name = "paths")]
+pub struct Paths {}
+/// Print the name of every path in `gfa` to stdout, one per line.
+pub fn paths(gfa: &flatgfa::FlatGFA) {
+    for path in gfa.paths.all().iter() {
+        println!("{}", gfa.get_path_name(path));
+    }
+}
+
+/// calculate graph statistics
+#[derive(FromArgs, PartialEq, Debug)]
+#[argh(subcommand, name = "stats")]
+pub struct Stats {
+    /// show basic metrics
+    #[argh(switch, short = 'S')]
+    summarize: bool,
+
+    /// number of segments with at least one self-loop link
+    #[argh(switch, short = 'L')]
+    self_loops: bool,
+}
+/// Print the `-S` summary table, or else the `-L` self-loop counts; `-S` wins if both are set.
+pub fn stats(gfa: &flatgfa::FlatGFA, args: Stats) {
+    if args.summarize {
+        println!("#length\tnodes\tedges\tpaths\tsteps");
+        println!(
+            "{}\t{}\t{}\t{}\t{}",
+            gfa.seq_data.len(),
+            gfa.segs.len(),
+            gfa.links.len(),
+            gfa.paths.len(),
+            gfa.steps.len()
+        );
+    } else if args.self_loops {
+        let mut counts: HashMap<Id<Segment>, usize> = HashMap::new();
+        let mut total: usize = 0;
+        for link in gfa.links.all().iter() {
+            if link.from.segment() == link.to.segment() { // a self-loop: both ends on one segment
+                let count = counts.entry(link.from.segment()).or_insert(0);
+                *count += 1;
+                total += 1;
+            }
+        }
+        println!("#type\tnum");
+        println!("total\t{}", total);
+        println!("unique\t{}", counts.len()); // number of distinct segments with a self-loop
+    }
+}
+
+/// find a nucleotide position within a path
+#[derive(FromArgs, PartialEq, Debug)]
+#[argh(subcommand, name = "position")]
+pub struct Position {
+    /// path_name,offset,orientation
+    #[argh(option, short = 'p')]
+    path_pos: String,
+}
+/// Find the segment and in-segment offset for the `path,offset,+` triple in `args` and print it.
+pub fn position(gfa: &flatgfa::FlatGFA, args: Position) -> Result<(), &'static str> {
+    // Parse the position triple, which looks like `path,42,+`.
+    let (path_name, offset, orientation) = {
+        let parts: Vec<_> = args.path_pos.split(',').collect();
+        if parts.len() != 3 {
+            return Err("position must be path_name,offset,orientation");
+        }
+        let off: usize = parts[1].parse().or(Err("offset must be a number"))?;
+        let ori: Orientation = parts[2].parse().or(Err("orientation must be + or -"))?;
+        (parts[0], off, ori)
+    };
+
+    let path_id = gfa.find_path(path_name.into()).ok_or("path not found")?;
+    let path = &gfa.paths[path_id];
+    // Only forward lookups are supported. The orientation comes straight from the
+    // command line, so report the limitation as an error instead of panicking.
+    if orientation != Orientation::Forward {
+        return Err("only + is implemented so far");
+    }
+
+    // Traverse the path until we reach the position.
+    let mut cur_pos = 0;
+    let mut found = None;
+    for step in &gfa.steps[path.steps] {
+        let seg = gfa.get_handle_seg(*step);
+        let end_pos = cur_pos + seg.len();
+        if offset < end_pos {
+            // Found it!
+            found = Some((*step, offset - cur_pos));
+            break;
+        }
+        cur_pos = end_pos;
+    }
+
+    // Print the match; an offset past the end of the path prints nothing.
+    if let Some((handle, seg_off)) = found {
+        let seg = gfa.get_handle_seg(handle);
+        let seg_name = seg.name;
+        println!("#source.path.pos\ttarget.graph.pos");
+        println!(
+            "{},{},{}\t{},{},{}",
+            path_name,
+            offset,
+            orientation,
+            seg_name,
+            seg_off,
+            handle.orient()
+        );
+    }
+
+    Ok(())
+}
\ No newline at end of file
diff --git a/flatgfa/src/commands/chop.rs b/flatgfa/src/commands/chop.rs
new file mode 100644
index 00000000..695fd52a
--- /dev/null
+++ b/flatgfa/src/commands/chop.rs
@@ -0,0 +1,160 @@
+use crate::fgfa_ds::flatgfa::{self, Handle, Link, Orientation, Path, Segment};
+use crate::fgfa_ds::pool::{Id, Span, Store};
+use crate::fgfa_ds::flatgfa::{GFAStore, HeapFamily};
+use argh::FromArgs;
+
+/// chop the segments in a graph into sizes of N or smaller
+#[derive(FromArgs, PartialEq, Debug)]
+#[argh(subcommand, name = "chop")]
+pub struct Chop {
+    /// maximum segment size.
+    // Use c in keeping with odgi convention
+    #[argh(option, short = 'c')]
+    c: usize,
+
+    /// compute new links
+    #[argh(switch, short = 'l')]
+    l: bool,
+}
+
+/// Chop a graph into segments of size no larger than `args.c`, compacting node ids.
+/// CIGAR strings, links, and optional Segment data are invalidated by chop.
+/// Generates a new graph, rather than modifying the old one in place.
+/// Returns an error if `args.c` is zero, since no segment could then be emitted.
+pub fn chop<'a>(
+    gfa: &'a flatgfa::FlatGFA<'a>,
+    args: Chop,
+) -> Result<flatgfa::HeapGFAStore, &'static str> {
+    // A chop size of zero would make the chunking loop below spin forever.
+    if args.c == 0 {
+        return Err("chop size must be greater than zero");
+    }
+    let mut flat = flatgfa::HeapGFAStore::default();
+    // seg_map[old segment index] = span of the new segments it was chopped into (end exclusive)
+    let mut seg_map: Vec<Span<Segment>> = Vec::new();
+    // The smallest id (>0) which does not already belong to a segment in `flat`
+    let mut max_node_id = 1;
+    fn link_forward(flat: &mut GFAStore<'static, HeapFamily>, span: &Span<Segment>) {
+        // Link segments spanned by `span` from head to tail
+        let overlap = Span::new_empty();
+        flat.add_links((span.start.index()..span.end.index() - 1).map(|idx| Link {
+            from: Handle::new(Id::new(idx), Orientation::Forward),
+            to: Handle::new(Id::new(idx + 1), Orientation::Forward),
+            overlap,
+        }));
+    }
+
+    // Add new, chopped segments
+    for seg in gfa.segs.all().iter() {
+        let len = seg.len();
+        if len <= args.c {
+            // Leave the segment as is
+            let id = flat.segs.add(Segment {
+                name: max_node_id,
+                seq: seg.seq,
+                optional: Span::new_empty(), // TODO: Optional data may stay valid when seg not chopped?
+            });
+            max_node_id += 1;
+            seg_map.push(Span::new(id, flat.segs.next_id()));
+        } else {
+            let seq_end = seg.seq.end;
+            let mut offset = seg.seq.start.index();
+            let segs_start = flat.segs.next_id();
+            // Could also generate end_id by setting it equal to the start_id and
+            // updating it for each segment that is added - only benefits us if we
+            // don't unroll the last iteration of this loop
+            while offset < seq_end.index() - args.c {
+                // Generate a new segment of length c
+                flat.segs.add(Segment {
+                    name: max_node_id,
+                    seq: Span::new(Id::new(offset), Id::new(offset + args.c)),
+                    optional: Span::new_empty(),
+                });
+                offset += args.c;
+                max_node_id += 1;
+            }
+            // Generate the last segment
+            flat.segs.add(Segment {
+                name: max_node_id,
+                seq: Span::new(Id::new(offset), seq_end),
+                optional: Span::new_empty(),
+            });
+            max_node_id += 1;
+            let new_seg_span = Span::new(segs_start, flat.segs.next_id());
+            seg_map.push(new_seg_span);
+            if args.l {
+                link_forward(&mut flat, &new_seg_span);
+            }
+        }
+    }
+
+    // For each path, add updated handles. Then add the updated path
+    for path in gfa.paths.all().iter() {
+        let path_start = flat.steps.next_id();
+        let mut path_end = flat.steps.next_id();
+        // Generate the new handles
+        // Tentative to-do: see if it is faster to read Id from segs than to re-generate it?
+        for step in gfa.get_path_steps(path) {
+            let range = {
+                let span = seg_map[step.segment().index()];
+                std::ops::Range::from(span)
+            };
+            match step.orient() {
+                Orientation::Forward => {
+                    // In this builder, Id.index() == seg.name - 1 for all seg
+                    path_end = flat
+                        .add_steps(range.map(|idx| Handle::new(Id::new(idx), Orientation::Forward)))
+                        .end;
+                }
+                Orientation::Backward => {
+                    path_end = flat
+                        .add_steps(
+                            range
+                                .rev()
+                                .map(|idx| Handle::new(Id::new(idx), Orientation::Backward)),
+                        )
+                        .end;
+                }
+            }
+        }
+
+        // Add the updated path
+        flat.paths.add(Path {
+            name: path.name,
+            steps: Span::new(path_start, path_end),
+            overlaps: Span::new_empty(),
+        });
+    }
+
+    // If the 'l' flag is specified, compute the links in the new graph
+    if args.l {
+        // For each link in the old graph, from handle A -> B:
+        //      Add a link from
+        //          (A.forward ? (A.end, forward) : (A.begin, backwards))
+        //          -> (B.forward ? (B.begin, forward) : (B.end, backwards))
+
+        for link in gfa.links.all().iter() {
+            let new_from = {
+                let old_from = link.from;
+                let chopped_segs = seg_map[old_from.segment().index()];
+                let seg_id = match old_from.orient() {
+                    Orientation::Forward => chopped_segs.end - 1,
+                    Orientation::Backward => chopped_segs.start,
+                };
+                Handle::new(seg_id, old_from.orient())
+            };
+            let new_to = {
+                let old_to = link.to;
+                let chopped_segs = seg_map[old_to.segment().index()];
+                let seg_id = match old_to.orient() {
+                    Orientation::Forward => chopped_segs.start,
+                    Orientation::Backward => chopped_segs.end - 1,
+                };
+                Handle::new(seg_id, old_to.orient())
+            };
+            flat.add_link(new_from, new_to, vec![]);
+        }
+    }
+
+    Ok(flat)
+}
\ No newline at end of file
diff --git a/flatgfa/src/commands/depth.rs b/flatgfa/src/commands/depth.rs
new file mode 100644
index 00000000..ef0b97ca
--- /dev/null
+++ b/flatgfa/src/commands/depth.rs
@@ -0,0 +1,37 @@
+use crate::fgfa_ds::flatgfa::FlatGFA;
+use argh::FromArgs;
+use std::collections::HashSet;
+
+/// compute node depth, the number of times paths cross a node
+#[derive(FromArgs, PartialEq, Debug)]
+#[argh(subcommand, name = "depth")]
+pub struct Depth {}
+/// Print, for every segment, how many path steps cross it and how many distinct paths do.
+pub fn depth(gfa: &FlatGFA) {
+    // Initialize node depth
+    let mut depths = vec![0; gfa.segs.len()];
+    // Initialize uniq_paths
+    let mut uniq_paths = Vec::<HashSet<usize>>::new();
+    uniq_paths.resize(gfa.segs.len(), HashSet::new());
+    // do not assume that each handle in `gfa.steps()` is unique
+    for (idx, path) in gfa.paths.all().iter().enumerate() {
+        for step in &gfa.steps[path.steps] {
+            let seg_id = step.segment().index();
+            // Increment depths
+            depths[seg_id] += 1;
+            // Update uniq_paths
+            uniq_paths[seg_id].insert(idx);
+        }
+    }
+    // print out depth and depth.uniq
+    println!("#node.id\tdepth\tdepth.uniq");
+    for (id, seg) in gfa.segs.items() {
+        let name = seg.name; // copy the field out without a narrowing cast, so large names print intact
+        println!(
+            "{}\t{}\t{}",
+            name,
+            depths[id.index()],
+            uniq_paths[id.index()].len()
+        );
+    }
+}
\ No newline at end of file
diff --git a/flatgfa/src/commands/extract.rs b/flatgfa/src/commands/extract.rs
new file mode 100644
index 00000000..1a0ada61
--- /dev/null
+++ b/flatgfa/src/commands/extract.rs
@@ -0,0 +1,224 @@
+use crate::fgfa_ds::flatgfa::{self, Handle, Link, Path, Segment};
+use crate::fgfa_ds::pool::{self, Id, Span, Store};
+use argh::FromArgs;
+use std::collections::HashMap;
+
+/// create a subset graph
+#[derive(FromArgs, PartialEq, Debug)]
+#[argh(subcommand, name = "extract")]
+pub struct Extract {
+    /// segment to extract around
+    #[argh(option, short = 'n')]
+    seg_name: usize,
+
+    /// number of edges "away" from the node to include
+    #[argh(option, short = 'c')]
+    link_distance: usize,
+
+    /// maximum number of basepairs allowed between subpaths s.t. the subpaths are merged together
+    #[argh(option, short = 'd', long = "max-distance-subpaths", default = "300000")]
+    max_distance_subpaths: usize, // TODO: possibly make this bigger
+
+    /// maximum number of iterations before we stop merging subpaths
+    #[argh(option, short = 'e', long = "max-merging-iterations", default = "6")]
+    num_iterations: usize // TODO: probably make this smaller
+}
+/// Build a new graph around segment `args.seg_name`; errors if no such segment exists in `gfa`.
+pub fn extract(
+    gfa: &flatgfa::FlatGFA,
+    args: Extract,
+) -> Result<flatgfa::HeapGFAStore, &'static str> {
+    let origin_seg = gfa.find_seg(args.seg_name).ok_or("segment not found")?;
+
+    let mut subgraph = SubgraphBuilder::new(gfa);
+    subgraph.add_header();
+    subgraph.extract(origin_seg, args.link_distance, args.max_distance_subpaths, args.num_iterations);
+    Ok(subgraph.store)
+}
+
+/// A helper to construct a new graph that includes part of an old graph.
+struct SubgraphBuilder<'a> {
+    old: &'a flatgfa::FlatGFA<'a>, // The source graph being extracted from.
+    store: flatgfa::HeapGFAStore, // The new graph under construction.
+    seg_map: HashMap<Id<Segment>, Id<Segment>>, // Old segment id -> its id in `store`.
+}
+/// Where a subpath that is still being collected began.
+struct SubpathStart {
+    step: Id<Handle>, // The id (in the new store) of the first step in the subpath.
+    pos: usize,       // The bp position (in the old path) at the start of the subpath.
+}
+
+impl<'a> SubgraphBuilder<'a> {
+    fn new(old: &'a flatgfa::FlatGFA) -> Self {
+        Self {
+            old,
+            store: flatgfa::HeapGFAStore::default(),
+            seg_map: HashMap::new(),
+        }
+    }
+
+    /// Include the old graph's header
+    fn add_header(&mut self) {
+        // Copy the raw header bytes over from the old graph. A GFA has at most one
+        // header line, so the destination header must still be empty here; the
+        // assert below panics if the new store already holds any header bytes
+        // (for example, because this method was called before).
+        assert!(self.store.header.as_ref().is_empty());
+        self.store.header.add_slice(self.old.header.all());
+    }
+
+    /// Add a segment from the source graph to this subgraph.
+    fn include_seg(&mut self, seg_id: Id<Segment>) {
+        let seg = &self.old.segs[seg_id];
+        let new_seg_id = self.store.add_seg(
+            seg.name,
+            self.old.get_seq(seg),
+            self.old.get_optional_data(seg),
+        );
+        self.seg_map.insert(seg_id, new_seg_id);
+    }
+
+    /// Add a link from the source graph to the subgraph.
+    fn include_link(&mut self, link: &Link) {
+        let from = self.tr_handle(link.from);
+        let to = self.tr_handle(link.to);
+        let overlap = self.old.get_alignment(link.overlap);
+        self.store.add_link(from, to, overlap.ops.into());
+    }
+
+    /// Add a single subpath from the given path to the subgraph.
+    fn include_subpath(&mut self, path: &Path, start: &SubpathStart, end_pos: usize) {
+        let steps = pool::Span::new(start.step, self.store.steps.next_id()); // end is exclusive: one past the last step pushed so far
+        let name = format!("{}:{}-{}", self.old.get_path_name(path), start.pos, end_pos);
+        self.store
+            .add_path(name.as_bytes(), steps, std::iter::empty());
+    }
+
+    /// Identify all the subpaths in a path from the original graph that cross through
+    /// segments in this subgraph and merge them if possible.
+    fn merge_subpaths(&mut self, path: &Path, max_distance_subpaths: usize) {
+        // these are subpaths which *aren't* already included in the new graph
+        let mut cur_subpath_start: Option<usize> = Some(0);
+        let mut subpath_length = 0;
+        let mut ignore_path = true;
+
+        for (idx, step) in self.old.steps[path.steps].iter().enumerate() {
+            let in_neighb = self.seg_map.contains_key(&step.segment());
+
+            if let (Some(start), true) = (&cur_subpath_start, in_neighb) {
+                // We just entered the subgraph. End the current subpath.
+                if !ignore_path && subpath_length <= max_distance_subpaths {
+                    // TODO: type safety
+                    let subpath_span = Span::new(path.steps.start + *start as u32, path.steps.start + idx as u32);
+                    for step in &self.old.steps[subpath_span] {
+                        if !self.seg_map.contains_key(&step.segment()) {
+                            self.include_seg(step.segment());
+                        }
+                    }
+                }
+                cur_subpath_start = None;
+                ignore_path = false;
+            } else if let (None, false) = (&cur_subpath_start, in_neighb) {
+                // We've exited the current subgraph, start a new subpath
+                cur_subpath_start = Some(idx);
+            } 
+
+            // NOTE(review): running total from the path start, never reset per gap; confirm intended.
+            subpath_length += self.old.get_handle_seg(*step).len();
+        }
+    }
+
+    /// Identify all the subpaths in a path from the original graph that cross through
+    /// segments in this subgraph and add them.
+    fn find_subpaths(&mut self, path: &Path) {
+        let mut cur_subpath_start: Option<SubpathStart> = None;
+        let mut path_pos = 0;
+
+        for step in &self.old.steps[path.steps] {
+            let in_neighb = self.seg_map.contains_key(&step.segment());
+
+            if let (Some(start), false) = (&cur_subpath_start, in_neighb) {
+                // End the current subpath.
+                self.include_subpath(path, start, path_pos);
+                cur_subpath_start = None;
+            } else if let (None, true) = (&cur_subpath_start, in_neighb) {
+                // Start a new subpath.
+                cur_subpath_start = Some(SubpathStart {
+                    step: self.store.steps.next_id(),
+                    pos: path_pos,
+                });
+            }
+
+            // Add the (translated) step to the new graph.
+            if in_neighb {
+                self.store.add_step(self.tr_handle(*step));
+            }
+
+            // Track the current bp position in the path.
+            path_pos += self.old.get_handle_seg(*step).len();
+        }
+
+        // Did we reach the end of the path while still in the neighborhood?
+        if let Some(start) = cur_subpath_start {
+            self.include_subpath(path, &start, path_pos);
+        }
+    }
+
+    /// Translate a handle from the source graph to this subgraph.
+    fn tr_handle(&self, old_handle: Handle) -> Handle {
+        // Only builds the handle value; panics if the segment was never included in this subgraph.
+        Handle::new(self.seg_map[&old_handle.segment()], old_handle.orient())
+    }
+
+    /// Check whether a segment from the old graph is in the subgraph.
+    fn contains(&self, old_seg_id: Id<Segment>) -> bool {
+        self.seg_map.contains_key(&old_seg_id)
+    }
+
+    /// Extract a subgraph consisting of a neighborhood of segments up to `dist` links away
+    /// from the given segment in the original graph.
+    ///
+    /// Include any links between the segments in the neighborhood and subpaths crossing
+    /// through the neighborhood.
+    fn extract(&mut self, origin: Id<Segment>, dist: usize, max_distance_subpaths: usize, num_iterations: usize) {
+        self.include_seg(origin);
+
+        // Find the set of all segments that are up to `dist` links away, one frontier per round.
+        let mut frontier: Vec<Id<Segment>> = Vec::new();
+        let mut next_frontier: Vec<Id<Segment>> = Vec::new();
+        frontier.push(origin);
+        for _ in 0..dist {
+            while let Some(seg_id) = frontier.pop() {
+                for link in self.old.links.all().iter() {
+                    if let Some(other_seg) = link.incident_seg(seg_id) {
+                        // Add other_seg to the frontier set if it is not already in the frontier set or the seg_map
+                        if !self.seg_map.contains_key(&other_seg) {
+                            self.include_seg(other_seg);
+                            next_frontier.push(other_seg);
+                        }
+                    }
+                }
+            }
+            (frontier, next_frontier) = (next_frontier, frontier);
+        }
+
+        // Merge subpaths within max_distance_subpaths bp of each other, num_iterations times
+        for _ in 0..num_iterations {
+            for path in self.old.paths.all().iter() {
+                self.merge_subpaths(path, max_distance_subpaths);
+            }
+        }
+
+        // Include all links within the subgraph.
+        for link in self.old.links.all().iter() {
+            if self.contains(link.from.segment()) && self.contains(link.to.segment()) {
+                self.include_link(link);
+            }
+        }
+
+        // Find subpaths within the subgraph.
+        for path in self.old.paths.all().iter() {
+            self.find_subpaths(path);
+        }
+    }
+}
\ No newline at end of file
diff --git a/flatgfa/src/commands/mod.rs b/flatgfa/src/commands/mod.rs
new file mode 100644
index 00000000..b4801f6b
--- /dev/null
+++ b/flatgfa/src/commands/mod.rs
@@ -0,0 +1,4 @@
+pub mod basic_cmds;
+pub mod chop;
+pub mod depth;
+pub mod extract;
\ No newline at end of file
diff --git a/flatgfa/src/fgfa_ds/file.rs b/flatgfa/src/fgfa_ds/file.rs
new file mode 100644
index 00000000..a0a55515
--- /dev/null
+++ b/flatgfa/src/fgfa_ds/file.rs
@@ -0,0 +1,336 @@
+use super::pool::{FixedStore, Pool, Span, Store};
+use super::flatgfa::{AlignOp, FlatGFA, FixedGFAStore, Handle, Link, Path, Segment};
+use memmap::{Mmap, MmapMut};
+use std::mem::{size_of, size_of_val};
+use tinyvec::SliceVec;
+use zerocopy::{AsBytes, FromBytes, FromZeroes};
+
+const MAGIC_NUMBER: u64 = 0xB101_1054;
+
+/// A table of contents for the FlatGFA file: one `Size` entry per pool, in on-disk order.
+#[derive(FromBytes, FromZeroes, AsBytes, Debug)]
+#[repr(packed)]
+pub struct Toc {
+    magic: u64, // Must equal `MAGIC_NUMBER`; asserted whenever a table of contents is read.
+    header: Size,
+    segs: Size,
+    paths: Size,
+    links: Size,
+    steps: Size,
+    seq_data: Size,
+    overlaps: Size,
+    alignment: Size,
+    name_data: Size,
+    optional_data: Size,
+    line_order: Size,
+}
+
+/// A table-of-contents entry for a pool in the FlatGFA file.
+#[derive(FromBytes, FromZeroes, AsBytes, Clone, Copy, Debug)]
+#[repr(packed)]
+struct Size {
+    /// The number of actual elements in the pool.
+    len: usize,
+
+    /// The allocated space for the pool. `capacity - len` slots are "empty."
+    capacity: usize,
+}
+
+impl Size {
+    fn of_pool<T>(pool: Pool<T>) -> Self {
+        Size {
+            len: pool.len(),
+            capacity: pool.len(), // a borrowed pool is recorded as exactly full: no spare slots
+        }
+    }
+    /// Record both the current length and the total capacity of a fixed-size store.
+    fn of_store<T: Clone>(store: &FixedStore<'_, T>) -> Self {
+        Size {
+            len: store.len(),
+            capacity: store.capacity(),
+        }
+    }
+    /// The number of bytes occupied by `capacity` elements of type `T`.
+    fn bytes<T>(&self) -> usize {
+        self.capacity * size_of::<T>()
+    }
+    /// An entry for a pool with room for `capacity` elements, none of them in use yet.
+    fn empty(capacity: usize) -> Self {
+        Size { len: 0, capacity }
+    }
+}
+
+impl Toc {
+    /// Get the total size in bytes of the file described.
+    pub fn size(&self) -> usize {
+        size_of::<Self>()
+            + self.header.bytes::<u8>()
+            + self.segs.bytes::<Segment>()
+            + self.paths.bytes::<Path>()
+            + self.links.bytes::<Link>()
+            + self.steps.bytes::<Handle>()
+            + self.seq_data.bytes::<u8>()
+            + self.overlaps.bytes::<Span<AlignOp>>()
+            + self.alignment.bytes::<AlignOp>()
+            + self.name_data.bytes::<u8>()
+            + self.optional_data.bytes::<u8>()
+            + self.line_order.bytes::<u8>()
+    }
+
+    /// Get a table of contents that fits a FlatGFA with no spare space.
+    fn full(gfa: &FlatGFA) -> Self {
+        Self {
+            magic: MAGIC_NUMBER,
+            header: Size::of_pool(gfa.header),
+            segs: Size::of_pool(gfa.segs),
+            paths: Size::of_pool(gfa.paths),
+            links: Size::of_pool(gfa.links),
+            steps: Size::of_pool(gfa.steps),
+            seq_data: Size::of_pool(gfa.seq_data),
+            overlaps: Size::of_pool(gfa.overlaps),
+            alignment: Size::of_pool(gfa.alignment),
+            name_data: Size::of_pool(gfa.name_data),
+            optional_data: Size::of_pool(gfa.optional_data),
+            line_order: Size::of_pool(gfa.line_order),
+        }
+    }
+    /// Get a table of contents recording the current length and capacity of each pool in `store`.
+    pub fn for_fixed_store(store: &FixedGFAStore) -> Self {
+        Self {
+            magic: MAGIC_NUMBER,
+            header: Size::of_store(&store.header),
+            segs: Size::of_store(&store.segs),
+            paths: Size::of_store(&store.paths),
+            links: Size::of_store(&store.links),
+            steps: Size::of_store(&store.steps),
+            seq_data: Size::of_store(&store.seq_data),
+            overlaps: Size::of_store(&store.overlaps),
+            alignment: Size::of_store(&store.alignment),
+            name_data: Size::of_store(&store.name_data),
+            optional_data: Size::of_store(&store.optional_data),
+            line_order: Size::of_store(&store.line_order),
+        }
+    }
+
+    /// Guess a reasonable set of capacities for a fresh file.
+    pub fn guess(factor: usize) -> Self {
+        Self {
+            magic: MAGIC_NUMBER,
+            header: Size::empty(128),
+            segs: Size::empty(32 * factor * factor),
+            paths: Size::empty(factor),
+            links: Size::empty(32 * factor * factor),
+            steps: Size::empty(1024 * factor * factor),
+            seq_data: Size::empty(512 * factor * factor),
+            overlaps: Size::empty(256 * factor),
+            alignment: Size::empty(64 * factor * factor),
+            name_data: Size::empty(64 * factor),
+            optional_data: Size::empty(512 * factor * factor),
+            line_order: Size::empty(64 * factor * factor),
+        }
+    }
+
+    /// Estimate a reasonable set of capacities for a fresh file based on some
+    /// measurements of the GFA text.
+    pub fn estimate(
+        segs: usize,
+        links: usize,
+        paths: usize,
+        header_bytes: usize,
+        seg_bytes: usize,
+        path_bytes: usize,
+    ) -> Self {
+        Self {
+            magic: MAGIC_NUMBER,
+            header: Size::empty(header_bytes),
+            segs: Size::empty(segs),
+            paths: Size::empty(paths),
+            links: Size::empty(links),
+            steps: Size::empty(path_bytes / 3),
+            seq_data: Size::empty(seg_bytes),
+            overlaps: Size::empty((links + paths) * 2),
+            alignment: Size::empty(links * 2 + paths * 4),
+            name_data: Size::empty(paths * 512),
+            optional_data: Size::empty(links * 16),
+            line_order: Size::empty(segs + links + paths + 8),
+        }
+    }
+}
+
+/// Consume `size.len` items from a byte slice, skip the remainder of `size.capacity`
+/// elements, and return the items and the rest of the slice. Panics if `data` is too short.
+fn slice_prefix<T: FromBytes>(data: &[u8], size: Size) -> (&[T], &[u8]) {
+    let (prefix, rest) = T::slice_from_prefix(data, size.len).unwrap();
+    let pad = size_of::<T>() * (size.capacity - size.len); // bytes of unused capacity to skip
+    (prefix, &rest[pad..])
+}
+
+/// Read the table of contents from a prefix of the byte buffer; panics on a wrong magic number.
+fn read_toc(data: &[u8]) -> (&Toc, &[u8]) {
+    let toc = Toc::ref_from_prefix(data).unwrap();
+    let rest = &data[size_of::<Toc>()..];
+    let magic = toc.magic; // copied to a local before it is handed to the assert
+    assert_eq!(magic, MAGIC_NUMBER);
+    (toc, rest)
+}
+/// Like `read_toc`, but borrow the table of contents and the remaining bytes mutably.
+fn read_toc_mut(data: &mut [u8]) -> (&mut Toc, &mut [u8]) {
+    let (toc_slice, rest) = Toc::mut_slice_from_prefix(data, 1).unwrap();
+    let toc = &mut toc_slice[0];
+    let magic = toc.magic;
+    assert_eq!(magic, MAGIC_NUMBER);
+    (toc, rest)
+}
+
+/// Get a FlatGFA backed by the data in a byte buffer; panics on a bad magic number or short buffer.
+pub fn view(data: &[u8]) -> FlatGFA {
+    let (toc, rest) = read_toc(data);
+
+    let (header, rest) = slice_prefix(rest, toc.header);
+    let (segs, rest) = slice_prefix(rest, toc.segs);
+    let (paths, rest) = slice_prefix(rest, toc.paths);
+    let (links, rest) = slice_prefix(rest, toc.links);
+    let (steps, rest) = slice_prefix(rest, toc.steps);
+    let (seq_data, rest) = slice_prefix(rest, toc.seq_data);
+    let (overlaps, rest) = slice_prefix(rest, toc.overlaps);
+    let (alignment, rest) = slice_prefix(rest, toc.alignment);
+    let (name_data, rest) = slice_prefix(rest, toc.name_data);
+    let (optional_data, rest) = slice_prefix(rest, toc.optional_data);
+    let (line_order, _) = slice_prefix(rest, toc.line_order);
+
+    FlatGFA {
+        header: header.into(),
+        segs: segs.into(),
+        paths: paths.into(),
+        links: links.into(),
+        steps: steps.into(),
+        seq_data: seq_data.into(),
+        overlaps: overlaps.into(),
+        alignment: alignment.into(),
+        name_data: name_data.into(),
+        optional_data: optional_data.into(),
+        line_order: line_order.into(),
+    }
+}
+
+/// Like `slice_prefix`, but produce a `SliceVec` over all `size.capacity` slots, `size.len` in use.
+fn slice_vec_prefix<T: FromBytes + AsBytes>(
+    data: &mut [u8],
+    size: Size,
+) -> (SliceVec<T>, &mut [u8]) {
+    let (prefix, rest) = T::mut_slice_from_prefix(data, size.capacity).unwrap();
+    let vec = SliceVec::from_slice_len(prefix, size.len);
+    (vec, rest)
+}
+
+/// Get a `FixedGFAStore` over the bytes of a file that immediately follow the table of contents.
+fn slice_store<'a>(data: &'a mut [u8], toc: &Toc) -> FixedGFAStore<'a> {
+    let (header, rest) = slice_vec_prefix(data, toc.header);
+    let (segs, rest) = slice_vec_prefix(rest, toc.segs);
+    let (paths, rest) = slice_vec_prefix(rest, toc.paths);
+    let (links, rest) = slice_vec_prefix(rest, toc.links);
+    let (steps, rest) = slice_vec_prefix(rest, toc.steps);
+    let (seq_data, rest) = slice_vec_prefix(rest, toc.seq_data);
+    let (overlaps, rest) = slice_vec_prefix(rest, toc.overlaps);
+    let (alignment, rest) = slice_vec_prefix(rest, toc.alignment);
+    let (name_data, rest) = slice_vec_prefix(rest, toc.name_data);
+    let (optional_data, rest) = slice_vec_prefix(rest, toc.optional_data);
+    let (line_order, _) = slice_vec_prefix(rest, toc.line_order);
+
+    FixedGFAStore {
+        header: header.into(),
+        segs: segs.into(),
+        paths: paths.into(),
+        links: links.into(),
+        steps: steps.into(),
+        seq_data: seq_data.into(),
+        overlaps: overlaps.into(),
+        alignment: alignment.into(),
+        name_data: name_data.into(),
+        optional_data: optional_data.into(),
+        line_order: line_order.into(),
+    }
+}
+
+/// Get a mutable `FixedGFAStore` backed by a byte buffer that starts with a table of contents.
+pub fn view_store(data: &mut [u8]) -> FixedGFAStore {
+    let (toc, rest) = read_toc_mut(data);
+    slice_store(rest, toc)
+}
+
+/// Initialize a buffer with an empty FlatGFA store.
+pub fn init(data: &mut [u8], toc: Toc) -> (&mut Toc, FixedGFAStore) {
+    // Write the table of contents. The buffer must be sized exactly for this TOC.
+    assert!(data.len() == toc.size());
+    toc.write_to_prefix(data).unwrap();
+
+    // Get a mutable reference to the embedded TOC.
+    let (toc_bytes, rest) = data.split_at_mut(size_of::<Toc>());
+    let toc_mut = Toc::mut_from(toc_bytes).unwrap();
+
+    // Extract a store from the remaining bytes.
+    (toc_mut, slice_store(rest, &toc))
+}
+/// Write `data` at the front of `buf` and return the unwritten tail, or `None` if it does not fit.
+fn write_bump<'a, T: AsBytes + ?Sized>(buf: &'a mut [u8], data: &T) -> Option<&'a mut [u8]> {
+    let len = size_of_val(data);
+    data.write_to_prefix(buf)?;
+    Some(&mut buf[len..])
+}
+/// Copy `data` to the front of `buf` and return the unwritten tail, or `None` if `buf` is too small.
+fn write_bytes<'a>(buf: &'a mut [u8], data: &[u8]) -> Option<&'a mut [u8]> {
+    let len = data.len();
+    buf.get_mut(..len)?.copy_from_slice(data);
+    Some(&mut buf[len..])
+}
+
+/// Copy a FlatGFA into a byte buffer; panics if `buf` is too small (see `size`).
+pub fn dump(gfa: &FlatGFA, buf: &mut [u8]) {
+    // Table of contents.
+    let toc = Toc::full(gfa);
+    let rest = write_bump(buf, &toc).unwrap();
+
+    // All the slices, in the same order as the fields of the table of contents.
+    let rest = write_bytes(rest, gfa.header.all()).unwrap();
+    let rest = write_bump(rest, gfa.segs.all()).unwrap();
+    let rest = write_bump(rest, gfa.paths.all()).unwrap();
+    let rest = write_bump(rest, gfa.links.all()).unwrap();
+    let rest = write_bump(rest, gfa.steps.all()).unwrap();
+    let rest = write_bytes(rest, gfa.seq_data.all()).unwrap();
+    let rest = write_bump(rest, gfa.overlaps.all()).unwrap();
+    let rest = write_bump(rest, gfa.alignment.all()).unwrap();
+    let rest = write_bytes(rest, gfa.name_data.all()).unwrap();
+    let rest = write_bytes(rest, gfa.optional_data.all()).unwrap();
+    write_bytes(rest, gfa.line_order.all()).unwrap();
+}
+
+/// Get the total size in bytes of a FlatGFA structure: the table of contents plus every pool
+/// with no spare capacity. This should be a big enough buffer for `dump` to write into.
+pub fn size(gfa: &FlatGFA) -> usize {
+    Toc::full(gfa).size()
+}
+/// Memory-map an existing file for reading. Panics if the file cannot be opened or mapped.
+pub fn map_file(name: &str) -> Mmap {
+    let file = std::fs::File::open(name).unwrap();
+    unsafe { Mmap::map(&file) }.unwrap()
+}
+/// Create the file if needed, set its length to `size` bytes, and map it read-write. Panics on I/O errors.
+pub fn map_new_file(name: &str, size: u64) -> MmapMut {
+    let file = std::fs::OpenOptions::new()
+        .read(true)
+        .write(true)
+        .create(true)
+        .open(name)
+        .unwrap();
+    file.set_len(size).unwrap();
+    unsafe { MmapMut::map_mut(&file) }.unwrap()
+}
+/// Memory-map an existing file read-write. Panics if the file cannot be opened or mapped.
+pub fn map_file_mut(name: &str) -> MmapMut {
+    let file = std::fs::OpenOptions::new()
+        .read(true)
+        .write(true)
+        .open(name)
+        .unwrap();
+    unsafe { MmapMut::map_mut(&file) }.unwrap()
+}
diff --git a/flatgfa/src/fgfa_ds/flatgfa.rs b/flatgfa/src/fgfa_ds/flatgfa.rs
new file mode 100644
index 00000000..9a9e53d4
--- /dev/null
+++ b/flatgfa/src/fgfa_ds/flatgfa.rs
@@ -0,0 +1,430 @@
+use std::str::FromStr;
+
+use super::pool::{self, Id, Pool, Span, Store};
+use bstr::BStr;
+use num_enum::{IntoPrimitive, TryFromPrimitive};
+use zerocopy::{AsBytes, FromBytes, FromZeroes};
+
+/// An efficient flattened representation of a GFA file.
+///
+/// This struct *borrows* the underlying data from some other data store. Namely, the
+/// `GFAStore` structs contain `Vec`s or `Vec`-like arenas as backing stores for each
+/// of the slices in this struct. `FlatGFA` itself provides access to the GFA data
+/// structure that is agnostic to the location of the underlying bytes. However, all
+/// its components have a fixed size; unlike the underlying `GFAStore`, it is not
+/// possible to add new objects.
+pub struct FlatGFA<'a> {
+    /// A GFA may optionally have a single header line, with a version number.
+    /// If this is empty, there is no header line.
+    pub header: Pool<'a, u8>,
+
+    /// The segment (S) lines in the GFA file.
+    pub segs: Pool<'a, Segment>,
+
+    /// The path (P) lines.
+    pub paths: Pool<'a, Path>,
+
+    /// The link (L) lines.
+    pub links: Pool<'a, Link>,
+
+    /// Paths consist of steps. This is a flat pool of steps, chunks of which are
+    /// associated with each path.
+    pub steps: Pool<'a, Handle>,
+
+    /// The actual base-pair sequences for the segments. This is a pool of
+    /// base-pair symbols, chunks of which are associated with each segment.
+    ///
+    /// TODO: This could certainly use a smaller representation than `u8`
+    /// (since we care only about 4 base pairs), if we want to pay the cost
+    /// of bit-packing.
+    pub seq_data: Pool<'a, u8>,
+
+    /// Both paths and links can have overlaps, which are CIGAR sequences. They
+    /// are all stored together here in a flat pool, elements of which point
+    /// to chunks of `alignment`.
+    pub overlaps: Pool<'a, Span<AlignOp>>,
+
+    /// The CIGAR alignment operations that make up the overlaps. `overlaps`
+    /// contains ranges of indices in this pool.
+    pub alignment: Pool<'a, AlignOp>,
+
+    /// The string names: currently, just of paths. (We assume segments have integer
+    /// names, so they don't need to be stored separately.)
+    pub name_data: Pool<'a, u8>,
+
+    /// Segments can come with optional extra fields, which we store in a flat pool
+    /// as raw characters because we don't currently care about them.
+    pub optional_data: Pool<'a, u8>,
+
+    /// An "interleaving" order of GFA lines. This is to preserve perfect round-trip
+    /// fidelity: we record the order of lines as we saw them when parsing a GFA file
+    /// so we can emit them again in that order. Elements should be `LineKind` values
+    /// (but they are checked before we use them).
+    pub line_order: Pool<'a, u8>,
+}
+
+/// GFA graphs consist of "segment" nodes, which are fragments of base-pair sequences
+/// that can be strung together into paths.
+#[derive(Debug, FromZeroes, FromBytes, AsBytes, Clone, Copy)]
+#[repr(packed)]
+pub struct Segment {
+    /// The segment's name. We assume all names are just plain numbers.
+    pub name: usize,
+
+    /// The base-pair sequence for the segment. This is a range in the `seq_data` pool.
+    pub seq: Span<u8>,
+
+    /// Segments can have optional fields. This is a range in the `optional_data` pool.
+    pub optional: Span<u8>,
+}
+
+impl Segment {
+    #[allow(clippy::len_without_is_empty)]
+    pub fn len(&self) -> usize {
+        self.seq.len()
+    }
+}
+
+/// A path is a sequence of oriented references to segments.
+#[derive(Debug, FromZeroes, FromBytes, AsBytes, Clone, Copy)]
+#[repr(packed)]
+pub struct Path {
+    /// The path's name. This can be an arbitrary string. It is a range in the
+    /// `name_data` pool.
+    pub name: Span<u8>,
+
+    /// The sequence of path steps. This is a range in the `steps` pool.
+    pub steps: Span<Handle>,
+
+    /// The CIGAR overlaps for each step on the path. This is a range in the
+    /// `overlaps` pool.
+    pub overlaps: Span<Span<AlignOp>>,
+}
+
+impl Path {
+    pub fn step_count(&self) -> usize {
+        self.steps.end.index() - self.steps.start.index()
+    }
+}
+
+/// An allowed edge between two oriented segments.
+#[derive(Debug, FromBytes, FromZeroes, AsBytes, Clone, Copy)]
+#[repr(packed)]
+pub struct Link {
+    /// The source of the edge.
+    pub from: Handle,
+
+    /// The destination of the edge.
+    pub to: Handle,
+
+    /// The CIGAR overlap between the segments. This is a range in the
+    /// `alignment` pool.
+    pub overlap: Span<AlignOp>,
+}
+
+impl Link {
+    /// Is either end of the link the given segment? If so, return the other end.
+    pub fn incident_seg(&self, seg_id: Id<Segment>) -> Option<Id<Segment>> {
+        if self.from.segment() == seg_id {
+            Some(self.to.segment())
+        } else if self.to.segment() == seg_id {
+            Some(self.from.segment())
+        } else {
+            None
+        }
+    }
+}
+
+/// A forward or backward direction.
+#[derive(Debug, PartialEq, IntoPrimitive, TryFromPrimitive)]
+#[repr(u8)]
+pub enum Orientation {
+    Forward,  // +
+    Backward, // -
+}
+
+impl FromStr for Orientation {
+    type Err = ();
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        if s == "+" {
+            Ok(Orientation::Forward)
+        } else if s == "-" {
+            Ok(Orientation::Backward)
+        } else {
+            Err(())
+        }
+    }
+}
+
+/// An oriented reference to a segment.
+///
+/// A Handle refers to the forward (+) or backward (-) orientation for a given segment.
+/// So, logically, it consists of a pair of a segment reference (a 31-bit pool index)
+/// and an orientation (1 bit). We pack the two values into a single `u32`.
+#[derive(Debug, FromBytes, FromZeroes, AsBytes, Clone, Copy, PartialEq, Eq, Hash)]
+#[repr(packed)]
+pub struct Handle(u32);
+
+impl Handle {
+    /// Create a new handle referring to a segment ID and an orientation.
+    pub fn new(segment: Id<Segment>, orient: Orientation) -> Self {
+        let seg_num: u32 = segment.into();
+        assert!(seg_num & (1 << (u32::BITS - 1)) == 0, "index too large");
+        let orient_bit: u8 = orient.into();
+        assert!(orient_bit & !1 == 0, "invalid orientation");
+        Self(seg_num << 1 | (orient_bit as u32))
+    }
+
+    /// Get the segment ID. This is an index in the `segs` pool.
+    pub fn segment(&self) -> Id<Segment> {
+        (self.0 >> 1).into()
+    }
+
+    /// Get the orientation (+ or -) for the handle.
+    pub fn orient(&self) -> Orientation {
+        ((self.0 & 1) as u8).try_into().unwrap()
+    }
+}
+
+/// The kind of each operation in a CIGAR alignment.
+#[derive(Debug, IntoPrimitive, TryFromPrimitive, Clone, Copy)]
+#[repr(u8)]
+pub enum AlignOpcode {
+    Match,     // M
+    Gap,       // N
+    Insertion, // I
+    Deletion,  // D
+}
+
+/// A single operation in a CIGAR alignment, like "3M" or "1D".
+///
+/// Logically, this is a pair of a number and an `AlignOpcode`. We pack the two
+/// into a single u32.
+#[derive(Debug, FromZeroes, FromBytes, AsBytes, Clone, Copy)]
+#[repr(packed)]
+pub struct AlignOp(u32);
+
+impl AlignOp {
+    /// Create a new alignment operation from an opcode and count (which must fit in 24 bits).
+    pub fn new(op: AlignOpcode, len: u32) -> Self {
+        let op_byte: u8 = op.into();
+        assert!(len >> 24 == 0, "length too large"); // the low 8 bits hold the opcode
+        Self((len << 8) | (op_byte as u32))
+    }
+
+    /// Get the opcode (M, I, etc.) for this operation; stored in the low 8 bits.
+    pub fn op(&self) -> AlignOpcode {
+        ((self.0 & 0xff) as u8).try_into().unwrap()
+    }
+
+    /// Get the length (repeat count) of the operation; stored in the high 24 bits.
+    pub fn len(&self) -> u32 {
+        self.0 >> 8
+    }
+
+    /// Check whether this operation has a length of zero.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+}
+
+/// An entire CIGAR alignment string, like "3M1D2M".
+#[derive(Debug)]
+#[repr(transparent)]
+pub struct Alignment<'a> {
+    /// The sequence of operations that make up the alignment.
+    pub ops: &'a [AlignOp],
+}
+
+/// A kind of GFA line. We use this in `line_order` to preserve the textual order
+/// in a GFA file for round-tripping.
+#[derive(Debug, IntoPrimitive, TryFromPrimitive)]
+#[repr(u8)]
+pub enum LineKind {
+    Header,
+    Segment,
+    Path,
+    Link,
+}
+
+impl<'a> FlatGFA<'a> {
+    /// Get the base-pair sequence for a segment.
+    pub fn get_seq(&self, seg: &Segment) -> &BStr {
+        self.seq_data[seg.seq].as_ref()
+    }
+
+    /// Look up a segment by its name.
+    pub fn find_seg(&self, name: usize) -> Option<Id<Segment>> {
+        // TODO Make this more efficient by maintaining the name index? This would not be
+        // too hard; we already have the machinery in `parse.rs`...
+        self.segs.search(|seg| seg.name == name)
+    }
+
+    /// Look up a path by its name.
+    pub fn find_path(&self, name: &BStr) -> Option<Id<Path>> {
+        self.paths.search(|path| self.get_path_name(path) == name)
+    }
+
+    /// Get the string name of a path.
+    pub fn get_path_name(&self, path: &Path) -> &BStr {
+        self.name_data[path.name].as_ref()
+    }
+
+    pub fn get_path_steps(&self, path: &Path) -> impl Iterator<Item = &Handle> {
+        self.steps[path.steps].iter()
+    }
+
+    /// Get a handle's associated segment.
+    pub fn get_handle_seg(&self, handle: Handle) -> &Segment {
+        &self.segs[handle.segment()]
+    }
+
+    /// Get the optional data for a segment, as a tab-separated string.
+    pub fn get_optional_data(&self, seg: &Segment) -> &BStr {
+        self.optional_data[seg.optional].as_ref()
+    }
+
+    /// Look up a CIGAR alignment.
+    pub fn get_alignment(&self, overlap: Span<AlignOp>) -> Alignment {
+        Alignment {
+            ops: &self.alignment[overlap],
+        }
+    }
+
+    /// Get the recorded order of line kinds.
+    pub fn get_line_order(&self) -> impl Iterator<Item = LineKind> + 'a {
+        self.line_order
+            .all()
+            .iter()
+            .map(|b| (*b).try_into().unwrap())
+    }
+}
+
+/// The data storage pools for a `FlatGFA`.
+#[derive(Default)]
+pub struct GFAStore<'a, P: StoreFamily<'a>> {
+    pub header: P::Store<u8>,
+    pub segs: P::Store<Segment>,
+    pub paths: P::Store<Path>,
+    pub links: P::Store<Link>,
+    pub steps: P::Store<Handle>,
+    pub seq_data: P::Store<u8>,
+    pub overlaps: P::Store<Span<AlignOp>>,
+    pub alignment: P::Store<AlignOp>,
+    pub name_data: P::Store<u8>,
+    pub optional_data: P::Store<u8>,
+    pub line_order: P::Store<u8>,
+}
+
+impl<'a, P: StoreFamily<'a>> GFAStore<'a, P> {
+    /// Add a header line for the GFA file. This may only be added once.
+    pub fn add_header(&mut self, version: &[u8]) {
+        assert!(self.header.as_ref().is_empty());
+        self.header.add_slice(version);
+    }
+
+    /// Add a new segment to the GFA file.
+    pub fn add_seg(&mut self, name: usize, seq: &[u8], optional: &[u8]) -> Id<Segment> {
+        self.segs.add(Segment {
+            name,
+            seq: self.seq_data.add_slice(seq),
+            optional: self.optional_data.add_slice(optional),
+        })
+    }
+
+    /// Add a new path.
+    pub fn add_path(
+        &mut self,
+        name: &[u8],
+        steps: Span<Handle>,
+        overlaps: impl Iterator<Item = Vec<AlignOp>>,
+    ) -> Id<Path> {
+        let overlaps = self.overlaps.add_iter(
+            overlaps
+                .into_iter()
+                .map(|align| self.alignment.add_iter(align)),
+        );
+        let name = self.name_data.add_slice(name);
+        self.paths.add(Path {
+            name,
+            steps,
+            overlaps,
+        })
+    }
+
+    /// Add a sequence of steps.
+    pub fn add_steps(&mut self, steps: impl Iterator<Item = Handle>) -> Span<Handle> {
+        self.steps.add_iter(steps)
+    }
+
+    /// Add a single step.
+    pub fn add_step(&mut self, step: Handle) -> Id<Handle> {
+        self.steps.add(step)
+    }
+
+    /// Add a sequence of links.
+    pub fn add_links(&mut self, links: impl Iterator<Item = Link>) -> Span<Link> {
+        self.links.add_iter(links)
+    }
+
+    /// Add a link between two (oriented) segments.
+    pub fn add_link(&mut self, from: Handle, to: Handle, overlap: Vec<AlignOp>) -> Id<Link> {
+        self.links.add(Link {
+            from,
+            to,
+            overlap: self.alignment.add_iter(overlap),
+        })
+    }
+
+    /// Record a line type to preserve the line order.
+    pub fn record_line(&mut self, kind: LineKind) {
+        self.line_order.add(kind.into());
+    }
+
+    /// Borrow a FlatGFA view of this data store.
+    pub fn as_ref(&self) -> FlatGFA {
+        FlatGFA {
+            header: self.header.as_ref(),
+            segs: self.segs.as_ref(),
+            paths: self.paths.as_ref(),
+            links: self.links.as_ref(),
+            name_data: self.name_data.as_ref(),
+            seq_data: self.seq_data.as_ref(),
+            steps: self.steps.as_ref(),
+            overlaps: self.overlaps.as_ref(),
+            alignment: self.alignment.as_ref(),
+            optional_data: self.optional_data.as_ref(),
+            line_order: self.line_order.as_ref(),
+        }
+    }
+}
+
+pub trait StoreFamily<'a> {
+    type Store<T: Clone + 'a>: pool::Store<T>;
+}
+
+#[derive(Default)]
+pub struct HeapFamily;
+impl<'a> StoreFamily<'a> for HeapFamily {
+    type Store<T: Clone + 'a> = pool::HeapStore<T>;
+}
+
+pub struct FixedFamily;
+impl<'a> StoreFamily<'a> for FixedFamily {
+    type Store<T: Clone + 'a> = pool::FixedStore<'a, T>;
+}
+
+/// A store for `FlatGFA` data backed by fixed-size slices.
+///
+/// This store contains `SliceVec`s, which act like `Vec`s but are allocated within
+/// a fixed region. This means they have a maximum size, but they can directly map
+/// onto the contents of a file.
+pub type FixedGFAStore<'a> = GFAStore<'a, FixedFamily>;
+
+/// A mutable, in-memory data store for `FlatGFA`.
+///
+/// This store contains a bunch of `Vec`s: one per array required to implement a
+/// `FlatGFA`. It exposes an API for building up a GFA data structure, so it is
+/// useful for creating new ones from scratch.
+pub type HeapGFAStore = GFAStore<'static, HeapFamily>;
diff --git a/flatgfa/src/fgfa_ds/gfaline.rs b/flatgfa/src/fgfa_ds/gfaline.rs
new file mode 100644
index 00000000..87178bbf
--- /dev/null
+++ b/flatgfa/src/fgfa_ds/gfaline.rs
@@ -0,0 +1,272 @@
+use super::flatgfa::{AlignOp, AlignOpcode, Orientation};
+use atoi::FromRadix10;
+
+type ParseResult<T> = Result<T, &'static str>;
+type LineResult<'a> = ParseResult<Line<'a>>;
+type PartialParseResult<'a, T> = ParseResult<(T, &'a [u8])>;
+
+/// A parsed GFA file line.
+pub enum Line<'a> {
+    Header(&'a [u8]),
+    Segment(Segment<'a>),
+    Link(Link),
+    Path(Path<'a>),
+}
+
+pub struct Segment<'a> {
+    pub name: usize,
+    pub seq: &'a [u8],
+    pub data: &'a [u8],
+}
+
+pub struct Link {
+    pub from_seg: usize,
+    pub from_orient: Orientation,
+    pub to_seg: usize,
+    pub to_orient: Orientation,
+    pub overlap: Vec<AlignOp>,
+}
+
+pub struct Path<'a> {
+    pub name: &'a [u8],
+    pub steps: &'a [u8],
+    pub overlaps: Vec<Vec<AlignOp>>,
+}
+
+/// Parse a single line of a GFA file.
+pub fn parse_line(line: &[u8]) -> LineResult {
+    if line.len() < 2 || line[1] != b'\t' {
+        return Err("expected marker and tab");
+    }
+    let rest = &line[2..];
+    match line[0] {
+        b'H' => parse_header(rest),
+        b'S' => parse_seg(rest),
+        b'L' => parse_link(rest),
+        b'P' => parse_path(rest),
+        _ => Err("unhandled line kind"),
+    }
+}
+
+/// Parse a header line, which looks like `H <data>`.
+fn parse_header(line: &[u8]) -> LineResult {
+    Ok(Line::Header(line))
+}
+
+/// Parse a segment line, which looks like `S <name> <seq> <data>`.
+fn parse_seg(line: &[u8]) -> LineResult {
+    let (name, rest) = parse_num(line)?;
+    let rest = parse_byte(rest, b'\t')?;
+    let (seq, data) = parse_field(rest)?;
+    Ok(Line::Segment(Segment { name, seq, data }))
+}
+
+/// Parse a link line, which looks like `L <from> <+-> <to> <+-> <CIGAR>`.
+fn parse_link(line: &[u8]) -> LineResult {
+    let (from_seg, rest) = parse_num(line)?;
+    let rest = parse_byte(rest, b'\t')?;
+    let (from_orient, rest) = parse_orient(rest)?;
+    let rest = parse_byte(rest, b'\t')?;
+    let (to_seg, rest) = parse_num(rest)?;
+    let rest = parse_byte(rest, b'\t')?;
+    let (to_orient, rest) = parse_orient(rest)?;
+    let rest = parse_byte(rest, b'\t')?;
+    let (overlap, rest) = parse_align(rest)?;
+    if !rest.is_empty() {
+        return Err("expected end of line");
+    }
+    Ok(Line::Link(Link {
+        from_seg,
+        from_orient,
+        to_seg,
+        to_orient,
+        overlap,
+    }))
+}
+
+/// Parse a path line, which looks like `P <name> <steps> <*|CIGARs>`.
+fn parse_path(line: &[u8]) -> LineResult {
+    let (name, rest) = parse_field(line)?;
+    let (steps, rest) = parse_field(rest)?;
+    let (overlaps, rest) = parse_maybe_overlap_list(rest)?;
+    if !rest.is_empty() {
+        return Err("expected end of line");
+    }
+    Ok(Line::Path(Path {
+        name,
+        steps,
+        overlaps,
+    }))
+}
+
+/// Parse a *possible* overlap list, which may be `*` (empty).
+pub fn parse_maybe_overlap_list(s: &[u8]) -> PartialParseResult<Vec<Vec<AlignOp>>> {
+    if s == b"*" {
+        Ok((vec![], &s[1..]))
+    } else {
+        parse_overlap_list(s)
+    }
+}
+
+/// Parse a comma-separated list of CIGAR strings.
+///
+/// TODO: This could be optimized to avoid accumulating into a vector.
+fn parse_overlap_list(s: &[u8]) -> PartialParseResult<Vec<Vec<AlignOp>>> {
+    let mut rest = s;
+    let mut overlaps = vec![];
+    while !rest.is_empty() {
+        let overlap;
+        (overlap, rest) = parse_align(rest)?;
+        overlaps.push(overlap);
+        if !rest.is_empty() {
+            rest = parse_byte(rest, b',')?;
+        }
+    }
+    Ok((overlaps, rest))
+}
+
+/// Split `line` at the first `marker`: returns the bytes before it and the bytes after it.
+fn parse_until(line: &[u8], marker: u8) -> PartialParseResult<&[u8]> {
+    let end = memchr::memchr(marker, line).unwrap_or(line.len()); // no marker: take it all
+    let rest = if end == line.len() {
+        &[]
+    } else {
+        &line[end + 1..] // skip over the marker itself
+    };
+    Ok((&line[..end], rest))
+}
+
+/// Consume a string from the line, until a tab (or the end of the line).
+pub fn parse_field(line: &[u8]) -> PartialParseResult<&[u8]> {
+    parse_until(line, b'\t')
+}
+
+/// Consume a specific byte.
+fn parse_byte(s: &[u8], byte: u8) -> ParseResult<&[u8]> {
+    if s.is_empty() || s[0] != byte {
+        return Err("expected byte");
+    }
+    Ok(&s[1..])
+}
+
+/// Parse a single integer.
+fn parse_num<T: FromRadix10>(s: &[u8]) -> PartialParseResult<T> {
+    match T::from_radix_10(s) {
+        (_, 0) => Err("expected number"),
+        (num, used) => Ok((num, &s[used..])),
+    }
+}
+
+/// Parse a segment orientation (+ or -).
+fn parse_orient(line: &[u8]) -> PartialParseResult<Orientation> {
+    if line.is_empty() {
+        return Err("expected orientation");
+    }
+    let orient = match line[0] {
+        b'+' => Orientation::Forward,
+        b'-' => Orientation::Backward,
+        _ => return Err("expected orient"),
+    };
+    Ok((orient, &line[1..]))
+}
+
+/// Parse a single CIGAR alignment operation (like `4D`); a count with no opcode is an error.
+fn parse_align_op(s: &[u8]) -> PartialParseResult<AlignOp> {
+    let (len, rest) = parse_num::<u32>(s)?;
+    let op = match rest.first() {
+        Some(b'M') => AlignOpcode::Match,
+        Some(b'N') => AlignOpcode::Gap,
+        Some(b'D') => AlignOpcode::Deletion,
+        Some(b'I') => AlignOpcode::Insertion,
+        _ => return Err("expected align op"), // unknown opcode, or input ended after the count
+    };
+    Ok((AlignOp::new(op, len), &rest[1..]))
+}
+
+/// Parse a complete CIGAR alignment string (like `3M2I`).
+///
+/// TODO This could be optimized to avoid collecting into a vector.
+fn parse_align(s: &[u8]) -> PartialParseResult<Vec<AlignOp>> {
+    let mut rest = s;
+    let mut align = vec![];
+    while !rest.is_empty() && rest[0].is_ascii_digit() {
+        let op;
+        (op, rest) = parse_align_op(rest)?;
+        align.push(op);
+    }
+    Ok((align, rest))
+}
+
+/// Parse GFA paths' segment lists. These look like `1+,2-,3+`.
+pub struct StepsParser<'a> {
+    str: &'a [u8],
+    index: usize,
+    state: StepsParseState,
+    seg: usize,
+}
+
+/// The parser state: we're either looking for a segment name (or a +/- terminator),
+/// or we're expecting a comma (or end of string).
+enum StepsParseState {
+    Seg,
+    Comma,
+}
+
+impl<'a> StepsParser<'a> {
+    pub fn new(str: &'a [u8]) -> Self {
+        StepsParser {
+            str,
+            index: 0,
+            state: StepsParseState::Seg,
+            seg: 0,
+        }
+    }
+
+    pub fn rest(&self) -> &[u8] {
+        &self.str[self.index..]
+    }
+}
+
+impl<'a> Iterator for StepsParser<'a> {
+    type Item = (usize, bool);
+    fn next(&mut self) -> Option<(usize, bool)> {
+        while self.index < self.str.len() {
+            // Consume one byte.
+            let byte = self.str[self.index];
+            self.index += 1;
+
+            match self.state {
+                StepsParseState::Seg => {
+                    if byte == b'+' || byte == b'-' {
+                        self.state = StepsParseState::Comma;
+                        return Some((self.seg, byte == b'+'));
+                    } else if byte.is_ascii_digit() {
+                        self.seg *= 10;
+                        self.seg += (byte - b'0') as usize;
+                    } else {
+                        return None;
+                    }
+                }
+                StepsParseState::Comma => {
+                    if byte == b',' {
+                        self.state = StepsParseState::Seg;
+                        self.seg = 0;
+                    } else {
+                        return None;
+                    }
+                }
+            }
+        }
+
+        None
+    }
+}
+
+#[test]
+fn test_parse_steps() {
+    let s = b"1+,23-,4+ suffix";
+    let mut parser = StepsParser::new(s);
+    let path: Vec<_> = (&mut parser).collect();
+    assert_eq!(path, vec![(1, true), (23, false), (4, true)]);
+    assert_eq!(parser.rest(), b"suffix");
+}
diff --git a/flatgfa/src/fgfa_ds/mod.rs b/flatgfa/src/fgfa_ds/mod.rs
new file mode 100644
index 00000000..32fd106b
--- /dev/null
+++ b/flatgfa/src/fgfa_ds/mod.rs
@@ -0,0 +1,7 @@
+pub mod file;
+pub mod flatgfa;
+pub mod gfaline;
+pub mod parse;
+pub mod pool;
+pub mod print;
+
diff --git a/flatgfa/src/fgfa_ds/parse.rs b/flatgfa/src/fgfa_ds/parse.rs
new file mode 100644
index 00000000..76ae4d90
--- /dev/null
+++ b/flatgfa/src/fgfa_ds/parse.rs
@@ -0,0 +1,284 @@
+use super::file::Toc;
+use super::flatgfa::{self, Handle, LineKind, Orientation};
+use super::gfaline;
+use std::collections::HashMap;
+use std::io::BufRead;
+
+pub struct Parser<'a, P: flatgfa::StoreFamily<'a>> {
+    /// The flat representation we're building.
+    flat: flatgfa::GFAStore<'a, P>,
+
+    /// All segment IDs, indexed by their names, which we need to refer to segments in paths.
+    seg_ids: NameMap,
+}
+
+impl<'a, P: flatgfa::StoreFamily<'a>> Parser<'a, P> {
+    pub fn new(builder: flatgfa::GFAStore<'a, P>) -> Self {
+        Self {
+            flat: builder,
+            seg_ids: NameMap::default(),
+        }
+    }
+
+    /// Parse a GFA text file from an I/O stream.
+    pub fn parse_stream<R: BufRead>(mut self, stream: R) -> flatgfa::GFAStore<'a, P> {
+        // We can parse segments immediately, but we need to defer links and paths until we have all
+        // the segment names that they might refer to.
+        let mut deferred_links = Vec::new();
+        let mut deferred_paths = Vec::new();
+
+        // Parse or defer each line.
+        for line in stream.split(b'\n') {
+            let line = line.unwrap();
+
+            // Avoid parsing paths entirely for now; just preserve the entire line for later.
+            if line[0] == b'P' {
+                self.flat.record_line(LineKind::Path);
+                deferred_paths.push(line);
+                continue;
+            }
+
+            // Parse other kinds of lines.
+            let gfa_line = gfaline::parse_line(line.as_ref()).unwrap();
+            self.record_line(&gfa_line);
+
+            match gfa_line {
+                gfaline::Line::Header(data) => {
+                    self.flat.add_header(data);
+                }
+                gfaline::Line::Segment(seg) => {
+                    self.add_seg(seg);
+                }
+                gfaline::Line::Link(link) => {
+                    deferred_links.push(link);
+                }
+                gfaline::Line::Path(_) => {
+                    unreachable!("paths handled separately")
+                }
+            }
+        }
+
+        // "Unwind" the deferred links and paths.
+        for link in deferred_links {
+            self.add_link(link);
+        }
+        for line in deferred_paths {
+            self.add_path(&line);
+        }
+
+        self.flat
+    }
+
+    /// Parse a GFA text file from an in-memory buffer.
+    pub fn parse_mem(mut self, buf: &[u8]) -> flatgfa::GFAStore<'a, P> {
+        let mut deferred_lines = Vec::new();
+
+        for line in MemchrSplit::new(b'\n', buf) {
+            // When parsing from memory, it's easy to entirely defer parsing of any line: we just keep
+            // pointers to them. So we defer both paths and links.
+            if line[0] == b'P' || line[0] == b'L' {
+                self.flat.record_line(if line[0] == b'P' {
+                    LineKind::Path
+                } else {
+                    LineKind::Link
+                });
+                deferred_lines.push(line);
+                continue;
+            }
+
+            // Actually parse other lines.
+            let gfa_line = gfaline::parse_line(line).unwrap();
+            self.record_line(&gfa_line);
+            match gfa_line {
+                gfaline::Line::Header(data) => {
+                    self.flat.add_header(data);
+                }
+                gfaline::Line::Segment(seg) => {
+                    self.add_seg(seg);
+                }
+                gfaline::Line::Link(_) | gfaline::Line::Path(_) => {
+                    unreachable!("paths and links handled separately")
+                }
+            }
+        }
+
+        // "Unwind" the deferred lines.
+        for line in deferred_lines {
+            if line[0] == b'P' {
+                self.add_path(line);
+            } else {
+                let gfa_line = gfaline::parse_line(line).unwrap();
+                if let gfaline::Line::Link(link) = gfa_line {
+                    self.add_link(link);
+                } else {
+                    unreachable!("unexpected deferred line")
+                }
+            }
+        }
+
+        self.flat
+    }
+
+    /// Record a marker that captures the original GFA line ordering.
+    fn record_line(&mut self, line: &gfaline::Line) {
+        match line {
+            gfaline::Line::Header(_) => self.flat.record_line(LineKind::Header),
+            gfaline::Line::Segment(_) => self.flat.record_line(LineKind::Segment),
+            gfaline::Line::Link(_) => self.flat.record_line(LineKind::Link),
+            gfaline::Line::Path(_) => self.flat.record_line(LineKind::Path),
+        }
+    }
+
+    fn add_seg(&mut self, seg: gfaline::Segment) {
+        let seg_id = self.flat.add_seg(seg.name, seg.seq, seg.data);
+        self.seg_ids.insert(seg.name, seg_id.into());
+    }
+
+    fn add_link(&mut self, link: gfaline::Link) {
+        let from = Handle::new(self.seg_ids.get(link.from_seg).into(), link.from_orient);
+        let to = Handle::new(self.seg_ids.get(link.to_seg).into(), link.to_orient);
+        self.flat.add_link(from, to, link.overlap);
+    }
+
+    fn add_path(&mut self, line: &[u8]) {
+        // This must be a path line.
+        assert_eq!(&line[..2], b"P\t");
+        let line = &line[2..];
+
+        // Parse the name.
+        let (name, rest) = gfaline::parse_field(line).unwrap();
+
+        // Parse the steps.
+        let mut step_parser = gfaline::StepsParser::new(rest);
+        let steps = self.flat.add_steps((&mut step_parser).map(|(name, dir)| {
+            Handle::new(
+                self.seg_ids.get(name).into(),
+                if dir {
+                    Orientation::Forward
+                } else {
+                    Orientation::Backward
+                },
+            )
+        }));
+        let rest = step_parser.rest();
+
+        // Parse the overlaps.
+        let (overlaps, rest) = gfaline::parse_maybe_overlap_list(rest).unwrap();
+
+        assert!(rest.is_empty());
+        self.flat.add_path(name, steps, overlaps.into_iter());
+    }
+}
+
+impl Parser<'static, flatgfa::HeapFamily> {
+    pub fn for_heap() -> Self {
+        Self::new(flatgfa::HeapGFAStore::default())
+    }
+}
+
+impl<'a> Parser<'a, flatgfa::FixedFamily> {
+    pub fn for_slice(store: flatgfa::FixedGFAStore<'a>) -> Self {
+        Self::new(store)
+    }
+}
+
+#[derive(Default)]
+struct NameMap {
+    /// Names in `1..=sequential_max` are assigned *sequential* IDs, i.e., the ID is just
+    /// the name minus one.
+    sequential_max: usize,
+
+    /// Non-sequential names (including the name 0) go here.
+    others: HashMap<usize, u32>,
+}
+
+impl NameMap {
+    fn insert(&mut self, name: usize, id: u32) {
+        // Is this the next sequential name? If so, no need to record it in our hash table;
+        // just bump the count. Compared without `name - 1`, which underflows for name 0.
+        if name == self.sequential_max + 1 && name == (id as usize) + 1 {
+            self.sequential_max += 1;
+        } else {
+            self.others.insert(name, id);
+        }
+    }
+
+    fn get(&self, name: usize) -> u32 {
+        if (1..=self.sequential_max).contains(&name) {
+            (name - 1) as u32
+        } else {
+            self.others[&name]
+        }
+    }
+}
+
+/// Scan a GFA text file to count the number of each type of line and measure some sizes
+/// that are useful in estimating the final size of the FlatGFA file.
+pub fn estimate_toc(buf: &[u8]) -> Toc {
+    let mut segs = 0;
+    let mut links = 0;
+    let mut paths = 0;
+    let mut header_bytes = 0;
+    let mut seg_bytes = 0;
+    let mut path_bytes = 0;
+
+    let mut rest = buf;
+    while !rest.is_empty() {
+        let marker = rest[0];
+        let next = memchr::memchr(b'\n', rest).unwrap_or(rest.len() + 1);
+
+        match marker {
+            b'H' => {
+                header_bytes += next;
+            }
+            b'S' => {
+                segs += 1;
+                seg_bytes += next;
+            }
+            b'L' => {
+                links += 1;
+            }
+            b'P' => {
+                paths += 1;
+                path_bytes += next;
+            }
+            _ => {
+                panic!("unknown line type")
+            }
+        }
+
+        if next >= rest.len() {
+            break;
+        }
+        rest = &rest[next + 1..];
+    }
+
+    Toc::estimate(segs, links, paths, header_bytes, seg_bytes, path_bytes)
+}
+
+struct MemchrSplit<'a> {
+    haystack: &'a [u8],
+    memchr: memchr::Memchr<'a>,
+    pos: usize,
+}
+
+impl<'a> Iterator for MemchrSplit<'a> {
+    type Item = &'a [u8];
+    /// Yield the next line; a non-empty final line with no trailing needle is yielded too.
+    fn next(&mut self) -> Option<Self::Item> {
+        let (start, len) = (self.pos, self.haystack.len());
+        let end = self.memchr.next().or((start < len).then_some(len))?;
+        self.pos = end + 1;
+        Some(&self.haystack[start..end])
+    }
+}
+
+impl MemchrSplit<'_> {
+    fn new(needle: u8, haystack: &[u8]) -> MemchrSplit {
+        MemchrSplit {
+            haystack,
+            memchr: memchr::memchr_iter(needle, haystack),
+            pos: 0,
+        }
+    }
+}
diff --git a/flatgfa/src/fgfa_ds/pool.rs b/flatgfa/src/fgfa_ds/pool.rs
new file mode 100644
index 00000000..2872388a
--- /dev/null
+++ b/flatgfa/src/fgfa_ds/pool.rs
@@ -0,0 +1,299 @@
+use std::ops::{Index, Add, Sub};
+use std::{hash::Hash, marker::PhantomData};
+use tinyvec::SliceVec;
+use zerocopy::{AsBytes, FromBytes, FromZeroes};
+
+/// An index into a pool, stored as a `u32` and tagged with the element type `T`.
+#[derive(Debug, FromZeroes, FromBytes, AsBytes, Clone, Copy)]
+#[repr(transparent)]
+pub struct Id<T>(u32, PhantomData<T>);
+
+impl<T> PartialEq for Id<T> {
+    fn eq(&self, other: &Self) -> bool {
+        self.0 == other.0 // compare raw indices only; no `T: PartialEq` bound is needed
+    }
+}
+
+impl<T> Eq for Id<T> {}
+
+impl<T> Hash for Id<T> {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.0.hash(state) // hash the raw index, consistent with `eq`
+    }
+}
+
+impl<T> Add<u32> for Id<T> {
+    type Output = Self;
+    /// Offset this ID forward by `rhs` positions.
+    #[inline]
+    fn add(self, rhs: u32) -> Self::Output {
+        Id(self.0 + rhs, PhantomData)
+    }
+}
+
+impl<T> Sub<u32> for Id<T> {
+    type Output = Self;
+    #[inline]
+    fn sub(self, rhs: u32) -> Self::Output {
+        Id(self.0 - rhs, PhantomData) // offset backward by `rhs` positions
+    }
+}
+
+impl<T> Id<T> {
+    pub fn index(self) -> usize {
+        self.0 as usize // widen the raw index so it can index a slice
+    }
+    /// Build an ID from a `usize`, panicking if it does not fit in a `u32`.
+    pub fn new(index: usize) -> Self {
+        Self(u32::try_from(index).expect("id too large"), PhantomData)
+    }
+}
+
+impl<T> From<u32> for Id<T> {
+    fn from(v: u32) -> Self {
+        Self(v, PhantomData) // wrap a raw index; infallible, unlike `Id::new`
+    }
+}
+
+impl<T> From<Id<T>> for u32 {
+    fn from(v: Id<T>) -> Self {
+        v.0 // unwrap back to the raw index
+    }
+}
+
+/// A half-open range `start..end` of indices into a pool.
+///
+/// TODO: Consider smaller indices for this, and possibly base/offset instead
+/// of start/end.
+#[derive(Debug, FromZeroes, FromBytes, AsBytes, Clone, Copy, PartialEq, Eq, Hash)]
+#[repr(packed)]
+pub struct Span<T> {
+    pub start: Id<T>,
+    pub end: Id<T>,
+    _marker: PhantomData<T>,
+}
+
+impl<T> From<Span<T>> for std::ops::Range<usize> {
+    fn from(span: Span<T>) -> std::ops::Range<usize> {
+        (span.start.index())..(span.end.index()) // owned: `index` consumes each ID
+    }
+}
+
+impl<T> From<&Span<T>> for std::ops::Range<usize> {
+    fn from(span: &Span<T>) -> std::ops::Range<usize> {
+        (span.start.0 as usize)..(span.end.0 as usize) // borrowed: read the raw indices
+    }
+}
+
+impl<T> Span<T> {
+    pub fn is_empty(&self) -> bool {
+        self.start.0 == self.end.0
+    }
+
+    pub fn len(&self) -> usize {
+        (self.end.0 - self.start.0) as usize
+    }
+    /// Check whether `id` falls in the half-open range `start..end`.
+    pub fn contains(&self, id: Id<T>) -> bool {
+        self.start.0 <= id.0 && id.0 < self.end.0
+    }
+    /// Build a span from its bounds; `end` is exclusive.
+    pub fn new(start: Id<T>, end: Id<T>) -> Self {
+        Self {
+            start,
+            end,
+            _marker: PhantomData,
+        }
+    }
+    /// Build a zero-length span that starts and ends at index 0.
+    pub fn new_empty() -> Self {
+        Span::new(Id::new(0), Id::new(0))
+    }
+}
+
+/// A simple arena for objects of a single type.
+///
+/// This trait provides convenient accessors for treating Vec and Vec-like objects
+/// as allocation arenas. This trait supports adding to the pool (i.e., growing the
+/// arena). Call `as_ref` to get a `Pool`, a fixed-size view that supports convenient
+/// access to the current set of objects (but not addition of new objects).
+pub trait Store<T: Clone> {
+    /// Get a fixed-size view of the arena.
+    fn as_ref(&self) -> Pool<T>;
+
+    /// Add an item to the pool and get the new id.
+    fn add(&mut self, item: T) -> Id<T>;
+
+    /// Add an entire sequence of items to a "pool" vector and return the
+    /// range of new indices (IDs).
+    fn add_iter(&mut self, iter: impl IntoIterator<Item = T>) -> Span<T>;
+
+    /// Like `add_iter`, but takes its items from a slice.
+    fn add_slice(&mut self, slice: &[T]) -> Span<T>;
+
+    /// Get the number of items in the pool.
+    fn len(&self) -> usize;
+
+    /// Check whether the pool is empty.
+    fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Get the next available ID.
+    fn next_id(&self) -> Id<T> {
+        Id::new(self.len())
+    }
+}
+
+/// A store backed by a heap-allocated `Vec`.
+///
+/// This is a "normal" arena that can freely grow to fill available memory.
+#[repr(transparent)]
+pub struct HeapStore<T>(Vec<T>);
+
+impl<T: Clone> Store<T> for HeapStore<T> {
+    fn as_ref(&self) -> Pool<T> {
+        Pool(self.0.as_slice())
+    }
+
+    fn add(&mut self, item: T) -> Id<T> {
+        let new_id = self.next_id();
+        self.0.push(item);
+        new_id
+    }
+
+    fn add_iter(&mut self, iter: impl IntoIterator<Item = T>) -> Span<T> {
+        let first = self.next_id();
+        self.0.extend(iter);
+        Span::new(first, self.next_id())
+    }
+
+    fn add_slice(&mut self, slice: &[T]) -> Span<T> {
+        let first = self.next_id();
+        self.0.extend_from_slice(slice);
+        Span::new(first, self.next_id())
+    }
+
+    fn len(&self) -> usize {
+        self.0.len()
+    }
+}
+
+impl<T> Default for HeapStore<T> {
+    fn default() -> Self {
+        HeapStore(Vec::new())
+    }
+}
+
+/// A store whose items live in fixed, pre-allocated memory.
+///
+/// This is a funkier kind of arena that uses memory that has already been pre-allocated
+/// somewhere else, such as in a memory-mapped file. A consequence is that there is a
+/// fixed maximum size for the arena; it's possible to add objects only until it fills up.
+#[repr(transparent)]
+pub struct FixedStore<'a, T>(SliceVec<'a, T>);
+
+impl<'a, T: Clone> Store<T> for FixedStore<'a, T> {
+    fn as_ref(&self) -> Pool<T> {
+        Pool(&self.0)
+    }
+
+    fn add(&mut self, item: T) -> Id<T> {
+        let new_id = self.next_id();
+        self.0.push(item);
+        new_id
+    }
+
+    fn add_iter(&mut self, iter: impl IntoIterator<Item = T>) -> Span<T> {
+        let first = self.next_id();
+        self.0.extend(iter);
+        Span::new(first, self.next_id())
+    }
+
+    fn add_slice(&mut self, slice: &[T]) -> Span<T> {
+        let first = self.next_id();
+        self.0.extend_from_slice(slice);
+        Span::new(first, self.next_id())
+    }
+
+    fn len(&self) -> usize {
+        self.0.len()
+    }
+}
+
+impl<'a, T> FixedStore<'a, T> {
+    pub fn capacity(&self) -> usize {
+        self.0.capacity() // capacity of the backing `SliceVec`
+    }
+}
+
+impl<'a, T> From<SliceVec<'a, T>> for FixedStore<'a, T> {
+    fn from(backing: SliceVec<'a, T>) -> Self {
+        FixedStore(backing)
+    }
+}
+
+/// A fixed-sized arena.
+///
+/// This type allows id-based access to a fixed-size chunk of objects reflecting
+/// a `Store`. Unlike `Store`, it does not support adding new objects.
+#[repr(transparent)]
+#[derive(Clone, Copy)]
+pub struct Pool<'a, T>(&'a [T]);
+
+impl<'a, T> Pool<'a, T> {
+    /// Get the number of items in the pool.
+    pub fn len(&self) -> usize {
+        self.0.len()
+    }
+
+    /// Check if the pool is empty.
+    pub fn is_empty(&self) -> bool {
+        self.0.is_empty()
+    }
+
+    /// Get the next available ID, i.e. the ID one past the last item in the pool.
+    pub fn next_id(&self) -> Id<T> {
+        Id::new(self.len())
+    }
+
+    /// Get the entire pool as a slice.
+    pub fn all(&self) -> &'a [T] {
+        self.0
+    }
+
+    /// Find the ID of the first item in the pool that satisfies a predicate, if any.
+    pub fn search(&self, pred: impl Fn(&T) -> bool) -> Option<Id<T>> {
+        self.0.iter().position(pred).map(Id::new)
+    }
+
+    /// Iterate over id/item pairs in the pool, in index order.
+    pub fn items(&self) -> impl Iterator<Item = (Id<T>, &T)> {
+        self.0
+            .iter()
+            .enumerate()
+            .map(|(i, item)| (Id::new(i), item))
+    }
+}
+
+impl<T> Index<Id<T>> for Pool<'_, T> {
+    type Output = T;
+    /// Look up one item by ID; panics if the ID is out of bounds.
+    fn index(&self, id: Id<T>) -> &T {
+        &self.0[id.index()]
+    }
+}
+
+impl<T> Index<Span<T>> for Pool<'_, T> {
+    type Output = [T];
+    /// Get the items covered by a span; panics if the span is out of bounds.
+    fn index(&self, span: Span<T>) -> &[T] {
+        &self.0[std::ops::Range::from(span)]
+    }
+}
+
+impl<'a, T> From<&'a [T]> for Pool<'a, T> {
+    fn from(slice: &'a [T]) -> Self {
+        Self(slice) // view an existing slice as a pool without copying
+    }
+}
diff --git a/flatgfa/src/fgfa_ds/print.rs b/flatgfa/src/fgfa_ds/print.rs
new file mode 100644
index 00000000..09532389
--- /dev/null
+++ b/flatgfa/src/fgfa_ds/print.rs
@@ -0,0 +1,153 @@
+use super::flatgfa;
+use std::fmt;
+
+impl fmt::Display for flatgfa::Orientation {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(match self {
+            flatgfa::Orientation::Forward => "+",
+            flatgfa::Orientation::Backward => "-",
+        })
+    }
+}
+
+impl fmt::Display for flatgfa::AlignOpcode {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            flatgfa::AlignOpcode::Match => write!(f, "M"),
+            flatgfa::AlignOpcode::Gap => write!(f, "N"),
+            flatgfa::AlignOpcode::Insertion => write!(f, "I"), // CIGAR `I`; NOTE(review): confirm parser agrees
+            flatgfa::AlignOpcode::Deletion => write!(f, "D"), // CIGAR `D`; NOTE(review): confirm parser agrees
+        }
+    }
+}
+
+impl<'a> fmt::Display for flatgfa::Alignment<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if self.ops.is_empty() {
+            write!(f, "0M")?; // an alignment with no operations prints as `0M`
+        }
+        for op in self.ops {
+            write!(f, "{}{}", op.len(), op.op())?;
+        }
+        Ok(())
+    }
+}
+
+/// A wrapper pairing a FlatGFA graph with one of its components so it can be displayed.
+pub struct Display<'a, T>(pub &'a flatgfa::FlatGFA<'a>, pub T);
+
+impl<'a> fmt::Display for Display<'a, flatgfa::Handle> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Display(gfa, handle) = self;
+        let name = gfa.get_handle_seg(*handle).name;
+        write!(f, "{}{}", name, handle.orient())
+    }
+}
+
+impl<'a> fmt::Display for Display<'a, &flatgfa::Path> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "P\t{}\t", self.0.get_path_name(&self.1))?;
+        // Comma-separate the steps. Iterating (rather than indexing `steps[0]`)
+        // keeps a path with no steps from panicking inside `fmt`.
+        for (i, step) in self.0.steps[self.1.steps].iter().enumerate() {
+            let sep = if i == 0 { "" } else { "," };
+            write!(f, "{}{}", sep, Display(self.0, *step))?;
+        }
+        write!(f, "\t")?;
+        let overlaps = &self.0.overlaps[self.1.overlaps];
+        if overlaps.is_empty() {
+            write!(f, "*")?;
+        }
+        for (i, overlap) in overlaps.iter().enumerate() {
+            let sep = if i == 0 { "" } else { "," };
+            write!(f, "{}{}", sep, self.0.get_alignment(*overlap))?;
+        }
+        Ok(())
+    }
+}
+
+impl<'a> fmt::Display for Display<'a, &flatgfa::Link> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Display(gfa, link) = self;
+        let (from, to) = (link.from, link.to);
+        let from_name = gfa.get_handle_seg(from).name;
+        let to_name = gfa.get_handle_seg(to).name;
+        write!(
+            f,
+            "L\t{}\t{}\t{}\t{}\t{}",
+            from_name,
+            from.orient(),
+            to_name,
+            to.orient(),
+            gfa.get_alignment(link.overlap)
+        )
+    }
+}
+
+impl<'a> fmt::Display for Display<'a, &flatgfa::Segment> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let name = self.1.name;
+        write!(f, "S\t{}\t{}", name, self.0.get_seq(self.1))?;
+        if self.1.optional.is_empty() {
+            return Ok(());
+        }
+        write!(f, "\t{}", self.0.get_optional_data(self.1))
+    }
+}
+
+/// Print a graph line by line, following the line order recorded from the original GFA file.
+fn write_preserved(gfa: &flatgfa::FlatGFA, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+    let mut segs = gfa.segs.all().iter();
+    let mut paths = gfa.paths.all().iter();
+    let mut links = gfa.links.all().iter();
+    for kind in gfa.get_line_order() {
+        match kind {
+            flatgfa::LineKind::Header => {
+                let header = gfa.header;
+                assert!(!header.is_empty());
+                writeln!(f, "H\t{}", bstr::BStr::new(header.all()))?;
+            }
+            flatgfa::LineKind::Segment => {
+                // Take the next segment in pool order.
+                writeln!(f, "{}", Display(gfa, segs.next().expect("too few segments")))?;
+            }
+            flatgfa::LineKind::Path => {
+                // Take the next path in pool order.
+                writeln!(f, "{}", Display(gfa, paths.next().expect("too few paths")))?;
+            }
+            flatgfa::LineKind::Link => {
+                // Take the next link in pool order.
+                writeln!(f, "{}", Display(gfa, links.next().expect("too few links")))?;
+            }
+        }
+    }
+    Ok(())
+}
+
+/// Print a graph as header, segments, paths, then links, ignoring the original line order.
+pub fn write_normalized(gfa: &flatgfa::FlatGFA, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+    if !gfa.header.is_empty() {
+        writeln!(f, "H\t{}", bstr::BStr::new(gfa.header.all()))?;
+    }
+    for segment in gfa.segs.all() {
+        writeln!(f, "{}", Display(gfa, segment))?;
+    }
+    for path in gfa.paths.all() {
+        writeln!(f, "{}", Display(gfa, path))?;
+    }
+    for link in gfa.links.all() {
+        writeln!(f, "{}", Display(gfa, link))?;
+    }
+    Ok(())
+}
+
+/// Print our flat representation in GFA text format.
+impl<'a> fmt::Display for &'a flatgfa::FlatGFA<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // With no recorded line order, emit the normalized layout instead.
+        if self.line_order.is_empty() {
+            return write_normalized(self, f);
+        }
+        write_preserved(self, f)
+    }
+}
diff --git a/flatgfa/src/lib.rs b/flatgfa/src/lib.rs
new file mode 100644
index 00000000..9b7dc559
--- /dev/null
+++ b/flatgfa/src/lib.rs
@@ -0,0 +1 @@
+pub mod fgfa_ds;
\ No newline at end of file
diff --git a/flatgfa/src/main.rs b/flatgfa/src/main.rs
index 53968e99..3541b49a 100644
--- a/flatgfa/src/main.rs
+++ b/flatgfa/src/main.rs
@@ -1,10 +1,17 @@
 use argh::FromArgs;
+
+mod fgfa_ds;
 use fgfa_ds::flatgfa::FlatGFA;
 use fgfa_ds::parse::Parser;
 use fgfa_ds::pool::Store;
 use fgfa_ds::{file, parse}; // TODO: hopefully remove at some point, this breaks a lot of principles
 
-mod cmds;
+mod commands;
+use commands::basic_cmds::{Toc, Paths, Stats, Position};
+use commands::{chop::Chop, depth::Depth, extract::Extract};
+
+use commands::basic_cmds::{toc, paths, stats, position};
+use commands::{chop::chop, depth::depth, extract::extract};
 
 #[derive(FromArgs)]
 /// Convert between GFA text and FlatGFA binary formats.
@@ -36,13 +43,13 @@ struct PolBin {
 #[derive(FromArgs, PartialEq, Debug)]
 #[argh(subcommand)]
 enum Command {
-    Toc(cmds::Toc),
-    Paths(cmds::Paths),
-    Stats(cmds::Stats),
-    Position(cmds::Position),
-    Extract(cmds::Extract),
-    Depth(cmds::Depth),
-    Chop(cmds::Chop),
+    Toc(Toc),
+    Paths(Paths),
+    Stats(Stats),
+    Position(Position),
+    Extract(Extract),
+    Depth(Depth),
+    Chop(Chop),
 }
 
 fn main() -> Result<(), &'static str> {
@@ -90,26 +97,26 @@ fn main() -> Result<(), &'static str> {
 
     match args.command {
         Some(Command::Toc(_)) => {
-            cmds::toc(&gfa);
+            toc(&gfa);
         }
         Some(Command::Paths(_)) => {
-            cmds::paths(&gfa);
+            paths(&gfa);
         }
         Some(Command::Stats(sub_args)) => {
-            cmds::stats(&gfa, sub_args);
+            stats(&gfa, sub_args);
         }
         Some(Command::Position(sub_args)) => {
-            cmds::position(&gfa, sub_args)?;
+            position(&gfa, sub_args)?;
         }
         Some(Command::Extract(sub_args)) => {
-            let store = cmds::extract(&gfa, sub_args)?;
+            let store = extract(&gfa, sub_args)?;
             dump(&store.as_ref(), &args.output);
         }
         Some(Command::Depth(_)) => {
-            cmds::depth(&gfa);
+            depth(&gfa);
         }
         Some(Command::Chop(sub_args)) => {
-            let store = cmds::chop(&gfa, sub_args)?;
+            let store = chop(&gfa, sub_args)?;
             // TODO: Ideally, find a way to encapsulate the logic of chop in `cmd.rs`, instead of
             // defining here which values from out input `gfa` are needed by our final `flat` gfa.
             // Here we are reference values in two different Stores to create this Flatgfa, and 

From 29e8a5f4737ea96d479a26cfe37a6d344b1ff11f Mon Sep 17 00:00:00 2001
From: susan-garry <sgarry406@gmail.com>
Date: Mon, 30 Sep 2024 11:24:50 -0400
Subject: [PATCH 3/8] comment out unused code, could be useful in the future if
 we want to use spans as ranges

---
 flatgfa/src/fgfa_ds/pool.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/flatgfa/src/fgfa_ds/pool.rs b/flatgfa/src/fgfa_ds/pool.rs
index 2872388a..2080ad13 100644
--- a/flatgfa/src/fgfa_ds/pool.rs
+++ b/flatgfa/src/fgfa_ds/pool.rs
@@ -86,9 +86,9 @@ impl<T> From<&Span<T>> for std::ops::Range<usize> {
 }
 
 impl<T> Span<T> {
-    pub fn is_empty(&self) -> bool {
-        self.start.0 == self.end.0
-    }
+    // pub fn is_empty(&self) -> bool {
+    //     self.start.0 == self.end.0
+    // }
 
     pub fn len(&self) -> usize {
         (self.end.0 - self.start.0) as usize

From 7a839a7d3ed4c55e6bcf6b924f8abd73d84accfb Mon Sep 17 00:00:00 2001
From: susan-garry <sgarry406@gmail.com>
Date: Mon, 30 Sep 2024 11:25:50 -0400
Subject: [PATCH 4/8] nevermind, comment out the actual unused code which may or
 may not ever be useful

---
 flatgfa/src/fgfa_ds/pool.rs | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/flatgfa/src/fgfa_ds/pool.rs b/flatgfa/src/fgfa_ds/pool.rs
index 2080ad13..94376abb 100644
--- a/flatgfa/src/fgfa_ds/pool.rs
+++ b/flatgfa/src/fgfa_ds/pool.rs
@@ -86,9 +86,9 @@ impl<T> From<&Span<T>> for std::ops::Range<usize> {
 }
 
 impl<T> Span<T> {
-    // pub fn is_empty(&self) -> bool {
-    //     self.start.0 == self.end.0
-    // }
+    pub fn is_empty(&self) -> bool {
+        self.start.0 == self.end.0
+    }
 
     pub fn len(&self) -> usize {
         (self.end.0 - self.start.0) as usize
@@ -135,9 +135,9 @@ pub trait Store<T: Clone> {
     fn len(&self) -> usize;
 
     /// Check whether the pool is empty.
-    fn is_empty(&self) -> bool {
-        self.len() == 0
-    }
+    // fn is_empty(&self) -> bool {
+    //     self.len() == 0
+    // }
 
     /// Get the next available ID.
     fn next_id(&self) -> Id<T> {

From 18eeb0df6b071af920b903ae70caf61ee9a2bd7f Mon Sep 17 00:00:00 2001
From: susan-garry <sgarry406@gmail.com>
Date: Mon, 30 Sep 2024 12:16:19 -0400
Subject: [PATCH 5/8] comment out docs for unused function

---
 flatgfa/src/fgfa_ds/pool.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flatgfa/src/fgfa_ds/pool.rs b/flatgfa/src/fgfa_ds/pool.rs
index 94376abb..6055c166 100644
--- a/flatgfa/src/fgfa_ds/pool.rs
+++ b/flatgfa/src/fgfa_ds/pool.rs
@@ -134,7 +134,7 @@ pub trait Store<T: Clone> {
     /// Get the number of items in the pool.
     fn len(&self) -> usize;
 
-    /// Check whether the pool is empty.
+    // /// Check whether the pool is empty.
     // fn is_empty(&self) -> bool {
     //     self.len() == 0
     // }

From eaa48d75046c93c08245f9f115bc90c7f8913c8d Mon Sep 17 00:00:00 2001
From: susan-garry <sgarry406@gmail.com>
Date: Wed, 2 Oct 2024 12:47:15 -0400
Subject: [PATCH 6/8] fix flatgfa imports for flatgfa-py

---
 flatgfa-py/Cargo.lock | 20 ++------------------
 flatgfa-py/src/lib.rs |  4 ++--
 flatgfa/Cargo.lock    |  2 +-
 flatgfa/Cargo.toml    |  2 +-
 flatgfa/src/lib.rs    |  5 ++++-
 5 files changed, 10 insertions(+), 23 deletions(-)

diff --git a/flatgfa-py/Cargo.lock b/flatgfa-py/Cargo.lock
index f8f3d345..bb352e08 100644
--- a/flatgfa-py/Cargo.lock
+++ b/flatgfa-py/Cargo.lock
@@ -77,14 +77,6 @@ version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
-[[package]]
-name = "commands"
-version = "0.1.0"
-dependencies = [
- "argh",
- "fgfa_ds",
-]
-
 [[package]]
 name = "equivalent"
 version = "1.0.1"
@@ -92,9 +84,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
 
 [[package]]
-name = "fgfa_ds"
+name = "flatgfa"
 version = "0.1.0"
 dependencies = [
+ "argh",
  "atoi",
  "bstr",
  "memchr",
@@ -104,15 +97,6 @@ dependencies = [
  "zerocopy",
 ]
 
-[[package]]
-name = "flatgfa"
-version = "0.1.0"
-dependencies = [
- "argh",
- "commands",
- "fgfa_ds",
-]
-
 [[package]]
 name = "flatgfa-py"
 version = "0.1.0"
diff --git a/flatgfa-py/src/lib.rs b/flatgfa-py/src/lib.rs
index a0593909..8a3a4a5f 100644
--- a/flatgfa-py/src/lib.rs
+++ b/flatgfa-py/src/lib.rs
@@ -1,5 +1,5 @@
-use flatgfa::fgfa_ds::pool::Id;
-use flatgfa::{self, file, print, FlatGFA, HeapGFAStore};
+use flatgfa::fgfa_ds::{file, print, pool::Id};
+use flatgfa::{self, FlatGFA, HeapGFAStore};
 use pyo3::exceptions::PyIndexError;
 use pyo3::prelude::*;
 use pyo3::types::{PyBytes, PySlice};
diff --git a/flatgfa/Cargo.lock b/flatgfa/Cargo.lock
index 8f3f20c6..7a0c0020 100644
--- a/flatgfa/Cargo.lock
+++ b/flatgfa/Cargo.lock
@@ -72,7 +72,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
 
 [[package]]
-name = "fgfa"
+name = "flatgfa"
 version = "0.1.0"
 dependencies = [
  "argh",
diff --git a/flatgfa/Cargo.toml b/flatgfa/Cargo.toml
index f8bad886..89c08188 100644
--- a/flatgfa/Cargo.toml
+++ b/flatgfa/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "fgfa"
+name = "flatgfa"
 version = "0.1.0"
 edition = "2021"
 
diff --git a/flatgfa/src/lib.rs b/flatgfa/src/lib.rs
index 9b7dc559..b0f1f4bc 100644
--- a/flatgfa/src/lib.rs
+++ b/flatgfa/src/lib.rs
@@ -1 +1,4 @@
-pub mod fgfa_ds;
\ No newline at end of file
+pub mod fgfa_ds;
+
+pub use fgfa_ds::*;
+pub use fgfa_ds::flatgfa::*;
\ No newline at end of file

From 8da1f54dc461fc576f268bde9f99a20656da784d Mon Sep 17 00:00:00 2001
From: susan-garry <sgarry406@gmail.com>
Date: Wed, 2 Oct 2024 16:46:19 -0400
Subject: [PATCH 7/8] turnt calls are verbose

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 15b77ce4..e2c374df 100644
--- a/Makefile
+++ b/Makefile
@@ -25,7 +25,7 @@ test-slow-odgi: fetch
 test-flatgfa: fetch
 	cd flatgfa ; cargo build
 
-	turnt -e flatgfa_mem -e flatgfa_file -e flatgfa_file_inplace tests/*.gfa
+	turnt -v -e flatgfa_mem -e flatgfa_file -e flatgfa_file_inplace tests/*.gfa
 
 	-turnt --save -v -e chop_oracle_fgfa tests/*.gfa
 	turnt -v -e flatgfa_chop tests/*.gfa

From d6f99e40a840313bd03ee372cfab892cc3cc2215 Mon Sep 17 00:00:00 2001
From: susan-garry <sgarry406@gmail.com>
Date: Wed, 2 Oct 2024 17:02:51 -0400
Subject: [PATCH 8/8] fixy fixy

---
 flatgfa/Cargo.toml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/flatgfa/Cargo.toml b/flatgfa/Cargo.toml
index 89c08188..2426a543 100644
--- a/flatgfa/Cargo.toml
+++ b/flatgfa/Cargo.toml
@@ -3,6 +3,10 @@ name = "flatgfa"
 version = "0.1.0"
 edition = "2021"
 
+[[bin]]
+name = "fgfa"
+path = "src/main.rs"
+
 [dependencies]
 argh = "0.1.12"
 atoi = "2.0.0"