Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mereg dev into main #10

Merged
merged 3 commits into from
Mar 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "oarfish"
version = "0.3.0"
version = "0.3.1"
edition = "2021"
authors = [
"Zahra Zare Jousheghani <[email protected]>",
Expand Down Expand Up @@ -34,9 +34,9 @@ anyhow = "1.0.80"
bio-types = "1.0.1"
clap = { version = "4.5.1", features = ["derive"] }
coitrees = "0.4.0"
noodles-bam = "0.55.0"
noodles-bam = "0.56.0"
noodles-gtf = "0.23.0"
noodles-sam = "0.52.0"
noodles-sam = "0.53.0"
num-format = "0.4.4"
tabled = "0.15.0"
tracing = "0.1.40"
Expand Down
46 changes: 0 additions & 46 deletions src/bootstrap.rs
Original file line number Diff line number Diff line change
@@ -1,52 +1,6 @@
use rand::distributions::{Distribution, Uniform};
use rand::Rng;

/// Wraps an iterator `I` to produce a new iterator that samples the original
/// with replacement.
pub struct SampleWithReplaceIter<'b, I: ExactSizeIterator> {
pub inds: &'b [usize],
pub iter: I,
pub idx: usize,
pub e_ptr: usize,
pub prev: Option<I::Item>,
}

impl<'b, I: ExactSizeIterator> Iterator for SampleWithReplaceIter<'b, I>
where
I::Item: Copy,
{
type Item = I::Item;

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if let Some(next_ind) = self.inds.get(self.idx) {
// always makes sure we have a previous
// element. This will get the first
// element but wont yet increment
// the pointer (e_ptr)
if self.prev.is_none() {
self.prev = self.iter.next();
}
// next element is not a duplicate
if self.e_ptr != *next_ind {
// otherwise, call next until we have the
// correct element
loop {
self.prev = self.iter.next();
self.e_ptr += 1;
if self.e_ptr == *next_ind {
break;
}
}
}
self.idx += 1;
self.prev
} else {
None
}
}
}

pub fn get_sample_inds<R: Rng + ?Sized>(n: usize, rng: &mut R) -> Vec<usize> {
let dist = Uniform::new(0, n);
let mut inds: Vec<usize> = dist.sample_iter(rng).take(n).collect();
Expand Down
8 changes: 1 addition & 7 deletions src/em.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,13 +237,7 @@ pub fn do_bootstrap(em_info: &EMInfo) -> Vec<f64> {

// function that produces an iterator over the
// scored alignments on demand
let make_iter = || bootstrap::SampleWithReplaceIter {
inds: &inds,
iter: em_info.eq_map.iter(),
idx: 0,
e_ptr: 0,
prev: None,
};
let make_iter = || em_info.eq_map.random_sampling_iter(&inds);

do_em(em_info, make_iter, false)
}
Expand Down
49 changes: 36 additions & 13 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use std::{
};

use num_format::{Locale, ToFormattedString};
use serde::Serialize;
use serde_json::json;
use tracing::info;
use tracing_subscriber::{filter::LevelFilter, fmt, prelude::*, EnvFilter};

Expand All @@ -30,14 +32,14 @@ use crate::util::write_function::{write_infrep_file, write_output};
/// These represent different "meta-options", specific settings
/// for all of the different filters that should be applied in
/// different cases.
#[derive(Clone, Debug, clap::ValueEnum)]
#[derive(Clone, Debug, clap::ValueEnum, Serialize)]
enum FilterGroup {
NoFilters,
NanocountFilters,
}

/// accurate transcript quantification from long-read RNA-seq data
#[derive(Parser, Debug)]
#[derive(Parser, Debug, Serialize)]
#[clap(author, version, about, long_about = None)]
struct Args {
/// be quiet (i.e. don't output log messages that aren't at least warnings)
Expand Down Expand Up @@ -168,6 +170,31 @@ fn get_filter_opts(args: &Args) -> AlignmentFilters {
}
}

fn get_json_info(args: &Args, emi: &EMInfo) -> serde_json::Value {
let prob = if args.model_coverage {
"binomial"
} else {
"no_coverage"
};

json!({
"prob_model" : prob,
"num_bins" : args.bins,
"filter_options" : &emi.eq_map.filter_opts,
"discard_table" : &emi.eq_map.discard_table,
"alignments": &args.alignments,
"output": &args.output,
"verbose": &args.verbose,
"quiet": &args.quiet,
"em_max_iter": &args.max_em_iter,
"em_convergence_thresh": &args.convergence_thresh,
"threads": &args.threads,
"filter_group": &args.filter_group,
"short_quant": &args.short_quant,
"num_bootstraps": &args.num_bootstraps
})
}

fn main() -> anyhow::Result<()> {
let env_filter = EnvFilter::builder()
.with_default_directive(LevelFilter::INFO.into())
Expand Down Expand Up @@ -254,8 +281,8 @@ fn main() -> anyhow::Result<()> {

// if we are seeding the quantification estimates with short read
// abundances, then read those in here.
let init_abundances = args.short_quant.map(|sr_path| {
read_short_quant_vec(&sr_path, &txps_name).unwrap_or_else(|e| panic!("{}", e))
let init_abundances = args.short_quant.as_ref().map(|sr_path| {
read_short_quant_vec(sr_path, &txps_name).unwrap_or_else(|e| panic!("{}", e))
});

// wrap up all of the relevant information we need for estimation
Expand All @@ -274,16 +301,12 @@ fn main() -> anyhow::Result<()> {
em::em(&emi, args.threads)
};

// prepare the JSON object we'll write
// to meta_info.json
let json_info = get_json_info(&args, &emi);

// write the output
write_output(
&args.output,
&args.model_coverage,
args.bins,
args.num_bootstraps,
&emi,
&header,
&counts,
)?;
write_output(&args.output, json_info, &header, &counts)?;

// if the user requested bootstrap replicates,
// compute and write those out now.
Expand Down
59 changes: 53 additions & 6 deletions src/util/oarfish_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,10 +240,44 @@ pub struct InMemoryAlignmentStore<'h> {
impl<'h> InMemoryAlignmentStore<'h> {
#[inline]
pub fn len(&self) -> usize {
self.boundaries.len()
self.boundaries.len().saturating_sub(1)
}
}

pub struct InMemoryAlignmentStoreSamplingWithReplacementIter<'a, 'h, 'b> {
pub store: &'a InMemoryAlignmentStore<'h>,
pub rand_inds: std::slice::Iter<'b, usize>,
}

impl<'a, 'b, 'h> Iterator for InMemoryAlignmentStoreSamplingWithReplacementIter<'a, 'b, 'h> {
type Item = (&'a [AlnInfo], &'a [f32], &'a [f64]);

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if let Some(next_ind) = self.rand_inds.next() {
let start = self.store.boundaries[*next_ind];
let end = self.store.boundaries[*next_ind + 1];
Some((
&self.store.alignments[start..end],
&self.store.as_probabilities[start..end],
&self.store.coverage_probabilities[start..end],
))
} else {
None
}
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
(self.rand_inds.len(), Some(self.rand_inds.len()))
}
}

impl<'a, 'b, 'h> ExactSizeIterator
for InMemoryAlignmentStoreSamplingWithReplacementIter<'a, 'b, 'h>
{
}

pub struct InMemoryAlignmentStoreIter<'a, 'h> {
pub store: &'a InMemoryAlignmentStore<'h>,
pub idx: usize,
Expand All @@ -267,15 +301,15 @@ impl<'a, 'h> Iterator for InMemoryAlignmentStoreIter<'a, 'h> {
))
}
}
}

impl<'a, 'h> ExactSizeIterator for InMemoryAlignmentStoreIter<'a, 'h> {
#[inline]
fn len(&self) -> usize {
self.store.boundaries.len()
fn size_hint(&self) -> (usize, Option<usize>) {
(self.store.len(), Some(self.store.len()))
}
}

impl<'a, 'h> ExactSizeIterator for InMemoryAlignmentStoreIter<'a, 'h> {}

impl<'h> InMemoryAlignmentStore<'h> {
pub fn new(fo: AlignmentFilters, header: &'h Header) -> Self {
InMemoryAlignmentStore {
Expand All @@ -296,6 +330,19 @@ impl<'h> InMemoryAlignmentStore<'h> {
}
}

pub fn random_sampling_iter<'a, 'b>(
&'a self,
inds: &'b [usize],
) -> InMemoryAlignmentStoreSamplingWithReplacementIter
where
'b: 'a,
{
InMemoryAlignmentStoreSamplingWithReplacementIter {
store: self,
rand_inds: inds.iter(),
}
}

pub fn add_group<T: sam::alignment::record::Record + std::fmt::Debug>(
&mut self,
txps: &mut [TranscriptInfo],
Expand Down Expand Up @@ -326,7 +373,7 @@ impl<'h> InMemoryAlignmentStore<'h> {

pub fn num_aligned_reads(&self) -> usize {
if !self.boundaries.is_empty() {
self.boundaries.len() - 1
self.len()
} else {
0
}
Expand Down
21 changes: 2 additions & 19 deletions src/util/write_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use arrow2::{
datatypes::{Field, Schema},
};
use path_tools::WithAdditionalExtension;
use serde_json::json;

use std::path::{Path, PathBuf};
use std::{
fs,
Expand All @@ -20,10 +20,7 @@ use std::{
//this part is taken from dev branch
pub fn write_output(
output: &PathBuf,
prob_flag: &bool,
bins: u32,
num_bootstraps: u32,
em_info: &EMInfo,
info: serde_json::Value,
header: &noodles_sam::header::Header,
counts: &[f64],
) -> io::Result<()> {
Expand All @@ -37,21 +34,7 @@ pub fn write_output(
}
}

let prob = if *prob_flag {
"binomial"
} else {
"no_coverage"
};

{
let info = json!({
"prob_model" : prob,
"num_bins" : bins,
"num_bootstraps" : num_bootstraps,
"filter_options" : em_info.eq_map.filter_opts,
"discard_table" : em_info.eq_map.discard_table
});

let info_path = output.with_additional_extension(".meta_info.json");
let write = OpenOptions::new()
.write(true)
Expand Down