From aa2b61206674a90dcf1d028ade936ec3a1bd16c4 Mon Sep 17 00:00:00 2001 From: Brent Pedersen Date: Fri, 18 Aug 2023 10:53:39 +0200 Subject: [PATCH] moving average and base-quality stubs --- src/bin/commands/trimmer.rs | 8 ++++ src/bin/main.rs | 4 +- src/lib/base_quality.rs | 78 +++++++++++++++++++++++++++++++++++++ src/lib/mod.rs | 1 + src/lib/moving_average.rs | 40 +++++++++++++++++++ 5 files changed, 129 insertions(+), 2 deletions(-) create mode 100644 src/lib/base_quality.rs create mode 100644 src/lib/moving_average.rs diff --git a/src/bin/commands/trimmer.rs b/src/bin/commands/trimmer.rs index 9a65e94..893c74f 100644 --- a/src/bin/commands/trimmer.rs +++ b/src/bin/commands/trimmer.rs @@ -19,6 +19,14 @@ pub(crate) struct TrimmerOpts { #[clap(long, short = 't', default_value = "5")] threads: usize, + /// Minimum base-quality to keep a base when trimming tails. + #[clap(long, short = 'q', default_value = "20")] + trim_tail_quality: u8, + + /// Window size for moving average when trimming tails. + #[clap(long, short = 'w', default_value = "20")] + trim_tail_window: u8, + /// Level of compression to use to compress outputs. 
use std::ops::Range;

/// Locate a stretch of oscillating base qualities in `bqs`.
///
/// Stub: no detection is implemented yet, so the empty range `0..0` is
/// always returned regardless of the input.
pub(crate) fn find_oscillating_quals(bqs: &[u8]) -> Range<usize> {
    // Nothing is computed from the input until the real implementation lands.
    let _ = bqs;
    0..0
}

/// Which end(s) of a read to inspect for low-quality bases.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Tail {
    Left,
    Right,
    Both,
}

/// A simple moving average calculator.
/// Only requires that T is convertible to f64.
/// Uses space of window * size_of(T) bytes.
#[derive(Debug, Clone)]
struct MovingAverage<T> {
    /// Capacity of the ring buffer (always at least 1).
    window: usize,
    /// Ring buffer holding the most recent values.
    values: Vec<T>,
    /// Sum of the values currently counted in the window.
    sum: f64,
    /// Slot that the next pushed value will overwrite.
    idx: usize,
    /// Number of values pushed so far, capped at `window`.
    len: usize,
}

impl<T: Copy + Default + Into<f64>> MovingAverage<T> {
    /// Create a new moving average calculator with a window of `window` values.
    ///
    /// A `window` of 0 is treated as 1 so that `push` can never index an
    /// empty buffer or take a remainder by zero.
    fn new(window: usize) -> Self {
        let window = window.max(1);
        Self { window, values: vec![T::default(); window], sum: 0.0, idx: 0, len: 0 }
    }

    /// Push a new value into the moving average calculator and get the new mean.
    fn push(&mut self, value: T) -> f64 {
        if self.len == self.window {
            // Buffer is full: the slot being overwritten leaves the window.
            self.sum -= self.values[self.idx].into();
        } else {
            // Still warming up: the slot only holds a placeholder default.
            self.len += 1;
        }
        self.values[self.idx] = value;
        self.sum += value.into();
        self.idx = (self.idx + 1) % self.window;
        self.mean()
    }

    /// Get the current mean.
    ///
    /// The mean is taken over the values pushed so far (at most `window` of
    /// them), so placeholder slots do not drag it toward zero during warm-up.
    /// Returns 0.0 before the first push.
    #[inline]
    fn mean(&self) -> f64 {
        if self.len == 0 {
            0.0
        } else {
            self.sum / (self.len as f64)
        }
    }
}

/// Find the run of low-quality bases at one end of `bqs`.
///
/// A base is low quality when its value is strictly below `min_quality`.
///
/// * `Tail::Left`  - returns `0..n`, the leading run of low-quality bases.
/// * `Tail::Right` - returns `n..bqs.len()`, the trailing run of low-quality
///   bases. The range is empty (`len..len`) when the last base is good.
/// * `Tail::Both`  - returns the leading run, exactly like `Tail::Left`.
///   NOTE(review): a single `Range` cannot describe two disjoint tails, so the
///   historical behaviour is kept. Callers that need both ends should call
///   this once with `Left` and once with `Right`.
///
/// `window` is accepted for interface stability but is not used yet: every
/// base is compared against `min_quality` individually.
pub(crate) fn find_low_quality_bases(
    bqs: &[u8],
    min_quality: u8,
    window: u8,
    tail: Tail,
) -> Range<usize> {
    // The moving-average based trimming has not been wired in yet.
    let _ = window;

    match tail {
        Tail::Left | Tail::Both => {
            // Index of the first acceptable base; everything before it is trimmed.
            let end = bqs.iter().position(|&q| q >= min_quality).unwrap_or(bqs.len());
            0..end
        }
        Tail::Right => {
            // One past the last acceptable base; everything from there on is trimmed.
            // Falls back to 0 when no base passes, which also covers the empty slice.
            let start = bqs.iter().rposition(|&q| q >= min_quality).map_or(0, |i| i + 1);
            start..bqs.len()
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_find_oscillating_quals() {
        let bqs = b"!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJ";
        let range = find_oscillating_quals(bqs);
        assert_eq!(range, 0..0);
    }

    #[test]
    fn test_find_low_quality_bases_right_tail() {
        assert_eq!(find_low_quality_bases(&[30, 30, 10, 5], 20, 20, Tail::Right), 2..4);
        assert_eq!(find_low_quality_bases(&[], 20, 20, Tail::Right), 0..0);
    }

    #[test]
    fn test_moving_average() {
        let mut ma = MovingAverage::<u8>::new(3);
        assert_eq!(ma.push(3), 3.0);
        assert_eq!(ma.push(6), 4.5);
        assert_eq!(ma.push(9), 6.0);
        assert_eq!(ma.push(12), 9.0);
    }
}
+ #[inline] + fn mean(&self) -> f64 { + self.sum / (self.window as f64) + } +} + +// write some tests for the calculator +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_moving_average() {} +}