From c21c6d5098b65819dcf5afc14c2027ee19961d0a Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Thu, 8 Feb 2024 15:38:12 -0600 Subject: [PATCH] feat: update dependencies --- Cargo.toml | 23 ++++++++++++--------- src/extract.rs | 55 ++++++++++++++++++++++++++------------------------ src/fa2fq.rs | 2 +- src/index.rs | 42 ++++++++++++++++++++++---------------- src/main.rs | 19 ++++++++--------- 5 files changed, 78 insertions(+), 63 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 87be3ee..f8b31de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,28 +36,31 @@ opt-level = 3 codegen-units = 1 [dependencies] +ahash = "0.8.7" anyhow = { version = "1.0" } bio = "1.5.0" -clap = { version = "4.4.17", features = ["wrap_help", "derive", "cargo"] } -clap-verbosity-flag = "2.1.1" -clap_complete = "4.4" +bstr = "1.9" +clap = { version = "4.5.0", features = ["wrap_help", "derive", "cargo"] } +clap-verbosity-flag = "2.1.2" +clap_complete = "4.5" colored = "2" -env_logger = "0.10" +env_logger = "0.11" human-panic = "1.2.3" indicatif = "0.17" log = "0.4" -noodles-bam = "0.52.0" +noodles-bam = "0.54.1" noodles-bgzf = "0.26" -noodles-csi = "0.29" -noodles-fasta = "0.31" +noodles-core = "0.14" +noodles-csi = "0.30" +noodles-fasta = "0.32" noodles-fastq = "0.10" -noodles-sam = "0.49" +noodles-sam = "0.51" petgraph = { version = "0.6", features = ["serde-1"] } plotters = { version = "0.3" } -polars = { version = "0.36", features = ["lazy", "nightly"] } +polars = { version = "0.37", features = ["lazy"] } rayon = { version = "1.8" } regex = { version = "1.10" } serde = { version = "1.0" } @@ -70,4 +73,4 @@ assert_cmd = "2.0" assert_fs = "1.1" pretty_assertions = "1" sha256 = "1.5" -tempfile = "3.9" +tempfile = "3.10" diff --git a/src/extract.rs b/src/extract.rs index 2838de3..2483f0d 100644 --- a/src/extract.rs +++ b/src/extract.rs @@ -1,4 +1,4 @@ -use anyhow::{Error, Result}; +use anyhow::Result; use std::{ collections::HashSet, fs::File, @@ -9,30 +9,31 @@ use std::{ use noodles_bam as bam; use noodles_sam::{ self as sam, + header::record::value::map::program, header::record::value::{map::Program, Map}, - record::ReadName, }; +use bstr::BString; use clap::crate_version; use std::path::PathBuf; -fn writer(file: Option<&PathBuf>, is_bam: bool) -> Result> { +fn writer(file: Option<&PathBuf>, is_bam: bool) -> Result> { let sink: Box = if let Some(file) = file { Box::new(File::create(file)?) } else { Box::new(io::stdout().lock()) }; - let writer: Box = if is_bam { - Box::new(bam::Writer::new(sink)) + let writer: Box = if is_bam { + Box::new(bam::io::Writer::new(sink)) } else { - Box::new(sam::Writer::new(sink)) + Box::new(sam::io::Writer::new(sink)) }; Ok(writer) } -fn read_read_names_from_file

(src: P) -> Result> +fn read_read_names_from_file

(src: P) -> Result>> where P: AsRef, { @@ -40,17 +41,14 @@ where let mut read_names = HashSet::new(); for result in reader.lines() { - let read_name = result.and_then(|s| { - ReadName::try_from(s.into_bytes()) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) - })?; + let read_name = result.map(|s| s.as_bytes().to_vec())?; read_names.insert(read_name); } Ok(read_names) } -fn parse_read_ids(read_ids: &str) -> Result> { +fn parse_read_ids(read_ids: &str) -> Result>> { if read_ids.is_empty() { return Ok(HashSet::new()); } @@ -63,7 +61,7 @@ fn parse_read_ids(read_ids: &str) -> Result> { read_ids .split(',') - .map(|id| ReadName::try_from(id.as_bytes().to_vec()).map_err(Error::from)) // Replace with the actual method to create ReadName from &str + .map(|id| Ok(id.as_bytes().to_vec())) // Replace with the actual method to create ReadName from &str .collect::, _>>() // Assuming } @@ -73,30 +71,35 @@ where { let read_names = parse_read_ids(read_ids)?; - let mut reader = bam::reader::Builder.build_from_path(&bam_file)?; + let mut reader = bam::io::reader::Builder.build_from_path(&bam_file)?; let mut header = reader.read_header()?; let program = Map::::builder() - .set_name("rboss") - .set_version(crate_version!()) - .set_command_line(format!( - "rboss extract {} {} {}", - read_ids, - bam_file.as_ref().to_string_lossy(), - if is_bam { "-b" } else { "" } - )) + .insert(program::tag::NAME, Vec::from("rboss")) + .insert(program::tag::VERSION, Vec::from(crate_version!())) + .insert( + program::tag::COMMAND_LINE, + Vec::from(format!( + "rboss extract {} {} {}", + read_ids, + bam_file.as_ref().to_string_lossy(), + if is_bam { "-b" } else { "" } + )), + ) .build()?; - header.programs_mut().insert(String::from("rboss"), program); + header + .programs_mut() + .insert(BString::from("rboss"), program); let mut writer = writer(None, is_bam)?; writer.write_alignment_header(&header)?; - for result in reader.records(&header) { + for result in reader.records() { let record = result?; - if let Some(read_name) = record.read_name() { - if read_names.contains(read_name) { + if let Some(read_name) = record.name() { + if read_names.contains(read_name.as_bytes()) { writer.write_alignment_record(&header, &record)?; } } diff --git a/src/fa2fq.rs b/src/fa2fq.rs index e1a1407..dc95632 100644 --- a/src/fa2fq.rs +++ b/src/fa2fq.rs @@ -9,7 +9,7 @@ pub fn fa2fq>(input: P) -> Result<()> { for result in reader.records() { let record = result?; - let name = record.name().to_string(); + let name = record.name(); let sequence = record.sequence().as_ref().to_vec(); let qualities = vec![b'@'; sequence.len()]; let fastq_record = fastq::Record::new( diff --git a/src/index.rs b/src/index.rs index c6536ba..8d9e8b4 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1,24 +1,35 @@ use std::io; use std::path::Path; +use noodles_core::Position; + use noodles_bam::{self as bam, bai}; use noodles_csi::binning_index::{index::reference_sequence::bin::Chunk, Indexer}; -use noodles_sam::{self as sam, alignment::Record}; +use noodles_sam::{self as sam}; +use sam::alignment::RecordBuf; fn is_coordinate_sorted(header: &sam::Header) -> bool { - use sam::header::record::value::map::header::SortOrder; + use sam::header::record::value::map::header::{sort_order, tag}; - if let Some(hdr) = header.header() { - if let Some(sort_order) = hdr.sort_order() { - return sort_order == SortOrder::Coordinate; - } - } + header + .header() + .and_then(|hdr| hdr.other_fields().get(&tag::SORT_ORDER)) + .map(|sort_order| sort_order == sort_order::COORDINATE) + .unwrap_or_default() +} - false +fn alignment_context( + record: &sam::alignment::RecordBuf, +) -> io::Result<(Option, Option, Option)> { + Ok(( + record.reference_sequence_id(), + record.alignment_start(), + record.alignment_end(), + )) } pub fn index_bam, W: io::Write>(file: P, index_file: Option) -> io::Result<()> { - let mut reader = bam::reader::Builder.build_from_path(file.as_ref())?; + let mut reader = bam::io::reader::Builder.build_from_path(file.as_ref())?; let header = reader.read_header()?; if !is_coordinate_sorted(&header) { @@ -28,22 +39,19 @@ pub fn index_bam, W: io::Write>(file: P, index_file: Option) - )); } - let mut record = Record::default(); + let mut record = RecordBuf::default(); let mut builder = Indexer::default(); let mut start_position = reader.virtual_position(); - while reader.read_record(&header, &mut record)? != 0 { + while reader.read_record_buf(&header, &mut record)? != 0 { let end_position = reader.virtual_position(); let chunk = Chunk::new(start_position, end_position); - let alignment_context = match ( - record.reference_sequence_id(), - record.alignment_start(), - record.alignment_end(), - ) { + let alignment_context = match alignment_context(&record)? { (Some(id), Some(start), Some(end)) => { - Some((id, start, end, !record.flags().is_unmapped())) + let is_mapped = !record.flags().is_unmapped(); + Some((id, start, end, is_mapped)) } _ => None, }; diff --git a/src/main.rs b/src/main.rs index 89ff416..31782fb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,7 +14,7 @@ mod fq2fa; mod index; mod rsoft; -mod anno; +// mod anno; mod graph; #[derive(Parser, Debug)] @@ -91,9 +91,8 @@ enum Commands { /// Graph Analysis Graph(graph::GraphArgs), - - /// Annotation scannls - AnnoScan(anno::AnnoArgs), + // Annotation scannls + // AnnoScan(anno::AnnoArgs), } fn print_completions(gen: G, cmd: &mut Command) { @@ -168,12 +167,14 @@ fn main() { graph::analyze(args).unwrap(); } - Some(Commands::AnnoScan(args)) => { - info!("'anno' {args:?} "); - anno::run_poa(&args.vcf).unwrap(); - } + // Some(Commands::AnnoScan(args)) => { + // info!("'anno' {args:?} "); + // anno::run_poa(&args.vcf).unwrap(); + // } - // If no subcommand was used, it's a normal top level command + // // If no subcommand was used, it's a normal top level command None => info!("No subcommand was used"), + + _ => info!("Subcommand not implemented yet!"), } }