From cbaedc40cc132db5212c3e76fdb887fdc54eeb65 Mon Sep 17 00:00:00 2001 From: Douwe Schulte Date: Tue, 29 Oct 2024 17:29:07 +0100 Subject: [PATCH 01/11] Added method to retrieve the raw spectrum from any USI --- Cargo.lock | 142 ++++++++ Cargo.toml | 11 +- examples/averaging_writer.rs | 3 +- examples/describe_instrument.rs | 4 +- examples/get_scan_by.rs | 32 +- examples/mzcat.rs | 2 +- examples/mzconvert.rs | 5 +- src/io/compression.rs | 36 +- src/{io.rs => io/mod.rs} | 3 +- src/io/proxi.rs | 615 +++++++++++++++++++++++++++----- src/io/usi.rs | 103 ++++-- 11 files changed, 782 insertions(+), 174 deletions(-) rename src/{io.rs => io/mod.rs} (98%) diff --git a/Cargo.lock b/Cargo.lock index e6de61a..d84cd0d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -146,6 +146,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16" +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" version = "1.4.0" @@ -810,6 +816,15 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +[[package]] +name = "encoding_rs" +version = "0.8.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" +dependencies = [ + "cfg-if", +] + [[package]] name = "enum-map" version = "2.7.3" @@ -987,6 +1002,21 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.31" @@ -1003,12 +1033,34 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + [[package]] name = "futures-io" version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", +] + [[package]] name = "futures-sink" version = "0.3.31" @@ -1027,8 +1079,10 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ + "futures-channel", "futures-core", "futures-io", + "futures-macro", "futures-sink", "futures-task", "memchr", @@ -1076,6 +1130,25 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +[[package]] +name = "h2" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "half" version = "2.4.1" @@ -1256,6 +1329,7 @@ dependencies = [ "bytes", "futures-channel", "futures-util", + "h2", "http", "http-body", "httparse", @@ -1284,6 +1358,22 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "hyper-tls" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" +dependencies = [ + "bytes", + "http-body-util", + "hyper", + "hyper-util", + "native-tls", + "tokio", + "tokio-native-tls", + "tower-service", +] + [[package]] name = "hyper-util" version = "0.1.9" @@ -1685,6 +1775,7 @@ dependencies = [ "env_logger 0.10.2", "filename", "flate2", + "futures", "hdf5", "hdf5-sys", "indexmap", @@ -1700,6 +1791,7 @@ dependencies = [ "quick-xml", "rayon", "regex", + "reqwest", "serde", "serde_json", "sha1", @@ -2445,19 +2537,23 @@ checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" dependencies = [ "base64", "bytes", + "encoding_rs", "futures-channel", "futures-core", "futures-util", + "h2", "http", "http-body", "http-body-util", "hyper", "hyper-rustls", + "hyper-tls", "hyper-util", "ipnet", "js-sys", "log", "mime", + "native-tls", "once_cell", "percent-encoding", "pin-project-lite", @@ -2469,7 +2565,9 @@ dependencies = [ "serde_json", "serde_urlencoded", "sync_wrapper", + "system-configuration", "tokio", + "tokio-native-tls", "tokio-rustls", "tower-service", "url", @@ -2859,6 +2957,27 @@ dependencies = [ "futures-core", ] +[[package]] +name = "system-configuration" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +dependencies = [ + "bitflags 2.6.0", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "tar" version = "0.4.42" @@ -3027,6 +3146,16 @@ dependencies = [ "syn 2.0.79", ] +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.0" @@ -3038,6 +3167,19 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-util" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + [[package]] name = "toml" version = "0.8.19" diff --git a/Cargo.toml b/Cargo.toml index 49c398f..60024e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,6 +79,9 @@ doc-only = ["thermorawfilereader/doc-only"] async = ["tokio", "quick-xml/async-tokio"] +proxi = ["reqwest"] +proxi-async = ["proxi", "futures"] + [dependencies] regex = "1" lazy_static = "1.4.0" @@ -91,7 +94,9 @@ indexmap = { version = "2.0.0", features = ["serde"] } log = "0.4.20" mzpeaks = { version = ">=0.22.0,<1.0.0" } rayon = { version = ">=1.8.0,<2.0", optional = true } -mzsignal = { version = ">=0.25.0,<1.0.0", default-features = false, optional = true, features = ['avx']} +mzsignal = { version = ">=0.25.0,<1.0.0", default-features = false, optional = true, features = [ + 'avx', +] } md5 = "0.7.0" tokio = { version = "1.32.0", optional = true, features = [ "macros", @@ -107,6 +112,8 @@ libz-sys = { version = "1.1", default-features = false, features = [ ], optional = true } ndarray = { version = "0.15.6", optional = true } filename = { version = "0.1.1", optional = true } +reqwest = { version = "0.12", features = ["json", "blocking"], optional = true } +futures = { version = "0.3", optional = true } numpress = { version = "1.1.0", optional = true } bytemuck = { version = "1.18.0", features = ["extern_crate_alloc"] } @@ -139,6 +146,8 @@ features = [ "mzmlb", "async", "thermorawfilereader", + "proxi", + "proxi-async", "doc-only", ] no-default-features = true diff --git a/examples/averaging_writer.rs b/examples/averaging_writer.rs index de8c93d..8017dd4 100644 --- a/examples/averaging_writer.rs +++ b/examples/averaging_writer.rs @@ -60,7 +60,8 @@ fn main() -> io::Result<()> { ); }); - let collator_task = thread::spawn(move || Collator::collate_sync(input_receiver, output_sender)); + let collator_task = + thread::spawn(move || Collator::collate_sync(input_receiver, output_sender)); let writer_task = thread::spawn(move || -> io::Result<()> { for (_, group) in output_receiver { diff --git a/examples/describe_instrument.rs b/examples/describe_instrument.rs index ed8667e..7e4b877 100644 --- a/examples/describe_instrument.rs +++ b/examples/describe_instrument.rs @@ -1,8 +1,8 @@ use std::env; use std::io; -use mzdata::prelude::*; use mzdata::mz_read; +use mzdata::prelude::*; fn main() -> io::Result<()> { env_logger::init(); @@ -29,4 +29,4 @@ fn main() -> io::Result<()> { } Ok(()) -} \ No newline at end of file +} diff --git a/examples/get_scan_by.rs b/examples/get_scan_by.rs index f95af23..4835b54 100644 --- a/examples/get_scan_by.rs +++ b/examples/get_scan_by.rs @@ -4,32 +4,30 @@ use log::info; use mzdata::io::MZReader; use mzdata::prelude::*; - fn main() -> io::Result<()> { env_logger::init(); let mut args = env::args().skip(1); - let path = path::PathBuf::from( - args.next() - .expect("Please pass an MS data file path"), - ); + let path = path::PathBuf::from(args.next().expect("Please pass an MS data file path")); - let key = args.next().expect("Please provide a key type, \"id\", \"index\" or \"time\""); - let key_value = args.next().expect("Please provide a key value matching the key type"); + let key = args + .next() + .expect("Please provide a key type, \"id\", \"index\" or \"time\""); + let key_value = args + .next() + .expect("Please provide a key value matching the key type"); info!("Opening {}", path.display()); let mut reader = MZReader::open_path(path)?; let spectrum = match key.as_str() { - "id" => { - reader.get_spectrum_by_id(&key_value).unwrap() - }, - "index" => { - reader.get_spectrum_by_index(key_value.parse().unwrap()).unwrap() - }, - "time" => { - reader.get_spectrum_by_time(key_value.parse().unwrap()).unwrap() - }, + "id" => reader.get_spectrum_by_id(&key_value).unwrap(), + "index" => reader + .get_spectrum_by_index(key_value.parse().unwrap()) + .unwrap(), + "time" => reader + .get_spectrum_by_time(key_value.parse().unwrap()) + .unwrap(), _ => { panic!("Unknown key type {}", key); } @@ -38,4 +36,4 @@ fn main() -> io::Result<()> { dbg!(spectrum); Ok(()) -} \ No newline at end of file +} diff --git a/examples/mzcat.rs b/examples/mzcat.rs index 87495a9..70d36f0 100644 --- a/examples/mzcat.rs +++ b/examples/mzcat.rs @@ -1,10 +1,10 @@ use std::time; use std::{env, io, path}; -use rayon::prelude::*; use env_logger; use mzdata::spectrum::MultiLayerSpectrum; use mzdata::{prelude::*, MZReader}; +use rayon::prelude::*; fn scan_file + Send>(reader: &mut R) { let start = time::Instant::now(); diff --git a/examples/mzconvert.rs b/examples/mzconvert.rs index d6693f5..8c0689d 100644 --- a/examples/mzconvert.rs +++ b/examples/mzconvert.rs @@ -111,7 +111,10 @@ impl MassSpectrometryReadWriteProcess for MZConv .file_description() .source_files .iter() - .flat_map(|f| f.get_param_by_name("SHA-1").map(|c| c.value.as_str() == checksum)) + .flat_map(|f| { + f.get_param_by_name("SHA-1") + .map(|c| c.value.as_str() == checksum) + }) .all(|a| a); if !has_already { let mut sf = SourceFile::default(); diff --git a/src/io/compression.rs b/src/io/compression.rs index ddfc90c..ec2e5b7 100644 --- a/src/io/compression.rs +++ b/src/io/compression.rs @@ -74,28 +74,24 @@ impl Seek for RestartableGzDecoder { io::ErrorKind::Unsupported, "Cannot seek relative to end of a gzip stream", )), - io::SeekFrom::Current(o) => { - match o { - 0 => { - Ok(self.offset) - }, - _ if o < 0 => { - if o.unsigned_abs() > self.offset { - Err(io::Error::new( - io::ErrorKind::Unsupported, - "Cannot earlier than the start of the stream", - )) - } else { - self.seek(io::SeekFrom::Start((self.offset as i64 + o) as u64)) - } - }, - _ => { - let mut buf = vec![0; o as usize]; - self.read_exact(&mut buf)?; - Ok(self.offset) + io::SeekFrom::Current(o) => match o { + 0 => Ok(self.offset), + _ if o < 0 => { + if o.unsigned_abs() > self.offset { + Err(io::Error::new( + io::ErrorKind::Unsupported, + "Cannot earlier than the start of the stream", + )) + } else { + self.seek(io::SeekFrom::Start((self.offset as i64 + o) as u64)) } } - } + _ => { + let mut buf = vec![0; o as usize]; + self.read_exact(&mut buf)?; + Ok(self.offset) + } + }, } } } diff --git a/src/io.rs b/src/io/mod.rs similarity index 98% rename from src/io.rs rename to src/io/mod.rs index cddf1c5..97beecc 100644 --- a/src/io.rs +++ b/src/io/mod.rs @@ -9,6 +9,8 @@ pub mod mzml; #[cfg(feature = "mzmlb")] pub mod mzmlb; mod offset_index; +#[cfg(feature = "proxi")] +pub mod proxi; mod shorthand; pub(crate) mod traits; mod utils; @@ -44,4 +46,3 @@ pub mod thermo; pub use thermo::ThermoRawReader; pub mod usi; -pub mod proxi; diff --git a/src/io/proxi.rs b/src/io/proxi.rs index 0c00b0f..8748a54 100644 --- a/src/io/proxi.rs +++ b/src/io/proxi.rs @@ -1,12 +1,230 @@ -use std::fmt::Display; -use std::str::FromStr; +use std::{cmp::Ordering, fmt::Display, marker::PhantomData, str::FromStr}; +use num_traits::AsPrimitive; use serde::{Deserialize, Serialize}; -use super::usi::USI; -use crate::params::{ControlledVocabulary, Param, ParamCow, Value, CURIE}; -use crate::spectrum::{ArrayType, BinaryArrayMap, BinaryDataArrayType, DataArray, IsolationWindowState, MultiLayerSpectrum, Precursor, ScanPolarity, SignalContinuity, SpectrumDescription}; -use crate::{curie, prelude::*}; +use crate::{ + curie, + io::usi::USI, + params::{ControlledVocabulary, Param, ParamCow, Value, CURIE}, + prelude::*, + spectrum::{ + ArrayType, BinaryArrayMap, BinaryDataArrayType, DataArray, IsolationWindowState, + MultiLayerSpectrum, Precursor, ScanPolarity, SignalContinuity, SpectrumDescription, + }, +}; + +/// The possible PROXI backends +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] +pub enum PROXIBackend { + PeptideAtlas, + MassIVE, + Pride, + Jpost, + ProteomeXchange, +} + +impl PROXIBackend { + const ALL: &[Self] = &[ + Self::PeptideAtlas, + Self::MassIVE, + Self::Pride, + Self::Jpost, + Self::ProteomeXchange, + ]; + + /// The PROXI server base url which needs concatenating of the USI at the end + const fn base_url(self) -> &'static str { + match self { + Self::PeptideAtlas => "http://www.peptideatlas.org/api/proxi/v0.1/spectra?resultType=full&usi=", + Self::MassIVE => "http://massive.ucsd.edu/ProteoSAFe/proxi/v0.1/spectra?resultType=full&usi=", + Self::Pride => "http://www.ebi.ac.uk/pride/proxi/archive/v0.1/spectra?resultType=full&usi=", + Self::Jpost => "https://repository.jpostdb.org/proxi/spectra?resultType=full&usi=", + Self::ProteomeXchange => "http://proteomecentral.proteomexchange.org/api/proxi/v0.1/spectra?resultType=full&usi=", + } + } +} + +impl USI { + /// Retrieve this USI from the given PROXI backend. If no PROXI backend is indicated it will + /// aggregate the results from all known backends and return the first successful spectrum. + /// + /// This function is only available with the feature `proxi`. + pub fn get_spectrum_blocking( + &self, + backend: Option, + ) -> Result<(PROXIBackend, Vec), PROXIError> { + backend.map_or_else( + || { + let client = reqwest::blocking::Client::new(); + let mut last_error = None; + PROXIBackend::ALL + .iter() + .find_map(|backend| { + transform_response( + *backend, + client + .get(backend.base_url().to_string() + &self.to_string()) + .send() + .and_then(reqwest::blocking::Response::json), + ) + .map_err(|err| { + last_error = Some(err); + }) + .ok() + }) + .ok_or(last_error.unwrap_or(PROXIError::NotFound)) + }, + |backend| { + transform_response( + backend, + reqwest::blocking::get(backend.base_url().to_string() + &self.to_string()) + .and_then(reqwest::blocking::Response::json), + ) + }, + ) + } + + /// Retrieve this USI from the given PROXI backend. If no PROXI backend is indicated it will + /// aggregate the results from all known backends and return the first successful spectrum. + /// + /// This function is only available with the feature `proxi-async`. + #[cfg(feature = "proxi-async")] + pub async fn get_spectrum_async( + &self, + backend: Option, + ) -> Result<(PROXIBackend, Vec), PROXIError> { + async fn get_response( + client: &reqwest::Client, + backend: PROXIBackend, + usi: &str, + ) -> Result<(PROXIBackend, Vec), PROXIError> { + transform_response( + backend, + match client + .get(backend.base_url().to_string() + usi) + .send() + .await + { + Ok(r) => r.json::().await, + Err(e) => Err(e), + }, + ) + } + + let client = reqwest::Client::new(); + if let Some(backend) = backend { + get_response(&client, backend, &self.to_string()).await + } else { + use futures::StreamExt; + + let mut requests = futures::stream::FuturesUnordered::new(); + let mut last_error = None; + for backend in PROXIBackend::ALL { + requests.push(get_response(&client, *backend, &self.to_string())); + } + + while let Some(res) = requests.next().await { + match res { + Ok(s) => return Ok(s), + Err(e) => last_error = Some(e), + } + } + + Err(last_error.unwrap_or(PROXIError::NotFound)) + } + } +} + +fn transform_response( + backend: PROXIBackend, + response: Result, +) -> Result<(PROXIBackend, Vec), PROXIError> { + match response { + Ok(PROXIResponse::Spectra(s)) => Ok((backend, s)), + Ok(PROXIResponse::Error { + detail, + status, + title, + kind, + }) => Err(PROXIError::Error { + backend, + detail, + status, + title, + kind, + }), + Err(err) => Err(PROXIError::IO(backend, err)), + } +} + +#[derive(Debug, Deserialize)] +#[serde(untagged)] +enum PROXIResponse { + Spectra(Vec), + Error { + detail: String, + status: usize, + title: PROXIErrorType, + #[serde(rename = "type")] + kind: String, + }, +} + +/// An error returned when accessing a PROXI server +#[derive(Debug)] +pub enum PROXIError { + /// An error during the network request or decoding of the JSON response + IO(PROXIBackend, reqwest::Error), + /// A returned error by the server + Error { + /// Which backend failed + backend: PROXIBackend, + /// The type of error + title: PROXIErrorType, + /// Detailed explanation on the error + detail: String, + /// HTTP status code + status: usize, + /// The error kind, often "about:blank" + kind: String, + }, + /// An error when none of the aggregated backends returned a positive or negative result + NotFound, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum PROXIErrorType { + /// The dataset is not present on this PROXI backend, it could be present on a different backend or the identifier does not exist + #[serde(rename = "DatasetNotHere")] + DataSetNotHere, + /// The dataset is present, but the ms run is not, likely a mistake in the ms run name + #[serde(rename = "MsRunNotFound")] + MsRunNotFound, + /// The dataset and the ms run are available, but the scan number does not exist in this file + #[serde(rename = "ScanNotFound")] + ScanNotFound, + /// The dataset identifier is not of a recognisable format, commonly PXD identifiers are used eg 'PXD004939', but see the USI spec for more details + #[serde(rename = "UnrecognizedIdentifierFormat")] + UnrecognizedIdentifierFormat, + /// The interpretation part of the USI is unable to be parsed, note that some PROXI backends + /// require the addition of a charge to all peptides. Additionally, the PROXI servers do not + /// require the existence of the interpretation part of the USI, so removing this field before + /// sending the request might help prevent errors. + #[serde(rename = "MalformedInterpretation")] + MalformedInterpretation, + /// The index flag (scan/index/nativeid) is malformed + #[serde(rename = "UnrecognizedIndexFlag")] + UnrecognizedIndexFlag, + /// Mandatory 'mzspec:' preamble is missing from the USI + #[serde(rename = "MissingPreamble")] + MissingPreamble, + /// The USI is malformed and has too few fields + #[serde(rename = "TooFewFields")] + TooFewFields, + #[serde(untagged)] + Other(String), +} #[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub enum Status { @@ -16,7 +234,7 @@ pub enum Status { PeakUnavailable, } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct PROXIValue(Value); impl Default for PROXIValue { @@ -26,7 +244,7 @@ impl Default for PROXIValue { } impl PROXIValue { - pub fn is_empty(&self) -> bool { + pub const fn is_empty(&self) -> bool { matches!(self.0, Value::Empty) } } @@ -57,49 +275,34 @@ where formatter.write_str("PROXIValue string") } - fn visit_str(self, v: &str) -> Result - where - E: serde::de::Error, - { + fn visit_str(self, v: &str) -> Result { match v.parse::() { Ok(v) => Ok(v), Err(e) => Err(E::custom(e)), } } - fn visit_bool(self, v: bool) -> Result - where - E: serde::de::Error, { + fn visit_bool(self, v: bool) -> Result { Ok(Value::Boolean(v).into()) } - fn visit_i64(self, v: i64) -> Result - where - E: serde::de::Error, { + fn visit_i64(self, v: i64) -> Result { Ok(Value::Int(v).into()) } - fn visit_u64(self, v: u64) -> Result - where - E: serde::de::Error, { + fn visit_u64(self, v: u64) -> Result { Ok(Value::Int(v as i64).into()) } - fn visit_f64(self, v: f64) -> Result - where - E: serde::de::Error, { + fn visit_f64(self, v: f64) -> Result { Ok(Value::Float(v).into()) } - fn visit_none(self) -> Result - where - E: serde::de::Error, { + fn visit_none(self) -> Result { Ok(Value::Empty.into()) } - fn visit_unit(self) -> Result - where - E: serde::de::Error, { + fn visit_unit(self) -> Result { Ok(Value::Empty.into()) } } @@ -222,10 +425,7 @@ where formatter.write_str("expected CURIE string") } - fn visit_str(self, v: &str) -> Result - where - E: serde::de::Error, - { + fn visit_str(self, v: &str) -> Result { match v.parse::() { Ok(v) => Ok(v), Err(e) => Err(E::custom(e)), @@ -362,9 +562,8 @@ where { match usi { Some(usi) => serializer.serialize_str(&usi.to_string()), - None => serializer.serialize_none() + None => serializer.serialize_none(), } - } fn usi_deserialize<'de, D>(deserializer: D) -> Result, D::Error> @@ -383,16 +582,11 @@ where Ok(None) } - fn visit_unit(self) -> Result - where - E: serde::de::Error, { + fn visit_unit(self) -> Result { Ok(None) } - fn visit_str(self, v: &str) -> Result - where - E: serde::de::Error, - { + fn visit_str(self, v: &str) -> Result { if v == "null" { Ok(None) } else { @@ -407,29 +601,121 @@ where deserializer.deserialize_any(USIVisit {}) } +use serde::de::{self, Visitor}; + +/// MassIVE returns a list of strings instead of a list of numbers, this type can be deserialized if a number of string is given in the JSON +#[derive(Debug, Default, Clone)] +pub struct Wrapped(T); + +impl std::ops::Deref for Wrapped { + type Target = T; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<'de, T: 'static + Default + Copy + FromStr> serde::Deserialize<'de> for Wrapped +where + T::Err: Display, + f64: AsPrimitive, + u64: AsPrimitive, + i64: AsPrimitive, +{ + fn deserialize(deserializer: D) -> Result + where + D: de::Deserializer<'de>, + { + deserializer + .deserialize_any(PotentiallyWrappedNumberVisitor::::default()) + .map(Wrapped) + } +} + +impl serde::Serialize for Wrapped { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serializer.serialize_f64(self.0) + } +} + +impl serde::Serialize for Wrapped { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serializer.serialize_f32(self.0) + } +} + +#[derive(Debug, Default)] +struct PotentiallyWrappedNumberVisitor { + marker: PhantomData, +} + +impl<'de, T: 'static + Copy + FromStr> Visitor<'de> for PotentiallyWrappedNumberVisitor +where + T::Err: Display, + f64: AsPrimitive, + u64: AsPrimitive, + i64: AsPrimitive, +{ + type Value = T; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("a wrapped number") + } + + fn visit_f64(self, value: f64) -> Result { + Ok(value.as_()) + } + + fn visit_i64(self, value: i64) -> Result { + Ok(value.as_()) + } + + fn visit_u64(self, value: u64) -> Result { + Ok(value.as_()) + } + + fn visit_str(self, value: &str) -> Result { + value.parse().map_err(|e| serde::de::Error::custom(e)) + } +} + +/// A spectrum returnd by a PROXI server #[derive(Debug, Default, Clone, Serialize, Deserialize)] pub struct PROXISpectrum { - #[serde(serialize_with = "usi_serialize", deserialize_with = "usi_deserialize")] + /// The USI as returned by the PROXI server + #[serde( + default, + serialize_with = "usi_serialize", + deserialize_with = "usi_deserialize" + )] pub usi: Option, + /// The status of this request pub status: Option, + /// Metadata for this spectrum + #[serde(default)] pub attributes: Vec, #[serde(default)] - pub mzs: Vec, + pub mzs: Vec>, #[serde(default)] - pub intensities: Vec, + pub intensities: Vec>, #[serde(skip_serializing_if = "Option::is_none", default)] pub charges: Option>, } impl PROXISpectrum { pub fn add_attribute>(&mut self, param: P) { - self.attributes.push(param.into()) + self.attributes.push(param.into()); } } impl From<&PROXISpectrum> for SpectrumDescription { fn from(value: &PROXISpectrum) -> Self { - let mut this = SpectrumDescription::default(); + let mut this = Self::default(); if let Some(usi) = value.usi.as_ref() { this.id = usi.to_string(); if let Some(ident) = usi.identifier.as_ref() { @@ -446,8 +732,11 @@ impl From<&PROXISpectrum> for SpectrumDescription { let mut has_precursor = false; let mut precursor = Precursor::default(); - for param in value.attributes.iter() { - if matches!(param.accession.controlled_vocabulary, ControlledVocabulary::UO) { + for param in &value.attributes { + if matches!( + param.accession.controlled_vocabulary, + ControlledVocabulary::UO + ) { continue; } match param.name.as_str() { @@ -465,21 +754,28 @@ impl From<&PROXISpectrum> for SpectrumDescription { } "centroid spectrum" => { this.signal_continuity = SignalContinuity::Centroid; - }, + } "scan start time" => { if let Some(s) = this.acquisition.first_scan_mut() { - s.start_time = param.value.to_f64().expect("Failed to extract scan start time") / 60.0; + s.start_time = param + .value + .to_f64() + .expect("Failed to extract scan start time") + / 60.0; } - }, + } "ion injection time" => { if let Some(s) = this.acquisition.first_scan_mut() { - s.injection_time = param.to_f32().expect("Failed to extract ion injection time"); + s.injection_time = param + .to_f32() + .expect("Failed to extract ion injection time"); } } "filter string" => { if let Some(s) = this.acquisition.first_scan_mut() { - let mut param = Param::new_key_value("filter string", param.value.to_string()); + let mut param = + Param::new_key_value("filter string", param.value.to_string()); param.controlled_vocabulary = Some(ControlledVocabulary::MS); param.accession = Some(1000512); s.add_param(param); @@ -508,13 +804,17 @@ impl From<&PROXISpectrum> for SpectrumDescription { .expect("Failed to parse isolation window target"); precursor.isolation_window.flags = match precursor.isolation_window.flags { IsolationWindowState::Unknown => IsolationWindowState::Complete, - IsolationWindowState::Explicit => IsolationWindowState::Complete, + IsolationWindowState::Explicit | IsolationWindowState::Complete => { + IsolationWindowState::Complete + } IsolationWindowState::Offset => { - precursor.isolation_window.lower_bound = precursor.isolation_window.target - precursor.isolation_window.lower_bound; - precursor.isolation_window.upper_bound += precursor.isolation_window.target; + precursor.isolation_window.lower_bound = + precursor.isolation_window.target + - precursor.isolation_window.lower_bound; + precursor.isolation_window.upper_bound += + precursor.isolation_window.target; IsolationWindowState::Complete } - IsolationWindowState::Complete => IsolationWindowState::Complete, }; } "isolation window lower offset" => { @@ -528,7 +828,8 @@ impl From<&PROXISpectrum> for SpectrumDescription { precursor.isolation_window.lower_bound = lower_bound; } IsolationWindowState::Complete => { - precursor.isolation_window.lower_bound = precursor.isolation_window.target - lower_bound; + precursor.isolation_window.lower_bound = + precursor.isolation_window.target - lower_bound; } _ => {} } @@ -544,7 +845,8 @@ impl From<&PROXISpectrum> for SpectrumDescription { precursor.isolation_window.upper_bound = upper_bound; } IsolationWindowState::Complete => { - precursor.isolation_window.upper_bound = precursor.isolation_window.target + upper_bound; + precursor.isolation_window.upper_bound = + precursor.isolation_window.target + upper_bound; } _ => {} } @@ -554,7 +856,10 @@ impl From<&PROXISpectrum> for SpectrumDescription { let lower_bound = param .to_f32() .expect("Failed to parse isolation window limit"); - if let IsolationWindowState::Unknown = precursor.isolation_window.flags { + if matches!( + precursor.isolation_window.flags, + IsolationWindowState::Unknown + ) { precursor.isolation_window.flags = IsolationWindowState::Explicit; precursor.isolation_window.lower_bound = lower_bound; } @@ -564,13 +869,16 @@ impl From<&PROXISpectrum> for SpectrumDescription { let upper_bound = param .to_f32() .expect("Failed to parse isolation window limit"); - if let IsolationWindowState::Unknown = precursor.isolation_window.flags { + if matches!( + precursor.isolation_window.flags, + IsolationWindowState::Unknown + ) { precursor.isolation_window.flags = IsolationWindowState::Explicit; precursor.isolation_window.upper_bound = upper_bound; } } _ => { - let mut p = Param::new_key_value(param.name.clone(), param.value.as_ref().to_owned()); + let mut p = Param::new_key_value(param.name.clone(), param.value.clone()); p.accession = Some(param.accession.accession); p.controlled_vocabulary = Some(param.accession.controlled_vocabulary); this.add_param(p); @@ -585,26 +893,43 @@ impl From<&PROXISpectrum> for SpectrumDescription { } } -impl From for MultiLayerSpectrum { +impl< + C: CentroidLike + Default + BuildFromArrayMap + BuildArrayMapFrom, + D: DeconvolutedCentroidLike + Default + BuildFromArrayMap + BuildArrayMapFrom, + > From for MultiLayerSpectrum +{ fn from(value: PROXISpectrum) -> Self { let descr: SpectrumDescription = (&value).into(); let mut arrays = BinaryArrayMap::default(); - let mut mz_array = DataArray::from_name_and_type(&ArrayType::MZArray, BinaryDataArrayType::Float64); - mz_array.extend(&value.mzs).unwrap(); + let mut mz_array = + DataArray::from_name_and_type(&ArrayType::MZArray, BinaryDataArrayType::Float64); + mz_array + .extend(&value.mzs.into_iter().map(|v| v.0).collect::>()) + .unwrap(); arrays.add(mz_array); - let mut intensity_array = DataArray::from_name_and_type(&ArrayType::IntensityArray, BinaryDataArrayType::Float32); - intensity_array.extend(&value.intensities).unwrap(); + let mut intensity_array = + DataArray::from_name_and_type(&ArrayType::IntensityArray, BinaryDataArrayType::Float32); + intensity_array + .extend( + &value + .intensities + .into_iter() + .map(|v| v.0) + .collect::>(), + ) + .unwrap(); arrays.add(intensity_array); if let Some(charges) = value.charges.as_ref() { - let mut charge_arrays = DataArray::from_name_and_type(&ArrayType::ChargeArray, BinaryDataArrayType::Int32); - charge_arrays.extend(&charges).unwrap(); + let mut charge_arrays = + DataArray::from_name_and_type(&ArrayType::ChargeArray, BinaryDataArrayType::Int32); + charge_arrays.extend(charges).unwrap(); arrays.add(charge_arrays); }; - MultiLayerSpectrum::from_arrays_and_description(arrays, descr) + Self::from_arrays_and_description(arrays, descr) } } @@ -613,21 +938,22 @@ where T: SpectrumLike, { fn from(value: &T) -> Self { - let mut this = PROXISpectrum::default(); - this.status = Some(Status::Readable); + let mut this = Self { + status: Some(Status::Readable), + ..Default::default() + }; - let ms_level = value.ms_level(); - if ms_level == 1 { - this.add_attribute(MS1_SPECTRUM.clone()); - } else if ms_level > 1 { - this.add_attribute(MSN_SPECTRUM.clone()); + match value.ms_level().cmp(&1) { + Ordering::Equal => this.add_attribute(MS1_SPECTRUM.clone()), + Ordering::Greater => this.add_attribute(MSN_SPECTRUM.clone()), + Ordering::Less => (), } for param in value.params().iter().filter(|p| p.is_controlled()) { if param.curie().unwrap() == curie!(MS:1003063) { this.usi = Some(param.value.as_str().parse().unwrap()); } else { - this.add_attribute(param.clone()) + this.add_attribute(param.clone()); } } @@ -639,27 +965,27 @@ where this.add_attribute(PROXIParam { name: "ms level".to_string(), - value: PROXIValue(Value::Int(ms_level as i64)), + value: PROXIValue(Value::Int(value.ms_level() as i64)), accession: curie!(MS:1000511), }); match value.polarity() { crate::spectrum::ScanPolarity::Unknown => {} crate::spectrum::ScanPolarity::Positive => { - this.attributes.push(Param::from(POSITIVE_SCAN).into()) + this.attributes.push(Param::from(POSITIVE_SCAN).into()); } crate::spectrum::ScanPolarity::Negative => { - this.attributes.push(Param::from(NEGATIVE_SCAN).into()) + this.attributes.push(Param::from(NEGATIVE_SCAN).into()); } } match value.signal_continuity() { crate::spectrum::SignalContinuity::Unknown => {} crate::spectrum::SignalContinuity::Centroid => { - this.attributes.push(Param::from(CENTROID_SPECTRUM).into()) + this.attributes.push(Param::from(CENTROID_SPECTRUM).into()); } crate::spectrum::SignalContinuity::Profile => { - this.attributes.push(Param::from(PROFILE_SPECTRUM).into()) + this.attributes.push(Param::from(PROFILE_SPECTRUM).into()); } } @@ -720,20 +1046,35 @@ where match value.peaks() { crate::spectrum::RefPeakDataLevel::Missing => {} crate::spectrum::RefPeakDataLevel::RawData(arrays) => { - this.mzs = arrays.mzs().unwrap().to_vec(); - this.intensities = arrays.intensities().unwrap().to_vec(); + this.mzs = arrays.mzs().unwrap().iter().copied().map(Wrapped).collect(); + this.intensities = arrays + .intensities() + .unwrap() + .iter() + .copied() + .map(Wrapped) + .collect(); if let Ok(arr) = arrays.charges() { - this.charges = Some(arr.to_vec()) + this.charges = Some(arr.to_vec()); } } crate::spectrum::RefPeakDataLevel::Centroid(peaks) => { - (this.mzs, this.intensities) = - peaks.iter().map(|p| (p.mz(), p.intensity())).unzip(); + (this.mzs, this.intensities) = peaks + .iter() + .map(|p| (Wrapped(p.mz()), Wrapped(p.intensity()))) + .unzip(); } crate::spectrum::RefPeakDataLevel::Deconvoluted(peaks) => { - (this.mzs, this.intensities) = - peaks.iter().map(|p| (p.mz(), p.intensity())).unzip(); - this.charges = Some(peaks.iter().map(|p| p.charge()).collect::>()); + (this.mzs, this.intensities) = peaks + .iter() + .map(|p| (Wrapped(p.mz()), Wrapped(p.intensity()))) + .unzip(); + this.charges = Some( + peaks + .iter() + .map(mzpeaks::KnownCharge::charge) + .collect::>(), + ); } } @@ -750,8 +1091,8 @@ mod test { use std::io; use super::*; - use serde_json; use crate::MZReader; + use serde_json; #[test] fn test_convert() -> io::Result<()> { @@ -761,9 +1102,91 @@ mod test { let scan_message = PROXISpectrum::from(&scan); let message = serde_json::to_string(&scan_message)?; - let dup: PROXISpectrum = serde_json::from_str(&message)?; + let dup: PROXISpectrum = serde_json::from_str(&message)?; assert_eq!(dup.usi, scan_message.usi); assert_eq!(dup.attributes, scan_message.attributes); Ok(()) } -} \ No newline at end of file + + #[test] + fn get_peptide_atlas() { + let usi: USI = + "mzspec:PXD000561:Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555000000:VLHPLEGAVVIIFK/2" + .parse() + .unwrap(); + let (_, response) = usi + .get_spectrum_blocking(Some(PROXIBackend::PeptideAtlas)) + .unwrap(); + dbg!(&response); + assert!(!response.is_empty()); + todo!(); + } + + #[test] + fn get_massive() { + let usi: USI = "mzspec:MSV000078547:120228_nbut_3610_it_it_take2:scan:389" + .parse() + .unwrap(); + let (_, response) = usi + .get_spectrum_blocking(Some(PROXIBackend::MassIVE)) + .unwrap(); + assert!(!response.is_empty()); + } + + #[test] + fn get_pride() { + let usi: USI = + "mzspec:PXD043489:20201103_F1_UM5_Peng0013_SA_139H2_InS_Elastase.raw:scan:11809:VSLFPPSSEQLTSNASVV" + .parse() + .unwrap(); + let (_, response) = usi + .get_spectrum_blocking(Some(PROXIBackend::Pride)) + .unwrap(); + assert!(!response.is_empty()); + } + + #[test] + fn get_proteomexchange() { + let usi: USI = + "mzspec:PXD004939:Rice_phos_ABA_3h_20per_F1_R2:scan:2648:DAEKS[UNIMOD:21]PIN[UNIMOD:7]GR/2" + .parse() + .unwrap(); + let (_, response) = usi + .get_spectrum_blocking(Some(PROXIBackend::ProteomeXchange)) + .unwrap(); + assert!(!response.is_empty()); + } + + #[test] + fn get_aggregate() { + for usi in [ + "mzspec:PXD000561:Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555:VLHPLEGAVVIIFK/2", + "mzspec:MSV000078547:120228_nbut_3610_it_it_take2:scan:389", + "mzspec:PXD043489:20201103_F1_UM5_Peng0013_SA_139H2_InS_Elastase.raw:scan:11809:VSLFPPSSEQLTSNASVV", + "mzspec:PXD004939:Rice_phos_ABA_3h_20per_F1_R2:scan:2648:DAEKS[UNIMOD:21]PIN[UNIMOD:7]GR/2"] { + println!("Trying: {usi}"); + let usi: USI = usi.parse().unwrap(); + let (_, response) = usi.get_spectrum_blocking(None).unwrap(); + assert!(!response.is_empty()); + } + } +} + +#[cfg(all(feature = "proxi-async", feature = "tokio"))] +mod tests { + use super::*; + + #[tokio::test] + async fn get_aggregate_async() { + for usi in [ + "mzspec:PXD000561:Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555:VLHPLEGAVVIIFK/2", + "mzspec:MSV000078547:120228_nbut_3610_it_it_take2:scan:389", + "mzspec:PXD043489:20201103_F1_UM5_Peng0013_SA_139H2_InS_Elastase.raw:scan:11809:VSLFPPSSEQLTSNASVV", + "mzspec:PXD004939:Rice_phos_ABA_3h_20per_F1_R2:scan:2648:DAEKS[UNIMOD:21]PIN[UNIMOD:7]GR/2"] { + println!("Trying: {usi}"); + let usi: USI = usi.parse().unwrap(); + let (_, response) = usi.get_spectrum_async(None).await.unwrap(); + assert!(!response.is_empty()); + } + } +} diff --git a/src/io/usi.rs b/src/io/usi.rs index ff38288..0217627 100644 --- a/src/io/usi.rs +++ b/src/io/usi.rs @@ -16,17 +16,16 @@ pub enum USIParseError { MalformedIndex(String, String, String), } - #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] pub enum Protocol { #[default] - MZSpec + MZSpec, } impl Display for Protocol { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::MZSpec => write!(f, "mzspec") + Self::MZSpec => write!(f, "mzspec"), } } } @@ -38,7 +37,6 @@ pub enum Identifier { NativeID(Box>), } - #[derive(Debug, Default, Clone, PartialEq, Eq, Hash)] pub struct USI { pub protocol: Protocol, @@ -60,7 +58,12 @@ impl FromStr for USI { "mzspec" => { this.protocol = Protocol::MZSpec; } - _ => return Err(USIParseError::UnknownProtocol(protocol.to_string(), s.to_string())) + _ => { + return Err(USIParseError::UnknownProtocol( + protocol.to_string(), + s.to_string(), + )) + } }; if let Some(dataset) = tokens.next() { @@ -71,38 +74,52 @@ impl FromStr for USI { if let (Some(ident_type), Some(ident_value)) = (tokens.next(), tokens.next()) { match ident_type { - "scan" => { - match ident_value.parse() { - Ok(v) => { - this.identifier = Some(Identifier::Scan(v)); - }, - Err(e) => { - return Err(USIParseError::MalformedIndex(ident_value.to_string(), e.to_string(), s.to_string())) - } + "scan" => match ident_value.parse() { + Ok(v) => { + this.identifier = Some(Identifier::Scan(v)); + } + Err(e) => { + return Err(USIParseError::MalformedIndex( + ident_value.to_string(), + e.to_string(), + s.to_string(), + )) } }, - "index" => { - match ident_value.parse() { - Ok(v) => { - this.identifier = Some(Identifier::Index(v)); - }, - Err(e) => { - return Err(USIParseError::MalformedIndex(ident_value.to_string(), e.to_string(), s.to_string())) - } + "index" => match ident_value.parse() { + Ok(v) => { + this.identifier = Some(Identifier::Index(v)); + } + Err(e) => { + return Err(USIParseError::MalformedIndex( + ident_value.to_string(), + e.to_string(), + s.to_string(), + )) } }, "nativeId" => { - let res: Result, _> = ident_value.split(',').map(|t| t.parse()).collect(); + let res: Result, _> = + ident_value.split(',').map(|t| t.parse()).collect(); match res { Ok(vals) => { this.identifier = Some(Identifier::NativeID(vals.into())) - }, + } Err(e) => { - return Err(USIParseError::MalformedIndex(ident_value.to_string(), e.to_string(), s.to_string())) + return Err(USIParseError::MalformedIndex( + ident_value.to_string(), + e.to_string(), + s.to_string(), + )) } } - }, - _ => return Err(USIParseError::UnknownIndexType(ident_type.to_string(), s.to_string())) + } + _ => { + return Err(USIParseError::UnknownIndexType( + ident_type.to_string(), + s.to_string(), + )) + } }; this.interpretation = tokens.next().map(|s| s.to_string()); @@ -112,12 +129,14 @@ impl FromStr for USI { } else { Err(USIParseError::MissingRun(s.to_string())) } - } else { Err(USIParseError::MissingDataset(s.to_string())) } } else { - Err(USIParseError::UnknownProtocol("".to_string(), s.to_string())) + Err(USIParseError::UnknownProtocol( + "".to_string(), + s.to_string(), + )) } } } @@ -128,9 +147,20 @@ impl Display for USI { let (ident_class, ident_val) = match ident { Identifier::Scan(i) => ("scan", i.to_string()), Identifier::Index(i) => ("index", i.to_string()), - Identifier::NativeID(parts) => ("nativeId", parts.iter().map(|i| i.to_string()).collect::>().join(",")) + Identifier::NativeID(parts) => ( + "nativeId", + parts + .iter() + .map(|i| i.to_string()) + .collect::>() + .join(","), + ), }; - write!(f, "{}:{}:{}:{ident_class}:{ident_val}", self.protocol, self.dataset, self.run_name)?; + write!( + f, + "{}:{}:{}:{ident_class}:{ident_val}", + self.protocol, self.dataset, self.run_name + )?; if let Some(interp) = self.interpretation.as_ref() { write!(f, ":{}", interp)?; if let Some(provenance) = self.provenance.as_ref() { @@ -144,7 +174,6 @@ impl Display for USI { } } - #[cfg(test)] mod test { use super::*; @@ -169,9 +198,15 @@ mod test { fn test_example() -> Result<(), USIParseError> { let usi: USI = "mzspec:PXD019909:20180914_QE8_nLC0_BDA_SA_DIA_Skin_Dendritic_cells_DC_MT_600000:scan:62396:SAGQGEVLVYVEDPAGHQEEAK/3".parse()?; assert_eq!(usi.dataset, "PXD019909"); - assert_eq!(usi.run_name, "20180914_QE8_nLC0_BDA_SA_DIA_Skin_Dendritic_cells_DC_MT_600000"); + assert_eq!( + usi.run_name, + "20180914_QE8_nLC0_BDA_SA_DIA_Skin_Dendritic_cells_DC_MT_600000" + ); assert_eq!(usi.identifier, Some(Identifier::Scan(62396))); - assert_eq!(usi.interpretation, Some("SAGQGEVLVYVEDPAGHQEEAK/3".to_string())); + assert_eq!( + usi.interpretation, + Some("SAGQGEVLVYVEDPAGHQEEAK/3".to_string()) + ); Ok(()) } -} \ No newline at end of file +} From b14c81cc4d4c9069ca2b96b3c7ee4e7473d292df Mon Sep 17 00:00:00 2001 From: Douwe Schulte Date: Wed, 30 Oct 2024 10:58:01 +0100 Subject: [PATCH 02/11] Solved nonnumeric CURIEs and added PROXI to tests --- .github/workflows/test.yaml | 2 +- src/io/proxi.rs | 53 ++++++++++++++++++++++++------------- 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index aeb5003..a98f5f9 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -44,4 +44,4 @@ jobs: uses: actions-rs/cargo@v1 with: command: test - args: --no-default-features --features nalgebra,parallelism,async,mzsignal,thermo -- --nocapture --show-output \ No newline at end of file + args: --no-default-features --features nalgebra,parallelism,async,mzsignal,thermo,proxi,proxi-async,tokio -- --nocapture --show-output \ No newline at end of file diff --git a/src/io/proxi.rs b/src/io/proxi.rs index 8748a54..6a2617c 100644 --- a/src/io/proxi.rs +++ b/src/io/proxi.rs @@ -113,15 +113,16 @@ impl USI { } let client = reqwest::Client::new(); + let usi = self.to_string(); if let Some(backend) = backend { - get_response(&client, backend, &self.to_string()).await + get_response(&client, backend, &usi).await } else { use futures::StreamExt; let mut requests = futures::stream::FuturesUnordered::new(); let mut last_error = None; for backend in PROXIBackend::ALL { - requests.push(get_response(&client, *backend, &self.to_string())); + requests.push(get_response(&client, *backend, &usi)); } while let Some(res) = requests.next().await { @@ -438,11 +439,7 @@ where #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct PROXIParam { - #[serde( - serialize_with = "curie_serialize", - deserialize_with = "curie_deserialize" - )] - pub accession: CURIE, + pub accession: PROXIAccession, pub name: String, #[serde( serialize_with = "proxi_value_serialize", @@ -453,6 +450,26 @@ pub struct PROXIParam { pub value: PROXIValue, } +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub enum PROXIAccession { + #[serde( + serialize_with = "curie_serialize", + deserialize_with = "curie_deserialize" + )] + CURIE(CURIE), + #[serde(untagged)] + Other(String), +} + +impl PROXIAccession { + fn controlled_vocabulary(&self) -> ControlledVocabulary { + match self { + PROXIAccession::CURIE(c) => c.controlled_vocabulary, + PROXIAccession::Other(_) => ControlledVocabulary::Unknown, + } + } +} + impl ParamValue for PROXIParam { fn is_empty(&self) -> bool { ::is_empty(&self.value) @@ -518,7 +535,7 @@ impl ParamValue for PROXIParam { impl PROXIParam { pub fn new>(accession: CURIE, name: S, value: V) -> Self { Self { - accession, + accession: PROXIAccession::CURIE(accession), name: name.to_string(), value: value.into(), } @@ -528,7 +545,7 @@ impl PROXIParam { impl From for PROXIParam { fn from(value: Param) -> Self { Self { - accession: value.curie().unwrap(), + accession: PROXIAccession::CURIE(value.curie().unwrap()), name: value.name, value: value.value.into(), } @@ -538,7 +555,7 @@ impl From for PROXIParam { impl<'a> From> for PROXIParam { fn from(value: ParamCow<'a>) -> Self { Self { - accession: value.curie().unwrap(), + accession: PROXIAccession::CURIE(value.curie().unwrap()), name: value.name.to_string(), value: Value::from(value.value).into(), } @@ -734,7 +751,7 @@ impl From<&PROXISpectrum> for SpectrumDescription { for param in &value.attributes { if matches!( - param.accession.controlled_vocabulary, + param.accession.controlled_vocabulary(), ControlledVocabulary::UO ) { continue; @@ -879,8 +896,10 @@ impl From<&PROXISpectrum> for SpectrumDescription { } _ => { let mut p = Param::new_key_value(param.name.clone(), param.value.clone()); - p.accession = Some(param.accession.accession); - p.controlled_vocabulary = Some(param.accession.controlled_vocabulary); + if let PROXIAccession::CURIE(c) = param.accession { + p.accession = Some(c.accession); + } + p.controlled_vocabulary = Some(param.accession.controlled_vocabulary()); this.add_param(p); } } @@ -966,7 +985,7 @@ where this.add_attribute(PROXIParam { name: "ms level".to_string(), value: PROXIValue(Value::Int(value.ms_level() as i64)), - accession: curie!(MS:1000511), + accession: PROXIAccession::CURIE(curie!(MS:1000511)), }); match value.polarity() { @@ -1032,7 +1051,7 @@ where for event in value.acquisition().iter() { let p = PROXIParam { name: "scan start time".into(), - accession: curie!(MS:1000016), + accession: PROXIAccession::CURIE(curie!(MS:1000016)), value: Value::Float(event.start_time * 60.0).into(), }; this.add_attribute(p); @@ -1111,15 +1130,13 @@ mod test { #[test] fn get_peptide_atlas() { let usi: USI = - "mzspec:PXD000561:Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555000000:VLHPLEGAVVIIFK/2" + "mzspec:PXD000561:Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555:VLHPLEGAVVIIFK/2" .parse() .unwrap(); let (_, response) = usi .get_spectrum_blocking(Some(PROXIBackend::PeptideAtlas)) .unwrap(); - dbg!(&response); assert!(!response.is_empty()); - todo!(); } #[test] From 50b48c47f22f85ea749eb8832f73181b30c36bc7 Mon Sep 17 00:00:00 2001 From: Douwe Schulte Date: Thu, 31 Oct 2024 13:00:02 +0100 Subject: [PATCH 03/11] Implemented requested changes on PR --- Cargo.toml | 2 +- src/io/proxi.rs | 59 ++++++++++++++++++++++++++++--------------------- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 60024e7..bd4317b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,7 +44,7 @@ debug = true [features] # default = ["nalgebra", "parallelism", "mzsignal", "zlib-ng-compat"] -default = ["zlib-ng-compat"] +default = ["zlib-ng-compat", "proxi-async"] openblas = ["mzsignal", "mzsignal/openblas"] netlib = ["mzsignal", "mzsignal/netlib"] diff --git a/src/io/proxi.rs b/src/io/proxi.rs index 6a2617c..7be6055 100644 --- a/src/io/proxi.rs +++ b/src/io/proxi.rs @@ -15,13 +15,17 @@ use crate::{ }; /// The possible PROXI backends -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] pub enum PROXIBackend { PeptideAtlas, MassIVE, Pride, Jpost, ProteomeXchange, + /// A custom backend with the given PROXI url, the pattern `{USI}` will be replaced with the actual usi in building the final request. + /// + /// For example the custom url `http://yourdomain.rs/proxi/v0.1/spectra?usi={USI}&resultType=full` will map to `http://yourdomain.rs/proxi/v0.1/spectra?usi=mzspec:ID:FILE:scan:SCAN&resultType=full`. + Custom(String), } impl PROXIBackend { @@ -34,13 +38,14 @@ impl PROXIBackend { ]; /// The PROXI server base url which needs concatenating of the USI at the end - const fn base_url(self) -> &'static str { + fn proxi_url(&self, usi: &str) -> String { match self { - Self::PeptideAtlas => "http://www.peptideatlas.org/api/proxi/v0.1/spectra?resultType=full&usi=", - Self::MassIVE => "http://massive.ucsd.edu/ProteoSAFe/proxi/v0.1/spectra?resultType=full&usi=", - Self::Pride => "http://www.ebi.ac.uk/pride/proxi/archive/v0.1/spectra?resultType=full&usi=", - Self::Jpost => "https://repository.jpostdb.org/proxi/spectra?resultType=full&usi=", - Self::ProteomeXchange => "http://proteomecentral.proteomexchange.org/api/proxi/v0.1/spectra?resultType=full&usi=", + Self::PeptideAtlas => format!("http://www.peptideatlas.org/api/proxi/v0.1/spectra?resultType=full&usi={usi}") , + Self::MassIVE => format!("http://massive.ucsd.edu/ProteoSAFe/proxi/v0.1/spectra?resultType=full&usi={usi}"), + Self::Pride => format!("http://www.ebi.ac.uk/pride/proxi/archive/v0.1/spectra?resultType=full&usi={usi}"), + Self::Jpost => format!("https://repository.jpostdb.org/proxi/spectra?resultType=full&usi={usi}"), + Self::ProteomeXchange => format!("http://proteomecentral.proteomexchange.org/api/proxi/v0.1/spectra?resultType=full&usi={usi}"), + Self::Custom(url) => url.replace("{USI}", usi), } } } @@ -49,22 +54,26 @@ impl USI { /// Retrieve this USI from the given PROXI backend. If no PROXI backend is indicated it will /// aggregate the results from all known backends and return the first successful spectrum. /// + /// A [`reqwest::blocking::Client`] can be provided to create the requests, if no client is + /// provided a default client will be used. + /// /// This function is only available with the feature `proxi`. pub fn get_spectrum_blocking( &self, backend: Option, + client: Option, ) -> Result<(PROXIBackend, Vec), PROXIError> { backend.map_or_else( || { - let client = reqwest::blocking::Client::new(); + let client = client.unwrap_or_default(); let mut last_error = None; PROXIBackend::ALL .iter() .find_map(|backend| { transform_response( - *backend, + backend.clone(), client - .get(backend.base_url().to_string() + &self.to_string()) + .get(backend.proxi_url(&self.to_string())) .send() .and_then(reqwest::blocking::Response::json), ) @@ -77,8 +86,8 @@ impl USI { }, |backend| { transform_response( - backend, - reqwest::blocking::get(backend.base_url().to_string() + &self.to_string()) + backend.clone(), + reqwest::blocking::get(backend.proxi_url(&self.to_string())) .and_then(reqwest::blocking::Response::json), ) }, @@ -88,11 +97,15 @@ impl USI { /// Retrieve this USI from the given PROXI backend. If no PROXI backend is indicated it will /// aggregate the results from all known backends and return the first successful spectrum. /// + /// A [`reqwest::Client`] can be provided to create the requests, if no client is + /// provided a default client will be used. + /// /// This function is only available with the feature `proxi-async`. #[cfg(feature = "proxi-async")] pub async fn get_spectrum_async( &self, backend: Option, + client: Option, ) -> Result<(PROXIBackend, Vec), PROXIError> { async fn get_response( client: &reqwest::Client, @@ -100,19 +113,15 @@ impl USI { usi: &str, ) -> Result<(PROXIBackend, Vec), PROXIError> { transform_response( - backend, - match client - .get(backend.base_url().to_string() + usi) - .send() - .await - { + backend.clone(), + match client.get(backend.proxi_url(usi)).send().await { Ok(r) => r.json::().await, Err(e) => Err(e), }, ) } - let client = reqwest::Client::new(); + let client = client.unwrap_or_default(); let usi = self.to_string(); if let Some(backend) = backend { get_response(&client, backend, &usi).await @@ -122,7 +131,7 @@ impl USI { let mut requests = futures::stream::FuturesUnordered::new(); let mut last_error = None; for backend in PROXIBackend::ALL { - requests.push(get_response(&client, *backend, &usi)); + requests.push(get_response(&client, backend.clone(), &usi)); } while let Some(res) = requests.next().await { @@ -1134,7 +1143,7 @@ mod test { .parse() .unwrap(); let (_, response) = usi - .get_spectrum_blocking(Some(PROXIBackend::PeptideAtlas)) + .get_spectrum_blocking(Some(PROXIBackend::PeptideAtlas), None) .unwrap(); assert!(!response.is_empty()); } @@ -1145,7 +1154,7 @@ mod test { .parse() .unwrap(); let (_, response) = usi - .get_spectrum_blocking(Some(PROXIBackend::MassIVE)) + .get_spectrum_blocking(Some(PROXIBackend::MassIVE), None) .unwrap(); assert!(!response.is_empty()); } @@ -1157,7 +1166,7 @@ mod test { .parse() .unwrap(); let (_, response) = usi - .get_spectrum_blocking(Some(PROXIBackend::Pride)) + .get_spectrum_blocking(Some(PROXIBackend::Pride), None) .unwrap(); assert!(!response.is_empty()); } @@ -1169,7 +1178,7 @@ mod test { .parse() .unwrap(); let (_, response) = usi - .get_spectrum_blocking(Some(PROXIBackend::ProteomeXchange)) + .get_spectrum_blocking(Some(PROXIBackend::ProteomeXchange), None) .unwrap(); assert!(!response.is_empty()); } @@ -1183,7 +1192,7 @@ mod test { "mzspec:PXD004939:Rice_phos_ABA_3h_20per_F1_R2:scan:2648:DAEKS[UNIMOD:21]PIN[UNIMOD:7]GR/2"] { println!("Trying: {usi}"); let usi: USI = usi.parse().unwrap(); - let (_, response) = usi.get_spectrum_blocking(None).unwrap(); + let (_, response) = usi.get_spectrum_blocking(None, None).unwrap(); assert!(!response.is_empty()); } } From e1817caa49dfb97d53a9848116d1af5b5a9c945b Mon Sep 17 00:00:00 2001 From: Douwe Schulte Date: Thu, 31 Oct 2024 13:13:54 +0100 Subject: [PATCH 04/11] Handled the edge case when all returned spectra have negative status --- src/io/proxi.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/io/proxi.rs b/src/io/proxi.rs index 7be6055..4d434b2 100644 --- a/src/io/proxi.rs +++ b/src/io/proxi.rs @@ -151,7 +151,13 @@ fn transform_response( response: Result, ) -> Result<(PROXIBackend, Vec), PROXIError> { match response { - Ok(PROXIResponse::Spectra(s)) => Ok((backend, s)), + Ok(PROXIResponse::Spectra(s)) + if s.iter() + .all(|s| s.status.is_none_or(|s| s == Status::Readable)) => + { + Ok((backend, s)) + } + Ok(PROXIResponse::Spectra(s)) => Err(PROXIError::PeakUnavailable(backend, s)), Ok(PROXIResponse::Error { detail, status, @@ -199,6 +205,8 @@ pub enum PROXIError { /// The error kind, often "about:blank" kind: String, }, + /// When the server returns only spectra with [`Status::PeakUnavailable`]. + PeakUnavailable(PROXIBackend, Vec), /// An error when none of the aggregated backends returned a positive or negative result NotFound, } @@ -236,7 +244,7 @@ pub enum PROXIErrorType { Other(String), } -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd)] pub enum Status { #[serde(rename = "READABLE")] Readable, @@ -630,7 +638,7 @@ where use serde::de::{self, Visitor}; /// MassIVE returns a list of strings instead of a list of numbers, this type can be deserialized if a number of string is given in the JSON -#[derive(Debug, Default, Clone)] +#[derive(Debug, Default, Clone, Copy)] pub struct Wrapped(T); impl std::ops::Deref for Wrapped { From a85ec04b7a41612de88e760606ef50c210de9d79 Mon Sep 17 00:00:00 2001 From: Douwe Schulte Date: Thu, 31 Oct 2024 13:20:04 +0100 Subject: [PATCH 05/11] Undo default features addition --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index bd4317b..60024e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,7 +44,7 @@ debug = true [features] # default = ["nalgebra", "parallelism", "mzsignal", "zlib-ng-compat"] -default = ["zlib-ng-compat", "proxi-async"] +default = ["zlib-ng-compat"] openblas = ["mzsignal", "mzsignal/openblas"] netlib = ["mzsignal", "mzsignal/netlib"] From a018e605a4137be797ef5f85f7ceb05f03fad57b Mon Sep 17 00:00:00 2001 From: Douwe Schulte Date: Thu, 31 Oct 2024 16:51:33 +0100 Subject: [PATCH 06/11] Some convenient display implements --- src/io/proxi.rs | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/io/proxi.rs b/src/io/proxi.rs index 4d434b2..c1ae913 100644 --- a/src/io/proxi.rs +++ b/src/io/proxi.rs @@ -28,6 +28,23 @@ pub enum PROXIBackend { Custom(String), } +impl std::fmt::Display for PROXIBackend { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + match self { + Self::PeptideAtlas => "PeptideAtlas", + Self::MassIVE => "MassIVE", + Self::Pride => "PRIDE", + Self::Jpost => "jPOST", + Self::ProteomeXchange => "ProteomeXchange", + Self::Custom(_) => "Custom", + } + ) + } +} + impl PROXIBackend { const ALL: &[Self] = &[ Self::PeptideAtlas, @@ -244,6 +261,26 @@ pub enum PROXIErrorType { Other(String), } +impl std::fmt::Display for PROXIErrorType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + match self { + Self::DataSetNotHere => "DataSetNotHere", + Self::MsRunNotFound => "MsRunNotFound", + Self::ScanNotFound => "ScanNotFound", + Self::UnrecognizedIdentifierFormat => "UnrecognizedIdentifierFormat", + Self::MalformedInterpretation => "MalformedInterpretation", + Self::UnrecognizedIndexFlag => "UnrecognizedIndexFlag", + Self::MissingPreamble => "MissingPreamble", + Self::TooFewFields => "TooFewFields", + Self::Other(o) => o, + } + ) + } +} + #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd)] pub enum Status { #[serde(rename = "READABLE")] From 2d47a3cde488c64958fdd1d8f31260e73f7f5d01 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Thu, 31 Oct 2024 20:04:12 -0400 Subject: [PATCH 07/11] Remove the `Wrapped` in-memory storage --- src/io/proxi.rs | 89 ++++++++++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 34 deletions(-) diff --git a/src/io/proxi.rs b/src/io/proxi.rs index c1ae913..d88268e 100644 --- a/src/io/proxi.rs +++ b/src/io/proxi.rs @@ -1,7 +1,7 @@ -use std::{cmp::Ordering, fmt::Display, marker::PhantomData, str::FromStr}; +use std::{cmp::Ordering, fmt::{self, Display}, marker::PhantomData, str::FromStr}; use num_traits::AsPrimitive; -use serde::{Deserialize, Serialize}; +use serde::{de::SeqAccess, Deserialize, Deserializer, Serialize}; use crate::{ curie, @@ -170,7 +170,7 @@ fn transform_response( match response { Ok(PROXIResponse::Spectra(s)) if s.iter() - .all(|s| s.status.is_none_or(|s| s == Status::Readable)) => + .all(|s| s.status.map(|s| s == Status::Readable).unwrap_or(true)) => { Ok((backend, s)) } @@ -678,13 +678,6 @@ use serde::de::{self, Visitor}; #[derive(Debug, Default, Clone, Copy)] pub struct Wrapped(T); -impl std::ops::Deref for Wrapped { - type Target = T; - fn deref(&self) -> &Self::Target { - &self.0 - } -} - impl<'de, T: 'static + Default + Copy + FromStr> serde::Deserialize<'de> for Wrapped where T::Err: Display, @@ -721,9 +714,7 @@ impl serde::Serialize for Wrapped { } #[derive(Debug, Default)] -struct PotentiallyWrappedNumberVisitor { - marker: PhantomData, -} +struct PotentiallyWrappedNumberVisitor(PhantomData); impl<'de, T: 'static + Copy + FromStr> Visitor<'de> for PotentiallyWrappedNumberVisitor where @@ -751,8 +742,49 @@ where } fn visit_str(self, value: &str) -> Result { - value.parse().map_err(|e| serde::de::Error::custom(e)) + value.parse().map_err(|e| de::Error::custom(e)) + } +} + + +fn deserialize_wrapped_series<'de, T, D>(deserializer: D) -> Result, D::Error> +where + T: Deserialize<'de> + Default + Copy + FromStr + 'static, + T::Err: Display, + f64: AsPrimitive, + u64: AsPrimitive, + i64: AsPrimitive, + D: Deserializer<'de>, +{ + struct WrappingVisitor(PhantomData T>); + + impl<'de, T> Visitor<'de> for WrappingVisitor + where + T: Deserialize<'de> + Default + Copy + FromStr + 'static, + T::Err: Display, + f64: AsPrimitive, + u64: AsPrimitive, + i64: AsPrimitive, + { + type Value = Vec; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a nonempty sequence of numbers") + } + + fn visit_seq(self, mut seq: S) -> Result, S::Error> + where + S: SeqAccess<'de>, + { + let mut buf = Vec::new(); + while let Some(value) = seq.next_element::>()? { + buf.push(value.0); + } + Ok(buf) + } } + let visitor = WrappingVisitor::(PhantomData); + deserializer.deserialize_seq(visitor) } /// A spectrum returnd by a PROXI server @@ -770,10 +802,10 @@ pub struct PROXISpectrum { /// Metadata for this spectrum #[serde(default)] pub attributes: Vec, - #[serde(default)] - pub mzs: Vec>, - #[serde(default)] - pub intensities: Vec>, + #[serde(default, deserialize_with = "deserialize_wrapped_series")] + pub mzs: Vec, + #[serde(default, deserialize_with = "deserialize_wrapped_series")] + pub intensities: Vec, #[serde(skip_serializing_if = "Option::is_none", default)] pub charges: Option>, } @@ -977,22 +1009,12 @@ impl< let mut mz_array = DataArray::from_name_and_type(&ArrayType::MZArray, BinaryDataArrayType::Float64); - mz_array - .extend(&value.mzs.into_iter().map(|v| v.0).collect::>()) - .unwrap(); + mz_array.extend(&value.mzs).unwrap(); arrays.add(mz_array); let mut intensity_array = DataArray::from_name_and_type(&ArrayType::IntensityArray, BinaryDataArrayType::Float32); - intensity_array - .extend( - &value - .intensities - .into_iter() - .map(|v| v.0) - .collect::>(), - ) - .unwrap(); + intensity_array.extend(&value.intensities).unwrap(); arrays.add(intensity_array); if let Some(charges) = value.charges.as_ref() { @@ -1119,13 +1141,12 @@ where match value.peaks() { crate::spectrum::RefPeakDataLevel::Missing => {} crate::spectrum::RefPeakDataLevel::RawData(arrays) => { - this.mzs = arrays.mzs().unwrap().iter().copied().map(Wrapped).collect(); + this.mzs = arrays.mzs().unwrap().iter().copied().collect(); this.intensities = arrays .intensities() .unwrap() .iter() .copied() - .map(Wrapped) .collect(); if let Ok(arr) = arrays.charges() { this.charges = Some(arr.to_vec()); @@ -1134,13 +1155,13 @@ where crate::spectrum::RefPeakDataLevel::Centroid(peaks) => { (this.mzs, this.intensities) = peaks .iter() - .map(|p| (Wrapped(p.mz()), Wrapped(p.intensity()))) + .map(|p| (p.mz(), p.intensity())) .unzip(); } crate::spectrum::RefPeakDataLevel::Deconvoluted(peaks) => { (this.mzs, this.intensities) = peaks .iter() - .map(|p| (Wrapped(p.mz()), Wrapped(p.intensity()))) + .map(|p| (p.mz(), p.intensity())) .unzip(); this.charges = Some( peaks From 0df294cc98fa014d47abf2b6876e674f9f29f3ac Mon Sep 17 00:00:00 2001 From: Douwe Schulte Date: Fri, 1 Nov 2024 10:13:11 +0100 Subject: [PATCH 08/11] Expanded documentation --- src/io/proxi.rs | 65 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/src/io/proxi.rs b/src/io/proxi.rs index d88268e..66e6156 100644 --- a/src/io/proxi.rs +++ b/src/io/proxi.rs @@ -1,4 +1,9 @@ -use std::{cmp::Ordering, fmt::{self, Display}, marker::PhantomData, str::FromStr}; +use std::{ + cmp::Ordering, + fmt::{self, Display}, + marker::PhantomData, + str::FromStr, +}; use num_traits::AsPrimitive; use serde::{de::SeqAccess, Deserialize, Deserializer, Serialize}; @@ -68,11 +73,23 @@ impl PROXIBackend { } impl USI { - /// Retrieve this USI from the given PROXI backend. If no PROXI backend is indicated it will - /// aggregate the results from all known backends and return the first successful spectrum. + /// Download this USI using the [PROXI](https://github.com/HUPO-PSI/proxi-schemas/) API from + /// the given PROXI backend. If no PROXI backend is indicated it will try all known backends + /// sequentially and return the first successful spectrum. The result contains the backend that + /// ultimately succeeded as well as the list of returned spectra. This is a list because the + /// USI could contain the stem of the filename and the backend could have multiple formats for + /// that file in which case the data from each file format is returned. /// /// A [`reqwest::blocking::Client`] can be provided to create the requests, if no client is - /// provided a default client will be used. + /// provided a default client will be used. This is useful to reuse a client over multiple + /// requests for performance or if needed to use proxies to download the data. + /// + /// # Errors + /// It returns a [`PROXIError::IO`] when the network request or the parsing of the answer + /// failed. It returns [`PROXIError::Error`] if the backend returned an error. It returns + /// [`PROXIError::PeakUnavailable`] if all returned spectra have [`Status::PeakUnavailable`]. + /// It returns [`PROXIError::NotFound`] if no backend gave any result (also no error) in the + /// case of the aggregate PROXI calling, this is a major error and should not occur readily. /// /// This function is only available with the feature `proxi`. pub fn get_spectrum_blocking( @@ -111,11 +128,23 @@ impl USI { ) } - /// Retrieve this USI from the given PROXI backend. If no PROXI backend is indicated it will - /// aggregate the results from all known backends and return the first successful spectrum. + /// Download this USI using the [PROXI](https://github.com/HUPO-PSI/proxi-schemas/) API from + /// the given PROXI backend. If no PROXI backend is indicated it will try all known backends + /// concurrently and return the first successful spectrum. The result contains the backend that + /// ultimately succeeded as well as the list of returned spectra. This is a list because the + /// USI could contain the stem of the filename and the backend could have multiple formats for + /// that file in which case the data from each file format is returned. + /// + /// A [`reqwest::blocking::Client`] can be provided to create the requests, if no client is + /// provided a default client will be used. This is useful to reuse a client over multiple + /// requests for performance or if needed to use proxies to download the data. /// - /// A [`reqwest::Client`] can be provided to create the requests, if no client is - /// provided a default client will be used. + /// # Errors + /// It returns a [`PROXIError::IO`] when the network request or the parsing of the answer + /// failed. It returns [`PROXIError::Error`] if the backend returned an error. It returns + /// [`PROXIError::PeakUnavailable`] if all returned spectra have [`Status::PeakUnavailable`]. + /// It returns [`PROXIError::NotFound`] if no backend gave any result (also no error) in the + /// case of the aggregate PROXI calling, this is a major error and should not occur readily. /// /// This function is only available with the feature `proxi-async`. #[cfg(feature = "proxi-async")] @@ -746,7 +775,6 @@ where } } - fn deserialize_wrapped_series<'de, T, D>(deserializer: D) -> Result, D::Error> where T: Deserialize<'de> + Default + Copy + FromStr + 'static, @@ -1142,27 +1170,18 @@ where crate::spectrum::RefPeakDataLevel::Missing => {} crate::spectrum::RefPeakDataLevel::RawData(arrays) => { this.mzs = arrays.mzs().unwrap().iter().copied().collect(); - this.intensities = arrays - .intensities() - .unwrap() - .iter() - .copied() - .collect(); + this.intensities = arrays.intensities().unwrap().iter().copied().collect(); if let Ok(arr) = arrays.charges() { this.charges = Some(arr.to_vec()); } } crate::spectrum::RefPeakDataLevel::Centroid(peaks) => { - (this.mzs, this.intensities) = peaks - .iter() - .map(|p| (p.mz(), p.intensity())) - .unzip(); + (this.mzs, this.intensities) = + peaks.iter().map(|p| (p.mz(), p.intensity())).unzip(); } crate::spectrum::RefPeakDataLevel::Deconvoluted(peaks) => { - (this.mzs, this.intensities) = peaks - .iter() - .map(|p| (p.mz(), p.intensity())) - .unzip(); + (this.mzs, this.intensities) = + peaks.iter().map(|p| (p.mz(), p.intensity())).unzip(); this.charges = Some( peaks .iter() From a9807d888fd16251e9b08004f9f2c5fae1354a19 Mon Sep 17 00:00:00 2001 From: Douwe Schulte Date: Fri, 1 Nov 2024 12:48:08 +0100 Subject: [PATCH 09/11] Fixed spectrum representation parsing --- src/io/proxi.rs | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/io/proxi.rs b/src/io/proxi.rs index 66e6156..daed96d 100644 --- a/src/io/proxi.rs +++ b/src/io/proxi.rs @@ -874,19 +874,24 @@ impl From<&PROXISpectrum> for SpectrumDescription { "ms level" => { this.ms_level = param.value.to_i32().expect("Failed to parse ms level") as u8; } - "positive scan" => { - this.polarity = ScanPolarity::Positive; - } - "negative scan" => { - this.polarity = ScanPolarity::Negative; - } - "profile spectrum" => { - this.signal_continuity = SignalContinuity::Profile; - } - "centroid spectrum" => { - this.signal_continuity = SignalContinuity::Centroid; - } - + "scan polarity" => match param.value.as_str().as_ref() { + "positive scan" => { + this.polarity = ScanPolarity::Positive; + } + "negative scan" => { + this.polarity = ScanPolarity::Negative; + } + _ => (), + }, + "spectrum representation" => match param.value.as_str().as_ref() { + "profile spectrum" => { + this.signal_continuity = SignalContinuity::Profile; + } + "centroid spectrum" => { + this.signal_continuity = SignalContinuity::Centroid; + } + _ => (), + }, "scan start time" => { if let Some(s) = this.acquisition.first_scan_mut() { s.start_time = param From 1cc1816b219b62f18481a63a2c7a54662a27e273 Mon Sep 17 00:00:00 2001 From: Douwe Schulte Date: Fri, 1 Nov 2024 12:54:14 +0100 Subject: [PATCH 10/11] Renamed to 'download_spectrum_*' and fixed link in docs --- src/io/proxi.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/io/proxi.rs b/src/io/proxi.rs index daed96d..278cb2a 100644 --- a/src/io/proxi.rs +++ b/src/io/proxi.rs @@ -92,7 +92,7 @@ impl USI { /// case of the aggregate PROXI calling, this is a major error and should not occur readily. /// /// This function is only available with the feature `proxi`. - pub fn get_spectrum_blocking( + pub fn download_spectrum_blocking( &self, backend: Option, client: Option, @@ -135,7 +135,7 @@ impl USI { /// USI could contain the stem of the filename and the backend could have multiple formats for /// that file in which case the data from each file format is returned. /// - /// A [`reqwest::blocking::Client`] can be provided to create the requests, if no client is + /// A [`reqwest::Client`] can be provided to create the requests, if no client is /// provided a default client will be used. This is useful to reuse a client over multiple /// requests for performance or if needed to use proxies to download the data. /// @@ -148,7 +148,7 @@ impl USI { /// /// This function is only available with the feature `proxi-async`. #[cfg(feature = "proxi-async")] - pub async fn get_spectrum_async( + pub async fn download_spectrum_async( &self, backend: Option, client: Option, @@ -1233,7 +1233,7 @@ mod test { .parse() .unwrap(); let (_, response) = usi - .get_spectrum_blocking(Some(PROXIBackend::PeptideAtlas), None) + .download_spectrum_blocking(Some(PROXIBackend::PeptideAtlas), None) .unwrap(); assert!(!response.is_empty()); } @@ -1244,7 +1244,7 @@ mod test { .parse() .unwrap(); let (_, response) = usi - .get_spectrum_blocking(Some(PROXIBackend::MassIVE), None) + .download_spectrum_blocking(Some(PROXIBackend::MassIVE), None) .unwrap(); assert!(!response.is_empty()); } @@ -1256,7 +1256,7 @@ mod test { .parse() .unwrap(); let (_, response) = usi - .get_spectrum_blocking(Some(PROXIBackend::Pride), None) + .download_spectrum_blocking(Some(PROXIBackend::Pride), None) .unwrap(); assert!(!response.is_empty()); } @@ -1268,7 +1268,7 @@ mod test { .parse() .unwrap(); let (_, response) = usi - .get_spectrum_blocking(Some(PROXIBackend::ProteomeXchange), None) + .download_spectrum_blocking(Some(PROXIBackend::ProteomeXchange), None) .unwrap(); assert!(!response.is_empty()); } @@ -1282,7 +1282,7 @@ mod test { "mzspec:PXD004939:Rice_phos_ABA_3h_20per_F1_R2:scan:2648:DAEKS[UNIMOD:21]PIN[UNIMOD:7]GR/2"] { println!("Trying: {usi}"); let usi: USI = usi.parse().unwrap(); - let (_, response) = usi.get_spectrum_blocking(None, None).unwrap(); + let (_, response) = usi.download_spectrum_blocking(None, None).unwrap(); assert!(!response.is_empty()); } } @@ -1301,7 +1301,7 @@ mod tests { "mzspec:PXD004939:Rice_phos_ABA_3h_20per_F1_R2:scan:2648:DAEKS[UNIMOD:21]PIN[UNIMOD:7]GR/2"] { println!("Trying: {usi}"); let usi: USI = usi.parse().unwrap(); - let (_, response) = usi.get_spectrum_async(None).await.unwrap(); + let (_, response) = usi.download_spectrum_async(None).await.unwrap(); assert!(!response.is_empty()); } } From 2c375cdeebbabd9a773678dc2763e6defe436226 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Fri, 1 Nov 2024 08:52:27 -0400 Subject: [PATCH 11/11] chore: fix signature --- src/io/proxi.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/proxi.rs b/src/io/proxi.rs index 278cb2a..1d2fa9e 100644 --- a/src/io/proxi.rs +++ b/src/io/proxi.rs @@ -1301,7 +1301,7 @@ mod tests { "mzspec:PXD004939:Rice_phos_ABA_3h_20per_F1_R2:scan:2648:DAEKS[UNIMOD:21]PIN[UNIMOD:7]GR/2"] { println!("Trying: {usi}"); let usi: USI = usi.parse().unwrap(); - let (_, response) = usi.download_spectrum_async(None).await.unwrap(); + let (_, response) = usi.download_spectrum_async(None, None).await.unwrap(); assert!(!response.is_empty()); } }