Skip to content

Commit

Permalink
feature: add more descriptive errors and failure messages when encoun…
Browse files Browse the repository at this point in the history
…tering errors while parsing mzML files
  • Loading branch information
mobiusklein committed Sep 22, 2024
1 parent 551f499 commit 46d66cf
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 106 deletions.
185 changes: 103 additions & 82 deletions src/io/mzml/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,15 @@ pub trait SpectrumBuilding<
) -> Self;
}

/// Shorthand for constructing the XML-related variants of `MzMLParserError`.
///
/// * `xml_error!(state, err)` expands to `MzMLParserError::XMLError(state, err)`.
/// * `xml_error!(state, err, ctx)` expands to
///   `MzMLParserError::XMLErrorContext(state, err, ctx)`.
///
/// The first two arguments must be plain identifiers (they are matched with
/// the `ident` fragment specifier); the context may be any expression.
macro_rules! xml_error {
    // Form with an extra context value attached to the error.
    ($parser_state:ident, $cause:ident, $context:expr) => (
        MzMLParserError::XMLErrorContext($parser_state, $cause, $context)
    );
    // Bare form: only the parser state and the underlying XML error.
    ($parser_state:ident, $cause:ident) => (
        MzMLParserError::XMLError($parser_state, $cause)
    );
}

/// Fixed size of 10,000 used by this module.
// NOTE(review): the consumer of this constant is not visible in this excerpt —
// presumably a read-buffer capacity in bytes; confirm against its use site.
const BUFFER_SIZE: usize = 10_000;

/// An accumulator for the attributes of a spectrum as it is read from an
Expand Down Expand Up @@ -633,10 +642,16 @@ impl<
match attr_parsed {
Ok(attr) => match attr.key.as_ref() {
b"id" => {
self.entry_id = attr
.unescape_value()
.expect("Error decoding id")
.to_string();
self.entry_id = match attr.unescape_value() {
Ok(value) => value.to_string(),
Err(e) => {
return Err(xml_error!(
state,
e,
"Failed to decode spectrum id".into()
))
}
}
}
b"index" => {
self.index = String::from_utf8_lossy(&attr.value)
Expand Down Expand Up @@ -778,106 +793,112 @@ impl<
// Inline the `fill_param_into` to avoid excessive copies.
b"cvParam" | b"userParam" => {
match Self::handle_param_borrowed(event, reader_position, state) {
Ok(param) => match state {
MzMLParserState::Spectrum | MzMLParserState::Chromatogram => {
self.fill_spectrum(param)
}
MzMLParserState::ScanList => {
if param.is_controlled() {
if let Some(comb) = ScanCombination::from_accession(
param.controlled_vocabulary.unwrap(),
param.accession.unwrap(),
) {
self.acquisition.combination = comb
Ok(param) => {
match state {
MzMLParserState::Spectrum | MzMLParserState::Chromatogram => {
self.fill_spectrum(param)
}
MzMLParserState::ScanList => {
if param.is_controlled() {
if let Some(comb) = ScanCombination::from_accession(
param.controlled_vocabulary.unwrap(),
param.accession.unwrap(),
) {
self.acquisition.combination = comb
} else {
self.acquisition.add_param(param.into())
}
} else {
self.acquisition.add_param(param.into())
}
} else {
self.acquisition.add_param(param.into())
}
}
MzMLParserState::Scan => match param.name.as_bytes() {
b"scan start time" => {
let value: f64 = param
MzMLParserState::Scan => match param.name.as_bytes() {
b"scan start time" => {
let value: f64 = param
.to_f64()
.unwrap_or_else(|e| panic!("Expected floating point number for scan time: {e} for {}", self.warning_context()));
let value = match &param.unit {
Unit::Minute => value,
Unit::Second => value / 60.0,
Unit::Millisecond => value / 60000.0,
_ => {
warn!("Could not infer unit for {:?} for {}", param, self.warning_context());
value
}
};
self.acquisition.scans.last_mut().unwrap().start_time = value;
}
b"ion injection time" => {
self.acquisition.scans.last_mut().unwrap().injection_time = param.to_f32().unwrap_or_else(
let value = match &param.unit {
Unit::Minute => value,
Unit::Second => value / 60.0,
Unit::Millisecond => value / 60000.0,
_ => {
warn!(
"Could not infer unit for {:?} for {}",
param,
self.warning_context()
);
value
}
};
self.acquisition.scans.last_mut().unwrap().start_time = value;
}
b"ion injection time" => {
self.acquisition.scans.last_mut().unwrap().injection_time = param.to_f32().unwrap_or_else(
|e| panic!("Expected floating point number for injection time: {e} for {}", self.warning_context())
);
}
_ => self
}
_ => self
.acquisition
.scans
.last_mut()
.unwrap()
.add_param(param.into()),
},
MzMLParserState::ScanWindowList => self
.acquisition
.scans
.last_mut()
.unwrap()
.add_param(param.into()),
},
MzMLParserState::ScanWindowList => self
.acquisition
.scans
.last_mut()
.unwrap()
.add_param(param.into()),
MzMLParserState::ScanWindow => {
self.fill_scan_window(param.into());
}
MzMLParserState::IsolationWindow => {
self.fill_isolation_window(param.into());
}
MzMLParserState::SelectedIon | MzMLParserState::SelectedIonList => {
self.fill_selected_ion(param.into());
}
MzMLParserState::Activation => {
if Activation::is_param_activation(&param) {
self.precursor.activation.methods_mut().push(param.into());
} else {
let dissociation_energy = param.curie().and_then(|c| {
MzMLParserState::ScanWindow => {
self.fill_scan_window(param.into());
}
MzMLParserState::IsolationWindow => {
self.fill_isolation_window(param.into());
}
MzMLParserState::SelectedIon | MzMLParserState::SelectedIonList => {
self.fill_selected_ion(param.into());
}
MzMLParserState::Activation => {
if Activation::is_param_activation(&param) {
self.precursor.activation.methods_mut().push(param.into());
} else {
let dissociation_energy = param.curie().and_then(|c| {
DissociationEnergyTerm::from_curie(&c, param.value().to_f32().unwrap_or_else(|e| {
warn!("Failed to convert dissociation energy: {e} for {} for {}", param.name(), self.warning_context());
0.0
}))
});
match dissociation_energy {
Some(t) => {
if t.is_supplemental() {
self.precursor.activation.add_param(param.into())
} else {
if self.precursor.activation.energy != 0.0 {
warn!(
match dissociation_energy {
Some(t) => {
if t.is_supplemental() {
self.precursor.activation.add_param(param.into())
} else {
if self.precursor.activation.energy != 0.0 {
warn!(
"Multiple dissociation energies detected. Saw {t} after already setting dissociation energy for {}",
self.warning_context()
);
}
self.precursor.activation.energy = t.energy();
}
self.precursor.activation.energy = t.energy();
}
}
None => {
self.precursor.activation.add_param(param.into());
None => {
self.precursor.activation.add_param(param.into());
}
}
}
}
MzMLParserState::BinaryDataArrayList => {}
MzMLParserState::BinaryDataArray => {
self.fill_binary_data_array(param);
}
MzMLParserState::Precursor | MzMLParserState::PrecursorList => {
warn!("cvParam found for {:?} where none are allowed", &state);
}
_ => {}
}
MzMLParserState::BinaryDataArrayList => {}
MzMLParserState::BinaryDataArray => {
self.fill_binary_data_array(param);
}
MzMLParserState::Precursor | MzMLParserState::PrecursorList => {
warn!("cvParam found for {:?} where none are allowed", &state);
}
_ => {}
},
}
Err(err) => return Err(err),
}
}
Expand Down Expand Up @@ -907,9 +928,9 @@ impl<
b"binaryDataArray" => {
let mut array = mem::take(&mut self.current_array);
if self.detail_level == DetailLevel::Full {
array
.decode_and_store()
.expect("Error during decoding and storing of array data");
array.decode_and_store().map_err(|e| {
MzMLParserError::ArrayDecodingError(state, array.name.clone(), e)
})?;
}
self.arrays.add(array);
return Ok(MzMLParserState::BinaryDataArrayList);
Expand All @@ -926,7 +947,7 @@ impl<
if state == MzMLParserState::Binary && self.detail_level != DetailLevel::MetadataOnly {
let bin = event
.unescape()
.expect("Failed to unescape binary data array content");
.map_err(|e| MzMLParserError::XMLError(state, e))?;
self.current_array.data = Bytes::from(bin.as_bytes());
}
Ok(state)
Expand Down Expand Up @@ -1827,7 +1848,7 @@ impl<
self.spectrum_index.init = true;
*self.chromatogram_index = indexer.chromatogram_index;
self.chromatogram_index.init = true;
self.handle.seek(SeekFrom::Start(current_position)).unwrap();
self.handle.seek(SeekFrom::Start(current_position))?;
Ok(self.spectrum_index.len() as u64)
}

Expand Down
55 changes: 31 additions & 24 deletions src/io/mzml/reading_shared.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,17 @@ use quick_xml::Error as XMLError;

use thiserror::Error;

use crate::prelude::*;
use super::reader::Bytes;
use crate::io::traits::SeekRead;
use crate::io::OffsetIndex;
use crate::meta::{
Component, ComponentType, DataProcessing, FileDescription, InstrumentConfiguration, MassSpectrometerFileFormatTerm, NativeSpectrumIdentifierFormatTerm, ProcessingMethod, Sample, Software, SourceFile
Component, ComponentType, DataProcessing, FileDescription, InstrumentConfiguration,
MassSpectrometerFileFormatTerm, NativeSpectrumIdentifierFormatTerm, ProcessingMethod, Sample,
Software, SourceFile,
};
use crate::params::{curie_to_num, ControlledVocabulary, Param, ParamCow, Unit};

use super::reader::Bytes;
use crate::prelude::*;
use crate::spectrum::{bindata::ArrayRetrievalError, ArrayType};

/**
The different states the [`MzMLReaderType`](crate::io::mzml::MzMLReaderType) can enter while parsing
Expand Down Expand Up @@ -90,7 +92,7 @@ pub enum MzMLParserState {
ChromatogramListDone,

ParserError,
EOF
EOF,
}

impl Display for MzMLParserState {
Expand All @@ -103,26 +105,30 @@ impl Display for MzMLParserState {
pub enum EntryType {
#[default]
Spectrum,
Chromatogram
Chromatogram,
}

/**
All the ways that mzML parsing can go wrong
*/
#[derive(Debug, Error)]
pub enum MzMLParserError {
#[error("An error occurred outside of normal conditions {0:?}")]
#[error("An error occurred outside of normal conditions {0}")]
UnknownError(MzMLParserState),
#[error("An incomplete spectrum was parsed")]
IncompleteSpectrum,
#[error("An incomplete element {0} was encountered in {1:?}")]
#[error("An incomplete element {0} was encountered in {1}")]
IncompleteElementError(String, MzMLParserState),
#[error("An XML error {1:?} was encountered in {0:?}")]
#[error("An XML error {1} was encountered in {0}")]
XMLError(MzMLParserState, #[source] XMLError),
#[error("An IO error {1} was encountered in {0:?}")]
#[error("An XML error {1} was encountered in {0}: {2}")]
XMLErrorContext(MzMLParserState, #[source] XMLError, String),
#[error("An IO error {1} was encountered in {0}")]
IOError(MzMLParserState, #[source] io::Error),
#[error("The {0} section is over")]
SectionOver(&'static str)
SectionOver(&'static str),
#[error("Failed to decode {1}: {2} for {0}")]
ArrayDecodingError(MzMLParserState, ArrayType, ArrayRetrievalError),
}

impl From<MzMLParserError> for io::Error {
Expand Down Expand Up @@ -783,10 +789,11 @@ impl<'a> FileMetadataBuilder<'a> {
.expect("Error decoding id")
.to_string();
} else if attr.key.as_ref() == b"name" {
sample.name = Some(attr
.unescape_value()
.expect("Error decoding name")
.to_string());
sample.name = Some(
attr.unescape_value()
.expect("Error decoding name")
.to_string(),
);
}
}
Err(msg) => {
Expand Down Expand Up @@ -862,10 +869,8 @@ impl<'a> FileMetadataBuilder<'a> {
let value = attr
.unescape_value()
.expect("Error decoding default instrument configuration ID");
self.default_instrument_config = self
.instrument_id_map
.as_mut()
.map(|m| m.get(&value));
self.default_instrument_config =
self.instrument_id_map.as_mut().map(|m| m.get(&value));
}
b"defaultSourceFileRef" => {
self.default_source_file = Some(
Expand All @@ -875,11 +880,13 @@ impl<'a> FileMetadataBuilder<'a> {
);
}
b"startTimeStamp" => {
let val = attr.unescape_value().expect("Error decoding start timestamp");
let val = DateTime::parse_from_rfc3339(&val).expect("Expected a dateTime value conforming to ISO 8601 standard");
self.start_timestamp = Some(
val
let val = attr
.unescape_value()
.expect("Error decoding start timestamp");
let val = DateTime::parse_from_rfc3339(&val).expect(
"Expected a dateTime value conforming to ISO 8601 standard",
);
self.start_timestamp = Some(val);
}
_ => {}
}
Expand Down Expand Up @@ -929,7 +936,7 @@ impl<'a> FileMetadataBuilder<'a> {
} else {
sf.add_param(param)
}
},
}
MzMLParserState::Sample => {
let sample = self.samples.last_mut().unwrap();
sample.add_param(param)
Expand Down

0 comments on commit 46d66cf

Please sign in to comment.