Skip to content

Commit

Permalink
Remove null bytes from StudyInstanceUID and SeriesInstanceUID
Browse files Browse the repository at this point in the history
  • Loading branch information
jennydaman committed Jul 31, 2023
1 parent 59c4ebe commit 9475dd8
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 26 deletions.
20 changes: 20 additions & 0 deletions src/helpers.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use crate::errors::DicomTagReadError;
use dicom::core::Tag;
use dicom::object::DefaultDicomObject;
use regex::Regex;
use std::sync::OnceLock;

/// Get the value of a tag in a DICOM object as a `&str`.
///
Expand All @@ -13,3 +15,21 @@ pub(crate) fn tt(dcm: &DefaultDicomObject, tag: Tag) -> Result<&str, DicomTagRea
.map_err(|error| DicomTagReadError::NotString { error, tag })
})
}

/// Read the value of `tag` from `dcm` as an owned `String` with every NUL
/// (`'\0'`) character removed.
///
/// The lookup itself is delegated to [`tt`].
///
/// # Errors
///
/// Returns whatever [`DicomTagReadError`] [`tt`] reports, unchanged.
pub(crate) fn tts(dcm: &DefaultDicomObject, tag: Tag) -> Result<String, DicomTagReadError> {
    let raw = tt(dcm, tag)?;
    Ok(raw.replace('\0', ""))
}

/// Produce a copy of `s` in which every run of disallowed characters is
/// replaced by a single `"_"`.
///
/// Allowed characters are ASCII letters, ASCII digits, `.` and `-`.
/// Reference implementation (pypx):
/// <https://github.com/FNNDSC/pypx/blob/7619c15f4d2303d6d5ca7c255d81d06c7ab8682b/pypx/repack.py#L424>
///
/// NUL characters are removed outright *before* the replacement step, so they
/// never turn into `"_"`.
pub(crate) fn sanitize<S: AsRef<str>>(s: S) -> String {
    // The pattern is compiled on first use and cached for every later call.
    let disallowed = VALID_CHARS_RE.get_or_init(|| Regex::new(r#"[^A-Za-z0-9\.\-]+"#).unwrap());
    let without_nul = s.as_ref().replace('\0', "");
    disallowed.replace_all(&without_nul, "_").into_owned()
}

/// Lazily initialized regex used by [`sanitize`]; it matches runs of
/// characters that are *not* allowed.
static VALID_CHARS_RE: OnceLock<Regex> = OnceLock::new();
6 changes: 3 additions & 3 deletions src/log_models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ impl<'a> StudyDataMeta<'a> {
PatientID: e.PatientID,
StudyDescription: e.StudyDescription,
StudyDate: e.StudyDate,
StudyInstanceUID: e.StudyInstanceUID,
StudyInstanceUID: &e.StudyInstanceUID,
PerformedStationAETitle: tt(&d, tags::PERFORMED_STATION_AE_TITLE).unwrap_or(""),
};
Ok(data)
Expand Down Expand Up @@ -195,8 +195,8 @@ impl<'a> SeriesDataMeta<'a> {
) -> Result<Self, DicomTagReadError> {
let data = Self {
PatientID: e.PatientID,
StudyInstanceUID: e.StudyInstanceUID,
SeriesInstanceUID: e.SeriesInstanceUID,
StudyInstanceUID: &e.StudyInstanceUID,
SeriesInstanceUID: &e.SeriesInstanceUID,
SeriesDescription: e.SeriesDescription,
SeriesNumber: e.SeriesNumber.clone(),
SeriesDate: e.StudyDate,
Expand Down
2 changes: 1 addition & 1 deletion src/log_write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ pub(crate) fn write_logs(
unpack.dir.to_string(),
&dcm,
)?;
let data: HashMap<_, _> = [(elements.StudyInstanceUID, study_series_meta)].into();
let data: HashMap<_, _> = [(&elements.StudyInstanceUID, study_series_meta)].into();
write_json(data, study_series_meta_fname)?;
}

Expand Down
27 changes: 5 additions & 22 deletions src/pack_path.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
//! Functions for deciding where to copy the received DICOM to.
use crate::errors::DicomTagReadError;
use crate::helpers::tt;
use crate::helpers::{sanitize, tt, tts};
use camino::{Utf8Path, Utf8PathBuf};
use dicom::dictionary_std::tags;
use dicom::object::DefaultDicomObject;
use regex::Regex;

use std::sync::OnceLock;

/// Destination directory and file name for the DICOM file.
pub(crate) struct PypxPath {
Expand Down Expand Up @@ -63,8 +60,8 @@ pub(crate) struct PypxPathElements<'a> {
pub SeriesDescription: &'a str,

// these are not part of the path name, but used in the log path names.
pub StudyInstanceUID: &'a str,
pub SeriesInstanceUID: &'a str,
pub StudyInstanceUID: String,
pub SeriesInstanceUID: String,
}

impl<'a> TryFrom<&'a DefaultDicomObject> for PypxPathElements<'a> {
Expand All @@ -84,23 +81,9 @@ impl<'a> TryFrom<&'a DefaultDicomObject> for PypxPathElements<'a> {
StudyDate: tt(&dcm, tags::STUDY_DATE)?,
SeriesNumber: tt(&dcm, tags::SERIES_NUMBER)?.parse()?,
SeriesDescription: tt(&dcm, tags::SERIES_DESCRIPTION)?,
StudyInstanceUID: tt(&dcm, tags::STUDY_INSTANCE_UID)?,
SeriesInstanceUID: tt(&dcm, tags::SERIES_INSTANCE_UID)?,
StudyInstanceUID: tts(&dcm, tags::STUDY_INSTANCE_UID)?,
SeriesInstanceUID: tts(&dcm, tags::SERIES_INSTANCE_UID)?,
};
Ok(data)
}
}

/// Produce a copy of `s` in which every run of disallowed characters is
/// replaced by a single `"_"`.
///
/// Allowed characters are ASCII letters, ASCII digits, `.` and `-`.
/// Reference implementation (pypx):
/// <https://github.com/FNNDSC/pypx/blob/7619c15f4d2303d6d5ca7c255d81d06c7ab8682b/pypx/repack.py#L424>
///
/// NUL characters are removed outright *before* the replacement step, so they
/// never turn into `"_"`.
fn sanitize<S: AsRef<str>>(s: S) -> String {
    // The pattern is compiled on first use and cached for every later call.
    let disallowed = VALID_CHARS_RE.get_or_init(|| Regex::new(r#"[^A-Za-z0-9\.\-]+"#).unwrap());
    let without_nul = s.as_ref().replace('\0', "");
    disallowed.replace_all(&without_nul, "_").into_owned()
}

/// Lazily initialized regex used by `sanitize`; it matches runs of characters
/// that are *not* allowed.
static VALID_CHARS_RE: OnceLock<Regex> = OnceLock::new();

0 comments on commit 9475dd8

Please sign in to comment.