Skip to content

Commit

Permalink
Adds UOF formats support
Browse files Browse the repository at this point in the history
  • Loading branch information
mmalecot committed Jan 28, 2024
1 parent bc68481 commit 0991fae
Show file tree
Hide file tree
Showing 10 changed files with 136 additions and 70 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ identification.
- Sun XML Writer (SXW)
- Sun XML Writer Global (SGW)
- Sun XML Writer Template (STW)
- Uniform Office Format Text (UOT)
- WordPerfect Document (WPD)

### Executable
Expand Down Expand Up @@ -508,6 +509,7 @@ identification.
- StarImpress (SDD)
- Sun XML Impress (SXI)
- Sun XML Impress Template (STI)
- Uniform Office Format Presentation (UOP)
- WordPerfect Presentations (SHW)

### ROM
Expand Down Expand Up @@ -538,6 +540,7 @@ identification.
- StarCalc (SDC)
- Sun XML Calc (SXC)
- Sun XML Calc Template (STC)
- Uniform Office Format Spreadsheet (UOS)

### Subtitle

Expand Down
2 changes: 2 additions & 0 deletions fixtures/document/sample.uot
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<uof:UOF xmlns:uof="http://schemas.uof.org/cn/2003/uof" xmlns:表="http://schemas.uof.org/cn/2003/uof-spreadsheet" xmlns:演="http://schemas.uof.org/cn/2003/uof-slideshow" xmlns:字="http://schemas.uof.org/cn/2003/uof-wordproc" xmlns:图="http://schemas.uof.org/cn/2003/graph" xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882" xmlns:html="http://www.w3.org/TR/REC-html40" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" uof:language="cn" uof:locID="u0000" uof:version="1.0" uof:mimetype="vnd.uof.text">
2 changes: 2 additions & 0 deletions fixtures/presentation/sample.uop
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<uof:UOF xmlns:uof="http://schemas.uof.org/cn/2003/uof" xmlns:图="http://schemas.uof.org/cn/2003/graph" xmlns:字="http://schemas.uof.org/cn/2003/uof-wordproc" xmlns:演="http://schemas.uof.org/cn/2003/uof-slideshow" xmlns:表="http://schemas.uof.org/cn/2003/uof-spreadsheet" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" uof:language="cn" uof:version="1.0" uof:locID="u0000" uof:mimetype="vnd.uof.presentation">
2 changes: 2 additions & 0 deletions fixtures/spreadsheet/sample.uos
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<uof:UOF xmlns:uof="http://schemas.uof.org/cn/2003/uof" xmlns:表="http://schemas.uof.org/cn/2003/uof-spreadsheet" xmlns:演="http://schemas.uof.org/cn/2003/uof-slideshow" xmlns:字="http://schemas.uof.org/cn/2003/uof-wordproc" xmlns:图="http://schemas.uof.org/cn/2003/graph" xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882" xmlns:html="http://www.w3.org/TR/REC-html40" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" uof:language="cn" uof:locID="u0000" uof:version="1.0" uof:mimetype="vnd.uof.spreadsheet">
21 changes: 21 additions & 0 deletions src/formats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2656,6 +2656,27 @@ formats! {
extension = "mod"
kind = Audio

format = UniformOfficeFormatPresentation
name = "Uniform Office Format Presentation"
short_name = "UOP"
media_type = "application/vnd.uof.presentation"
extension = "uop"
kind = Presentation

format = UniformOfficeFormatSpreadsheet
name = "Uniform Office Format Spreadsheet"
short_name = "UOS"
media_type = "application/vnd.uof.spreadsheet"
extension = "uos"
kind = Spreadsheet

format = UniformOfficeFormatText
name = "Uniform Office Format Text"
short_name = "UOT"
media_type = "application/vnd.uof.text"
extension = "uot"
kind = Document

format = Universal3d
name = "Universal 3D"
short_name = "U3D"
Expand Down
3 changes: 3 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,9 @@ identification.
* [Tiled Tileset XML (TSX)](`FileFormat::TiledTilesetXml`)
* [Timed Text Markup Language (TTML)](`FileFormat::TimedTextMarkupLanguage`)
* [Training Center XML (TCX)](`FileFormat::TrainingCenterXml`)
* [Uniform Office Format Presentation (UOP)](`FileFormat::UniformOfficeFormatPresentation`)
* [Uniform Office Format Spreadsheet (UOS)](`FileFormat::UniformOfficeFormatSpreadsheet`)
* [Uniform Office Format Text (UOT)](`FileFormat::UniformOfficeFormatText`)
* [Universal Subtitle Format (USF)](`FileFormat::UniversalSubtitleFormat`)
* [XML Localization Interchange File Format (XLIFF)](`FileFormat::XmlLocalizationInterchangeFileFormat`)
* [XML Shareable Playlist Format (XSPF)](`FileFormat::XmlShareablePlaylistFormat`)
Expand Down
152 changes: 82 additions & 70 deletions src/readers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -568,8 +568,8 @@ impl crate::FileFormat {
// Determines the start index for searching the buffer.
let start = if total_nread == 0 { OVERLAP_SIZE } else { 0 };

// Checks if the buffer contains the AI file format marker.
if buf[start..OVERLAP_SIZE + nread].find(AI_MARKER).is_some() {
// Checks if the buffer holds the AI file format marker.
if buf[start..OVERLAP_SIZE + nread].holds(AI_MARKER) {
return Ok(Self::AdobeIllustratorArtwork);
}

Expand Down Expand Up @@ -685,75 +685,81 @@ impl crate::FileFormat {

/// Determines file format from a XML reader.
#[cfg(feature = "reader-xml")]
pub(crate) fn from_xml_reader<R: Read + Seek>(reader: R) -> Result<Self> {
// Maximum number of lines that can be processed by the reader.
const LINE_LIMIT: usize = 8;

// Maximum number of bytes that can be processed by the reader (32 KB).
const READ_LIMIT: u64 = 32_768;

// Creates a buffered reader.
let mut reader = BufReader::new(reader);

// Rewinds to the beginning of the stream.
reader.rewind()?;
pub(crate) fn from_xml_reader<R: Read + Seek>(mut reader: R) -> Result<Self> {
// Seeks just past the XML file format signature (5 bytes from the start of the stream).
reader.seek(SeekFrom::Start(5))?;

// Searches the reader for lines indicating the presence of various file formats.
for result in reader.take(READ_LIMIT).lines().take(LINE_LIMIT) {
let line = result?;
if line.contains("<abiword template=\"false\"") {
return Ok(Self::Abiword);
} else if line.contains("<abiword template=\"true\"") {
return Ok(Self::AbiwordTemplate);
} else if line.contains("<amf") {
return Ok(Self::AdditiveManufacturingFormat);
} else if line.contains("<ASX") || line.contains("<asx") {
return Ok(Self::AdvancedStreamRedirector);
} else if line.contains("<feed") {
return Ok(Self::Atom);
} else if line.contains("<COLLADA") || line.contains("<collada") {
return Ok(Self::DigitalAssetExchange);
} else if line.contains("<mxfile") {
return Ok(Self::Drawio);
} else if line.contains("<X3D") || line.contains("<x3d") {
return Ok(Self::Extensible3d);
} else if line.contains("<xsl") {
return Ok(Self::ExtensibleStylesheetLanguageTransformations);
} else if line.contains("<FictionBook") {
return Ok(Self::Fictionbook);
} else if line.contains("<gml") {
return Ok(Self::GeographyMarkupLanguage);
} else if line.contains("<gpx") {
return Ok(Self::GpsExchangeFormat);
} else if line.contains("<kml") {
return Ok(Self::KeyholeMarkupLanguage);
} else if line.contains("<math") {
return Ok(Self::MathematicalMarkupLanguage);
} else if line.contains("<MPD") {
return Ok(Self::MpegDashManifest);
} else if line.contains("<score-partwise") {
return Ok(Self::Musicxml);
} else if line.contains("<rss") {
return Ok(Self::ReallySimpleSyndication);
} else if line.contains("<SVG") || line.contains("<svg") {
return Ok(Self::ScalableVectorGraphics);
} else if line.contains("<soap") {
return Ok(Self::SimpleObjectAccessProtocol);
} else if line.contains("<map") {
return Ok(Self::TiledMapXml);
} else if line.contains("<tileset") {
return Ok(Self::TiledTilesetXml);
} else if line.contains("<tt") && line.contains("xmlns=\"http://www.w3.org/ns/ttml\"") {
return Ok(Self::TimedTextMarkupLanguage);
} else if line.contains("<TrainingCenterDatabase") {
return Ok(Self::TrainingCenterXml);
} else if line.contains("<USFSubtitles") {
return Ok(Self::UniversalSubtitleFormat);
} else if line.contains("<xliff") {
return Ok(Self::XmlLocalizationInterchangeFileFormat);
} else if line.contains("<playlist") {
return Ok(Self::XmlShareablePlaylistFormat);
}
// Creates and fills a buffer.
let mut buf = [0; 8192];
let nread = reader.read(&mut buf)?;

// Checks if the buffer holds markers indicating the presence of various file formats.
if buf[..nread].holds("<abiword template=\"false\"") {
return Ok(Self::Abiword);
} else if buf[..nread].holds("<abiword template=\"true\"") {
return Ok(Self::AbiwordTemplate);
} else if buf[..nread].holds("<amf") {
return Ok(Self::AdditiveManufacturingFormat);
} else if buf[..nread].holds("<ASX") || buf[..nread].holds("<asx") {
return Ok(Self::AdvancedStreamRedirector);
} else if buf[..nread].holds("<feed") {
return Ok(Self::Atom);
} else if buf[..nread].holds("<COLLADA") || buf[..nread].holds("<collada") {
return Ok(Self::DigitalAssetExchange);
} else if buf[..nread].holds("<mxfile") {
return Ok(Self::Drawio);
} else if buf[..nread].holds("<X3D") || buf[..nread].holds("<x3d") {
return Ok(Self::Extensible3d);
} else if buf[..nread].holds("<xsl") {
return Ok(Self::ExtensibleStylesheetLanguageTransformations);
} else if buf[..nread].holds("<FictionBook") {
return Ok(Self::Fictionbook);
} else if buf[..nread].holds("<gml") {
return Ok(Self::GeographyMarkupLanguage);
} else if buf[..nread].holds("<gpx") {
return Ok(Self::GpsExchangeFormat);
} else if buf[..nread].holds("<kml") {
return Ok(Self::KeyholeMarkupLanguage);
} else if buf[..nread].holds("<math") {
return Ok(Self::MathematicalMarkupLanguage);
} else if buf[..nread].holds("<MPD") {
return Ok(Self::MpegDashManifest);
} else if buf[..nread].holds("<score-partwise") {
return Ok(Self::Musicxml);
} else if buf[..nread].holds("<rss") {
return Ok(Self::ReallySimpleSyndication);
} else if buf[..nread].holds("<SVG") || buf[..nread].holds("<svg") {
return Ok(Self::ScalableVectorGraphics);
} else if buf[..nread].holds("<soap") {
return Ok(Self::SimpleObjectAccessProtocol);
} else if buf[..nread].holds("<map") {
return Ok(Self::TiledMapXml);
} else if buf[..nread].holds("<tileset") {
return Ok(Self::TiledTilesetXml);
} else if buf[..nread].holds("<tt")
&& buf[..nread].holds("xmlns=\"http://www.w3.org/ns/ttml\"")
{
return Ok(Self::TimedTextMarkupLanguage);
} else if buf[..nread].holds("<TrainingCenterDatabase") {
return Ok(Self::TrainingCenterXml);
} else if buf[..nread].holds("<uof:UOF")
    && buf[..nread].holds("uof:mimetype=\"vnd.uof.presentation\"")
{
    // UOF root element whose mimetype attribute marks a presentation.
    // `&&` short-circuits, so the second scan only runs when the root element was found.
    return Ok(Self::UniformOfficeFormatPresentation);
} else if buf[..nread].holds("<uof:UOF")
    && buf[..nread].holds("uof:mimetype=\"vnd.uof.spreadsheet\"")
{
    // UOF root element whose mimetype attribute marks a spreadsheet.
    return Ok(Self::UniformOfficeFormatSpreadsheet);
} else if buf[..nread].holds("<uof:UOF")
    && buf[..nread].holds("uof:mimetype=\"vnd.uof.text\"")
{
    // UOF root element whose mimetype attribute marks a text document.
    return Ok(Self::UniformOfficeFormatText);
} else if buf[..nread].holds("<USFSubtitles") {
return Ok(Self::UniversalSubtitleFormat);
} else if buf[..nread].holds("<xliff") {
return Ok(Self::XmlLocalizationInterchangeFileFormat);
} else if buf[..nread].holds("<playlist") {
return Ok(Self::XmlShareablePlaylistFormat);
}

// Returns the default value.
Expand Down Expand Up @@ -1126,6 +1132,12 @@ trait FindBytes: AsRef<[u8]> {
None
}

/// Reports whether the specified byte pattern occurs anywhere in the data.
///
/// This is a convenience wrapper around `find` that discards the match index.
#[inline]
fn holds<P: AsRef<[u8]>>(&self, pat: P) -> bool {
    // Only the presence of a match matters here, not where it was found.
    let first_match = self.find(pat);
    first_match.is_some()
}

/// Searches for the specified byte pattern and returns the index of the last occurrence.
fn rfind<P: AsRef<[u8]>>(&self, pat: P) -> Option<usize> {
// Retrieves references to data and pattern.
Expand Down
7 changes: 7 additions & 0 deletions tests/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,13 @@ fn test_sun_xml_writer_template() {
assert_eq!(fmt, FileFormat::SunXmlWriterTemplate);
}

/// Checks that the UOT fixture is detected as Uniform Office Format Text.
#[cfg(feature = "reader-xml")]
#[test]
fn test_uniform_office_format_text() {
    let fixture = "fixtures/document/sample.uot";
    let detected = FileFormat::from_file(fixture).unwrap();
    assert_eq!(detected, FileFormat::UniformOfficeFormatText);
}

#[test]
fn test_wordperfect_document_1() {
let fmt = FileFormat::from_file("fixtures/document/sample1.wpd").unwrap();
Expand Down
7 changes: 7 additions & 0 deletions tests/presentation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ fn test_sun_xml_impress_template() {
assert_eq!(fmt, FileFormat::SunXmlImpressTemplate);
}

/// Checks that the UOP fixture is detected as Uniform Office Format Presentation.
#[cfg(feature = "reader-xml")]
#[test]
fn test_uniform_office_format_presentation() {
    let fixture = "fixtures/presentation/sample.uop";
    let detected = FileFormat::from_file(fixture).unwrap();
    assert_eq!(detected, FileFormat::UniformOfficeFormatPresentation);
}

#[test]
fn test_wordperfect_presentations() {
let fmt = FileFormat::from_file("fixtures/presentation/sample3.shw").unwrap();
Expand Down
7 changes: 7 additions & 0 deletions tests/spreadsheet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,10 @@ fn test_sun_xml_calc_template() {
let fmt = FileFormat::from_file("fixtures/spreadsheet/sample.stc").unwrap();
assert_eq!(fmt, FileFormat::SunXmlCalcTemplate);
}

/// Checks that the UOS fixture is detected as Uniform Office Format Spreadsheet.
#[cfg(feature = "reader-xml")]
#[test]
fn test_uniform_office_format_spreadsheet() {
    let fixture = "fixtures/spreadsheet/sample.uos";
    let detected = FileFormat::from_file(fixture).unwrap();
    assert_eq!(detected, FileFormat::UniformOfficeFormatSpreadsheet);
}

0 comments on commit 0991fae

Please sign in to comment.