diff --git a/CHANGELOG.md b/CHANGELOG.md
index a9d7e9c..6590112 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,18 @@
+# Version 0.26.0 (unreleased)
+
+## New formats support
+
+- ID3v2 (ID3)
+
+## API
+
+- Add `reader-id3v2` feature
+
+## Improvements
+
+- Improve Free Lossless Audio Codec (FLAC) detection when it contains ID3v2 metadata
+- Improve MPEG-1/2 Audio Layer 3 (MP3) detection
+
 # Version 0.25.0 (2024-05-08)
 
 ## Fixes
diff --git a/Cargo.toml b/Cargo.toml
index 75badfb..d72d3e7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "file-format"
-version = "0.25.0"
+version = "0.26.0"
 authors = ["Mickaël Malécot "]
 edition = "2021"
 description = "Crate for determining the file format of a given file or stream."
@@ -21,6 +21,7 @@ reader = [
     "reader-cfb",
     "reader-ebml",
     "reader-exe",
+    "reader-id3v2",
     "reader-mp4",
     "reader-pdf",
     "reader-rm",
@@ -33,6 +34,7 @@ reader-asf = []
 reader-cfb = []
 reader-ebml = []
 reader-exe = []
+reader-id3v2 = []
 reader-mp4 = []
 reader-pdf = []
 reader-rm = []
diff --git a/README.md b/README.md
index afa668d..c53b0bd 100644
--- a/README.md
+++ b/README.md
@@ -51,7 +51,7 @@ Add this to your `Cargo.toml`:
 
 ```toml
 [dependencies]
-file-format = "0.25"
+file-format = "0.26"
 ```
 
 ## Crate features
@@ -68,6 +68,7 @@ identification.
 - `reader-cfb` - Enables Compound File Binary (CFB) based file formats detection.
 - `reader-ebml` - Enables Extensible Binary Meta Language (EBML) based file formats detection.
 - `reader-exe` - Enables MS-DOS Executable (EXE) based file formats detection.
+- `reader-id3v2` - Enables ID3v2 (ID3) based file formats detection.
 - `reader-mp4` - Enables MPEG-4 Part 14 (MP4) based file formats detection.
 - `reader-pdf` - Enables Portable Document Format (PDF) based file formats detection.
 - `reader-rm` - Enables RealMedia (RM) based file formats detection.
@@ -353,6 +354,7 @@ identification.
 - Android Binary XML (AXML)
 - BitTorrent (Torrent)
 - CD Audio (CDA)
+- ID3v2 (ID3)
 - Meta Information Encapsulation (MIE)
 - TASTy
 - Windows Shortcut (LNK)
diff --git a/src/formats.rs b/src/formats.rs
index 21862e5..190436a 100644
--- a/src/formats.rs
+++ b/src/formats.rs
@@ -1005,6 +1005,13 @@ formats! {
     extension = "icc"
     kind = Other
 
+    format = Id3v2
+    name = "ID3v2"
+    short_name = "ID3"
+    media_type = "application/x-id3v2"
+    extension = "id3"
+    kind = Metadata
+
     format = ImpulseTrackerModule
     name = "Impulse Tracker Module"
     short_name = "IT"
diff --git a/src/lib.rs b/src/lib.rs
index f3616c3..5282201 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -99,11 +99,14 @@ identification.
   * [Linear Executable (LE)](`FileFormat::LinearExecutable`)
   * [New Executable (NE)](`FileFormat::NewExecutable`)
   * [Portable Executable (PE)](`FileFormat::PortableExecutable`)
+- `reader-id3v2` - Enables [ID3v2 (ID3)](`FileFormat::Id3v2`) based file formats detection.
+  * [Free Lossless Audio Codec (FLAC)](`FileFormat::FreeLosslessAudioCodec`)
+  * [MPEG-1/2 Audio Layer 3 (MP3)](`FileFormat::Mpeg12AudioLayer3`)
 - `reader-mp4` - Enables [MPEG-4 Part 14 (MP4)](`FileFormat::Mpeg4Part14`) based file formats
   detection.
   * [MPEG-4 Part 14 Audio (MP4)](`FileFormat::Mpeg4Part14Audio`)
   * [MPEG-4 Part 14 Subtitles (MP4)](`FileFormat::Mpeg4Part14Subtitles`)
   * [MPEG-4 Part 14 Video (MP4)](`FileFormat::Mpeg4Part14Video`)
 - `reader-pdf` - Enables [Portable Document Format (PDF)](`FileFormat::PortableDocumentFormat`)
   based file formats detection.
* [Adobe Illustrator Artwork (AI)](`FileFormat::AdobeIllustratorArtwork`) diff --git a/src/readers.rs b/src/readers.rs index 5f02bf8..a3fdd8b 100644 --- a/src/readers.rs +++ b/src/readers.rs @@ -18,6 +18,8 @@ impl crate::FileFormat { Self::ExtensibleBinaryMetaLanguage => Self::from_ebml_reader(reader)?, #[cfg(feature = "reader-exe")] Self::MsDosExecutable => Self::from_exe_reader(reader)?, + #[cfg(feature = "reader-id3v2")] + Self::Id3v2 => Self::from_id3v2_reader(reader)?, #[cfg(feature = "reader-mp4")] Self::Mpeg4Part14 => Self::from_mp4_reader(reader)?, #[cfg(feature = "reader-pdf")] @@ -460,6 +462,73 @@ impl crate::FileFormat { Ok(Self::MsDosExecutable) } + /// Determines file format from an ID3v2 reader. + #[cfg(feature = "reader-id3v2")] + pub(crate) fn from_id3v2_reader(mut reader: R) -> Result { + // Loops while in ID3v2 segment. + let mut offset = 0; + loop { + // Reads first 10 bytes. + reader.seek(SeekFrom::Start(offset))?; + let buf = reader.read_bytes(10)?; + + // Checks for ID3 magic bytes. + if &buf[..3] != b"ID3" { + break; + } + + // Decodes tag size. + let tag_size = ((buf[6] as u64 & 0x7F) << 21) + | ((buf[7] as u64 & 0x7F) << 14) + | ((buf[8] as u64 & 0x7F) << 7) + | (buf[9] as u64 & 0x7F); + + // Checks for extended header flag. + let flags = buf[5]; + let extended_header_present = (flags & 0x40) != 0; + let footer_present = (flags & 0x10) != 0; + + // Calculates next offset. + let mut next_offset = offset + 10 + tag_size; + + // Skips extended header if present. + if extended_header_present { + next_offset += reader.read_u32_be()? as u64; + } + + // Adds footer size if present. + if footer_present { + next_offset += 10; + } + + // Sets new offset for next potential tag. + offset = next_offset; + } + + // Skips ID3v2 segment. + reader.seek(SeekFrom::Start(offset))?; + + // Checks if the file contains the FLAC file format signature. 
+ let buf = reader.read_bytes(4)?; + if buf.eq(b"fLaC") { + return Ok(Self::FreeLosslessAudioCodec); + } + + // Checks if the file contains one of the MP3 file format signature. + match &buf[..2] { + b"\xFF\xE2" => return Ok(Self::Mpeg12AudioLayer3), + b"\xFF\xE3" => return Ok(Self::Mpeg12AudioLayer3), + b"\xFF\xF2" => return Ok(Self::Mpeg12AudioLayer3), + b"\xFF\xF3" => return Ok(Self::Mpeg12AudioLayer3), + b"\xFF\xFA" => return Ok(Self::Mpeg12AudioLayer3), + b"\xFF\xFB" => return Ok(Self::Mpeg12AudioLayer3), + _ => {} + } + + // Returns the default value. + Ok(Self::Id3v2) + } + /// Determines file format from a MP4 reader. #[cfg(feature = "reader-mp4")] pub(crate) fn from_mp4_reader(reader: R) -> Result { diff --git a/src/signatures.rs b/src/signatures.rs index 1038c67..3af43fc 100644 --- a/src/signatures.rs +++ b/src/signatures.rs @@ -1097,6 +1097,11 @@ signatures! { format = IccProfile value = b"acsp" offset = 36 + format = Id3v2 + value = b"ID3\x02" + value = b"ID3\x03" + value = b"ID3\x04" + format = ImpulseTrackerModule value = b"IMPM" @@ -1316,15 +1321,6 @@ signatures! { format = JpegExtendedRange value = b"\x49\x49\xBC" - format = Mpeg12AudioLayer3 - value = b"ID3" - value = b"\xFF\xE2" - value = b"\xFF\xE3" - value = b"\xFF\xF2" - value = b"\xFF\xF3" - value = b"\xFF\xFA" - value = b"\xFF\xFB" - format = Mtv value = b"AMV" @@ -1422,6 +1418,14 @@ signatures! { value = b"\xFF\xFC" value = b"\xFF\xFD" + format = Mpeg12AudioLayer3 + value = b"\xFF\xE2" + value = b"\xFF\xE3" + value = b"\xFF\xF2" + value = b"\xFF\xF3" + value = b"\xFF\xFA" + value = b"\xFF\xFB" + format = Mpeg2TransportStream value = b"\x47", b"\x47" offset = 188