Skip to content

Commit

Permalink
Add ID3v2 format support and improve MP3/FLAC detection
Browse files Browse the repository at this point in the history
  • Loading branch information
mmalecot committed Oct 31, 2024
1 parent 6cc794f commit 1938699
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 15 deletions.
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
# Version 0.26.0 (unreleased)

## New formats support

- ID3v2 (ID3)

## API

- Add `reader-id3v2` feature

## Improvements

- Improve Free Lossless Audio Codec (FLAC) detection when it contains ID3v2 metadata
- Improve MPEG-1/2 Audio Layer 3 (MP3) detection

# Version 0.25.0 (2024-05-08)

## Fixes
Expand Down
4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "file-format"
version = "0.25.0"
version = "0.26.0"
authors = ["Mickaël Malécot <[email protected]>"]
edition = "2021"
description = "Crate for determining the file format of a given file or stream."
Expand All @@ -21,6 +21,7 @@ reader = [
"reader-cfb",
"reader-ebml",
"reader-exe",
"reader-id3v2",
"reader-mp4",
"reader-pdf",
"reader-rm",
Expand All @@ -33,6 +34,7 @@ reader-asf = []
reader-cfb = []
reader-ebml = []
reader-exe = []
reader-id3v2 = []
reader-mp4 = []
reader-pdf = []
reader-rm = []
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ Add this to your `Cargo.toml`:

```toml
[dependencies]
file-format = "0.25"
file-format = "0.26"
```

## Crate features
Expand All @@ -68,6 +68,7 @@ identification.
- `reader-cfb` - Enables Compound File Binary (CFB) based file formats detection.
- `reader-ebml` - Enables Extensible Binary Meta Language (EBML) based file formats detection.
- `reader-exe` - Enables MS-DOS Executable (EXE) based file formats detection.
- `reader-id3v2` - Enables ID3v2 (ID3) based file formats detection.
- `reader-mp4` - Enables MPEG-4 Part 14 (MP4) based file formats detection.
- `reader-pdf` - Enables Portable Document Format (PDF) based file formats detection.
- `reader-rm` - Enables RealMedia (RM) based file formats detection.
Expand Down Expand Up @@ -353,6 +354,7 @@ identification.
- Android Binary XML (AXML)
- BitTorrent (Torrent)
- CD Audio (CDA)
- ID3v2 (ID3)
- Meta Information Encapsulation (MIE)
- TASTy
- Windows Shortcut (LNK)
Expand Down
7 changes: 7 additions & 0 deletions src/formats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1005,6 +1005,13 @@ formats! {
extension = "icc"
kind = Other

format = Id3v2
name = "ID3v2"
short_name = "ID3"
media_type = "application/x-id3v2"
extension = "id3"
kind = Metadata

format = ImpulseTrackerModule
name = "Impulse Tracker Module"
short_name = "IT"
Expand Down
7 changes: 3 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,10 @@ identification.
* [Linear Executable (LE)](`FileFormat::LinearExecutable`)
* [New Executable (NE)](`FileFormat::NewExecutable`)
* [Portable Executable (PE)](`FileFormat::PortableExecutable`)
- `reader-id3v2` - Enables [ID3v2 (ID3)](`FileFormat::Id3v2`) based file formats detection.
* [Free Lossless Audio Codec (FLAC)](`FileFormat::FreeLosslessAudioCodec`)
* [MPEG-1/2 Audio Layer 3 (MP3)](`FileFormat::Mpeg12AudioLayer3`)
- `reader-mp4` - Enables [MPEG-4 Part 14 (MP4)](`FileFormat::Mpeg4Part14`) based file formats
detection.
* [MPEG-4 Part 14 Audio (MP4)](`FileFormat::Mpeg4Part14Audio`)
* [MPEG-4 Part 14 Subtitles (MP4)](`FileFormat::Mpeg4Part14Subtitles`)
* [MPEG-4 Part 14 Video (MP4)](`FileFormat::Mpeg4Part14Video`)
- `reader-pdf` - Enables [Portable Document Format (PDF)](`FileFormat::PortableDocumentFormat`)
based file formats detection.
* [Adobe Illustrator Artwork (AI)](`FileFormat::AdobeIllustratorArtwork`)
Expand Down
69 changes: 69 additions & 0 deletions src/readers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ impl crate::FileFormat {
Self::ExtensibleBinaryMetaLanguage => Self::from_ebml_reader(reader)?,
#[cfg(feature = "reader-exe")]
Self::MsDosExecutable => Self::from_exe_reader(reader)?,
#[cfg(feature = "reader-id3v2")]
Self::Id3v2 => Self::from_id3v2_reader(reader)?,
#[cfg(feature = "reader-mp4")]
Self::Mpeg4Part14 => Self::from_mp4_reader(reader)?,
#[cfg(feature = "reader-pdf")]
Expand Down Expand Up @@ -460,6 +462,73 @@ impl crate::FileFormat {
Ok(Self::MsDosExecutable)
}

/// Determines file format from an ID3v2 reader.
///
/// Skips every consecutive ID3v2 tag found at the start of the stream, then inspects the bytes
/// that follow: the `fLaC` marker yields `FreeLosslessAudioCodec`, one of the MPEG audio frame
/// sync patterns yields `Mpeg12AudioLayer3`, and anything else falls back to `Id3v2`.
///
/// # Errors
///
/// Propagates any error returned while seeking or reading from `reader`.
#[cfg(feature = "reader-id3v2")]
pub(crate) fn from_id3v2_reader<R: Read + Seek>(mut reader: R) -> Result<Self> {
    // Size in bytes of the ID3v2 header; the optional ID3v2.4 footer has the same size.
    const HEADER_SIZE: u64 = 10;

    // Loops while in ID3v2 segment.
    let mut offset: u64 = 0;
    loop {
        // Reads the 10-byte tag header located at the current offset.
        // NOTE(review): assumes `read_bytes(10)` yields exactly 10 bytes or errors — confirm.
        reader.seek(SeekFrom::Start(offset))?;
        let buf = reader.read_bytes(10)?;

        // Checks for ID3 magic bytes.
        if &buf[..3] != b"ID3" {
            break;
        }

        // Decodes the synchsafe tag size (4 bytes, 7 significant bits each, big-endian).
        let tag_size = buf[6..10]
            .iter()
            .fold(0u64, |size, &byte| (size << 7) | u64::from(byte & 0x7F));

        // The tag size already accounts for the extended header, the frames and the padding;
        // only the header itself and the optional footer are excluded from it. The extended
        // header must therefore NOT be added a second time, otherwise the offset overshoots
        // the start of the audio data.
        let footer_size = if buf[5] & 0x10 != 0 { HEADER_SIZE } else { 0 };

        // Sets new offset for next potential tag.
        offset += HEADER_SIZE + tag_size + footer_size;
    }

    // Skips ID3v2 segment.
    // NOTE(review): if `read_bytes(4)` errors on a short read, a stream holding nothing but
    // the tag reports an error instead of `Id3v2` — confirm the intended behavior.
    reader.seek(SeekFrom::Start(offset))?;
    let buf = reader.read_bytes(4)?;

    // Checks if the file contains the FLAC file format signature.
    if buf.eq(b"fLaC") {
        return Ok(Self::FreeLosslessAudioCodec);
    }

    // Checks if the file contains one of the MP3 file format signatures, otherwise returns
    // the default value.
    let format = match &buf[..2] {
        b"\xFF\xE2" | b"\xFF\xE3" | b"\xFF\xF2" | b"\xFF\xF3" | b"\xFF\xFA" | b"\xFF\xFB" => {
            Self::Mpeg12AudioLayer3
        }
        _ => Self::Id3v2,
    };
    Ok(format)
}

/// Determines file format from a MP4 reader.
#[cfg(feature = "reader-mp4")]
pub(crate) fn from_mp4_reader<R: Read + Seek>(reader: R) -> Result<Self> {
Expand Down
22 changes: 13 additions & 9 deletions src/signatures.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1097,6 +1097,11 @@ signatures! {
format = IccProfile
value = b"acsp" offset = 36

format = Id3v2
value = b"ID3\x02"
value = b"ID3\x03"
value = b"ID3\x04"

format = ImpulseTrackerModule
value = b"IMPM"

Expand Down Expand Up @@ -1316,15 +1321,6 @@ signatures! {
format = JpegExtendedRange
value = b"\x49\x49\xBC"

format = Mpeg12AudioLayer3
value = b"ID3"
value = b"\xFF\xE2"
value = b"\xFF\xE3"
value = b"\xFF\xF2"
value = b"\xFF\xF3"
value = b"\xFF\xFA"
value = b"\xFF\xFB"

format = Mtv
value = b"AMV"

Expand Down Expand Up @@ -1422,6 +1418,14 @@ signatures! {
value = b"\xFF\xFC"
value = b"\xFF\xFD"

format = Mpeg12AudioLayer3
value = b"\xFF\xE2"
value = b"\xFF\xE3"
value = b"\xFF\xF2"
value = b"\xFF\xF3"
value = b"\xFF\xFA"
value = b"\xFF\xFB"

format = Mpeg2TransportStream
value = b"\x47", b"\x47" offset = 188

Expand Down

0 comments on commit 1938699

Please sign in to comment.