From ef0ddd45ea67b8545cefb7d428a708d71856272e Mon Sep 17 00:00:00 2001 From: max-ishere <47008271+max-ishere@users.noreply.github.com> Date: Fri, 26 Jan 2024 15:57:30 +0200 Subject: [PATCH] feat(pff2): Added font parser --- Cargo.lock | 314 ++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 15 ++ examples/pff2.rs | 35 +++++ src/lib.rs | 7 + src/pff2.rs | 350 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 721 insertions(+) create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 examples/pff2.rs create mode 100644 src/pff2.rs diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..288d7a0 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,314 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "anstream" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "4.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "memchr" +version = "2.7.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "proc-macro2" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "2.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "test-case" +version = "3.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb2550dd13afcd286853192af8601920d959b14c401fcece38071d53bf0768a8" +dependencies = [ + "test-case-macros", +] + +[[package]] +name = "test-case-core" +version = "3.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adcb7fd841cd518e279be3d5a3eb0636409487998a4aff22f3de87b81e88384f" +dependencies = [ + "cfg-if", + 
"proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "test-case-macros" +version = "3.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c89e72a01ed4c579669add59014b9a524d609c0c88c6a585ce37485879f6ffb" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "test-case-core", +] + +[[package]] +name = "theme-parser" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "nom", + "test-case", + "thiserror", +] + +[[package]] +name = "thiserror" +version = "1.0.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + 
"windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..519f06c --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "theme-parser" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = "1.0.79" +clap = { version = "4.4.18", features = ["derive"] } +nom = "7.1.3" +thiserror = "1.0.56" + +[dev-dependencies] +test-case = "3.3.1" diff --git a/examples/pff2.rs 
b/examples/pff2.rs
new file mode 100644
index 0000000..35fd41f
--- /dev/null
+++ b/examples/pff2.rs
@@ -0,0 +1,35 @@
+//! A minimal font.pf2 parser impl that prints the parsed Rust struct
+
+use std::fs::read;
+
+use args::Args;
+use clap::Parser as _;
+use theme_parser::pff2::Parser;
+
+mod args {
+    use std::path::PathBuf;
+
+    use clap::Parser;
+
+    #[derive(Parser)]
+    pub struct Args {
+        #[clap(long, short = 'f')]
+        pub font_file: PathBuf,
+    }
+}
+
+fn main() -> anyhow::Result<()> {
+    let args = Args::parse();
+
+    let data = read(args.font_file)?;
+    let font = Parser::parse(&data)?.validate();
+
+    let print = format!("{font:#?}")
+        .split("\n")
+        .take(100)
+        .fold(String::new(), |print, line| print + line + "\n");
+
+    println!("{print}");
+
+    Ok(())
+}
diff --git a/src/lib.rs b/src/lib.rs
index e6ff8a8..ae7418d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,12 @@
+use std::rc::Rc;
+
 #[cfg(test)]
 #[macro_use]
 extern crate test_case;
 
+extern crate thiserror;
+
+pub mod pff2;
 pub mod theme_txt;
+
+pub type OwnedSlice<T> = Rc<T>;
diff --git a/src/pff2.rs b/src/pff2.rs
new file mode 100644
index 0000000..00991a8
--- /dev/null
+++ b/src/pff2.rs
@@ -0,0 +1,430 @@
+//! Parser for GRUB's PFF2 (`.pf2`) bitmap font format.
+//!
+//! [`Parser::parse`] reads the sections of a font file into an unchecked
+//! [`Pff2`]; [`Pff2::validate`] then promotes it to a [`Font`] once the
+//! mandatory properties are known to be present.
+
+use core::fmt::Debug;
+use std::{marker::PhantomData, rc::Rc, string::FromUtf8Error};
+
+use nom::{InputLength, ToUsize};
+use thiserror::Error;
+
+use crate::OwnedSlice;
+
+/// A font that passed [`Pff2::validate`].
+pub type Font = Pff2<Validated>;
+/// A font as read from the file, before any validation.
+pub type Parser = Pff2<Unchecked>;
+
+/// The contents of a PFF2 font file.
+///
+/// `T` is a type-level marker recording whether the font has been validated.
+// `FontValidation` is private on purpose so that no validation states can be
+// added outside of this module; that is exactly what this lint flags.
+#[allow(private_bounds)]
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Pff2<T: FontValidation> {
+    /// Font name (`NAME` section).
+    pub name: String,
+    /// Font family (`FAMI` section).
+    pub family: String,
+    /// Point size (`PTSZ` section).
+    pub point_size: u16,
+    /// Font weight (`WEIG` section).
+    pub weight: String,
+    /// Maximum glyph width (`MAXW` section).
+    pub max_char_width: u16,
+    /// Maximum glyph height (`MAXH` section).
+    pub max_char_height: u16,
+    /// Ascent (`ASCE` section).
+    pub ascent: u16,
+    /// Descent (`DESC` section).
+    pub descent: u16,
+    /// Never set by the parser, so it keeps its default value of `0`.
+    pub leading: u16,
+    /// Glyphs listed in the character index that could be read from the file.
+    pub glyphs: OwnedSlice<[Glyph]>,
+
+    _validation: PhantomData<T>,
+}
+
+impl<T: FontValidation> Default for Pff2<T> {
+    fn default() -> Self {
+        Self {
+            name: String::new(),
+            family: String::new(),
+            point_size: 0,
+            weight: String::new(),
+            max_char_width: 0,
+            max_char_height: 0,
+            ascent: 0,
+            descent: 0,
+            leading: 0,
+
+            glyphs: OwnedSlice::new([]),
+
+            _validation: PhantomData,
+        }
+    }
+}
+
+/// A single glyph of a PFF2 font.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Glyph {
+    /// Character code taken from this glyph's character index entry.
+    pub code: u32,
+
+    /// Bitmap width in pixels.
+    pub width: u16,
+    /// Bitmap height in pixels.
+    pub height: u16,
+    /// Raw bits of the horizontal offset. The PFF2 format stores a signed
+    /// 16-bit value here; cast with `as i16` to interpret it.
+    pub x_offset: u16,
+    /// Raw bits of the vertical offset (signed in the file, see `x_offset`).
+    pub y_offset: u16,
+    /// Raw bits of the device width (signed in the file, see `x_offset`).
+    pub device_width: u16,
+
+    /// Packed bitmap, one bit per pixel: `(width * height + 7) / 8` bytes.
+    pub bitmap: OwnedSlice<[u8]>,
+}
+
+impl Parser {
+    const MAGIC: &'static [u8; 4 + 4 + 4] = b"FILE\0\0\0\x04PFF2";
+
+    /// Parses the raw bytes of a PFF2 file.
+    ///
+    /// Unknown sections are skipped. Parsing stops at the `DATA` section, so
+    /// the character index (`CHIX`) has to precede it and anything after it is
+    /// not read.
+    ///
+    /// # Errors
+    ///
+    /// Returns a [`ParserError`] when the magic bytes are wrong or missing, a
+    /// section header or body is truncated, or a section holds malformed data.
+    /// Malformed or truncated input is reported as an error, never as a panic.
+    pub fn parse(input: &[u8]) -> Result<Self, ParserError> {
+        // Offsets in the character index are absolute, so keep the whole file.
+        let input_for_data_section = input;
+
+        // The magic is technically a section, but since it always comes first
+        // with the same content it is compared in one go. `strip_prefix` also
+        // rejects inputs that are shorter than the magic itself.
+        let Some(mut input) = input.strip_prefix(Self::MAGIC.as_slice()) else {
+            return Err(ParserError::BadMagicBytes);
+        };
+
+        let mut font = Self::default();
+        let mut char_indexes = Vec::new();
+
+        while input.input_len() != 0 {
+            let (section, length, body) = Self::parse_section_header(input)?;
+
+            let Ok(section) = SectionName::try_from(section) else {
+                // Unknown section: skip over its body.
+                input = body.get(length..).ok_or(ParserError::InsufficientLengthBytes)?;
+                continue;
+            };
+
+            // `DATA` is handled before its length is looked at: glyphs are found
+            // through the absolute offsets of the character index, and the
+            // length stored for this section may be a placeholder.
+            if section == SectionName::Data {
+                font.glyphs = Self::parse_data_section(char_indexes, input_for_data_section)?;
+                break;
+            }
+
+            if length > body.len() {
+                return Err(ParserError::InsufficientLengthBytes);
+            }
+            let (content, rest) = body.split_at(length);
+
+            match section {
+                SectionName::FontName => font.name = Self::parse_string(content)?,
+                SectionName::Family => font.family = Self::parse_string(content)?,
+                SectionName::PointSize => font.point_size = Self::parse_u16(content)?,
+                SectionName::Weight => font.weight = Self::parse_string(content)?,
+                SectionName::MaxCharWidth => font.max_char_width = Self::parse_u16(content)?,
+                SectionName::MaxCharHeight => font.max_char_height = Self::parse_u16(content)?,
+                SectionName::Ascent => font.ascent = Self::parse_u16(content)?,
+                SectionName::Descent => font.descent = Self::parse_u16(content)?,
+                SectionName::CharIndex => char_indexes = Self::parse_char_indexes(content)?,
+                SectionName::Data => unreachable!("DATA is handled before the match"),
+            }
+
+            input = rest;
+        }
+
+        Ok(font)
+    }
+
+    /// Splits the 8-byte section header off the front of `input`.
+    ///
+    /// Returns the 4-byte section name, the declared body length and the bytes
+    /// that follow the header. The length is not checked against the remaining
+    /// input here.
+    ///
+    /// # Errors
+    ///
+    /// [`ParserError::InsufficientHeaderBytes`] if fewer than 8 bytes are left.
+    fn parse_section_header(input: &[u8]) -> Result<([u8; 4], usize, &[u8]), ParserError> {
+        let [n0, n1, n2, n3, l0, l1, l2, l3, rest @ ..] = input else {
+            return Err(ParserError::InsufficientHeaderBytes);
+        };
+
+        let section = [*n0, *n1, *n2, *n3];
+        let length = u32::from_be_bytes([*l0, *l1, *l2, *l3]).to_usize();
+
+        Ok((section, length, rest))
+    }
+
+    /// Decodes a string section, dropping a single trailing NUL if present.
+    fn parse_string(input: &[u8]) -> Result<String, FromUtf8Error> {
+        let text = input.strip_suffix(&[0u8]).unwrap_or(input);
+
+        String::from_utf8(text.to_vec())
+    }
+
+    /// Decodes a big-endian `u16` from a section of exactly 2 bytes.
+    fn parse_u16(input: &[u8]) -> Result<u16, ParserError> {
+        match input {
+            [high, low] => Ok(u16::from_be_bytes([*high, *low])),
+            _ => Err(ParserError::InvalidU16Length(input.len())),
+        }
+    }
+
+    /// Checks that the mandatory font properties are present.
+    ///
+    /// # Errors
+    ///
+    /// Returns the [`FontValidationError`] of the first failed check: an empty
+    /// name, a zero max width, max height, ascent or descent, or no glyphs.
+    pub fn validate(self) -> Result<Font, FontValidationError> {
+        use FontValidationError::*;
+
+        if self.name.is_empty() {
+            return Err(EmptyName);
+        }
+
+        for (prop, err) in [
+            (self.max_char_width, ZeroMaxCharWidth),
+            (self.max_char_height, ZeroMaxCharHeight),
+            (self.ascent, ZeroAscent),
+            (self.descent, ZeroDescent),
+        ] {
+            if prop == 0 {
+                return Err(err);
+            }
+        }
+
+        if self.glyphs.is_empty() {
+            return Err(NoGlyphs);
+        }
+
+        Ok(Font {
+            name: self.name,
+            family: self.family,
+            point_size: self.point_size,
+            weight: self.weight,
+            max_char_width: self.max_char_width,
+            max_char_height: self.max_char_height,
+            ascent: self.ascent,
+            descent: self.descent,
+            leading: self.leading,
+            glyphs: self.glyphs,
+            _validation: PhantomData,
+        })
+    }
+
+    /// Decodes the `CHIX` section: a list of 9-byte entries, each made of a
+    /// character code (`u32`), a flags byte and a glyph offset (`u32`).
+    ///
+    /// # Errors
+    ///
+    /// [`ParserError::InvalidCharacterIndex`] if the section length is not a
+    /// multiple of the entry size.
+    fn parse_char_indexes(input: &[u8]) -> Result<Vec<CharIndex>, ParserError> {
+        const ENTRY_SIZE: usize = 4 + 1 + 4;
+
+        if input.len() % ENTRY_SIZE != 0 {
+            return Err(ParserError::InvalidCharacterIndex);
+        }
+
+        Ok(input
+            .chunks_exact(ENTRY_SIZE)
+            .map(|entry| CharIndex {
+                code: u32::from_be_bytes([entry[0], entry[1], entry[2], entry[3]]),
+                // entry[4] is `storage_flags`; GRUB never uses it, so it is skipped.
+                offset: u32::from_be_bytes([entry[5], entry[6], entry[7], entry[8]]).to_usize(),
+            })
+            .collect())
+    }
+
+    /// Reads the glyph of every character index entry from the whole file.
+    ///
+    /// `input` must be the complete file, because the offsets in `indexes` are
+    /// absolute. Each glyph starts with a 10-byte header of five big-endian
+    /// 16-bit fields (width, height, x offset, y offset, device width) that is
+    /// directly followed by the packed bitmap.
+    ///
+    /// Entries whose header or bitmap does not fit into `input` are skipped
+    /// rather than reported, so this currently never returns an error.
+    fn parse_data_section(
+        indexes: Vec<CharIndex>,
+        input: &[u8],
+    ) -> Result<OwnedSlice<[Glyph]>, ParserError> {
+        let mut glyphs = Vec::with_capacity(indexes.len());
+
+        for index in indexes {
+            let Some(glyph_data) = input.get(index.offset..) else {
+                continue;
+            };
+            let [w0, w1, h0, h1, x0, x1, y0, y1, d0, d1, bitmap_data @ ..] = glyph_data else {
+                continue;
+            };
+
+            let width = u16::from_be_bytes([*w0, *w1]);
+            let height = u16::from_be_bytes([*h0, *h1]);
+
+            // Widened to `usize` first: the product does not fit into a `u16`.
+            let bitmap_len = (usize::from(width) * usize::from(height) + 7) / 8;
+            let Some(bitmap) = bitmap_data.get(..bitmap_len) else {
+                continue;
+            };
+
+            glyphs.push(Glyph {
+                code: index.code,
+                width,
+                height,
+                x_offset: u16::from_be_bytes([*x0, *x1]),
+                y_offset: u16::from_be_bytes([*y0, *y1]),
+                device_width: u16::from_be_bytes([*d0, *d1]),
+                bitmap: Rc::from(bitmap),
+            });
+        }
+
+        Ok(Rc::from(glyphs))
+    }
+}
+
+/// Sections of a PFF2 file that the parser understands.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+enum SectionName {
+    FontName,
+    Family,
+    PointSize,
+    Weight,
+    MaxCharWidth,
+    MaxCharHeight,
+    Ascent,
+    Descent,
+    CharIndex,
+    Data,
+}
+
+/// One entry of the character index (`CHIX`) section.
+struct CharIndex {
+    /// Character code of the glyph.
+    pub code: u32,
+    /// Absolute offset of the glyph from the start of the file.
+    pub offset: usize,
+}
+
+/// Errors produced while parsing a PFF2 file.
+#[derive(Error, Debug, Clone, PartialEq, Eq)]
+pub enum ParserError {
+    /// The input does not start with the PFF2 magic bytes.
+    #[error("Bad PFF2 magic bytes")]
+    BadMagicBytes,
+
+    /// The input ends before a complete section header (name and length).
+    #[error("Insufficient section header bytes")]
+    InsufficientHeaderBytes,
+
+    /// The input ends before the number of bytes a section header declares.
+    #[error("Insufficient section length bytes")]
+    InsufficientLengthBytes,
+
+    /// A string section is not valid UTF-8.
+    #[error("Invalid UTF-8 string: {0}")]
+    FromUtf8Error(#[from] FromUtf8Error),
+
+    /// A numeric section is not exactly 2 bytes long; holds the actual length.
+    #[error("A u16 is not encoded using exactly 2 bytes, instead: {0}b")]
+    InvalidU16Length(usize),
+
+    /// The character index length is not a multiple of the entry size.
+    #[error("Invalid data in the character index")]
+    InvalidCharacterIndex,
+}
+
+/// Reasons for [`Pff2::validate`] to reject a parsed font.
+#[derive(Error, Debug, Clone, PartialEq, Eq)]
+pub enum FontValidationError {
+    #[error("Font has no name")]
+    EmptyName,
+    #[error("Font doesnt define maximum glyph width")]
+    ZeroMaxCharWidth,
+    #[error("Font doesnt define maximum glyph height")]
+    ZeroMaxCharHeight,
+    #[error("Font doesnt define char ascent")]
+    ZeroAscent,
+    #[error("Font doesnt define char descent")]
+    ZeroDescent,
+    #[error("Font contains no glyphs")]
+    NoGlyphs,
+}
+
+impl TryFrom<[u8; 4]> for SectionName {
+    /// Unknown section names are usually ignored so no point returning them to the caller.
+    type Error = ();
+
+    /// Converts the byte string into a known section name.
+    /// `Err(())` indicates that this section name is unknown.
+    fn try_from(bytes: [u8; 4]) -> Result<Self, Self::Error> {
+        match &bytes {
+            b"NAME" => Ok(SectionName::FontName),
+            b"FAMI" => Ok(SectionName::Family),
+            b"PTSZ" => Ok(SectionName::PointSize),
+            b"WEIG" => Ok(SectionName::Weight),
+            b"MAXW" => Ok(SectionName::MaxCharWidth),
+            b"MAXH" => Ok(SectionName::MaxCharHeight),
+            b"ASCE" => Ok(SectionName::Ascent),
+            b"DESC" => Ok(SectionName::Descent),
+            b"CHIX" => Ok(SectionName::CharIndex),
+            b"DATA" => Ok(SectionName::Data),
+            _ => Err(()),
+        }
+    }
+}
+
+impl Default for Glyph {
+    fn default() -> Self {
+        Self {
+            code: 0,
+
+            width: 0,
+            height: 0,
+            x_offset: 0,
+            y_offset: 0,
+            device_width: 0,
+
+            bitmap: OwnedSlice::new([]),
+        }
+    }
+}
+
+/// Marker for the validation state of a [`Pff2`]; private so it stays sealed.
+trait FontValidation: Clone + PartialEq + Eq + Debug {}
+
+/// Marker of a font that passed validation.
+#[derive(Debug, Default, Clone, PartialEq, Eq)]
+pub struct Validated;
+impl FontValidation for Validated {}
+
+/// Marker of a font that has not been validated.
+#[derive(Debug, Default, Clone, PartialEq, Eq)]
+pub struct Unchecked;
+impl FontValidation for Unchecked {}