From d3bd83ecd1c74a441f65d8ad38ed854e57f246f6 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 8 Jun 2022 20:41:12 +0200 Subject: [PATCH] Tidy up API surface and docs --- src/cff/dict.rs | 6 +- src/cff/mod.rs | 42 ++--- src/lib.rs | 406 ++++++++++++++++++++++++++---------------------- 3 files changed, 242 insertions(+), 212 deletions(-) diff --git a/src/cff/dict.rs b/src/cff/dict.rs index c559be24..93968b90 100644 --- a/src/cff/dict.rs +++ b/src/cff/dict.rs @@ -33,7 +33,7 @@ impl<'a> Dict<'a> { } } - pub fn keep(&mut self, ops: &[Op]) { + pub fn retain(&mut self, ops: &[Op]) { self.0.retain(|pair| ops.contains(&pair.op)); } @@ -175,7 +175,7 @@ impl<'a> Structure<'a> for Operand<'a> { fn write(&self, w: &mut Writer) { match self { Self::Int(int) => { - // TODO: More compact. + // TODO: Select most compact encoding. w.write::(29); w.write::(*int); } @@ -192,7 +192,6 @@ impl<'a> Structure<'a> for Operand<'a> { } /// Top DICT operators. -#[allow(unused)] pub mod top { use super::Op; @@ -237,7 +236,6 @@ pub mod top { pub const FONT_BBOX: Op = Op(5, 0); pub const STROKE_WIDTH: Op = Op(12, 8); pub const CHARSET: Op = Op(15, 0); - pub const ENCODING: Op = Op(16, 0); pub const CHAR_STRINGS: Op = Op(17, 0); pub const PRIVATE: Op = Op(18, 0); pub const POST_SCRIPT: Op = Op(12, 21); diff --git a/src/cff/mod.rs b/src/cff/mod.rs index 37d3feba..c54089cd 100644 --- a/src/cff/mod.rs +++ b/src/cff/mod.rs @@ -21,9 +21,15 @@ struct Table<'a> { cid: Option>, } -/// An opaque charset. +/// A charset. struct Charset<'a>(Opaque<'a>); +/// Data specific to Private DICTs. +struct PrivateData<'a> { + dict: Dict<'a>, + subrs: Option>>, +} + /// Data specific to CID-keyed fonts. struct CidData<'a> { array: Index>, @@ -34,12 +40,6 @@ struct CidData<'a> { /// An FD Select dat structure. struct FdSelect<'a>(Cow<'a, [u8]>); -/// Data specific to Private DICTs. -struct PrivateData<'a> { - dict: Dict<'a>, - subrs: Option>>, -} - /// Recorded offsets that will be written into DICTs. 
struct Offsets { char_strings: usize, @@ -48,6 +48,12 @@ struct Offsets { cid: Option, } +/// Offsets specific to Private DICTs. +struct PrivateOffsets { + dict: Range, + subrs: Option, +} + /// Offsets specific to CID-keyed fonts. struct CidOffsets { array: usize, @@ -55,12 +61,6 @@ struct CidOffsets { private: Vec, } -/// Offsets specific to Private DICTs. -struct PrivateOffsets { - dict: Range, - subrs: Option, -} - /// Subset the CFF table by removing glyph data for unused glyphs. pub(crate) fn subset(ctx: &mut Context) -> Result<()> { let cff = ctx.expect_table(Tag::CFF)?; @@ -74,15 +74,15 @@ pub(crate) fn subset(ctx: &mut Context) -> Result<()> { } // Parse CFF table. - let mut table = read_table(ctx, cff)?; + let mut table = read_cff_table(ctx, cff)?; // Subset the char strings. subset_char_strings(ctx, &mut table.char_strings)?; // Subset Top and Private DICT. - table.top.keep(top::KEEP); + table.top.retain(top::KEEP); if let Some(private) = &mut table.private { - private.dict.keep(private::KEEP); + private.dict.retain(private::KEEP); } // Subset data specific to CID-keyed fonts. @@ -90,11 +90,11 @@ pub(crate) fn subset(ctx: &mut Context) -> Result<()> { subset_font_dicts(ctx, cid)?; for dict in cid.array.iter_mut() { - dict.keep(top::KEEP); + dict.retain(top::KEEP); } for private in &mut cid.private { - private.dict.keep(private::KEEP); + private.dict.retain(private::KEEP); } } @@ -107,7 +107,7 @@ pub(crate) fn subset(ctx: &mut Context) -> Result<()> { for _ in 0 .. 2 { let mut w = Writer::new(); insert_offsets(&mut table, &offsets); - write_table(&mut w, &table, &mut offsets); + write_cff_table(&mut w, &table, &mut offsets); sub_cff = w.finish(); } @@ -152,7 +152,7 @@ fn subset_font_dicts(ctx: &Context, cid: &mut CidData) -> Result<()> { } /// Parse a CFF table. -fn read_table<'a>(ctx: &Context, cff: &'a [u8]) -> Result> { +fn read_cff_table<'a>(ctx: &Context, cff: &'a [u8]) -> Result> { // Skip header. 
let mut r = Reader::new(cff); r.read::()?; @@ -207,7 +207,7 @@ fn read_table<'a>(ctx: &Context, cff: &'a [u8]) -> Result> { } /// Write the a new CFF table. -fn write_table(w: &mut Writer, table: &Table, offsets: &mut Offsets) { +fn write_cff_table(w: &mut Writer, table: &Table, offsets: &mut Offsets) { // Write header. w.write::(1); w.write::(0); diff --git a/src/lib.rs b/src/lib.rs index 55743b99..075ccfa3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,137 +1,50 @@ -//! Reduce the size and coverage of OpenType fonts. +/*! +Reduce the size and coverage of OpenType fonts. -#![deny(unsafe_code)] -#![deny(missing_docs)] - -mod cff; -mod glyf; -mod stream; - -use std::borrow::Cow; -use std::fmt::{self, Debug, Display, Formatter}; - -use crate::stream::{Reader, Structure, Writer}; - -/// Parse a font face from OpenType data. -/// -/// The `index` is only relevant if the data contains a font collection (`.ttc` -/// or `.otc` file). Otherwise, it should be 0. -/// -/// Supports only raw OpenType fonts and collections. If you have a WOFF file or -/// get the tables from somewhere else, you can implement [`Face`] yourself. -pub fn parse(data: &[u8], index: u32) -> Result { - struct Parsed<'a> { - data: &'a [u8], - records: Vec, - } - - impl Face for Parsed<'_> { - fn table(&self, tag: Tag) -> Option<&[u8]> { - let i = self.records.binary_search_by(|record| record.tag.cmp(&tag)).ok()?; - let record = self.records.get(i)?; - let start = record.offset as usize; - let end = start + (record.length as usize); - self.data.get(start .. end) - } - } - - let mut r = Reader::new(data); - let mut kind = r.read::()?; - - // Parse font collection header if necessary. 
- if kind == FontKind::Collection { - let offset = u32::read_at(data, 12 + 4 * (index as usize))?; - let subdata = data.get(offset as usize ..).ok_or(Error::InvalidOffset)?; - r = Reader::new(subdata); - kind = r.read::()?; - if kind == FontKind::Collection { - return Err(Error::UnknownKind); - } - } - - // Read number of table records. - let count = r.read::()?; - r.read::()?; - r.read::()?; - r.read::()?; - - // Read table records. - let mut records = vec![]; - for _ in 0 .. count { - records.push(r.read::()?); - } +Supports both TrueType and CFF outlines. - Ok(Parsed { data, records }) -} +# Example +In the example below, we remove all glyphs except the ones with IDs 68, 69, 70. +Those correspond to the letters 'a', 'b' and 'c'. -/// A font face with OpenType tables. -pub trait Face { - /// Retrieve the data for the given table. - fn table(&self, tag: Tag) -> Option<&[u8]>; -} +``` +use subsetter::{subset, Profile}; -/// A 4-byte OpenType tag. -#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub struct Tag(pub [u8; 4]); +# fn main() -> Result<(), Box> { +// Read the raw font data. +let data = std::fs::read("fonts/NotoSans-Regular.ttf")?; -#[allow(unused)] -impl Tag { - // General tables. - const CMAP: Self = Self(*b"cmap"); - const HEAD: Self = Self(*b"head"); - const HHEA: Self = Self(*b"hhea"); - const HMTX: Self = Self(*b"hmtx"); - const MAXP: Self = Self(*b"maxp"); - const NAME: Self = Self(*b"name"); - const OS2: Self = Self(*b"OS/2"); - const POST: Self = Self(*b"post"); +// Keep only three glyphs and the OpenType tables +// required for embedding the font in a PDF file. +let glyphs = &[68, 69, 70]; +let profile = Profile::pdf(glyphs); +let sub = subset(&data, 0, profile)?; - // TrueType. - const GLYF: Self = Self(*b"glyf"); - const LOCA: Self = Self(*b"loca"); - const PREP: Self = Self(*b"prep"); - const FPGM: Self = Self(*b"fpgm"); - const CVT: Self = Self(*b"cvt "); - const GASP: Self = Self(*b"gasp"); +// Write the resulting file. 
+std::fs::write("target/Noto-Small.ttf", sub)?; +# Ok(()) +# } +``` - // CFF. - const CFF: Self = Self(*b"CFF "); - const CFF2: Self = Self(*b"CFF2"); - const VORG: Self = Self(*b"VORG"); +Notably, this crate does not really remove glyphs, just their outlines. This +means that you don't have to worry about changed glyph IDs. However, it also +means that the resulting font won't always be as small as possible. - // Bitmap and color fonts. - const EBDT: Self = Self(*b"EBDT"); - const EBLC: Self = Self(*b"EBLC"); - const EBSC: Self = Self(*b"EBSC"); - const COLR: Self = Self(*b"COLR"); - const CPAL: Self = Self(*b"CPAL"); - const CBDT: Self = Self(*b"CBDT"); - const CBLC: Self = Self(*b"CBLC"); - const SBIX: Self = Self(*b"sbix"); - const SVG: Self = Self(*b"SVG "); -} +In this example, the original font was 375 KB while the resulting font is 78 KB. +There is still some possibility for improvement through better subsetting. +*/ -impl Structure<'_> for Tag { - fn read(r: &mut Reader) -> Result { - r.read::<[u8; 4]>().map(Self) - } +#![deny(unsafe_code)] +#![deny(missing_docs)] - fn write(&self, w: &mut Writer) { - w.write::<[u8; 4]>(self.0) - } -} +mod cff; +mod glyf; +mod stream; -impl Debug for Tag { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "Tag({self})") - } -} +use std::borrow::Cow; +use std::fmt::{self, Debug, Display, Formatter}; -impl Display for Tag { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - f.pad(std::str::from_utf8(&self.0).unwrap_or("...")) - } -} +use crate::stream::{Reader, Structure, Writer}; /// Defines which things to keep in the font. /// @@ -165,66 +78,26 @@ impl<'a> Profile<'a> { } } -/// Subsetting context. -struct Context<'a> { - /// Original face. - face: &'a dyn Face, - /// The number of glyphs in the original and subsetted face. - /// - /// Subsetting doesn't actually delete glyphs, just their outlines. - num_glyphs: u16, - /// The subsetting profile. - profile: Profile<'a>, - /// The kind of face.
- kind: FontKind, - /// Subsetted tables. - tables: Vec<(Tag, Cow<'a, [u8]>)>, -} - -impl<'a> Context<'a> { - /// Expect a table. - fn expect_table(&self, tag: Tag) -> Result<&'a [u8]> { - self.face.table(tag).ok_or(Error::MissingTable(tag)) - } - - /// Process a table. - fn process(&mut self, tag: Tag) -> Result<()> { - let data = match self.face.table(tag) { - Some(data) => data, - None => return Ok(()), - }; - - match tag { - Tag::GLYF => glyf::subset(self)?, - Tag::LOCA => panic!("handled by glyf"), - Tag::CFF => cff::subset(self)?, - _ => self.push(tag, data), - } - - Ok(()) - } - - /// Push a subsetted table. - fn push(&mut self, tag: Tag, table: impl Into>) { - debug_assert!( - !self.tables.iter().any(|&(prev, _)| prev == tag), - "duplicate {tag} table" - ); - self.tables.push((tag, table.into())); - } -} - /// Subset a font face to include less glyphs and tables. -pub fn subset(face: &dyn Face, profile: Profile) -> Result> { +/// +/// - The `data` must be in the OpenType font format. +/// - The `index` is only relevant if the data contains a font collection +/// (`.ttc` or `.otc` file). Otherwise, it should be 0. +pub fn subset(data: &[u8], index: u32, profile: Profile) -> Result> { + let face = parse(data, index)?; + let kind = match face.table(Tag::CFF).or(face.table(Tag::CFF2)) { + Some(_) => FontKind::CFF, + None => FontKind::TrueType, + }; + let maxp = face.table(Tag::MAXP).ok_or(Error::MissingTable(Tag::MAXP))?; + let num_glyphs = u16::read_at(maxp, 4)?; + let mut ctx = Context { face, - num_glyphs: u16::read_at(maxp, 4)?, + num_glyphs, profile, - kind: match face.table(Tag::CFF).or(face.table(Tag::CFF2)) { - Some(_) => FontKind::CFF, - None => FontKind::TrueType, - }, + kind, tables: vec![], }; @@ -256,6 +129,37 @@ pub fn subset(face: &dyn Face, profile: Profile) -> Result> { Ok(construct(ctx)) } +/// Parse a font face from OpenType data. 
+fn parse(data: &[u8], index: u32) -> Result> { + let mut r = Reader::new(data); + let mut kind = r.read::()?; + + // Parse font collection header if necessary. + if kind == FontKind::Collection { + let offset = u32::read_at(data, 12 + 4 * (index as usize))?; + let subdata = data.get(offset as usize ..).ok_or(Error::InvalidOffset)?; + r = Reader::new(subdata); + kind = r.read::()?; + if kind == FontKind::Collection { + return Err(Error::UnknownKind); + } + } + + // Read number of table records. + let count = r.read::()?; + r.read::()?; + r.read::()?; + r.read::()?; + + // Read table records. + let mut records = vec![]; + for _ in 0 .. count { + records.push(r.read::()?); + } + + Ok(Face { data, records }) +} + /// Construct a brand new font. fn construct(mut ctx: Context) -> Vec { let mut w = Writer::new(); @@ -289,7 +193,6 @@ fn construct(mut ctx: Context) -> Vec { } let len = data.len(); - println!("{}: {}", tag, len); w.write(TableRecord { tag: *tag, checksum: checksum(data), @@ -297,6 +200,9 @@ fn construct(mut ctx: Context) -> Vec { length: len as u32, }); + #[cfg(test)] + println!("{}: {}", tag, len); + // Increase offset, plus padding zeros to align to 4 bytes. offset += len; while offset % 4 != 0 { @@ -335,6 +241,71 @@ fn checksum(data: &[u8]) -> u32 { sum } +/// Subsetting context. +struct Context<'a> { + /// Original face. + face: Face<'a>, + /// The number of glyphs in the original and subsetted face. + /// + /// Subsetting doesn't actually delete glyphs, just their outlines. + num_glyphs: u16, + /// The subsetting profile. + profile: Profile<'a>, + /// The kind of face. + kind: FontKind, + /// Subsetted tables. + tables: Vec<(Tag, Cow<'a, [u8]>)>, +} + +impl<'a> Context<'a> { + /// Expect a table. + fn expect_table(&self, tag: Tag) -> Result<&'a [u8]> { + self.face.table(tag).ok_or(Error::MissingTable(tag)) + } + + /// Process a table.
+ fn process(&mut self, tag: Tag) -> Result<()> { + let data = match self.face.table(tag) { + Some(data) => data, + None => return Ok(()), + }; + + match tag { + Tag::GLYF => glyf::subset(self)?, + Tag::LOCA => panic!("handled by glyf"), + Tag::CFF => cff::subset(self)?, + _ => self.push(tag, data), + } + + Ok(()) + } + + /// Push a subsetted table. + fn push(&mut self, tag: Tag, table: impl Into>) { + debug_assert!( + !self.tables.iter().any(|&(prev, _)| prev == tag), + "duplicate {tag} table" + ); + self.tables.push((tag, table.into())); + } +} + +/// A font face with OpenType tables. +struct Face<'a> { + data: &'a [u8], + records: Vec, +} + +impl<'a> Face<'a> { + fn table(&self, tag: Tag) -> Option<&'a [u8]> { + let i = self.records.binary_search_by(|record| record.tag.cmp(&tag)).ok()?; + let record = self.records.get(i)?; + let start = record.offset as usize; + let end = start + (record.length as usize); + self.data.get(start .. end) + } +} + /// What kind of contents the font has. #[derive(Debug, Copy, Clone, Eq, PartialEq)] enum FontKind { @@ -365,16 +336,66 @@ impl Structure<'_> for FontKind { } } -/// A signed 16-bit fixed-point number. -struct F2Dot14(u16); +/// A 4-byte OpenType tag. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct Tag(pub [u8; 4]); -impl Structure<'_> for F2Dot14 { +#[allow(unused)] +impl Tag { + // General tables. + const CMAP: Self = Self(*b"cmap"); + const HEAD: Self = Self(*b"head"); + const HHEA: Self = Self(*b"hhea"); + const HMTX: Self = Self(*b"hmtx"); + const MAXP: Self = Self(*b"maxp"); + const NAME: Self = Self(*b"name"); + const OS2: Self = Self(*b"OS/2"); + const POST: Self = Self(*b"post"); + + // TrueType. + const GLYF: Self = Self(*b"glyf"); + const LOCA: Self = Self(*b"loca"); + const PREP: Self = Self(*b"prep"); + const FPGM: Self = Self(*b"fpgm"); + const CVT: Self = Self(*b"cvt "); + const GASP: Self = Self(*b"gasp"); + + // CFF. 
+ const CFF: Self = Self(*b"CFF "); + const CFF2: Self = Self(*b"CFF2"); + const VORG: Self = Self(*b"VORG"); + + // Bitmap and color fonts. + const EBDT: Self = Self(*b"EBDT"); + const EBLC: Self = Self(*b"EBLC"); + const EBSC: Self = Self(*b"EBSC"); + const COLR: Self = Self(*b"COLR"); + const CPAL: Self = Self(*b"CPAL"); + const CBDT: Self = Self(*b"CBDT"); + const CBLC: Self = Self(*b"CBLC"); + const SBIX: Self = Self(*b"sbix"); + const SVG: Self = Self(*b"SVG "); +} + +impl Structure<'_> for Tag { fn read(r: &mut Reader) -> Result { - r.read::().map(Self) + r.read::<[u8; 4]>().map(Self) } fn write(&self, w: &mut Writer) { - w.write::(self.0) + w.write::<[u8; 4]>(self.0) + } +} + +impl Debug for Tag { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "Tag({self})") + } +} + +impl Display for Tag { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + f.pad(std::str::from_utf8(&self.0).unwrap_or("...")) } } @@ -405,6 +426,19 @@ impl Structure<'_> for TableRecord { } } +/// A signed 16-bit fixed-point number. +struct F2Dot14(u16); + +impl Structure<'_> for F2Dot14 { + fn read(r: &mut Reader) -> Result { + r.read::().map(Self) + } + + fn write(&self, w: &mut Writer) { + w.write::(self.0) + } +} + /// The result type for everything. type Result = std::result::Result; @@ -419,7 +453,6 @@ pub enum Error { MissingData, /// Parsed data was invalid. InvalidData, - /// The read data does not conform. /// A table is missing. /// /// Mostly, the subsetter just ignores (i.e. 
not subsets) tables if they are @@ -447,7 +480,7 @@ impl std::error::Error for Error {} mod tests { use std::path::Path; - use super::{parse, subset, Profile}; + use super::{subset, Profile}; #[test] fn test_subset_truetype() { @@ -486,9 +519,8 @@ mod tests { let glyphs: Vec<_> = text.chars().filter_map(|c| Some(ttf.glyph_index(c)?.0)).collect(); - let face = parse(&data, 0).unwrap(); let profile = Profile::pdf(&glyphs); - let subs = subset(&face, profile).unwrap(); + let subs = subset(&data, 0, profile).unwrap(); let stem = Path::new(path).file_stem().unwrap().to_str().unwrap(); let out = Path::new("target").join(Path::new(stem)).with_extension("ttf");