From e5783ea36a870ca0b200d24045c7e3ff3165e904 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 8 Jun 2022 19:49:32 +0200 Subject: [PATCH] Subset data related CID-keyed fonts --- src/cff/mod.rs | 559 +++++++++++++++++++++++++------------------------ src/lib.rs | 1 - src/stream.rs | 17 +- 3 files changed, 303 insertions(+), 274 deletions(-) diff --git a/src/cff/mod.rs b/src/cff/mod.rs index 6db3483d..7dece6dd 100644 --- a/src/cff/mod.rs +++ b/src/cff/mod.rs @@ -12,30 +12,39 @@ use super::*; /// A CFF table. struct Table<'a> { name: Index>, - top_dict: Dict<'a>, + top: Dict<'a>, strings: Index>, global_subrs: Index>, char_strings: Index>, - charset: Option>, - private_dict: Option>, - local_subrs: Option>>, + charset: Option>, + private: Option>, cid: Option>, } +/// An opaque charset. +struct Charset<'a>(Opaque<'a>); + /// Data specific to CID-keyed fonts. struct CidData<'a> { array: Index>, - select: Cow<'a, [u8]>, - private_dicts: Vec>, - local_subrs: Vec>>>, + select: FdSelect<'a>, + private: Vec>, +} + +/// An FD Select dat structure. +struct FdSelect<'a>(Cow<'a, [u8]>); + +/// Data specific to PRivate DICTs. +struct PrivateData<'a> { + dict: Dict<'a>, + subrs: Option>>, } /// Recorded offsets that will be written into DICTs. struct Offsets { char_strings: usize, charset: Option, - private_dict: Option>, - local_subrs: Option, + private: Option, cid: Option, } @@ -43,341 +52,342 @@ struct Offsets { struct CidOffsets { array: usize, select: usize, - private_dicts: Vec>, - local_subrs: Vec>, + private: Vec, +} + +/// Offsets specific to Private DICTs. +struct PrivateOffsets { + dict: Range, + subrs: Option, } /// Subset the CFF table by removing glyph data for unused glyphs. pub(crate) fn subset(ctx: &mut Context) -> Result<()> { let cff = ctx.expect_table(Tag::CFF)?; - let mut r = Reader::new(cff); - // Read version. - let (major, _) = (r.read::()?, r.read::()); + // Check version. + let mut r = Reader::new(cff); + let major = r.read::()?; if major != 1 { ctx.push(Tag::CFF, cff); return Ok(()); } + // Parse CFF table. + let mut table = read_table(ctx, cff)?; + + // Subset the char strings. + subset_char_strings(ctx, &mut table.char_strings)?; + + // Subset Top and Private DICT. + table.top.keep(top::KEEP); + if let Some(private) = &mut table.private { + private.dict.keep(private::KEEP); + } + + // Subset data specific to CID-keyed fonts. + if let Some(cid) = &mut table.cid { + subset_font_dicts(ctx, cid)?; + + for dict in cid.array.iter_mut() { + dict.keep(top::KEEP); + } + for private in &mut cid.private { + private.dict.keep(private::KEEP); + } + } + + // Construct a new CFF table. + let mut sub_cff = vec![]; + let mut offsets = create_offsets(&table); + + for _ in 0 .. 2 { + insert_offsets(&mut table, &offsets); + let mut w = Writer::new(); + write_table(&mut w, &table, &mut offsets); + sub_cff = w.finish(); + } + + ctx.push(Tag::CFF, sub_cff); + + Ok(()) +} + +/// Subset the glyph descriptions. +fn subset_char_strings<'a>(ctx: &Context, strings: &mut Index>) -> Result<()> { + // The set of all glyphs we will include in the subset. + let subset: HashSet = ctx.profile.glyphs.iter().copied().collect(); + + for glyph in 0 .. ctx.num_glyphs { + if !subset.contains(&glyph) { + // The byte sequence [14] is the minimal valid charstring consisting + // of just a single `endchar` operator. + *strings.get_mut(glyph as usize).ok_or(Error::InvalidOffset)? = Opaque(&[14]); + } + } + + Ok(()) +} + +/// Subset CID-related data. +fn subset_font_dicts(ctx: &Context, cid: &mut CidData) -> Result<()> { + // Determine which font dicts to keep. + let mut sub_fds = HashSet::new(); + for &glyph in ctx.profile.glyphs { + sub_fds.insert(*cid.select.0.get(usize::from(glyph)).ok_or(Error::MissingData)?); + } + + // Remove subroutines for unused Private DICTs. + for (i, dict) in cid.private.iter_mut().enumerate() { + if !sub_fds.contains(&(i as u8)) { + dict.subrs = None; + } + } + + Ok(()) +} + +/// Create initial zero offsets for all data structures. +fn create_offsets(table: &Table) -> Offsets { + Offsets { + char_strings: 0, + charset: table.charset.as_ref().map(|_| 0), + private: table.private.as_ref().map(create_private_offsets), + cid: table.cid.as_ref().map(create_cid_offsets), + } +} + +/// Create initial zero offsets for all CID-related data structures. +fn create_cid_offsets(cid: &CidData) -> CidOffsets { + CidOffsets { + array: 0, + select: 0, + private: cid.private.iter().map(create_private_offsets).collect(), + } +} + +/// Create initial zero offsets for a Private DICT. +fn create_private_offsets(private: &PrivateData) -> PrivateOffsets { + PrivateOffsets { + dict: 0 .. 0, + subrs: private.subrs.as_ref().map(|_| 0), + } +} + +/// Insert the offsets of various parts of the font into the relevant DICTs. +fn insert_offsets(table: &mut Table, offsets: &Offsets) { + if let Some(offset) = offsets.charset { + table.top.set_offset(top::CHARSET, offset); + } + + table.top.set_offset(top::CHAR_STRINGS, offsets.char_strings); + + if let (Some(private), Some(offsets)) = (&mut table.private, &offsets.private) { + table.top.set_range(top::PRIVATE, &offsets.dict); + + if let Some(offset) = offsets.subrs { + private.dict.set_offset(private::SUBRS, offset); + } + } + + if let (Some(cid), Some(offsets)) = (&mut table.cid, &offsets.cid) { + table.top.set_offset(top::FD_ARRAY, offsets.array); + table.top.set_offset(top::FD_SELECT, offsets.select); + + for (dict, offsets) in cid.array.iter_mut().zip(&offsets.private) { + dict.set_range(top::PRIVATE, &offsets.dict); + } + + for (private, offsets) in cid.private.iter_mut().zip(&offsets.private) { + if let Some(offset) = offsets.subrs { + private.dict.set_offset(private::SUBRS, offset); + } + } + } +} + +/// Parse a CFF table. +fn read_table<'a>(ctx: &Context, cff: &'a [u8]) -> Result> { + // Skip header. + let mut r = Reader::new(cff); + r.read::()?; + r.read::()?; let header_size = r.read::()? as usize; r = Reader::new(cff.get(header_size ..).ok_or(Error::InvalidOffset)?); // Read four indices at fixed positions. let name = r.read::>()?; - let top_dicts = r.read::>()?; + let tops = r.read::>()?; let strings = r.read::>()?; let global_subrs = r.read::>()?; // Extract only Top DICT. - let mut top_dict = top_dicts.into_one().ok_or(Error::MissingData)?; + let top = tops.into_one().ok_or(Error::MissingData)?; - // These are the glyph descriptions. - let mut char_strings = { - let offset = top_dict.get_offset(top::CHAR_STRINGS).ok_or(Error::MissingData)?; + // Read the glyph descriptions. + let char_strings = { + let offset = top.get_offset(top::CHAR_STRINGS).ok_or(Error::MissingData)?; Index::read_at(cff, offset)? }; - // Copy over charset. + // Read the charset. let mut charset = None; - if let Some(offset @ 1 ..) = top_dict.get_offset(top::CHARSET) { + if let Some(offset @ 1 ..) = top.get_offset(top::CHARSET) { let sub = cff.get(offset ..).ok_or(Error::InvalidOffset)?; charset = Some(read_charset(sub, ctx.num_glyphs)?); } // Read Private DICT with local subroutines. - let mut private_dict = None; - let mut local_subrs = None; - if let Some(range) = top_dict.get_range(top::PRIVATE) { - let start = range.start; - let sub = cff.get(range).ok_or(Error::InvalidOffset)?; - let dict = Dict::read_at(sub, 0)?; - - if let Some(offset) = dict.get_offset(private::SUBRS) { - local_subrs = Some(Index::read_at(cff, start + offset)?); - } - - private_dict = Some(dict); + let mut private = None; + if let Some(range) = top.get_range(top::PRIVATE) { + private = Some(read_private_dict(cff, range)?); } // Read data specific to CID-keyed fonts. let mut cid = None; - if top_dict.get(top::ROS).is_some() { - cid = Some(read_cid_data(ctx, cff, &top_dict)?); + if top.get(top::ROS).is_some() { + cid = Some(read_cid_data(ctx, cff, &top)?); } - // Subset things. - subset_top(&mut top_dict); - subset_char_strings(ctx, &mut char_strings)?; - - if let Some(dict) = &mut private_dict { - subset_private(dict); - } - - if let Some(cid) = &mut cid { - for dict in cid.array.iter_mut() { - subset_top(dict) - } - for dict in &mut cid.private_dicts { - subset_private(dict); - } - } - - // Construct a new CFF table. - let sub_cff = construct(Table { + Ok(Table { name, - top_dict, + top, strings, global_subrs, charset, char_strings, - private_dict, - local_subrs, + private, cid, - }); + }) +} - ctx.push(Tag::CFF, sub_cff); +/// Write the a new CFF table. +fn write_table(w: &mut Writer, table: &Table, offsets: &mut Offsets) { + // Write header. + w.write::(1); + w.write::(0); + w.write::(4); + w.write::(4); + w.inspect("Header"); - Ok(()) + // Write the four fixed indices. + w.write_ref(&table.name); + w.inspect("Name INDEX"); + + w.write(Index::from_one(table.top.clone())); + w.inspect("Top DICT INDEX"); + + w.write_ref(&table.strings); + w.inspect("String INDEX"); + + w.write_ref(&table.global_subrs); + w.inspect("Global Subroutine INDEX"); + + // Write charset. + if let Some(charset) = &table.charset { + offsets.charset = Some(w.len()); + write_charset(w, charset); + w.inspect("Charset"); + } + + // Write char strings. + offsets.char_strings = w.len(); + w.write_ref(&table.char_strings); + w.inspect("Charstring INDEX"); + + // Write private dict. + if let (Some(private), Some(offsets)) = (&table.private, &mut offsets.private) { + write_private_data(w, private, offsets); + } + + // Write data specific to CID-keyed fonts. + if let (Some(cid), Some(offsets)) = (&table.cid, &mut offsets.cid) { + write_cid_data(w, cid, offsets); + } } /// Read data specific to CID-keyed fonts. fn read_cid_data<'a>( ctx: &Context, cff: &'a [u8], - top_dict: &Dict<'a>, + top: &Dict<'a>, ) -> Result> { // Read FD ARRAY. let array = { - let offset = top_dict.get_offset(top::FD_ARRAY).ok_or(Error::MissingData)?; + let offset = top.get_offset(top::FD_ARRAY).ok_or(Error::MissingData)?; Index::>::read_at(cff, offset)? }; // Read FD Select data structure. let select = { - let offset = top_dict.get_offset(top::FD_SELECT).ok_or(Error::MissingData)?; + let offset = top.get_offset(top::FD_SELECT).ok_or(Error::MissingData)?; let sub = cff.get(offset ..).ok_or(Error::InvalidOffset)?; - parse_fd_select(sub, ctx.num_glyphs)? + read_fd_select(sub, ctx.num_glyphs)? }; - let mut private_dicts = vec![]; - let mut local_subrs = vec![]; - // Read CID private dicts. + let mut private = vec![]; for dict in array.iter() { let range = dict.get_range(top::PRIVATE).ok_or(Error::MissingData)?; - let start = range.start; - let sub = cff.get(range).ok_or(Error::InvalidOffset)?; - let dict = Dict::read_at(sub, 0)?; - - let mut local_subr = None; - if let Some(offset) = dict.get_offset(private::SUBRS) { - local_subr = Some(Index::read_at(cff, start + offset)?); - } - - private_dicts.push(dict); - local_subrs.push(local_subr); - } - - Ok(CidData { - array, - select, - private_dicts, - local_subrs, - }) -} - -/// Construct a new CFF table. -fn construct(mut table: Table) -> Vec { - let mut data = vec![]; - let mut offsets = setup_offsets(&table); - - for run in 0 .. 2 { - let mut last = 0; - let mut inspect = |w: &Writer, _name: &str| { - if run > 0 { - #[cfg(test)] - eprintln!("{_name} took {} bytes", w.len() - last); - last = w.len(); - } - }; - - set_offets(&mut table, &offsets); - - // Write header. - let mut w = Writer::new(); - w.write::(1); - w.write::(0); - w.write::(4); - w.write::(4); - inspect(&w, "Header"); - - // Write the four fixed indices. - w.write_ref(&table.name); - inspect(&w, "Name INDEX"); - - w.write(Index::from_one(table.top_dict.clone())); - inspect(&w, "Top DICT INDEX"); - - w.write_ref(&table.strings); - inspect(&w, "String INDEX"); - - w.write_ref(&table.global_subrs); - inspect(&w, "Global Subroutine INDEX"); - - // Write charset. - if let Some(charset) = &table.charset { - offsets.charset = Some(w.len()); - w.write_ref(charset); - inspect(&w, "Charset"); - } - - // Write char strings. - offsets.char_strings = w.len(); - w.write_ref(&table.char_strings); - inspect(&w, "Charstring INDEX"); - - // Write private dict. - if let (Some(private_dict), Some(range)) = - (&table.private_dict, &mut offsets.private_dict) - { - range.start = w.len(); - w.write_ref(private_dict); - range.end = w.len(); - inspect(&w, "Private DICT"); - } - - // Write local subroutines. - if let Some(local_subrs) = &table.local_subrs { - let base = offsets.private_dict.as_ref().unwrap().start; - offsets.local_subrs = Some(w.len() - base); - w.write_ref(local_subrs); - inspect(&w, "Local Subroutine INDEX"); - } - - // Write data specific to CID-keyed fonts. - if let (Some(data), Some(offsets)) = (&table.cid, &mut offsets.cid) { - // Write FD Array. - offsets.array = w.len(); - w.write_ref(&data.array); - inspect(&w, "FD Array"); - - // Write FD Select. - offsets.select = w.len(); - write_fd_select(&mut w, &data.select); - inspect(&w, "FD Select"); - - // Write Private DICTS. - for (dict, range) in data.private_dicts.iter().zip(&mut offsets.private_dicts) - { - range.start = w.len(); - w.write_ref(dict); - range.end = w.len(); - inspect(&w, "Private DICT"); - } - - // Write local subroutines. - for (i, subrs) in data.local_subrs.iter().enumerate() { - if let Some(subrs) = subrs { - let base = offsets.private_dicts[i].start; - offsets.local_subrs[i] = Some(w.len() - base); - w.write_ref(subrs); - inspect(&w, "Local Subroutine INDEX"); - } - } - } - - data = w.finish(); + private.push(read_private_dict(cff, range)?); } - data + Ok(CidData { array, select, private }) } -/// Create initial zero offsets for all data structures. -fn setup_offsets(table: &Table) -> Offsets { - Offsets { - char_strings: 0, - charset: table.charset.as_ref().map(|_| 0), - private_dict: table.private_dict.as_ref().map(|_| 0 .. 0), - local_subrs: table.local_subrs.as_ref().map(|_| 0), - cid: table.cid.as_ref().map(|cid| CidOffsets { - array: 0, - select: 0, - private_dicts: vec![0 .. 0; cid.array.len()], - local_subrs: cid - .local_subrs - .iter() - .map(|subr| subr.as_ref().map(|_| 0)) - .collect(), - }), +/// Write data specific to CID-keyed fonts. +fn write_cid_data(w: &mut Writer, cid: &CidData, offsets: &mut CidOffsets) { + // Write FD Array. + offsets.array = w.len(); + w.write_ref(&cid.array); + w.inspect("FD Array"); + + // Write FD Select. + offsets.select = w.len(); + write_fd_select(w, &cid.select); + w.inspect("FD Select"); + + // Write Private DICTS. + for (private, offsets) in cid.private.iter().zip(&mut offsets.private) { + write_private_data(w, private, offsets); } } -/// Insert the offsets of various parts of the font into the relevant -/// dictionaries. -fn set_offets(table: &mut Table, offsets: &Offsets) { - if let Some(offset) = offsets.charset { - table.top_dict.set_offset(top::CHARSET, offset); - } - - table.top_dict.set_offset(top::CHAR_STRINGS, offsets.char_strings); - - if let Some(range) = &offsets.private_dict { - table.top_dict.set_range(top::PRIVATE, range); - } - - if let (Some(private), Some(offset)) = (&mut table.private_dict, offsets.local_subrs) - { - private.set_offset(private::SUBRS, offset); - } - - if let (Some(data), Some(offsets)) = (&mut table.cid, &offsets.cid) { - table.top_dict.set_offset(top::FD_ARRAY, offsets.array); - table.top_dict.set_offset(top::FD_SELECT, offsets.select); - - for (dict, range) in data.array.iter_mut().zip(&offsets.private_dicts) { - dict.set_range(top::PRIVATE, range); - } +/// Read a Private DICT and optionally local subroutines. +fn read_private_dict<'a>(cff: &'a [u8], range: Range) -> Result> { + let start = range.start; + let sub = cff.get(range).ok_or(Error::InvalidOffset)?; + let dict = Dict::read_at(sub, 0)?; - for (private, offset) in data.private_dicts.iter_mut().zip(&offsets.local_subrs) { - if let &Some(offset) = offset { - private.set_offset(private::SUBRS, offset); - } - } + let mut subrs = None; + if let Some(offset) = dict.get_offset(private::SUBRS) { + subrs = Some(Index::read_at(cff, start + offset)?); } -} -/// Subset a Top DICT. -/// -/// Keeps only relevant non-offset entries. Offset entries are inserted later. -fn subset_top<'a>(top_dict: &mut Dict<'a>) { - top_dict.keep(top::KEEP); + Ok(PrivateData { dict, subrs }) } -/// Subset a Private DICT. -/// -/// Keeps only relevant non-offset entries. Offset entries are inserted later. -fn subset_private<'a>(private_dict: &mut Dict<'a>) { - private_dict.keep(private::KEEP); -} - -/// Subset the glyph descriptions. -fn subset_char_strings<'a>(ctx: &Context, strings: &mut Index>) -> Result<()> { - // The set of all glyphs we will include in the subset. - let subset: HashSet = ctx.profile.glyphs.iter().copied().collect(); - - for glyph in 0 .. ctx.num_glyphs { - if !subset.contains(&glyph) { - // The byte sequence [14] is the minimal valid charstring consisting - // of just a single `endchar` operator. - *strings.get_mut(glyph as usize).ok_or(Error::InvalidOffset)? = Opaque(&[14]); - } +/// Write a Private DICT and optionally local subroutines. +fn write_private_data( + w: &mut Writer, + private: &PrivateData, + offsets: &mut PrivateOffsets, +) { + offsets.dict.start = w.len(); + w.write_ref(&private.dict); + offsets.dict.end = w.len(); + w.inspect("Private DICT"); + + // Write local subroutines. + if let Some(subrs) = &private.subrs { + offsets.subrs = Some(w.len() - offsets.dict.start); + w.write_ref(subrs); + w.inspect("Local Subroutine INDEX"); } - - Ok(()) } -/// Extract the charset bytes. -fn read_charset(data: &[u8], num_glyphs: u16) -> Result> { +/// Read a charset. +fn read_charset(data: &[u8], num_glyphs: u16) -> Result> { let mut r = Reader::new(data); let mut len = 1; @@ -407,14 +417,21 @@ fn read_charset(data: &[u8], num_glyphs: u16) -> Result> { _ => return Err(Error::InvalidData), } - Ok(Opaque(data.get(.. len).ok_or(Error::InvalidOffset)?)) + Ok(Charset(Opaque( + data.get(.. len).ok_or(Error::InvalidOffset)?, + ))) +} + +/// Write a charset. +fn write_charset(w: &mut Writer, charset: &Charset) { + w.write_ref(&charset.0); } /// Returns the font dict index for each glyph. -fn parse_fd_select(data: &[u8], num_glyphs: u16) -> Result> { +fn read_fd_select(data: &[u8], num_glyphs: u16) -> Result> { let mut r = Reader::new(data); let format = r.read::()?; - Ok(match format { + Ok(FdSelect(match format { 0 => Cow::Borrowed(r.take(num_glyphs as usize)?), 3 => { let count = r.read::()?; @@ -431,13 +448,13 @@ fn parse_fd_select(data: &[u8], num_glyphs: u16) -> Result> { Cow::Owned(fds) } _ => return Err(Error::InvalidData), - }) + })) } /// Write an FD Select data structure. -fn write_fd_select(w: &mut Writer, fd: &[u8]) { +fn write_fd_select(w: &mut Writer, select: &FdSelect) { w.write::(0); - w.give(fd); + w.give(&select.0); } /// An opaque binary data structure. diff --git a/src/lib.rs b/src/lib.rs index 2eb85205..efd1d7e1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -476,7 +476,6 @@ mod tests { let out = Path::new("target").join(Path::new(stem)).with_extension("ttf"); std::fs::write(out, &subs).unwrap(); - let ttfs = ttf_parser::Face::from_slice(&subs, 0).unwrap(); let cff = ttfs.tables().cff; for c in text.chars() { diff --git a/src/stream.rs b/src/stream.rs index 219fdd21..4ced6f41 100644 --- a/src/stream.rs +++ b/src/stream.rs @@ -47,12 +47,16 @@ impl<'a> Reader<'a> { } /// A writable stream of binary data. -pub struct Writer(Vec); +pub struct Writer(Vec, #[cfg(test)] usize); impl Writer { /// Create a new writable stream of binary data. pub fn new() -> Self { - Self(Vec::with_capacity(1024)) + Self( + Vec::with_capacity(1024), + #[cfg(test)] + 0, + ) } /// Write `T` into the data. @@ -86,6 +90,15 @@ impl Writer { pub fn finish(self) -> Vec { self.0 } + + /// Print how many bytes were written since the last inspect call. + pub fn inspect(&mut self, _name: &str) { + #[cfg(test)] + { + eprintln!("{_name} took {} bytes", self.len() - self.1); + self.1 = self.len(); + } + } } /// Decode structures from a stream of binary data.