-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #773 from googlefonts/glyphdata-one-more-time
The continued adventures of GlyphData
- Loading branch information
Showing
17 changed files
with
74,003 additions
and
4,480 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
use std::env; | ||
use std::path::Path; | ||
|
||
include!("src/glyphdata/glyphdata_impl.rs"); | ||
|
||
fn parse_xml_files() -> Result<Vec<GlyphInfo>, GlyphDataError> { | ||
let mut one = parse_xml_file("data/GlyphData.xml")?; | ||
let two = parse_xml_file("data/GlyphData_Ideographs.xml")?; | ||
one.extend(two); | ||
Ok(one) | ||
} | ||
|
||
fn parse_xml_file(path: &str) -> Result<Vec<GlyphInfo>, GlyphDataError> { | ||
let Ok(bytes) = std::fs::read(path) else { | ||
panic!("failed to read path '{path}'"); | ||
}; | ||
parse_entries(&bytes) | ||
} | ||
|
||
// Emit the `rerun-if-changed` directives that tell cargo which paths
// should trigger a rerun of this build script.
fn register_dependencies() {
    for watched in ["data", "src/glyphdata/glyphdata_impl.rs"] {
        println!("cargo::rerun-if-changed={watched}");
    }
}
|
||
fn main() { | ||
let out_dir = env::var_os("OUT_DIR").unwrap(); | ||
let dest_path = Path::new(&out_dir).join("glyphdata.bin"); | ||
let parsed = parse_xml_files().expect("failed to parse GlyphData xml files"); | ||
let bytes = bincode::serialize(&parsed).expect("bincode failed"); | ||
std::fs::write(dest_path, bytes).unwrap(); | ||
|
||
register_dependencies() | ||
} |
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
<!--Used to test that user-provided data overrides the bundled data--> | ||
<?xml version="1.0" encoding="UTF-8" ?> | ||
<!DOCTYPE glyphData [ | ||
<!ELEMENT glyphData (glyph)+> | ||
<!ATTLIST glyphData | ||
format CDATA #IMPLIED> | ||
<!ELEMENT glyph EMPTY> | ||
<!ATTLIST glyph | ||
unicode CDATA #IMPLIED | ||
unicodeLegacy CDATA #IMPLIED | ||
name CDATA #REQUIRED | ||
category CDATA #REQUIRED | ||
subCategory CDATA #IMPLIED | ||
case CDATA #IMPLIED | ||
direction CDATA #IMPLIED | ||
script CDATA #IMPLIED | ||
description CDATA #IMPLIED | ||
production CDATA #IMPLIED | ||
altNames CDATA #IMPLIED> | ||
]> | ||
<glyphData> | ||
<glyph unicode="0030" name="zero" category="Other" subCategory="Ligature" description="DIGIT ZERO" /> | ||
<glyph unicode="0043" name="C" category="Number" case="upper" script="latin" description="LATIN CAPITAL LETTER C" /> | ||
<glyph unicode="021C" name="Yogh" category="Letter" case="upper" script="latin" production="Yolo" description="LATIN CAPITAL LETTER YOGH" /> | ||
</glyphData> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# GlyphData.xml | ||
|
||
This directory includes XML files containing data used by glyphs.app. This data | ||
is extracted from the glyphsLib python package using the `update.py` script | ||
contained here. | ||
|
||
That data in turn is taken from the [GlyphsInfo](https://github.com/schriftgestalt/GlyphsInfo) | ||
repository. | ||
|
||
This data is bundled into our crate using a `build.rs` script in the crate root. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
XML files copied from glyphsLib version 6.6.6. | ||
(this file generated by update.py) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
"""Update bundled xml files | ||
We try to match the behaviour of the python toolchain, so we want to ship the | ||
same data files as are currently bundled in glyphsLib. This script copies those | ||
files out of the currently active version of glyphsLib. | ||
Usage: | ||
python data/update.py | ||
""" | ||
|
||
import glyphsLib | ||
from importlib import resources | ||
import os | ||
import shutil | ||
|
||
def script_dir():
    """Return the absolute path of the directory that contains this script."""
    this_file = os.path.abspath(__file__)
    parent, _ = os.path.split(this_file)
    return parent
|
||
def get_data_file(filepath):
    """Return a handle to `filepath` inside glyphsLib's packaged `data` directory."""
    data_root = resources.files(glyphsLib).joinpath("data")
    return data_root.joinpath(filepath)
|
||
|
||
def copy_data_files():
    """Copy the glyphsLib XML data files into the directory holding this script."""
    destination_dir = script_dir()
    for filename in ("GlyphData.xml", "GlyphData_Ideographs.xml"):
        packaged = get_data_file(filename)
        destination = os.path.join(destination_dir, filename)
        # Stream the bytes across so the copy is an exact, unmodified duplicate.
        with packaged.open("rb") as src, open(destination, "wb") as dst:
            shutil.copyfileobj(src, dst)
|
||
def write_version_file():
    """Write a VERSION file recording which glyphsLib version the data came from."""
    version_path = os.path.join(script_dir(), 'VERSION')
    contents = (
        f"XML files copied from glyphsLib version {glyphsLib.__version__}.\n"
        "(this file generated by update.py)\n"
    )
    with open(version_path, 'w') as f:
        f.write(contents)
|
||
def main(_):
    """Refresh the bundled XML files, then record the glyphsLib version used.

    The single argument is accepted but ignored.
    """
    for step in (copy_data_files, write_version_file):
        step()


if __name__ == "__main__":
    main(None)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
//! determining glyph properties | ||
//! | ||
//! This module provides access to glyph info extracted from bundled | ||
//! (and potentially user-provided) data files. | ||
|
||
// NOTE: we define the types and parsing code in a separate file, so that | ||
// we can borrow it in our build.rs script without causing a cycle | ||
mod glyphdata_impl; | ||
use std::{ | ||
collections::{HashMap, HashSet}, | ||
path::Path, | ||
}; | ||
|
||
pub use glyphdata_impl::*; | ||
use smol_str::SmolStr; | ||
|
||
static BUNDLED_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/glyphdata.bin")); | ||
|
||
/// A queryable set of glyph data | ||
/// | ||
/// This is generally expensive to create, and is intended to be cached, or | ||
/// used behind a OnceCell. It is never modified after initial creation. | ||
pub struct GlyphData { | ||
// The info for all the glyphs we know of. | ||
data: Vec<GlyphInfo>, | ||
// the values in all maps are indices into the `data` vec. we use u32 to save space. | ||
name_map: HashMap<SmolStr, u32>, | ||
unicode_map: HashMap<u32, u32>, | ||
alt_name_map: HashMap<SmolStr, u32>, | ||
} | ||
|
||
impl GlyphData { | ||
/// Create a new data set, optionally loading user provided overrides | ||
pub fn new(user_overrides: Option<&Path>) -> Result<Self, GlyphDataError> { | ||
let user_overrides = user_overrides | ||
.map(|path| { | ||
let bytes = std::fs::read(path).map_err(|err| GlyphDataError::UserFile { | ||
path: path.to_owned(), | ||
reason: err.kind(), | ||
}); | ||
bytes.and_then(|xml| parse_entries(&xml)) | ||
}) | ||
.transpose()?; | ||
let bundled = load_bundled_data(); | ||
let all_entries = match user_overrides { | ||
Some(user_overrides) => merge_data(bundled, user_overrides), | ||
None => bundled, | ||
}; | ||
|
||
Ok(Self::new_impl(all_entries)) | ||
} | ||
|
||
fn new_impl(entries: Vec<GlyphInfo>) -> Self { | ||
let mut name_map = HashMap::with_capacity(entries.len()); | ||
let mut unicode_map = HashMap::with_capacity(entries.len()); | ||
let mut alt_name_map = HashMap::new(); | ||
|
||
for (i, entry) in entries.iter().enumerate() { | ||
name_map.insert(entry.name.clone(), i as u32); | ||
if let Some(cp) = entry.unicode { | ||
unicode_map.insert(cp, i as _); | ||
} | ||
for alt in &entry.alt_names { | ||
alt_name_map.insert(alt.clone(), i as _); | ||
} | ||
} | ||
|
||
Self { | ||
data: entries, | ||
name_map, | ||
unicode_map, | ||
alt_name_map, | ||
} | ||
} | ||
|
||
/// Look up info for a glyph by name | ||
/// | ||
/// This checks primary names first, and alternates afterwards. | ||
pub fn get_by_name(&self, name: impl AsRef<str>) -> Option<&GlyphInfo> { | ||
let name = name.as_ref(); | ||
self.name_map | ||
.get(name) | ||
.or_else(|| self.alt_name_map.get(name)) | ||
.and_then(|idx| self.data.get(*idx as usize)) | ||
} | ||
|
||
/// Look up info for a glyph by codepoint | ||
pub fn get_by_codepoint(&self, codepoint: u32) -> Option<&GlyphInfo> { | ||
self.unicode_map | ||
.get(&codepoint) | ||
.and_then(|idx| self.data.get(*idx as usize)) | ||
} | ||
} | ||
|
||
fn load_bundled_data() -> Vec<GlyphInfo> { | ||
bincode::deserialize(BUNDLED_DATA).unwrap() | ||
} | ||
|
||
fn merge_data(mut base: Vec<GlyphInfo>, overrides: Vec<GlyphInfo>) -> Vec<GlyphInfo> { | ||
let skip_names = overrides | ||
.iter() | ||
.map(|info| &info.name) | ||
.collect::<HashSet<_>>(); | ||
base.retain(|info| !skip_names.contains(&info.name)); | ||
base.extend(overrides); | ||
base | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    // The bundled data should deserialize and contain the expected number of entries.
    #[test]
    fn test_bundled_data() {
        let bundled = load_bundled_data();
        assert_eq!(bundled.len(), 73329);
    }

    // An override that shares a name with a bundled glyph should win the lookup.
    #[test]
    fn simple_overrides() {
        let fake_a = GlyphInfo {
            name: "A".into(),
            category: Category::Mark,
            subcategory: Subcategory::SpacingCombining,
            unicode: Some(b'A' as u32),
            production: None,
            alt_names: Default::default(),
        };
        let merged = merge_data(load_bundled_data(), vec![fake_a]);
        let data = GlyphData::new_impl(merged);

        let found = data.get_by_name("A").unwrap();
        assert_eq!(found.category, Category::Mark);
    }

    // Overrides loaded from an XML file should replace the bundled values.
    #[test]
    fn overrides_from_file() {
        let override_path = Path::new("./data/GlyphData_override_test.xml");
        let data = GlyphData::new(Some(override_path)).unwrap();

        for (name, expected) in [("zero", Category::Other), ("C", Category::Number)] {
            assert_eq!(data.get_by_name(name).unwrap().category, expected);
        }

        let yogh = data.get_by_name("Yogh").unwrap();
        assert_eq!(yogh.production, Some("Yolo".into()));
    }
}
Oops, something went wrong.