diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a0410d..c7fd8e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ ## Unreleased +* Fix parsing of flat JSON front matter objects + ## v0.1.1 (October 11, 2024) * Automated release setup diff --git a/README.md b/README.md index 31fa870..668ae33 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,65 @@ # Matterhorn A lenient front matter parsing crate that supports files prefixed with YAML, JSON, and TOML front matter. +The type of front matter is detected automatically. + +The order of input keys is retained, and parsing is lenient where possible. +Notably, duplicate keys are supported in YAML front matter. + +All parsed front matter is returned as a `serde_json` Value. + +## Usage + +```bash +cargo add matterhorn +``` + +```rust +const YAML_SOURCE_FILE: &str = r#" +--- +title: Hello World +order: 12 +--- +# Main Title + +Cras mattis consectetur purus sit amet fermentum. +"#; + +const TOML_SOURCE_FILE: &str = r#" ++++ +title = "Hello World" +order = 12 ++++ +# Main Title + +Cras mattis consectetur purus sit amet fermentum. +"#; + +const JSON_SOURCE_FILE: &str = r#" +{ + "title": "Hello World", + "order": 12 +} +# Main Title + +Cras mattis consectetur purus sit amet fermentum. 
+"#; + +fn main() { + let document = matterhorn::parse_document(YAML_SOURCE_FILE).expect("Input should be valid"); + + println!("{:#?}", document.front_matter); + // Prints: + // serde_json::Value::Object { + // "title": serde_json::Value::String("Hello World"), + // "order": serde_json::Value::Number(12), + // } + + println!("{:#?}", document.content); + // Prints: + // "# Main Title\n\nCras mattis consectetur purus sit amet fermentum.\n" + + assert_eq!(matterhorn::parse_document(YAML_SOURCE_FILE), matterhorn::parse_document(TOML_SOURCE_FILE)); + assert_eq!(matterhorn::parse_document(TOML_SOURCE_FILE), matterhorn::parse_document(JSON_SOURCE_FILE)); +} +``` diff --git a/src/lib.rs b/src/lib.rs index 203435d..1e824e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -49,7 +49,10 @@ pub fn parse_document<'d>( document_start + separator.len() }; - for (pre_fm_end, _) in original_content.match_indices(separator).skip(1) { + for (pre_fm_end, _) in original_content + .match_indices(separator) + .filter(|(i, _)| *i > fm_start) + { let post_fm_end = pre_fm_end + separator.len(); let potential_front_matter = if should_parse_separator { &original_content[fm_start..post_fm_end]