From 336533dd2944709a1195eac91a8214cbb4a1a7e6 Mon Sep 17 00:00:00 2001 From: Aritz Beobide-Cardinal Date: Thu, 2 Jan 2025 14:50:57 -0500 Subject: [PATCH] Allow nested shortcodes (#2748) * Add ShortcodeInvocationCounter * Add nested shortcode parsing and rendering * Remove ShortcodeInvocationCounter from render context * Remove interior mutability in ShortcodeInvocationCounter * Rename Shortcode::flatten to render_inner_shortcodes --- components/markdown/benches/all.rs | 1 + components/markdown/src/content.pest | 6 +- components/markdown/src/shortcode/mod.rs | 43 +++++-- components/markdown/src/shortcode/parser.rs | 115 +++++++++++++++--- components/markdown/tests/common.rs | 3 + components/markdown/tests/shortcodes.rs | 74 +++++++++++ ...down_in_nested_shortcodes_with_bodies.snap | 13 ++ ...ested_shortcodes_with_bodies_with_nth.snap | 17 +++ components/utils/src/templates.rs | 18 +++ 9 files changed, 259 insertions(+), 31 deletions(-) create mode 100644 components/markdown/tests/snapshots/shortcodes__can_render_markdown_in_nested_shortcodes_with_bodies.snap create mode 100644 components/markdown/tests/snapshots/shortcodes__can_render_nested_shortcodes_with_bodies_with_nth.snap diff --git a/components/markdown/benches/all.rs b/components/markdown/benches/all.rs index 393c77483..ffec28508 100644 --- a/components/markdown/benches/all.rs +++ b/components/markdown/benches/all.rs @@ -147,6 +147,7 @@ fn bench_render_content_with_emoji(b: &mut test::Bencher) { let tera = Tera::default(); let content2 = CONTENT.replace(r#"{{ youtube(id="my_youtube_id") }}"#, ""); let mut config = Config::default_for_test(); + config.markdown.highlight_code = false; config.markdown.render_emoji = true; let permalinks_ctx = HashMap::new(); diff --git a/components/markdown/src/content.pest b/components/markdown/src/content.pest index fcf8c1b5b..d70665612 100644 --- a/components/markdown/src/content.pest +++ b/components/markdown/src/content.pest @@ -55,9 +55,9 @@ ignored_sc_body_end = !{ 
"{%/*" ~ "end" ~ "*/%}" } shortcode_with_body = !{ sc_body_start ~ text_in_body_sc ~ sc_body_end } ignored_shortcode_with_body = { ignored_sc_body_start ~ text_in_ignored_body_sc ~ ignored_sc_body_end } -text_in_body_sc = ${ (!(sc_body_end) ~ ANY)+ } -text_in_ignored_body_sc = ${ (!(ignored_sc_body_end) ~ ANY)+ } -text = ${ (!(inline_shortcode | ignored_inline_shortcode | shortcode_with_body | ignored_shortcode_with_body) ~ ANY)+ } +text_in_body_sc = ${ (!(sc_body_end) ~ content)+ } +text_in_ignored_body_sc = ${ (!(ignored_sc_body_end) ~ content)+ } +text = ${ (!(inline_shortcode | ignored_inline_shortcode | sc_body_start | ignored_sc_body_start | sc_body_end | ignored_sc_body_end) ~ ANY)+ } content = _{ ignored_inline_shortcode | diff --git a/components/markdown/src/shortcode/mod.rs b/components/markdown/src/shortcode/mod.rs index 92ca6641d..dd13aff20 100644 --- a/components/markdown/src/shortcode/mod.rs +++ b/components/markdown/src/shortcode/mod.rs @@ -1,8 +1,8 @@ use std::collections::HashMap; -use errors::{Error, Result}; +use errors::Result; use libs::tera; -use utils::templates::{ShortcodeDefinition, ShortcodeFileType}; +use utils::templates::{ShortcodeDefinition, ShortcodeFileType, ShortcodeInvocationCounter}; mod parser; @@ -13,14 +13,11 @@ pub fn extract_shortcodes( source: &str, definitions: &HashMap, ) -> Result<(String, Vec)> { - let (out, mut shortcodes) = parse_for_shortcodes(source)?; + let (out, mut shortcodes) = + parse_for_shortcodes(source, &mut ShortcodeInvocationCounter::new())?; for sc in &mut shortcodes { - if let Some(def) = definitions.get(&sc.name) { - sc.tera_name = def.tera_name.clone(); - } else { - return Err(Error::msg(format!("Found usage of a shortcode named `{}` but we do not know about. 
Make sure it's not a typo and that a field name `{}.{{html,md}}` exists in the `templates/shortcodes` directory.", sc.name, sc.name))); - } + sc.fill_tera_name(definitions)?; } Ok((out, shortcodes)) @@ -79,6 +76,7 @@ mod tests { span: 0..SHORTCODE_PLACEHOLDER.len(), body: None, nth: 1, + inner: Vec::new(), tera_name: "shortcodes/a.md".to_owned(), }, Shortcode { @@ -87,6 +85,7 @@ mod tests { span: SHORTCODE_PLACEHOLDER.len()..(2 * SHORTCODE_PLACEHOLDER.len()), body: None, nth: 2, + inner: Vec::new(), tera_name: "shortcodes/a.md".to_owned(), } ], @@ -107,7 +106,35 @@ mod tests { span: 9..(9 + SHORTCODE_PLACEHOLDER.len()), body: Some("Content of the body".to_owned()), nth: 1, + inner: Vec::new(), + tera_name: "shortcodes/bodied.md".to_owned(), + },], + &tera_context, + &tera + ) + .unwrap() + .0, + "Much wow Content of the body".to_string() + ); + assert_eq!( + insert_md_shortcodes( + format!("Much wow {}", SHORTCODE_PLACEHOLDER), + vec![Shortcode { + name: "bodied".to_string(), + args: to_value(&HashMap::::new()).unwrap(), + span: 9..(9 + SHORTCODE_PLACEHOLDER.len()), + body: Some(format!("Content of {SHORTCODE_PLACEHOLDER}")), + nth: 1, + inner: vec![Shortcode { + name: "bodied".to_string(), + args: to_value(&HashMap::::new()).unwrap(), + span: 11..(11 + SHORTCODE_PLACEHOLDER.len()), + body: Some("the body".to_owned()), + nth: 1, + inner: Vec::new(), + tera_name: "shortcodes/bodied.md".to_owned(), + },], tera_name: "shortcodes/bodied.md".to_owned(), },], &tera_context, diff --git a/components/markdown/src/shortcode/parser.rs b/components/markdown/src/shortcode/parser.rs index 4ce30b7bd..479e2b558 100644 --- a/components/markdown/src/shortcode/parser.rs +++ b/components/markdown/src/shortcode/parser.rs @@ -1,12 +1,11 @@ -use std::ops::Range; +use std::{collections::HashMap, ops::Range}; use errors::{bail, Context as ErrorContext, Result}; use libs::tera::{to_value, Context, Map, Tera, Value}; use pest::iterators::Pair; use pest::Parser; use pest_derive::Parser; -use 
std::collections::HashMap; -use utils::templates::ShortcodeFileType; +use utils::templates::{ShortcodeDefinition, ShortcodeFileType, ShortcodeInvocationCounter}; pub const SHORTCODE_PLACEHOLDER: &str = "@@ZOLA_SC_PLACEHOLDER@@"; @@ -14,14 +13,34 @@ pub const SHORTCODE_PLACEHOLDER: &str = "@@ZOLA_SC_PLACEHOLDER@@"; pub struct Shortcode { pub(crate) name: String, pub(crate) args: Value, + // In practice, span.len() is always equal to SHORTCODE_PLACEHOLDER.len() pub(crate) span: Range, pub(crate) body: Option, pub(crate) nth: usize, + pub(crate) inner: Vec, // set later down the line, for quick access without needing the definitions pub(crate) tera_name: String, } impl Shortcode { + /// Attempts to fill the `tera_name` field from the provided definitions for self and all of self.inner. + /// + /// This returns an error if the definitions do not have this shortcode. + pub fn fill_tera_name( + &mut self, + definitions: &HashMap, + ) -> Result<()> { + if let Some(def) = definitions.get(&self.name) { + self.tera_name = def.tera_name.clone(); + } else { + return Err(errors::anyhow!("Found usage of a shortcode named `{}` but we do not know about. Make sure it's not a typo and that a field name `{}.{{html,md}}` exists in the `templates/shortcodes` directory.", self.name, self.name)); + } + for inner_sc in self.inner.iter_mut() { + inner_sc.fill_tera_name(definitions)?; + } + Ok(()) + } + pub fn file_type(&self) -> ShortcodeFileType { if self.tera_name.ends_with("md") { ShortcodeFileType::Markdown @@ -30,7 +49,34 @@ impl Shortcode { } } - pub fn render(self, tera: &Tera, context: &Context) -> Result { + /// Expands all inner-shortcodes and leaves self.inner empty. + /// + /// This function has no effect with shortcodes without bodies. 
+ pub fn render_inner_shortcodes(&mut self, tera: &Tera, context: &Context) -> Result<()> { + let Some(body) = &mut self.body else { + return Ok(()); + }; + for inner_sc in std::mem::take(&mut self.inner).into_iter().rev() { + // We're not considering the file_type of the inner shortcodes. + // - HTML SC invokes HTML SC: works as expected. + // - MD SC invokes HTML SC: MD can do inline-html, it is assumed that this is intentional. + // - MD SC invokes MD SC: works as expected. + // - HTML SC invokes MD SC: HTML SC's with MD bodies usually use the "markdown" filter. + let inner_sc_span = inner_sc.span.clone(); + let inner_sc_result = inner_sc.render(tera, context)?; + body.replace_range(inner_sc_span, &inner_sc_result); + } + Ok(()) + } + + pub fn render(mut self, tera: &Tera, context: &Context) -> Result { + // This function gets called under the following circumstances + // 1. as an .md shortcode, the resulting body is inserted into the document _before_ MD -> HTML conversion + // 2. as an .html shortcode, the result is inserted into the document _during_ MD -> HTML conversion. (The HTML + // is injected into cmark's AST) + // 3. As an inner-part of a shortcode which is being flattened. The file_type is not considered. 
+ self.render_inner_shortcodes(tera, context)?; + let name = self.name; let tpl_name = self.tera_name; let mut new_context = Context::from_value(self.args)?; @@ -49,6 +95,7 @@ impl Shortcode { Ok(res) } + /// Shifts `self.span` by `(rendered_length - sc_span.len())` pub fn update_range(&mut self, sc_span: &Range, rendered_length: usize) { if self.span.start < sc_span.start { return; @@ -152,14 +199,11 @@ fn parse_shortcode_call(pair: Pair) -> (String, Value) { (name.unwrap(), Value::Object(args)) } -pub fn parse_for_shortcodes(content: &str) -> Result<(String, Vec)> { +pub fn parse_for_shortcodes( + content: &str, + invocation_counter: &mut ShortcodeInvocationCounter, +) -> Result<(String, Vec)> { let mut shortcodes = Vec::new(); - let mut nths = HashMap::new(); - let mut get_invocation_count = |name: &str| { - let nth = nths.entry(String::from(name)).or_insert(0); - *nth += 1; - *nth - }; let mut output = String::with_capacity(content.len()); let mut pairs = match ContentParser::parse(Rule::page, content) { @@ -207,13 +251,14 @@ pub fn parse_for_shortcodes(content: &str) -> Result<(String, Vec)> { Rule::inline_shortcode => { let start = output.len(); let (name, args) = parse_shortcode_call(p); - let nth = get_invocation_count(&name); + let nth = invocation_counter.get(&name); shortcodes.push(Shortcode { name, args, span: start..(start + SHORTCODE_PLACEHOLDER.len()), body: None, nth, + inner: Vec::new(), tera_name: String::new(), }); output.push_str(SHORTCODE_PLACEHOLDER); @@ -224,14 +269,18 @@ pub fn parse_for_shortcodes(content: &str) -> Result<(String, Vec)> { // 3 items in inner: call, body, end // we don't care about the closing tag let (name, args) = parse_shortcode_call(inner.next().unwrap()); - let body = inner.next().unwrap().as_span().as_str().trim(); - let nth = get_invocation_count(&name); + let nth = invocation_counter.get(&name); + let (body, inner) = parse_for_shortcodes( + inner.next().unwrap().as_span().as_str().trim(), + invocation_counter, + )?; 
shortcodes.push(Shortcode { name, args, span: start..(start + SHORTCODE_PLACEHOLDER.len()), - body: Some(body.to_string()), + body: Some(body), nth, + inner, tera_name: String::new(), }); output.push_str(SHORTCODE_PLACEHOLDER) @@ -374,6 +423,7 @@ mod tests { span: 10..20, body: None, nth: 0, + inner: Vec::new(), tera_name: String::new(), }; // 6 -> 10 in length so +4 on both sides of the range @@ -393,6 +443,7 @@ mod tests { span: 42..65, body: None, nth: 0, + inner: Vec::new(), tera_name: String::new(), }; sc.update_range(&(9..32), 3); @@ -403,6 +454,7 @@ mod tests { fn can_extract_basic_inline_shortcode_with_args() { let (out, shortcodes) = parse_for_shortcodes( "Inline shortcode: {{ hello(string='hey', int=1, float=2.1, bool=true, array=[true, false]) }} hey", + &mut ShortcodeInvocationCounter::new(), ) .unwrap(); assert_eq!(out, format!("Inline shortcode: {} hey", SHORTCODE_PLACEHOLDER)); @@ -423,8 +475,11 @@ mod tests { #[test] fn can_unignore_ignored_inline_shortcode() { - let (out, shortcodes) = - parse_for_shortcodes("Hello World {{/* youtube() */}} hey").unwrap(); + let (out, shortcodes) = parse_for_shortcodes( + "Hello World {{/* youtube() */}} hey", + &mut ShortcodeInvocationCounter::new(), + ) + .unwrap(); assert_eq!(out, "Hello World {{ youtube() }} hey"); assert_eq!(shortcodes.len(), 0); } @@ -433,6 +488,7 @@ mod tests { fn can_extract_shortcode_with_body() { let (out, shortcodes) = parse_for_shortcodes( "Body shortcode\n {% quote(author='Bobby', array=[[true]]) %}DROP TABLES;{% end %} \n hey", + &mut ShortcodeInvocationCounter::new() ) .unwrap(); assert_eq!(out, format!("Body shortcode\n {} \n hey", SHORTCODE_PLACEHOLDER)); @@ -451,9 +507,11 @@ mod tests { #[test] fn can_unignore_ignored_shortcode_with_body() { - let (out, shortcodes) = - parse_for_shortcodes("Hello World {%/* youtube() */%} Somebody {%/* end */%} hey") - .unwrap(); + let (out, shortcodes) = parse_for_shortcodes( + "Hello World {%/* youtube() */%} Somebody {%/* end */%} hey", + &mut 
ShortcodeInvocationCounter::new(), + ) + .unwrap(); assert_eq!(out, "Hello World {% youtube() %} Somebody {% end %} hey"); assert_eq!(shortcodes.len(), 0); } @@ -462,6 +520,7 @@ mod tests { fn can_extract_multiple_shortcodes_and_increment_nth() { let (out, shortcodes) = parse_for_shortcodes( "Hello World {% youtube() %} Somebody {% end %} {{ hello() }}\n {{hello()}}", + &mut ShortcodeInvocationCounter::new(), ) .unwrap(); assert_eq!( @@ -477,6 +536,21 @@ mod tests { assert_eq!(shortcodes[2].nth, 2); } + #[test] + fn can_extract_nested_shortcode_bodies_and_increment_nth() { + let (out, shortcodes) = parse_for_shortcodes( + "Hello World {% i_am_gonna_nest() %} Somebody {% i_am_gonna_nest() %} Somebody {% end %} {% end %}!!", + &mut ShortcodeInvocationCounter::new(), + ) + .unwrap(); + assert_eq!(out, format!("Hello World {}!!", SHORTCODE_PLACEHOLDER,)); + assert_eq!(shortcodes.len(), 1); + assert_eq!(shortcodes[0].inner.len(), 1); + assert_eq!(shortcodes[0].nth, 1); + assert_eq!(shortcodes[0].inner[0].nth, 2); + assert_eq!(shortcodes[0].body, Some(format!("Somebody {SHORTCODE_PLACEHOLDER}"))); + } + #[test] fn can_handle_multiple_shortcodes() { let (_, shortcodes) = parse_for_shortcodes( @@ -486,6 +560,7 @@ mod tests { {{ vimeo(id="210073083#hello", n_a_me="hello") }} {{ streamable(id="c0ic", n1=true) }} {{ gist(url="https://gist.github.com/Keats/32d26f699dcc13ebd41b") }}"#, + &mut ShortcodeInvocationCounter::new(), ) .unwrap(); assert_eq!(shortcodes.len(), 5); diff --git a/components/markdown/tests/common.rs b/components/markdown/tests/common.rs index aaef9784c..6cf6198f9 100644 --- a/components/markdown/tests/common.rs +++ b/components/markdown/tests/common.rs @@ -41,6 +41,8 @@ fn configurable_render( .unwrap(); tera.add_raw_template("shortcodes/a.html", "

a: {{ nth }}

").unwrap(); tera.add_raw_template("shortcodes/b.html", "

b: {{ nth }}

").unwrap(); + tera.add_raw_template("shortcodes/a_md.md", "**a: {{ nth }}**").unwrap(); + tera.add_raw_template("shortcodes/b_md.md", "**b: {{ nth }}**").unwrap(); tera.add_raw_template("shortcodes/quote.html", "{{body}}").unwrap(); tera.add_raw_template("shortcodes/pre.html", "
{{body}}
").unwrap(); tera.add_raw_template("shortcodes/four_spaces.html", " no highlight\n or there").unwrap(); @@ -51,6 +53,7 @@ fn configurable_render( ) .unwrap(); tera.add_raw_template("shortcodes/md_passthrough.md", "{{body}}").unwrap(); + tera.add_raw_template("shortcodes/nth.html", "{{ nth }}").unwrap(); let mut permalinks = HashMap::new(); permalinks.insert("pages/about.md".to_owned(), "https://getzola.org/about/".to_owned()); diff --git a/components/markdown/tests/shortcodes.rs b/components/markdown/tests/shortcodes.rs index 5b63811bc..6da64d64c 100644 --- a/components/markdown/tests/shortcodes.rs +++ b/components/markdown/tests/shortcodes.rs @@ -323,3 +323,77 @@ Here is {{ ex1(page="") }} example. .body; insta::assert_snapshot!(body); } + +#[test] +fn can_render_markdown_in_nested_shortcodes_with_bodies() { + let config = Config::default_for_test(); + let body = common::render_with_config( + r#" +# Begin level 0 + +{% render_md() %} + +## Begin level 1 + +{% render_md() %} + +### Begin level 2 + +{{ a_md() }}, {{ a_md() }}, {{ b_md() }}, {{ b_md() }} + +### End level 2 + +{% end %} + +## End level 1 + +{% end %} + +# End level 0 + "#, + config, + ) + .unwrap() + .body; + insta::assert_snapshot!(body); +} + +#[test] +fn can_render_nested_shortcodes_with_bodies_with_nth() { + let config = Config::default_for_test(); + let body = common::render_with_config( + r#" +{{ a_md() }} + +{{ a_md() }} + +{% render_md() %} + +{{ a_md() }} + +{{ a_md() }} + +{% render_md() %} + +{{ a_md() }} + +{{ a_md() }} + +{% end %} + +{{ a_md() }} + +{{ a_md() }} + +{% end %} + +{{ a_md() }} + +{{ a_md() }} + "#, + config, + ) + .unwrap() + .body; + insta::assert_snapshot!(body); +} diff --git a/components/markdown/tests/snapshots/shortcodes__can_render_markdown_in_nested_shortcodes_with_bodies.snap b/components/markdown/tests/snapshots/shortcodes__can_render_markdown_in_nested_shortcodes_with_bodies.snap new file mode 100644 index 000000000..0bc32408e --- /dev/null +++ 
b/components/markdown/tests/snapshots/shortcodes__can_render_markdown_in_nested_shortcodes_with_bodies.snap @@ -0,0 +1,13 @@ +--- +source: components/markdown/tests/shortcodes.rs +assertion_line: 358 +expression: body +--- +

Begin level 0

+

Begin level 1

+

Begin level 2

+

a: 1, a: 2, b: 1, b: 2

+

End level 2

+
+

End level 1

+

End level 0

diff --git a/components/markdown/tests/snapshots/shortcodes__can_render_nested_shortcodes_with_bodies_with_nth.snap b/components/markdown/tests/snapshots/shortcodes__can_render_nested_shortcodes_with_bodies_with_nth.snap new file mode 100644 index 000000000..8d258a84d --- /dev/null +++ b/components/markdown/tests/snapshots/shortcodes__can_render_nested_shortcodes_with_bodies_with_nth.snap @@ -0,0 +1,17 @@ +--- +source: components/markdown/tests/shortcodes.rs +assertion_line: 398 +expression: body +--- +

a: 1

+

a: 2

+

a: 3

+

a: 4

+

a: 5

+

a: 6

+
+

a: 7

+

a: 8

+
+

a: 9

+

a: 10

diff --git a/components/utils/src/templates.rs b/components/utils/src/templates.rs index a70780dcf..a6d54f0d5 100644 --- a/components/utils/src/templates.rs +++ b/components/utils/src/templates.rs @@ -34,6 +34,24 @@ impl ShortcodeDefinition { } } +#[derive(Debug, Default, Clone)] +pub struct ShortcodeInvocationCounter { + amounts: HashMap<String, usize>, +} +impl ShortcodeInvocationCounter { + pub fn new() -> Self { + Self::default() + } + pub fn get(&mut self, str: &str) -> usize { + let nth = self.amounts.entry(str.into()).or_insert(0); + *nth += 1; + return *nth; + } + pub fn reset(&mut self) { + self.amounts.clear(); + } +} + /// Fetches all the shortcodes from the Tera instances pub fn get_shortcodes(tera: &Tera) -> HashMap<String, ShortcodeDefinition> { let mut shortcode_definitions = HashMap::new();