diff --git a/src/attr.rs b/src/attr.rs index 5fa19d1e..2802eda8 100644 --- a/src/attr.rs +++ b/src/attr.rs @@ -44,6 +44,18 @@ impl<'s> AttributeValue<'s> { pub fn parts(&'s self) -> AttributeValueParts<'s> { AttributeValueParts { ahead: &self.raw } } + + fn extend(&mut self, s: &'s str) { + match &mut self.raw { + CowStr::Borrowed(prev) => { + self.raw = format!("{} {}", prev, s).into(); + } + CowStr::Owned(ref mut prev) => { + prev.push(' '); + prev.push_str(s); + } + } + } } impl<'s> From<&'s str> for AttributeValue<'s> { @@ -233,6 +245,18 @@ impl<'a, 's> Parser<'a, 's> { } } + pub fn restart(&mut self) { + self.state = State::Start; + } + + pub fn set_input(&mut self, input: &'s str) { + debug_assert_eq!(self.chars.next(), None); + self.input = input; + self.chars = input.chars(); + self.pos = 0; + self.pos_prev = 0; + } + pub fn step(&mut self) -> StepResult { self.chars.next().map_or(StepResult::More, |c| { use State::*; @@ -272,12 +296,14 @@ impl<'a, 's> Parser<'a, 's> { Some((Element::Key, sp)) => self.span_key = Some(sp), Some((Element::Value { continuation }, sp)) => { if continuation { - self.attrs.0.as_mut().unwrap().last_mut().unwrap().1 = format!( - "{} {}", - self.attrs.0.as_ref().unwrap().last().unwrap().1, - sp.of(self.input), - ) - .into(); + self.attrs + .0 + .as_mut() + .unwrap() + .last_mut() + .unwrap() + .1 + .extend(sp.of(self.input)); } else { self.attrs.insert( self.span_key.take().unwrap().of(self.input), @@ -295,6 +321,10 @@ impl<'a, 's> Parser<'a, 's> { } }) } + + pub fn len(&self) -> usize { + self.input.len() - self.chars.as_str().len() + } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/src/inline.rs b/src/inline.rs index 81f63591..af721a2c 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -60,7 +60,10 @@ pub enum EventKind<'s> { Exit(Container<'s>), Atom(Atom), Str, - Attributes { container: bool }, + Attributes { + container: bool, + attrs: attr::Attributes<'s>, + }, Placeholder, } @@ -147,25 +150,6 @@ impl<'s> Input<'s> { self.span = self.span.empty_after(); } - fn ahead_attributes(&mut self) -> Option<(bool, Span)> { - let mut span = self.span.empty_after(); - let mut ahead = self.lexer.ahead().chars(); - let (mut attr_len, mut has_attr) = attr::valid(&mut ahead); - if attr_len > 0 { - while attr_len > 0 { - span = span.extend(attr_len); - self.lexer = lex::Lexer::new(ahead.as_str()); - - let (l, non_empty) = attr::valid(&mut ahead); - has_attr |= non_empty; - attr_len = l; - } - Some((has_attr, span)) - } else { - None - } - } - fn ahead_raw_format(&mut self) -> Option { if matches!( self.lexer.peek().map(|t| &t.kind), @@ -212,6 +196,12 @@ pub struct VerbatimState { non_whitespace_last: Option<(lex::Kind, usize)>, } +#[derive(Clone)] +pub enum AttributesElementType { + Container { e_placeholder: usize }, + Word, +} + #[derive(Clone)] pub struct Parser<'s> { input: Input<'s>, @@ -222,6 +212,8 @@ pub struct Parser<'s> { events: std::collections::VecDeque>, /// State if inside a verbatim container. verbatim: Option, + /// State if currently parsing potential attributes. + attributes: Option, } pub enum ControlFlow { @@ -240,6 +232,7 @@ impl<'s> Parser<'s> { openers: Vec::new(), events: std::collections::VecDeque::new(), verbatim: None, + attributes: None, } } @@ -251,7 +244,7 @@ impl<'s> Parser<'s> { debug_assert!(self.events.is_empty()); self.input.reset(); self.openers.clear(); - debug_assert!(self.events.is_empty()); + debug_assert!(self.attributes.is_none()); debug_assert!(self.verbatim.is_none()); } @@ -266,10 +259,11 @@ impl<'s> Parser<'s> { fn parse_event(&mut self) -> ControlFlow { self.input.reset_span(); + if let Some(first) = self.input.eat() { let ctrl = self - .parse_verbatim(&first) - .or_else(|| self.parse_attributes(&first)) + .parse_attributes(&first) + .or_else(|| self.parse_verbatim(&first)) .or_else(|| self.parse_autolink(&first)) .or_else(|| self.parse_symbol(&first)) .or_else(|| self.parse_footnote_reference(&first)) @@ -309,15 +303,6 @@ impl<'s> Parser<'s> { self.events[event_opener].span = span_format; self.input.span = span_format.translate(1); span_closer = span_format; - } else if let Some((non_empty, span_attr)) = self.input.ahead_attributes() { - if non_empty { - let e_attr = event_opener - 1; - self.events[e_attr] = Event { - kind: EventKind::Attributes { container: true }, - span: span_attr, - }; - } - self.input.span = span_attr; }; let ty_opener = if let EventKind::Enter(ty) = &self.events[event_opener].kind { debug_assert!(matches!( @@ -333,6 +318,18 @@ impl<'s> Parser<'s> { self.events.drain(*event_skip..); } self.push_sp(EventKind::Exit(ty_opener), span_closer); + if raw_format.is_none() + && self.input.peek().map_or(false, |t| { + matches!(t.kind, lex::Kind::Open(Delimiter::Brace)) + }) + { + self.ahead_attributes( + AttributesElementType::Container { + e_placeholder: event_opener - 1, + }, + false, + ); + } self.verbatim = None; } else { // continue verbatim @@ -380,41 +377,105 @@ impl<'s> Parser<'s> { non_whitespace_encountered: false, non_whitespace_last: None, }); + self.attributes = None; self.push(EventKind::Enter(ty)) } } fn parse_attributes(&mut self, first: &lex::Token) -> Option { - if first.kind == lex::Kind::Open(Delimiter::Brace) { - let mut ahead = self.input.lexer.ahead().chars(); - let (mut attr_len, mut has_attr) = attr::valid(std::iter::once('{').chain(&mut ahead)); - attr_len = attr_len.saturating_sub(1); // rm { - if attr_len > 0 { - while attr_len > 0 { - self.input.span = self.input.span.extend(attr_len); - self.input.lexer = lex::Lexer::new(ahead.as_str()); - - let (l, non_empty) = attr::valid(&mut ahead); - attr_len = l; - has_attr |= non_empty; + if let Some(elem_ty) = self.attributes.take() { + self.ahead_attributes(elem_ty, true) + } else if matches!(first.kind, lex::Kind::Open(Delimiter::Brace)) { + self.ahead_attributes(AttributesElementType::Word, true) + } else { + None + } + } + + fn ahead_attributes( + &mut self, + elem_ty: AttributesElementType, + opener_eaten: bool, + ) -> Option { + let start_attr = self.input.span.end() - usize::from(opener_eaten); + debug_assert!(self.input.src[start_attr..].starts_with('{')); + + let mut line_start = start_attr; + let mut line_end = self.input.span_line.end(); + let mut line = 0; + let mut end_attr = start_attr; + let mut attrs = attr::Attributes::new(); + let mut parser = attr::Parser::new(&mut attrs, &self.input.src[start_attr..line_end]); + + loop { + match parser.step() { + attr::StepResult::Done => { + end_attr = line_start + parser.len(); + if self.input.src[end_attr..].starts_with('{') { + parser.restart(); + } else { + break; + } } + attr::StepResult::Invalid => break, + attr::StepResult::Valid => {} + attr::StepResult::More => { + if let Some(l) = self.input.ahead.get(line) { + line += 1; + line_start = l.start(); + line_end = l.end(); + parser.set_input(l.of(self.input.src)); + } else if self.input.last() { + break; + } else { + self.attributes = Some(elem_ty); + if opener_eaten { + self.input.span = Span::empty_at(start_attr); + self.input.lexer = lex::Lexer::new( + &self.input.src[start_attr..self.input.span_line.end()], + ); + } + return Some(More); + } + } + } + } - let set_attr = has_attr - && self - .events - .back() - .map_or(false, |e| e.kind == EventKind::Str); + if start_attr == end_attr { + return None; + } - if set_attr { - self.push(EventKind::Attributes { container: false }); - } else { - self.push_sp(EventKind::Placeholder, self.input.span.empty_before()); + for _ in 0..line { + let l = self.input.ahead.pop_front().unwrap(); + self.input.set_current_line(l); + } + self.input.span = Span::new(start_attr, end_attr); + self.input.lexer = lex::Lexer::new(&self.input.src[end_attr..line_end]); + + if !attrs.is_empty() { + let attr_event = Event { + kind: EventKind::Attributes { + container: matches!(elem_ty, AttributesElementType::Container { .. }), + attrs, + }, + span: self.input.span, + }; + match elem_ty { + AttributesElementType::Container { e_placeholder } => { + self.events[e_placeholder] = attr_event; + if matches!(self.events[e_placeholder + 1].kind, EventKind::Str) { + self.events[e_placeholder + 1].kind = EventKind::Enter(Span); + let last = self.events.len() - 1; + self.events[last].kind = EventKind::Exit(Span); + } + } + AttributesElementType::Word => { + self.events.push_back(attr_event); } - return Some(Continue); } } - None + Some(Continue) } fn parse_autolink(&mut self, first: &lex::Token) -> Option { @@ -547,7 +608,7 @@ impl<'s> Parser<'s> { } self.openers.drain(o..); - let mut closed = match DelimEventKind::from(opener) { + let closed = match DelimEventKind::from(opener) { DelimEventKind::Container(cont) => { self.events[e_opener].kind = EventKind::Enter(cont.clone()); self.push(EventKind::Exit(cont)) @@ -572,8 +633,9 @@ impl<'s> Parser<'s> { self.input.reset_span(); self.input.eat(); // [ or ( return self.push(EventKind::Str); - }; - None + } else { + self.push(EventKind::Str) // ] + } } DelimEventKind::Link { event_span, @@ -650,23 +712,18 @@ impl<'s> Parser<'s> { } }; - if let Some((non_empty, span)) = self.input.ahead_attributes() { - if non_empty { - self.events[e_attr] = Event { - kind: EventKind::Attributes { container: true }, - span, - }; - } - - if closed.is_none() { - self.events[e_opener].kind = EventKind::Enter(Container::Span); - closed = self.push(EventKind::Exit(Container::Span)); - } - - self.input.span = span; + if self.input.peek().map_or(false, |t| { + matches!(t.kind, lex::Kind::Open(Delimiter::Brace)) + }) { + self.ahead_attributes( + AttributesElementType::Container { + e_placeholder: e_attr, + }, + false, + ) + } else { + closed } - - closed }) .or_else(|| { let opener = Opener::from_token(first.kind)?; @@ -788,7 +845,10 @@ impl<'s> Parser<'s> { if matches!( self.events.front().map(|ev| &ev.kind), - Some(EventKind::Attributes { container: false }) + Some(EventKind::Attributes { + container: false, + .. + }) ) { self.apply_word_attributes(span) } else { @@ -981,6 +1041,7 @@ impl<'s> Iterator for Parser<'s> { while self.events.is_empty() || !self.openers.is_empty() || self.verbatim.is_some() + || self.attributes.is_some() || self // for merge or attributes .events .back() @@ -1010,7 +1071,10 @@ impl<'s> Iterator for Parser<'s> { self.events.pop_front().and_then(|e| match e.kind { EventKind::Str if e.span.is_empty() => self.next(), EventKind::Str => Some(self.merge_str_events(e.span)), - EventKind::Placeholder | EventKind::Attributes { container: false } => self.next(), + EventKind::Placeholder + | EventKind::Attributes { + container: false, .. + } => self.next(), _ => Some(e), }) } @@ -1094,7 +1158,13 @@ mod test { test_parse!( "pre `raw`{#id} post", (Str, "pre "), - (Attributes { container: true }, "{#id}"), + ( + Attributes { + container: true, + attrs: [("id", "id")].into_iter().collect() + }, + "{#id}" + ), (Enter(Verbatim), "`"), (Str, "raw"), (Exit(Verbatim), "`"), @@ -1285,7 +1355,13 @@ mod test { fn span_url_attr_unclosed() { test_parse!( "[text]({.cls}", - (Attributes { container: false }, "{.cls}"), + ( + Attributes { + container: false, + attrs: [("class", "cls")].into_iter().collect(), + }, + "{.cls}" + ), (Enter(Span), ""), (Str, "[text]("), (Exit(Span), ""), @@ -1328,7 +1404,13 @@ mod test { fn span_attr() { test_parse!( "[abc]{.def}", - (Attributes { container: true }, "{.def}"), + ( + Attributes { + container: true, + attrs: [("class", "def")].into_iter().collect(), + }, + "{.def}" + ), (Enter(Span), "["), (Str, "abc"), (Exit(Span), "]"), @@ -1336,6 +1418,23 @@ mod test { test_parse!("not a [span] {#id}.", (Str, "not a [span] "), (Str, ".")); } + #[test] + fn span_attr_cont() { + test_parse!( + "[x_y]{.bar_}", + ( + Attributes { + container: true, + attrs: [("class", "bar_")].into_iter().collect(), + }, + "{.bar_}" + ), + (Enter(Span), "["), + (Str, "x_y"), + (Exit(Span), "]"), + ); + } + #[test] fn autolink() { test_parse!( @@ -1433,7 +1532,13 @@ mod test { fn container_attr() { test_parse!( "_abc def_{.attr}", - (Attributes { container: true }, "{.attr}"), + ( + Attributes { + container: true, + attrs: [("class", "attr")].into_iter().collect(), + }, + "{.attr}" + ), (Enter(Emphasis), "_"), (Str, "abc def"), (Exit(Emphasis), "_"), @@ -1461,7 +1566,13 @@ mod test { fn container_attr_multiple() { test_parse!( "_abc def_{.a}{.b}{.c} {.d}", - (Attributes { container: true }, "{.a}{.b}{.c}"), + ( + Attributes { + container: true, + attrs: [("class", "a b c")].into_iter().collect(), + }, + "{.a}{.b}{.c}" + ), (Enter(Emphasis), "_"), (Str, "abc def"), (Exit(Emphasis), "_"), @@ -1473,7 +1584,13 @@ mod test { fn attr() { test_parse!( "word{a=b}", - (Attributes { container: false }, "{a=b}"), + ( + Attributes { + container: false, + attrs: [("a", "b")].into_iter().collect() + }, + "{a=b}" + ), (Enter(Span), ""), (Str, "word"), (Exit(Span), ""), @@ -1481,7 +1598,13 @@ mod test { test_parse!( "some word{.a}{.b} with attrs", (Str, "some "), - (Attributes { container: false }, "{.a}{.b}"), + ( + Attributes { + container: false, + attrs: [("class", "a b")].into_iter().collect(), + }, + "{.a}{.b}" + ), (Enter(Span), ""), (Str, "word"), (Exit(Span), ""), @@ -1494,6 +1617,7 @@ mod test { test_parse!("word {%comment%}", (Str, "word ")); test_parse!("word {%comment%} word", (Str, "word "), (Str, " word")); test_parse!("word {a=b}", (Str, "word ")); + test_parse!("word {.d}", (Str, "word ")); } #[test] diff --git a/src/lex.rs b/src/lex.rs index 6efee3da..d56fe1a9 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -103,9 +103,11 @@ impl<'s> Lexer<'s> { } pub fn ahead(&self) -> &'s str { - let pos = - self.src.len() - self.chars.as_str().len() - self.next.as_ref().map_or(0, |t| t.len); - &self.src[pos..] + &self.src[self.pos()..] + } + + fn pos(&self) -> usize { + self.src.len() - self.chars.as_str().len() - self.next.as_ref().map_or(0, |t| t.len) } fn next_token(&mut self) -> Option { diff --git a/src/lib.rs b/src/lib.rs index 98551677..bbc56380 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -677,23 +677,15 @@ impl<'s> Parser<'s> { } fn inline(&mut self) -> Option> { - let mut inline = self.inline_parser.next(); - - inline.as_ref()?; - - let mut first_is_attr = false; - let mut attributes = inline.as_ref().map_or_else(Attributes::new, |inl| { - if let inline::EventKind::Attributes { .. } = inl.kind { - first_is_attr = true; - attr::parse(inl.span.of(self.src)) - } else { - Attributes::new() - } - }); - - if first_is_attr { - inline = self.inline_parser.next(); - } + let next = self.inline_parser.next()?; + + let (inline, mut attributes) = match next { + inline::Event { + kind: inline::EventKind::Attributes { attrs, .. }, + .. + } => (self.inline_parser.next(), attrs), + inline => (Some(inline), Attributes::new()), + }; inline.map(|inline| { let enter = matches!(inline.kind, inline::EventKind::Enter(_)); @@ -1561,7 +1553,6 @@ mod test { ); } - #[ignore = "multiline attributes broken"] #[test] fn attr_multiline() { test_parse!( @@ -1577,6 +1568,53 @@ mod test { End(Paragraph), End(Blockquote), ); + test_parse!( + concat!( + "> a{\n", // + "> %%\n", // + "> a=a}\n", // + ), + Start(Blockquote, Attributes::new()), + Start(Paragraph, Attributes::new()), + Start(Span, [("a", "a")].into_iter().collect()), + Str("a".into()), + End(Span), + End(Paragraph), + End(Blockquote), + ); + } + + #[test] + fn attr_multiline_unclosed() { + test_parse!( + concat!( + "a{\n", // + " b\n", // + ), + Start(Paragraph, Attributes::new()), + Str("a{".into()), + Softbreak, + Str("b".into()), + End(Paragraph), + ); + } + + #[test] + fn attr_multiline_invalid() { + test_parse!( + concat!( + "a{a=b\n", // + " b\n", // + "}", // + ), + Start(Paragraph, Attributes::new()), + Str("a{a=b".into()), + Softbreak, + Str("b".into()), + Softbreak, + Str("}".into()), + End(Paragraph), + ); } #[test] diff --git a/tests/suite/skip b/tests/suite/skip index 0a65d2ee..b393caa1 100644 --- a/tests/suite/skip +++ b/tests/suite/skip @@ -1,6 +1,5 @@ 38d85f9:multi-line block attributes 6c14561:multi-line block attributes -613a9d6:attribute container precedence f4f22fc:attribute key class order ae6fc15:bugged left/right quote 168469a:bugged left/right quote