diff --git a/src/attr.rs b/src/attr.rs index 4f4b2aac..886a419a 100644 --- a/src/attr.rs +++ b/src/attr.rs @@ -220,6 +220,85 @@ impl<'s> FromIterator<(&'s str, &'s str)> for Attributes<'s> { } } +pub enum StepResult { + /// Attributes are valid and completed. + Done, + /// Attributes are invalid. + Invalid, + /// Attributes are valid so far. + Valid, + /// Attributes are valid so far, more input is needed. + More, +} + +pub struct Builder<'s> { + input: &'s str, + chars: std::str::Chars<'s>, + attrs: Attributes<'s>, + parser: Parser, + checkpoint_events: usize, +} + +impl<'s> Builder<'s> { + pub fn new(input: &'s str) -> Self { + Self { + input, + chars: input.chars(), + attrs: Attributes::new(), + parser: Parser { + pos: 0, + pos_prev: 0, + state: State::Start, + }, + checkpoint_events: 0, + } + } + + pub fn restart(&mut self) { + self.parser.state = State::Start; + } + + pub fn set_input(&mut self, input: &'s str) { + debug_assert_eq!(self.chars.next(), None); + self.input = input; + self.chars = input.chars(); + self.parser.pos = 0; + self.parser.pos_prev = 0; + } + + pub fn step(&mut self) -> StepResult { + if let Some(c) = self.chars.next() { + if let Some((ev, sp)) = self.parser.step(c) { + self.attrs.push_event(ev, sp.of(self.input)); + } + match self.parser.state { + State::Done => { + if let Some(elems) = self.attrs.0.as_ref() { + self.checkpoint_events = elems.len(); + } + StepResult::Done + } + State::Invalid => StepResult::Invalid, + _ => StepResult::Valid, + } + } else { + StepResult::More + } + } + + pub fn len(&self) -> usize { + self.input.len() - self.chars.as_str().len() + } + + pub fn finish(mut self) -> Attributes<'s> { + if let Some(elems) = self.attrs.0.as_mut() { + debug_assert_ne!(elems.len(), 0); + elems.drain(self.checkpoint_events..); + } + self.attrs + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum State { Start, diff --git a/src/inline.rs b/src/inline.rs index 8f4853c0..9029d9f1 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -63,18 +63,21 @@ pub enum QuoteType { } #[derive(Clone, Debug, PartialEq, Eq)] -pub enum EventKind { +pub enum EventKind<'s> { Enter(Container), Exit(Container), Atom(Atom), Str, - Attributes { container: bool }, + Attributes { + container: bool, + attrs: attr::Attributes<'s>, + }, Placeholder, } #[derive(Clone, Debug, PartialEq, Eq)] -pub struct Event { - pub kind: EventKind, +pub struct Event<'s> { + pub kind: EventKind<'s>, pub span: Span, } @@ -84,6 +87,8 @@ pub struct Input<'s> { lexer: lex::Lexer<'s>, /// The block is complete, the final line has been provided. complete: bool, + /// Absolute byte position of end of the current line. + line_end: usize, /// Upcoming lines within the current block. ahead: std::collections::VecDeque, /// Span of current event. @@ -96,6 +101,7 @@ impl<'s> Input<'s> { src, lexer: lex::Lexer::new(""), complete: false, + line_end: 0, ahead: std::collections::VecDeque::new(), span: Span::empty_at(0), } @@ -118,6 +124,7 @@ impl<'s> Input<'s> { fn set_current_line(&mut self, line: Span) { self.lexer = lex::Lexer::new(line.of(self.src)); self.span = line.empty_before(); + self.line_end = line.end(); } fn reset(&mut self) { @@ -154,25 +161,6 @@ impl<'s> Input<'s> { self.span = self.span.empty_after(); } - fn ahead_attributes(&mut self) -> Option<(bool, Span)> { - let mut span = self.span.empty_after(); - let mut ahead = self.lexer.ahead().chars(); - let (mut attr_len, mut has_attr) = attr::valid(&mut ahead); - if attr_len > 0 { - while attr_len > 0 { - span = span.extend(attr_len); - self.lexer = lex::Lexer::new(ahead.as_str()); - - let (l, non_empty) = attr::valid(&mut ahead); - has_attr |= non_empty; - attr_len = l; - } - Some((has_attr, span)) - } else { - None - } - } - fn ahead_raw_format(&mut self) -> Option { if matches!( self.lexer.peek().map(|t| &t.kind), @@ -218,15 +206,23 @@ pub struct VerbatimState { non_whitespace_last: Option<(lex::Kind, usize)>, } +#[derive(Debug)] +pub enum AttributesElementType { + Container { e_placeholder: usize }, + Word, +} + pub struct Parser<'s> { input: Input<'s>, /// Stack with kind and index of _potential_ openers for containers. openers: Vec<(Opener, usize)>, /// Buffer queue for next events. Events are buffered until no modifications due to future /// characters are needed. - events: std::collections::VecDeque, + events: std::collections::VecDeque>, /// State if inside a verbatim container. verbatim: Option, + /// State if currently parsing potential attributes. + attributes: Option, } pub enum ControlFlow { @@ -245,6 +241,7 @@ impl<'s> Parser<'s> { openers: Vec::new(), events: std::collections::VecDeque::new(), verbatim: None, + attributes: None, } } @@ -256,24 +253,25 @@ impl<'s> Parser<'s> { debug_assert!(self.events.is_empty()); self.input.reset(); self.openers.clear(); - debug_assert!(self.events.is_empty()); + debug_assert!(self.attributes.is_none()); debug_assert!(self.verbatim.is_none()); } - fn push_sp(&mut self, kind: EventKind, span: Span) -> Option { + fn push_sp(&mut self, kind: EventKind<'s>, span: Span) -> Option { self.events.push_back(Event { kind, span }); Some(Continue) } - fn push(&mut self, kind: EventKind) -> Option { + fn push(&mut self, kind: EventKind<'s>) -> Option { self.push_sp(kind, self.input.span) } fn parse_event(&mut self) -> ControlFlow { self.input.reset_span(); + if let Some(first) = self.input.eat() { - self.parse_verbatim(&first) - .or_else(|| self.parse_attributes(&first)) + self.parse_attributes(&first) + .or_else(|| self.parse_verbatim(&first)) .or_else(|| self.parse_autolink(&first)) .or_else(|| self.parse_symbol(&first)) .or_else(|| self.parse_footnote_reference(&first)) @@ -307,15 +305,6 @@ impl<'s> Parser<'s> { self.events[event_opener].span = span_format; self.input.span = span_format.translate(1); span_closer = span_format; - } else if let Some((non_empty, span_attr)) = self.input.ahead_attributes() { - if non_empty { - let e_attr = event_opener - 1; - self.events[e_attr] = Event { - kind: EventKind::Attributes { container: true }, - span: span_attr, - }; - } - self.input.span = span_attr; }; let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind { ty @@ -327,6 +316,18 @@ impl<'s> Parser<'s> { self.events.drain(*event_skip..); } self.push_sp(EventKind::Exit(ty_opener), span_closer); + if raw_format.is_none() + && self.input.peek().map_or(false, |t| { + matches!(t.kind, lex::Kind::Open(Delimiter::Brace)) + }) + { + self.ahead_attributes( + AttributesElementType::Container { + e_placeholder: event_opener - 1, + }, + false, + ); + } self.verbatim = None; } else { // continue verbatim @@ -373,41 +374,104 @@ impl<'s> Parser<'s> { non_whitespace_encountered: false, non_whitespace_last: None, }); + self.attributes = None; self.push(EventKind::Enter(Verbatim(ty))) } } fn parse_attributes(&mut self, first: &lex::Token) -> Option { - if first.kind == lex::Kind::Open(Delimiter::Brace) { - let mut ahead = self.input.lexer.ahead().chars(); - let (mut attr_len, mut has_attr) = attr::valid(std::iter::once('{').chain(&mut ahead)); - attr_len = attr_len.saturating_sub(1); // rm { - if attr_len > 0 { - while attr_len > 0 { - self.input.span = self.input.span.extend(attr_len); - self.input.lexer = lex::Lexer::new(ahead.as_str()); - - let (l, non_empty) = attr::valid(&mut ahead); - attr_len = l; - has_attr |= non_empty; + if let Some(elem_ty) = self.attributes.take() { + self.ahead_attributes(elem_ty, true) + } else if matches!(first.kind, lex::Kind::Open(Delimiter::Brace)) { + self.ahead_attributes(AttributesElementType::Word, true) + } else { + None + } + } + + fn ahead_attributes( + &mut self, + elem_ty: AttributesElementType, + opener_eaten: bool, + ) -> Option { + let start_attr = self.input.span.end() - usize::from(opener_eaten); + debug_assert!(self.input.src[start_attr..].starts_with('{')); + + let mut line_start = start_attr; + let mut line_end = self.input.line_end; + let mut line = 0; + let mut end_attr = start_attr; + let mut builder = attr::Builder::new(&self.input.src[start_attr..line_end]); + + loop { + match builder.step() { + attr::StepResult::Done => { + end_attr = line_start + builder.len(); + if self.input.src[end_attr..].starts_with('{') { + builder.restart(); + } else { + break; + } + } + attr::StepResult::Invalid => break, + attr::StepResult::Valid => {} + attr::StepResult::More => { + if let Some(l) = self.input.ahead.get(line) { + line += 1; + line_start = l.start(); + line_end = l.end(); + builder.set_input(l.of(self.input.src)); + } else if self.input.complete { + break; + } else { + self.attributes = Some(elem_ty); + if opener_eaten { + self.input.span = Span::empty_at(start_attr); + self.input.lexer = + lex::Lexer::new(&self.input.src[start_attr..self.input.line_end]); + } + return Some(More); + } } + } + } - let set_attr = has_attr - && self - .events - .back() - .map_or(false, |e| e.kind == EventKind::Str); + if start_attr == end_attr { + return None; + } - if set_attr { - self.push(EventKind::Attributes { container: false }); - } else { - self.push_sp(EventKind::Placeholder, self.input.span.empty_before()); + for _ in 0..line { + let l = self.input.ahead.pop_front().unwrap(); + self.input.set_current_line(l); + } + self.input.span = Span::new(start_attr, end_attr); + self.input.lexer = lex::Lexer::new(&self.input.src[end_attr..line_end]); + + let attrs = builder.finish(); + if !attrs.is_empty() { + let attr_event = Event { + kind: EventKind::Attributes { + container: matches!(elem_ty, AttributesElementType::Container { .. }), + attrs, + }, + span: self.input.span, + }; + match elem_ty { + AttributesElementType::Container { e_placeholder } => { + self.events[e_placeholder] = attr_event; + if matches!(self.events[e_placeholder + 1].kind, EventKind::Str) { + self.events[e_placeholder + 1].kind = EventKind::Enter(Span); + let last = self.events.len() - 1; + self.events[last].kind = EventKind::Exit(Span); + } + } + AttributesElementType::Word => { + self.events.push_back(attr_event); } - return Some(Continue); } } - None + Some(Continue) } fn parse_autolink(&mut self, first: &lex::Token) -> Option { @@ -540,7 +604,7 @@ impl<'s> Parser<'s> { } self.openers.drain(o..); - let mut closed = match DelimEventKind::from(opener) { + let closed = match DelimEventKind::from(opener) { DelimEventKind::Container(cont) => { self.events[e_opener].kind = EventKind::Enter(cont); self.push(EventKind::Exit(cont)) @@ -565,8 +629,9 @@ impl<'s> Parser<'s> { self.input.reset_span(); self.input.eat(); // [ or ( return self.push(EventKind::Str); - }; - None + } else { + self.push(EventKind::Str) // ] + } } DelimEventKind::Link { event_span, @@ -600,23 +665,18 @@ impl<'s> Parser<'s> { } }; - if let Some((non_empty, span)) = self.input.ahead_attributes() { - if non_empty { - self.events[e_attr] = Event { - kind: EventKind::Attributes { container: true }, - span, - }; - } - - if closed.is_none() { - self.events[e_opener].kind = EventKind::Enter(Container::Span); - closed = self.push(EventKind::Exit(Container::Span)); - } - - self.input.span = span; + if self.input.peek().map_or(false, |t| { + matches!(t.kind, lex::Kind::Open(Delimiter::Brace)) + }) { + self.ahead_attributes( + AttributesElementType::Container { + e_placeholder: e_attr, + }, + false, + ) + } else { + closed } - - closed }) .or_else(|| { let opener = Opener::from_token(first.kind)?; @@ -725,7 +785,7 @@ impl<'s> Parser<'s> { self.push(EventKind::Atom(atom)) } - fn merge_str_events(&mut self, span_str: Span) -> Event { + fn merge_str_events(&mut self, span_str: Span) -> Event<'s> { let mut span = span_str; let should_merge = |e: &Event, span: Span| { matches!(e.kind, EventKind::Str | EventKind::Placeholder) @@ -738,7 +798,10 @@ impl<'s> Parser<'s> { if matches!( self.events.front().map(|ev| &ev.kind), - Some(EventKind::Attributes { container: false }) + Some(EventKind::Attributes { + container: false, + .. + }) ) { self.apply_word_attributes(span) } else { @@ -749,7 +812,7 @@ impl<'s> Parser<'s> { } } - fn apply_word_attributes(&mut self, span_str: Span) -> Event { + fn apply_word_attributes(&mut self, span_str: Span) -> Event<'s> { if let Some(i) = self .input .content(span_str) @@ -925,12 +988,13 @@ impl From for DelimEventKind { } impl<'s> Iterator for Parser<'s> { - type Item = Event; + type Item = Event<'s>; fn next(&mut self) -> Option { while self.events.is_empty() || !self.openers.is_empty() || self.verbatim.is_some() + || self.attributes.is_some() || self // for merge or attributes .events .back() @@ -956,7 +1020,10 @@ impl<'s> Iterator for Parser<'s> { self.events.pop_front().and_then(|e| match e.kind { EventKind::Str if e.span.is_empty() => self.next(), EventKind::Str => Some(self.merge_str_events(e.span)), - EventKind::Placeholder | EventKind::Attributes { container: false } => self.next(), + EventKind::Placeholder + | EventKind::Attributes { + container: false, .. + } => self.next(), _ => Some(e), }) } @@ -1039,7 +1106,13 @@ mod test { fn verbatim_attr() { test_parse!( "`raw`{#id}", - (Attributes { container: true }, "{#id}"), + ( + Attributes { + container: true, + attrs: [("id", "id")].into_iter().collect() + }, + "{#id}" + ), (Enter(Verbatim(VerbatimType::Verbatim)), "`"), (Str, "raw"), (Exit(Verbatim(VerbatimType::Verbatim)), "`"), @@ -1223,7 +1296,13 @@ mod test { fn span_url_attr_unclosed() { test_parse!( "[text]({.cls}", - (Attributes { container: false }, "{.cls}"), + ( + Attributes { + container: false, + attrs: [("class", "cls")].into_iter().collect(), + }, + "{.cls}" + ), (Enter(Span), ""), (Str, "[text]("), (Exit(Span), ""), @@ -1266,7 +1345,13 @@ mod test { fn span_attr() { test_parse!( "[abc]{.def}", - (Attributes { container: true }, "{.def}"), + ( + Attributes { + container: true, + attrs: [("class", "def")].into_iter().collect(), + }, + "{.def}" + ), (Enter(Span), "["), (Str, "abc"), (Exit(Span), "]"), @@ -1274,6 +1359,23 @@ mod test { test_parse!("not a [span] {#id}.", (Str, "not a [span] "), (Str, ".")); } + #[test] + fn span_attr_cont() { + test_parse!( + "[x_y]{.bar_}", + ( + Attributes { + container: true, + attrs: [("class", "bar_")].into_iter().collect(), + }, + "{.bar_}" + ), + (Enter(Span), "["), + (Str, "x_y"), + (Exit(Span), "]"), + ); + } + #[test] fn autolink() { test_parse!( @@ -1371,7 +1473,13 @@ mod test { fn container_attr() { test_parse!( "_abc def_{.attr}", - (Attributes { container: true }, "{.attr}"), + ( + Attributes { + container: true, + attrs: [("class", "attr")].into_iter().collect(), + }, + "{.attr}" + ), (Enter(Emphasis), "_"), (Str, "abc def"), (Exit(Emphasis), "_"), @@ -1399,7 +1507,15 @@ mod test { fn container_attr_multiple() { test_parse!( "_abc def_{.a}{.b}{.c} {.d}", - (Attributes { container: true }, "{.a}{.b}{.c}"), + ( + Attributes { + container: true, + attrs: [("class", "a"), ("class", "b"), ("class", "c")] + .into_iter() + .collect(), + }, + "{.a}{.b}{.c}" + ), (Enter(Emphasis), "_"), (Str, "abc def"), (Exit(Emphasis), "_"), @@ -1411,7 +1527,13 @@ mod test { fn attr() { test_parse!( "word{a=b}", - (Attributes { container: false }, "{a=b}"), + ( + Attributes { + container: false, + attrs: [("a", "b")].into_iter().collect() + }, + "{a=b}" + ), (Enter(Span), ""), (Str, "word"), (Exit(Span), ""), @@ -1419,7 +1541,13 @@ mod test { test_parse!( "some word{.a}{.b} with attrs", (Str, "some "), - (Attributes { container: false }, "{.a}{.b}"), + ( + Attributes { + container: false, + attrs: [("class", "a"), ("class", "b")].into_iter().collect(), + }, + "{.a}{.b}" + ), (Enter(Span), ""), (Str, "word"), (Exit(Span), ""), @@ -1432,6 +1560,7 @@ mod test { test_parse!("word {%comment%}", (Str, "word ")); test_parse!("word {%comment%} word", (Str, "word "), (Str, " word")); test_parse!("word {a=b}", (Str, "word ")); + test_parse!("word {.d}", (Str, "word ")); } #[test] diff --git a/src/lex.rs b/src/lex.rs index 6efee3da..d56fe1a9 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -103,9 +103,11 @@ impl<'s> Lexer<'s> { } pub fn ahead(&self) -> &'s str { - let pos = - self.src.len() - self.chars.as_str().len() - self.next.as_ref().map_or(0, |t| t.len); - &self.src[pos..] + &self.src[self.pos()..] + } + + fn pos(&self) -> usize { + self.src.len() - self.chars.as_str().len() - self.next.as_ref().map_or(0, |t| t.len) } fn next_token(&mut self) -> Option { diff --git a/src/lib.rs b/src/lib.rs index 45e0e129..38509490 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -674,23 +674,15 @@ impl<'s> Parser<'s> { } fn inline(&mut self) -> Option> { - let mut inline = self.inline_parser.next(); - - inline.as_ref()?; - - let mut first_is_attr = false; - let mut attributes = inline.as_ref().map_or_else(Attributes::new, |inl| { - if let inline::EventKind::Attributes { .. } = inl.kind { - first_is_attr = true; - attr::parse(inl.span.of(self.src)) - } else { - Attributes::new() - } - }); - - if first_is_attr { - inline = self.inline_parser.next(); - } + let next = self.inline_parser.next()?; + + let (inline, mut attributes) = match next { + inline::Event { + kind: inline::EventKind::Attributes { attrs, .. }, + .. + } => (self.inline_parser.next(), attrs), + inline => (Some(inline), Attributes::new()), + }; inline.map(|inline| match inline.kind { inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => { @@ -1566,7 +1558,6 @@ mod test { ); } - #[ignore = "multiline attributes broken"] #[test] fn attr_multiline() { test_parse!( @@ -1582,6 +1573,53 @@ mod test { End(Paragraph), End(Blockquote), ); + test_parse!( + concat!( + "> a{\n", // + "> %%\n", // + "> a=a}\n", // + ), + Start(Blockquote, Attributes::new()), + Start(Paragraph, Attributes::new()), + Start(Span, [("a", "a")].into_iter().collect()), + Str("a".into()), + End(Span), + End(Paragraph), + End(Blockquote), + ); + } + + #[test] + fn attr_multiline_unclosed() { + test_parse!( + concat!( + "a{\n", // + " b\n", // + ), + Start(Paragraph, Attributes::new()), + Str("a{".into()), + Softbreak, + Str("b".into()), + End(Paragraph), + ); + } + + #[test] + fn attr_multiline_invalid() { + test_parse!( + concat!( + "a{a=b\n", // + " b\n", // + "}", // + ), + Start(Paragraph, Attributes::new()), + Str("a{a=b".into()), + Softbreak, + Str("b".into()), + Softbreak, + Str("}".into()), + End(Paragraph), + ); } #[test]