diff --git a/parser/src/code_block.rs b/parser/src/code_block.rs
deleted file mode 100644
index d9732bcda..000000000
--- a/parser/src/code_block.rs
+++ /dev/null
@@ -1,165 +0,0 @@
-use pulldown_cmark::{Event, Parser, Tag};
-use std::ops::Range;
-
-#[derive(Debug)]
-pub struct ColorCodeBlocks {
-    code: Vec<Range<usize>>,
-}
-
-impl ColorCodeBlocks {
-    pub fn new(s: &str) -> ColorCodeBlocks {
-        let mut code = Vec::new();
-        let mut parser = Parser::new(s).into_offset_iter();
-        while let Some((event, range)) = parser.next() {
-            if let Event::Start(Tag::CodeBlock(_)) = event {
-                let start = range.start;
-                while let Some((event, range)) = parser.next() {
-                    if let Event::End(Tag::CodeBlock(_)) = event {
-                        code.push(start..range.end);
-                        break;
-                    }
-                }
-            } else if let Event::Code(_) = event {
-                code.push(range);
-            }
-        }
-
-        ColorCodeBlocks { code }
-    }
-
-    pub fn overlaps_code(&self, region: Range<usize>) -> Option<Range<usize>> {
-        for code in &self.code {
-            // See https://stackoverflow.com/questions/3269434.
-            if code.start <= region.end && region.start <= code.end {
-                return Some(code.clone());
-            }
-        }
-        None
-    }
-}
-
-#[cfg(test)]
-#[derive(Debug, PartialEq, Eq)]
-enum Code<'a> {
-    Yes(&'a str),
-    No(&'a str),
-}
-
-#[cfg(test)]
-fn bodies(s: &str) -> Vec<Code<'_>> {
-    let mut bodies = Vec::new();
-    let cbs = ColorCodeBlocks::new(s);
-    let mut previous = 0..0;
-    for range in &cbs.code {
-        let range = range.clone();
-        if previous.end != range.start {
-            bodies.push(Code::No(&s[previous.end..range.start]));
-        }
-        bodies.push(Code::Yes(&s[range.clone()]));
-        previous = range.clone();
-    }
-    if let Some(range) = cbs.code.last() {
-        if range.end != s.len() {
-            bodies.push(Code::No(&s[range.end..]));
-        }
-    }
-    bodies
-}
-
-#[test]
-fn cbs_1() {
-    assert_eq!(
-        bodies("`hey you`bar me too"),
-        [Code::Yes("`hey you`"), Code::No("bar me too")]
-    );
-}
-
-#[test]
-fn cbs_2() {
-    assert_eq!(
-        bodies("`hey you` me too"),
-        [Code::Yes("`hey you`"), Code::No(" me too")]
-    );
-}
-
-#[test]
-fn cbs_3() {
-    assert_eq!(
-        bodies(r"`hey you\` `me too"),
-        [Code::Yes(r"`hey you\`"), Code::No(" `me too")]
-    );
-}
-
-#[test]
-fn cbs_4() {
-    assert_eq!(
-        bodies(
-            "
-```language_spec
-testing
-```
-
-nope
-"
-        ),
-        [
-            Code::No("\n"),
-            Code::Yes("```language_spec\ntesting\n```"),
-            Code::No("\n\nnope\n")
-        ],
-    );
-}
-
-#[test]
-fn cbs_5() {
-    assert_eq!(
-        bodies(concat!(
-            "
-``` tag_after_space
-testing
-```",
-            "           "
-        )),
-        [
-            Code::No("\n"),
-            Code::Yes("``` tag_after_space\ntesting\n```           "),
-        ],
-    );
-}
-
-#[test]
-fn cbs_6() {
-    assert_eq!(
-        bodies(
-            "
-    this is indented
-    this is indented too
-"
-        ),
-        [
-            Code::No("\n    "),
-            Code::Yes("this is indented\n    this is indented too\n"),
-        ],
-    );
-}
-
-#[test]
-fn cbs_7() {
-    assert_eq!(
-        bodies(
-            "
-```
-testing unclosed
-"
-        ),
-        [Code::No("\n"), Code::Yes("```\ntesting unclosed\n"),],
-    );
-}
-
-#[test]
-fn cbs_8() {
-    assert_eq!(
-        bodies("`one` not `two`"),
-        [Code::Yes("`one`"), Code::No(" not "), Code::Yes("`two`")]
-    );
-}
diff --git a/parser/src/command.rs b/parser/src/command.rs
index 7edde0f60..5ee4f1a07 100644
--- a/parser/src/command.rs
+++ b/parser/src/command.rs
@@ -1,5 +1,5 @@
-use crate::code_block::ColorCodeBlocks;
 use crate::error::Error;
+use crate::ignore_block::IgnoreBlocks;
 use crate::token::{Token, Tokenizer};
 
 pub mod assign;
@@ -33,7 +33,7 @@ pub enum Command<'a> {
 pub struct Input<'a> {
     all: &'a str,
     parsed: usize,
-    code: ColorCodeBlocks,
+    ignore: IgnoreBlocks,
 
     // A list of possible bot names.
     bot: Vec<&'a str>,
@@ -64,7 +64,7 @@ impl<'a> Input<'a> {
         Input {
             all: input,
             parsed: 0,
-            code: ColorCodeBlocks::new(input),
+            ignore: IgnoreBlocks::new(input),
             bot,
         }
     }
@@ -141,11 +141,11 @@ impl<'a> Input<'a> {
         }
 
         if self
-            .code
-            .overlaps_code((self.parsed)..(self.parsed + tok.position()))
+            .ignore
+            .overlaps_ignore((self.parsed)..(self.parsed + tok.position()))
             .is_some()
         {
-            log::info!("command overlaps code; code: {:?}", self.code);
+            log::info!("command overlaps ignored block; ignore: {:?}", self.ignore);
             return None;
         }
 
diff --git a/parser/src/ignore_block.rs b/parser/src/ignore_block.rs
new file mode 100644
index 000000000..250273e42
--- /dev/null
+++ b/parser/src/ignore_block.rs
@@ -0,0 +1,259 @@
+//! Finds the regions of a Markdown text in which commands and mentions are
+//! not recognised: code blocks, inline code spans, and block quotes.
+
+use pulldown_cmark::{Event, Parser, Tag};
+use std::ops::Range;
+
+/// Byte ranges of a Markdown input that are covered by code blocks, inline
+/// code spans, or block quotes.
+#[derive(Debug)]
+pub struct IgnoreBlocks {
+    /// Ignored ranges, in the order the Markdown parser reported them.
+    ignore: Vec<Range<usize>>,
+}
+
+impl IgnoreBlocks {
+    /// Parses `s` as Markdown and records the source range of each code block,
+    /// inline code span, and outermost block quote. Content nested inside a
+    /// block quote is covered by the quote's own range.
+    pub fn new(s: &str) -> IgnoreBlocks {
+        let mut ignore = Vec::new();
+        let mut parser = Parser::new(s).into_offset_iter();
+        while let Some((event, range)) = parser.next() {
+            if let Event::Start(Tag::CodeBlock(_)) = event {
+                let start = range.start;
+                // Consume events up to the end tag; the block becomes one range.
+                while let Some((event, range)) = parser.next() {
+                    if let Event::End(Tag::CodeBlock(_)) = event {
+                        ignore.push(start..range.end);
+                        break;
+                    }
+                }
+            } else if let Event::Start(Tag::BlockQuote) = event {
+                let start = range.start;
+                // Block quotes nest: track the depth and close on the outermost end.
+                let mut count = 1;
+                while let Some((event, range)) = parser.next() {
+                    if let Event::Start(Tag::BlockQuote) = event {
+                        count += 1;
+                    } else if let Event::End(Tag::BlockQuote) = event {
+                        count -= 1;
+                        if count == 0 {
+                            ignore.push(start..range.end);
+                            break;
+                        }
+                    }
+                }
+            } else if let Event::Code(_) = event {
+                ignore.push(range);
+            }
+        }
+
+        IgnoreBlocks { ignore }
+    }
+
+    /// Returns the first recorded range that overlaps `region`, if any.
+    ///
+    /// Both bounds are compared inclusively, so a region that only touches
+    /// the edge of an ignored range is reported as overlapping as well.
+    pub fn overlaps_ignore(&self, region: Range<usize>) -> Option<Range<usize>> {
+        for ignore in &self.ignore {
+            // See https://stackoverflow.com/questions/3269434.
+            if ignore.start <= region.end && region.start <= ignore.end {
+                return Some(ignore.clone());
+            }
+        }
+        None
+    }
+}
+
+/// A slice of a test input, tagged by whether it lies in an ignored range.
+#[cfg(test)]
+#[derive(Debug, PartialEq, Eq)]
+enum Ignore<'a> {
+    Yes(&'a str),
+    No(&'a str),
+}
+
+/// Cuts `s` into an `Ignore::Yes` slice per ignored range and `Ignore::No`
+/// slices for the text before, between, and after those ranges.
+#[cfg(test)]
+fn bodies(s: &str) -> Vec<Ignore<'_>> {
+    let mut bodies = Vec::new();
+    let cbs = IgnoreBlocks::new(s);
+    let mut previous = 0..0;
+    for range in &cbs.ignore {
+        let range = range.clone();
+        if previous.end != range.start {
+            bodies.push(Ignore::No(&s[previous.end..range.start]));
+        }
+        bodies.push(Ignore::Yes(&s[range.clone()]));
+        previous = range.clone();
+    }
+    if let Some(range) = cbs.ignore.last() {
+        if range.end != s.len() {
+            bodies.push(Ignore::No(&s[range.end..]));
+        }
+    }
+    bodies
+}
+
+#[test]
+fn cbs_1() {
+    assert_eq!(
+        bodies("`hey you`bar me too"),
+        [Ignore::Yes("`hey you`"), Ignore::No("bar me too")]
+    );
+}
+
+#[test]
+fn cbs_2() {
+    assert_eq!(
+        bodies("`hey you` me too"),
+        [Ignore::Yes("`hey you`"), Ignore::No(" me too")]
+    );
+}
+
+#[test]
+fn cbs_3() {
+    assert_eq!(
+        bodies(r"`hey you\` `me too"),
+        [Ignore::Yes(r"`hey you\`"), Ignore::No(" `me too")]
+    );
+}
+
+#[test]
+fn cbs_4() {
+    assert_eq!(
+        bodies(
+            "
+```language_spec
+testing
+```
+
+nope
+"
+        ),
+        [
+            Ignore::No("\n"),
+            Ignore::Yes("```language_spec\ntesting\n```"),
+            Ignore::No("\n\nnope\n")
+        ],
+    );
+}
+
+#[test]
+fn cbs_5() {
+    assert_eq!(
+        bodies(concat!(
+            "
+``` tag_after_space
+testing
+```",
+            "           "
+        )),
+        [
+            Ignore::No("\n"),
+            Ignore::Yes("``` tag_after_space\ntesting\n```           "),
+        ],
+    );
+}
+
+#[test]
+fn cbs_6() {
+    assert_eq!(
+        bodies(
+            "
+    this is indented
+    this is indented too
+"
+        ),
+        [
+            Ignore::No("\n    "),
+            Ignore::Yes("this is indented\n    this is indented too\n"),
+        ],
+    );
+}
+
+#[test]
+fn cbs_7() {
+    assert_eq!(
+        bodies(
+            "
+```
+testing unclosed
+"
+        ),
+        [Ignore::No("\n"), Ignore::Yes("```\ntesting unclosed\n"),],
+    );
+}
+
+#[test]
+fn cbs_8() {
+    assert_eq!(
+        bodies("`one` not `two`"),
+        [
+            Ignore::Yes("`one`"),
+            Ignore::No(" not "),
+            Ignore::Yes("`two`")
+        ]
+    );
+}
+
+#[test]
+fn cbs_9() {
+    assert_eq!(
+        bodies(
+            "
+some text
+> testing citations
+still in citation
+
+more text
+"
+        ),
+        [
+            Ignore::No("\nsome text\n"),
+            Ignore::Yes("> testing citations\nstill in citation\n"),
+            Ignore::No("\nmore text\n")
+        ],
+    );
+}
+
+#[test]
+fn cbs_10() {
+    assert_eq!(
+        bodies(
+            "
+# abc
+
+> multiline
+> citation
+
+lorem ipsum
+"
+        ),
+        [
+            Ignore::No("\n# abc\n\n"),
+            Ignore::Yes("> multiline\n> citation\n"),
+            Ignore::No("\nlorem ipsum\n")
+        ],
+    );
+}
+
+#[test]
+fn cbs_11() {
+    assert_eq!(
+        bodies(
+            "
+> some
+> > nested
+> citations
+"
+        ),
+        [
+            Ignore::No("\n"),
+            Ignore::Yes("> some\n> > nested\n> citations\n"),
+        ],
+    );
+}
diff --git a/parser/src/lib.rs b/parser/src/lib.rs
index fda2ffd67..b7fae2668 100644
--- a/parser/src/lib.rs
+++ b/parser/src/lib.rs
@@ -1,6 +1,6 @@
-mod code_block;
 pub mod command;
 pub mod error;
+mod ignore_block;
 mod mentions;
 mod token;
 
diff --git a/parser/src/mentions.rs b/parser/src/mentions.rs
index 95d81d18a..ab0a15500 100644
--- a/parser/src/mentions.rs
+++ b/parser/src/mentions.rs
@@ -6,7 +6,7 @@
 ///
 /// Note that the `@` is skipped in the final output.
 pub fn get_mentions(input: &str) -> Vec<&str> {
-    let code_regions = crate::code_block::ColorCodeBlocks::new(input);
+    let ignore_regions = crate::ignore_block::IgnoreBlocks::new(input);
 
     let mut mentions = Vec::new();
     for (idx, _) in input.match_indices('@') {
@@ -41,8 +41,8 @@ pub fn get_mentions(input: &str) -> Vec<&str> {
         if username.is_empty() {
             continue;
         }
-        if code_regions
-            .overlaps_code(idx..idx + username.len())
+        if ignore_regions
+            .overlaps_ignore(idx..idx + username.len())
             .is_some()
         {
             continue;