diff --git a/Cargo.toml b/Cargo.toml index 6649856f..4f9fd724 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ rustdoc-args = ["--cfg", "procmacro2_semver_exempt"] [dependencies] unicode-xid = "0.2" +rustc_lexer = "0.1.0" [dev-dependencies] quote = { version = "1.0", default_features = false } diff --git a/src/fallback.rs b/src/fallback.rs index fe582b3b..595c4da7 100644 --- a/src/fallback.rs +++ b/src/fallback.rs @@ -11,8 +11,8 @@ use std::path::PathBuf; use std::str::FromStr; use std::vec; -use crate::strnom::{block_comment, skip_whitespace, whitespace, word_break, Cursor, PResult}; use crate::{Delimiter, Punct, Spacing, TokenTree}; +use rustc_lexer::{first_token, Token, TokenKind}; use unicode_xid::UnicodeXID; #[derive(Clone)] @@ -23,6 +23,27 @@ pub struct TokenStream { #[derive(Debug)] pub struct LexError; +struct Cursor<'a> { + pub rest: &'a str, + pub head: Option, + #[cfg(span_locations)] + pub off: u32, +} + +#[cfg(span_locations)] +impl<'a> Clone for Cursor<'a> { + fn clone(&self) -> Self { + Cursor::new(self.rest, self.off) + } +} + +#[cfg(not(span_locations))] +impl<'a> Clone for Cursor<'a> { + fn clone(&self) -> Self { + Cursor::new(self.rest, 0) + } +} + impl TokenStream { pub fn new() -> TokenStream { TokenStream { inner: Vec::new() } @@ -40,16 +61,13 @@ fn get_cursor(src: &str) -> Cursor { let mut cm = cm.borrow_mut(); let name = format!("", cm.files.len()); let span = cm.add_file(&name, src); - Cursor { - rest: src, - off: span.lo, - } + Cursor::new(src, span.lo) }) } #[cfg(not(span_locations))] fn get_cursor(src: &str) -> Cursor { - Cursor { rest: src } + Cursor::new(src, 0) } impl FromStr for TokenStream { @@ -57,17 +75,10 @@ impl FromStr for TokenStream { fn from_str(src: &str) -> Result { // Create a dummy file & add it to the source map - let cursor = get_cursor(src); - - match token_stream(cursor) { - Ok((input, output)) => { - if skip_whitespace(input).len() != 0 { - Err(LexError) - } else { - Ok(output) - } - } - Err(LexError) => Err(LexError), + let mut cursor = get_cursor(src); + match token_stream(&mut cursor) { + Ok(tts) if cursor.is_empty() => Ok(tts), + _ => Err(LexError), } } } @@ -262,7 +273,7 @@ impl FileInfo { fn offset_line_column(&self, offset: usize) -> LineColumn { assert!(self.span_within(Span { lo: offset as u32, - hi: offset as u32 + hi: offset as u32, })); let offset = offset - self.span.lo as usize; match self.lines.binary_search(&offset) { @@ -282,7 +293,7 @@ impl FileInfo { } } -/// Computesthe offsets of each line in the given source string. +/// Computes the offsets of each line in the given source string. #[cfg(span_locations)] fn lines_offsets(s: &str) -> Vec { let mut lines = vec![0]; @@ -800,635 +811,272 @@ impl fmt::Debug for Literal { } } -fn token_stream(mut input: Cursor) -> PResult { - let mut trees = Vec::new(); - loop { - let input_no_ws = skip_whitespace(input); - if input_no_ws.rest.len() == 0 { - break; - } - if let Ok((a, tokens)) = doc_comment(input_no_ws) { - input = a; - trees.extend(tokens); - continue; - } - - let (a, tt) = match token_tree(input_no_ws) { - Ok(p) => p, - Err(_) => break, - }; - trees.push(tt); - input = a; +fn lex(src: &str) -> Option { + if src.is_empty() { + None + } else { + Some(first_token(src)) } - Ok((input, TokenStream { inner: trees })) } -#[cfg(not(span_locations))] -fn spanned<'a, T>( - input: Cursor<'a>, - f: fn(Cursor<'a>) -> PResult<'a, T>, -) -> PResult<'a, (T, crate::Span)> { - let (a, b) = f(skip_whitespace(input))?; - Ok((a, ((b, crate::Span::_new_stable(Span::call_site()))))) -} - -#[cfg(span_locations)] -fn spanned<'a, T>( - input: Cursor<'a>, - f: fn(Cursor<'a>) -> PResult<'a, T>, -) -> PResult<'a, (T, crate::Span)> { - let input = skip_whitespace(input); - let lo = input.off; - let (a, b) = f(input)?; - let hi = a.off; - let span = crate::Span::_new_stable(Span { lo, hi }); - Ok((a, (b, span))) -} - -fn token_tree(input: Cursor) -> PResult { - let (rest, (mut tt, span)) = spanned(input, token_kind)?; - tt.set_span(span); - Ok((rest, tt)) -} - -named!(token_kind -> TokenTree, alt!( - map!(group, |g| TokenTree::Group(crate::Group::_new_stable(g))) - | - map!(literal, |l| TokenTree::Literal(crate::Literal::_new_stable(l))) // must be before symbol - | - map!(op, TokenTree::Punct) - | - symbol_leading_ws -)); - -named!(group -> Group, alt!( - delimited!( - punct!("("), - token_stream, - punct!(")") - ) => { |ts| Group::new(Delimiter::Parenthesis, ts) } - | - delimited!( - punct!("["), - token_stream, - punct!("]") - ) => { |ts| Group::new(Delimiter::Bracket, ts) } - | - delimited!( - punct!("{"), - token_stream, - punct!("}") - ) => { |ts| Group::new(Delimiter::Brace, ts) } -)); - -fn symbol_leading_ws(input: Cursor) -> PResult { - symbol(skip_whitespace(input)) -} - -fn symbol(input: Cursor) -> PResult { - let raw = input.starts_with("r#"); - let rest = input.advance((raw as usize) << 1); - - let (rest, sym) = symbol_not_raw(rest)?; - - if !raw { - let ident = crate::Ident::new(sym, crate::Span::call_site()); - return Ok((rest, ident.into())); +impl<'a> Cursor<'a> { + #[cfg(span_locations)] + pub fn new(rest: &'a str, off: u32) -> Self { + Cursor { + rest, + off, + head: lex(rest), + } } - if sym == "_" { - return Err(LexError); + #[cfg(not(span_locations))] + pub fn new(rest: &'a str, _off: u32) -> Self { + Cursor { + rest, + head: lex(rest), + } } - let ident = crate::Ident::_new_raw(sym, crate::Span::call_site()); - Ok((rest, ident.into())) -} - -fn symbol_not_raw(input: Cursor) -> PResult<&str> { - let mut chars = input.char_indices(); - - match chars.next() { - Some((_, ch)) if is_ident_start(ch) => {} - _ => return Err(LexError), + #[cfg(span_locations)] + pub fn bumpped(&self) -> Self { + assert!(!self.is_empty()); + let off = self.head.as_ref().unwrap().len; + Cursor::new(&self.rest[off..], self.off + off as u32) } - let mut end = input.len(); - for (i, ch) in chars { - if !is_ident_continue(ch) { - end = i; - break; - } + #[cfg(not(span_locations))] + pub fn bumpped(&self) -> Self { + assert!(!self.is_empty()); + let off = self.head.as_ref().unwrap().len; + Cursor::new(&self.rest[off..], off as u32) } - Ok((input.advance(end), &input.rest[..end])) -} - -fn literal(input: Cursor) -> PResult { - let input_no_ws = skip_whitespace(input); + pub fn is_empty(&self) -> bool { + self.rest.is_empty() + } - match literal_nocapture(input_no_ws) { - Ok((a, ())) => { - let start = input.len() - input_no_ws.len(); - let len = input_no_ws.len() - a.len(); - let end = start + len; - Ok((a, Literal::_new(input.rest[start..end].to_string()))) - } - Err(LexError) => Err(LexError), + pub fn slice(&self) -> &'a str { + assert!(!self.is_empty()); + &self.rest[0..self.head.as_ref().unwrap().len] } } -named!(literal_nocapture -> (), alt!( - string - | - byte_string - | - byte - | - character - | - float - | - int -)); - -named!(string -> (), alt!( - quoted_string - | - preceded!( - punct!("r"), - raw_string - ) => { |_| () } -)); - -named!(quoted_string -> (), do_parse!( - punct!("\"") >> - cooked_string >> - tag!("\"") >> - option!(symbol_not_raw) >> - (()) -)); - -fn cooked_string(input: Cursor) -> PResult<()> { - let mut chars = input.char_indices().peekable(); - while let Some((byte_offset, ch)) = chars.next() { - match ch { - '"' => { - return Ok((input.advance(byte_offset), ())); +fn token_stream(input: &mut Cursor<'_>) -> Result { + let mut trees = Vec::new(); + while !input.is_empty() { + match input.head.as_ref().unwrap().kind { + TokenKind::Whitespace => *input = input.bumpped(), + TokenKind::LineComment | TokenKind::BlockComment { .. } => { + trees.extend(comment(input)?) } - '\r' => { - if let Some((_, '\n')) = chars.next() { - // ... - } else { - break; - } + TokenKind::CloseBrace | TokenKind::CloseBracket | TokenKind::CloseParen => break, + TokenKind::OpenBrace | TokenKind::OpenBracket | TokenKind::OpenParen => { + trees.push(token_tree(input)?) } - '\\' => match chars.next() { - Some((_, 'x')) => { - if !backslash_x_char(&mut chars) { - break; - } - } - Some((_, 'n')) | Some((_, 'r')) | Some((_, 't')) | Some((_, '\\')) - | Some((_, '\'')) | Some((_, '"')) | Some((_, '0')) => {} - Some((_, 'u')) => { - if !backslash_u(&mut chars) { - break; - } - } - Some((_, '\n')) | Some((_, '\r')) => { - while let Some(&(_, ch)) = chars.peek() { - if ch.is_whitespace() { - chars.next(); - } else { - break; - } - } - } - _ => break, - }, - _ch => {} + _ => trees.extend(leaf_token(input)?), } } - Err(LexError) -} - -named!(byte_string -> (), alt!( - delimited!( - punct!("b\""), - cooked_byte_string, - tag!("\"") - ) => { |_| () } - | - preceded!( - punct!("br"), - raw_string - ) => { |_| () } -)); - -fn cooked_byte_string(mut input: Cursor) -> PResult<()> { - let mut bytes = input.bytes().enumerate(); - 'outer: while let Some((offset, b)) = bytes.next() { - match b { - b'"' => { - return Ok((input.advance(offset), ())); - } - b'\r' => { - if let Some((_, b'\n')) = bytes.next() { - // ... + Ok(TokenStream { inner: trees }) +} + +fn comment(input: &mut Cursor<'_>) -> Result, LexError> { + fn doc_comment_contents<'a>( + input: &mut Cursor<'a>, + ) -> Result, LexError> { + assert!(!input.is_empty()); + match input.head.as_ref().unwrap().kind { + TokenKind::LineComment => { + let slice = input.slice(); + *input = input.bumpped(); + if slice.starts_with("//!") { + Ok(Some((&slice[3..], true))) + } else if slice.starts_with("///") && !slice.starts_with("////") { + Ok(Some((&slice[3..], false))) } else { - break; + Ok(None) } } - b'\\' => match bytes.next() { - Some((_, b'x')) => { - if !backslash_x_byte(&mut bytes) { - break; - } - } - Some((_, b'n')) | Some((_, b'r')) | Some((_, b't')) | Some((_, b'\\')) - | Some((_, b'0')) | Some((_, b'\'')) | Some((_, b'"')) => {} - Some((newline, b'\n')) | Some((newline, b'\r')) => { - let rest = input.advance(newline + 1); - for (offset, ch) in rest.char_indices() { - if !ch.is_whitespace() { - input = rest.advance(offset); - bytes = input.bytes().enumerate(); - continue 'outer; - } - } - break; - } - _ => break, - }, - b if b < 0x80 => {} - _ => break, - } - } - Err(LexError) -} - -fn raw_string(input: Cursor) -> PResult<()> { - let mut chars = input.char_indices(); - let mut n = 0; - while let Some((byte_offset, ch)) = chars.next() { - match ch { - '"' => { - n = byte_offset; - break; - } - '#' => {} - _ => return Err(LexError), - } - } - for (byte_offset, ch) in chars { - match ch { - '"' if input.advance(byte_offset + 1).starts_with(&input.rest[..n]) => { - let rest = input.advance(byte_offset + 1 + n); - return Ok((rest, ())); - } - '\r' => {} - _ => {} - } - } - Err(LexError) -} - -named!(byte -> (), do_parse!( - punct!("b") >> - tag!("'") >> - cooked_byte >> - tag!("'") >> - (()) -)); - -fn cooked_byte(input: Cursor) -> PResult<()> { - let mut bytes = input.bytes().enumerate(); - let ok = match bytes.next().map(|(_, b)| b) { - Some(b'\\') => match bytes.next().map(|(_, b)| b) { - Some(b'x') => backslash_x_byte(&mut bytes), - Some(b'n') | Some(b'r') | Some(b't') | Some(b'\\') | Some(b'0') | Some(b'\'') - | Some(b'"') => true, - _ => false, - }, - b => b.is_some(), - }; - if ok { - match bytes.next() { - Some((offset, _)) => { - if input.chars().as_str().is_char_boundary(offset) { - Ok((input.advance(offset), ())) + TokenKind::BlockComment { terminated: false } => Err(LexError), + TokenKind::BlockComment { .. } => { + let slice = input.slice(); + *input = input.bumpped(); + if slice.starts_with("/*!") { + Ok(Some((&slice[3..], true))) + } else if slice.starts_with("/**") + && !slice.starts_with("/***") + && !slice.starts_with("/**/") + { + Ok(Some((&slice[3..], false))) } else { - Err(LexError) + Ok(None) } } - None => Ok((input.advance(input.len()), ())), + kind => unreachable!("comment {:?}", kind), } - } else { - Err(LexError) } -} -named!(character -> (), do_parse!( - punct!("'") >> - cooked_char >> - tag!("'") >> - (()) -)); - -fn cooked_char(input: Cursor) -> PResult<()> { - let mut chars = input.char_indices(); - let ok = match chars.next().map(|(_, ch)| ch) { - Some('\\') => match chars.next().map(|(_, ch)| ch) { - Some('x') => backslash_x_char(&mut chars), - Some('u') => backslash_u(&mut chars), - Some('n') | Some('r') | Some('t') | Some('\\') | Some('0') | Some('\'') | Some('"') => { - true - } - _ => false, - }, - ch => ch.is_some(), - }; - if ok { - match chars.next() { - Some((idx, _)) => Ok((input.advance(idx), ())), - None => Ok((input.advance(input.len()), ())), + let mut trees = Vec::new(); + if let (Some((comment, inner)), span) = spanned(input, doc_comment_contents)? { + trees.push(TokenTree::Punct(Punct::new('#', Spacing::Alone))); + if inner { + trees.push(Punct::new('!', Spacing::Alone).into()); } - } else { - Err(LexError) - } -} - -macro_rules! next_ch { - ($chars:ident @ $pat:pat $(| $rest:pat)*) => { - match $chars.next() { - Some((_, ch)) => match ch { - $pat $(| $rest)* => ch, - _ => return false, - }, - None => return false + let mut stream = vec![ + TokenTree::Ident(crate::Ident::new("doc", span)), + TokenTree::Punct(Punct::new('=', Spacing::Alone)), + TokenTree::Literal(crate::Literal::string(comment)), + ]; + for tt in stream.iter_mut() { + tt.set_span(span); } - }; -} - -fn backslash_x_char(chars: &mut I) -> bool -where - I: Iterator, -{ - next_ch!(chars @ '0'..='7'); - next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F'); - true -} - -fn backslash_x_byte(chars: &mut I) -> bool -where - I: Iterator, -{ - next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F'); - next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F'); - true -} - -fn backslash_u(chars: &mut I) -> bool -where - I: Iterator, -{ - next_ch!(chars @ '{'); - next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F'); - loop { - let c = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F' | '_' | '}'); - if c == '}' { - return true; + let group = Group::new(Delimiter::Bracket, stream.into_iter().collect()); + trees.push(crate::Group::_new_stable(group).into()); + for tt in trees.iter_mut() { + tt.set_span(span); } } + Ok(trees) } -fn float(input: Cursor) -> PResult<()> { - let (mut rest, ()) = float_digits(input)?; - if let Some(ch) = rest.chars().next() { - if is_ident_start(ch) { - rest = symbol_not_raw(rest)?.0; +fn token_tree(input: &mut Cursor<'_>) -> Result { + fn delimited_stream( + close: TokenKind, + delimiter: Delimiter, + input: &mut Cursor<'_>, + ) -> Result { + *input = input.bumpped(); + let tts = token_stream(input)?; + if input.head.as_ref().map_or(false, |t| t.kind == close) { + let g = Group::new(delimiter, tts); + Ok(TokenTree::Group(crate::Group::_new_stable(g))) + } else { + Err(LexError) } } - word_break(rest) -} -fn float_digits(input: Cursor) -> PResult<()> { - let mut chars = input.chars().peekable(); - match chars.next() { - Some(ch) if ch >= '0' && ch <= '9' => {} - _ => return Err(LexError), - } - - let mut len = 1; - let mut has_dot = false; - let mut has_exp = false; - while let Some(&ch) = chars.peek() { - match ch { - '0'..='9' | '_' => { - chars.next(); - len += 1; + fn token_tree_(input: &mut Cursor<'_>) -> Result { + assert!(!input.is_empty()); + match input.head.as_ref().unwrap().kind { + TokenKind::OpenParen => { + delimited_stream(TokenKind::CloseParen, Delimiter::Parenthesis, input) } - '.' => { - if has_dot { - break; - } - chars.next(); - if chars - .peek() - .map(|&ch| ch == '.' || is_ident_start(ch)) - .unwrap_or(false) - { - return Err(LexError); - } - len += 1; - has_dot = true; + TokenKind::OpenBracket => { + delimited_stream(TokenKind::CloseBracket, Delimiter::Bracket, input) } - 'e' | 'E' => { - chars.next(); - len += 1; - has_exp = true; - break; + TokenKind::OpenBrace => { + delimited_stream(TokenKind::CloseBrace, Delimiter::Brace, input) } - _ => break, + kind => unreachable!("token_tree {:?}", kind), } } - let rest = input.advance(len); - if !(has_dot || has_exp || rest.starts_with("f32") || rest.starts_with("f64")) { - return Err(LexError); - } - - if has_exp { - let mut has_exp_value = false; - while let Some(&ch) = chars.peek() { - match ch { - '+' | '-' => { - if has_exp_value { - break; - } - chars.next(); - len += 1; - } - '0'..='9' => { - chars.next(); - len += 1; - has_exp_value = true; - } - '_' => { - chars.next(); - len += 1; - } - _ => break, - } - } - if !has_exp_value { - return Err(LexError); - } - } - - Ok((input.advance(len), ())) -} - -fn int(input: Cursor) -> PResult<()> { - let (mut rest, ()) = digits(input)?; - if let Some(ch) = rest.chars().next() { - if is_ident_start(ch) { - rest = symbol_not_raw(rest)?.0; + let (mut tt, span) = spanned(input, token_tree_)?; + tt.set_span(span); + Ok(tt) +} + +fn leaf_token(input: &mut Cursor<'_>) -> Result, LexError> { + fn is_punct(kind: TokenKind) -> bool { + match kind { + TokenKind::Semi + | TokenKind::Comma + | TokenKind::Dot + | TokenKind::At + | TokenKind::Pound + | TokenKind::Tilde + | TokenKind::Question + | TokenKind::Colon + | TokenKind::Dollar + | TokenKind::Eq + | TokenKind::Not + | TokenKind::Lt + | TokenKind::Gt + | TokenKind::Minus + | TokenKind::And + | TokenKind::Or + | TokenKind::Plus + | TokenKind::Star + | TokenKind::Slash + | TokenKind::Caret + | TokenKind::Percent => true, + _ => false, } } - word_break(rest) -} -fn digits(mut input: Cursor) -> PResult<()> { - let base = if input.starts_with("0x") { - input = input.advance(2); - 16 - } else if input.starts_with("0o") { - input = input.advance(2); - 8 - } else if input.starts_with("0b") { - input = input.advance(2); - 2 - } else { - 10 - }; - - let mut len = 0; - let mut empty = true; - for b in input.bytes() { - let digit = match b { - b'0'..=b'9' => (b - b'0') as u64, - b'a'..=b'f' => 10 + (b - b'a') as u64, - b'A'..=b'F' => 10 + (b - b'A') as u64, - b'_' => { - if empty && base == 10 { - return Err(LexError); + fn leaf_token_(input: &mut Cursor<'_>) -> Result, LexError> { + assert!(!input.is_empty()); + match input.head.as_ref().unwrap().kind { + TokenKind::Literal { .. } => { + let slice = input.slice(); + let l = Literal::_new(slice.to_string()); + *input = input.bumpped(); + Ok(vec![TokenTree::Literal(crate::Literal::_new_stable(l))]) + } + TokenKind::Ident => { + let sym = input.slice(); + let ident = crate::Ident::new(sym, crate::Span::call_site()); + *input = input.bumpped(); + Ok(vec![ident.into()]) + } + TokenKind::RawIdent => { + let sym = &input.slice()[2..]; // remove `r#` prefix + if sym == "_" { + Err(LexError) + } else { + let ident = crate::Ident::_new_raw(sym, crate::Span::call_site()); + *input = input.bumpped(); + Ok(vec![ident.into()]) } - len += 1; - continue; } - _ => break, - }; - if digit >= base { - return Err(LexError); + TokenKind::Lifetime { .. } => { + let slice = &input.slice()[1..]; // strip leading `'` + let p = Punct::new('\'', Spacing::Joint); + let i = crate::Ident::new(slice, crate::Span::call_site()); + Ok(vec![p.into(), i.into()]) // FIXME: `spanned` does the wrong thing here + // specifically: both the apostrophe and the ident have the same span + } + kind if is_punct(kind) => { + let slice = input.slice(); + assert!(slice.chars().count() == 1); + let ch = slice.chars().next().unwrap(); + *input = input.bumpped(); + let joint = if input.head.as_ref().map_or(false, |t| is_punct(t.kind)) { + Spacing::Joint + } else { + Spacing::Alone + }; + let p = Punct::new(ch, joint); + Ok(vec![TokenTree::Punct(p)]) + } + TokenKind::Unknown => Err(LexError), + kind => unreachable!("leaf_token {:?}", kind), } - len += 1; - empty = false; } - if empty { - Err(LexError) - } else { - Ok((input.advance(len), ())) - } -} -fn op(input: Cursor) -> PResult { - let input = skip_whitespace(input); - match op_char(input) { - Ok((rest, '\'')) => { - symbol(rest)?; - Ok((rest, Punct::new('\'', Spacing::Joint))) - } - Ok((rest, ch)) => { - let kind = match op_char(rest) { - Ok(_) => Spacing::Joint, - Err(LexError) => Spacing::Alone, - }; - Ok((rest, Punct::new(ch, kind))) - } - Err(LexError) => Err(LexError), + let (mut tts, span) = spanned(input, leaf_token_)?; + for tt in &mut tts { + tt.set_span(span); } + Ok(tts) } -fn op_char(input: Cursor) -> PResult { - if input.starts_with("//") || input.starts_with("/*") { - // Do not accept `/` of a comment as an op. - return Err(LexError); - } - - let mut chars = input.chars(); - let first = match chars.next() { - Some(ch) => ch, - None => { - return Err(LexError); - } - }; - let recognized = "~!@#$%^&*-=+|;:,<.>/?'"; - if recognized.contains(first) { - Ok((input.advance(first.len_utf8()), first)) - } else { - Err(LexError) - } +#[cfg(not(span_locations))] +fn spanned<'a, T>( + input: &mut Cursor<'a>, + f: fn(&mut Cursor<'a>) -> Result, +) -> Result<(T, crate::Span), LexError> { + let out = f(input)?; + Ok((out, crate::Span::_new_stable(Span::call_site()))) } -fn doc_comment(input: Cursor) -> PResult> { - let mut trees = Vec::new(); - let (rest, ((comment, inner), span)) = spanned(input, doc_comment_contents)?; - trees.push(TokenTree::Punct(Punct::new('#', Spacing::Alone))); - if inner { - trees.push(Punct::new('!', Spacing::Alone).into()); - } - let mut stream = vec![ - TokenTree::Ident(crate::Ident::new("doc", span)), - TokenTree::Punct(Punct::new('=', Spacing::Alone)), - TokenTree::Literal(crate::Literal::string(comment)), - ]; - for tt in stream.iter_mut() { - tt.set_span(span); - } - let group = Group::new(Delimiter::Bracket, stream.into_iter().collect()); - trees.push(crate::Group::_new_stable(group).into()); - for tt in trees.iter_mut() { - tt.set_span(span); - } - Ok((rest, trees)) +#[cfg(span_locations)] +fn spanned<'a, T>( + input: &mut Cursor<'a>, + f: fn(&mut Cursor<'a>) -> Result, +) -> Result<(T, crate::Span), LexError> { + let lo = input.off; + let out = f(input)?; + let hi = input.off; + let span = crate::Span::_new_stable(Span { lo, hi }); + Ok((out, span)) } - -named!(doc_comment_contents -> (&str, bool), alt!( - do_parse!( - punct!("//!") >> - s: take_until_newline_or_eof!() >> - ((s, true)) - ) - | - do_parse!( - option!(whitespace) >> - peek!(tag!("/*!")) >> - s: block_comment >> - ((s, true)) - ) - | - do_parse!( - punct!("///") >> - not!(tag!("/")) >> - s: take_until_newline_or_eof!() >> - ((s, false)) - ) - | - do_parse!( - option!(whitespace) >> - peek!(tuple!(tag!("/**"), not!(tag!("*")))) >> - s: block_comment >> - ((s, false)) - ) -)); diff --git a/src/lib.rs b/src/lib.rs index a08be3e8..731910c6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -96,8 +96,6 @@ use std::path::PathBuf; use std::rc::Rc; use std::str::FromStr; -#[macro_use] -mod strnom; mod fallback; #[cfg(not(wrap_proc_macro))] diff --git a/src/strnom.rs b/src/strnom.rs deleted file mode 100644 index eb7d0b8a..00000000 --- a/src/strnom.rs +++ /dev/null @@ -1,391 +0,0 @@ -//! Adapted from [`nom`](https://github.com/Geal/nom). - -use crate::fallback::LexError; -use std::str::{Bytes, CharIndices, Chars}; -use unicode_xid::UnicodeXID; - -#[derive(Copy, Clone, Eq, PartialEq)] -pub struct Cursor<'a> { - pub rest: &'a str, - #[cfg(span_locations)] - pub off: u32, -} - -impl<'a> Cursor<'a> { - #[cfg(not(span_locations))] - pub fn advance(&self, amt: usize) -> Cursor<'a> { - Cursor { - rest: &self.rest[amt..], - } - } - #[cfg(span_locations)] - pub fn advance(&self, amt: usize) -> Cursor<'a> { - Cursor { - rest: &self.rest[amt..], - off: self.off + (amt as u32), - } - } - - pub fn find(&self, p: char) -> Option { - self.rest.find(p) - } - - pub fn starts_with(&self, s: &str) -> bool { - self.rest.starts_with(s) - } - - pub fn is_empty(&self) -> bool { - self.rest.is_empty() - } - - pub fn len(&self) -> usize { - self.rest.len() - } - - pub fn as_bytes(&self) -> &'a [u8] { - self.rest.as_bytes() - } - - pub fn bytes(&self) -> Bytes<'a> { - self.rest.bytes() - } - - pub fn chars(&self) -> Chars<'a> { - self.rest.chars() - } - - pub fn char_indices(&self) -> CharIndices<'a> { - self.rest.char_indices() - } -} - -pub type PResult<'a, O> = Result<(Cursor<'a>, O), LexError>; - -pub fn whitespace(input: Cursor) -> PResult<()> { - if input.is_empty() { - return Err(LexError); - } - - let bytes = input.as_bytes(); - let mut i = 0; - while i < bytes.len() { - let s = input.advance(i); - if bytes[i] == b'/' { - if s.starts_with("//") - && (!s.starts_with("///") || s.starts_with("////")) - && !s.starts_with("//!") - { - if let Some(len) = s.find('\n') { - i += len + 1; - continue; - } - break; - } else if s.starts_with("/**/") { - i += 4; - continue; - } else if s.starts_with("/*") - && (!s.starts_with("/**") || s.starts_with("/***")) - && !s.starts_with("/*!") - { - let (_, com) = block_comment(s)?; - i += com.len(); - continue; - } - } - match bytes[i] { - b' ' | 0x09..=0x0d => { - i += 1; - continue; - } - b if b <= 0x7f => {} - _ => { - let ch = s.chars().next().unwrap(); - if is_whitespace(ch) { - i += ch.len_utf8(); - continue; - } - } - } - return if i > 0 { Ok((s, ())) } else { Err(LexError) }; - } - Ok((input.advance(input.len()), ())) -} - -pub fn block_comment(input: Cursor) -> PResult<&str> { - if !input.starts_with("/*") { - return Err(LexError); - } - - let mut depth = 0; - let bytes = input.as_bytes(); - let mut i = 0; - let upper = bytes.len() - 1; - while i < upper { - if bytes[i] == b'/' && bytes[i + 1] == b'*' { - depth += 1; - i += 1; // eat '*' - } else if bytes[i] == b'*' && bytes[i + 1] == b'/' { - depth -= 1; - if depth == 0 { - return Ok((input.advance(i + 2), &input.rest[..i + 2])); - } - i += 1; // eat '/' - } - i += 1; - } - Err(LexError) -} - -pub fn skip_whitespace(input: Cursor) -> Cursor { - match whitespace(input) { - Ok((rest, _)) => rest, - Err(LexError) => input, - } -} - -fn is_whitespace(ch: char) -> bool { - // Rust treats left-to-right mark and right-to-left mark as whitespace - ch.is_whitespace() || ch == '\u{200e}' || ch == '\u{200f}' -} - -pub fn word_break(input: Cursor) -> PResult<()> { - match input.chars().next() { - Some(ch) if UnicodeXID::is_xid_continue(ch) => Err(LexError), - Some(_) | None => Ok((input, ())), - } -} - -macro_rules! named { - ($name:ident -> $o:ty, $submac:ident!( $($args:tt)* )) => { - fn $name<'a>(i: Cursor<'a>) -> $crate::strnom::PResult<'a, $o> { - $submac!(i, $($args)*) - } - }; -} - -macro_rules! alt { - ($i:expr, $e:ident | $($rest:tt)*) => { - alt!($i, call!($e) | $($rest)*) - }; - - ($i:expr, $subrule:ident!( $($args:tt)*) | $($rest:tt)*) => { - match $subrule!($i, $($args)*) { - res @ Ok(_) => res, - _ => alt!($i, $($rest)*) - } - }; - - ($i:expr, $subrule:ident!( $($args:tt)* ) => { $gen:expr } | $($rest:tt)+) => { - match $subrule!($i, $($args)*) { - Ok((i, o)) => Ok((i, $gen(o))), - Err(LexError) => alt!($i, $($rest)*) - } - }; - - ($i:expr, $e:ident => { $gen:expr } | $($rest:tt)*) => { - alt!($i, call!($e) => { $gen } | $($rest)*) - }; - - ($i:expr, $e:ident => { $gen:expr }) => { - alt!($i, call!($e) => { $gen }) - }; - - ($i:expr, $subrule:ident!( $($args:tt)* ) => { $gen:expr }) => { - match $subrule!($i, $($args)*) { - Ok((i, o)) => Ok((i, $gen(o))), - Err(LexError) => Err(LexError), - } - }; - - ($i:expr, $e:ident) => { - alt!($i, call!($e)) - }; - - ($i:expr, $subrule:ident!( $($args:tt)*)) => { - $subrule!($i, $($args)*) - }; -} - -macro_rules! do_parse { - ($i:expr, ( $($rest:expr),* )) => { - Ok(($i, ( $($rest),* ))) - }; - - ($i:expr, $e:ident >> $($rest:tt)*) => { - do_parse!($i, call!($e) >> $($rest)*) - }; - - ($i:expr, $submac:ident!( $($args:tt)* ) >> $($rest:tt)*) => { - match $submac!($i, $($args)*) { - Err(LexError) => Err(LexError), - Ok((i, _)) => do_parse!(i, $($rest)*), - } - }; - - ($i:expr, $field:ident : $e:ident >> $($rest:tt)*) => { - do_parse!($i, $field: call!($e) >> $($rest)*) - }; - - ($i:expr, $field:ident : $submac:ident!( $($args:tt)* ) >> $($rest:tt)*) => { - match $submac!($i, $($args)*) { - Err(LexError) => Err(LexError), - Ok((i, o)) => { - let $field = o; - do_parse!(i, $($rest)*) - }, - } - }; -} - -macro_rules! peek { - ($i:expr, $submac:ident!( $($args:tt)* )) => { - match $submac!($i, $($args)*) { - Ok((_, o)) => Ok(($i, o)), - Err(LexError) => Err(LexError), - } - }; -} - -macro_rules! call { - ($i:expr, $fun:expr $(, $args:expr)*) => { - $fun($i $(, $args)*) - }; -} - -macro_rules! option { - ($i:expr, $f:expr) => { - match $f($i) { - Ok((i, o)) => Ok((i, Some(o))), - Err(LexError) => Ok(($i, None)), - } - }; -} - -macro_rules! take_until_newline_or_eof { - ($i:expr,) => {{ - if $i.len() == 0 { - Ok(($i, "")) - } else { - match $i.find('\n') { - Some(i) => Ok(($i.advance(i), &$i.rest[..i])), - None => Ok(($i.advance($i.len()), &$i.rest[..$i.len()])), - } - } - }}; -} - -macro_rules! tuple { - ($i:expr, $($rest:tt)*) => { - tuple_parser!($i, (), $($rest)*) - }; -} - -/// Do not use directly. Use `tuple!`. -macro_rules! tuple_parser { - ($i:expr, ($($parsed:tt),*), $e:ident, $($rest:tt)*) => { - tuple_parser!($i, ($($parsed),*), call!($e), $($rest)*) - }; - - ($i:expr, (), $submac:ident!( $($args:tt)* ), $($rest:tt)*) => { - match $submac!($i, $($args)*) { - Err(LexError) => Err(LexError), - Ok((i, o)) => tuple_parser!(i, (o), $($rest)*), - } - }; - - ($i:expr, ($($parsed:tt)*), $submac:ident!( $($args:tt)* ), $($rest:tt)*) => { - match $submac!($i, $($args)*) { - Err(LexError) => Err(LexError), - Ok((i, o)) => tuple_parser!(i, ($($parsed)* , o), $($rest)*), - } - }; - - ($i:expr, ($($parsed:tt),*), $e:ident) => { - tuple_parser!($i, ($($parsed),*), call!($e)) - }; - - ($i:expr, (), $submac:ident!( $($args:tt)* )) => { - $submac!($i, $($args)*) - }; - - ($i:expr, ($($parsed:expr),*), $submac:ident!( $($args:tt)* )) => { - match $submac!($i, $($args)*) { - Err(LexError) => Err(LexError), - Ok((i, o)) => Ok((i, ($($parsed),*, o))) - } - }; - - ($i:expr, ($($parsed:expr),*)) => { - Ok(($i, ($($parsed),*))) - }; -} - -macro_rules! not { - ($i:expr, $submac:ident!( $($args:tt)* )) => { - match $submac!($i, $($args)*) { - Ok((_, _)) => Err(LexError), - Err(LexError) => Ok(($i, ())), - } - }; -} - -macro_rules! tag { - ($i:expr, $tag:expr) => { - if $i.starts_with($tag) { - Ok(($i.advance($tag.len()), &$i.rest[..$tag.len()])) - } else { - Err(LexError) - } - }; -} - -macro_rules! punct { - ($i:expr, $punct:expr) => { - $crate::strnom::punct($i, $punct) - }; -} - -/// Do not use directly. Use `punct!`. -pub fn punct<'a>(input: Cursor<'a>, token: &'static str) -> PResult<'a, &'a str> { - let input = skip_whitespace(input); - if input.starts_with(token) { - Ok((input.advance(token.len()), token)) - } else { - Err(LexError) - } -} - -macro_rules! preceded { - ($i:expr, $submac:ident!( $($args:tt)* ), $submac2:ident!( $($args2:tt)* )) => { - match tuple!($i, $submac!($($args)*), $submac2!($($args2)*)) { - Ok((remaining, (_, o))) => Ok((remaining, o)), - Err(LexError) => Err(LexError), - } - }; - - ($i:expr, $submac:ident!( $($args:tt)* ), $g:expr) => { - preceded!($i, $submac!($($args)*), call!($g)) - }; -} - -macro_rules! delimited { - ($i:expr, $submac:ident!( $($args:tt)* ), $($rest:tt)+) => { - match tuple_parser!($i, (), $submac!($($args)*), $($rest)*) { - Err(LexError) => Err(LexError), - Ok((i1, (_, o, _))) => Ok((i1, o)) - } - }; -} - -macro_rules! map { - ($i:expr, $submac:ident!( $($args:tt)* ), $g:expr) => { - match $submac!($i, $($args)*) { - Err(LexError) => Err(LexError), - Ok((i, o)) => Ok((i, call!(o, $g))) - } - }; - - ($i:expr, $f:expr, $g:expr) => { - map!($i, call!($f), $g) - }; -}