From cab6bd2128613fa0f448a9dde751b59420771a74 Mon Sep 17 00:00:00 2001 From: Max Larsson Date: Sat, 28 May 2022 01:14:22 -0400 Subject: [PATCH 1/5] Add row and col tracking for errors --- examples/bad_strs.fl | 9 +++--- src/lexer/error.rs | 37 +++++++++++++++++++++++-- src/lexer/source_iterator.rs | 53 ++++++++++++++++++++++++++++++++++-- src/lexer/tokens.rs | 44 ++++++++++++++++-------------- 4 files changed, 113 insertions(+), 30 deletions(-) diff --git a/examples/bad_strs.fl b/examples/bad_strs.fl index a5cd772..46d0c21 100644 --- a/examples/bad_strs.fl +++ b/examples/bad_strs.fl @@ -1,4 +1,5 @@ -x = " -y = "\w" -z = "\x3 ninety" -w = "\x3 ninety \u38" +x = "abscscsc +y = "abscscsc +z = "\w" +w = "\x3 ninety" +j = "\x3 ninety \u38" diff --git a/src/lexer/error.rs b/src/lexer/error.rs index f5c5d07..2594192 100644 --- a/src/lexer/error.rs +++ b/src/lexer/error.rs @@ -3,19 +3,50 @@ use std::fmt::{Display, Formatter}; #[derive(Debug, PartialEq)] pub struct Error { + pub(crate) row: usize, + pub(crate) col: usize, pub(crate) kind: ErrorKind, } impl Error { - pub fn new(kind: ErrorKind) -> Self { - Self { kind } + pub fn new(location: (usize, usize), kind: ErrorKind) -> Self { + Self { + row: location.0, + col: location.1, + kind, + } } } impl std::error::Error for Error {} impl Display for Error { + // TODO: What if n digit numbers for rows and cols? 
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{}: {}", "error".red(), self.kind) + writeln!( + f, + "{}: {}", + "error".red().bold(), + self.kind.to_string().bold() + )?; + writeln!( + f, + " {} {}:{}:{}", + "-->".blue().bold(), + "temp-file-path", + self.row, + self.col + )?; + writeln!(f, " {}", "|".blue().bold())?; + writeln!( + f, + "{:>2} {} {}", + self.row.to_string().bold(), + "|".blue().bold(), + "pub fn temp_line()" + )?; + writeln!(f, " {}", "|".blue().bold())?; + writeln!(f, " {}", "|".blue().bold())?; + Ok(()) } } diff --git a/src/lexer/source_iterator.rs b/src/lexer/source_iterator.rs index 7204870..3feb0f2 100644 --- a/src/lexer/source_iterator.rs +++ b/src/lexer/source_iterator.rs @@ -3,12 +3,16 @@ use std::str::Chars; #[derive(Debug)] pub struct SourceIterator<'a> { + row: usize, + col: usize, src: Peekable>>, } impl<'a> SourceIterator<'a> { pub fn new(source: &'a str) -> Self { Self { + row: 1, + col: 1, src: source.chars().enumerate().peekable(), } } @@ -19,7 +23,7 @@ impl<'a> SourceIterator<'a> { pub fn skip(&mut self, n: usize) { for _ in 0..n { - if self.src.next().is_none() { + if self.next().is_none() { break; } } @@ -56,8 +60,27 @@ impl<'a> SourceIterator<'a> { } } + /// Returns the next character of the iterator and steps the iterator. This functions also + /// keeps track of row and column numbers. 
+ /// + /// Assumptions: Every function that steps the iterator does so through this function pub fn next(&mut self) -> Option { - self.src.next().map(|(_, c)| c) + let next = self.src.next().map(|(_, c)| c); + + match next { + Some('\n') => { + self.row += 1; + self.col = 1; + } + _ => self.col += 1, + } + + next + } + + /// Returns (row, col) + pub fn loc(&self) -> (usize, usize) { + (self.row, self.col) } } @@ -117,4 +140,30 @@ mod tests { iter.skip(100); assert_eq!(iter.next(), None); } + + #[test] + fn row_count() { + let mut iter = SourceIterator::new("row 1\nrow 2\nrow 3\nrow 4"); + iter.skip(6); + assert_eq!(iter.loc(), (2, 1)); + iter.skip(6); + assert_eq!(iter.loc(), (3, 1)); + iter.skip(6); + assert_eq!(iter.loc(), (4, 1)); + iter.skip(6); + assert_eq!(iter.next(), None); + } + + #[test] + fn col_count() { + let mut iter = SourceIterator::new("123456789\n123456789"); + iter.skip(6); + assert_eq!(iter.loc(), (1, 7)); + iter.skip(6); + assert_eq!(iter.loc(), (2, 3)); + iter.skip(1); + assert_eq!(iter.loc(), (2, 4)); + iter.skip(100); + assert_eq!(iter.next(), None); + } } diff --git a/src/lexer/tokens.rs b/src/lexer/tokens.rs index 58d202b..3cac3a5 100644 --- a/src/lexer/tokens.rs +++ b/src/lexer/tokens.rs @@ -75,13 +75,15 @@ impl<'a> Iterator for Tokens<'a> { c if c.is_ascii_digit() => self.read_numeric_literal(), c if c.is_alphabetic() || c == '_' => Ok(self.read_identifier_or_kw()), - c => Err(Error::new(UnknownStartOfToken(c))), + c => Err(Error::new(self.src.loc(), UnknownStartOfToken(c))), }; match tok { Ok(Token::Punctuation(Slash)) => {} Ok(Token::Punctuation(_)) => self.src.skip(1), Err(Error { + row: _, + col: _, kind: UnknownStartOfToken(_), }) => self.src.skip(1), _ => {} @@ -121,7 +123,7 @@ impl<'a> Tokens<'a> { loop { match self.src.peek() { - Some('\n') | None => return Err(Error::new(UnterminatedStr)), + Some('\n') | None => return Err(Error::new(self.src.loc(), UnterminatedStr)), Some('"') => { self.src.skip(1); let default = 
Ok(Token::Literal(StrLiteral(contents))); @@ -151,8 +153,8 @@ impl<'a> Tokens<'a> { Some('"') => Ok('"'), Some('u') => self.take_hex_code_and_conv_to_char(4), Some('x') => self.take_hex_code_and_conv_to_char(2), - Some(c) => Err(Error::new(UnknownEscape(c))), - None => Err(Error::new(UnterminatedStr)), + Some(c) => Err(Error::new(self.src.loc(), UnknownEscape(c))), + None => Err(Error::new(self.src.loc(), UnterminatedStr)), // todo newline! } } @@ -169,18 +171,18 @@ impl<'a> Tokens<'a> { self.src.skip(1); code.push(c); } - Some('"') => return Err(Error::new(TruncatedEscapeSequence)), + Some('"') => return Err(Error::new(self.src.loc(), TruncatedEscapeSequence)), Some(c) => { self.src.skip(1); - return Err(Error::new(InvalidCharInEscape(c))); + return Err(Error::new(self.src.loc(), InvalidCharInEscape(c))); } - None => return Err(Error::new(UnterminatedStr)), + None => return Err(Error::new(self.src.loc(), UnterminatedStr)), } } // It is safe to unwrap from_str_radix(), which panics if radix > 36 (ours is 16) match char::from_u32(u32::from_str_radix(&code, 16).unwrap()) { Some(c) => Ok(c), - None => Err(Error::new(BadUnicodeEscape(code))), + None => Err(Error::new(self.src.loc(), BadUnicodeEscape(code))), } } @@ -207,7 +209,7 @@ impl<'a> Tokens<'a> { if num.contains(['e', 'E', '.']) { match num.parse() { Ok(parsed) => Ok(Token::Literal(FloatLiteral(parsed))), - Err(_) => Err(Error::new(InvalidFloat(num))), + Err(_) => Err(Error::new(self.src.loc(), InvalidFloat(num))), } } else { // This is safe to unwrap because we can't get an invalid int @@ -271,35 +273,35 @@ mod tests { #[test] fn err_given_unknown_str_escape() { let source = r#""\e""#; - let expected = vec![Err(Error::new(UnknownEscape('e')))]; + let expected = vec![Err(Error::new((1, 4), UnknownEscape('e')))]; assert_source_has_expected_output!(&source.to_string(), expected) } #[test] fn err_given_truncated_unicode_escape() { let source = r#""\u3b9""#; - let expected = 
vec![Err(Error::new(TruncatedEscapeSequence))]; + let expected = vec![Err(Error::new((1, 7), TruncatedEscapeSequence))]; assert_source_has_expected_output!(&source.to_string(), expected) } #[test] fn err_given_invalid_unicode_escape() { let source = r#""\u3b9wadsfdsfs""#; - let expected = vec![Err(Error::new(InvalidCharInEscape('w')))]; + let expected = vec![Err(Error::new((1, 7), InvalidCharInEscape('w')))]; assert_source_has_expected_output!(&source.to_string(), expected) } #[test] fn err_given_truncated_hex_escape() { let source = r#""\x9""#; - let expected = vec![Err(Error::new(TruncatedEscapeSequence))]; + let expected = vec![Err(Error::new((1, 5), TruncatedEscapeSequence))]; assert_source_has_expected_output!(&source.to_string(), expected) } #[test] fn err_given_invalid_hex_escape() { let source = r#""\x3z9wadsfdsfs""#; - let expected = vec![Err(Error::new(InvalidCharInEscape('z')))]; + let expected = vec![Err(Error::new((1, 5), InvalidCharInEscape('z')))]; assert_source_has_expected_output!(&source.to_string(), expected) } @@ -308,9 +310,9 @@ mod tests { let source = "\"test\n\""; let expected = vec![ - Err(Error::new(UnterminatedStr)), + Err(Error::new((1, 5), UnterminatedStr)), Ok(Token::Punctuation(Newline)), - Err(Error::new(UnterminatedStr)), + Err(Error::new((2, 1), UnterminatedStr)), ]; assert_source_has_expected_output!(&source.to_string(), expected) @@ -319,21 +321,21 @@ mod tests { #[test] fn err_given_unknown_char() { let source = '∂'; - let expected = vec![Err(Error::new(UnknownStartOfToken(source)))]; + let expected = vec![Err(Error::new((1, 1), UnknownStartOfToken(source)))]; assert_source_has_expected_output!(&source.to_string(), expected) } #[test] fn err_given_float_with_multiple_e() { let source = "1.2312E-33333E+9999"; - let expected = vec![Err(Error::new(InvalidFloat(source.to_string())))]; + let expected = vec![Err(Error::new((1, 19), InvalidFloat(source.to_string())))]; assert_source_has_expected_output!(source, expected) } #[test] fn 
err_given_float_with_consecutive_e() { let source = "1.2312Ee9999"; - let expected = vec![Err(Error::new(InvalidFloat(source.to_string())))]; + let expected = vec![Err(Error::new((1, 12), InvalidFloat(source.to_string())))]; assert_source_has_expected_output!(source, expected) } @@ -342,7 +344,7 @@ mod tests { let source = "1.2312E+-9999"; let expected = vec![ - Err(Error::new(InvalidFloat("1.2312E+".to_string()))), + Err(Error::new((1, 8), InvalidFloat("1.2312E+".to_string()))), Ok(Token::Punctuation(Dash)), Ok(Token::Literal(IntLiteral(9999))), ]; @@ -353,7 +355,7 @@ mod tests { #[test] fn err_given_float_with_multiple_decimals() { let source = "123.456.789"; - let expected = vec![Err(Error::new(InvalidFloat(source.to_string())))]; + let expected = vec![Err(Error::new((1, 11), InvalidFloat(source.to_string())))]; assert_source_has_expected_output!(source, expected) } From 1223c8e12a2c9ec9a24eaa09d448f17e8fecdd1c Mon Sep 17 00:00:00 2001 From: Max Larsson Date: Sat, 28 May 2022 18:10:49 -0400 Subject: [PATCH 2/5] Fix line number implementation --- src/lexer/error.rs | 17 +++--- src/lexer/location.rs | 17 ++++++ src/lexer/mod.rs | 1 + src/lexer/source_iterator.rs | 48 +++++++++------- src/lexer/tokens.rs | 105 +++++++++++++++++------------------ 5 files changed, 106 insertions(+), 82 deletions(-) create mode 100644 src/lexer/location.rs diff --git a/src/lexer/error.rs b/src/lexer/error.rs index 2594192..213dc62 100644 --- a/src/lexer/error.rs +++ b/src/lexer/error.rs @@ -1,18 +1,17 @@ +use crate::lexer::location::Location; use colored::Colorize; use std::fmt::{Display, Formatter}; #[derive(Debug, PartialEq)] pub struct Error { - pub(crate) row: usize, - pub(crate) col: usize, + pub(crate) location: Location, pub(crate) kind: ErrorKind, } impl Error { - pub fn new(location: (usize, usize), kind: ErrorKind) -> Self { + pub fn new(location: impl Into, kind: ErrorKind) -> Self { Self { - row: location.0, - col: location.1, + location: location.into(), kind, } } @@ 
-33,16 +32,16 @@ impl Display for Error { " {} {}:{}:{}", "-->".blue().bold(), "temp-file-path", - self.row, - self.col + self.location.row, + self.location.col )?; writeln!(f, " {}", "|".blue().bold())?; writeln!( f, "{:>2} {} {}", - self.row.to_string().bold(), + self.location.row.to_string().bold(), "|".blue().bold(), - "pub fn temp_line()" + "pub fn temp_line() {}" )?; writeln!(f, " {}", "|".blue().bold())?; writeln!(f, " {}", "|".blue().bold())?; diff --git a/src/lexer/location.rs b/src/lexer/location.rs new file mode 100644 index 0000000..1baf666 --- /dev/null +++ b/src/lexer/location.rs @@ -0,0 +1,17 @@ +#[derive(Debug, PartialEq, Copy, Clone)] +pub struct Location { + pub(crate) row: usize, + pub(crate) col: usize, +} + +impl Location { + pub fn new(row: usize, col: usize) -> Self { + Self { row, col } + } +} + +impl Into for (usize, usize) { + fn into(self) -> Location { + Location::new(self.0, self.1) + } +} diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 3601063..a18a90b 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -2,6 +2,7 @@ mod error; mod source_iterator; mod token; mod tokens; +mod location; pub use token::Bracket; pub use token::Comment; diff --git a/src/lexer/source_iterator.rs b/src/lexer/source_iterator.rs index 3feb0f2..4a20e54 100644 --- a/src/lexer/source_iterator.rs +++ b/src/lexer/source_iterator.rs @@ -1,18 +1,21 @@ +use crate::lexer::location::Location; use std::iter::{Enumerate, Peekable}; use std::str::Chars; #[derive(Debug)] pub struct SourceIterator<'a> { - row: usize, - col: usize, + /// Location of character that self.next() will return next time it is called + next_location: Option, + /// Location of what self.next() returned last time it was called + cur_location: Option, src: Peekable>>, } impl<'a> SourceIterator<'a> { pub fn new(source: &'a str) -> Self { Self { - row: 1, - col: 1, + next_location: Some(Location::new(1, 1)), + cur_location: None, src: source.chars().enumerate().peekable(), } } @@ -67,20 +70,24 
@@ impl<'a> SourceIterator<'a> { pub fn next(&mut self) -> Option { let next = self.src.next().map(|(_, c)| c); + self.cur_location = self.next_location; + + // Safe to unwrap self.next_location when next is Some() because next_location is only set to None + // when self.src runs out of chars to yield match next { Some('\n') => { - self.row += 1; - self.col = 1; + self.next_location.as_mut().unwrap().row += 1; + self.next_location.as_mut().unwrap().col = 1; } - _ => self.col += 1, + Some(_) => self.next_location.as_mut().unwrap().col += 1, + None => self.next_location = None, } next } - /// Returns (row, col) - pub fn loc(&self) -> (usize, usize) { - (self.row, self.col) + pub fn loc(&self) -> Option { + self.cur_location } } @@ -143,26 +150,29 @@ mod tests { #[test] fn row_count() { - let mut iter = SourceIterator::new("row 1\nrow 2\nrow 3\nrow 4"); - iter.skip(6); - assert_eq!(iter.loc(), (2, 1)); - iter.skip(6); - assert_eq!(iter.loc(), (3, 1)); + let mut iter = SourceIterator::new("row 1\nrow 2\nrow 3"); + iter.skip(1); + assert_eq!(iter.loc().map(|loc| loc.row), Some(1)); iter.skip(6); - assert_eq!(iter.loc(), (4, 1)); + assert_eq!(iter.loc().map(|loc| loc.row), Some(2)); iter.skip(6); + assert_eq!(iter.loc().map(|loc| loc.row), Some(3)); + iter.skip(4); + assert_eq!(iter.loc().map(|loc| loc.row), Some(3)); + iter.skip(1); assert_eq!(iter.next(), None); + assert_eq!(iter.loc(), None); } #[test] fn col_count() { let mut iter = SourceIterator::new("123456789\n123456789"); iter.skip(6); - assert_eq!(iter.loc(), (1, 7)); + assert_eq!(iter.loc().map(|loc| loc.col), Some(6)); iter.skip(6); - assert_eq!(iter.loc(), (2, 3)); + assert_eq!(iter.loc().map(|loc| loc.col), Some(2)); iter.skip(1); - assert_eq!(iter.loc(), (2, 4)); + assert_eq!(iter.loc().map(|loc| loc.col), Some(3)); iter.skip(100); assert_eq!(iter.next(), None); } diff --git a/src/lexer/tokens.rs b/src/lexer/tokens.rs index 3cac3a5..df10a0e 100644 --- a/src/lexer/tokens.rs +++ b/src/lexer/tokens.rs @@ 
-15,7 +15,7 @@ use crate::lexer::Punctuation::{ Equals, Exclamation, Hashtag, Newline, OpenBracket, Percent, Pipe, Plus, Question, SingleQuote, Slash, Tilde, }; -use crate::lexer::{Error, Result, Token}; +use crate::lexer::{Error, ErrorKind, Result, Token}; #[derive(Debug)] pub struct Tokens<'a> { @@ -39,56 +39,48 @@ impl<'a> Iterator for Tokens<'a> { .skip_while(|c| c.is_ascii_whitespace() && c != '\n'); let tok = match self.src.peek()? { - '&' => Ok(Token::Punctuation(Ampersand)), - '*' => Ok(Token::Punctuation(Asterisk)), - '@' => Ok(Token::Punctuation(At)), - '\\' => Ok(Token::Punctuation(Backslash)), - '^' => Ok(Token::Punctuation(Caret)), - ':' => Ok(Token::Punctuation(Colon)), - ',' => Ok(Token::Punctuation(Comma)), - '-' => Ok(Token::Punctuation(Dash)), - '$' => Ok(Token::Punctuation(Dollar)), - '.' => Ok(Token::Punctuation(Dot)), - '=' => Ok(Token::Punctuation(Equals)), - '!' => Ok(Token::Punctuation(Exclamation)), - '#' => Ok(Token::Punctuation(Hashtag)), - '\n' => Ok(Token::Punctuation(Newline)), - '%' => Ok(Token::Punctuation(Percent)), - '|' => Ok(Token::Punctuation(Pipe)), - '+' => Ok(Token::Punctuation(Plus)), - '?' 
=> Ok(Token::Punctuation(Question)), - '\'' => Ok(Token::Punctuation(SingleQuote)), - '~' => Ok(Token::Punctuation(Tilde)), - - '<' => Ok(Token::Punctuation(OpenBracket(Angle))), - '>' => Ok(Token::Punctuation(CloseBracket(Angle))), - '{' => Ok(Token::Punctuation(OpenBracket(Curly))), - '}' => Ok(Token::Punctuation(CloseBracket(Curly))), - '(' => Ok(Token::Punctuation(OpenBracket(Round))), - ')' => Ok(Token::Punctuation(CloseBracket(Round))), - '[' => Ok(Token::Punctuation(OpenBracket(Square))), - ']' => Ok(Token::Punctuation(CloseBracket(Square))), - '/' => Ok(self.read_slash_or_comment()), '"' => self.read_str_literal(), c if c.is_ascii_digit() => self.read_numeric_literal(), c if c.is_alphabetic() || c == '_' => Ok(self.read_identifier_or_kw()), - c => Err(Error::new(self.src.loc(), UnknownStartOfToken(c))), + // otherwise, consume the peeked char + _ => match self.src.next().unwrap() { + '&' => Ok(Token::Punctuation(Ampersand)), + '*' => Ok(Token::Punctuation(Asterisk)), + '@' => Ok(Token::Punctuation(At)), + '\\' => Ok(Token::Punctuation(Backslash)), + '^' => Ok(Token::Punctuation(Caret)), + ':' => Ok(Token::Punctuation(Colon)), + ',' => Ok(Token::Punctuation(Comma)), + '-' => Ok(Token::Punctuation(Dash)), + '$' => Ok(Token::Punctuation(Dollar)), + '.' => Ok(Token::Punctuation(Dot)), + '=' => Ok(Token::Punctuation(Equals)), + '!' => Ok(Token::Punctuation(Exclamation)), + '#' => Ok(Token::Punctuation(Hashtag)), + '\n' => Ok(Token::Punctuation(Newline)), + '%' => Ok(Token::Punctuation(Percent)), + '|' => Ok(Token::Punctuation(Pipe)), + '+' => Ok(Token::Punctuation(Plus)), + '?' 
=> Ok(Token::Punctuation(Question)), + '\'' => Ok(Token::Punctuation(SingleQuote)), + '~' => Ok(Token::Punctuation(Tilde)), + + '<' => Ok(Token::Punctuation(OpenBracket(Angle))), + '>' => Ok(Token::Punctuation(CloseBracket(Angle))), + '{' => Ok(Token::Punctuation(OpenBracket(Curly))), + '}' => Ok(Token::Punctuation(CloseBracket(Curly))), + '(' => Ok(Token::Punctuation(OpenBracket(Round))), + ')' => Ok(Token::Punctuation(CloseBracket(Round))), + '[' => Ok(Token::Punctuation(OpenBracket(Square))), + ']' => Ok(Token::Punctuation(CloseBracket(Square))), + + c => Err(self.create_error(UnknownStartOfToken(c))), + }, }; - match tok { - Ok(Token::Punctuation(Slash)) => {} - Ok(Token::Punctuation(_)) => self.src.skip(1), - Err(Error { - row: _, - col: _, - kind: UnknownStartOfToken(_), - }) => self.src.skip(1), - _ => {} - } - Some(tok) } } @@ -123,7 +115,7 @@ impl<'a> Tokens<'a> { loop { match self.src.peek() { - Some('\n') | None => return Err(Error::new(self.src.loc(), UnterminatedStr)), + Some('\n') | None => return Err(self.create_error(UnterminatedStr)), Some('"') => { self.src.skip(1); let default = Ok(Token::Literal(StrLiteral(contents))); @@ -153,8 +145,8 @@ impl<'a> Tokens<'a> { Some('"') => Ok('"'), Some('u') => self.take_hex_code_and_conv_to_char(4), Some('x') => self.take_hex_code_and_conv_to_char(2), - Some(c) => Err(Error::new(self.src.loc(), UnknownEscape(c))), - None => Err(Error::new(self.src.loc(), UnterminatedStr)), + Some(c) => Err(self.create_error(UnknownEscape(c))), + None => Err(self.create_error(UnterminatedStr)), // todo newline! 
} } @@ -171,18 +163,18 @@ impl<'a> Tokens<'a> { self.src.skip(1); code.push(c); } - Some('"') => return Err(Error::new(self.src.loc(), TruncatedEscapeSequence)), + Some('"') => return Err(self.create_error(TruncatedEscapeSequence)), Some(c) => { self.src.skip(1); - return Err(Error::new(self.src.loc(), InvalidCharInEscape(c))); + return Err(self.create_error(InvalidCharInEscape(c))); } - None => return Err(Error::new(self.src.loc(), UnterminatedStr)), + None => return Err(self.create_error(UnterminatedStr)), } } // It is safe to unwrap from_str_radix(), which panics if radix > 36 (ours is 16) match char::from_u32(u32::from_str_radix(&code, 16).unwrap()) { Some(c) => Ok(c), - None => Err(Error::new(self.src.loc(), BadUnicodeEscape(code))), + None => Err(self.create_error(BadUnicodeEscape(code))), } } @@ -209,7 +201,7 @@ impl<'a> Tokens<'a> { if num.contains(['e', 'E', '.']) { match num.parse() { Ok(parsed) => Ok(Token::Literal(FloatLiteral(parsed))), - Err(_) => Err(Error::new(self.src.loc(), InvalidFloat(num))), + Err(_) => Err(self.create_error(InvalidFloat(num))), } } else { // This is safe to unwrap because we can't get an invalid int @@ -217,6 +209,11 @@ impl<'a> Tokens<'a> { } } + fn create_error(&self, kind: ErrorKind) -> Error { + // Safe to unwrap self.src.loc() because we never throw errors after self.src returns None + Error::new(self.src.loc().unwrap(), kind) + } + /// Returns next identifier or keyword consisting of alphabetic letters, digits, /// and underscores. 
/// @@ -273,14 +270,14 @@ mod tests { #[test] fn err_given_unknown_str_escape() { let source = r#""\e""#; - let expected = vec![Err(Error::new((1, 4), UnknownEscape('e')))]; + let expected = vec![Err(Error::new((1, 3), UnknownEscape('e')))]; assert_source_has_expected_output!(&source.to_string(), expected) } #[test] fn err_given_truncated_unicode_escape() { let source = r#""\u3b9""#; - let expected = vec![Err(Error::new((1, 7), TruncatedEscapeSequence))]; + let expected = vec![Err(Error::new((1, 6), TruncatedEscapeSequence))]; assert_source_has_expected_output!(&source.to_string(), expected) } @@ -294,7 +291,7 @@ mod tests { #[test] fn err_given_truncated_hex_escape() { let source = r#""\x9""#; - let expected = vec![Err(Error::new((1, 5), TruncatedEscapeSequence))]; + let expected = vec![Err(Error::new((1, 4), TruncatedEscapeSequence))]; assert_source_has_expected_output!(&source.to_string(), expected) } From 3491ff437a4a4b96bd87a2b16becbf89d53eefb1 Mon Sep 17 00:00:00 2001 From: Max Larsson Date: Sat, 28 May 2022 18:31:11 -0400 Subject: [PATCH 3/5] Make Assumptions singular to be consistent Co-authored-by: Thomas Breydo <40642484+thomasbreydo@users.noreply.github.com> --- src/lexer/source_iterator.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lexer/source_iterator.rs b/src/lexer/source_iterator.rs index 4a20e54..fc2c4b7 100644 --- a/src/lexer/source_iterator.rs +++ b/src/lexer/source_iterator.rs @@ -66,7 +66,7 @@ impl<'a> SourceIterator<'a> { /// Returns the next character of the iterator and steps the iterator. This functions also /// keeps track of row and column numbers. 
/// - /// Assumptions: Every function that steps the iterator does so through this function + /// Assumption: Every function that steps the iterator does so through this function pub fn next(&mut self) -> Option<char> { let next = self.src.next().map(|(_, c)| c); From 80f0d7c8e0334c3ba4747259a18444f0a28355a7 Mon Sep 17 00:00:00 2001 From: Max Larsson Date: Sat, 28 May 2022 18:34:40 -0400 Subject: [PATCH 4/5] Refer to rows as lines --- src/lexer/error.rs | 4 ++-- src/lexer/location.rs | 6 +++--- src/lexer/source_iterator.rs | 22 +++++++++++----------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/lexer/error.rs b/src/lexer/error.rs index 213dc62..02666c0 100644 --- a/src/lexer/error.rs +++ b/src/lexer/error.rs @@ -32,14 +32,14 @@ impl Display for Error { " {} {}:{}:{}", "-->".blue().bold(), "temp-file-path", - self.location.row, + self.location.line, self.location.col )?; writeln!(f, " {}", "|".blue().bold())?; writeln!( f, "{:>2} {} {}", - self.location.row.to_string().bold(), + self.location.line.to_string().bold(), "|".blue().bold(), "pub fn temp_line() {}" )?; diff --git a/src/lexer/location.rs b/src/lexer/location.rs index 1baf666..6d1bd77 100644 --- a/src/lexer/location.rs +++ b/src/lexer/location.rs @@ -1,12 +1,12 @@ #[derive(Debug, PartialEq, Copy, Clone)] pub struct Location { - pub(crate) row: usize, + pub(crate) line: usize, pub(crate) col: usize, } impl Location { - pub fn new(row: usize, col: usize) -> Self { - Self { row, col } + pub fn new(line: usize, col: usize) -> Self { + Self { line, col } } } diff --git a/src/lexer/source_iterator.rs b/src/lexer/source_iterator.rs index fc2c4b7..998f00d 100644 --- a/src/lexer/source_iterator.rs +++ b/src/lexer/source_iterator.rs @@ -64,7 +64,7 @@ impl<'a> SourceIterator<'a> { } /// Returns the next character of the iterator and steps the iterator. This functions also - /// keeps track of row and column numbers. + /// keeps track of line and column numbers. 
/// /// Assumption: Every function that steps the iterator does so through this function pub fn next(&mut self) -> Option<char> { @@ -76,7 +76,7 @@ impl<'a> SourceIterator<'a> { // when self.src runs out of chars to yield match next { Some('\n') => { - self.next_location.as_mut().unwrap().row += 1; + self.next_location.as_mut().unwrap().line += 1; self.next_location.as_mut().unwrap().col = 1; } Some(_) => self.next_location.as_mut().unwrap().col += 1, @@ -149,16 +149,16 @@ mod tests { } #[test] - fn row_count() { - let mut iter = SourceIterator::new("row 1\nrow 2\nrow 3"); + fn line_count() { + let mut iter = SourceIterator::new("line 1\nline 2\nline 3"); iter.skip(1); - assert_eq!(iter.loc().map(|loc| loc.row), Some(1)); - iter.skip(6); - assert_eq!(iter.loc().map(|loc| loc.row), Some(2)); - iter.skip(6); - assert_eq!(iter.loc().map(|loc| loc.row), Some(3)); - iter.skip(4); - assert_eq!(iter.loc().map(|loc| loc.row), Some(3)); + assert_eq!(iter.loc().map(|loc| loc.line), Some(1)); + iter.skip(7); + assert_eq!(iter.loc().map(|loc| loc.line), Some(2)); + iter.skip(7); + assert_eq!(iter.loc().map(|loc| loc.line), Some(3)); + iter.skip(5); + assert_eq!(iter.loc().map(|loc| loc.line), Some(3)); iter.skip(1); assert_eq!(iter.next(), None); assert_eq!(iter.loc(), None); From f9d1ba7d72984d1188ee0a85e9e9e930ef3435bc Mon Sep 17 00:00:00 2001 From: Max Larsson Date: Sat, 28 May 2022 18:36:32 -0400 Subject: [PATCH 5/5] Refer to locations as locs --- src/lexer/error.rs | 12 ++++++------ src/lexer/source_iterator.rs | 22 +++++++++++----------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/lexer/error.rs b/src/lexer/error.rs index 02666c0..a1d69ea 100644 --- a/src/lexer/error.rs +++ b/src/lexer/error.rs @@ -4,14 +4,14 @@ use std::fmt::{Display, Formatter}; #[derive(Debug, PartialEq)] pub struct Error { - pub(crate) location: Location, + pub(crate) loc: Location, pub(crate) kind: ErrorKind, } impl Error { - pub fn new(location: impl Into<Location>, kind: ErrorKind) -> Self 
{ + pub fn new(loc: impl Into<Location>, kind: ErrorKind) -> Self { Self { - location: location.into(), + loc: loc.into(), kind, } } @@ -32,14 +32,14 @@ impl Display for Error { " {} {}:{}:{}", "-->".blue().bold(), "temp-file-path", - self.location.line, - self.location.col + self.loc.line, + self.loc.col )?; writeln!(f, " {}", "|".blue().bold())?; writeln!( f, "{:>2} {} {}", - self.location.line.to_string().bold(), + self.loc.line.to_string().bold(), "|".blue().bold(), "pub fn temp_line() {}" )?; diff --git a/src/lexer/source_iterator.rs b/src/lexer/source_iterator.rs index 998f00d..3d3b33c 100644 --- a/src/lexer/source_iterator.rs +++ b/src/lexer/source_iterator.rs @@ -5,17 +5,17 @@ use std::str::Chars; #[derive(Debug)] pub struct SourceIterator<'a> { /// Location of character that self.next() will return next time it is called - next_location: Option<Location>, + next_loc: Option<Location>, /// Location of what self.next() returned last time it was called - cur_location: Option<Location>, + cur_loc: Option<Location>, src: Peekable<Enumerate<Chars<'a>>>, } impl<'a> SourceIterator<'a> { pub fn new(source: &'a str) -> Self { Self { - next_location: Some(Location::new(1, 1)), - cur_location: None, + next_loc: Some(Location::new(1, 1)), + cur_loc: None, src: source.chars().enumerate().peekable(), } } @@ -70,24 +70,24 @@ impl<'a> SourceIterator<'a> { pub fn next(&mut self) -> Option<char> { let next = self.src.next().map(|(_, c)| c); - self.cur_location = self.next_location; + self.cur_loc = self.next_loc; - // Safe to unwrap self.next_location when next is Some() because next_location is only set to None + // Safe to unwrap self.next_loc when next is Some() because next_loc is only set to None // when self.src runs out of chars to yield match next { Some('\n') => { - self.next_location.as_mut().unwrap().line += 1; - self.next_location.as_mut().unwrap().col = 1; + self.next_loc.as_mut().unwrap().line += 1; + self.next_loc.as_mut().unwrap().col = 1; } - Some(_) => self.next_location.as_mut().unwrap().col += 1, - None => self.next_location = 
None, + Some(_) => self.next_loc.as_mut().unwrap().col += 1, + None => self.next_loc = None, } next } pub fn loc(&self) -> Option<Location> { - self.cur_location + self.cur_loc } }