From 7d2b941609877605f246cc88aeb3812544270974 Mon Sep 17 00:00:00 2001 From: Santi Weight Date: Sun, 12 Sep 2021 20:30:37 -0700 Subject: [PATCH] review --- .vscode/tasks.json | 50 +++++++ hie.yaml | 4 + package.yaml | 3 + src/Scanner.hs | 348 +++++++++++++++++++++------------------------ stack.yaml | 4 +- test/test_lexer.hs | 20 +-- 6 files changed, 234 insertions(+), 195 deletions(-) create mode 100644 .vscode/tasks.json create mode 100644 hie.yaml diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..c7efda6 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,50 @@ + +{ + // Automatically created by phoityne-vscode extension. + + "version": "2.0.0", + "presentation": { + "reveal": "always", + "panel": "new" + }, + "tasks": [ + { + // F7 + "group": { + "kind": "build", + "isDefault": true + }, + "label": "haskell build", + "type": "shell", + //"command": "cabal configure && cabal build" + "command": "stack build" + }, + { + // F6 + "group": "build", + "type": "shell", + "label": "haskell clean & build", + //"command": "cabal clean && cabal configure && cabal build" + "command": "stack clean && stack build" + //"command": "stack clean ; stack build" // for powershell + }, + { + // F8 + "group": { + "kind": "test", + "isDefault": true + }, + "type": "shell", + "label": "haskell test", + //"command": "cabal test" + "command": "stack test" + }, + { + // F6 + "isBackground": true, + "type": "shell", + "label": "haskell watch", + "command": "stack build --test --no-run-tests --file-watch" + } + ] +} diff --git a/hie.yaml b/hie.yaml new file mode 100644 index 0000000..3b836a5 --- /dev/null +++ b/hie.yaml @@ -0,0 +1,4 @@ +cradle: + stack: + - path: "./src" + component: "haskell-lox:lib" \ No newline at end of file diff --git a/package.yaml b/package.yaml index c950724..6f703e4 100644 --- a/package.yaml +++ b/package.yaml @@ -21,6 +21,9 @@ description: Please see the README on Github at = 4.11 && < 10 +# I personally wouldn't depend on rio 
for the purposes of a blog series +# rio's great, but it's a commitment to an ecosystem, not really a standard +# library like the other dependencies. - rio >= 0.1.12.0 - array >= 0.5.4.0 - text >= 1.2.4.1 diff --git a/src/Scanner.hs b/src/Scanner.hs index 6cbae53..48e23f2 100644 --- a/src/Scanner.hs +++ b/src/Scanner.hs @@ -1,78 +1,78 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE ScopedTypeVariables #-} module Scanner where -import Import hiding (many, (<|>), try) -import Data.Text as T -import Data.Char -import Text.Parsec.String as PS -import Text.Parsec.Char as PC -import Text.Parsec -import RIO.Partial (read) +import Data.Char +import Data.Text as T +import Import hiding ( (<|>) + , many + , try + ) +import RIO.Partial ( read ) +import Text.Parsec +import Text.Parsec.Char as PC +import Text.Parsec.String as PS data LoxObject = JString | JDouble deriving (Show, Eq) +data Op = And | Or | Plus | Minus | Eq | Neq | Leq | Geq | GT | LT + deriving (Show, Eq) + data LoxTok = -- Single-character tokens. - LEFT_PAREN - | RIGHT_PAREN - | LEFT_BRACE - | RIGHT_BRACE - | COMMA - | DOT - | MINUS - | PLUS - | SEMICOLON - | SLASH - | STAR + LParen + | RParen + | LBrace + | RBrace + | Comma + | Dot + | Semicolon + | Slash + | Star | -- One or two character tokens. - BANG - | BANG_EQUAL - | EQUAL - | EQUAL_EQUAL - | GREATER - | GREATER_EQUAL - | LESS - | LESS_EQUAL + Bang + | Equal + | Greater + | Less + | LessEqual | -- Literals. - IDENTIFIER String - | STRING String - | NUMBER Double - | COMMENT Text + Identifier String + | String String + | Number Double + | Comment Text -- Keywords. 
- | AND - | CLASS - | ELSE - | FALSE - | FUN - | FOR - | IF - | NIL - | OR - | PRINT - | RETURN - | SUPER - | THIS - | TRUE - | VAR - | WHILE - | WHITESPACE - | EOF + | Class + | Else + | BoolTok Bool + | OpTok Op + | Fun + | For + | If + | Nil + | Print + | Return + | Super + | This + | Var + | While + | Whitespace + | Eof deriving (Show, Eq) data LoxTokInfo = LoxTokInfo - { tokinfo_type :: LoxTok, - tokinfo_lexeme :: Maybe T.Text, - tokinfo_literal :: Maybe LoxObject, - tok_position :: SourcePos + { tok :: LoxTok + , tokinfo_lexeme :: Maybe T.Text + , tokinfo_literal :: Maybe LoxObject + , position :: SourcePos } deriving (Show, Eq) - tokenShow :: LoxTokInfo -> String -tokenShow t = "LoxTok=" ++ show (tokinfo_type t) +tokenShow t = "LoxTok=" ++ show (tok t) type LoxScannerResult = Either ParseError [LoxTokInfo] -- type LoxScanner = Parsec String () [LoxTok] @@ -80,161 +80,143 @@ type LoxScannerResult = Either ParseError [LoxTokInfo] whitespace :: Parser () whitespace = void $ many $ oneOf " \n\t" --- whitespaceToken1 :: Parser LoxTokInfo --- whitespaceToken1 = do --- source_pos <- getPosition --- return $ LoxTokInfo WHITESPACE Nothing Nothing source_pos - -whitespaceToken :: Parser LoxTokInfo -charMapping :: [(LoxTok, Char)] -charMapping = - [ (LEFT_PAREN, '('), - (RIGHT_PAREN, ')'), - (LEFT_BRACE, '{'), - (RIGHT_BRACE, '}'), - (COMMA, ','), - (DOT, '.'), - (MINUS, '-'), - (PLUS, '+'), - (SEMICOLON, ';'), - (SLASH, '/'), - (STAR, '*'), - (BANG, '!'), - (EQUAL, '='), - (GREATER, '>'), - (LESS, '<') - ] +whitespace1 :: Parser LoxTokInfo +whitespace1 = withInfo $ Whitespace <$ many1 (char ' ') -scanSingleCharToken :: Parser LoxTokInfo -scanSingleCharToken = do - source_pos <- getPosition - sel <- choice $ build <$> charMapping - return $ LoxTokInfo sel Nothing Nothing source_pos - where - build :: (LoxTok, Char) -> Parser LoxTok - build (x, y) = x <$ char y <* whitespace - -doubleCharMapping :: [(LoxTok, String)] -doubleCharMapping = - [ (BANG_EQUAL, "!="), - 
(EQUAL_EQUAL, "=="), - (GREATER_EQUAL, ">="), - (LESS_EQUAL, "<=") - ] - -scanDoubleToken :: Parser LoxTokInfo -scanDoubleToken = do - source_pos <- getPosition - sel <- choice $ build <$> doubleCharMapping - return $ LoxTokInfo sel Nothing Nothing source_pos - where - build :: (LoxTok, String) -> Parser LoxTok - build (x, y) = x <$ string y <* whitespace - -keywordMapping :: [(LoxTok, String)] -keywordMapping = - [ (AND, "and"), - (CLASS, "class"), - (ELSE, "else"), - (FALSE, "false"), - (FUN, "fun"), - (FOR, "for"), - (IF, "if"), - (NIL, "nil"), - (OR, "or"), - (PRINT, "print"), - (RETURN, "return"), - (SUPER, "super"), - (THIS, "this"), - (TRUE, "true"), - (VAR, "var"), - (WHILE, "while") - ] +lexeme :: Parser a -> Parser a +lexeme p = p <* whitespace -scanKeywordToken :: Parser LoxTokInfo -scanKeywordToken = do +withInfo :: Parser LoxTok -> Parser LoxTokInfo +withInfo p = do source_pos <- getPosition - sel <- choice $ build <$> keywordMapping - return $ LoxTokInfo sel Nothing Nothing source_pos - where - build :: (LoxTok, String) -> Parser LoxTok - build (x, y) = x <$ string y <* whitespace + sel <- p + pure $ LoxTokInfo sel Nothing Nothing source_pos -whitespaceToken = do +-- whitespaceToken1 :: Parser LoxTokInfo +-- whitespaceToken1 = do +-- source_pos <- getPosition +-- pure $ LoxTokInfo WHITESPACE Nothing Nothing source_pos + +lSingleChar :: Parser LoxTokInfo +lSingleChar = + withInfo + . 
choice + $ build + <$> [ (LParen , '(') + , (RParen , ')') + , (LBrace , '{') + , (RBrace , '}') + , (Comma , ',') + , (Dot , '.') + , (OpTok Minus, '-') + , (OpTok Plus , '+') + , (Semicolon , ';') + , (Slash , '/') + , (Star , '*') + , (Bang , '!') + , (Equal , '=') + , (Greater , '>') + , (Less , '<') + ] + where + build :: (LoxTok, Char) -> Parser LoxTok + build (x, y) = lexeme $ x <$ char y + +build :: (LoxTok, String) -> Parser LoxTok +build (x, y) = lexeme $ x <$ string y + +lOp :: Parser LoxTok +lOp = + choice + $ build + <$> [ (OpTok Neq, "!=") + , (OpTok Eq , "==") + , (OpTok Geq, ">=") + , (OpTok Leq, "<=") + ] + +lDoubleTok :: Parser LoxTokInfo +lDoubleTok = do source_pos <- getPosition - _ <- many1 $ char ' ' - return $ LoxTokInfo WHITESPACE Nothing Nothing source_pos + sel <- lOp + pure $ LoxTokInfo sel Nothing Nothing source_pos + +keywordMap :: [(LoxTok, String)] +keywordMap = + [ (OpTok And , "and") + , (Class , "class") + , (Else , "else") + , (BoolTok False, "false") + , (Fun , "fun") + , (For , "for") + , (If , "if") + , (Nil , "nil") + , (OpTok Or , "or") + , (Print , "print") + , (Return , "return") + , (Super , "super") + , (This , "this") + , (BoolTok True , "true") + , (Var , "var") + , (While , "while") + ] scanDouble :: Parser LoxTokInfo -scanDouble = do - source_pos <- getPosition - let la = lookAhead (whitespaceToken <|> scanSingleCharToken) - sel <- do - firstPart <- Text.Parsec.many1 digit - try (secondCharacter firstPart <* la <* whitespace) <|> NUMBER (read firstPart) <$ la <* whitespace - return $ LoxTokInfo sel Nothing Nothing source_pos - where - secondCharacter :: String -> Parser LoxTok - secondCharacter firstPart = do - void $ char '.' - secondPart <- Text.Parsec.many1 digit <* whitespace - return $ NUMBER $ read $ Import.concat [firstPart, ".", secondPart] +scanDouble = withInfo $ do + preDot <- Text.Parsec.many1 digit + postDotMay <- Text.Parsec.optionMaybe $ do + dot <- char '.' 
+ decimals <- Text.Parsec.many1 digit + pure $ dot : decimals + pure $ Number $ read $ preDot <> fromMaybe "" postDotMay + -- -- https://stackoverflow.com/questions/24106314/parser-for-quoted-string-using-parsec escape :: Parser String -escape = do - d <- char '\\' - c <- oneOf "\\\"0nrvtbf" -- all the characters which can be escaped - return [d, c] - -nonEscape :: Parser Char -nonEscape = noneOf "\\\"\0\n\r\v\t\b\f" +escape = liftA2 (:) (char '\\') (pure <$> oneOf "\\\"0nrvtbf") -- all the characters which can be escaped -character :: Parser String -character = fmap return nonEscape <|> escape +lBetween :: Parser l -> Parser a -> Parser r -> Parser a +lBetween l p r = lexeme $ l *> p <* r scanQuotedString :: Parser LoxTokInfo -scanQuotedString = do - source_pos <- getPosition - qstring <- char '"' *> many character <* char '"' <* whitespace - return $ LoxTokInfo (STRING $ Import.concat qstring) Nothing Nothing source_pos +scanQuotedString = withInfo lString + where + lString = + String . 
Import.concat <$> lBetween (char '"') (many lStringChar) (char '"') + lStringChar = nonEscape <|> escape + nonEscape = pure <$> noneOf "\\\"\0\n\r\v\t\b\f" -- -- http://jakewheat.github.io/intro_to_parsing/#_var var :: Parser String -var = do - fc <- firstChar - rest <- many nonFirstChar - return (fc : rest) - where - firstChar = satisfy (\a -> isLetter a || a == '_') - nonFirstChar = satisfy (\a -> isDigit a || isLetter a || a == '_') - -checkIfIdentifier :: Parser LoxTokInfo -checkIfIdentifier = do - source_pos <- getPosition +var = liftA2 (:) firstChar (many nonFirstChar) + where + firstChar = satisfy (\a -> isLetter a || a == '_') + nonFirstChar = satisfy (\a -> isDigit a || isLetter a || a == '_') + +lKeywordOrIdent :: Parser LoxTokInfo +lKeywordOrIdent = withInfo $ do s <- var - result ([(x, y) | (x, y) <- keywordMapping, y == s]) s source_pos - where - result xs s source_pos = do - case xs of - [] -> return $ LoxTokInfo (IDENTIFIER s) Nothing Nothing source_pos - (x, _) : _ -> return $ LoxTokInfo x Nothing Nothing source_pos + -- A map lookup would make more sense here + case Import.filter ((== s) . 
snd) keywordMap of + [] -> pure (Identifier s) + (x, _) : _ -> pure x scanComment :: Parser LoxTokInfo scanComment = do source_pos <- getPosition - _ <- string "//" + _ <- string "//" -- TODO: Find a better way to do this, scanning this more than once is not desirable - comment <- try (manyTill anyToken (try (oneOf "\n"))) <|> manyTill anyToken eof - return $ LoxTokInfo (COMMENT (T.pack comment)) Nothing Nothing source_pos + comment <- + try (manyTill anyToken (try (oneOf "\n"))) <|> manyTill anyToken eof + pure $ LoxTokInfo (Comment (T.pack comment)) Nothing Nothing source_pos scanToken :: Parser LoxTokInfo scanToken = try scanComment - <|> try scanDoubleToken - <|> try scanSingleCharToken + <|> try lDoubleTok + <|> try lSingleChar <|> try scanQuotedString - <|> scanDouble - <|> checkIfIdentifier + <|> try scanDouble + <|> lKeywordOrIdent scanner :: String -> LoxScannerResult scanner = parse (many scanToken <* eof) "" diff --git a/stack.yaml b/stack.yaml index ab3a60e..abf5a10 100644 --- a/stack.yaml +++ b/stack.yaml @@ -16,9 +16,9 @@ # a snapshot provided as a file might change, whereas a url resource does not. # # resolver: ./custom-snapshot.yaml -# resolver: https://example.com/snapshots/2018-01-01.yaml +# resolver: https://example.com/sjnapshots/2018-01-01.yaml resolver: - url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/7.yaml + url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/6.yaml # User packages to be built. # Various formats can be used as shown in the example below. 
diff --git a/test/test_lexer.hs b/test/test_lexer.hs index bae24e4..ea91502 100644 --- a/test/test_lexer.hs +++ b/test/test_lexer.hs @@ -20,7 +20,7 @@ testToken test_name token_str expected_tok = testCase test_name $ do let result = scanner token_str case result of Left x -> assertFailure $ show x - Right (x : _) -> assertEqual token_str (tokinfo_type x) expected_tok + Right (x : _) -> assertEqual token_str (tok x) expected_tok @@ -30,23 +30,23 @@ testInvalidToken test_name token_str = testCase test_name $ do Left x -> return () Right x -> assertFailure $ show x -testSingleCharToken = testToken "testSingleCharToken" "+" PLUS +testSingleCharToken = testToken "testSingleCharToken" "+" (OpTok Plus) -testDoubleCharToken = testToken "testDoubleCharToken" "==" EQUAL_EQUAL +testDoubleCharToken = testToken "testDoubleCharToken" "==" (OpTok Eq) -testKeywordToken = testToken "testKeywordToken" "class" CLASS +testKeywordToken = testToken "testKeywordToken" "class" Class -testScanDouble_1 = testToken "testScanDouble_1" "1121.1121;" (NUMBER 1121.1121) -testScanDouble_2 = testToken "testScanDouble_2" "0.1121;" (NUMBER 0.1121) +testScanDouble_1 = testToken "testScanDouble_1" "1121.1121;" (Number 1121.1121) +testScanDouble_2 = testToken "testScanDouble_2" "0.1121;" (Number 0.1121) -- We don't like the Lexer doing this, but we will try handling these scenarios in the parser -testScanDouble_4 = testToken "testScanDouble_4" "1121." (NUMBER 1121.0) +testScanDouble_4 = testToken "testScanDouble_4" "1121." (Number 1121.0) testScanDoubleInvalid_2 = testInvalidToken "testScanDouble_2" "1121." 
-testScanIdentifier = testToken "testScanIdentifier" "and_1" (IDENTIFIER "and_1") +testScanIdentifier = testToken "testScanIdentifier" "and_1" (Identifier "and_1") -- invalid tokens @@ -59,8 +59,8 @@ testScanInvalidDOT = testInvalidToken "testScanInvalidDoubleDOT" ".1121" testScanInvalidIdentifier_1 = testInvalidToken "testScanInvalidIdentifier_1" "1and" testScanInvalidIdentifier_2 = testInvalidToken "testScanInvalidIdentifier_2" "1_and" -testComment_1 = testToken "testComment" "// this is a comment" (COMMENT " this is a comment") -testComment_2 = testToken "testComment" "// this is a comment\n" (COMMENT " this is a comment") +testComment_1 = testToken "testComment" "// this is a comment" (Comment " this is a comment") +testComment_2 = testToken "testComment" "// this is a comment\n" (Comment " this is a comment") main = do defaultMain $ testGroup "tokenizer_tests_example_1" [