From 7d2b941609877605f246cc88aeb3812544270974 Mon Sep 17 00:00:00 2001
From: Santi Weight <santiagoweight@forwardnetworks.com>
Date: Sun, 12 Sep 2021 20:30:37 -0700
Subject: [PATCH] review

---
 .vscode/tasks.json |  50 +++++++
 hie.yaml           |   4 +
 package.yaml       |   3 +
 src/Scanner.hs     | 348 +++++++++++++++++++++------------------------
 stack.yaml         |   4 +-
 test/test_lexer.hs |  20 +--
 6 files changed, 234 insertions(+), 195 deletions(-)
 create mode 100644 .vscode/tasks.json
 create mode 100644 hie.yaml

diff --git a/.vscode/tasks.json b/.vscode/tasks.json
new file mode 100644
index 0000000..c7efda6
--- /dev/null
+++ b/.vscode/tasks.json
@@ -0,0 +1,50 @@
+
+{
+  // Automatically created by phoityne-vscode extension.
+
+  "version": "2.0.0",
+  "presentation": {
+    "reveal": "always",
+    "panel": "new"
+  },
+  "tasks": [
+    {
+      // F7
+      "group": {
+        "kind": "build",
+        "isDefault": true
+      },
+      "label": "haskell build",
+      "type": "shell",
+      //"command": "cabal configure && cabal build"
+      "command": "stack build"
+    },
+    {
+      // F6
+      "group": "build",
+      "type": "shell",
+      "label": "haskell clean & build",
+      //"command": "cabal clean && cabal configure && cabal build"
+      "command": "stack clean && stack build"
+      //"command": "stack clean ; stack build"  // for powershell
+    },
+    {
+      // F8
+      "group": {
+        "kind": "test",
+        "isDefault": true
+      },
+      "type": "shell",
+      "label": "haskell test",
+      //"command": "cabal test"
+      "command": "stack test"
+    },
+    {
+      // F6
+      "isBackground": true,
+      "type": "shell",
+      "label": "haskell watch",
+      "command": "stack build --test --no-run-tests --file-watch"
+    }
+  ]
+}
diff --git a/hie.yaml b/hie.yaml
new file mode 100644
index 0000000..3b836a5
--- /dev/null
+++ b/hie.yaml
@@ -0,0 +1,4 @@
+cradle:
+  stack:
+    - path: "./src"
+      component: "haskell-lox:lib"
\ No newline at end of file
diff --git a/package.yaml b/package.yaml
index c950724..6f703e4 100644
--- a/package.yaml
+++ b/package.yaml
@@ -21,6 +21,9 @@ description:         Please see the README on Github at <https://github.com/gith
 
 dependencies:
 - base >= 4.11 && < 10
+# I personally wouldn't depend on rio for the purposes of a blog series
+# rio's great, but it's a commitment to an ecosystem, not really a standard
+# library like the other dependencies.
 - rio >= 0.1.12.0
 - array >= 0.5.4.0
 - text >= 1.2.4.1
diff --git a/src/Scanner.hs b/src/Scanner.hs
index 6cbae53..48e23f2 100644
--- a/src/Scanner.hs
+++ b/src/Scanner.hs
@@ -1,78 +1,78 @@
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE ScopedTypeVariables #-}
 
 module Scanner where
 
-import Import hiding (many, (<|>), try)
-import Data.Text as T
-import Data.Char
-import Text.Parsec.String as PS
-import Text.Parsec.Char as PC
-import Text.Parsec
-import RIO.Partial (read)
+import           Data.Char
+import           Data.Text                     as T
+import           Import                  hiding ( (<|>)
+                                                , many
+                                                , try
+                                                )
+import           RIO.Partial                    ( read )
+import           Text.Parsec
+import           Text.Parsec.Char              as PC
+import           Text.Parsec.String            as PS
 
 data LoxObject = JString | JDouble
   deriving (Show, Eq)
 
+data Op = And | Or | Plus | Minus | Eq | Neq | Leq | Geq | GT | LT
+  deriving (Show, Eq)
+
 data LoxTok
   = -- Single-character tokens.
-    LEFT_PAREN
-  | RIGHT_PAREN
-  | LEFT_BRACE
-  | RIGHT_BRACE
-  | COMMA
-  | DOT
-  | MINUS
-  | PLUS
-  | SEMICOLON
-  | SLASH
-  | STAR
+    LParen
+  | RParen
+  | LBrace
+  | RBrace
+  | Comma
+  | Dot
+  | Semicolon
+  | Slash
+  | Star
   | -- One or two character tokens.
-    BANG
-  | BANG_EQUAL
-  | EQUAL
-  | EQUAL_EQUAL
-  | GREATER
-  | GREATER_EQUAL
-  | LESS
-  | LESS_EQUAL
+    Bang
+  | Equal
+  | Greater
+  | Less
+  | LessEqual
   | -- Literals.
-    IDENTIFIER String
-  | STRING String
-  | NUMBER Double
-  | COMMENT Text
+    Identifier String
+  | String String
+  | Number Double
+  | Comment Text
    -- Keywords.
-  | AND
-  | CLASS
-  | ELSE
-  | FALSE
-  | FUN
-  | FOR
-  | IF
-  | NIL
-  | OR
-  | PRINT
-  | RETURN
-  | SUPER
-  | THIS
-  | TRUE
-  | VAR
-  | WHILE
-  | WHITESPACE
-  | EOF
+  | Class
+  | Else
+  | BoolTok Bool
+  | OpTok Op
+  | Fun
+  | For
+  | If
+  | Nil
+  | Print
+  | Return
+  | Super
+  | This
+  | Var
+  | While
+  | Whitespace
+  | Eof
   deriving (Show, Eq)
 
 data LoxTokInfo = LoxTokInfo
-  { tokinfo_type :: LoxTok,
-    tokinfo_lexeme :: Maybe T.Text,
-    tokinfo_literal :: Maybe LoxObject,
-    tok_position :: SourcePos
+  { tok             :: LoxTok
+  , tokinfo_lexeme  :: Maybe T.Text
+  , tokinfo_literal :: Maybe LoxObject
+  , position        :: SourcePos
   }
   deriving (Show, Eq)
 
-
 tokenShow :: LoxTokInfo -> String
-tokenShow t = "LoxTok=" ++ show (tokinfo_type t)
+tokenShow t = "LoxTok=" ++ show (tok t)
 
 type LoxScannerResult = Either ParseError [LoxTokInfo]
 -- type LoxScanner = Parsec String () [LoxTok]
@@ -80,161 +80,143 @@ type LoxScannerResult = Either ParseError [LoxTokInfo]
 whitespace :: Parser ()
 whitespace = void $ many $ oneOf " \n\t"
 
--- whitespaceToken1 :: Parser LoxTokInfo
--- whitespaceToken1 = do
---   source_pos <- getPosition
---   return $ LoxTokInfo WHITESPACE Nothing Nothing source_pos
-
-whitespaceToken :: Parser LoxTokInfo
-charMapping :: [(LoxTok, Char)]
-charMapping =
-  [ (LEFT_PAREN, '('),
-    (RIGHT_PAREN, ')'),
-    (LEFT_BRACE, '{'),
-    (RIGHT_BRACE, '}'),
-    (COMMA, ','),
-    (DOT, '.'),
-    (MINUS, '-'),
-    (PLUS, '+'),
-    (SEMICOLON, ';'),
-    (SLASH, '/'),
-    (STAR, '*'),
-    (BANG, '!'),
-    (EQUAL, '='),
-    (GREATER, '>'),
-    (LESS, '<')
-  ]
+whitespace1 :: Parser LoxTokInfo
+whitespace1 = withInfo $ Whitespace <$ many1 (char ' ')
 
-scanSingleCharToken :: Parser LoxTokInfo
-scanSingleCharToken = do
-  source_pos <- getPosition
-  sel <- choice $ build <$> charMapping
-  return $ LoxTokInfo sel Nothing Nothing source_pos
-  where
-    build :: (LoxTok, Char) -> Parser LoxTok
-    build (x, y) = x <$ char y <* whitespace
-
-doubleCharMapping :: [(LoxTok, String)]
-doubleCharMapping =
-  [ (BANG_EQUAL, "!="),
-    (EQUAL_EQUAL, "=="),
-    (GREATER_EQUAL, ">="),
-    (LESS_EQUAL, "<=")
-  ]
-
-scanDoubleToken :: Parser LoxTokInfo
-scanDoubleToken = do
-  source_pos <- getPosition
-  sel <- choice $ build <$> doubleCharMapping
-  return $ LoxTokInfo sel Nothing Nothing source_pos
-  where
-    build :: (LoxTok, String) -> Parser LoxTok
-    build (x, y) = x <$ string y <* whitespace
-
-keywordMapping :: [(LoxTok, String)]
-keywordMapping =
-  [ (AND, "and"),
-    (CLASS, "class"),
-    (ELSE, "else"),
-    (FALSE, "false"),
-    (FUN, "fun"),
-    (FOR, "for"),
-    (IF, "if"),
-    (NIL, "nil"),
-    (OR, "or"),
-    (PRINT, "print"),
-    (RETURN, "return"),
-    (SUPER, "super"),
-    (THIS, "this"),
-    (TRUE, "true"),
-    (VAR, "var"),
-    (WHILE, "while")
-  ]
+lexeme :: Parser a -> Parser a -- run a parser, then skip any trailing whitespace
+lexeme p = p <* whitespace
 
-scanKeywordToken :: Parser LoxTokInfo
-scanKeywordToken = do
+withInfo :: Parser LoxTok -> Parser LoxTokInfo
+withInfo p = do
   source_pos <- getPosition
-  sel <- choice $ build <$> keywordMapping
-  return $ LoxTokInfo sel Nothing Nothing source_pos
-  where
-    build :: (LoxTok, String) -> Parser LoxTok
-    build (x, y) = x <$ string y <* whitespace
+  sel        <- p
+  pure $ LoxTokInfo sel Nothing Nothing source_pos
 
-whitespaceToken = do
+-- whitespaceToken1 :: Parser LoxTokInfo
+-- whitespaceToken1 = do
+--   source_pos <- getPosition
+--   pure $ LoxTokInfo WHITESPACE Nothing Nothing source_pos
+
+lSingleChar :: Parser LoxTokInfo -- scans one single-character token, tagged with its start position
+lSingleChar =
+  withInfo
+    .   choice
+    $   build
+    <$> [ (LParen     , '(')
+        , (RParen     , ')')
+        , (LBrace     , '{')
+        , (RBrace     , '}')
+        , (Comma      , ',')
+        , (Dot        , '.')
+        , (OpTok Minus, '-')
+        , (OpTok Plus , '+')
+        , (Semicolon  , ';')
+        , (Slash      , '/')
+        , (Star       , '*')
+        , (Bang       , '!')
+        , (Equal      , '=')
+        , (Greater    , '>')
+        , (Less       , '<')
+        ]
+ where
+  build :: (LoxTok, Char) -> Parser LoxTok -- match the char, yield its token, skip trailing whitespace
+  build (x, y) = lexeme $ x <$ char y
+
+build :: (LoxTok, String) -> Parser LoxTok
+build (x, y) = lexeme $ x <$ string y
+
+lOp :: Parser LoxTok
+lOp =
+  choice
+    $   build
+    <$> [ (OpTok Neq, "!=")
+        , (OpTok Eq , "==")
+        , (OpTok Geq, ">=")
+        , (OpTok Leq, "<=")
+        ]
+
+lDoubleTok :: Parser LoxTokInfo
+lDoubleTok = do
   source_pos <- getPosition
-  _ <- many1 $ char ' '
-  return $ LoxTokInfo WHITESPACE Nothing Nothing source_pos
+  sel        <- lOp
+  pure $ LoxTokInfo sel Nothing Nothing source_pos
+
+keywordMap :: [(LoxTok, String)] -- reserved words paired with the token each one scans to
+keywordMap =
+  [ (OpTok And    , "and")
+  , (Class        , "class")
+  , (Else         , "else")
+  , (BoolTok False, "false")
+  , (Fun          , "fun")
+  , (For          , "for")
+  , (If           , "if")
+  , (Nil          , "nil")
+  , (OpTok Or     , "or")
+  , (Print        , "print")
+  , (Return       , "return")
+  , (Super        , "super")
+  , (This         , "this")
+  , (BoolTok True , "true")
+  , (Var          , "var")
+  , (While        , "while")
+  ]
 
 scanDouble :: Parser LoxTokInfo
-scanDouble = do
-  source_pos <- getPosition
-  let la = lookAhead (whitespaceToken <|> scanSingleCharToken)
-  sel <- do
-    firstPart <- Text.Parsec.many1 digit
-    try (secondCharacter firstPart <* la <* whitespace) <|> NUMBER (read firstPart) <$ la <* whitespace
-  return $ LoxTokInfo sel Nothing Nothing source_pos
-  where
-    secondCharacter :: String -> Parser LoxTok
-    secondCharacter firstPart = do
-      void $ char '.'
-      secondPart <- Text.Parsec.many1 digit <* whitespace
-      return $ NUMBER $ read $ Import.concat [firstPart, ".", secondPart]
+scanDouble = withInfo . lexeme $ do -- lexeme: skip whitespace after the number, like the other tokens
+  preDot     <- Text.Parsec.many1 digit
+  postDotMay <- Text.Parsec.optionMaybe . try $ do -- try: a '.' not followed by digits is left unconsumed
+    dot      <- char '.'
+    decimals <- Text.Parsec.many1 digit
+    pure $ dot : decimals
+  pure $ Number $ read $ preDot <> fromMaybe "" postDotMay -- read is safe: input is digits[.digits]
+
 -- -- https://stackoverflow.com/questions/24106314/parser-for-quoted-string-using-parsec
 escape :: Parser String
-escape = do
-  d <- char '\\'
-  c <- oneOf "\\\"0nrvtbf" -- all the characters which can be escaped
-  return [d, c]
-
-nonEscape :: Parser Char
-nonEscape = noneOf "\\\"\0\n\r\v\t\b\f"
+escape = liftA2 (:) (char '\\') (pure <$> oneOf "\\\"0nrvtbf") -- all the characters which can be escaped
 
-character :: Parser String
-character = fmap return nonEscape <|> escape
+lBetween :: Parser l -> Parser a -> Parser r -> Parser a -- parse l, p, r in order and keep only p's result
+lBetween l p r = lexeme $ l *> p <* r -- lexeme also skips whitespace after r
 
 scanQuotedString :: Parser LoxTokInfo
-scanQuotedString = do
-  source_pos <- getPosition
-  qstring <- char '"' *> many character <* char '"' <* whitespace
-  return $ LoxTokInfo (STRING $ Import.concat qstring) Nothing Nothing source_pos
+scanQuotedString = withInfo lString
+ where
+  lString =
+    String . Import.concat <$> lBetween (char '"') (many lStringChar) (char '"')
+  lStringChar = nonEscape <|> escape
+  nonEscape   = pure <$> noneOf "\\\"\0\n\r\v\t\b\f"
 
 -- -- http://jakewheat.github.io/intro_to_parsing/#_var
 var :: Parser String
-var = do
-  fc <- firstChar
-  rest <- many nonFirstChar
-  return (fc : rest)
-  where
-    firstChar = satisfy (\a -> isLetter a || a == '_')
-    nonFirstChar = satisfy (\a -> isDigit a || isLetter a || a == '_')
-
-checkIfIdentifier :: Parser LoxTokInfo
-checkIfIdentifier = do
-  source_pos <- getPosition
+var = liftA2 (:) firstChar (many nonFirstChar)
+ where
+  firstChar    = satisfy (\a -> isLetter a || a == '_')
+  nonFirstChar = satisfy (\a -> isDigit a || isLetter a || a == '_')
+
+lKeywordOrIdent :: Parser LoxTokInfo -- scans a word; yields its keyword token if reserved, else Identifier
+lKeywordOrIdent = withInfo . lexeme $ do -- lexeme: skip whitespace after the word so the next token can start
   s <- var
-  result ([(x, y) | (x, y) <- keywordMapping, y == s]) s source_pos
-  where
-    result xs s source_pos = do
-      case xs of
-        [] -> return $ LoxTokInfo (IDENTIFIER s) Nothing Nothing source_pos
-        (x, _) : _ -> return $ LoxTokInfo x Nothing Nothing source_pos
+  -- A map lookup would make more sense here
+  case Import.filter ((== s) . snd) keywordMap of
+    []         -> pure (Identifier s)
+    (x, _) : _ -> pure x
 
 scanComment :: Parser LoxTokInfo
 scanComment = do
   source_pos <- getPosition
-  _ <- string "//"
+  _          <- string "//"
   -- TODO: Find a better way to do this, scanning this more than once is not desirable
-  comment <- try (manyTill anyToken (try (oneOf "\n"))) <|> manyTill anyToken eof
-  return $ LoxTokInfo (COMMENT (T.pack comment)) Nothing Nothing source_pos
+  comment    <-
+    try (manyTill anyToken (try (oneOf "\n"))) <|> manyTill anyToken eof
+  pure $ LoxTokInfo (Comment (T.pack comment)) Nothing Nothing source_pos
 
 scanToken :: Parser LoxTokInfo
 scanToken =
   try scanComment
-    <|> try scanDoubleToken
-    <|> try scanSingleCharToken
+    <|> try lDoubleTok
+    <|> try lSingleChar
     <|> try scanQuotedString
-    <|> scanDouble
-    <|> checkIfIdentifier
+    <|> try scanDouble
+    <|> lKeywordOrIdent
 
 scanner :: String -> LoxScannerResult
 scanner = parse (many scanToken <* eof) ""
diff --git a/stack.yaml b/stack.yaml
index ab3a60e..abf5a10 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -16,9 +16,9 @@
 # a snapshot provided as a file might change, whereas a url resource does not.
 #
 # resolver: ./custom-snapshot.yaml
-# resolver: https://example.com/snapshots/2018-01-01.yaml
+# resolver: https://example.com/snapshots/2018-01-01.yaml
 resolver:
-  url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/7.yaml
+  url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/6.yaml
 
 # User packages to be built.
 # Various formats can be used as shown in the example below.
diff --git a/test/test_lexer.hs b/test/test_lexer.hs
index bae24e4..ea91502 100644
--- a/test/test_lexer.hs
+++ b/test/test_lexer.hs
@@ -20,7 +20,7 @@ testToken test_name token_str expected_tok = testCase test_name $ do
   let result = scanner token_str
   case result of
     Left x -> assertFailure $ show x
-    Right (x : _) -> assertEqual token_str (tokinfo_type x) expected_tok
+    Right (x : _) -> assertEqual token_str (tok x) expected_tok
 
 
 
@@ -30,23 +30,23 @@ testInvalidToken test_name token_str = testCase test_name $ do
     Left x -> return ()
     Right x -> assertFailure $ show x
 
-testSingleCharToken = testToken "testSingleCharToken" "+" PLUS
+testSingleCharToken = testToken "testSingleCharToken" "+" (OpTok Plus)
 
-testDoubleCharToken = testToken "testDoubleCharToken"  "==" EQUAL_EQUAL
+testDoubleCharToken = testToken "testDoubleCharToken"  "==" (OpTok Eq)
 
-testKeywordToken = testToken "testKeywordToken" "class" CLASS
+testKeywordToken = testToken "testKeywordToken" "class" Class
 
-testScanDouble_1 = testToken "testScanDouble_1" "1121.1121;" (NUMBER 1121.1121)
-testScanDouble_2 = testToken "testScanDouble_2" "0.1121;" (NUMBER 0.1121)
+testScanDouble_1 = testToken "testScanDouble_1" "1121.1121;" (Number 1121.1121)
+testScanDouble_2 = testToken "testScanDouble_2" "0.1121;" (Number 0.1121)
 
 -- We don't like the Lexer doing this, but we will try handling these scenarios in the parser
 
-testScanDouble_4 = testToken "testScanDouble_4" "1121." (NUMBER 1121.0)
+testScanDouble_4 = testToken "testScanDouble_4" "1121." (Number 1121.0)
 
 
 testScanDoubleInvalid_2 = testInvalidToken "testScanDouble_2" "1121."
 
-testScanIdentifier = testToken "testScanIdentifier" "and_1" (IDENTIFIER "and_1")
+testScanIdentifier = testToken "testScanIdentifier" "and_1" (Identifier "and_1")
 
 -- invalid tokens
 
@@ -59,8 +59,8 @@ testScanInvalidDOT = testInvalidToken "testScanInvalidDoubleDOT" ".1121"
 testScanInvalidIdentifier_1 = testInvalidToken "testScanInvalidIdentifier_1" "1and"
 testScanInvalidIdentifier_2 = testInvalidToken "testScanInvalidIdentifier_2" "1_and"
 
-testComment_1 = testToken "testComment" "// this is a comment" (COMMENT " this is a comment")
-testComment_2 = testToken "testComment" "// this is a comment\n" (COMMENT " this is a comment")
+testComment_1 = testToken "testComment" "// this is a comment" (Comment " this is a comment")
+testComment_2 = testToken "testComment" "// this is a comment\n" (Comment " this is a comment")
 
 main = do
   defaultMain $ testGroup "tokenizer_tests_example_1" [