harpocrates
diff --git a/‎.gitignore
Lines changed: 1 addition & 0 deletions b/‎.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/‎language-rust.cabal
Lines changed: 8 additions & 2 deletions b/‎language-rust.cabal
Lines changed: 8 additions & 2 deletions
diff --git a/‎src/Language/Rust/Data/Position.hs
Lines changed: 41 additions & 9 deletions b/‎src/Language/Rust/Data/Position.hs
Lines changed: 41 additions & 9 deletions
diff --git a/‎src/Language/Rust/Parser/Lexer.x
Lines changed: 51 additions & 58 deletions b/‎src/Language/Rust/Parser/Lexer.x
Lines changed: 51 additions & 58 deletions
diff --git a/‎src/Language/Rust/Parser/ParseMonad.hs
Lines changed: 20 additions & 7 deletions b/‎src/Language/Rust/Parser/ParseMonad.hs
Lines changed: 20 additions & 7 deletions
@@ -22,3 +22,4 @@ tags
 /_site/
 /.dir-locals.el
 /src/Language/Parser/Lexer.hs
+happyinfo.txt
@@ -23,10 +23,11 @@ library
   exposed-modules:     Language.Rust.Pretty
                        Language.Rust.Syntax.Token
                        Language.Rust.Syntax.Ident
+                       Language.Rust.Syntax.Constants
                        Language.Rust.Syntax.AST
                        Language.Rust.Parser.ParseMonad
                        Language.Rust.Parser.Lexer
- --                      Language.Rust.Parser.Parser2
+                       Language.Rust.Parser.Parser2
                        Language.Rust.Data.Position
                        Language.Rust.Data.InputStream
   -- other-modules:
@@ -40,7 +41,7 @@ library
                      , DeriveFunctor
 
   build-depends:       base >=4.9 && <4.10
-                     , pretty >=1.1 && <1.2
+                     , wl-pprint-annotated >= 0.0.1 && < 0.0.2
                      , transformers >=0.5 && <0.6
                      , array >=0.5 && <0.6
                      , bytestring >= 0.10
@@ -50,12 +51,17 @@ library
 test-suite tests
   hs-source-dirs:      tests
   main-is:             Main.hs
+  other-modules:       LexerTest
+                       ParserTest
+                       PrettyTest
   Type:                exitcode-stdio-1.0
   default-language:    Haskell2010
   build-depends:       base >=4.9 && <4.10
                      , Cabal >= 1.10.0
                      , transformers >=0.5 && <0.6
                      , HUnit
+                     , pretty >=1.1 && <1.2
+                     , wl-pprint-annotated >= 0.0.1 && < 0.0.2
                      , test-framework
                      , test-framework-hunit >= 0.3.0
                      , language-rust
@@ -2,9 +2,13 @@
 
 module Language.Rust.Data.Position where
 
+import Data.Ord (comparing)
+import Data.List (maximumBy, minimumBy)
+import Data.Monoid (Monoid, mappend, mempty)
+
 -- Taken and abbreviated from
--- | A position in a source file. The row and column information is kept only for its convenience
--- and human-readability.
+-- | A position in a source file. The row and column information is kept only
+-- for its convenience and human-readability.
 -- https://hackage.haskell.org/package/language-c-0.5.0/docs/src/Language-C-Data-Position.html#Position
 data Position = Position {
     absoluteOffset :: {-# UNPACK #-} !Int, -- ^ absolute offset the source file.
@@ -14,6 +18,17 @@ data Position = Position {
   | NoPosition
   deriving (Eq, Ord)
 
+-- | Select the later ('maxPos') or earlier ('minPos') of two positions, as
+-- judged by 'absoluteOffset'. If either argument is 'NoPosition', the other
+-- argument is returned unchanged, so a real position always wins.
+maxPos, minPos :: Position -> Position -> Position
+
+maxPos p1 p2 = case (p1, p2) of
+  (NoPosition, _) -> p2
+  (_, NoPosition) -> p1
+  _               -> maximumBy (comparing absoluteOffset) [p1, p2]
+
+minPos p1 p2 = case (p1, p2) of
+  (NoPosition, _) -> p2
+  (_, NoPosition) -> p1
+  _               -> minimumBy (comparing absoluteOffset) [p1, p2]
+
 -- | starting position in a file
 initPos :: Position
 initPos = Position 0 1 1
@@ -42,23 +57,40 @@ instance Show Position where
 type ExpnId = Int -- https://docs.serde.rs/syntex_pos/struct.ExpnId.html
 
 
--- | Spans represent a region of code, used for error reporting. Positions in spans are absolute positions from the
--- beginning of the codemap, not positions relative to FileMaps. Methods on the CodeMap can be used to relate spans
--- back to the original source. You must be careful if the span crosses more than one file - you will not be able to
--- use many of the functions on spans in codemap and you cannot assume that the length of the span = hi - lo; there may
--- be space in the BytePos range between files.
+-- | Spans represent a region of code, used for error reporting. Positions in
+-- spans are absolute positions from the beginning of the codemap, not
+-- positions relative to FileMaps. Methods on the CodeMap can be used to relate
+-- spans back to the original source. You must be careful if the span crosses
+-- more than one file - you will not be able to use many of the functions on 
+-- spans in codemap and you cannot assume that the length of the span = hi - lo;
+-- there may be space in the BytePos range between files.
 -- https://docs.serde.rs/syntex_syntax/ext/quote/rt/struct.Span.html
 data Span
   = Span {
     lo :: Position,
     hi :: Position --,
     -- expnId :: ExpnId
-  }
+  } deriving (Eq)
+
+-- | Two spans combine into the smallest span covering both of them. The
+-- identity element is the span whose endpoints are both 'NoPosition'.
+instance Monoid Span where
+  mempty = Span { lo = NoPosition, hi = NoPosition }
+  mappend a b = Span
+    { lo = minPos (lo a) (lo b)
+    , hi = maxPos (hi a) (hi b)
+    }
 
 instance Show Span where
   show (Span lo hi) = show lo ++ " - " ++ show hi
 
+-- | A "tagging" of something with a 'Span' that describes its extent
 data Spanned a = Spanned { unspan :: a, span :: Span } deriving (Functor)
 
+-- | 'pure' tags a value with the empty span; '<*>' applies the wrapped
+-- function to the wrapped argument and merges the two spans.
+instance Applicative Spanned where
+  pure val = Spanned val mempty
+  Spanned fn fnSpan <*> Spanned arg argSpan =
+    Spanned (fn arg) (mappend fnSpan argSpan)
+
+-- | Bind feeds the wrapped value to the continuation and merges the span of
+-- the input with the span of the continuation's result.
+instance Monad Spanned where
+  return = pure
+  Spanned val outer >>= k = Spanned res (mappend outer inner)
+    where
+      -- Lazy pattern binding: the continuation's result is only forced on use.
+      Spanned res inner = k val
+
 instance Show a => Show (Spanned a) where
-  show (Spanned n p) = "at " ++ show p ++ ": " ++ show n
+  -- Render as "at <span>: <value>": the span is shown first, then the
+  -- wrapped value ('Spanned' stores the value first and the span second).
+  show (Spanned val sp) = "at " ++ show sp ++ ": " ++ show val
+
@@ -4,7 +4,7 @@ module Language.Rust.Parser.Lexer (lexToken, lexTokens, lexRust, lexicalError, p
 import Language.Rust.Data.InputStream
 import Language.Rust.Data.Position
 import Language.Rust.Parser.ParseMonad
-import Language.Rust.Syntax.Token (Token(..), TokenSpace(..), Space(..), LitTok(..), BinOpToken(..), DelimToken(..))
+import Language.Rust.Syntax.Token 
 import Language.Rust.Syntax.Ident (mkIdent, Ident(..), Name(..))
 
 import Data.Word (Word8)
@@ -924,47 +924,33 @@ $hexit = [0-9a-fA-F]
 
 -- Comments
 
-@undoc_comment  = "////" [^\n]*
-@yesdoc_comment = "///" [^\r\n]*
-@outer_doc_cmt  = "//!" [^\r\n]*
+-- An outer line doc comment starts with exactly three slashes. "////..." is
+-- an ordinary line comment, so the character after "///" (if any) must not be
+-- another '/'; such input then falls through to the longer @line_comment match.
+@outer_doc_line   = "///" ( [^\r\n\/] [^\r\n]* )?
+@outer_doc_inline = "/**"
+
+@inner_doc_line   = "//!" [^\r\n]*
+@inner_doc_inline = "/*!"
+
 @line_comment   = "//" ( [^\n\/]* [^\n]* )?
-@inline_comment = "/*" 
+@inline_comment = "/*"
 
 
 tokens :-
 
 $white+         { \s -> pure (Space Whitespace (Name s))  }
 
-"="             { token Eq }
-"<"             { token Lt }
-"<="            { token Le }
-"=="            { token EqEq }
-">="            { token Ge }
-">"             { token Gt }
-"&&"            { token AndAnd }
-"||"            { token OrOr }
+"="             { token Equal }
+"<"             { token Less }
+">"             { token Greater }
+"&"             { token Ampersand }
+"|"             { token Pipe }
 "!"             { token Exclamation }
 "~"             { token Tilde }
-"+"             { token (BinOp Plus) }
-"-"             { token (BinOp Minus) }
-"*"             { token (BinOp Star) }
-"/"             { token (BinOp Slash) }
-"%"             { token (BinOp Percent) }
-"^"             { token (BinOp Caret) }
-"&"             { token (BinOp And) }
-"|"             { token (BinOp Or) }
-"<<"            { token (BinOp Shl) }
-">>"            { token (BinOp Shr) }
-"+="            { token (BinOpEq Plus) }
-"-="            { token (BinOpEq Minus) }
-"*="            { token (BinOpEq Star) }
-"/="            { token (BinOpEq Slash) }
-"%="            { token (BinOpEq Percent) }
-"^="            { token (BinOpEq Caret) }
-"&="            { token (BinOpEq And) }
-"|="            { token (BinOpEq Or) }
-"<<="           { token (BinOpEq Shl) }
-">>="           { token (BinOpEq Shr) }
+"+"             { token Plus }
+"-"             { token Minus }
+"*"             { token Star }
+"/"             { token Slash }
+"%"             { token Percent }
+"^"             { token Caret }
 "#!"            { token Shebang }
 
 "@"             { token At }          
@@ -1017,11 +1003,15 @@ $white+         { \s -> pure (Space Whitespace (Name s))  }
 @ident          { \s -> pure (IdentTok (mkIdent s)) } 
 @lifetime       { \s -> (pure (LifetimeTok (mkIdent (tail s))) :: P Token) }
 
-@undoc_comment  { \c -> pure (Space DocComment (Name c)) }
-@yesdoc_comment { \c -> pure (Space DocComment (Name c)) }
-@outer_doc_cmt  { \c -> pure (Space DocComment (Name c)) }
-@line_comment   { \c -> pure (Space Comment (Name c)) }
-@inline_comment { \_ -> nestedComment 1 }
+
+@outer_doc_line   { \c -> pure (Doc (drop 3 c) OuterDoc) } 
+@outer_doc_inline { \_ -> Doc <$> nestedComment <*> pure OuterDoc }
+
+@inner_doc_line   { \c -> pure (Doc (drop 3 c) InnerDoc) }
+@inner_doc_inline { \_ -> Doc <$> nestedComment <*> pure InnerDoc }
+
+@line_comment     { \c -> pure (Space Comment (Name (drop 2 c))) }
+@inline_comment   { \_ -> Space Comment <$> (Name <$> nestedComment) }
 
 {
 
@@ -1067,25 +1057,28 @@ rawString n = do
     Just c -> ([c] ++) <$> rawString n 
 
 -- | Consume a full inline comment (which may be nested).
-nestedComment :: Int -> P Token
-nestedComment 0 = pure (Space Comment (Name "comment not captured (TODO)"))
-nestedComment n = do
-  c <- nextChar
-  case c of
-    Nothing -> fail "Unclosed comment"
-    Just '*' -> do
-      c' <- peekChar
-      case c' of 
-        Nothing -> fail "Unclosed comment"
-        Just '/' -> nextChar *> nestedComment (n-1)
-        Just _ -> nestedComment n
-    Just '/' -> do
-      c' <- peekChar
-      case c' of 
+nestedComment :: P String -- yields the comment text without its closing "*/"
+nestedComment = go 1 "" -- depth 1: the lexer rule has already matched the opening delimiter
+  where
+    go :: Int -> String -> P String -- nesting depth, text consumed so far (reversed)
+    go 0 s = pure (reverse (drop 2 s)) -- depth 0: discard the final "*/" (head of the reversed buffer)
+    go n s = do
+      c <- nextChar
+      case c of
         Nothing -> fail "Unclosed comment"
-        Just '*' -> nextChar *> nestedComment (n+1)
-        Just _ -> nestedComment n
-    Just _ -> nestedComment n
+        Just '*' -> do
+          c' <- peekChar
+          case c' of 
+            Nothing -> fail "Unclosed comment"
+            Just '/' -> nextChar *> go (n-1) ('/':'*':s) -- "*/" closes one nesting level
+            Just _ -> go n ('*':s) -- lone '*': keep it, the peeked char is not consumed
+        Just '/' -> do
+          c' <- peekChar
+          case c' of 
+            Nothing -> fail "Unclosed comment"
+            Just '*' -> nextChar *> go (n+1) ('*':'/':s) -- "/*" opens a nested comment
+            Just _ -> go n ('/':s) -- lone '/': keep it, the peeked char is not consumed
+        Just c' -> go n (c':s)
 
 
 -- Monadic functions
@@ -1127,8 +1120,8 @@ lexicalError = do
   fail ("Lexical error: the character " ++ show c ++ " does not fit here")
 
 -- | Signal a syntax error.
-parseError :: Spanned Token -> P a
-parseError (Spanned tok _) = do
+parseError :: TokenSpace Spanned -> P a
+parseError (TokenSpace (Spanned tok _) _) = do
   fail ("Syntax error: the symbol `" ++ show tok ++ "' does not fit here")
 
 
 
@@ -10,8 +10,21 @@ import Language.Rust.Syntax.Ident
 import Control.Monad
 import Control.Monad.Trans.Except
 
+-- | Pattern for Identifiers
 pattern Identifier :: String -> Spanned Token
-pattern Identifier s <- (Spanned (IdentTok (Ident (Name s) _)) _) 
+pattern Identifier s <- (Spanned (IdentTok (Ident (Name s) _)) _)
+
+-- | Matches a token preceded by space, i.e. a 'TokenSpace' whose second field
+-- is a non-empty list; binds the spanned token
+pattern SpTok :: Spanned Token -> TokenSpace Spanned
+pattern SpTok s <- (TokenSpace s (_:_))
+
+-- | Matches a token not preceded by space, i.e. a 'TokenSpace' whose second
+-- field is the empty list; binds the spanned token
+pattern NoSpTok :: Spanned Token -> TokenSpace Spanned
+pattern NoSpTok s <- (TokenSpace s [])
+
+-- | Matches any token, whether or not it is preceded by space; binds the
+-- spanned token and ignores the second field of 'TokenSpace'
+pattern Tok :: Spanned Token -> TokenSpace Spanned
+pattern Tok s <- (TokenSpace s _)
 
 -- | the result of running a parser
 data ParseResult a
@@ -58,12 +71,12 @@ execParser (P parser) input pos =
   case parser initialState of
     Failed message errpos -> throwE (errpos,message)
     Ok result _ -> return result
-  where initialState = PState {
-          curPos = pos,
-          curInput = input,
-          prevToken = error "CLexer.execParser: Touched undefined token!",
-          savedToken = error "CLexer.execParser: Touched undefined token (saved token)!"
-        }
+  where initialState = PState
+          { curPos = pos
+          , curInput = input
+          , prevToken = error "CLexer.execParser: Touched undefined token!"
+          , savedToken = error "CLexer.execParser: Touched undefined token (saved token)!"
+          }
 
 -- | update the position of the parser
 updatePosition :: (Position -> Position) -> P ()