Skip to content

Commit 5b09ca9

Browse files
authored
Merge pull request #5 from harpocrates/wl-pprint-annotated
2 parents 4664bb8 + 9035eab commit 5b09ca9

17 files changed

+2681
-1099
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,4 @@ tags
2222
/_site/
2323
/.dir-locals.el
2424
/src/Language/Parser/Lexer.hs
25+
happyinfo.txt

language-rust.cabal

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,11 @@ library
2323
exposed-modules: Language.Rust.Pretty
2424
Language.Rust.Syntax.Token
2525
Language.Rust.Syntax.Ident
26+
Language.Rust.Syntax.Constants
2627
Language.Rust.Syntax.AST
2728
Language.Rust.Parser.ParseMonad
2829
Language.Rust.Parser.Lexer
29-
-- Language.Rust.Parser.Parser2
30+
Language.Rust.Parser.Parser2
3031
Language.Rust.Data.Position
3132
Language.Rust.Data.InputStream
3233
-- other-modules:
@@ -40,7 +41,7 @@ library
4041
, DeriveFunctor
4142

4243
build-depends: base >=4.9 && <4.10
43-
, pretty >=1.1 && <1.2
44+
, wl-pprint-annotated >= 0.0.1 && < 0.0.2
4445
, transformers >=0.5 && <0.6
4546
, array >=0.5 && <0.6
4647
, bytestring >= 0.10
@@ -50,12 +51,17 @@ library
5051
test-suite tests
5152
hs-source-dirs: tests
5253
main-is: Main.hs
54+
other-modules: LexerTest
55+
ParserTest
56+
PrettyTest
5357
Type: exitcode-stdio-1.0
5458
default-language: Haskell2010
5559
build-depends: base >=4.9 && <4.10
5660
, Cabal >= 1.10.0
5761
, transformers >=0.5 && <0.6
5862
, HUnit
63+
, pretty >=1.1 && <1.2
64+
, wl-pprint-annotated >= 0.0.1 && < 0.0.2
5965
, test-framework
6066
, test-framework-hunit >= 0.3.0
6167
, language-rust

src/Language/Rust/Data/Position.hs

Lines changed: 41 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@
22

33
module Language.Rust.Data.Position where
44

5+
import Data.Ord (comparing)
6+
import Data.List (maximumBy, minimumBy)
7+
import Data.Monoid (Monoid, mappend, mempty)
8+
59
-- Taken and abbreviated from
6-
-- | A position in a source file. The row and column information is kept only for its convenience
7-
-- and human-readability.
10+
-- | A position in a source file. The row and column information is kept only
11+
-- for its convenience and human-readability.
812
-- https://hackage.haskell.org/package/language-c-0.5.0/docs/src/Language-C-Data-Position.html#Position
913
data Position = Position {
1014
absoluteOffset :: {-# UNPACK #-} !Int, -- ^ absolute offset in the source file.
@@ -14,6 +18,17 @@ data Position = Position {
1418
| NoPosition
1519
deriving (Eq, Ord)
1620

21+
-- | Maximum and minimum of two positions (compared by absolute offset). In either case an actual position is preferred over 'NoPosition'.
22+
maxPos, minPos :: Position -> Position -> Position
23+
24+
maxPos NoPosition p2 = p2
25+
maxPos p1 NoPosition = p1
26+
-- Both arguments are real positions: keep the one with the larger offset
-- (the second argument wins when the offsets are equal).
maxPos a b
  | absoluteOffset a > absoluteOffset b = a
  | otherwise = b
27+
28+
minPos NoPosition p2 = p2
29+
minPos p1 NoPosition = p1
30+
-- Both arguments are real positions: keep the one with the smaller offset
-- (the first argument wins when the offsets are equal).
minPos a b
  | absoluteOffset a > absoluteOffset b = b
  | otherwise = a
31+
1732
-- | starting position in a file
1833
initPos :: Position
1934
initPos = Position 0 1 1
@@ -42,23 +57,40 @@ instance Show Position where
4257
type ExpnId = Int -- https://docs.serde.rs/syntex_pos/struct.ExpnId.html
4358

4459

45-
-- | Spans represent a region of code, used for error reporting. Positions in spans are absolute positions from the
46-
-- beginning of the codemap, not positions relative to FileMaps. Methods on the CodeMap can be used to relate spans
47-
-- back to the original source. You must be careful if the span crosses more than one file - you will not be able to
48-
-- use many of the functions on spans in codemap and you cannot assume that the length of the span = hi - lo; there may
49-
-- be space in the BytePos range between files.
60+
-- | Spans represent a region of code, used for error reporting. Positions in
61+
-- spans are absolute positions from the beginning of the codemap, not
62+
-- positions relative to FileMaps. Methods on the CodeMap can be used to relate
63+
-- spans back to the original source. You must be careful if the span crosses
64+
-- more than one file - you will not be able to use many of the functions on
65+
-- spans in codemap and you cannot assume that the length of the span = hi - lo;
66+
-- there may be space in the BytePos range between files.
5067
-- https://docs.serde.rs/syntex_syntax/ext/quote/rt/struct.Span.html
5168
data Span
5269
= Span {
5370
lo :: Position,
5471
hi :: Position --,
5572
-- expnId :: ExpnId
56-
}
73+
} deriving (Eq)
74+
75+
-- Spans are merged by taking the smallest span that covers both arguments
76+
instance Monoid Span where
77+
mempty = Span NoPosition NoPosition
78+
s1 `mappend` s2 = Span (lo s1 `minPos` lo s2) (hi s1 `maxPos` hi s2)
5779

5880
instance Show Span where
5981
show (Span lo hi) = show lo ++ " - " ++ show hi
6082

83+
-- | A "tagging" of something with a 'Span' that describes its extent
6184
data Spanned a = Spanned { unspan :: a, span :: Span } deriving (Functor)
6285

86+
instance Applicative Spanned where
87+
pure x = Spanned x mempty
88+
Spanned f s1 <*> Spanned x s2 = Spanned (f x) (s1 `mappend` s2)
89+
90+
instance Monad Spanned where
91+
return = pure
92+
Spanned x s1 >>= f = let Spanned y s2 = f x in Spanned y (s1 `mappend` s2)
93+
6394
instance Show a => Show (Spanned a) where
64-
show (Spanned n p) = "at " ++ show p ++ ": " ++ show n
95+
-- 'Spanned' stores the value first and its span second; render the span after
-- "at" and the value last.
show (Spanned x s) = "at " ++ show s ++ ": " ++ show x
96+

src/Language/Rust/Parser/Lexer.x

Lines changed: 51 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ module Language.Rust.Parser.Lexer (lexToken, lexTokens, lexRust, lexicalError, p
44
import Language.Rust.Data.InputStream
55
import Language.Rust.Data.Position
66
import Language.Rust.Parser.ParseMonad
7-
import Language.Rust.Syntax.Token (Token(..), TokenSpace(..), Space(..), LitTok(..), BinOpToken(..), DelimToken(..))
7+
import Language.Rust.Syntax.Token
88
import Language.Rust.Syntax.Ident (mkIdent, Ident(..), Name(..))
99

1010
import Data.Word (Word8)
@@ -924,47 +924,33 @@ $hexit = [0-9a-fA-F]
924924

925925
-- Comments
926926

927-
@undoc_comment = "////" [^\n]*
928-
@yesdoc_comment = "///" [^\r\n]*
929-
@outer_doc_cmt = "//!" [^\r\n]*
927+
-- An outer doc line starts with exactly three slashes; a fourth slash makes it
-- an ordinary line comment (left to @line_comment, which then matches longer).
@outer_doc_line = "///" ( [^\r\n\/] [^\r\n]* )?
928+
@outer_doc_inline = "/**"
929+
930+
@inner_doc_line = "//!" [^\r\n]*
931+
@inner_doc_inline = "/*!"
932+
930933
@line_comment = "//" ( [^\n\/]* [^\n]* )?
931-
@inline_comment = "/*"
934+
@inline_comment = "/*"
932935

933936

934937
tokens :-
935938

936939
$white+ { \s -> pure (Space Whitespace (Name s)) }
937940

938-
"=" { token Eq }
939-
"<" { token Lt }
940-
"<=" { token Le }
941-
"==" { token EqEq }
942-
">=" { token Ge }
943-
">" { token Gt }
944-
"&&" { token AndAnd }
945-
"||" { token OrOr }
941+
"=" { token Equal }
942+
"<" { token Less }
943+
">" { token Greater }
944+
"&" { token Ampersand }
945+
"|" { token Pipe }
946946
"!" { token Exclamation }
947947
"~" { token Tilde }
948-
"+" { token (BinOp Plus) }
949-
"-" { token (BinOp Minus) }
950-
"*" { token (BinOp Star) }
951-
"/" { token (BinOp Slash) }
952-
"%" { token (BinOp Percent) }
953-
"^" { token (BinOp Caret) }
954-
"&" { token (BinOp And) }
955-
"|" { token (BinOp Or) }
956-
"<<" { token (BinOp Shl) }
957-
">>" { token (BinOp Shr) }
958-
"+=" { token (BinOpEq Plus) }
959-
"-=" { token (BinOpEq Minus) }
960-
"*=" { token (BinOpEq Star) }
961-
"/=" { token (BinOpEq Slash) }
962-
"%=" { token (BinOpEq Percent) }
963-
"^=" { token (BinOpEq Caret) }
964-
"&=" { token (BinOpEq And) }
965-
"|=" { token (BinOpEq Or) }
966-
"<<=" { token (BinOpEq Shl) }
967-
">>=" { token (BinOpEq Shr) }
948+
"+" { token Plus }
949+
"-" { token Minus }
950+
"*" { token Star }
951+
"/" { token Slash }
952+
"%" { token Percent }
953+
"^" { token Caret }
968954
"#!" { token Shebang }
969955

970956
"@" { token At }
@@ -1017,11 +1003,15 @@ $white+ { \s -> pure (Space Whitespace (Name s)) }
10171003
@ident { \s -> pure (IdentTok (mkIdent s)) }
10181004
@lifetime { \s -> (pure (LifetimeTok (mkIdent (tail s))) :: P Token) }
10191005

1020-
@undoc_comment { \c -> pure (Space DocComment (Name c)) }
1021-
@yesdoc_comment { \c -> pure (Space DocComment (Name c)) }
1022-
@outer_doc_cmt { \c -> pure (Space DocComment (Name c)) }
1023-
@line_comment { \c -> pure (Space Comment (Name c)) }
1024-
@inline_comment { \_ -> nestedComment 1 }
1006+
1007+
@outer_doc_line { \c -> pure (Doc (drop 3 c) OuterDoc) }
1008+
@outer_doc_inline { \_ -> Doc <$> nestedComment <*> pure OuterDoc }
1009+
1010+
@inner_doc_line { \c -> pure (Doc (drop 3 c) InnerDoc) }
1011+
@inner_doc_inline { \_ -> Doc <$> nestedComment <*> pure InnerDoc }
1012+
1013+
@line_comment { \c -> pure (Space Comment (Name (drop 2 c))) }
1014+
@inline_comment { \_ -> Space Comment <$> (Name <$> nestedComment) }
10251015

10261016
{
10271017

@@ -1067,25 +1057,28 @@ rawString n = do
10671057
Just c -> ([c] ++) <$> rawString n
10681058

10691059
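-- | Consume the rest of an inline comment (which may contain nested inline
-- comments) and return its text, without the final closing delimiter. The
-- opening delimiter is expected to have been consumed already.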
1070-
nestedComment :: Int -> P Token
1071-
nestedComment 0 = pure (Space Comment (Name "comment not captured (TODO)"))
1072-
nestedComment n = do
1073-
c <- nextChar
1074-
case c of
1075-
Nothing -> fail "Unclosed comment"
1076-
Just '*' -> do
1077-
c' <- peekChar
1078-
case c' of
1079-
Nothing -> fail "Unclosed comment"
1080-
Just '/' -> nextChar *> nestedComment (n-1)
1081-
Just _ -> nestedComment n
1082-
Just '/' -> do
1083-
c' <- peekChar
1084-
case c' of
1060+
nestedComment :: P String
1061+
nestedComment = go 1 ""
1062+
where
1063+
go :: Int -> String -> P String
1064+
go 0 s = pure (reverse (drop 2 s))
1065+
go n s = do
1066+
c <- nextChar
1067+
case c of
10851068
Nothing -> fail "Unclosed comment"
1086-
Just '*' -> nextChar *> nestedComment (n+1)
1087-
Just _ -> nestedComment n
1088-
Just _ -> nestedComment n
1069+
Just '*' -> do
1070+
c' <- peekChar
1071+
case c' of
1072+
Nothing -> fail "Unclosed comment"
1073+
Just '/' -> nextChar *> go (n-1) ('/':'*':s)
1074+
Just _ -> go n ('*':s)
1075+
Just '/' -> do
1076+
c' <- peekChar
1077+
case c' of
1078+
Nothing -> fail "Unclosed comment"
1079+
Just '*' -> nextChar *> go (n+1) ('*':'/':s)
1080+
Just _ -> go n ('/':s)
1081+
Just c' -> go n (c':s)
10891082

10901083

10911084
-- Monadic functions
@@ -1127,8 +1120,8 @@ lexicalError = do
11271120
fail ("Lexical error: the character " ++ show c ++ " does not fit here")
11281121
11291122
-- | Signal a syntax error.
1130-
parseError :: Spanned Token -> P a
1131-
parseError (Spanned tok _) = do
1123+
parseError :: TokenSpace Spanned -> P a
1124+
parseError (TokenSpace (Spanned tok _) _) = do
11321125
fail ("Syntax error: the symbol `" ++ show tok ++ "' does not fit here")
11331126
11341127

src/Language/Rust/Parser/ParseMonad.hs

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,21 @@ import Language.Rust.Syntax.Ident
1010
import Control.Monad
1111
import Control.Monad.Trans.Except
1212

13+
-- | Pattern for Identifiers
1314
pattern Identifier :: String -> Spanned Token
14-
pattern Identifier s <- (Spanned (IdentTok (Ident (Name s) _)) _)
15+
pattern Identifier s <- (Spanned (IdentTok (Ident (Name s) _)) _)
16+
17+
-- | Pattern for tokens preceded by space
18+
pattern SpTok :: Spanned Token -> TokenSpace Spanned
19+
pattern SpTok s <- (TokenSpace s (_:_))
20+
21+
-- | Pattern for tokens not preceded by space
22+
pattern NoSpTok :: Spanned Token -> TokenSpace Spanned
23+
pattern NoSpTok s <- (TokenSpace s [])
24+
25+
-- | Pattern for tokens (preceded or not by space)
26+
pattern Tok :: Spanned Token -> TokenSpace Spanned
27+
pattern Tok s <- (TokenSpace s _)
1528

1629
-- | the result of running a parser
1730
data ParseResult a
@@ -58,12 +71,12 @@ execParser (P parser) input pos =
5871
case parser initialState of
5972
Failed message errpos -> throwE (errpos,message)
6073
Ok result _ -> return result
61-
where initialState = PState {
62-
curPos = pos,
63-
curInput = input,
64-
prevToken = error "CLexer.execParser: Touched undefined token!",
65-
savedToken = error "CLexer.execParser: Touched undefined token (saved token)!"
66-
}
74+
where initialState = PState
75+
{ curPos = pos
76+
, curInput = input
77+
, prevToken = error "CLexer.execParser: Touched undefined token!"
78+
, savedToken = error "CLexer.execParser: Touched undefined token (saved token)!"
79+
}
6780

6881
-- | update the position of the parser
6982
updatePosition :: (Position -> Position) -> P ()

0 commit comments

Comments
 (0)