forked from yzyzsun/CP-next
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathParser.purs
581 lines (495 loc) · 16.6 KB
/
Parser.purs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
module Language.CP.Parser where
import Prelude hiding (between)
import Control.Alt ((<|>))
import Control.Lazy (fix)
import Data.Array as Array
import Data.Either (Either(..), either)
import Data.Identity (Identity)
import Data.List (List, foldl, many, null, some, toUnfoldable)
import Data.List.NonEmpty (toList)
import Data.Maybe (Maybe(..), isJust, isNothing, optional)
import Data.String.CodeUnits as SCU
import Data.String.Regex.Flags (noFlags)
import Data.Tuple (Tuple(..))
import Language.CP.Syntax.Common (ArithOp(..), BinOp(..), CompOp(..), LogicOp(..), UnOp(..))
import Language.CP.Syntax.Source (Bias(..), Def(..), MethodPattern(..), Prog(..), RcdField(..), RcdTy(..), SelfAnno, Tm(..), TmParam(..), Ty(..), TyParam, TypeDef(..))
import Language.CP.Util (foldl1, isCapitalized)
import Parsing (Parser, fail, position)
import Parsing.Combinators (between, choice, endBy, option, sepEndBy, sepEndBy1, try)
import Parsing.Expr (Assoc(..), Operator(..), OperatorTable, buildExprParser)
import Parsing.Language (haskellStyle)
import Parsing.String (char, regex)
import Parsing.String.Basic (lower, upper)
import Parsing.Token (GenLanguageDef(..), LanguageDef, TokenParser, makeTokenParser, unGenLanguageDef)
import Partial.Unsafe (unsafeCrashWith)
type SParser a = Parser String a
-- Program --
program :: SParser Prog
program = do
defs <- many def
optExpr <- optional expr
pure $ Prog defs $ case optExpr of
Just e -> e
Nothing -> TmUnit
def :: SParser Def
def = interface <|> tyDef <|> tmDef
interface :: SParser Def
interface = do
reserved "interface"
a <- upperIdentifier
sorts <- many (angles upperIdentifier)
params <- many upperIdentifier
typeDef <- option Interface (reserved "extends" *> bty ty <#> InterfaceExtends)
t <- recordTy
symbol ";"
pure $ TyDef typeDef a sorts params t
tyDef :: SParser Def
tyDef = do
reserved "type"
a <- upperIdentifier
sorts <- many (angles upperIdentifier)
params <- many upperIdentifier
symbol "="
t <- ty
symbol ";"
pure $ TyDef TypeAlias a sorts params t
tmDef :: SParser Def
tmDef = do
d <- try do
x <- lowerIdentifier
tys <- many $ try $ tyParams false
tms <- many tmParams
t <- optional (symbol ":" *> ty)
symbol "="
pure $ TmDef x tys tms t
e <- expr
symbol ";"
pure $ d e
-- Expressions --
expr :: SParser Tm
expr = fix $ \e -> position >>= \p -> TmPos p <$> colonexpr e
colonexpr :: SParser Tm -> SParser Tm
colonexpr e = opexpr e >>= \e' ->
TmAnno e' <$ symbol ":" <*> ty <|> pure e'
opexpr :: SParser Tm -> SParser Tm
opexpr e = buildExprParser operators $ lexpr e
lexpr :: SParser Tm -> SParser Tm
lexpr e = fexpr e <|> lambdaAbs <|> tyLambdaAbs <|> trait <|> new <|>
ifThenElse <|> letIn <|> letrec <|> open <|> toString <|>
fixpoint <|> fold <|> unfold <|> ref <|> switch
fexpr :: SParser Tm -> SParser Tm
fexpr e = do
Tuple isCtor f <- Tuple true <<< TmVar <$> upperIdentifier <|>
Tuple false <$> excludexpr e
args <- many $ flip TmTApp <$> tyArg <|>
flip TmApp <$> excludexpr e
pure $ (if isCtor then TmNew else identity) (foldl (#) f args)
excludexpr :: SParser Tm -> SParser Tm
excludexpr e = renamexpr e >>= \e' ->
TmExclude e' <$ symbol "\\\\" <*> bty ty <|>
TmRemoval e' <$ symbol "\\" <*> identifier <|>
pure e'
renamexpr :: SParser Tm -> SParser Tm
renamexpr e = dotexpr e >>= \e' -> option e' $ try $ brackets do
l1 <- identifier
symbol "<-"
l2 <- identifier
pure $ TmRename e' l1 l2
dotexpr :: SParser Tm -> SParser Tm
dotexpr e = bangexpr e >>= \e' -> foldl (#) e' <$>
many (flip TmPrj <$ char '.' <*> identifier)
-- prevent `arr!!i` from being parsed as `arr (!(!i))`
bangexpr :: SParser Tm -> SParser Tm
bangexpr e = try (symbol "!" *> aexpr e <#> TmDeref) <|> aexpr e
aexpr :: SParser Tm -> SParser Tm
aexpr e = choice [ naturalOrFloat <#> fromIntOrNumber
, between (char '`') (symbol "`") $ document e
, stringLiteral <#> TmString
, reserved "true" $> TmBool true
, reserved "false" $> TmBool false
, symbol "()" $> TmUnit
, reserved "undefined" $> TmUndefined
, lowerIdentifier <#> TmVar
, upperIdentifier <#> TmVar >>> TmNew
, char '$' *> upperIdentifier <#> TmVar
, brackets $ TmArray <<< toUnfoldable <$> sepEndBySemi e
, braces $ recordUpdate e <|> recordLit e
, parens e
]
lambdaAbs :: SParser Tm
lambdaAbs = do
symbol "\\"
xs <- some tmParams
symbol "->"
e <- expr
pure $ TmAbs xs e
tyLambdaAbs :: SParser Tm
tyLambdaAbs = do
symbol "/\\"
xs <- some (tyParams true)
symbol "."
e <- expr
pure $ TmTAbs xs e
trait :: SParser Tm
trait = do
reserved "trait"
self <- selfAnno
sig <- optional (reserved "implements" *> ty)
e1 <- optional (reserved "inherits" *> expr)
sig' <- optional (reserved "implements" *> ty)
symbol "=>"
e2 <- expr
pure $ TmTrait self (sig <|> sig') e1 e2
new :: SParser Tm
new = do
reserved "new"
e <- opexpr expr
pure $ TmNew e
ifThenElse :: SParser Tm
ifThenElse = do
reserved "if"
e1 <- expr
reserved "then"
e2 <- expr
reserved "else"
e3 <- expr
pure $ TmIf e1 e2 e3
letIn :: SParser Tm
letIn = do
reserved "let"
x <- lowerIdentifier
tys <- many $ try $ tyParams false
tms <- many tmParams
symbol "="
e1 <- expr
reserved "in"
e2 <- expr
pure $ TmLet x tys tms e1 e2
letrec :: SParser Tm
letrec = do
reserved "letrec"
x <- lowerIdentifier
tys <- many $ try $ tyParams false
tms <- many tmParams
symbol ":"
t <- ty
symbol "="
e1 <- expr
reserved "in"
e2 <- expr
pure $ TmLetrec x tys tms t e1 e2
open :: SParser Tm
open = do
reserved "open"
e1 <- expr
reserved "in"
e2 <- expr
pure $ TmOpen e1 e2
toString :: SParser Tm
toString = do
reserved "toString"
e <- dotexpr expr
pure $ TmToString e
fixpoint :: SParser Tm
fixpoint = do
reserved "fix"
x <- lowerIdentifier
symbol ":"
t <- ty
symbol "."
e <- opexpr expr
pure $ TmFix x e t
fold :: SParser Tm
fold = do
reserved "fold"
t <- tyArg
e <- dotexpr expr
pure $ TmFold t e
unfold :: SParser Tm
unfold = do
reserved "unfold"
t <- tyArg
e <- dotexpr expr
pure $ TmUnfold t e
ref :: SParser Tm
ref = do
reserved "ref"
e <- dotexpr expr
pure $ TmRef e
switch :: SParser Tm
switch = do
reserved "switch"
e <- expr
alias <- optional (reserved "as" *> lowerIdentifier)
cases <- some do
reserved "case"
t <- ty
symbol "=>"
e' <- expr
pure $ Tuple t e'
pure $ TmSwitch e alias cases
document :: SParser Tm -> SParser Tm
document p = position >>= \pos -> TmPos pos <<< TmDoc <$> do
docs <- many (backslash <|> plaintext)
pure $ if null docs then newStr (TmString "") else foldl1 newComp docs
where
backslash = char '\\' *> (command <|> interpolation <|> newline)
command = do
pos <- position
cmd <- identifier
targs <- many tyArg
args <- many $ choice [ parensWithoutTrailingSpace p
, bracesWithoutTrailingSpace recordArg
, bracketsWithoutConsumingSpace $ document p
]
let f = if isCapitalized cmd then TmNew else identity
pure $ TmPos pos (f (foldl TmApp (foldl TmTApp (TmVar cmd) targs) args))
recordArg = TmRcd <$> sepEndBySemi (recordField p false)
interpolation = newStr <<< TmToString <$> parensWithoutTrailingSpace p
newline = char '\\' $> newEndl
plaintext = newStr <<< TmString <$> re
re = either unsafeCrashWith identity $ regex """[^\\\]`]+""" noFlags
newEndl = TmNew (TmVar "Endl")
newStr s = TmNew (TmApp (TmVar "Str") s)
newComp x y = TmNew (TmApp (TmApp (TmVar "Comp") x) y)
parensWithoutTrailingSpace :: forall a. SParser a -> SParser a
parensWithoutTrailingSpace = between (symbol "(") (char ')')
bracesWithoutTrailingSpace :: forall a. SParser a -> SParser a
bracesWithoutTrailingSpace = between (symbol "{") (char '}')
bracketsWithoutConsumingSpace :: forall a. SParser a -> SParser a
bracketsWithoutConsumingSpace = between (char '[') (char ']')
recordUpdate :: SParser Tm -> SParser Tm
recordUpdate p = do
rcd <- try $ p <* reserved "with"
fields <- sepEndBySemi1 (Tuple <$> identifier <* symbol "=" <*> p)
pure $ TmUpdate rcd fields
recordLit :: SParser Tm -> SParser Tm
recordLit p = TmRcd <$> sepEndBySemi do
o <- isJust <$> optional (reserved "override")
self <- selfAnno
recordField p o <|> methodPattern p o self <|> defaultPattern p self
recordField :: SParser Tm -> Boolean -> SParser RcdField
recordField p o = do
l <- identifier
params <- many tmParams
symbol "="
e <- p
pure $ RcdField o l params (Left e)
methodPattern :: SParser Tm -> Boolean -> SelfAnno -> SParser RcdField
methodPattern p o self = do
if isJust self then symbol "@" else pure unit
symbol "("
l <- identifier
params <- many tmParams
symbol ")"
symbol "."
l' <- identifier
params' <- many tmParams
symbol "="
e <- p
pure $ RcdField o l params (Right (MethodPattern self l' params' e))
defaultPattern :: SParser Tm -> SelfAnno -> SParser RcdField
defaultPattern p self = do
if isNothing self then symbol "_" else pure unit
symbol "."
l <- identifier
params <- many tmParams
symbol "="
e <- p
pure $ DefaultPattern (MethodPattern self l params e)
operators :: OperatorTable Identity String Tm
operators = [ [ Prefix (reservedOp "-" $> TmUnary Neg)
, Prefix (reservedOp "#" $> TmUnary Len)
, Prefix (reservedOp "√" $> TmUnary Sqrt)
]
, [ Infix (reservedOp "!!" $> TmBinary Index) AssocLeft ]
, [ Infix (reservedOp "*" $> TmBinary (Arith Mul)) AssocLeft
, Infix (reservedOp "/" $> TmBinary (Arith Div)) AssocLeft
, Infix (reservedOp "%" $> TmBinary (Arith Mod)) AssocLeft
]
, [ Infix (reservedOp "+" $> TmBinary (Arith Add)) AssocLeft
, Infix (reservedOp "-" $> TmBinary (Arith Sub)) AssocLeft
]
, [ Infix (reservedOp "++" $> TmBinary Append) AssocLeft ]
, [ Infix (reservedOp "==" $> TmBinary (Comp Eql)) AssocNone
, Infix (reservedOp "!=" $> TmBinary (Comp Neq)) AssocNone
, Infix (reservedOp "<" $> TmBinary (Comp Lt)) AssocNone
, Infix (reservedOp "<=" $> TmBinary (Comp Le)) AssocNone
, Infix (reservedOp ">" $> TmBinary (Comp Gt)) AssocNone
, Infix (reservedOp ">=" $> TmBinary (Comp Ge)) AssocNone
]
, [ Infix (reservedOp "&&" $> TmBinary (Logic And)) AssocRight ]
, [ Infix (reservedOp "||" $> TmBinary (Logic Or)) AssocRight ]
, [ Infix (reservedOp "^" $> TmForward) AssocLeft ]
, [ Infix (reservedOp ",," $> TmMerge Neutral) AssocLeft
, Infix (reservedOp "," $> TmMerge Neutral) AssocLeft
, Infix (reservedOp "+," $> TmMerge Leftist) AssocLeft
, Infix (reservedOp ",+" $> TmMerge Rightist) AssocLeft
, Infix (reservedOp "\\-" $> TmDiff) AssocLeft
]
, [ Infix (reservedOp ":=" $> TmAssign) AssocNone ]
, [ Infix (reservedOp ">>" $> TmSeq) AssocLeft ]
]
-- Types --
ty :: SParser Ty
ty = fix \t -> buildExprParser toperators $ cty t
cty :: SParser Ty -> SParser Ty
cty t = foldl TyApp <$> bty t <*> many (bty t) <|>
forallTy <|> muTy <|> refTy <|> traitTy <|> absTy
bty :: SParser Ty -> SParser Ty
bty t = foldl TyApp <$> aty t <*> many (sortTy t)
aty :: SParser Ty -> SParser Ty
aty t = choice [ reserved "Int" $> TyInt
, reserved "Double" $> TyDouble
, reserved "String" $> TyString
, reserved "Bool" $> TyBool
, symbol "()" $> TyUnit
, reserved "Top" $> TyTop
, reserved "Bot" $> TyBot
, upperIdentifier <#> TyVar
, brackets t <#> TyArray
, recordTy
, parens t
]
sortTy :: SParser Ty -> SParser Ty
sortTy t = angles do
ti <- t
to <- optional (symbol "=>" *> t)
pure $ TySort ti to
forallTy :: SParser Ty
forallTy = do
reserved "forall"
xs <- some (tyParams true)
symbol "."
t <- ty
pure $ TyForall xs t
muTy :: SParser Ty
muTy = do
reserved "mu"
x <- upperIdentifier
symbol "."
t <- ty
pure $ TyRec x t
refTy :: SParser Ty
refTy = do
reserved "Ref"
t <- bty ty
pure $ TyRef t
traitTy :: SParser Ty
traitTy = do
reserved "Trait"
angles do
ti <- optional (try (ty <* symbol "=>"))
to <- ty
pure $ TyTrait ti to
-- used for serialization of type aliases
absTy :: SParser Ty
absTy = do
symbol "\\"
e <- choice [ Left <$> try upperIdentifier
, Right <$> angles (Tuple <$> upperIdentifier <* symbol "," <*> upperIdentifier)
]
symbol "->"
t <- ty
pure $ case e of Left a -> TyAbs a t
Right (Tuple a b) -> TySig a b t
recordTy :: SParser Ty
recordTy = braces $ TyRcd <$> sepEndBySemi do
l <- identifier
opt <- isJust <$> optional (symbol "?")
symbol ":"
t <- ty
pure $ RcdTy l t opt
toperators :: OperatorTable Identity String Ty
toperators = [ [ Infix (reservedOp "&" $> TyAnd) AssocLeft ]
, [ Infix (reservedOp "|" $> TyOr) AssocLeft ]
, [ Infix (reservedOp "\\" $> TyDiff) AssocLeft ]
, [ Infix (reservedOp "->" $> TyArrow) AssocRight ]
]
-- Helpers --
fromIntOrNumber :: Either Int Number -> Tm
fromIntOrNumber (Left int) = TmInt int
fromIntOrNumber (Right number) = TmDouble number
tyArg :: SParser Ty
tyArg = char '@' *> bty ty
tyParams :: Boolean -> SParser TyParam
tyParams us = Tuple <$> id <*> pure Nothing <|>
parens (Tuple <$> id <* symbol "*" <*> (Just <$> ty))
where id = if us then underscore <|> upperIdentifier else upperIdentifier
tmParams :: SParser TmParam
tmParams = choice [ parensNameColonType
, TmParam <$> id <@> Nothing
, WildCard <$> braces (endBySemi defaultField <* symbol "..")
]
where id = lowerIdentifier <|> underscore
parensNameColonType = parens do
x <- id
symbol ":"
t <- ty
pure $ TmParam x (Just t)
defaultField = do
x <- lowerIdentifier
symbol "="
e <- expr
pure $ Tuple x e
selfAnno :: SParser SelfAnno
selfAnno = optional $ brackets $
Tuple <$> lowerIdentifier <*> optional (symbol ":" *> ty)
-- Lexer --
langDef :: LanguageDef
langDef = LanguageDef (unGenLanguageDef haskellStyle) { reservedNames =
[ "true", "false", "undefined", "if", "then", "else", "toString", "fix"
, "trait", "implements", "inherits", "override", "new", "fold", "unfold"
, "let", "letrec", "open", "in", "with", "ref", "switch", "as", "case"
, "type", "interface", "extends", "forall", "mu"
, "Int", "Double", "String", "Bool", "Top", "Bot", "Trait", "Ref"
]
}
lang :: TokenParser
lang = makeTokenParser langDef
identifier :: SParser String
identifier = lang.identifier
reserved :: String -> SParser Unit
reserved = lang.reserved
operator :: SParser String
operator = lang.operator
reservedOp :: String -> SParser Unit
reservedOp = lang.reservedOp
stringLiteral :: SParser String
stringLiteral = lang.stringLiteral
naturalOrFloat :: SParser (Either Int Number)
naturalOrFloat = lang.naturalOrFloat
symbol :: String -> SParser Unit
symbol s = lang.symbol s $> unit
underscore :: SParser String
underscore = lang.symbol "_"
lexeme :: forall a. SParser a -> SParser a
lexeme = lang.lexeme
whiteSpace :: SParser Unit
whiteSpace = lang.whiteSpace
parens :: forall a. SParser a -> SParser a
parens = lang.parens
braces :: forall a. SParser a -> SParser a
braces = lang.braces
angles :: forall a. SParser a -> SParser a
angles = lang.angles
brackets :: forall a. SParser a -> SParser a
brackets = lang.brackets
sepEndBySemi1 :: forall a. SParser a -> SParser (List a)
sepEndBySemi1 p = toList <$> sepEndBy1 p (symbol ";")
sepEndBySemi :: forall a. SParser a -> SParser (List a)
sepEndBySemi = flip sepEndBy $ symbol ";"
endBySemi :: forall a. SParser a -> SParser (List a)
endBySemi = flip endBy $ symbol ";"
ident :: SParser Char -> SParser String
ident identStart = lexeme $ try do
let languageDef = unGenLanguageDef langDef
c <- identStart
cs <- Array.many languageDef.identLetter
let word = SCU.singleton c <> SCU.fromCharArray cs
if not (Array.elem word languageDef.reservedNames) then pure word
else fail $ "Unexpected reserved word " <> show word
lowerIdentifier :: SParser String
lowerIdentifier = ident lower
upperIdentifier :: SParser String
upperIdentifier = ident upper