Skip to content

Commit f2fb0fd

Browse files
author
jared
committed
Improved documentation for the parser
1 parent 49e035f commit f2fb0fd

File tree

1 file changed

+28
-8
lines changed
  • lambda-buffers-frontend/src/LambdaBuffers/Frontend

1 file changed

+28
-8
lines changed

lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs

+28-8
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,10 @@ type Parser s m a = ParsecT s () m a
5353
-- - nonterm -> alt1 | ... | altn
5454
--
5555
--
56-
-- Tokens form the vocabulary of Lambda Buffer Files. There are classes of
57-
-- tokens (keyword, modulename, longmodulename, tyname, longtyname, varname,
58-
-- punctuation, fieldname, classname, longclassname) as follows.
56+
-- Tokens form the vocabulary of Lambda Buffer Files. The classes of *tokens*
57+
-- (keyword, modulename, longmodulename, tyname, longtyname, varname,
58+
-- punctuation, fieldname, classname, longclassname) are as follows.
59+
-- Note that some of these token classes overlap, but they may be distinguished via parsing.
5960
--
6061
-- keyword -> 'module' | 'sum' | 'prod' | 'record' | 'opaque' | 'class' | 'instance' | 'import' | 'qualified' | 'as'
6162
-- modulename -> upperCamelCase
@@ -80,16 +81,22 @@ type Parser s m a = ParsecT s () m a
8081
-- line comments. At each point, the longest possible token satisfying the
8182
-- token definitions is read.
8283
--
84+
-- A *line comment* is any sequence of characters which begins with '--'
85+
-- followed by zero or more printable Unicode characters up to the first end of
86+
-- line ('\n' or '\r\n').
87+
--
8388
-- Finally, the grammar for Lambda Buffer Files is as follows.
8489
--
90+
-- start -> module
91+
--
8592
-- module -> 'module' modulename imports statements
8693
--
8794
-- import -> 'import' [ 'qualified' ] longmodulename
8895
-- [ 'as' longmodulename ]
8996
-- [ '(' [ { tyname ',' } tyname [','] ] ')' ]
9097
-- imports -> { import }
9198
--
92-
-- statements -> [ { statement newlines1 } statement [ newlines1 ] ]
99+
-- statements -> { statement }
93100
-- statement -> tydef
94101
-- | classdef
95102
-- | instanceclause
@@ -114,7 +121,7 @@ type Parser s m a = ParsecT s () m a
114121
-- opaquetydef -> 'opaque' tyname { varname }
115122
--
116123
-- classdef -> 'class' [ classexps '<=' ] classname { varname }
117-
-- // Warning: this part makes it not LL(1)!
124+
-- // Warning: this is not LL(1)!
118125
-- // In the future, we should shift to some form of
119126
-- // an LALR(1) parser.
120127
-- classexp -> classref { varname }
@@ -174,8 +181,8 @@ type Parser s m a = ParsecT s () m a
174181
-- * Primitives
175182

176183
{- | @'token' pa@ runs the parser @pa@ with 'try' followed by 'junk' to remove
177-
whitespace. Moreover, this gets the SourceInfo of the parsed token w/o the
178-
whitespace
184+
whitespace. Moreover, this gets the 'SourceInfo' of the parsed token without
185+
the whitespace.
179186
180187
See [Note: Parser Implementation].
181188
-}
@@ -216,6 +223,12 @@ runParser p = runParserT (junk *> p <* eof) ()
216223

217224
-- * Lexical elements
218225

226+
--
227+
-- - Functions which have @parse@ as a prefix simply parse the token.
228+
--
229+
-- - Functions which have @token@ as a prefix wrap the corresponding @parse@
230+
-- function with the 'token' function.
231+
219232
parseModuleNamePart :: Stream s m Char => Parser s m (ModuleNamePart SourceInfo)
220233
parseModuleNamePart = withSourceInfo . label' "module part name" $ ModuleNamePart <$> pModuleNamePart
221234

@@ -391,6 +404,11 @@ parseField :: Stream s m Char => Parser s m (Field SourceInfo)
391404
parseField = withSourceInfo . label' "record field" $ do
392405
fn <- tokenFieldName
393406
_ <- token $ char ':'
407+
-- TODO: strictly speaking, there's a bug with this when parsing
408+
-- > record A a = { fieldName :-- a }
409+
-- since this will parse the @:--@ as @:@ and @--@ will start a comment.
410+
-- Technically, the specification says that this should parse as the token
411+
-- @:-@, and then the remaining @-@ should be a parse error.
394412
Field fn <$> parseTyTopLevel
395413

396414
parseTyDef :: Stream s m Char => Parser s m (TyDef SourceInfo)
@@ -527,9 +545,11 @@ parseImport = label' "import statement" $ do
527545
mayNames = fmap snd mayBracketSrcInfoAndNames
528546

529547
return $
530-
Import isQual modName mayNames mayModAlias $ -- Get the rightmost position of the rightmost parsed token
548+
Import isQual modName mayNames mayModAlias $
531549
srcInfo
532550
{ to =
551+
-- Get the rightmost position of the rightmost parsed token
552+
-- Note: the 'fromJust' clearly never fails.
533553
fromJust $
534554
fmap to mayBracketSrcInfo
535555
<|> ( case mayModAlias of

0 commit comments

Comments
 (0)