From d4790ca1a0fa4908b3e55a0f5eebe19d460df8d9 Mon Sep 17 00:00:00 2001
From: jared <>
Date: Tue, 17 Oct 2023 02:46:46 -0600
Subject: [PATCH 01/10] Restructured frontend parser

- Uniform handling of whitespace

- Added a syntactic specification in the comments

- Updated test suite to reflect the changes
---
 .../src/LambdaBuffers/Frontend/Parsec.hs      | 460 +++++++++++++-----
 .../src/LambdaBuffers/Frontend/Syntax.hs      |  31 ++
 .../test/Test/LambdaBuffers/Frontend.hs       |   2 +-
 .../Test/LambdaBuffers/Frontend/Parsec.hs     |  93 ++--
 4 files changed, 431 insertions(+), 155 deletions(-)

diff --git a/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs b/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs
index b2e93cf8..6d02d139 100644
--- a/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs
+++ b/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs
@@ -13,52 +13,263 @@ module LambdaBuffers.Frontend.Parsec (
   parseDerive,
   parseClassDef,
   parseClassSups,
+  junk,
 ) where
 
 import Control.Applicative (Alternative ((<|>)))
-import Control.Monad (MonadPlus (mzero), void)
+import Control.Monad (MonadPlus (mzero), void, when)
 import Data.Char qualified as Char
 import Data.Kind (Type)
 import Data.Maybe (fromJust, isJust)
 import Data.String (IsString (fromString))
 import LambdaBuffers.Compiler.NamingCheck (pClassName, pConstrName, pFieldName, pModuleNamePart, pTyName)
-import LambdaBuffers.Frontend.Syntax (ClassConstraint (ClassConstraint), ClassDef (ClassDef), ClassName (ClassName), ClassRef (ClassRef), ConstrName (ConstrName), Constraint (Constraint), Constructor (Constructor), Derive (Derive), Field (Field), FieldName (FieldName), Import (Import), InstanceClause (InstanceClause), Module (Module), ModuleAlias (ModuleAlias), ModuleName (ModuleName), ModuleNamePart (ModuleNamePart), Name (Name), Product (Product), Record (Record), SourceInfo (SourceInfo), SourcePos (SourcePos), Statement (StClassDef, StDerive, StInstanceClause, StTyDef), Sum (Sum), Ty (TyApp, TyRef', TyVar), TyArg (TyArg), TyBody (Opaque, ProductBody, RecordBody, SumBody), TyDef (TyDef), TyName (TyName), TyRef (TyRef), VarName (VarName), kwClassDef, kwDerive, kwInstance, kwTyDefOpaque, kwTyDefProduct, kwTyDefRecord, kwTyDefSum)
-import Text.Parsec (ParseError, ParsecT, SourceName, Stream, between, char, endOfLine, eof, getPosition, label, lower, many, many1, optionMaybe, optional, runParserT, satisfy, sepBy, sepEndBy, sourceColumn, sourceLine, sourceName, space, string, try)
+import LambdaBuffers.Frontend.Syntax (ClassConstraint (ClassConstraint), ClassDef (ClassDef), ClassName (ClassName), ClassRef (ClassRef), ConstrName (ConstrName), Constraint (Constraint), Constructor (Constructor), Derive (Derive), Field (Field), FieldName (FieldName), Import (Import), InstanceClause (InstanceClause), Module (Module), ModuleAlias (ModuleAlias), ModuleName (ModuleName), ModuleNamePart (ModuleNamePart), Name (Name), Product (Product), Record (Record), SourceInfo (SourceInfo, to), SourcePos (SourcePos), Statement (StClassDef, StDerive, StInstanceClause, StTyDef), Sum (Sum), Ty (TyApp, TyRef', TyVar), TyArg (TyArg), TyBody (Opaque, ProductBody, RecordBody, SumBody), TyDef (TyDef), TyName (TyName), TyRef (TyRef), VarName (VarName), kwAs, kwClassDef, kwDerive, kwImport, kwInstance, kwModule, kwQualified, kwTyDefOpaque, kwTyDefProduct, kwTyDefRecord, kwTyDefSum, kws)
+import Text.Parsec (ParseError, ParsecT, SourceName, Stream, alphaNum, between, char, endOfLine, eof, getPosition, label, lower, many, many1, manyTill, notFollowedBy, optionMaybe, runParserT, satisfy, sepBy, sepEndBy, sourceColumn, sourceLine, sourceName, space, string, try, unexpected, (<?>))
 
 type Parser :: Type -> (Type -> Type) -> Type -> Type
 type Parser s m a = ParsecT s () m a
 
+-- Note: Syntactic Form of Lambda Buffer Files.
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- The notational conventions used to present the syntax are based on those
+-- of [1]. The following conventions are used.
+--
+--  - [ pattern ]   optional
+--
+--  - { pattern }   zero or more repetitions
+--
+--  - ( pattern )   grouping
+--
+--  - pat1 | pat2   choice
+--
+--  - pat1\pat2     difference -- elements generated by pat1 except those
+--                  generated by pat2.
+--
+--  - 'terminal'    terminal syntax
+--
+--  - // comment    comment
+--
+-- Productions will be of the form
+--  - nonterm -> alt1 | ... | altn
+--
+--
+-- Tokens form the vocabulary of Lambda Buffer Files. There are classes of
+-- tokens (keyword, modulename, longmodulename, tyname, longtyname, varname,
+-- punctuation, fieldname, classname, longclassname) as follows.
+--
+--  keyword         -> 'module' | 'sum' | 'prod' | 'record' | 'opaque' | 'class' | 'instance' | 'derive' | 'import' | 'qualified' | 'as'
+--  modulename      -> upperCamelCase
+--  longmodulename  -> long modulename
+--  tyname          -> upperCamelCase
+--  fieldname       -> lowerCamelCase // keywords are currently not excluded, see 'parseFieldName'
+--  longtyname      -> long tyname
+--  varname         -> lowers\keyword
+--  punctuation     -> '<=' | ',' | '.' | '(' | ')' | '{' | '}' | ':' | ':-' | '=' | '|'
+--  classname       -> upperCamelCase
+--  longclassname   -> long upperCamelCase
+--
+--  upperCamelCase -> upper { alphaNum }
+--  lowerCamelCase -> lower { alphaNum }
+--  long           -> { upperCamelCase '.' }
+--  upper          -> // upper case or title case alphabetic unicode characters (letters)
+--  lower          -> // lower case alphabetic unicode characters (letters)
+--  lowers         -> lower { lower }
+--  alphaNum       -> // alphabetic or numeric unicode characters
+--
+-- Input files are broken into *tokens* which are delimited by whitespace or
+-- line comments. At each point, the longest possible token satisfying the
+-- token definitions is read.
+--
+-- Finally, the grammar for Lambda Buffer Files is as follows.
+--
+-- module -> 'module' longmodulename imports statements
+--
+-- import -> 'import' [ 'qualified' ] longmodulename
+--      [ 'as' longmodulename ]
+--      [ '(' [ { tyname ',' } tyname [','] ] ')' ]
+-- imports -> { import }
+--
+-- statements -> { statement }
+-- statement -> tydef
+--            | classdef
+--            | instanceclause
+--            | derivedef
+--
+-- tydef -> sumtydef | prodtydef | recordtydef | opaquetydef
+--
+-- sumtydef  -> 'sum' tyname { varname } '=' sum
+-- sum -> sumconstructor { '|' sumconstructor }
+-- sumconstructor  -> tyname prod
+--
+-- prodtydef  -> 'prod' tyname { varname } '=' prod
+-- prod -> { tyexpr }
+-- tyexpr -> varname
+--         | longtyname
+--         | '(' prod ')'
+--
+-- recordtydef  -> 'record' tyname { varname } '=' record
+-- record -> '{' [ field { ',' field  } ] '}'
+-- field -> fieldname ':' prod
+--
+-- opaquetydef -> 'opaque' tyname { varname }
+--
+-- classdef    -> 'class' [ classexps '<=' ] classname { varname }
+--                          // Warning: this part makes it not LL(1)!
+--                          // In the future, we should shift to some form of
+--                          // an LALR(1) parser.
+-- classexp    -> classref { varname }
+--              | '(' classexps ')'
+-- classexps   -> [ classexp { ',' classexp } ]
+--
+-- instanceclause -> 'instance'  constraint [ ':-' classexps ]
+-- constraint -> classref { tyexpr }
+--
+-- derivedef -> 'derive' constraint
+--
+-- References.
+--  [1] Haskell 2010 Language Report by Simon Marlow
+--
+-- Note: Parser Implementation.
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- We use Parsec [1] to parse the grammar.
+--
+-- We have the following invariant.
+--
+-- - Whitespace Invariant: Each parser @pa@ assumes that it starts at a
+--   nonwhitespace character, where whitespace is defined by the parser 'junk',
+--   i.e., whitespace or comments.
+--
+-- Remark.
+-- The Whitespace Invariant is originally from [2].
+--
+-- For the Whitespace Invariant to be initially true, @'runParser' pa@ calls
+-- 'junk', then @pa@, then 'Text.Parsec.eof' which ensures:
+--
+--  1. the Whitespace Invariant is initially true for the parser @pa@; and
+--
+--  2. the entire input is consumed.
+--
+-- Then, to ensure that all "subparsers" maintain the Whitespace Invariant, we
+-- introduce the parser combinator 'token' for which @'token' pa@ runs @pa@,
+-- then runs 'junk' to ensure that any following parsers will start at a non
+-- whitespace character.
+--
+-- Thus, if we want to parse the string @"pomeranian"@ we should write
+--
+-- > token (Text.Parsec.string "pomeranian")
+--
+-- instead of
+--
+-- > -- do NOT do this since it will NOT maintain the Whitespace Invariant.
+-- > Text.Parsec.string "pomeranian"
+--
+-- References.
+--
+--  [1] Parsec: Direct Style Monadic Parser Combinators For the Real World by
+--  Daan Leijen and Erik Meijer
+--
+--  [2] Monadic Parser Combinators by Graham Hutton and Erik Meijer
+
+-- * Primitives
+
+{- | @'token' pa@ runs the parser @pa@ with 'try' followed by 'junk' to remove
+ whitespace. Moreover, this gets the SourceInfo of the parsed token w/o the
+ whitespace
+
+ See [Note: Parser Implementation].
+-}
+token :: Stream s m Char => Parser s m a -> Parser s m (SourceInfo, a)
+token pa = withSourceInfo (try $ fmap (\a srcInfo -> (srcInfo, a)) pa) <* junk
+
+token' :: Stream s m Char => Parser s m a -> Parser s m a
+token' = fmap snd . token
+
+{- | 'junk' skips whitespace and comments.
+
+ See [Note: Parser Implementation].
+-}
+junk :: forall s m. Stream s m Char => Parser s m ()
+junk = void (many (spaces1 <|> comment))
+  where
+    spaces1 :: Parser s m ()
+    spaces1 = void $ many1 (space <?> "")
+
+    -- A comment runs from "--" to the end of its line, or to the end of the
+    -- input when the last line has no trailing newline; tabs are accepted.
+    comment :: Parser s m ()
+    comment =
+      void $
+        try (string "--" <?> "")
+          -- Note: the 'try' for 'endOfLine' is necessary because it may
+          -- consume a '\r' before failing on a missing '\n'.
+          *> manyTill (satisfy (\c -> Char.isPrint c || c == '\t')) (void (try endOfLine) <|> eof)
+
+{- | 'keyword' parses the provided keyword ensuring that the keyword does *not*
+ overlap with varname tokens.
+-}
+keyword :: Stream s m Char => String -> Parser s m ()
+keyword k = void $ string k *> notFollowedBy alphaNum
+
 runParser :: (Stream s IO Char) => Parser s IO a -> SourceName -> s -> IO (Either ParseError a)
-runParser p = runParserT (p <* eof) ()
+runParser p = runParserT (junk *> p <* eof) ()
+
+-- * Lexical elements
 
 parseModuleNamePart :: Stream s m Char => Parser s m (ModuleNamePart SourceInfo)
 parseModuleNamePart = withSourceInfo . label' "module part name" $ ModuleNamePart <$> pModuleNamePart
 
 parseModuleName :: Stream s m Char => Parser s m (ModuleName SourceInfo)
-parseModuleName = withSourceInfo . label' "module name" $ ModuleName <$> sepBy (try parseModuleNamePart) (try $ char '.')
+parseModuleName = withSourceInfo . label' "module name" $ ModuleName <$> sepBy parseModuleNamePart (char '.')
+
+tokenModuleName :: Stream s m Char => Parser s m (ModuleName SourceInfo)
+tokenModuleName = token' parseModuleName
 
 parseTyVarName :: Stream s m Char => Parser s m (VarName SourceInfo)
-parseTyVarName = withSourceInfo . label' "type variable name" $ VarName . fromString <$> many1 lower
+parseTyVarName = withSourceInfo . label' "type variable name" $ do
+  v <- many1 lower
+  notKeyword v
+  return . VarName . fromString $ v
+
+-- | 'notKeyword' tests if the string is not a keyword -- failing otherwise.
+notKeyword :: Stream s m Char => String -> Parser s m ()
+notKeyword v = when (v `elem` kws) $ unexpected "keyword"
 
+-- | 'parseName' parses a class or a type name
 parseName :: Stream s m Char => Parser s m (Name SourceInfo)
 parseName = withSourceInfo . label' "either class or type name" $ Name <$> pTyName
 
+tokenName :: Stream s m Char => Parser s m (Name SourceInfo)
+tokenName = token' parseName
+
 parseTyName :: Stream s m Char => Parser s m (TyName SourceInfo)
 parseTyName = withSourceInfo . label' "type name" $ TyName <$> pTyName
 
+tokenTyName :: Stream s m Char => Parser s m (TyName SourceInfo)
+tokenTyName = token' parseTyName
+
 parseClassName :: Stream s m Char => Parser s m (ClassName SourceInfo)
 parseClassName = withSourceInfo . label' "class name" $ ClassName <$> pClassName
 
+tokenClassName :: Stream s m Char => Parser s m (ClassName SourceInfo)
+tokenClassName = token' parseClassName
+
 parseModuleAliasInRef :: Stream s m Char => Parser s m (ModuleAlias SourceInfo)
 parseModuleAliasInRef =
   withSourceInfo . label' "module alias in type or class reference" $
     ModuleAlias <$> do
+      -- some awkwardness with the 'try' here.
+      -- Ideally, we should use the @.@ to be the first set to determine when
+      -- to stop parsing this... but oh well...
       ps <- many1 (try (parseModuleNamePart <* char '.'))
       withSourceInfo . return $ ModuleName ps
 
 parseModuleAliasInImport :: Stream s m Char => Parser s m (ModuleAlias SourceInfo)
 parseModuleAliasInImport = withSourceInfo . label' "module alias in module import" $ ModuleAlias <$> parseModuleName
 
+tokenModuleAliasInImport :: Stream s m Char => Parser s m (ModuleAlias SourceInfo)
+tokenModuleAliasInImport = token' parseModuleAliasInImport
+
 parseTyRef' :: Stream s m Char => Parser s m (TyRef SourceInfo)
 parseTyRef' = withSourceInfo . label' "type reference" $ do
   mayAlias <- optionMaybe parseModuleAliasInRef
@@ -67,9 +278,53 @@ parseTyRef' = withSourceInfo . label' "type reference" $ do
 parseTyVar :: Stream s m Char => Parser s m (Ty SourceInfo)
 parseTyVar = label' "type variable" $ TyVar <$> parseTyVarName
 
+tokenTyVar :: Stream s m Char => Parser s m (Ty SourceInfo)
+tokenTyVar = token' parseTyVar
+
 parseTyRef :: Stream s m Char => Parser s m (Ty SourceInfo)
 parseTyRef = withSourceInfo . label' "type reference" $ TyRef' <$> parseTyRef'
 
+tokenTyRef :: Stream s m Char => Parser s m (Ty SourceInfo)
+tokenTyRef = token' parseTyRef
+
+parseFieldName :: Stream s m Char => Parser s m (FieldName SourceInfo)
+parseFieldName =
+  withSourceInfo . label' "record field name" $
+    -- TODO: technically, we should have the following implementation, but the
+    -- test suite wants keywords to be allowed to use as field names...
+    -- > v <- pFieldName
+    -- > notKeyword $ Data.Text.unpack v
+    -- > return $ FieldName v
+    -- TODO: fix the documentation to reflect this
+    FieldName <$> pFieldName
+
+tokenFieldName :: Stream s m Char => Parser s m (FieldName SourceInfo)
+tokenFieldName = token' parseFieldName
+
+parseConstructorName :: Stream s m Char => Parser s m (ConstrName SourceInfo)
+parseConstructorName = withSourceInfo . label' "sum constructor name" $ ConstrName <$> pConstrName
+
+tokenConstructorName :: Stream s m Char => Parser s m (ConstrName SourceInfo)
+tokenConstructorName = token' parseConstructorName
+
+parseTyArg :: Stream s m Char => Parser s m (TyArg SourceInfo)
+parseTyArg = withSourceInfo . label' "type argument" $ do
+  VarName vn _ <- parseTyVarName
+  return $ TyArg vn
+
+tokenTyArg :: Stream s m Char => Parser s m (TyArg SourceInfo)
+tokenTyArg = token' parseTyArg
+
+parseClassRef :: Stream s m Char => Parser s m (ClassRef SourceInfo)
+parseClassRef = withSourceInfo . label' "class reference" $ do
+  mayAlias <- optionMaybe parseModuleAliasInRef
+  ClassRef mayAlias <$> parseClassName
+
+tokenClassRef :: Stream s m Char => Parser s m (ClassRef SourceInfo)
+tokenClassRef = token' parseClassRef
+
+-- * Grammar
+
 {- | Inner type expression.
  Valid examples:
 
@@ -88,16 +343,16 @@ parseTyTopLevel = label' "top level type expression" $ parseTys >>= tysToTy
 
 -- | Sexp :- var | TyRef | (Sexp)
 parseSexp :: forall s m. Stream s m Char => Parser s m (Ty SourceInfo)
-parseSexp = label' "s-expression" $ between parseLineSpaces parseLineSpaces (parseSexpList <|> parseSexpAtom)
+parseSexp = label' "s-expression" $ parseSexpAtom <|> parseSexpList
 
 parseSexpAtom :: forall s m. Stream s m Char => Parser s m (Ty SourceInfo)
-parseSexpAtom = try parseTyRef <|> try parseTyVar
+parseSexpAtom = tokenTyRef <|> tokenTyVar
 
 parseTys :: forall s m. Stream s m Char => Parser s m [Ty SourceInfo]
 parseTys = many parseSexp
 
 parseSexpList :: forall s m. Stream s m Char => Parser s m (Ty SourceInfo)
-parseSexpList = between (char '(') (char ')') (parseTys >>= tysToTy)
+parseSexpList = between (token (char '(')) (token (char ')')) (parseTys >>= tysToTy)
 
 tysToTy :: Stream s m Char => [Ty SourceInfo] -> Parser s m (Ty SourceInfo)
 tysToTy tys = withSourceInfo $ case tys of
@@ -110,11 +365,15 @@ parseSum = withSourceInfo . label' "sum type expression" $ do
   cs <-
     sepBy
       parseSumConstructor
-      (parseLineSpaces >> char '|' >> parseLineSpaces)
+      (token (char '|'))
   return $ Sum cs
 
 parseSumConstructor :: Stream s m Char => Parser s m (Constructor SourceInfo)
-parseSumConstructor = withSourceInfo . label' "sum type constructor" $ Constructor <$> parseConstructorName <*> (parseLineSpaces >> parseProduct)
+parseSumConstructor =
+  withSourceInfo . label' "sum type constructor" $
+    Constructor
+      <$> tokenConstructorName
+      <*> parseProduct
 
 parseProduct :: Stream s m Char => Parser s m (Product SourceInfo)
 parseProduct = withSourceInfo . label' "product type expression" $ Product <$> parseTys
@@ -123,25 +382,17 @@ parseRecord :: Stream s m Char => Parser s m (Record SourceInfo)
 parseRecord = withSourceInfo . label' "record type expression" $ do
   fields <-
     between
-      (char '{' >> parseLineSpaces)
-      (parseLineSpaces >> char '}')
-      $ sepBy parseField (parseLineSpaces >> char ',' >> parseLineSpaces)
+      (token $ char '{')
+      (token $ char '}')
+      $ sepBy parseField (token $ char ',')
   return $ Record fields
 
 parseField :: Stream s m Char => Parser s m (Field SourceInfo)
 parseField = withSourceInfo . label' "record field" $ do
-  fn <- parseFieldName
-  parseLineSpaces1
-  _ <- char ':'
-  parseLineSpaces1
+  fn <- tokenFieldName
+  _ <- token $ char ':'
   Field fn <$> parseTyTopLevel
 
-parseFieldName :: Stream s m Char => Parser s m (FieldName SourceInfo)
-parseFieldName = withSourceInfo . label' "record field name" $ FieldName <$> pFieldName
-
-parseConstructorName :: Stream s m Char => Parser s m (ConstrName SourceInfo)
-parseConstructorName = withSourceInfo . label' "sum constructor name" $ ConstrName <$> pConstrName
-
 parseTyDef :: Stream s m Char => Parser s m (TyDef SourceInfo)
 parseTyDef = label' "type definition" $ parseSumTyDef <|> parseProdTyDef <|> parseRecordTyDef <|> parseOpaqueTyDef
 
@@ -156,49 +407,30 @@ parseRecordTyDef = parseTyDef' kwTyDefRecord (RecordBody <$> parseRecord)
 
 parseTyDef' :: Stream s m Char => String -> Parser s m (TyBody SourceInfo) -> Parser s m (TyDef SourceInfo)
 parseTyDef' kw parseBody = withSourceInfo . label' (kw <> " type definition") $ do
-  _ <- string kw
-  _ <- parseLineSpaces1
-  tyN <- parseTyName
-  _ <- parseLineSpaces1
-  args <- sepEndBy parseTyArg parseLineSpaces1
-  _ <- char '='
-  _ <- parseLineSpaces1
+  _ <- token $ keyword kw
+  tyN <- tokenTyName
+  args <- many tokenTyArg
+  _ <- token $ char '='
   TyDef tyN args <$> parseBody
 
 parseOpaqueTyDef :: Stream s m Char => Parser s m (TyDef SourceInfo)
 parseOpaqueTyDef = withSourceInfo . label' "opaque type definition" $ do
-  _ <- string kwTyDefOpaque
-  _ <- parseLineSpaces1
-  tyN <- parseTyName
-  maySpace <- optionMaybe parseLineSpace
-  args <- case maySpace of
-    Nothing -> parseLineSpaces >> return []
-    Just _ -> do
-      _ <- parseLineSpaces
-      sepBy parseTyArg parseLineSpaces1
+  _ <- token (keyword kwTyDefOpaque)
+  tyN <- tokenTyName
+  args <- many tokenTyArg
   return $ TyDef tyN args Opaque
 
-parseTyArg :: Stream s m Char => Parser s m (TyArg SourceInfo)
-parseTyArg = withSourceInfo . label' "type argument" $ do
-  VarName vn _ <- parseTyVarName
-  return $ TyArg vn
-
-parseClassRef :: Stream s m Char => Parser s m (ClassRef SourceInfo)
-parseClassRef = withSourceInfo . label' "class reference" $ do
-  mayAlias <- optionMaybe parseModuleAliasInRef
-  ClassRef mayAlias <$> parseClassName
-
 parseConstraint :: Stream s m Char => Parser s m (Constraint SourceInfo)
-parseConstraint = withSourceInfo . label' "constraint" $ Constraint <$> parseClassRef <*> parseTys
+parseConstraint = withSourceInfo . label' "constraint" $ Constraint <$> tokenClassRef <*> parseTys
 
 parseDerive :: Stream s m Char => Parser s m (Derive SourceInfo)
-parseDerive = label' "derive statement" $ string kwDerive >> parseLineSpaces >> Derive <$> parseConstraint
+parseDerive = label' "derive statement" $ token (keyword kwDerive) >> Derive <$> parseConstraint
 
 parseInstanceClause :: Stream s m Char => Parser s m (InstanceClause SourceInfo)
 parseInstanceClause = withSourceInfo . label' "instance clause" $ do
-  _ <- string kwInstance
-  clauseHead <- between parseLineSpaces parseLineSpaces parseConstraint
-  mayBodyFollows <- optionMaybe (string ":-")
+  _ <- token (keyword kwInstance)
+  clauseHead <- parseConstraint
+  mayBodyFollows <- optionMaybe (token (string ":-"))
   case mayBodyFollows of
     Nothing -> return $ InstanceClause clauseHead []
     Just _ -> InstanceClause clauseHead <$> parseInstanceBody
@@ -208,56 +440,57 @@ parseInstanceBody = parseConstraints
 
 -- | Constraints sexp.
 parseConstraintSexp :: Stream s m Char => Parser s m [Constraint SourceInfo]
-parseConstraintSexp = between parseLineSpaces parseLineSpaces (parseConstraintList <|> parseConstraintAtom)
+parseConstraintSexp = parseConstraintList <|> parseConstraintAtom
 
 parseConstraintAtom :: Stream s m Char => Parser s m [Constraint SourceInfo]
 parseConstraintAtom = pure <$> parseConstraint
 
 parseConstraintList :: Stream s m Char => Parser s m [Constraint SourceInfo]
-parseConstraintList = between (char '(') (char ')') parseConstraints
+parseConstraintList = between (token (char '(')) (token (char ')')) parseConstraints
 
 parseConstraints :: Stream s m Char => Parser s m [Constraint SourceInfo]
-parseConstraints = concat <$> sepBy parseConstraintSexp (char ',')
+parseConstraints = concat <$> sepBy parseConstraintSexp (token (char ','))
 
 parseClassDef :: Stream s m Char => Parser s m (ClassDef SourceInfo)
 parseClassDef = withSourceInfo . label' "class definition" $ do
-  _ <- string kwClassDef
+  _ <- token (keyword kwClassDef)
   maySups <-
     optionMaybe
+      -- TODO: parsing this is problematic for LL(1) parsers, hence the
+      -- rather large 'try'.
+      -- We really are abusing the infinite look ahead here...
       ( try $ do
           sups <- parseClassSups
-          _ <- string "<="
+          _ <- token (string "<=")
           return sups
       )
-  _ <- parseLineSpaces1
-  clName <- parseClassName
-  clArgs <- fromJust <$> optionMaybe (parseLineSpaces >> parseClassArgs)
+  clName <- tokenClassName
+  clArgs <- parseClassArgs
   case maySups of
     Nothing -> return $ ClassDef clName clArgs []
     Just sups -> return $ ClassDef clName clArgs sups
 
 parseClassArgs :: Stream s m Char => Parser s m [TyArg SourceInfo]
-parseClassArgs = label' "class args" $ sepBy parseTyArg (try parseLineSpaces1)
+parseClassArgs = label' "class args" $ many tokenTyArg
 
 -- | ClassCnstrs sexp.
 parseClassCnstrSexp :: Stream s m Char => Parser s m [ClassConstraint SourceInfo]
-parseClassCnstrSexp = between parseLineSpaces parseLineSpaces (parseClassCnstrList <|> parseClassCnstrAtom)
+parseClassCnstrSexp = parseClassCnstrList <|> parseClassCnstrAtom
 
 parseClassCnstrAtom :: Stream s m Char => Parser s m [ClassConstraint SourceInfo]
 parseClassCnstrAtom = pure <$> parseClassCnstr
 
 parseClassCnstrList :: Stream s m Char => Parser s m [ClassConstraint SourceInfo]
-parseClassCnstrList = between (char '(') (char ')') parseClassSups
+parseClassCnstrList = between (token $ char '(') (token $ char ')') parseClassSups
 
 -- FIXME(bladyjoker): Should accept "Eq a "
 parseClassSups :: Stream s m Char => Parser s m [ClassConstraint SourceInfo]
-parseClassSups = concat <$> sepBy parseClassCnstrSexp (char ',')
+parseClassSups = concat <$> sepBy parseClassCnstrSexp (token (char ','))
 
 parseClassCnstr :: Stream s m Char => Parser s m (ClassConstraint SourceInfo)
-parseClassCnstr = label' "class constraint" $ do
-  ref <- parseClassRef
-  args <- fromJust <$> optionMaybe (parseLineSpaces >> parseClassArgs)
-  return $ ClassConstraint ref args
+parseClassCnstr =
+  label' "class constraint" $
+    ClassConstraint <$> tokenClassRef <*> parseClassArgs
 
 parseStatement :: Stream s m Char => Parser s m (Statement SourceInfo)
 parseStatement =
@@ -266,62 +499,45 @@ parseStatement =
     <|> (StInstanceClause <$> parseInstanceClause)
     <|> (StDerive <$> parseDerive)
 
-parseStatements :: Stream s m Char => Parser s m [Statement SourceInfo]
-parseStatements = sepEndBy parseStatement (many1 parseNewLine)
-
 parseModule :: Stream s m Char => Parser s m (Module SourceInfo)
 parseModule = withSourceInfo . label' "module definition" $ do
-  _ <- string "module"
-  _ <- parseLineSpaces1
-  modName <- parseModuleName
-  _ <- parseLineSpaces
-  _ <- many1 parseNewLine
-  imports <- sepEndBy parseImport (many1 parseNewLine)
-  stmnts <- parseStatements
-  _ <- many space
+  _ <- token $ keyword kwModule
+  modName <- tokenModuleName
+  imports <- many parseImport
+  stmnts <- many parseStatement
   return $ Module modName imports stmnts
 
 parseImport :: Stream s m Char => Parser s m (Import SourceInfo)
-parseImport = withSourceInfo . label' "import statement" $ do
-  _ <- string "import"
-  _ <- parseLineSpaces1
-  isQual <- isJust <$> optionMaybe (string "qualified" >> parseLineSpaces1)
-  modName <- parseModuleName
-  may <-
-    optionMaybe
-      ( do
-          mayModAlias <- optionMaybe (try $ parseLineSpaces1 >> string "as" >> parseLineSpaces1 *> parseModuleAliasInImport)
-          mayNames <-
-            optionMaybe
-              ( try $ do
-                  parseLineSpaces1 >> char '(' >> parseLineSpaces
-                  names <- sepEndBy parseName (char ',' >> parseLineSpaces)
-                  _ <- try parseLineSpaces >> char ')'
-                  return names
-              )
-          _ <- try parseLineSpaces
-          return (mayModAlias, mayNames)
-      )
-  case may of
-    Nothing -> return $ Import isQual modName Nothing Nothing
-    Just (mayModAlias, mayNames) -> return $ Import isQual modName mayNames mayModAlias
-
-parseNewLine :: Stream s m Char => Parser s m ()
-parseNewLine = label' "lb new line" $ void endOfLine <|> try parseComment
-
-parseComment :: Stream s m Char => Parser s m ()
-parseComment = label' "comment" $ void $ between (string "--") endOfLine (many (char ' ' <|> satisfy Char.isPrint))
-
-parseLineSpace :: Stream s m Char => Parser s m ()
-parseLineSpace = label' "line space" $ void $ try $ do
-  optional endOfLine
-  char ' ' <|> char '\t'
-
-parseLineSpaces1 :: Stream s m Char => Parser s m ()
-parseLineSpaces1 = void $ try $ many1 parseLineSpace
-
-parseLineSpaces :: Stream s m Char => Parser s m ()
-parseLineSpaces = void $ try $ many parseLineSpace
+parseImport = label' "import statement" $ do
+  -- Getting the starting position
+  (srcInfo, _) <- token $ keyword kwImport
+
+  isQual <- isJust <$> optionMaybe (token $ keyword kwQualified)
+  modName@(ModuleName _ nameSrcInfo) <- tokenModuleName
+
+  mayModAlias <- optionMaybe $ token (keyword kwAs) *> tokenModuleAliasInImport
+
+  mayBracketSrcInfoAndNames <- optionMaybe $ do
+    _ <- token $ char '('
+    names <- sepEndBy tokenName (token $ char ',')
+    (bracketSrcInfo, _) <- token $ char ')'
+    return (bracketSrcInfo, names)
+
+  let mayBracketSrcInfo = fmap fst mayBracketSrcInfoAndNames
+      mayNames = fmap snd mayBracketSrcInfoAndNames
+
+  return $
+    Import isQual modName mayNames mayModAlias $ -- Get the rightmost position of the rightmost parsed token
+      srcInfo
+        { to =
+            fromJust $
+              fmap to mayBracketSrcInfo
+                <|> ( case mayModAlias of
+                        Just (ModuleAlias _ modAliasSrcInfo) -> Just $ to modAliasSrcInfo
+                        _ -> Nothing
+                    )
+                <|> fmap to (Just nameSrcInfo)
+        }
 
 getSourcePosition :: Stream s m Char => Parser s m SourcePos
 getSourcePosition = do
diff --git a/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Syntax.hs b/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Syntax.hs
index fcc35154..daa959a3 100644
--- a/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Syntax.hs
+++ b/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Syntax.hs
@@ -38,6 +38,11 @@ module LambdaBuffers.Frontend.Syntax (
   kwDerive,
   kwClassDef,
   kwInstance,
+  kwImport,
+  kwQualified,
+  kwAs,
+  kwModule,
+  kws,
 ) where
 
 import Data.Text (Text)
@@ -56,6 +61,32 @@ kwInstance :: String
 kwInstance = "instance" :: String
 kwClassDef :: String
 kwClassDef = "class" :: String
+kwImport :: String
+kwImport = "import" :: String
+kwQualified :: String
+kwQualified = "qualified" :: String
+kwAs :: String
+kwAs = "as" :: String
+kwModule :: String
+kwModule = "module" :: String
+
+{- | 'kws' is a list of all keywords.
+ Warning: this invariant must be maintained manually
+-}
+kws :: [String]
+kws =
+  [ kwTyDefSum
+  , kwTyDefProduct
+  , kwTyDefRecord
+  , kwTyDefOpaque
+  , kwDerive
+  , kwInstance
+  , kwClassDef
+  , kwImport
+  , kwModule
+  , kwQualified
+  , kwAs
+  ]
 
 tyBodyToTyDefKw :: TyBody info -> String
 tyBodyToTyDefKw (SumBody _) = kwTyDefSum
diff --git a/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend.hs b/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend.hs
index 4ab20342..7ac37978 100644
--- a/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend.hs
+++ b/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend.hs
@@ -59,7 +59,7 @@ frontendErrorTests dataDir =
             fileIn = workDir </> "A.lbf"
             fileErr = fileIn
         errOrMod <- runFrontend [workDir] [fileIn]
-        assertError ("[" <> fileErr <> ":(3:1)] \nunexpected 't'\nexpecting lb new line, import statement, type definition, class definition, instance clause, derive statement, space or end of input") errOrMod
+        assertError ("[" <> fileErr <> ":(3:1)] \nunexpected 't'\nexpecting import statement, type definition, class definition, instance clause, derive statement or end of input") errOrMod
     , testCase "Multiple modules found" $ do
         let workDir = dataDir </> "multiple_modules_found"
             fileIn = workDir </> "A.lbf"
diff --git a/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend/Parsec.hs b/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend/Parsec.hs
index 1a7d8f98..4c1446b5 100644
--- a/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend/Parsec.hs
+++ b/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend/Parsec.hs
@@ -4,7 +4,7 @@ import Test.Tasty (TestTree, testGroup)
 
 import Control.Monad (void)
 import Data.Set qualified as Set
-import LambdaBuffers.Frontend.Parsec (parseClassDef, parseClassSups, parseConstraint, parseDerive, parseInstanceBody, parseInstanceClause, parseProduct, parseRecord, parseSum, parseTyInner, parseTyTopLevel)
+import LambdaBuffers.Frontend.Parsec (junk, parseClassDef, parseClassSups, parseConstraint, parseDerive, parseInstanceBody, parseInstanceClause, parseProduct, parseRecord, parseSum, parseTyInner, parseTyTopLevel)
 import LambdaBuffers.Frontend.Syntax (ClassConstraint, Constraint, SourceInfo)
 import Test.Tasty.HUnit (assertFailure, testCase)
 import Text.Parsec (Parsec, eof, runParser)
@@ -34,7 +34,16 @@ testInnerTypeExpression =
         "parses"
         [ parsesEq ["a", " a", "a ", " a ", "(a)", "( a )", "(  (a   ) )"] parseTyInner
         , parsesEq ["Int", " Int", "Int ", "(Int)", "( Int)", "(Int )", " (Int)", "(Int) ", "((Int))"] parseTyInner
-        , parsesEq
+        , -- TODO: this test case is screwed.. there's problems with the data
+          -- representation for why this won't pass e.g. @A a a@ is @A@ applied
+          -- to the list @[a,a]@; so breaking this down to the left associative
+          -- chain of applications really is broken.
+          -- , parsesEq
+          --     [ "(A.B.A a a a)"
+          --     , "((A.B.A a) a a)"
+          --     ]
+          --     parseTyInner
+          parsesEq
             [ "(Maybe a)"
             , " (Maybe a)"
             , "(Maybe a) "
@@ -179,6 +188,9 @@ testTopLevelTypeExpression =
         , parses "( Maybe ( Maybe ( Maybe (Maybe a ))))" parseTyTopLevel
         , parses "(Maybe (A a) b (c) (d) )" parseTyTopLevel
         , parses "Maybe a Int b String" parseTyTopLevel
+        , parses "Maybe\na" parseTyTopLevel
+        , parses "Maybe \na" parseTyTopLevel
+        , parses "Maybe a\n" parseTyTopLevel
         ]
     , testGroup
         "fails"
@@ -186,9 +198,6 @@ testTopLevelTypeExpression =
         , fails "( a ))" parseTyTopLevel
         , fails "(  (a   ) ))" parseTyTopLevel
         , fails "(Int))" parseTyTopLevel
-        , fails "Maybe\na" parseTyTopLevel
-        , fails "Maybe \na" parseTyTopLevel
-        , fails "Maybe a\n" parseTyTopLevel
         ]
     ]
 
@@ -205,23 +214,23 @@ testRecordExpression =
         , parsesEq ["{x : Either a b}", "{ x : Either a b}", "{x : Either a b }", "{ x : Either a b }", "{x : (Either a b)}"] parseRecord
         , parsesEq ["{x : a, y : Int, z : Maybe a}", "{  x : a,y : Int , z : Maybe a }", "{\n x : a,\n y : Int ,\n z : Maybe a\n }"] parseRecord
         , parsesEq ["{x : a, y : Prelude.Numeric.Int, z : Prelude.Maybe a}", "{  x : a,y : Prelude.Numeric.Int , z : Prelude.Maybe a }", "{\n x : a,\n y : Prelude.Numeric.Int ,\n z : Prelude.Maybe a\n }"] parseRecord
+        , parses "{x:y}" parseRecord
+        , parses "{ x:y }" parseRecord
+        , parses "{ x: y}" parseRecord
+        , parses "{ x :y}" parseRecord
+        , parses "{x :y}" parseRecord
+        , parses "{x: y}" parseRecord
+        , parses "{\nx : a}" parseRecord
+        , parses "{x\n: a}" parseRecord
+        , parses "{x :\na}" parseRecord
+        , parses "{x : a\n}" parseRecord
+        , parses " {}" parseRecord
         ]
     , testGroup
         "fails"
-        [ fails " {}" parseRecord
-        , fails "{x}" parseRecord
+        [ fails "{x}" parseRecord
         , fails "{ x }" parseRecord
-        , fails "{x:y}" parseRecord
-        , fails "{ x:y }" parseRecord
         , fails "{ x: }" parseRecord
-        , fails "{ x: y}" parseRecord
-        , fails "{ x :y}" parseRecord
-        , fails "{x :y}" parseRecord
-        , fails "{x: y}" parseRecord
-        , fails "{\nx : a}" parseRecord
-        , fails "{x\n: a}" parseRecord
-        , fails "{x :\na}" parseRecord
-        , fails "{x : a\n}" parseRecord
         ]
     ]
 
@@ -243,14 +252,15 @@ testProductExpression =
         , parses "a Int (Maybe a)" parseProduct
         , parses "   a y  Int  z  (Maybe a) " parseProduct
         , parses "Maybe\n Int" parseProduct
+        , parses "\n" parseProduct
+        , parses "\nMaybe Int" parseProduct
+        , parses "Maybe \nInt" parseProduct
+        , parses "Maybe Int\n" parseProduct
         ]
     , testGroup
         "fails"
-        [ fails "\n" parseProduct
-        , fails "\nMaybe Int" parseProduct
-        , fails "Maybe \nInt" parseProduct
-        , fails "Maybe Int\n" parseProduct
-        , fails "()" parseProduct
+        [ fails "()" parseProduct
+        , fails "(    ) -- dog" parseProduct
         ]
     ]
 
@@ -271,11 +281,11 @@ testSumExpression =
         , parses "A a b | B b a | C c d" parseSum
         , parses "A ((a) b) | B (b a) | C (c) (d)" parseSum
         , parses "A Int (Maybe Int String) | B (Prelude.Maybe a) | C Prelude.Numeric.Int Prelude.Numeric.String" parseSum
+        , parses "\n" parseSum
         ]
     , testGroup
         "fails"
-        [ fails "\n" parseSum
-        , fails "A |" parseSum
+        [ fails "A |" parseSum
         , fails "A ()| B" parseSum
         , fails "A | B ()" parseSum
         , fails "A (B | C)" parseSum
@@ -317,6 +327,7 @@ testInstanceBodyExpression =
     [ testGroup
         "parses"
         [ parsesEq ["", "()"] parseIB -- TODO(bladyjoker): Figure out ().
+        , parsesEq ["Eq a, Show a", "(Eq a, Show a)"] parseIB
         , parsesEq ["Eq a", "Eq  a", "Eq a ", " Eq a", "\n Eq a", "Eq\n a"] parseIB
         , parsesEq ["Eq a, Eq b", "Eq a , Eq b", "Eq a\n , Eq b", "Eq a\n , Eq b, ()"] parseIB
         , parses "Eq Int" parseIB
@@ -337,11 +348,11 @@ testInstanceBodyExpression =
             , "Eq a, (Show b, Json c), MPTC (Maybe a) (Either a Int) c"
             ]
             parseIB
+        , parses "\n" parseIB
         ]
     , testGroup
         "fails"
-        [ fails "\n" parseIB
-        , fails "eq a" parseIB
+        [ fails "eq a" parseIB
         , fails "a" parseIB
         , fails "Eq a," parseIB
         , fails "Eq a, " parseIB
@@ -405,6 +416,14 @@ testClassSups =
         [ parses "" parseCS
         , parses "Eq a" parseCS
         , parses " Eq a" parseCS
+        , parses " Eq a, Show a, Eq b" parseCS
+        , parsesEq
+            [ " Eq a, Show a, Eq b"
+            , " Eq a, (Show a, Eq b)"
+            , " (Eq a, Show a, Eq b)"
+            , " (Eq a, Show a), Eq b"
+            ]
+            parseCS
         , -- FIX(bladyjoker): parses "Eq a " parseCS
           parsesEq
             [ "Eq a"
@@ -417,10 +436,12 @@ testClassSups =
             , "(Eq  a)"
             ]
             parseCS
+        , parses "\n" parseCS
         ]
     , testGroup
         "fails"
-        [ fails "\n" parseCS
+        [ fails "Eq Int" parseCS
+        , fails "Eq Int, show Int" parseCS
         ]
     ]
   where
@@ -444,6 +465,12 @@ testClassDef =
             , "class ( Eq  a) <= Ord a"
             ]
             parseClassDef
+        , parsesEq
+            [ "class (Eq a), Eq b <= Ord a"
+            , "class (Eq  a, Eq b) <= Ord a"
+            , "class Eq  a ,        Eq b <= Ord a"
+            ]
+            parseClassDef
         , parsesEq
             [ "class Trivial"
             , "class  Trivial"
@@ -456,15 +483,17 @@ testClassDef =
             , "class ((MPTC1 b a, MPTC2 c b a)) <= MPTC a b c"
             ]
             parseClassDef
+        , parses " class Eq a" parseClassDef
+        , parses "class Eq a " parseClassDef
+        , parses "class () <= Eq a" parseClassDef
         ]
     , testGroup
         "fails"
         [ fails "\n" parseClassDef
         , fails "" parseClassDef
-        , fails " class Eq a" parseClassDef
-        , fails "class Eq a " parseClassDef
         , fails "class Eq a <=" parseClassDef
         , fails "class Eq a <= " parseClassDef
+        , fails "class Eq a, Eq a <= Eq a<= Eq a" parseClassDef
         , fails "class (Eq a)" parseClassDef
         ]
     ]
@@ -472,7 +501,7 @@ testClassDef =
 parsesEq :: forall a info. (Functor a, Show (a ()), Ord (a ())) => [String] -> Parsec String () (a info) -> TestTree
 parsesEq inputs parser =
   testCase (show inputs <> " should parse the same") $
-    let ress = runParser (parser <* eof) () "test" <$> inputs
+    let ress = runParser (junk *> parser <* eof) () "test" <$> inputs
      in case foldr
           ( \res (errs, ps) -> case res of
               Left err -> (err : errs, ps)
@@ -484,11 +513,11 @@ parsesEq inputs parser =
           (errs, ps) -> assertFailure $ show ("Wanted all to parse the same" :: String, errs, ps)
 
 parses :: String -> Parsec String () a -> TestTree
-parses input parser = testCase (show input) $ case runParser (parser <* eof) () "test" input of
+parses input parser = testCase (show input) $ case runParser (junk *> parser <* eof) () "test" input of
   Left err -> assertFailure (show err)
   Right _ -> return ()
 
 fails :: Show a => String -> Parsec String () a -> TestTree
-fails input parser = testCase (show input) $ case runParser (parser <* eof) () "test" input of
+fails input parser = testCase (show input) $ case runParser (junk *> parser <* eof) () "test" input of
   Left _ -> return ()
   Right res -> assertFailure (show res)

From e8c328c656f88dad0985424733543b436bf42850 Mon Sep 17 00:00:00 2001
From: jared <>
Date: Tue, 17 Oct 2023 03:05:25 -0600
Subject: [PATCH 02/10] Added front end parser test cases

---
 .../data/good_instance/GoodInstance.lbf       | 11 +++++++++++
 .../ModuleDocumentation.lbf                   | 13 +++++++++++++
 .../test/Test/LambdaBuffers/Frontend.hs       | 10 ++++++++++
 .../Test/LambdaBuffers/Frontend/Parsec.hs     | 19 +++++++++++++++++++
 4 files changed, 53 insertions(+)
 create mode 100644 lambda-buffers-frontend/data/good_instance/GoodInstance.lbf
 create mode 100644 lambda-buffers-frontend/data/good_module_documentation/ModuleDocumentation.lbf

diff --git a/lambda-buffers-frontend/data/good_instance/GoodInstance.lbf b/lambda-buffers-frontend/data/good_instance/GoodInstance.lbf
new file mode 100644
index 00000000..e95d4812
--- /dev/null
+++ b/lambda-buffers-frontend/data/good_instance/GoodInstance.lbf
@@ -0,0 +1,11 @@
+module GoodInstance
+
+instance MyClass A
+
+class MyClass a
+
+sum A = A
+
+-- if we're wondering why this test case is here, previous parser versions
+-- confused 'instance' with 'import' and reported an unexpected 'n' in the
+-- 'instance' keyword.
diff --git a/lambda-buffers-frontend/data/good_module_documentation/ModuleDocumentation.lbf b/lambda-buffers-frontend/data/good_module_documentation/ModuleDocumentation.lbf
new file mode 100644
index 00000000..a369b17d
--- /dev/null
+++ b/lambda-buffers-frontend/data/good_module_documentation/ModuleDocumentation.lbf
@@ -0,0 +1,13 @@
+
+-- Some documentation here
+
+module ModuleDocumentation
+
+-- More documentation
+sum A = A
+
+
+-- Woo hoo, documentation is great
+-- (who reads it anyways)
+
+-- dog pomeranian yorkie maltese
diff --git a/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend.hs b/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend.hs
index 7ac37978..92ca207c 100644
--- a/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend.hs
+++ b/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend.hs
@@ -115,6 +115,16 @@ frontendSuccessTests dataDir =
                 fileIn = workDir </> "BadFormat.lbf"
             errOrMod' <- runFrontend [workDir] [fileIn]
             assertSuccess ["A", "BadFormat"] errOrMod'
+        , testCase "good_module_documentation/ModuleDocumentation.lbf also compiles" $ do
+            let workDir = dataDir </> "good_module_documentation"
+                fileIn = workDir </> "ModuleDocumentation.lbf"
+            errOrMod' <- runFrontend [workDir] [fileIn]
+            assertSuccess ["ModuleDocumentation"] errOrMod'
+        , testCase "good_instance/GoodInstance.lbf also compiles" $ do
+            let workDir = dataDir </> "good_instance"
+                fileIn = workDir </> "GoodInstance.lbf"
+            errOrMod' <- runFrontend [workDir] [fileIn]
+            assertSuccess ["GoodInstance"] errOrMod'
         ]
     ]
 
diff --git a/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend/Parsec.hs b/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend/Parsec.hs
index 4c1446b5..5cc83644 100644
--- a/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend/Parsec.hs
+++ b/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend/Parsec.hs
@@ -471,6 +471,17 @@ testClassDef =
             , "class Eq  a ,        Eq b <= Ord a"
             ]
             parseClassDef
+        , parsesEq
+            [ "class (Eq a), Eq b, Eq c <= Ord a"
+            , "class (Eq  a, Eq b), Eq c <= Ord a"
+            , "class Eq  a, (Eq b,      Eq c) <= Ord a"
+            , "class ((Eq  a), Eq b), Eq c <= Ord a"
+            , "class Eq  a, (Eq b,      (Eq c)) <= Ord a"
+            , "class (Eq  a, (Eq b)), Eq c <= Ord a"
+            , "class Eq  a, ((Eq b),      Eq c) <= Ord a"
+            , "class (Eq  a, ((Eq b),      Eq c)) <= Ord a"
+            ]
+            parseClassDef
         , parsesEq
             [ "class Trivial"
             , "class  Trivial"
@@ -498,6 +509,14 @@ testClassDef =
         ]
     ]
 
+-- * Parsing testing functions
+
+-- Note: when testing parses, since all parsers assume the invariant that they
+-- _must_ start at a non whitespace character, we always run 'junk' before
+-- running the parser. When the parser finishes, we of course run 'eof'
+-- to ensure it consumes the entire input.
+-- See [Note: Parser Implementation] in "LambdaBuffers.Frontend.Parsec" for details
+
 parsesEq :: forall a info. (Functor a, Show (a ()), Ord (a ())) => [String] -> Parsec String () (a info) -> TestTree
 parsesEq inputs parser =
   testCase (show inputs <> " should parse the same") $

From 2013699b7016de003b4116d85c6e220ebf54a4a3 Mon Sep 17 00:00:00 2001
From: jared <>
Date: Tue, 17 Oct 2023 11:23:32 -0600
Subject: [PATCH 03/10] Improved documentation for the parser

---
 .../src/LambdaBuffers/Frontend/Parsec.hs      | 36 ++++++++++++++-----
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs b/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs
index 6d02d139..6b865c7b 100644
--- a/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs
+++ b/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs
@@ -53,9 +53,10 @@ type Parser s m a = ParsecT s () m a
 --  - nonterm -> alt1 | ... | altn
 --
 --
--- Tokens form the vocabulary of Lambda Buffer Files. There are classes of
--- tokens (keyword, modulename, longmodulename, tyname, longtyname, varname,
--- punctuation, fieldname, classname, longclassname) as follows.
+-- Tokens form the vocabulary of Lambda Buffer Files. The classes of *tokens*
+-- (keyword, modulename, longmodulename, tyname, longtyname, varname,
+-- punctuation, fieldname, classname, longclassname) are as follows.
+-- Note that some of the tokens overlap but may be distinguished via parsing.
 --
 --  keyword         -> 'module' | 'sum' | 'prod' | 'record' | 'opaque' | 'class' | 'instance' | 'import' | 'qualified' | 'as'
 --  modulename      -> upperCamelCase
@@ -80,8 +81,14 @@ type Parser s m a = ParsecT s () m a
 -- line comments. At each point, the longest possible token satisfying the
 -- token definitions is read.
 --
+-- A *line comment* is any sequence of characters which begins with '--'
+-- followed by zero or more printable Unicode character to the first end of
+-- line ('\n' or '\r\n').
+--
 -- Finally, the grammar for Lambda Buffer Files is as follows.
 --
+-- start -> module
+--
 -- module -> 'module' modulename imports statements
 --
 -- import -> 'import' [ 'qualified' ] longmodulename
@@ -89,7 +96,7 @@ type Parser s m a = ParsecT s () m a
 --      [ '(' [ { tyname ',' } tyname [','] ] ')' ]
 -- imports -> { import }
 --
--- statements -> [ { statement newlines1 } statement [ newlines1 ] ]
+-- statements -> { statement }
 -- statement -> tydef
 --            | classdef
 --            | instanceclause
@@ -114,7 +121,7 @@ type Parser s m a = ParsecT s () m a
 -- opaquetydef -> 'opaque' tyname { varname }
 --
 -- classdef    -> 'class' [ classexps '<=' ] classname { varname }
---                          // Warning: this part makes it not LL(1)!
+--                          // Warning: this is not LL(1)!
 --                          // In the future, we should shift to some form of
 --                          // an LALR(1) parser.
 -- classexp    -> classref { varname }
@@ -174,8 +181,8 @@ type Parser s m a = ParsecT s () m a
 -- * Primitives
 
 {- | @'token' pa@ runs the parser @pa@ with 'try' followed by 'junk' to remove
- whitespace. Moreover, this gets the SourceInfo of the parsed token w/o the
- whitespace
+ whitespace. Moreover, this gets the 'SourceInfo' of the parsed token without
+ the whitespace
 
  See [Note: Parser Implementation].
 -}
@@ -216,6 +223,12 @@ runParser p = runParserT (junk *> p <* eof) ()
 
 -- * Lexical elements
 
+--
+-- - Functions which have @parse@ as a prefix simply parse the token
+--
+--  - Functions which have @token@ as a prefix wrap the corresponding @parse@
+--  function with the 'token' function.
+
 parseModuleNamePart :: Stream s m Char => Parser s m (ModuleNamePart SourceInfo)
 parseModuleNamePart = withSourceInfo . label' "module part name" $ ModuleNamePart <$> pModuleNamePart
 
@@ -391,6 +404,11 @@ parseField :: Stream s m Char => Parser s m (Field SourceInfo)
 parseField = withSourceInfo . label' "record field" $ do
   fn <- tokenFieldName
   _ <- token $ char ':'
+  -- TODO: strictly speaking, there's a bug with this when parsing
+  -- > record A a = { fieldName :-- a }
+  -- since this will parse the @:--@ as @:@ and @--@ will start a comment.
+  -- Technically, the specification says that this should parse as the token
+  -- @:-@, and then the remaining @-@ should parse error.
   Field fn <$> parseTyTopLevel
 
 parseTyDef :: Stream s m Char => Parser s m (TyDef SourceInfo)
@@ -527,9 +545,11 @@ parseImport = label' "import statement" $ do
       mayNames = fmap snd mayBracketSrcInfoAndNames
 
   return $
-    Import isQual modName mayNames mayModAlias $ -- Get the rightmost position of the rightmost parsed token
+    Import isQual modName mayNames mayModAlias $
       srcInfo
         { to =
+            -- Get the rightmost position of the rightmost parsed token
+            -- Note: the 'fromJust' clearly never fails.
             fromJust $
               fmap to mayBracketSrcInfo
                 <|> ( case mayModAlias of

From e3a40b6bedb5b233ff6946444f7a619dee6ef72c Mon Sep 17 00:00:00 2001
From: jared <>
Date: Wed, 18 Oct 2023 21:16:07 -0600
Subject: [PATCH 04/10] Parser improvements

- Changed parsing test case to no longer use keywords as a field name (as per the specification)

- Improved `LambdaBuffers/Frontend/Parsec.hs` documentation

- Updated TODO in `Test/LambdaBuffers/Frontend/Parsec.hs`

- Fixed `LambdaBuffers/Frontend/Parsec.hs` incorrectly parsing `:--`
---
 .../data/goldens/good/LambdaBuffers.lbf       |   6 +-
 .../src/LambdaBuffers/Frontend/Parsec.hs      | 134 ++----------------
 .../Test/LambdaBuffers/Frontend/Parsec.hs     |   2 +-
 3 files changed, 19 insertions(+), 123 deletions(-)

diff --git a/lambda-buffers-frontend/data/goldens/good/LambdaBuffers.lbf b/lambda-buffers-frontend/data/goldens/good/LambdaBuffers.lbf
index b98ea211..451da56e 100644
--- a/lambda-buffers-frontend/data/goldens/good/LambdaBuffers.lbf
+++ b/lambda-buffers-frontend/data/goldens/good/LambdaBuffers.lbf
@@ -52,7 +52,7 @@ record ClassDef = { name : ClassName
 
 derive Eq ClassDef
 
-record ClassConstraint = { class : ClassRef, args : List TyArg }
+record ClassConstraint = { classRef : ClassRef, args : List TyArg }
 
 derive Eq ClassConstraint
 
@@ -64,7 +64,7 @@ prod Derive = Constraint
 
 derive Eq Derive
 
-record Constraint = { class : ClassRef, args : List Ty }
+record Constraint = { classRef : ClassRef, args : List Ty }
 
 derive Eq Constraint
 
@@ -111,4 +111,4 @@ derive Eq ModuleNamePart
 
 prod ClassName = Text
 
-derive Eq ClassName
\ No newline at end of file
+derive Eq ClassName
diff --git a/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs b/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs
index 6b865c7b..b8aef8d7 100644
--- a/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs
+++ b/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs
@@ -22,6 +22,7 @@ import Data.Char qualified as Char
 import Data.Kind (Type)
 import Data.Maybe (fromJust, isJust)
 import Data.String (IsString (fromString))
+import Data.Text qualified as Text
 import LambdaBuffers.Compiler.NamingCheck (pClassName, pConstrName, pFieldName, pModuleNamePart, pTyName)
 import LambdaBuffers.Frontend.Syntax (ClassConstraint (ClassConstraint), ClassDef (ClassDef), ClassName (ClassName), ClassRef (ClassRef), ConstrName (ConstrName), Constraint (Constraint), Constructor (Constructor), Derive (Derive), Field (Field), FieldName (FieldName), Import (Import), InstanceClause (InstanceClause), Module (Module), ModuleAlias (ModuleAlias), ModuleName (ModuleName), ModuleNamePart (ModuleNamePart), Name (Name), Product (Product), Record (Record), SourceInfo (SourceInfo, to), SourcePos (SourcePos), Statement (StClassDef, StDerive, StInstanceClause, StTyDef), Sum (Sum), Ty (TyApp, TyRef', TyVar), TyArg (TyArg), TyBody (Opaque, ProductBody, RecordBody, SumBody), TyDef (TyDef), TyName (TyName), TyRef (TyRef), VarName (VarName), kwAs, kwClassDef, kwDerive, kwImport, kwInstance, kwModule, kwQualified, kwTyDefOpaque, kwTyDefProduct, kwTyDefRecord, kwTyDefSum, kws)
 import Text.Parsec (ParseError, ParsecT, SourceName, Stream, alphaNum, between, char, endOfLine, eof, getPosition, label, lower, many, many1, manyTill, notFollowedBy, optionMaybe, runParserT, satisfy, sepBy, sepEndBy, sourceColumn, sourceLine, sourceName, space, string, try, unexpected, (<?>))
@@ -31,110 +32,7 @@ type Parser s m a = ParsecT s () m a
 
 -- Note: Syntactic Form of Lambda Buffer Files.
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- The notational conventions used to present the syntax is based off of [1].
--- So, these notational conventions are used for presenting syntax.
---
---  - [ pattern ]   optional
---
---  - { pattern }   zero or more repetitions
---
---  - ( pattern )   grouping
---
---  - pat1 | pat2   choice
---
---  - pat1\pat2     difference -- elements generated by pat1 except those
---                  generated by pat2.
---
---  - 'terminal'    terminal syntax
---
---  - // comment    comment
---
--- Productions will be of the form
---  - nonterm -> alt1 | ... | altn
---
---
--- Tokens form the vocabulary of Lambda Buffer Files. The classes of *tokens*
--- (keyword, modulename, longmodulename, tyname, longtyname, varname,
--- punctuation, fieldname, classname, longclassname) are as follows.
--- Note that some of the tokens overlap but may be distinguished via parsing.
---
---  keyword         -> 'module' | 'sum' | 'prod' | 'record' | 'opaque' | 'class' | 'instance' | 'import' | 'qualified' | 'as'
---  modulename      -> upperCamelCase
---  longmodulename  -> long modulename
---  tyname          -> upperCamelCase
---  fieldname       -> lowerCamelCase\keyword
---  longtyname      -> long tyname
---  varname         -> lowers\keyword
---  punctuation     -> '<=' | ',' | '.' | '(' | ')' | '{' | '}' | ':' | ':-' | '=' | '|'
---  classname       -> upperCamelCase
---  longclassname   -> long upperCamelCase
---
---  upperCamelCase -> upper { alphaNum }
---  lowerCamelCase -> lower { alphaNum }
---  long           -> { upperCamelCase '.' }
---  upper          -> // upper case or title case alphabetic unicode characters (letters)
---  lower          -> // lower case alphabetic unicode characters (letters)
---  lowers         -> lower { lower }
---  alphaNum       -> // alphabetic or numeric unicode characters
---
--- Input files are broken into *tokens* which are delimited by whitespace or
--- line comments. At each point, the longest possible token satisfying the
--- token definitions is read.
---
--- A *line comment* is any sequence of characters which begins with '--'
--- followed by zero or more printable Unicode character to the first end of
--- line ('\n' or '\r\n').
---
--- Finally, the grammar for Lambda Buffer Files is as follows.
---
--- start -> module
---
--- module -> 'module' modulename imports statements
---
--- import -> 'import' [ 'qualified' ] longmodulename
---      [ 'as' longmodulename ]
---      [ '(' [ { tyname ',' } tyname [','] ] ')' ]
--- imports -> { import }
---
--- statements -> { statement }
--- statement -> tydef
---            | classdef
---            | instanceclause
---            | derivedef
---
--- tydef -> sumtydef | prodtydef | recordtydef | opaquetydef
---
--- sumtydef  -> 'sum' tyname { varname } '=' sum
--- sum -> sumconstructor { '|' sumconstructor }
--- sumconstructor  -> tyname prod
---
--- prodtydef  -> 'prod' tyname { varname } '=' prod
--- prod -> { tyexpr }
--- tyexpr -> varname
---         | longtyname
---         | '(' prod ')'
---
--- recordtydef  -> 'record' tyname { varname } '=' record
--- record -> '{' [ field { ',' field  } ] '}'
--- field -> fieldname ':' prod
---
--- opaquetydef -> 'opaque' tyname { varname }
---
--- classdef    -> 'class' [ classexps '<=' ] classname { varname }
---                          // Warning: this is not LL(1)!
---                          // In the future, we should shift to some form of
---                          // an LALR(1) parser.
--- classexp    -> classref { varname }
---              | '(' classexps ')'
--- classexps   -> [ classexp { ',' classexp } ]
---
--- instanceclause -> 'instance'  constraint [ ':-' classexps ]
--- constraint -> classref { tyexpr }
---
--- derivedef -> 'derive' constraint
---
--- References.
---  [1] Haskell 2010 Language Report by Simon Marlow
+-- See docs/syntax.md
 --
 -- Note: Parser Implementation.
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -213,7 +111,7 @@ junk = void (many (spaces1 <|> comment))
           manyTill (satisfy Char.isPrint) (try endOfLine)
 
 {- | 'keyword' parses the provided keyword ensuring that the keyword does *not*
- overlap with varname tokens.
+ overlap with varname tokens and fieldname tokens.
 -}
 keyword :: Stream s m Char => String -> Parser s m ()
 keyword k = void $ string k *> notFollowedBy alphaNum
@@ -302,14 +200,11 @@ tokenTyRef = token' parseTyRef
 
 parseFieldName :: Stream s m Char => Parser s m (FieldName SourceInfo)
 parseFieldName =
-  withSourceInfo . label' "record field name" $
-    -- TODO: technically, we should have the following implementation, but the
-    -- test suite wants keywords to be allowed to use as field names...
-    -- > v <- pFieldName
-    -- > notKeyword $ Data.Text.unpack v
-    -- > return $ FieldName v
-    -- TODO: fix the documentation to reflect this
-    FieldName <$> pFieldName
+  withSourceInfo . label' "record field name" $ do
+    v <- pFieldName
+    -- Recall in the lexical specification that fieldnames are disjoint from keywords
+    notKeyword $ Text.unpack v
+    return $ FieldName v
 
 tokenFieldName :: Stream s m Char => Parser s m (FieldName SourceInfo)
 tokenFieldName = token' parseFieldName
@@ -403,12 +298,13 @@ parseRecord = withSourceInfo . label' "record type expression" $ do
 parseField :: Stream s m Char => Parser s m (Field SourceInfo)
 parseField = withSourceInfo . label' "record field" $ do
   fn <- tokenFieldName
-  _ <- token $ char ':'
-  -- TODO: strictly speaking, there's a bug with this when parsing
+  _ <- token $ char ':' *> notFollowedBy (char '-')
+  -- Why is the @'notFollowedBy'@ here?
+  -- Consider:
   -- > record A a = { fieldName :-- a }
-  -- since this will parse the @:--@ as @:@ and @--@ will start a comment.
-  -- Technically, the specification says that this should parse as the token
-  -- @:-@, and then the remaining @-@ should parse error.
+  -- We want to parse the @:--@ as @:-@ and @-@ (the specification says this),
+  -- but without the @'notFollowedBy'@, this would parse as @:@ and @--@ will
+  -- start a comment.
   Field fn <$> parseTyTopLevel
 
 parseTyDef :: Stream s m Char => Parser s m (TyDef SourceInfo)
@@ -474,7 +370,7 @@ parseClassDef = withSourceInfo . label' "class definition" $ do
   _ <- token (keyword kwClassDef)
   maySups <-
     optionMaybe
-      -- TODO: parsing this is problematic for LL(1) parsers, hence the
+      -- Remark: parsing this is problematic for LL(1) parsers, hence the
       -- rather large 'try'.
       -- We really are abusing the infinite look ahead here...
       ( try $ do
diff --git a/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend/Parsec.hs b/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend/Parsec.hs
index 5cc83644..63f48b58 100644
--- a/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend/Parsec.hs
+++ b/lambda-buffers-frontend/test/Test/LambdaBuffers/Frontend/Parsec.hs
@@ -34,7 +34,7 @@ testInnerTypeExpression =
         "parses"
         [ parsesEq ["a", " a", "a ", " a ", "(a)", "( a )", "(  (a   ) )"] parseTyInner
         , parsesEq ["Int", " Int", "Int ", "(Int)", "( Int)", "(Int )", " (Int)", "(Int) ", "((Int))"] parseTyInner
-        , -- TODO: this test case is screwed.. there's problems with the data
+        , -- TODO(jaredponn): this test case is screwed.. there's problems with the data
           -- representation for why this won't pass e.g. @A a a@ is @A@ applied
           -- to the list @[a,a]@; so breaking this down to the left associative
           -- chain of applications really is broken.

From 24c602fa6faec7a86d109ec53b16e170e2d7ea36 Mon Sep 17 00:00:00 2001
From: jared <>
Date: Wed, 18 Oct 2023 22:53:10 -0600
Subject: [PATCH 05/10] Added chapter on syntactic forms of LambdaBuffers
 files.

---
 _typos.toml     |   5 +-
 docs/SUMMARY.md |   1 +
 docs/syntax.md  | 255 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 259 insertions(+), 2 deletions(-)
 create mode 100644 docs/syntax.md

diff --git a/_typos.toml b/_typos.toml
index e613a6f3..d1730be7 100644
--- a/_typos.toml
+++ b/_typos.toml
@@ -1,6 +1,7 @@
 [default.extend-words]
 substituters = "substituters"
-hask= "hask"
+hask = "hask"
+Nd = "Nd"
 
 [type.pdf]
 extend-glob = ["*.pdf"]
@@ -8,4 +9,4 @@ check-file = false
 
 [type.png]
 extend-glob = ["*.png"]
-check-file = false
\ No newline at end of file
+check-file = false
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index 58a4b6ab..2b85f9a7 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -6,6 +6,7 @@
 - [LambdaBuffers to Purescript](purescript.md)
 - [Design](design.md)
 - [API](api.md)
+- [LambdaBuffers file](syntax.md)
 - [Compiler](compiler.md)
 - [Codegen](codegen.md)
 - [Command line interface](command-line-interface.md)
diff --git a/docs/syntax.md b/docs/syntax.md
new file mode 100644
index 00000000..cb85c520
--- /dev/null
+++ b/docs/syntax.md
@@ -0,0 +1,255 @@
+# LambdaBuffers file
+
+The input to LambdaBuffers is a text file which contains a module that defines
+    a specification of the types you want to generate.
+This section gives the exact syntax of a LambdaBuffers file, and informally describes meaning of the syntactic constructs.
+
+The name of a LambdaBuffers file must end with `.lbf`.
+
+## Notation
+In the following description of a LambdaBuffers file's syntax, we use
+    a similar BNF syntax from [Section 10.1 of the Haskell Report](https://www.haskell.org/onlinereport/haskell2010/).
+So, the following notational conventions are used for presenting syntax.
+
+|  Syntax       | Description                                                                 |
+| ------------- | --------------------------------------------------------------------------- |
+| `[pattern]`   | optional                                                                    |
+| `{pattern}`   | zero or more repetitions                                                    |
+| `(pattern)`   | grouping                                                                    |
+| `pat1⎮pat2`   | choice                                                                      |
+| `pat1\pat2`   | difference -- elements generated by `pat1` except those generated by `pat2` |
+| `'terminal'`  | terminal syntax surrounded by single quotes                                 |
+
+<!-- Apparently, `mdbook`'s markdown can't escape the vertical bar in codeblocks in a table....
+     So, we're using code point U+23AE to look like a vertical bar when it really isn't...
+
+| `pat1|pat2`  | choice                                                                      | 
+-->
+
+Note that the terminal syntax permits C-style escape sequences e.g.
+    `'\n'` denotes line feed (newline), and `'\r'` denotes carriage return.
+
+Productions will be of the form
+
+```text
+nonterm -> alt1 | ... | altn
+```
+
+## Input file representation
+The input file is Unicode text where the encoding is subject to the system locale.
+We will often use the unqualified term *character* to refer to a Unicode code point in the input file.
+
+## Characters
+The following terms are used to denote specific Unicode character categories:
+
+- `upper` denotes a Unicode code point categorized as an uppercase letter or titlecase letter (i.e., with General Category value Lt or Lu).
+
+- `lower` denotes a Unicode code point categorized as a lower-case letter (i.e., with General Category value Ll).
+
+- `alphanum` denotes either `upper` or `lower`; or a Unicode code point categorized as a modifier letter, other letter, decimal digit number, letter number, or other number (i.e., with General Category value Lt, Lu, Ll, Lm, Lo, Nd, Nl or No).
+
+- `space` denotes a Unicode code point categorized as a separator space (i.e., with General Category value Zs), or any of the control characters `'\t'`, `'\n'`, `'\r'`, `'\f'`, or `'\v'`.
+
+Interested readers may find details of Unicode character categories in [Section 4.5 of The Unicode Standard 15.1.0](https://www.unicode.org/versions/Unicode15.1.0/), and the [Unicode Character Database](https://unicode.org/ucd/).
+
+## Lexical syntax
+
+Tokens form the vocabulary of LambdaBuffers files.
+The classes of tokens are defined as follows.
+
+```text
+keyword         -> 'module' | 'sum' | 'prod' | 'record'
+                 | 'opaque' | 'class' | 'instance' | 'import' 
+                 | 'qualified' | 'as'
+modulename      -> uppercamelcase
+longmodulename  -> long modulename
+tyname          -> uppercamelcase
+fieldname       -> lowercamelcase\keyword
+longtyname      -> long tyname
+varname         -> lowers\keyword
+punctuation     -> '<=' | ',' | '(' | ')' | '{' | '}' 
+                 | ':' | ':-' | '=' | '|'
+classname       -> uppercamelcase
+longclassname   -> long uppercamelcase
+```
+
+where
+
+```text
+uppercamelcase -> upper { alphanum }
+lowercamelcase -> lower { alphanum }
+long           -> { uppercamelcase '.' }
+lowers         -> lower { lower }
+```
+
+Input files are broken into *tokens* which use the *maximal munch* rule i.e.,
+    at each point, the next token is the longest sequence of characters that
+    form a valid token.
+`space`s or line comments are ignored except as it separates tokens that
+    would otherwise combine into a single token.
+
+### Line comments
+A *line comment* starts with the terminal `'--'` followed by zero or more printable Unicode characters stopping at the first end of line (`'\n'` or `'\r\n'`).
+
+## Syntax of LambdaBuffers files
+A LambdaBuffers file defines a module that is a collection of data types, classes, instance clauses, and derive clauses.
+
+The overall layout of a LambdaBuffers file is:
+
+```text
+module -> 'module' longmodulename { import } { statement }
+```
+
+The file must specify the module's `longmodulename` where its `modulename` must match the file's name not including the `.lbf` extension.
+After, the file may contain a sequence of `import`s followed by a sequence of `statement`s.
+
+### Import
+Imports bring *entities* (types and classes) of other modules into scope.
+
+```text
+import     -> 'import' [ 'qualified' ] longmodulename [ 'as' longmodulename ] [ importspec ]
+importspec -> '(' [ { tyname ',' } tyname [','] ] ')'
+```
+
+If `importspec` is omitted, then all entities specified in the module are imported; otherwise only the specified entities are imported.
+
+### Statement
+
+Statements define types, classes, instance clauses, and derive clauses.
+
+```text
+statement -> typedef
+           | classdef
+           | instanceclause
+           | deriveclause
+```
+
+#### Type definitions
+Types may be either sum types, product types, record types, or opaque types.
+
+```text
+typedef -> prodtypedef | sumtypedef |  recordtypedef | opaquetypedef
+```
+
+##### Product type definition
+A product type definition defines a new product type.
+
+```text
+prodtypedef  -> 'prod' tyname { varname } '=' prod
+prod -> { tyexpr }
+tyexpr -> varname
+        | longtyname
+        | '(' prod ')'
+```
+
+Product type definitions instruct the code generator to generate a product type for the target language.
+
+##### Sum type definition
+A sum type definition defines a new sum type.
+
+```text
+sumtypedef  -> 'sum' tyname { varname } '=' sum
+sum -> sumconstructor { '|' sumconstructor }
+sumconstructor  -> tyname prod
+```
+
+Sum type definitions instruct the code generator to generate a sum type for the target language.
+
+##### Record type definition
+A record type definition defines a new record type.
+
+```text
+recordtypedef  -> 'record' tyname { varname } '=' record
+record -> '{' [ field { ',' field  } ] '}'
+field -> fieldname ':' prod
+````
+
+Record type definitions instruct the code generator to generate a record type for the target language.
+
+##### Opaque type
+An opaque type definition defines a new opaque type.
+
+```text
+opaquetypedef -> 'opaque' tyname { varname }
+```
+
+Opaque type definitions do not instruct the code generator to generate code, and an opaque type must be instead implemented in the target language.
+
+#### Class definition
+A class definition introduces a new class.
+
+```text
+classdef       -> 'class' [ constraintexps '<=' ] classname { varname }
+constraintexp  -> classref { varname }
+                | '(' constraintexps ')'
+constraintexps -> [ constraintexp { ',' constraintexp } ]
+```
+
+Class definitions do not instruct the code generator to generate code, but
+    instead provides a means to communicate with the code generator the
+    instances one would like to generate (via a derive clause).
+
+#### Instance clause
+An instance clause specifies a type is an instance of a class.
+
+```text
+instanceclause -> 'instance'  constraint [ ':-' constraintexps ]
+constraint     -> classref { tyexpr }
+```
+
+Instance clauses do not instruct the code generator to generate code, but
+    instead instructs the compiler (semantic checking) that the target language
+    provides instances for the given type provided that the given `constraintexps`
+    have instances.
+
+#### Derive clause
+Derive clauses instruct the code generator to generate code for a type so that it is an instance of a class.
+
+```text
+deriveclause -> 'derive' constraint
+```
+
+Note the code generation of a type for a class is implemented via builtin derivation rules (which developers may extend).
+
+### Syntax reference
+The summarized productions of a LambdaBuffers file is as follows.
+
+```text
+module -> 'module' longmodulename { import } { statement }
+
+import     -> 'import' [ 'qualified' ] longmodulename [ 'as' longmodulename ] [ importspec ]
+importspec -> '(' [ { tyname ',' } tyname [','] ] ')'
+
+statement -> typedef
+           | classdef
+           | instanceclause
+           | deriveclause
+
+typedef -> prodtypedef | sumtypedef |  recordtypedef | opaquetypedef
+
+prodtypedef  -> 'prod' tyname { varname } '=' prod
+prod -> { tyexpr }
+tyexpr -> varname
+        | longtyname
+        | '(' prod ')'
+
+sumtypedef  -> 'sum' tyname { varname } '=' sum
+sum -> sumconstructor { '|' sumconstructor }
+sumconstructor  -> tyname prod
+
+recordtypedef  -> 'record' tyname { varname } '=' record
+record -> '{' [ field { ',' field  } ] '}'
+field -> fieldname ':' prod
+
+opaquetypedef -> 'opaque' tyname { varname }
+
+classdef       -> 'class' [ constraintexps '<=' ] classname { varname }
+constraintexp  -> classref { varname }
+                | '(' constraintexps ')'
+constraintexps -> [ constraintexp { ',' constraintexp } ]
+
+instanceclause -> 'instance'  constraint [ ':-' constraintexps ]
+constraint     -> classref { tyexpr }
+
+deriveclause -> 'derive' constraint
+```

From ed6abb26539a0e9ef3d556ced74b9ea661884fa8 Mon Sep 17 00:00:00 2001
From: jared <>
Date: Wed, 18 Oct 2023 22:56:09 -0600
Subject: [PATCH 06/10] Lined up arrows in grammar documentation

---
 docs/syntax.md | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/docs/syntax.md b/docs/syntax.md
index cb85c520..7c2d26d0 100644
--- a/docs/syntax.md
+++ b/docs/syntax.md
@@ -135,11 +135,11 @@ typedef -> prodtypedef | sumtypedef |  recordtypedef | opaquetypedef
 A product type definition defines a new product type.
 
 ```text
-prodtypedef  -> 'prod' tyname { varname } '=' prod
-prod -> { tyexpr }
-tyexpr -> varname
-        | longtyname
-        | '(' prod ')'
+prodtypedef -> 'prod' tyname { varname } '=' prod
+prod        -> { tyexpr }
+tyexpr      -> varname
+             | longtyname
+             | '(' prod ')'
 ```
 
 Product type definitions instruct the code generator to generate a product type for the target language.
@@ -148,9 +148,9 @@ Product type definitions instruct the code generator to generate a product type
 A sum type definition defines a new sum type.
 
 ```text
-sumtypedef  -> 'sum' tyname { varname } '=' sum
-sum -> sumconstructor { '|' sumconstructor }
-sumconstructor  -> tyname prod
+sumtypedef     -> 'sum' tyname { varname } '=' sum
+sum            -> sumconstructor { '|' sumconstructor }
+sumconstructor -> tyname prod
 ```
 
 Sum type definitions instruct the code generator to generate a sum type for the target language.
@@ -159,9 +159,9 @@ Sum type definitions instruct the code generator to generate a sum type for the
 A record type definition defines a new record type.
 
 ```text
-recordtypedef  -> 'record' tyname { varname } '=' record
-record -> '{' [ field { ',' field  } ] '}'
-field -> fieldname ':' prod
+recordtypedef -> 'record' tyname { varname } '=' record
+record        -> '{' [ field { ',' field  } ] '}'
+field         -> fieldname ':' prod
 ````
 
 Record type definitions instruct the code generator to generate a record type for the target language.
@@ -227,19 +227,19 @@ statement -> typedef
 
 typedef -> prodtypedef | sumtypedef |  recordtypedef | opaquetypedef
 
-prodtypedef  -> 'prod' tyname { varname } '=' prod
-prod -> { tyexpr }
-tyexpr -> varname
-        | longtyname
-        | '(' prod ')'
+prodtypedef -> 'prod' tyname { varname } '=' prod
+prod        -> { tyexpr }
+tyexpr      -> varname
+             | longtyname
+             | '(' prod ')'
 
-sumtypedef  -> 'sum' tyname { varname } '=' sum
-sum -> sumconstructor { '|' sumconstructor }
-sumconstructor  -> tyname prod
+sumtypedef     -> 'sum' tyname { varname } '=' sum
+sum            -> sumconstructor { '|' sumconstructor }
+sumconstructor -> tyname prod
 
-recordtypedef  -> 'record' tyname { varname } '=' record
-record -> '{' [ field { ',' field  } ] '}'
-field -> fieldname ':' prod
+recordtypedef -> 'record' tyname { varname } '=' record
+record        -> '{' [ field { ',' field  } ] '}'
+field         -> fieldname ':' prod
 
 opaquetypedef -> 'opaque' tyname { varname }
 

From 51667a9e83d7548946e7925c93ee0b5974236d75 Mon Sep 17 00:00:00 2001
From: jared <>
Date: Wed, 18 Oct 2023 22:59:32 -0600
Subject: [PATCH 07/10] Readded warning about the non LL(1) part of the grammar

---
 .../src/LambdaBuffers/Frontend/Parsec.hs                    | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs b/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs
index b8aef8d7..b0eeb34a 100644
--- a/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs
+++ b/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs
@@ -34,6 +34,12 @@ type Parser s m a = ParsecT s () m a
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- See docs/syntax.md
 --
+-- Warning: In the production
+--  classdef       -> 'class' [ constraintexps '<=' ] classname { varname }
+-- this is not LL(1)! Either we live with what we currently have which has a
+-- large 'try' around parsing @[ constraintexps '<=' ]@, or we move to an
+-- LALR(1) parser generator which should have no issues with parsing this.
+--
 -- Note: Parser Implementation.
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 --

From e1756cf709ab05c2034a29e3ad44a6e6545666f2 Mon Sep 17 00:00:00 2001
From: jared <>
Date: Wed, 18 Oct 2023 23:16:38 -0600
Subject: [PATCH 08/10] Undo `fieldnames` should be disjoint from `keywords`

---
 .../data/goldens/good/LambdaBuffers.lbf         |  6 +++---
 .../src/LambdaBuffers/Frontend/Parsec.hs        | 17 ++++++++++++-----
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/lambda-buffers-frontend/data/goldens/good/LambdaBuffers.lbf b/lambda-buffers-frontend/data/goldens/good/LambdaBuffers.lbf
index 451da56e..b98ea211 100644
--- a/lambda-buffers-frontend/data/goldens/good/LambdaBuffers.lbf
+++ b/lambda-buffers-frontend/data/goldens/good/LambdaBuffers.lbf
@@ -52,7 +52,7 @@ record ClassDef = { name : ClassName
 
 derive Eq ClassDef
 
-record ClassConstraint = { classRef : ClassRef, args : List TyArg }
+record ClassConstraint = { class : ClassRef, args : List TyArg }
 
 derive Eq ClassConstraint
 
@@ -64,7 +64,7 @@ prod Derive = Constraint
 
 derive Eq Derive
 
-record Constraint = { classRef : ClassRef, args : List Ty }
+record Constraint = { class : ClassRef, args : List Ty }
 
 derive Eq Constraint
 
@@ -111,4 +111,4 @@ derive Eq ModuleNamePart
 
 prod ClassName = Text
 
-derive Eq ClassName
+derive Eq ClassName
\ No newline at end of file
diff --git a/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs b/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs
index b0eeb34a..53eea33c 100644
--- a/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs
+++ b/lambda-buffers-frontend/src/LambdaBuffers/Frontend/Parsec.hs
@@ -22,7 +22,6 @@ import Data.Char qualified as Char
 import Data.Kind (Type)
 import Data.Maybe (fromJust, isJust)
 import Data.String (IsString (fromString))
-import Data.Text qualified as Text
 import LambdaBuffers.Compiler.NamingCheck (pClassName, pConstrName, pFieldName, pModuleNamePart, pTyName)
 import LambdaBuffers.Frontend.Syntax (ClassConstraint (ClassConstraint), ClassDef (ClassDef), ClassName (ClassName), ClassRef (ClassRef), ConstrName (ConstrName), Constraint (Constraint), Constructor (Constructor), Derive (Derive), Field (Field), FieldName (FieldName), Import (Import), InstanceClause (InstanceClause), Module (Module), ModuleAlias (ModuleAlias), ModuleName (ModuleName), ModuleNamePart (ModuleNamePart), Name (Name), Product (Product), Record (Record), SourceInfo (SourceInfo, to), SourcePos (SourcePos), Statement (StClassDef, StDerive, StInstanceClause, StTyDef), Sum (Sum), Ty (TyApp, TyRef', TyVar), TyArg (TyArg), TyBody (Opaque, ProductBody, RecordBody, SumBody), TyDef (TyDef), TyName (TyName), TyRef (TyRef), VarName (VarName), kwAs, kwClassDef, kwDerive, kwImport, kwInstance, kwModule, kwQualified, kwTyDefOpaque, kwTyDefProduct, kwTyDefRecord, kwTyDefSum, kws)
 import Text.Parsec (ParseError, ParsecT, SourceName, Stream, alphaNum, between, char, endOfLine, eof, getPosition, label, lower, many, many1, manyTill, notFollowedBy, optionMaybe, runParserT, satisfy, sepBy, sepEndBy, sourceColumn, sourceLine, sourceName, space, string, try, unexpected, (<?>))
@@ -207,10 +206,18 @@ tokenTyRef = token' parseTyRef
 parseFieldName :: Stream s m Char => Parser s m (FieldName SourceInfo)
 parseFieldName =
   withSourceInfo . label' "record field name" $ do
-    v <- pFieldName
-    -- Recall in the lexical specification that fieldnames are disjoint from keywords
-    notKeyword $ Text.unpack v
-    return $ FieldName v
+    -- TODO(jaredponn): Technically, the specification says that field names
+    -- are disjoint from keywords, but some of the other golden tests use
+    -- keywords as field names (e.g. @class@).
+    -- We leave it in as a fairly harmless bug for now.
+    --
+    -- But the version that fixes this is as follows:
+    --
+    -- > v <- pFieldName
+    -- > -- Recall in the lexical specification that fieldnames are disjoint from keywords
+    -- > notKeyword $ Data.Text.unpack v
+    -- > return $ FieldName v
+    FieldName <$> pFieldName
 
 tokenFieldName :: Stream s m Char => Parser s m (FieldName SourceInfo)
 tokenFieldName = token' parseFieldName

From 7f685c03fb8d065298c056df25e361aa3ecb25a3 Mon Sep 17 00:00:00 2001
From: jared <>
Date: Thu, 19 Oct 2023 00:13:39 -0600
Subject: [PATCH 09/10] Changed `syntax.md` to have more consistent naming.

---
 docs/syntax.md | 46 +++++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/docs/syntax.md b/docs/syntax.md
index 7c2d26d0..ce7b735a 100644
--- a/docs/syntax.md
+++ b/docs/syntax.md
@@ -63,9 +63,9 @@ keyword         -> 'module' | 'sum' | 'prod' | 'record'
                  | 'qualified' | 'as'
 modulename      -> uppercamelcase
 longmodulename  -> long modulename
-tyname          -> uppercamelcase
+typename        -> uppercamelcase
 fieldname       -> lowercamelcase\keyword
-longtyname      -> long tyname
+longtypename    -> long typename
 varname         -> lowers\keyword
 punctuation     -> '<=' | ',' | '(' | ')' | '{' | '}' 
                  | ':' | ':-' | '=' | '|'
@@ -108,7 +108,7 @@ Imports bring *entities* (types and classes) of other modules into scope.
 
 ```text
 import     -> 'import' [ 'qualified' ] longmodulename [ 'as' longmodulename ] [ importspec ]
-importspec -> '(' [ { tyname ',' } tyname [','] ] ')'
+importspec -> '(' [ { typename ',' } typename [','] ] ')'
 ```
 
 If `importspec` is omitted, then all entities specified in the module are imported; otherwise only the specified entities are imported.
@@ -135,10 +135,10 @@ typedef -> prodtypedef | sumtypedef |  recordtypedef | opaquetypedef
 A product type definition defines a new product type.
 
 ```text
-prodtypedef -> 'prod' tyname { varname } '=' prod
-prod        -> { tyexpr }
-tyexpr      -> varname
-             | longtyname
+prodtypedef -> 'prod' typename { varname } '=' prod
+prod        -> { typeexp }
+typeexp     -> varname
+             | longtypename
              | '(' prod ')'
 ```
 
@@ -148,9 +148,9 @@ Product type definitions instruct the code generator to generate a product type
 A sum type definition defines a new sum type.
 
 ```text
-sumtypedef     -> 'sum' tyname { varname } '=' sum
+sumtypedef     -> 'sum' typename { varname } '=' sum
 sum            -> sumconstructor { '|' sumconstructor }
-sumconstructor -> tyname prod
+sumconstructor -> typename prod
 ```
 
 Sum type definitions instruct the code generator to generate a sum type for the target language.
@@ -159,18 +159,18 @@ Sum type definitions instruct the code generator to generate a sum type for the
 A record type definition defines a new record type.
 
 ```text
-recordtypedef -> 'record' tyname { varname } '=' record
+recordtypedef -> 'record' typename { varname } '=' record
 record        -> '{' [ field { ',' field  } ] '}'
 field         -> fieldname ':' prod
 ````
 
 Record type definitions instruct the code generator to generate a record type for the target language.
 
-##### Opaque type
+##### Opaque type definition
 An opaque type definition defines a new opaque type.
 
 ```text
-opaquetypedef -> 'opaque' tyname { varname }
+opaquetypedef -> 'opaque' typename { varname }
 ```
 
 Opaque type definitions do not instruct the code generator to generate code, and an opaque type must be instead implemented in the target language.
@@ -194,7 +194,7 @@ An instance clause specifies a type is an instance of a class.
 
 ```text
 instanceclause -> 'instance'  constraint [ ':-' constraintexps ]
-constraint     -> classref { tyexpr }
+constraint     -> classref { typeexp }
 ```
 
 Instance clauses do not instruct the code generator to generate code, but
@@ -218,7 +218,7 @@ The summarized productions of a LambdaBuffers file is as follows.
 module -> 'module' longmodulename { import } { statement }
 
 import     -> 'import' [ 'qualified' ] longmodulename [ 'as' longmodulename ] [ importspec ]
-importspec -> '(' [ { tyname ',' } tyname [','] ] ')'
+importspec -> '(' [ { typename ',' } typename [','] ] ')'
 
 statement -> typedef
            | classdef
@@ -227,21 +227,21 @@ statement -> typedef
 
 typedef -> prodtypedef | sumtypedef |  recordtypedef | opaquetypedef
 
-prodtypedef -> 'prod' tyname { varname } '=' prod
-prod        -> { tyexpr }
-tyexpr      -> varname
-             | longtyname
+prodtypedef -> 'prod' typename { varname } '=' prod
+prod        -> { typeexp }
+typeexp     -> varname
+             | longtypename
              | '(' prod ')'
 
-sumtypedef     -> 'sum' tyname { varname } '=' sum
+sumtypedef     -> 'sum' typename { varname } '=' sum
 sum            -> sumconstructor { '|' sumconstructor }
-sumconstructor -> tyname prod
+sumconstructor -> typename prod
 
-recordtypedef -> 'record' tyname { varname } '=' record
+recordtypedef -> 'record' typename { varname } '=' record
 record        -> '{' [ field { ',' field  } ] '}'
 field         -> fieldname ':' prod
 
-opaquetypedef -> 'opaque' tyname { varname }
+opaquetypedef -> 'opaque' typename { varname }
 
 classdef       -> 'class' [ constraintexps '<=' ] classname { varname }
 constraintexp  -> classref { varname }
@@ -249,7 +249,7 @@ constraintexp  -> classref { varname }
 constraintexps -> [ constraintexp { ',' constraintexp } ]
 
 instanceclause -> 'instance'  constraint [ ':-' constraintexps ]
-constraint     -> classref { tyexpr }
+constraint     -> classref { typeexp }
 
 deriveclause -> 'derive' constraint
 ```

From 72086d2bd11e9381e3d6a2d31d6555032bee2af6 Mon Sep 17 00:00:00 2001
From: jared <>
Date: Thu, 19 Oct 2023 15:11:51 -0600
Subject: [PATCH 10/10] Documentation improvements to `docs/syntax.md`

- Fixed strange whitespace (added whitespace after headers / put
  paragraphs in a single line)

- Changed `long` to `modulealias`

- General wording improvements + fixed error in `opaque` type defn.
---
 docs/SUMMARY.md |  2 +-
 docs/syntax.md  | 73 ++++++++++++++++++++++++-------------------------
 2 files changed, 37 insertions(+), 38 deletions(-)

diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index 2b85f9a7..29b2d840 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -6,7 +6,7 @@
 - [LambdaBuffers to Purescript](purescript.md)
 - [Design](design.md)
 - [API](api.md)
-- [LambdaBuffers file](syntax.md)
+- [LambdaBuffers Frontend (.lbf) syntax](syntax.md)
 - [Compiler](compiler.md)
 - [Codegen](codegen.md)
 - [Command line interface](command-line-interface.md)
diff --git a/docs/syntax.md b/docs/syntax.md
index ce7b735a..1673dad8 100644
--- a/docs/syntax.md
+++ b/docs/syntax.md
@@ -1,15 +1,12 @@
-# LambdaBuffers file
+# LambdaBuffers Frontend (.lbf) syntax
 
-The input to LambdaBuffers is a text file which contains a module that defines
-    a specification of the types you want to generate.
-This section gives the exact syntax of a LambdaBuffers file, and informally describes meaning of the syntactic constructs.
+The input to the LambdaBuffers Frontend is a text file which contains a module that defines a specification of the types and type class instances you want to generate. This chapter gives the exact syntax of a LambdaBuffers Frontend file, and informally describes the meaning of the syntactic constructs.
 
-The name of a LambdaBuffers file must end with `.lbf`.
+The name of a LambdaBuffers Frontend file must end with `.lbf`, and hence may also be referred to as a .lbf file or a .lbf schema.
 
 ## Notation
-In the following description of a LambdaBuffers file's syntax, we use
-    a similar BNF syntax from [Section 10.1 of the Haskell Report](https://www.haskell.org/onlinereport/haskell2010/).
-So, the following notational conventions are used for presenting syntax.
+
+In the following description of a LambdaBuffers Frontend file's syntax, we use a BNF syntax similar to that of [Section 10.1 of the Haskell Report](https://www.haskell.org/onlinereport/haskell2010/). So, the following notational conventions are used for presenting syntax.
 
 |  Syntax       | Description                                                                 |
 | ------------- | --------------------------------------------------------------------------- |
@@ -26,20 +23,20 @@ So, the following notational conventions are used for presenting syntax.
 | `pat1|pat2`  | choice                                                                      | 
 -->
 
-Note that the terminal syntax permits C-style escape sequences e.g.
-    `'\n'` denotes line feed (newline), and `'\r'` denotes carriage return.
+Note that the terminal syntax permits C-style escape sequences e.g. `'\n'` denotes line feed (newline), and `'\r'` denotes carriage return.
 
-Productions will be of the form
+Productions will be of the form:
 
 ```text
 nonterm -> alt1 | ... | altn
 ```
 
 ## Input file representation
-The input file is Unicode text where the encoding is subject to the system locale.
-We will often use the unqualified term *character* to refer to a Unicode code point in the input file.
+
+The input file is Unicode text where the encoding is subject to the system locale. We will often use the unqualified term *character* to refer to a Unicode code point in the input file.
 
 ## Characters
+
 The following terms are used to denote specific Unicode character categories:
 
 - `upper` denotes a Unicode code point categorized as an uppercase letter or titlecase letter (i.e., with General Category value Lt or Lu).
@@ -54,23 +51,22 @@ Interested readers may find details of Unicode character categories in [Section
 
 ## Lexical syntax
 
-Tokens form the vocabulary of LambdaBuffers files.
-The classes of tokens are defined as follows.
+Tokens form the vocabulary of LambdaBuffers Frontend files. The classes of tokens are defined as follows.
 
 ```text
 keyword         -> 'module' | 'sum' | 'prod' | 'record'
                  | 'opaque' | 'class' | 'instance' | 'import' 
                  | 'qualified' | 'as'
 modulename      -> uppercamelcase
-longmodulename  -> long modulename
+longmodulename  -> modulealias modulename
 typename        -> uppercamelcase
 fieldname       -> lowercamelcase\keyword
-longtypename    -> long typename
+longtypename    -> modulealias typename
 varname         -> lowers\keyword
 punctuation     -> '<=' | ',' | '(' | ')' | '{' | '}' 
                  | ':' | ':-' | '=' | '|'
 classname       -> uppercamelcase
-longclassname   -> long uppercamelcase
+longclassname   -> modulealias uppercamelcase
 ```
 
 where
@@ -78,32 +74,31 @@ where
 ```text
 uppercamelcase -> upper { alphanum }
 lowercamelcase -> lower { alphanum }
-long           -> { uppercamelcase '.' }
+modulealias    -> { uppercamelcase '.' }
 lowers         -> lower { lower }
 ```
 
-Input files are broken into *tokens* which use the *maximal munch* rule i.e.,
-    at each point, the next token is the longest sequence of characters that
-    form a valid token.
-`space`s or line comments are ignored except as it separates tokens that
-    would otherwise combine into a single token.
+Input files are broken into *tokens* which use the *maximal munch* rule i.e., at each point, the next token is the longest sequence of characters that forms a valid token. `space`s or line comments are ignored except as they separate tokens that would otherwise combine into a single token.
 
 ### Line comments
+
 A *line comment* starts with the terminal `'--'` followed by zero or more printable Unicode characters stopping at the first end of line (`'\n'` or `'\r\n'`).
 
-## Syntax of LambdaBuffers files
-A LambdaBuffers file defines a module that is a collection of data types, classes, instance clauses, and derive clauses.
+## Syntax of LambdaBuffers Frontend files
 
-The overall layout of a LambdaBuffers file is:
+A LambdaBuffers Frontend file defines a module that is a collection of data types, classes, instance clauses, and derive clauses.
+
+The overall layout of a LambdaBuffers Frontend file is:
 
 ```text
 module -> 'module' longmodulename { import } { statement }
 ```
 
-The file must specify the module's `longmodulename` where its `modulename` must match the file's name not including the `.lbf` extension.
+The file must specify the module's `longmodulename` where its `modulename` must match the LambdaBuffers Frontend file's file name not including the `.lbf` extension.
 After, the file may contain a sequence of `import`s followed by a sequence of `statement`s.
 
 ### Import
+
 Imports bring *entities* (types and classes) of other modules into scope.
 
 ```text
@@ -125,6 +120,7 @@ statement -> typedef
 ```
 
 #### Type definitions
+
 Types may be either sum types, product types, record types, or opaque types.
 
 ```text
@@ -132,6 +128,7 @@ typedef -> prodtypedef | sumtypedef |  recordtypedef | opaquetypedef
 ```
 
 ##### Product type definition
+
 A product type definition defines a new product type.
 
 ```text
@@ -145,6 +142,7 @@ typeexp     -> varname
 Product type definitions instruct the code generator to generate a product type for the target language.
 
 ##### Sum type definition
+
 A sum type definition defines a new sum type.
 
 ```text
@@ -156,6 +154,7 @@ sumconstructor -> typename prod
 Sum type definitions instruct the code generator to generate a sum type for the target language.
 
 ##### Record type definition
+
 A record type definition defines a new record type.
 
 ```text
@@ -167,15 +166,17 @@ field         -> fieldname ':' prod
 Record type definitions instruct the code generator to generate a record type for the target language.
 
 ##### Opaque type definition
+
 An opaque type definition defines a new opaque type.
 
 ```text
 opaquetypedef -> 'opaque' typename { varname }
 ```
 
-Opaque type definitions do not instruct the code generator to generate code, and an opaque type must be instead implemented in the target language.
+Opaque type definitions must map to existing types in the target language, and it's up to the Codegen module to determine exactly how that's done.
 
 #### Class definition
+
 A class definition introduces a new class.
 
 ```text
@@ -185,11 +186,10 @@ constraintexp  -> classref { varname }
 constraintexps -> [ constraintexp { ',' constraintexp } ]
 ```
 
-Class definitions do not instruct the code generator to generate code, but
-    instead provides a means to communicate with the code generator the
-    instances one would like to generate (via a derive clause).
+Class definitions communicate to the code generator the implementations that already exist (via instance clauses) or that one would like to generate (via derive clauses).
 
 #### Instance clause
+
 An instance clause specifies a type is an instance of a class.
 
 ```text
@@ -197,12 +197,10 @@ instanceclause -> 'instance'  constraint [ ':-' constraintexps ]
 constraint     -> classref { typeexp }
 ```
 
-Instance clauses do not instruct the code generator to generate code, but
-    instead instructs the compiler (semantic checking) that the target language
-    provides instances for the given type provided that the given `constraintexps`
-    have instances.
+Instance clauses do not instruct the code generator to generate code, but instead instruct the compiler (semantic checking) that the target language environment provides type class implementations for the given type (provided that the given `constraintexps` also have implementations).
 
 #### Derive clause
+
 Derive clauses instruct the code generator to generate code for a type so that it is an instance of a class.
 
 ```text
@@ -212,7 +210,8 @@ deriveclause -> 'derive' constraint
 Note the code generation of a type for a class is implemented via builtin derivation rules (which developers may extend).
 
 ### Syntax reference
-The summarized productions of a LambdaBuffers file is as follows.
+
+The summarized productions of a LambdaBuffers Frontend file are as follows.
 
 ```text
 module -> 'module' longmodulename { import } { statement }