Skip to content

Commit

Permalink
Handle multiline comments in Package (ipkg)
Browse files Browse the repository at this point in the history
...just like in Source (idr).
  • Loading branch information
stephen-smith committed Sep 22, 2024
1 parent f3dca12 commit 510d1a9
Show file tree
Hide file tree
Showing 9 changed files with 76 additions and 49 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG_NEXT.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ This CHANGELOG describes the merged but unreleased changes. Please see [CHANGELO

* Support for macOS PowerPC added.

* Multiline comments `{- text -}` are now supported in ipkg files.

### Language changes

* Autobind and Typebind modifier on operators allow the user to
Expand Down
1 change: 1 addition & 0 deletions CONTRIBUTORS
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ Wen Kokke
Wind Wong
Zoe Stafford
Claudio Etterli
Boyd Stephen Smith Jr.

Apologies to anyone we've missed - let us know and we'll correct it (or just
send a PR with the correction).
50 changes: 50 additions & 0 deletions src/Parser/Lexer/Common.idr
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,56 @@ comment
<+> many (is '-') <+> reject (is '}') -- not a closing delimiter
<+> many (isNot '\n') -- till the end of line

mutual
||| The mutually defined functions represent different states in a
||| small automaton.
||| `toEndComment` is the default state and it will munch through
||| the input until we detect a special character (a dash, an
||| opening brace, or a double quote) and then switch to the
||| appropriate state.
toEndComment : (k : Nat) -> Recognise (k /= 0)
toEndComment Z = empty
toEndComment (S k)
= some (pred (\c => c /= '-' && c /= '{' && c /= '"' && c /= '\''))
<+> (eof <|> toEndComment (S k))
<|> is '{' <+> singleBrace k
<|> is '-' <+> singleDash k
<|> (charLit <|> pred (== '\'')) <+> toEndComment (S k)
<|> stringLit <+> toEndComment (S k)

||| After reading a single brace, we may either finish reading an
||| opening delimiter or ignore it (e.g. it could be an implicit
||| binder).
singleBrace : (k : Nat) -> Lexer
singleBrace k
= is '-' <+> many (is '-') -- opening delimiter
<+> (eof <|> singleDash (S k)) -- `singleDash` handles the {----} special case
-- `eof` handles the file ending with {---
<|> toEndComment (S k) -- not a valid comment

||| After reading a single dash, we may either find another one,
||| meaning we may have started reading a line comment, or find
||| a closing brace meaning we have found a closing delimiter.
singleDash : (k : Nat) -> Lexer
singleDash k
= is '-' <+> doubleDash k -- comment or closing delimiter
<|> is '}' <+> toEndComment k -- closing delimiter
<|> toEndComment (S k) -- not a valid comment

||| After reading a double dash, we are potentially reading a line
||| comment unless the series of uninterrupted dashes is ended with
||| a closing brace in which case it is a closing delimiter.
doubleDash : (k : Nat) -> Lexer
doubleDash k = with Prelude.(::)
many (is '-') <+> choice -- absorb all dashes
[ is '}' <+> toEndComment k -- closing delimiter
, many (isNot '\n') <+> toEndComment (S k) -- line comment
]

export
blockComment : Lexer
blockComment = is '{' <+> is '-' <+> many (is '-') <+> (eof <|> toEndComment 1)

-- Identifier Lexer
-- There are multiple variants.

Expand Down
1 change: 1 addition & 0 deletions src/Parser/Lexer/Package.idr
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ andop = is '&' <+> is '&'
rawTokens : TokenMap Token
rawTokens =
[ (comment, Comment . drop 2)
, (blockComment, Comment . shrink 2)
, (namespacedIdent, uncurry DotSepIdent . mkNamespacedIdent)
, (identAllowDashes, DotSepIdent Nothing)
, (separator, const Separator)
Expand Down
49 changes: 0 additions & 49 deletions src/Parser/Lexer/Source.idr
Original file line number Diff line number Diff line change
Expand Up @@ -140,55 +140,6 @@ Pretty Void Token where
pretty (MagicDebugInfo di) = pretty (show di)
pretty (Unrecognised x) = pretty "Unrecognised" <++> pretty x

mutual
||| The mutually defined functions represent different states in a
||| small automaton.
||| `toEndComment` is the default state and it will munch through
||| the input until we detect a special character (a dash, an
||| opening brace, or a double quote) and then switch to the
||| appropriate state.
toEndComment : (k : Nat) -> Recognise (k /= 0)
toEndComment Z = empty
toEndComment (S k)
= some (pred (\c => c /= '-' && c /= '{' && c /= '"' && c /= '\''))
<+> (eof <|> toEndComment (S k))
<|> is '{' <+> singleBrace k
<|> is '-' <+> singleDash k
<|> (charLit <|> pred (== '\'')) <+> toEndComment (S k)
<|> stringLit <+> toEndComment (S k)

||| After reading a single brace, we may either finish reading an
||| opening delimiter or ignore it (e.g. it could be an implicit
||| binder).
singleBrace : (k : Nat) -> Lexer
singleBrace k
= is '-' <+> many (is '-') -- opening delimiter
<+> (eof <|> singleDash (S k)) -- `singleDash` handles the {----} special case
-- `eof` handles the file ending with {---
<|> toEndComment (S k) -- not a valid comment

||| After reading a single dash, we may either find another one,
||| meaning we may have started reading a line comment, or find
||| a closing brace meaning we have found a closing delimiter.
singleDash : (k : Nat) -> Lexer
singleDash k
= is '-' <+> doubleDash k -- comment or closing delimiter
<|> is '}' <+> toEndComment k -- closing delimiter
<|> toEndComment (S k) -- not a valid comment

||| After reading a double dash, we are potentially reading a line
||| comment unless the series of uninterrupted dashes is ended with
||| a closing brace in which case it is a closing delimiter.
doubleDash : (k : Nat) -> Lexer
doubleDash k = with Prelude.(::)
many (is '-') <+> choice -- absorb all dashes
[ is '}' <+> toEndComment k -- closing delimiter
, many (isNot '\n') <+> toEndComment (S k) -- line comment
]

blockComment : Lexer
blockComment = is '{' <+> is '-' <+> many (is '-') <+> (eof <|> toEndComment 1)

docComment : Lexer
docComment = is '|' <+> is '|' <+> is '|' <+> many (isNot '\n')

Expand Down
4 changes: 4 additions & 0 deletions tests/idris2/pkg/multiline-comments/Main.idr
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
module Main

main : IO ()
main = putStrLn "CouCou!"
2 changes: 2 additions & 0 deletions tests/idris2/pkg/multiline-comments/expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
1/1: Building Main (Main.idr)
Now compiling the executable: test
4 changes: 4 additions & 0 deletions tests/idris2/pkg/multiline-comments/run
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
. ../../../testutils.sh

# Previously reported: Uncaught error: Error: Can't recognise token.
idris2 --build test.ipkg
12 changes: 12 additions & 0 deletions tests/idris2/pkg/multiline-comments/test.ipkg
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package test
{-
A test of
multiline
comments.
-}
version = 0.1.0
authors = "Boyd Stephen Smith Jr."
main = Main
executable = "test"
{- "." is the top-level package directory -}
sourcedir = "."

0 comments on commit 510d1a9

Please sign in to comment.