Skip to content

Commit ad70394

Browse files
committed
Fix haskell#197 by only lexing numeric literals in multiplicity expressions.
In issue haskell#141, multiplicity annotations in regexes were extended to the general, multi-digit case {nnn,mmm}. However, lexing numeric literals everywhere broke the parsing of regexes such as `32|64` and `[01-89]`, where digits must be treated as ordinary characters. The solution here is to lex numeric literals only in a special lexer state called `multiplicity`, which the parser enters when parsing multiplicity braces {nnn,mmm}. This restores Alex's handling of digits as characters in all non-multiplicity situations.
1 parent e4843f2 commit ad70394

File tree

4 files changed

+66
-12
lines changed

4 files changed

+66
-12
lines changed

src/Parser.y

+13-9
Original file line numberDiff line numberDiff line change
@@ -174,15 +174,19 @@ rep :: { RExp -> RExp }
174174
: '*' { Star }
175175
| '+' { Plus }
176176
| '?' { Ques }
177-
-- Single digits are CHAR, not NUM.
178-
-- TODO: these don't check for digits
179-
-- properly.
180-
| '{' CHAR '}' { repeat_rng (digit $2) Nothing }
181-
| '{' CHAR ',' '}' { repeat_rng (digit $2) (Just Nothing) }
182-
| '{' CHAR ',' CHAR '}' { repeat_rng (digit $2) (Just (Just (digit $4))) }
183-
| '{' NUM '}' { repeat_rng $2 Nothing }
184-
| '{' NUM ',' '}' { repeat_rng $2 (Just Nothing) }
185-
| '{' NUM ',' NUM '}' { repeat_rng $2 (Just (Just $4)) }
177+
| begin_mult '{' mult '}' { $3 }
178+
-- A bit counterintuitively, we need @begin_mult@ already before the left brace,
179+
-- not just before @mult@. This might be due to the lookahead in the parser.
180+
181+
-- Enter the "multiplicity" lexer mode to scan number literals
182+
begin_mult :: { () }
183+
: {- empty -} {% setStartCode multiplicity }
184+
185+
-- Parse a numeric multiplicity.
186+
mult :: { RExp -> RExp }
187+
: NUM { repeat_rng $1 Nothing }
188+
| NUM ',' { repeat_rng $1 (Just Nothing) }
189+
| NUM ',' NUM { repeat_rng $1 (Just (Just $3)) }
186190
187191
rexp0 :: { RExp }
188192
: '(' ')' { Eps }

src/Scan.x

+9-3
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
-------------------------------------------------------------------------------
1212

1313
{
14-
module Scan (lexer, AlexPosn(..), Token(..), Tkn(..), tokPosn) where
14+
module Scan (lexer, AlexPosn(..), Token(..), Tkn(..), tokPosn, multiplicity) where
1515

1616
import Data.Char
1717
import ParseMonad
@@ -56,8 +56,7 @@ alex :-
5656
<0> \\ x $hexdig+ { hexch }
5757
<0> \\ o $octal+ { octch }
5858
<0> \\ $printable { escape }
59-
<0> $nonspecial # [\<] { char } -- includes 1 digit numbers
60-
<0> $digit+ { num } -- should be after char
59+
<0> $nonspecial # [\<] { char }
6160
<0> @smac { smac }
6261
<0> @rmac { rmac }
6362
@@ -75,6 +74,13 @@ alex :-
7574
-- so don't try to interpret the opening { as a code block.
7675
<afterstartcodes> \{ (\n | [^$digit ]) { special `andBegin` 0 }
7776
<afterstartcodes> () { skip `andBegin` 0 } -- note: empty pattern
77+
78+
-- Numeric literals are only lexed in multiplicity braces e.g. {nnn,mmm}.
79+
-- Switching to the @multiplicity@ lexer state happens in the parser.
80+
<multiplicity> $digit+ { num }
81+
<multiplicity> \, { special }
82+
<multiplicity> \} { special `andBegin` 0 }
83+
7884
{
7985

8086
-- -----------------------------------------------------------------------------

tests/Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ TESTS = \
5555
issue_71.x \
5656
issue_119.x \
5757
issue_141.x \
58+
issue_197.x \
5859
monad_typeclass.x \
5960
monad_typeclass_bytestring.x \
6061
monadUserState_typeclass.x \

tests/issue_197.x

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
{
2+
-- Issue #197
3+
-- reported 2022-01-21 by https://github.com/Commelina
4+
-- fixed 2022-01-23 by Andreas Abel & John Ericson
5+
--
6+
-- Problem was:
7+
-- Surface syntax regressed and could no longer handle character strings
8+
-- that looked like numbers.
9+
10+
module Main (main) where
11+
12+
import System.Exit
13+
}
14+
15+
%wrapper "posn"
16+
%token "Token"
17+
18+
@iec60559suffix = (32|64|128)[x]?
19+
@any = [01-89]+[x]?
20+
21+
:-
22+
23+
$white+ ;
24+
@iec60559suffix { \ _ -> Good }
25+
@any { \ _ -> Bad }
26+
27+
{
28+
data Token = Good String | Bad String
29+
deriving (Eq, Show)
30+
31+
input = "32 32x 99 99x 128x"
32+
expected_result = [Good "32", Good "32x", Bad "99", Bad "99x", Good "128x"]
33+
34+
main :: IO ()
35+
main
36+
| result == expected_result = do
37+
exitWith ExitSuccess
38+
| otherwise = do
39+
print result
40+
exitFailure
41+
where
42+
result = alexScanTokens input
43+
}

0 commit comments

Comments
 (0)