Issue #197: the scanner now emits one DIGIT token per digit instead of a
multi-digit NUM token.  The parser assembles DIGIT tokens into a number
(natnum) only inside repetition ranges {n} / {n,} / {n,m}; everywhere else
a DIGIT is accepted as an ordinary character via the `char` nonterminal.

NOTE(review): leading whitespace inside the hunks below was reconstructed
with spaces; if the target files use tabs, apply with
`git apply --ignore-whitespace`.

diff --git a/src/Parser.y b/src/Parser.y
index bf627fa8..7ba718ad 100644
--- a/src/Parser.y
+++ b/src/Parser.y
@@ -53,8 +53,8 @@ import Data.Char
         BIND            { T _ (BindT $$) }
         ID              { T _ (IdT $$) }
         CODE            { T _ (CodeT _) }
+        DIGIT           { T _ (DigitT $$) }
         CHAR            { T _ (CharT $$) }
-        NUM             { T _ (NumT $$) }
         SMAC            { T _ (SMacT _) }
         RMAC            { T _ (RMacT $$) }
         SMAC_DEF        { T _ (SMacDefT $$) }
@@ -174,15 +174,9 @@ rep :: { RExp -> RExp }
         : '*'                           { Star }
         | '+'                           { Plus }
         | '?'                           { Ques }
-                                        -- Single digits are CHAR, not NUM.
-                                        -- TODO: these don't check for digits
-                                        -- properly.
-        | '{' CHAR '}'                  { repeat_rng (digit $2) Nothing }
-        | '{' CHAR ',' '}'              { repeat_rng (digit $2) (Just Nothing) }
-        | '{' CHAR ',' CHAR '}'         { repeat_rng (digit $2) (Just (Just (digit $4))) }
-        | '{' NUM '}'                   { repeat_rng $2 Nothing }
-        | '{' NUM ',' '}'               { repeat_rng $2 (Just Nothing) }
-        | '{' NUM ',' NUM '}'           { repeat_rng $2 (Just (Just $4)) }
+        | '{' natnum '}'                { repeat_rng $2 Nothing }
+        | '{' natnum ',' '}'            { repeat_rng $2 (Just Nothing) }
+        | '{' natnum ',' natnum '}'     { repeat_rng $2 (Just (Just $4)) }
 
 rexp0 :: { RExp }
         : '(' ')'                       { Eps }
@@ -197,8 +191,8 @@ set :: { CharSet }
         | set0                          { $1 }
 
 set0 :: { CharSet }
-        : CHAR                          { charSetSingleton $1 }
-        | CHAR '-' CHAR                 { charSetRange $1 $3 }
+        : char                          { charSetSingleton $1 }
+        | char '-' char                 { charSetRange $1 $3 }
         | smac                          {% lookupSMac $1 }
         | '[' sets ']'                  { foldr charSetUnion emptyCharSet $2 }
 
@@ -222,6 +216,17 @@ smac :: { (AlexPosn,String) }
         : '.'                           { (tokPosn $1, ".") }
         | SMAC                          { case $1 of T p (SMacT s) -> (p, s) }
 
+char :: { Char }
+        : DIGIT                         { $1 }
+        | CHAR                          { $1 }
+
+natnum :: { Int }
+        : digit                         { $1 }
+        | natnum digit                  { $1 * 10 + $2 }  -- $1: value so far, $2: next (least significant) digit
+
+digit :: { Int }
+        : DIGIT                         { digitToInt $1 }
+
 {
 happyError :: P a
 happyError = failP "parse error"
@@ -229,8 +234,6 @@ happyError = failP "parse error"
 -- -----------------------------------------------------------------------------
 -- Utils
 
-digit c = ord c - ord '0'
-
 repeat_rng :: Int -> Maybe (Maybe Int) -> (RExp->RExp)
 repeat_rng n (Nothing) re = foldr (:%%) Eps (replicate n re)
 repeat_rng n (Just Nothing) re = foldr (:%%) (Star re) (replicate n re)
diff --git a/src/Scan.x b/src/Scan.x
index 76be8a4f..358d7f17 100644
--- a/src/Scan.x
+++ b/src/Scan.x
@@ -56,8 +56,8 @@ alex :-
 <0> \\ x $hexdig+               { hexch }
 <0> \\ o $octal+                { octch }
 <0> \\ $printable               { escape }
+<0> $digit                      { digit }  -- should be before char
 <0> $nonspecial # [\<]          { char } -- includes 1 digit numbers
-<0> $digit+                     { num  } -- should be after char
 <0> @smac                       { smac }
 <0> @rmac                       { rmac }
 
@@ -92,12 +92,12 @@ data Tkn
  | IdT String
  | StringT String
  | BindT String
+ | DigitT Char
  | CharT Char
  | SMacT String
  | RMacT String
  | SMacDefT String
  | RMacDefT String
- | NumT Int
  | WrapperT
  | EncodingT
  | ActionTypeT
@@ -121,7 +121,7 @@ decch (p,_,str) ln = return $ T p (CharT (do_ech 10 ln (take (ln-1) (tail st
 hexch (p,_,str) ln = return $ T p (CharT (do_ech 16 ln (take (ln-2) (drop 2 str))))
 octch (p,_,str) ln = return $ T p (CharT (do_ech 8 ln (take (ln-2) (drop 2 str))))
 char (p,_,str) _ = return $ T p (CharT (head str))
-num (p,_,str) ln = return $ T p $ NumT $ parseInt 10 $ take ln str
+digit (p,_,str) _ = return $ T p (DigitT (head str))
 smac (p,_,str) ln = return $ T p (SMacT (mac ln str))
 rmac (p,_,str) ln = return $ T p (RMacT (mac ln str))
 smacdef (p,_,str) ln = return $ T p (SMacDefT (macdef ln str))
diff --git a/tests/Makefile b/tests/Makefile
index 4f9b7371..b0a90c3d 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -55,6 +55,7 @@ TESTS = \
         issue_71.x \
         issue_119.x \
         issue_141.x \
+        issue_197.x \
         monad_typeclass.x \
         monad_typeclass_bytestring.x \
         monadUserState_typeclass.x \
diff --git a/tests/issue_197.x b/tests/issue_197.x
new file mode 100644
index 00000000..64cbb617
--- /dev/null
+++ b/tests/issue_197.x
@@ -0,0 +1,43 @@
+{
+-- Issue #197
+-- reported 2022-01-21 by https://github.com/Commelina
+-- fixed 2022-01-23 by Andreas Abel & John Ericson
+--
+-- Problem was:
+--   Surface syntax regressed and could no longer handle character strings
+--   that looked like numbers.
+
+module Main (main) where
+
+import System.Exit
+}
+
+%wrapper "posn"
+%token "Token"
+
+@iec60559suffix = (32|64|128)[x]?
+@any            = [0-9]+[x]?
+
+:-
+
+$white+ ;
+@iec60559suffix { \ _ -> Good }
+@any            { \ _ -> Bad }
+
+{
+data Token = Good String | Bad String
+  deriving (Eq, Show)
+
+input           = "32 32x 99 99x 128x"
+expected_result = [Good "32", Good "32x", Bad "99", Bad "99x", Good "128x"]
+
+main :: IO ()
+main
+  | result == expected_result = do
+      exitWith ExitSuccess
+  | otherwise = do
+      print result
+      exitFailure
+  where
+  result = alexScanTokens input
+}