From e50c825029acc46a2c8b4288d6570d5922c08185 Mon Sep 17 00:00:00 2001 From: knothed Date: Fri, 2 Oct 2020 14:11:31 +0200 Subject: [PATCH 1/9] Implement continuation-based RAD code generation This commit implements code generation for strongly-typed, continuation-based, directly-executable parsers, as described by Hinze and Paterson 2005. In addition, generated code can either be in LALR or RAD (Recursive Ascent-Descent, Horspool 1991) form. Recursive Ascent-Descent significantly reduces the number of states while maintaining the power of LR/LALR parsers. --- cabal.project.local | 0 happy.cabal | 9 +- src/Follow.hs | 73 +++++ src/Grammar.lhs | 5 +- src/LALR.lhs | 12 +- src/Main.lhs | 107 ++++++- src/RADCodeGen.hs | 533 ++++++++++++++++++++++++++++++++++ src/RADCodeGen_LALR.hs | 362 +++++++++++++++++++++++ src/RADStateGen.hs | 634 +++++++++++++++++++++++++++++++++++++++++ src/RADTools.hs | 230 +++++++++++++++ 10 files changed, 1952 insertions(+), 13 deletions(-) create mode 100644 cabal.project.local create mode 100644 src/Follow.hs create mode 100644 src/RADCodeGen.hs create mode 100644 src/RADCodeGen_LALR.hs create mode 100644 src/RADStateGen.hs create mode 100644 src/RADTools.hs diff --git a/cabal.project.local b/cabal.project.local new file mode 100644 index 00000000..e69de29b diff --git a/happy.cabal b/happy.cabal index d1e42fb4..70a77fef 100644 --- a/happy.cabal +++ b/happy.cabal @@ -161,12 +161,14 @@ executable happy build-depends: base < 5, array, containers >= 0.4.2, + dom-lt >= 0.2.2, + text, mtl >= 2.2.1 -- mtl-2.2.1 added Control.Monad.Except default-language: Haskell98 default-extensions: CPP, MagicHash, FlexibleContexts - ghc-options: -Wall + ghc-options: -Wall -Wno-name-shadowing -Wno-missing-signatures -Wno-type-defaults -Wno-incomplete-patterns -Wno-unused-local-binds -Wno-unused-local-binds -Wno-unused-matches other-modules: Paths_happy AbsSyn @@ -186,6 +188,11 @@ executable happy AttrGrammarParser ParamRules PrettyGrammar + RADCodeGen + 
RADCodeGen_LALR + RADStateGen + RADTools + Follow test-suite tests type: exitcode-stdio-1.0 diff --git a/src/Follow.hs b/src/Follow.hs new file mode 100644 index 00000000..8d009c6b --- /dev/null +++ b/src/Follow.hs @@ -0,0 +1,73 @@ +module Follow where + import Grammar + import RADTools + import Control.Monad + import Control.Monad.ST + import Data.Array.ST + import GHC.Arr + import Data.List (findIndices, tails) + import NameSet (NameSet, empty, fromList, union, unions, delete, member, singleton) + + -- Calculate the follow sets for all nonterminals in the grammar. + followArray :: Grammar -> ([Name] -> NameSet) -> Array Name NameSet + followArray g first = runST $ do + let bounds = liftM2 (,) head last (non_terminals g) + arr <- newArray bounds empty + startSymbols arr + updateRepeatedly arr first + elems <- getElems arr + return (listArray bounds elems) + where + startSymbols :: (STArray s Int NameSet) -> ST s () + startSymbols arr = do + mapM_ (setEOF arr) (starts g) + setEOF :: (STArray s Int NameSet) -> (a, Int, b, c) -> ST s () + setEOF arr (_, nt, _, _) = writeArray arr nt (singleton (eof_term g)) + + updateRepeatedly :: (STArray s Int NameSet) -> ([Name] -> NameSet) -> ST s () + updateRepeatedly arr first = do + old <- getElems arr + updateStep arr first + new <- getElems arr + if old == new then return () else updateRepeatedly arr first + + updateStep :: (STArray s Int NameSet) -> ([Name] -> NameSet) -> ST s () + updateStep arr first = mapM_ (updateRule arr first) (productions g) + + updateRule :: (STArray s Int NameSet) -> ([Name] -> NameSet) -> Production -> ST s () + updateRule arr first (Production lhs rhs _ _) = mapM_ (updateNT arr lhs first) (tails rhs) + + updateNT :: (STArray s Int NameSet) -> Name -> ([Name] -> NameSet) -> [Name] -> ST s () + updateNT _ _ _ [] = return () + updateNT arr lhs first (tok:rhsRest) + | not (elem tok (non_terminals g)) = return () + | otherwise = do + let first' = first rhsRest + let first'' = delete epsilonTok first' 
+ follow_lhs <- readArray arr lhs + let new_follow = union first'' (if member epsilonTok first' then follow_lhs else empty) + old_follow <- readArray arr tok + writeArray arr tok (union old_follow new_follow) + + -- The lc-follow set of a single nonterminal, given a full "follow" array. + -- We only use rules where NT appears after the recognition point. If this is the case, enter into FOLLOW (not into LCFOLLOW), + -- i.e. recursive rules are processed as normal, irrespective of their recognition points. + lcfollow :: XGrammar -> ([Name] -> NameSet) -> (Array Name NameSet) -> Name -> NameSet + lcfollow x@(XGrammar { g = g }) first follow nt + | member nt startSymbols = union (singleton (eof_term g)) rest + | otherwise = rest + where + startSymbols = fromList $ map (\(_, a, _, _) -> a) (starts g) + + rest = unions $ map (uncurry process) rules + rules = filter (rhsContains nt) (zip [0..] (productions g)) + rhsContains nt (_, (Production _ rhs _ _)) = elem nt rhs + + process :: Int -> Production -> NameSet + process ruleIndex (Production lhs rhs _ _) = unions $ map process' $ (reverse (findIndices (== nt) rhs)) where + process' i + | i < ((recognitionPoints x) !! ruleIndex) = empty + | member epsilonTok first_b = union (delete epsilonTok first_b) (follow ! lhs) + | otherwise = first_b + where + first_b = first (drop (i+1) rhs) \ No newline at end of file diff --git a/src/Grammar.lhs b/src/Grammar.lhs index d3ce625e..c23f549b 100644 --- a/src/Grammar.lhs +++ b/src/Grammar.lhs @@ -36,13 +36,16 @@ Here is our mid-section datatype > data Production > = Production Name [Name] (String,[Int]) Priority +> deriving (Eq #ifdef DEBUG -> deriving Show +> , Show #endif +> ) + > data Grammar > = Grammar { > productions :: [Production], diff --git a/src/LALR.lhs b/src/LALR.lhs index 1dc274cf..6df81617 100644 --- a/src/LALR.lhs +++ b/src/LALR.lhs @@ -8,7 +8,7 @@ Generation of LALR parsing tables. 
> module LALR > (genActionTable, genGotoTable, genLR0items, precalcClosure0, > propLookaheads, calcLookaheads, mergeLookaheadInfo, countConflicts, -> Lr0Item(..), Lr1Item) +> Lr0Item(..), Lr1Item(..)) > where > import GenUtils @@ -35,15 +35,7 @@ Generation of LALR parsing tables. This means rule $a$, with dot at $b$ (all starting at 0) > data Lr0Item = Lr0 {-#UNPACK#-}!Int {-#UNPACK#-}!Int -- (rule, dot) -> deriving (Eq,Ord - -#ifdef DEBUG - -> ,Show - -#endif - -> ) +> deriving (Eq,Ord,Show) > data Lr1Item = Lr1 {-#UNPACK#-}!Int {-#UNPACK#-}!Int NameSet -- (rule, dot, lookahead) diff --git a/src/Main.lhs b/src/Main.lhs index 20c5eb62..46f4cd74 100644 --- a/src/Main.lhs +++ b/src/Main.lhs @@ -21,6 +21,9 @@ Path settings auto-generated by Cabal: > import ProduceCode (produceParser) > import ProduceGLRCode > import Info (genInfoFile) +> import qualified RADCodeGen as RAD +> import qualified RADCodeGen_LALR as LALR +> import RADStateGen > import Target (Target(..)) > import System.Console.GetOpt > import Control.Monad ( liftM ) @@ -30,6 +33,7 @@ Path settings auto-generated by Cabal: > import System.IO > import Data.Array( assocs, elems, (!) ) > import Data.List( nub, isSuffixOf ) +> import Data.Maybe (fromMaybe) > import Data.Version ( showVersion ) #if defined(mingw32_HOST_OS) > import Foreign.Marshal.Array @@ -202,6 +206,81 @@ Add any special options or imports required by the parsing machinery. 
> ) > in +------------------------------------- +Branch off to continuation-based LALR parser production: + +> getForallMatch cli >>= \forallMatch -> +> let showTypes = OptCB_ShowTypes `elem` cli || rank2Types +> showComments = OptCB_ShowComments `elem` cli +> rank2Types = maybe False (return True) forallMatch +> match = fromMaybe "" forallMatch +> rulesTupleBased = OptCB_RAD_TupleBased `elem` cli +> in + +> if OptCB_LALR `elem` cli +> then +> let (isMonad, _, parserType, _, _) = monad g +> +> ptype = case (Grammar.lexer g, isMonad) of +> (Nothing, False) -> LALR.Normal +> (Nothing, True) -> LALR.Monad +> (Just _, False) -> error "%lexer without %monad not supported in RAD" +> (Just _, True) -> LALR.MonadLexer +> +> options = LALR.GenOptions { +> LALR.ptype = ptype, +> LALR.wrapperType = if parserType == "Parser" then "HappyP" else "Parser", +> LALR.errorTokenType = "ErrorToken", +> LALR.header = fromMaybe "" hd, +> LALR.footer = fromMaybe "" tl, +> LALR.showTypes = showTypes, +> LALR.comments = showComments, +> LALR.rank2Types = rank2Types, +> LALR.forallMatch = match +> } +> +> lalrStates = generateLALRStates g action goto items2 +> in +> LALR.genCode options g lalrStates action goto >>= +> (if outfilename == "-" then putStr else writeFile outfilename) +> +> else + +Branch off to continuation-based RAD parser production: + +> if OptCB_RAD `elem` cli || OptCB_RAD_TupleBased `elem` cli +> then +> +> let (isMonad, _, parserType, _, _) = monad g +> +> ptype = case (Grammar.lexer g, isMonad) of +> (Nothing, False) -> RAD.Normal +> (Nothing, True) -> RAD.Monad +> (Just _, False) -> error "%lexer without %monad not supported in RAD" +> (Just _, True) -> RAD.MonadLexer +> +> options = RAD.GenOptions { +> RAD.ptype = ptype, +> RAD.wrapperType = if parserType == "Parser" then "HappyP" else "Parser", +> RAD.errorTokenType = "ErrorToken", +> RAD.header = fromMaybe "" hd, +> RAD.footer = fromMaybe "" tl, +> RAD.showTypes = showTypes, +> RAD.comments = showComments, +> 
RAD.rank2Types = rank2Types, +> RAD.rulesTupleBased = rulesTupleBased, +> RAD.forallMatch = match +> } +> +> lalrStates = generateLALRStates g action goto items2 +> in +> createXGrammar g lalrStates >>= \x -> +> generateRADStates x lalrStates unused_rules >>= \radStates -> +> RAD.genCode options x radStates action goto >>= +> (if outfilename == "-" then putStr else writeFile outfilename) +> +> else + %--------------------------------------- Branch off to GLR parser production @@ -403,6 +482,13 @@ The command line arguments. > | OptGLR > | OptGLR_Decode > | OptGLR_Filter +> +> | OptCB_RAD +> | OptCB_RAD_TupleBased +> | OptCB_LALR +> | OptCB_ShowTypes +> | OptCB_ShowComments +> | OptCB_ForallMatch (Maybe String) > deriving Eq > argInfo :: [OptDescr CLIFlags] @@ -436,7 +522,19 @@ The command line arguments. > Option ['?'] ["help"] (NoArg DumpHelp) > "display this help and exit", > Option ['V','v'] ["version"] (NoArg DumpVersion) -- ToDo: -v is deprecated -> "output version information and exit" +> "output version information and exit", +> Option [] ["cb-rad"] (NoArg OptCB_RAD) +> "create a continuation-based Recursive Ascent-Descent parser. Not compatible with most other options", +> Option [] ["cb-rad-tuple"] (NoArg OptCB_RAD_TupleBased) +> "same as cb-rad, but uses tuples instead of continuations inside rule functions", +> Option [] ["cb-lalr"] (NoArg OptCB_LALR) +> "create a continuation-based LALR parser. Not compatible with most other options", +> Option [] ["types"] (NoArg OptCB_ShowTypes) +> "show function types for continuation-based parsers", +> Option [] ["comments"] (NoArg OptCB_ShowComments) +> "show comments for continuation-based parsers", +> Option [] ["forall"] (OptArg OptCB_ForallMatch "MATCH") +> "a string which is used to detect and handle higher-rank function types" #ifdef DEBUG @@ -572,6 +670,13 @@ Extract various command-line options. 
> [] -> return Nothing > f:fs -> return (Just (map toLower (last (f:fs)))) +> getForallMatch :: [CLIFlags] -> IO (Maybe String) +> getForallMatch cli +> = case [ s | (OptCB_ForallMatch s) <- cli ] of +> [] -> return Nothing +> [f] -> return f +> _many -> dieHappy "multiple --forall options\n" + > getCoerce :: Target -> [CLIFlags] -> IO Bool > getCoerce _target cli > = if OptUseCoercions `elem` cli diff --git a/src/RADCodeGen.hs b/src/RADCodeGen.hs new file mode 100644 index 00000000..6e881d12 --- /dev/null +++ b/src/RADCodeGen.hs @@ -0,0 +1,533 @@ +module RADCodeGen where + import Grammar + import LALR + import GenUtils (mapDollarDollar) + import RADTools (XGrammar(..), showItem, showProd, lhs, showRecognitionPoint, recognitionPoints, rhsAfterDot) + import RADStateGen + import Control.Monad + import Data.List + import Data.Maybe + import Data.Text (pack, unpack, replace) + import GHC.Arr ((!), indices) + + + data ParserType = Normal | Monad | MonadLexer deriving (Eq, Show) + + data GenOptions = GenOptions { + ptype :: ParserType, + + wrapperType :: String, -- e.g. "Parser" + errorTokenType :: String, -- e.g. "ErrorToken" + + showTypes :: Bool, + comments :: Bool, + + rank2Types :: Bool, -- when True, all functions (including goto functions) which use or enclose a higher-rank-function are annotated with an explicit type. + forallMatch :: String, -- the text which determines which types count as rank-2-types. + + rulesTupleBased :: Bool, + + header :: String, + footer :: String + } deriving Show + + mlex opts = ptype opts == MonadLexer + + raw = flip (.) 
_raw + + dotleft (Lr0 rule dot) = Lr0 rule (dot-1) + + hasNT state = elem (raw radType state) [Type1, Type2] + + replaceDollar a char = maybe a ($ char) (mapDollarDollar a) + + -------------------- GENCODE -------------------- + -- Generate the full code + genCode :: GenOptions -> XGrammar -> [RADState] -> ActionTable -> GotoTable -> IO String + genCode opts x states action goto = do + return $ newlines 3 [languageFeatures, header', entryPoints', definitions', rules', parseNTs', parseTerminals', states', actions', footer'] where + languageFeatures + | rank2Types opts = newline $ map extension ["RankNTypes", "ScopedTypeVariables"] + | otherwise = "" where + extension str = "{-# LANGUAGE " ++ str ++ " #-}" + + g = (RADTools.g x) + header' = header opts + entryPoints' = newlines 2 $ map (entryPoint opts g states) (starts g) + definitions' = definitions opts g + + rules' = newlines 2 $ map (genRule opts x) [0..prods] + parseNTs' = newlines 2 $ catMaybes $ map (genParseNT opts g states) (non_terminals g) + parseTerminals' = newlines 2 $ map (genParseTerminal opts g) (delete errorTok (terminals g)) + + states' = newlines 2 $ map (genState opts x) states + + actions' = newlines 2 $ map (genAction opts g) [0..prods] + prods = length (productions g) - 1 + footer' = footer opts + + + -------------------- ENTRYPOINT -------------------- + entryPoint :: GenOptions -> Grammar -> [RADState] -> (String, Name, Name, Bool) -> String + entryPoint opts g states (name, lhs, rhs, isPartial) = newline [typedecl, definition] where + typedecl + | showTypes opts = fromMaybe "" $ fmap (((name ++ " :: ") ++) . correctP) (symboltype opts g rhs) + | otherwise = "" + + correctP = if mlex opts then p else parser + + definition = case ptype opts of + Normal -> common ++ paren (checkEof ++ "const") + Monad -> common ++ paren (checkEof ++ "const . " ++ returnP) + MonadLexer -> common ++ paren (checkEof ++ "const . 
" ++ returnP) ++ " []" + + checkEof + | isPartial = "" + | otherwise = "parse" ++ show (eof_term g) ++ " . " + + common = name ++ " = rule" ++ show prod ++ " " + + -- Rule LHS -> RHS + prod = fromJust $ find matches [0 .. length (productions g) - 1] where + matches i = matches' (lookupProdNo g i) + matches' (Production lhs' rhs' _ _) = lhs' == lhs && rhs' == [rhs] + + p a = p' ++ " " ++ a + parser a = wrapperType opts ++ " " ++ a + (_, _, p', _, returnP) = monad g + + + -------------------- DEFINITIONS -------------------- + -- Generate definitions such as wrappers, the parser type or more required functions and types + definitions :: GenOptions -> Grammar -> String + definitions opts g = case ptype opts of + Normal -> newlines 2 [parserDecl, errorToken] + Monad -> newlines 2 [parserDecl, errorToken, wrapThen] + MonadLexer -> newlines 2 [parserDecl, errorToken, wrapThen, repeatTok, wrapLexer, wrapError] + where + + -- type Parser r = [Token] -> P r + parserDecl + | ptype opts == Normal = "type " ++ parser "r" ++ " = [" ++ tokenT ++ "] -> r" + | otherwise = "type " ++ parser "r" ++ " = [" ++ tokenT ++ "] -> " ++ p "r" + + -- data ErrorToken = ErrorToken + errorToken = "data " ++ errorTokenT ++ " = " ++ errorTokenT + + -- thenWrapP :: P a -> (a -> Parser b) -> Parser b + -- thenWrapP a f ts = (thenP) a (flip f ts) + wrapThen = newline [typedecl, definition] where + name = "thenWrapP" + typedecl = name ++ " :: " ++ p "a" ++ " -> (a -> " ++ parser "b" ++ ") -> " ++ parser "b" + definition = name ++ " a f ts = " ++ paren thenP ++ " a (flip f ts)" + + -- repeatTok :: Token -> Parser a -> Parser a + -- repeatTok tok p = \cur -> p (tok:cur) + repeatTok = newline [typedecl, definition] where + name = "repeatTok" + typedecl = name ++ " :: " ++ tokenT ++ " -> " ++ parser "a" ++ " -> " ++ parser "a" + definition = name ++ " tok p = \\cur -> p (tok:cur)" + + -- lexerWrapper :: (Token -> Parser a) -> Parser a + -- lexerWrapper cont [] = lexer (\tok -> cont tok []) + -- 
lexerWrapper cont (tok:toks) = cont tok toks + wrapLexer = newline [typedecl, line1, line2] where + name = "lexerWrapper" + typedecl = name ++ " :: " ++ paren (tokenT ++ " -> " ++ parser "a") ++ " -> " ++ parser "a" + line1 = name ++ " cont [] = " ++ lexer' ++ " (\\t -> cont t [])" + line2 = name ++ " cont (t:ts) = cont t ts" + + -- happyErrorWrapper :: Token -> Parser a + -- happyErrorWrapper t _ = happyError t + wrapError = newline [typedecl, definition] where + name = "happyErrorWrapper" + typedecl = name ++ " :: " ++ tokenT ++ " -> " ++ parser "a" + definition = name ++ " = const . " ++ happyError + + p a = p' ++ " " ++ a + parser a = wrapperType opts ++ " " ++ a + (_, _, p', thenP, _) = monad g + tokenT = token_type g + errorTokenT = errorTokenType opts + (Just (lexer', _)) = lexer g + happyError = fromMaybe "happyError" (error_handler g) + + + -------------------- GENSTATE ------------------- + -- Generate the code for a single state. + genState :: GenOptions -> XGrammar -> RADState -> String + genState opts x@XGrammar { RADTools.g = g } state + | isTrivialAccept = newline [comment, trivialTypedecl, trivialAcceptHeader] + | isTrivialAnnounce = newline [comment, trivialTypedecl, trivialAnnounceHeader] + | otherwise = newline [comment, typedecl, header, shifts'', announces'', accepts'', defaultAction'', gotos''] where + + hasNoActions = (null $ shifts' state) && (null $ accepts' state) && (null $ announces' state) && length (artCore state) == 1 + hasNoGotos = (null $ gotos' state) + isTrivialAccept = hasNoActions && hasNoGotos && (defaultAction' state == Accept') + isTrivialAnnounce = isAlwaysAnnounce && hasNoGotos + isAlwaysAnnounce = case defaultAction' state of + Announce' _ -> hasNoActions + _ -> False + + hasRank2Goto = any ((hasRank2Type opts g) . 
fst) (gotos' state) + hasRank2TypeSignature = any (hasRank2Item) (artCore state) + hasRank2Item item = any (hasRank2Type opts g) (rhsAfterDot g item) + + trivialTypedecl + | rank2Types opts && hasRank2Goto = fromMaybe "" (stateTypeSignature opts g True state) + | showTypes opts = fromMaybe "" (stateTypeSignature opts g False state) + | otherwise = "" + + trivialAcceptHeader = "state" ++ show (raw i state) ++ " = id" + + trivialAnnounceHeader = "state" ++ show (raw i state) ++ " = rule" ++ show rule where + Announce' rule = defaultAction' state + + comment + | comments opts = newlineMap "-- " (showItem g) (artCore state) + | otherwise = "" + + typedecl + | rank2Types opts && hasRank2Goto = fromMaybe "" (stateTypeSignature opts g True state) + | rank2Types opts && hasRank2TypeSignature = fromMaybe "" (stateTypeSignature opts g False state) + | showTypes opts = fromMaybe "" (stateTypeSignature opts g False state) + | otherwise = "" + + shifts'' = newlineMap " " shift (shifts' state) + announces'' = newlineMap " " announce (announces' state) + accepts'' = newlineMap " " accept (accepts' state) + gotos'' = where' ++ intercalate "\n" (map (" " ++) lines) where + lines = join (map goto (gotos' state)) + where' = if null (gotos' state) then "" else " where\n" + + header + | mlex opts = common ++ " = lexerWrapper $ \\t -> case t of" + | otherwise = common ++ " ts = case ts of" where + common = "state" ++ show (raw i state) ++ " " ++ headerKs + headerKs = unwords $ map k (artCore state) + + shift (token, (state, i)) + | mlex opts = paren tok ++ " -> state" ++ show state ++ " " ++ kcontent + | otherwise = "t@" ++ paren tok ++ ":tr -> state" ++ show state ++ " " ++ kcontent ++ " tr" where + i' = map dotleft i + tok = replaceDollar rawToken (if wantsProjection then "v" else "_") + kcontent = unwords (map (paren . (++ x) . 
k) i') where + x = if wantsProjection then " v" else " t" + rawToken = fromJust $ lookup token (token_specs g) + wantsProjection = "$$" == (rawToken \\ replaceDollar rawToken "") -- i.e. Tokens of form "TokenInt $$" + + announce (token, rule) + | mlex opts = paren tokMaybeEof ++ " -> repeatTok t $ rule" ++ show rule ++ " " ++ paren (k item) + | otherwise = if token == eof_term g then eofCase else normalCase + where + normalCase = paren tok ++ ":tr -> rule" ++ show rule ++ " " ++ paren (k item) ++ " ts" + eofCase = "[] -> rule" ++ show rule ++ " " ++ paren (k item) ++ " ts" + + tokMaybeEof = if token == eof_term g then eof else tok + Just (_, eof) = lexer g + + item = fromJust $ find matches (raw completion' state) where -- the item in the completion corresponding to (i.e. of the) rule which is announced. The dot must be at the recognition point. + matches (Lr0 rule' dot) = rule == rule' && (recognitionPoints x) !! rule == dot + tok = replaceDollar rawToken "_" + rawToken = fromJust $ lookup token (token_specs g) + + accept token +   | mlex opts = paren tokMaybeEof ++ " -> repeatTok t $ " ++ k' + | otherwise = if token == eof_term g then eofCase else normalCase + where + normalCase = "t@" ++ paren tok ++ ":tr -> " ++ k' ++ " ts" + eofCase = "[] -> " ++ k' ++ " ts" + + tokMaybeEof = if token == eof_term g then eof else tok + tok = removeDollar $ fromJust (lookup token (token_specs g)) + Just (_, eof) = lexer g + + removeDollar a = maybe a ($ "_") (mapDollarDollar a) + k' = k (head (artCore state)) + + goto (nt, (state, i)) + | hasRank2Type opts g nt = catMaybes [gototype, goto] + | otherwise = catMaybes [goto] + where + i' = map dotleft i + gototype = case symboltype opts g nt of + Just t -> Just $ "g" ++ show nt ++ " :: " ++ t ++ " -> " ++ paren outtype + Nothing -> Nothing + goto = Just $ "g" ++ show nt ++ " x = state" ++ show state ++ " " ++ unwords (map (paren . (++ " x") . 
k) i') + outtype = wrapperType opts ++ " r" + + defaultAction'' = " " ++ case defaultAction' state of + ErrorShift' state -> defaultErrorShift state + Announce' rule -> defaultAnnounce rule + Accept' -> defaultAccept + Error' -> defaultError + + defaultErrorShift toState + | mlex opts = "_ -> repeatTok t $ state" ++ show toState ++ " " ++ paren (k item ++ " ErrorToken") + | otherwise = "_ -> state" ++ show toState ++ " " ++ paren (k item ++ " ErrorToken") ++ " ts" where + item = head $ hdiv (raw completion' state) errorTok g + + defaultAnnounce rule + | mlex opts = "_ -> repeatTok t $ rule" ++ show rule ++ " " ++ paren (k item) + | otherwise = "_ -> rule" ++ show rule ++ " " ++ paren (k item) ++ " ts" where + item = fromJust $ find matches (raw completion' state) where -- the item in the completion corresponding to (i.e. of the) rule which is announced. The dot must be at the recognition point. + matches (Lr0 rule' dot) = rule == rule' && (recognitionPoints x) !! rule == dot + + defaultAccept + | mlex opts = "_ -> repeatTok t $ " ++ k' + | otherwise = "_ -> " ++ k' ++ " ts" where + k' = k (head (artCore state)) + + defaultError + | mlex opts = "_ -> happyErrorWrapper t" + | otherwise = "_ -> " ++ happyError ++ " ts" where + happyError = fromMaybe "happyError" (error_handler g) + + k item@(Lr0 rule dot) = maybe noCore core $ elemIndex item (artCore state) where + core idx + | (length (artCore state) == 1) = "k" + | otherwise = "k" ++ (show (idx + 1)) + noCore = "action" ++ show rule ++ " g" ++ show (lhs g item) + + -- Create the type signature for a state. 
+ stateTypeSignature :: GenOptions -> Grammar -> Bool -> RADState -> Maybe String + stateTypeSignature opts g forall_r state = do + let start = "state" ++ show (raw i state) ++ " :: " ++ forall + components <- mapM component (artCore state) + return $ start ++ intercalate " -> " (map paren (components ++ [outtype])) + where + component item@(Lr0 rule dot) + | rule < 0 = if dot == 0 then component' [-rule] else component' [] -- artifical NT + | rule >= 0 = component' (rhsAfterDot g item) + component' rhs = fmap (intercalate " -> " . (++ [outtype])) (mapM (symboltype opts g) rhs) + outtype = wrapperType opts ++ " r" + forall = if forall_r then "forall r. " else "" + + + -------------------- GENACTION -------------------- + -- Create the code for a semantic action, i.e. a reduce action. + genAction :: GenOptions -> Grammar -> Int -> String + genAction opts g i = newline [comment, typedecl, code] where + prod@(Production lhs' rhs' _ _) = lookupProdNo g i + + comment + | comments opts = "-- " ++ showProd g i + | otherwise = "" + + typedecl + | showTypes opts || rank2Types opts = typedecl' -- some actions (not further specified) need to be explicitly typed in order for rank-n-types to work + | otherwise = "" + where + typedecl' = fromMaybe "" $ fmap (("action" ++ show i ++ " :: ") ++) (actionTypedecl opts g i) + + code = header ++ (if isMonadic then monadicCode else normalCode) + (customCode, isMonadic) = customProdCode prod + header = "action" ++ show i ++ " g " ++ unwords (map v [1..length rhs']) ++ " = " + normalCode = "g " ++ paren customCode + monadicCode = paren customCode ++ " `thenWrapP` g" + v n = "v" ++ show n + + -- Generate the type signature of a semantic action function. 
+ actionTypedecl :: GenOptions -> Grammar -> Int -> Maybe String + actionTypedecl opts g i = do + lhstype <- symboltype opts g lhs' + let lhs = paren $ intercalate " -> " $ [lhstype, outtype] + rhstypes <- mapM (symboltype opts g) rhs' + let rhs = intercalate " -> " $ rhstypes ++ [outtype] + return (lhs ++ " -> " ++ rhs) + where + Production lhs' rhs' _ _ = lookupProdNo g i + outtype = wrapperType opts ++ " r" + + -- Read and translate the raw action code supplied by the user. Also return whether the action is monadic or not. + customProdCode :: Production -> (String, Bool) + customProdCode (Production _ _ (code, _) _) = case code of + '%':'%':_ -> error "'{%%' actions not supported" + '%':'^':_ -> error "'{%^' actions not supported" + '%':rest -> (adapt rest, True) + _ -> (adapt code, False) + where + adapt code + | code == "no code" = v 1 + | otherwise = replaceHappyVars code + v n = "v" ++ show n + replaceHappyVars = unpack . replace (pack "happy_var_") (pack "v") . pack + + + -------------------- PARSETERMINALS / PARSENTS ------------------- + -- Generate the code for parsing a single nonterminal. + genParseNT :: GenOptions -> Grammar -> [RADState] -> Int -> Maybe String + genParseNT opts g states token = do + state <- find (\s -> (raw radType s == Type1 && raw nt s == token)) states + let line = "parse" ++ show token ++ " = state" ++ show (raw i state) + return (newline [comment, line]) where + comment + | comments opts = "-- " ++ (token_names g) ! token + | otherwise = "" + + + -- Generate the code for parsing a single terminal. + genParseTerminal :: GenOptions -> Grammar -> Int -> String + genParseTerminal opts g token = newline [comment, typedecl, code] where + specialEof = ptype opts /= MonadLexer && token == eof_term g + + comment + | comments opts = "-- " ++ (token_names g) ! 
token + | otherwise = "" + + typedecl + | specialEof && (showTypes opts || rank2Types opts) = typedecl'' + | showTypes opts || rank2Types opts = typedecl' + | otherwise = "" + where + typedecl' = maybe "" (\token' -> "parse" ++ show token ++ " :: " ++ paren (token' ++ " -> " ++ parser) ++ " -> " ++ parser) token' + typedecl'' = "parse" ++ show token ++ " :: " ++ parser ++ " -> " ++ parser + token' = symboltype opts g token + parser = wrapperType opts ++ " r" + + code + | specialEof = newline $ [lineEof1, line2] + | mlex opts = newline $ [lineLex1, lineLex2, lineLex3] + | otherwise = newline $ [line1, line2] + where + lineEof1 = "parse" ++ show token ++ " k [] = k []" + line1 = "parse" ++ show token ++ " k (t@" ++ paren tok ++ ":tr) = k " ++ t ++ " tr" + line2 = "parse" ++ show token ++ " k ts = " ++ happyError ++ " ts" + happyError = fromMaybe "happyError" (error_handler g) + + rawToken = fromJust $ lookup token (token_specs g) + tok = replaceDollar rawToken (if wantsProjection then "v" else "_") + t = if wantsProjection then "v" else "t" + wantsProjection = "$$" == (rawToken \\ replaceDollar rawToken "") -- i.e. Tokens of form "TokenInt $$" + + lineLex1 = "parse" ++ show token ++ " k = lexerWrapper $ \\t -> case t of" + + lineLex2 + | token == eof_term g = " " ++ eof ++ " -> k" + | otherwise = " " ++ paren tok ++ " -> k " ++ t + where + Just (_, eof) = lexer g + + lineLex3 = " _ -> happyErrorWrapper t" + + + -------------------- GENRULE ------------------- + -- Generate the code for a rule. + genRule :: GenOptions -> XGrammar -> Int -> String + genRule opts x rule + | isTrivial = newline [comment, typedecl, trivialCode] + | otherwise = newline [comment, typedecl, code] + where + + recog = (recognitionPoints x) !! 
rule + rhsAfterDot' = rhsAfterDot (RADTools.g x) (Lr0 rule recog) + isTrivial = length rhsAfterDot' <= 1 + + comment + | comments opts = "-- " ++ showRecognitionPoint x rule + | otherwise = "" + + typedecl + | showTypes opts || rank2Types opts = typedecl' + | otherwise = "" + where + typedecl' = fromMaybe "" $ fmap (("rule" ++ show rule ++ " :: ") ++) (ruleTypedecl opts x (rank2Types opts) rule) + + code = case (rulesTupleBased opts, ptype opts) of + (True, Normal) -> tupleBasedCodeNormal + (True, Monad) -> error "TODO" + (True, MonadLexer) -> tupleBasedCodeLexer + (False, Normal) -> continuationBasedCodeNormal + (False, Monad) -> error "TODO" + (False, MonadLexer) -> continuationBasedCodeLexer + + -- There are 3 types how we code generate the code: + -- 1. trivial: 0 or 1 symbols are parsed + -- 2. continuation-based, with optional type annotations for the continuations + -- 3. tuple-based + + trivialCode = case rhsAfterDot' of + [x] -> "rule" ++ show rule ++ " = parse" ++ show x + [] -> "rule" ++ show rule ++ " = id" + + tupleBasedCodeNormal = newline $ firstLine:otherLines where + firstLine = "rule" ++ show rule ++ " k ts0 = " ++ fullk ++ " where" + fullk = "k " ++ (unwords $ map (\x -> "v" ++ show x) [1..length otherLines]) ++ " ts" ++ show (length otherLines) + otherLines = map (uncurry toLine) (zip rhsAfterDot' [1..]) + toLine tok i = " (v" ++ show i ++ ", ts" ++ show i ++ ") = parse" ++ show tok ++ " (,) ts" ++ show (i-1) + + tupleBasedCodeLexer = newline $ firstLine : otherLines ++ [finalLine] where + firstLine = "rule" ++ show rule ++ " k la0 = do" + fullk = "k " ++ (unwords $ map (\x -> "v" ++ show x) [1..length otherLines]) ++ " la" ++ show (length otherLines) + otherLines = map (uncurry toLine) (zip rhsAfterDot' [1..]) + toLine tok i = " (v" ++ show i ++ ", la" ++ show i ++ ") <- parse" ++ show tok ++ " (\\a b -> return (a, b)) la" ++ show (i-1) + finalLine = " " ++ fullk + + continuationBasedCodeNormal = continuationBasedCode "ts" + 
continuationBasedCodeLexer = continuationBasedCode "la" + continuationBasedCode ts + | rank2Types opts = newline $ firstLine : (blend lineTypes (otherLines ++ [finalLine])) + | otherwise = newline $ firstLine : (otherLines ++ [finalLine]) + where + firstLine = "rule" ++ show rule ++ " k " ++ ts ++ " = parse" ++ show (head rhsAfterDot') ++ " cont1 " ++ ts ++ " where" + otherLines = map (uncurry toLine) (zip (tail rhsAfterDot') [1..]) + toLine tok i = " cont" ++ show i ++ " " ++ vs i ++ " " ++ ts ++ " = parse" ++ show tok ++ " " ++ paren ("cont" ++ show (i+1) ++ " " ++ vs i) ++ " " ++ ts + vs i = unwords (map (\v -> "v" ++ show v) [1..i]) + finalLine = " cont" ++ show n ++ " " ++ vs n ++ " " ++ ts ++ " = k " ++ vs n ++ " " ++ ts where + n = length rhsAfterDot' + + lineTypes = map toType [1..n] + toType i = fromMaybe "" (toType' i) + toType' i = do + lhs <- mapM (symboltype opts (RADTools.g x)) (take i rhsAfterDot') + let lhsType = intercalate " -> " (lhs ++ [parser]) + return $ " cont" ++ show i ++ " :: " ++ lhsType where + parser = paren $ wrapperType opts ++ " r" + + blend (x:xs) ys = x:(blend ys xs) + blend _ _ = [] + + -- Generate the type signature of a rule function. + ruleTypedecl :: GenOptions -> XGrammar -> Bool -> Int -> Maybe String + ruleTypedecl opts x forall_r rule = do + let g = RADTools.g x + let recog = (recognitionPoints x) !! rule + let lhs' = rhsAfterDot g (Lr0 rule recog) + lhstypes <- mapM (symboltype opts g) lhs' + let lhs = forall ++ (paren $ intercalate " -> " $ lhstypes ++ [parser]) + return (lhs ++ " -> " ++ parser) + where + forall = if forall_r then "forall r. " else "" + parser = paren $ wrapperType opts ++ " r" + + + -------------------- TOOLS -------------------- + + -- Insert newlines between the strings; ignore empty strings + newlines :: Int -> [String] -> String + newlines n = intercalate (replicate n '\n') . filter (not . null) + + newline = newlines 1 + + newlineMap prefix f x = newlines 1 $ map ((prefix ++) . 
f) x + + paren a = "(" ++ a ++ ")" + + hasRank2Type opts g nt = rank2Types opts && case symboltype opts g nt of + Just t -> isInfixOf (forallMatch opts) t + Nothing -> False + + symboltype opts g symbol + | symbol == errorTok = Just (process $ errorTokenType opts) + | symbol == (eof_term g) = Nothing + | elem symbol (non_terminals g) = fmap process $ join (maybelookup (types g) symbol) + | wantsProjection = Nothing -- we don't know the type of the projection + | otherwise = Just (process $ token_type g) + where + process = remNewlines . paren where + remNewlines = map replace + replace '\n' = ' ' + replace x = x + maybelookup arr i = if elem i (indices arr) then Just (arr ! i) else Nothing + wantsProjection = "$$" == (rawToken \\ replaceDollar rawToken "") -- i.e. Tokens of form "TokenInt $$" + rawToken = fromJust $ lookup symbol (token_specs g) \ No newline at end of file diff --git a/src/RADCodeGen_LALR.hs b/src/RADCodeGen_LALR.hs new file mode 100644 index 00000000..61be9a20 --- /dev/null +++ b/src/RADCodeGen_LALR.hs @@ -0,0 +1,362 @@ +module RADCodeGen_LALR where + import Grammar + import LALR + import GenUtils + import RADTools (showItem, showProd, lhs, prod) + import RADStateGen + import Control.Monad + import Data.List + import Data.Maybe + import Data.Text (pack, unpack, replace) + import GHC.Arr ((!), indices) + + + data ParserType = Normal | Monad | MonadLexer deriving (Eq, Show) + + data GenOptions = GenOptions { + ptype :: ParserType, + + wrapperType :: String, -- e.g. "Parser" + errorTokenType :: String, -- e.g. "ErrorToken" + + showTypes :: Bool, + comments :: Bool, + + rank2Types :: Bool, -- when True, all functions (including goto functions) which use or enclose a higher-rank-function are annotated with an explicit type. + forallMatch :: String, -- the text which determines which types count as rank-2-types. 
+ + header :: String, + footer :: String + } deriving Show + + mlex opts = ptype opts == MonadLexer + + dotleft (Lr0 rule dot) = Lr0 rule (dot-1) + + -------------------- GENCODE -------------------- + -- Generate the full code + genCode :: GenOptions -> Grammar -> [LALRState] -> ActionTable -> GotoTable -> IO String + genCode opts g states action goto = do + return $ newlines 3 [languageFeatures, header', entryPoints', definitions', states', actions', footer'] where + languageFeatures + | rank2Types opts = newline $ map extension ["RankNTypes", "ScopedTypeVariables"] + | otherwise = "" where + extension str = "{-# LANGUAGE " ++ str ++ " #-}" + + header' = header opts + entryPoints' = newlines 2 $ map (entryPoint opts g states) (starts g) + definitions' = definitions opts g + + states' = newlines 2 $ map (genState opts g) states + + actions' = newlines 2 $ map (genAction opts g) [1..prods] + prods = length (productions g) - 1 + footer' = footer opts + + + -------------------- ENTRYPOINT -------------------- + entryPoint :: GenOptions -> Grammar -> [LALRState] -> (String, Name, Name, Bool) -> String + entryPoint opts g states (name, lhs, rhs, isPartial) + | isPartial = newline [typedecl, definition] + | otherwise = newline [typedecl, definition, parseEof] + where + typedecl + | showTypes opts = fromMaybe "" $ fmap (((name ++ " :: ") ++) . correctP) (symboltype opts g rhs) + | otherwise = "" + + correctP = if mlex opts then p else parser + + definition = case ptype opts of + Normal -> common ++ paren (checkEof ++ "const") ++ maybeWhere + Monad -> common ++ paren (checkEof ++ "const . " ++ returnP) ++ maybeWhere + MonadLexer -> common ++ paren (checkEof ++ "const . " ++ returnP) ++ " []" ++ maybeWhere + + common = name ++ " = state" ++ show i ++ " " + + -- After finishing, eof must be parsed. This is because the accept-state may accept per default, which means eof still has to be verified. For partial parsers, this is not the case. 
+ maybeWhere = if isPartial then "" else " where" + checkEof = if isPartial then "" else "parseEof . " + parseEof + | mlex opts = newline [lex1, lex2, lex3] + | otherwise = newline [normal1, normal2] + where + lex1 = " parseEof k = lexerWrapper $ \\t -> case t of" + lex2 = " " ++ paren eof ++ " -> k" + lex3 = " _ -> happyErrorWrapper t" + Just (_, eof) = lexer g + normal1 = " parseEof k [] = k []" + normal2 = " parseEof k ts = " ++ happyError ++ " ts" + happyError = fromMaybe "happyError" (error_handler g) + + -- Rule LHS -> RHS + prod = fromJust $ find matches [0 .. length (productions g) - 1] where + matches i = matches' (lookupProdNo g i) + matches' (Production lhs' rhs' _ _) = lhs' == lhs && rhs' == [rhs] + + -- State with item LHS -> . RHS + state = fromJust $ find (matches . coreItems) states where -- state with item LHS -> . RHS + matches items = elem (Lr0 prod 0) items + i = fromJust $ elemIndex state states + + + p a = p' ++ " " ++ a + parser a = wrapperType opts ++ " " ++ a + (_, _, p', _, returnP) = monad g + + + -------------------- DEFINITIONS -------------------- + -- Generate definitions such as wrappers, the parser type or more required functions and types + definitions :: GenOptions -> Grammar -> String + definitions opts g = case ptype opts of + Normal -> newlines 2 [parserDecl, errorToken] + Monad -> newlines 2 [parserDecl, errorToken, wrapThen] + MonadLexer -> newlines 2 [parserDecl, errorToken, wrapThen, repeatTok, wrapLexer, wrapError] + where + + -- type Parser r = [Token] -> P r + parserDecl + | ptype opts == Normal = "type " ++ parser "r" ++ " = [" ++ tokenT ++ "] -> r" + | otherwise = "type " ++ parser "r" ++ " = [" ++ tokenT ++ "] -> " ++ p "r" + + -- data ErrorToken = ErrorToken + errorToken = "data " ++ errorTokenT ++ " = " ++ errorTokenT + + -- thenWrapP :: P a -> (a -> Parser b) -> Parser b + -- thenWrapP a f ts = (thenP) a (flip f ts) + wrapThen = newline [typedecl, definition] where + name = "thenWrapP" + typedecl = name ++ " :: " 
++ p "a" ++ " -> (a -> " ++ parser "b" ++ ") -> " ++ parser "b" + definition = name ++ " a f ts = " ++ paren thenP ++ " a (flip f ts)" + + -- repeatTok :: Token -> Parser a -> Parser a + -- repeatTok tok p = \cur -> p (tok:cur) + repeatTok = newline [typedecl, definition] where + name = "repeatTok" + typedecl = name ++ " :: " ++ tokenT ++ " -> " ++ parser "a" ++ " -> " ++ parser "a" + definition = name ++ " tok p = \\cur -> p (tok:cur)" + + -- lexerWrapper :: (Token -> Parser a) -> Parser a + -- lexerWrapper cont [] = lexer (\tok -> cont tok []) + -- lexerWrapper cont (tok:toks) = cont tok toks + wrapLexer = newline [typedecl, line1, line2] where + name = "lexerWrapper" + typedecl = name ++ " :: " ++ paren (tokenT ++ " -> " ++ parser "a") ++ " -> " ++ parser "a" + line1 = name ++ " cont [] = " ++ lexer' ++ " (\\t -> cont t [])" + line2 = name ++ " cont (t:ts) = cont t ts" + + -- happyErrorWrapper :: Token -> Parser a + -- happyErrorWrapper t _ = happyError t + wrapError = newline [typedecl, definition] where + name = "happyErrorWrapper" + typedecl = name ++ " :: " ++ tokenT ++ " -> " ++ parser "a" + definition = name ++ " = const . " ++ happyError + + p a = p' ++ " " ++ a + parser a = wrapperType opts ++ " " ++ a + (_, _, p', thenP, _) = monad g + tokenT = token_type g + errorTokenT = errorTokenType opts + (Just (lexer', _)) = lexer g + happyError = fromMaybe "happyError" (error_handler g) + + + -------------------- GENSTATE ------------------- + -- Generate the code for a single state. 
-- 'genState' emits the source text of the function @state<N>@ for one LALR
-- state, where N is @index state@.
--
-- A trivial state is emitted as @state<N> = id@. Any other state becomes a
-- @case@ over the lookahead: the remaining input list @ts@ for plain/monadic
-- parsers, or the token @t@ supplied by @lexerWrapper@ when 'mlex' holds.
-- It has one alternative per shift and per reduce, followed by a default
-- alternative, and a trailing @where@ block with one @g<NT>@ function per
-- goto of the state.
--
-- The generated function takes one continuation argument per core item
-- (named @k@ if there is a single core item, @k1@, @k2@, ... otherwise).
genState :: GenOptions -> Grammar -> LALRState -> String
genState opts g state
  | isTrivial = newline [comment, trivialTypedecl, trivialHeader]
  | otherwise = newline [comment, typedecl, header, shifts', reduces', defaultAction', gotos']
  where
    -- Trivial: exactly one core item, no explicit shift/goto/reduce entries,
    -- and a reduce as default action.
    isTrivial =
      length (coreItems state) == 1
        && null (shifts state)
        && null (gotos state)
        && null (reduces state)
        && isReduce (defaultAction state)
      where
        isReduce (Reduce _) = True
        isReduce _ = False

    hasRank2Goto = any (hasRank2Type opts g . fst) (gotos state)
    hasRank2TypeSignature = any hasRank2Item (coreItems state)
    hasRank2Item item = any (hasRank2Type opts g) (rhsAfterDot g item)

    trivialTypedecl
      | rank2Types opts && hasRank2Goto = fromMaybe "" (stateTypeSignature opts g True state)
      | otherwise = ""

    trivialHeader = "state" ++ show (index state) ++ " = id"

    -- One comment line per core item, only when comments are requested.
    comment
      | comments opts = newlineMap "-- " (showItem g) (coreItems state)
      | otherwise = ""

    -- The signature carries an explicit "forall r." only when one of the
    -- state's gotos has a rank-2 type.
    typedecl
      | rank2Types opts && hasRank2Goto = fromMaybe "" (stateTypeSignature opts g True state)
      | rank2Types opts && hasRank2TypeSignature = fromMaybe "" (stateTypeSignature opts g False state)
      | showTypes opts = fromMaybe "" (stateTypeSignature opts g False state)
      | otherwise = ""

    shifts' = newlineMap " " shift (shifts state)
    reduces' = newlineMap " " reduce (reduces state)
    gotos' = where' ++ intercalate "\n" (map (" " ++) gotoLines)
      where
        gotoLines = join (map goto (gotos state))
        where' = if null (gotos state) then "" else " where\n"

    header
      | mlex opts = common ++ " = lexerWrapper $ \\t -> case t of"
      | otherwise = common ++ " ts = case ts of"
      where
        common = "state" ++ show (index state) ++ " " ++ headerKs
        headerKs = unwords $ map k (coreItems state)

    -- Case alternative for a shift: call the target state and hand every
    -- continuation the shifted token, or its "$$" projection @v@.
    shift (token, (target, items))
      | mlex opts = paren tok ++ " -> state" ++ show target ++ " " ++ kcontent
      | otherwise = "t@" ++ paren tok ++ ":tr -> state" ++ show target ++ " " ++ kcontent ++ " tr"
      where
        items' = map dotleft items
        tok = replaceDollar rawToken (if wantsProjection then "v" else "_")
        replaceDollar a char = maybe a ($ char) (mapDollarDollar a)
        kcontent = unwords (map (paren . (++ x) . k) items')
          where
            x = if wantsProjection then " v" else " t"
        rawToken = fromJust $ lookup token (token_specs g)
        wantsProjection = "$$" == (rawToken \\ replaceDollar rawToken "") -- i.e. Tokens of form "TokenInt $$"

    -- Case alternative for a reduce: the lookahead is not consumed, so the
    -- continuation gets the unchanged input (@ts@, or @repeatTok t@).
    reduce (token, rule)
      | mlex opts = paren tokMaybeEof ++ " -> repeatTok t $ " ++ k'
      | token == eof_term g = eofCase
      | otherwise = normalCase
      where
        normalCase = "t@" ++ paren tok ++ ":tr -> " ++ k' ++ " ts"
        eofCase = "[] -> " ++ k' ++ " ts"

        tokMaybeEof = if token == eof_term g then eof else tok
        tok = removeDollar $ fromJust (lookup token (token_specs g))
        Just (_, eof) = lexer g

        removeDollar a = maybe a ($ "_") (mapDollarDollar a)
        k' = let dot = length (rhs (lookupProdNo g rule)) in k (Lr0 rule dot)

    -- Local goto function @g<NT>@, preceded by its signature when the
    -- nonterminal has a rank-2 type.
    goto (nt, (target, items))
      | hasRank2Type opts g nt = catMaybes [signature, definition]
      | otherwise = catMaybes [definition]
      where
        items' = map dotleft items
        signature = fmap (\t -> "g" ++ show nt ++ " :: " ++ t ++ " -> " ++ paren outtype) (symboltype opts g nt)
        definition = Just $ "g" ++ show nt ++ " x = state" ++ show target ++ " " ++ unwords (map (paren . (++ " x") . k) items')
        outtype = wrapperType opts ++ " r"

    defaultAction' = " " ++ case defaultAction state of
      ErrorShift target -> defaultErrorShift target
      Reduce rule -> defaultReduce rule
      Error -> defaultError

    -- The shifted value is the error-token constructor. 'definitions' emits
    -- "data T = T" with T = errorTokenType opts, so the same name must be
    -- used here instead of a hard-coded "ErrorToken".
    defaultErrorShift toState
      | mlex opts = "_ -> repeatTok t $ " ++ call
      | otherwise = "_ -> " ++ call ++ " ts"
      where
        call = "state" ++ show toState ++ " " ++ paren (k item ++ " " ++ errorTokenType opts)
        item = head $ hdiv (completionItems state) errorTok g

    defaultReduce rule
      | mlex opts = "_ -> repeatTok t $ " ++ k'
      | otherwise = "_ -> " ++ k' ++ " ts"
      where
        k' = let dot = length (rhs (lookupProdNo g rule)) in k (Lr0 rule dot)

    defaultError
      | mlex opts = "_ -> happyErrorWrapper t"
      | otherwise = "_ -> " ++ happyError ++ " ts"
      where
        happyError = fromMaybe "happyError" (error_handler g)

    -- Continuation expression for an item: the matching continuation
    -- parameter if the item is a core item of this state, otherwise the
    -- semantic action of its rule applied to the goto function of its LHS.
    k item@(Lr0 rule _) = maybe noCore core $ elemIndex item (coreItems state)
      where
        core idx
          | length (coreItems state) == 1 = "k"
          | otherwise = "k" ++ show (idx + 1)
        noCore = "action" ++ show rule ++ " g" ++ show (lhs g item)

-- Create the type signature for a state.
-- Returns Nothing as soon as the type of any symbol after the dot of a core
-- item is unknown (i.e. 'symboltype' returns Nothing for it).
-- When the Bool argument is True the signature starts with "forall r. ".
stateTypeSignature :: GenOptions -> Grammar -> Bool -> LALRState -> Maybe String
stateTypeSignature opts g forall_r state = do
  let start = "state" ++ show (index state) ++ " :: " ++ quantifier
  components <- mapM component (coreItems state)
  return $ start ++ intercalate " -> " (map paren (components ++ [outtype]))
  where
    -- Type of the continuation belonging to one core item.
    component item = fmap (intercalate " -> " . (++ [outtype])) (mapM (symboltype opts g) (rhsAfterDot g item))
    outtype = wrapperType opts ++ " r"
    quantifier = if forall_r then "forall r. " else ""


-------------------- GENACTION --------------------
-- Create the code for a semantic action, i.e. a reduce action.
+ genAction :: GenOptions -> Grammar -> Int -> String + genAction opts g i = newline [comment, typedecl, code] where + prod@(Production lhs' rhs' _ _) = lookupProdNo g i + + comment + | comments opts = "-- " ++ showProd g i + | otherwise = "" + + typedecl + | showTypes opts || rank2Types opts = typedecl' -- some actions (not further specified) need to be explicitly typed in order for rank-n-types to work + | otherwise = "" + where + typedecl' = fromMaybe "" $ fmap (("action" ++ show i ++ " :: ") ++) (actionTypedecl opts g i) + + code = header ++ (if isMonadic then monadicCode else normalCode) + (customCode, isMonadic) = customProdCode prod + header = "action" ++ show i ++ " g " ++ unwords (map v [1..length rhs']) ++ " = " + normalCode = "g " ++ paren customCode + monadicCode = paren customCode ++ " `thenWrapP` g" + v n = "v" ++ show n + + -- Generate the type signature of a semantic action function. + actionTypedecl :: GenOptions -> Grammar -> Int -> Maybe String + actionTypedecl opts g i = do + lhstype <- symboltype opts g lhs' + let lhs = paren $ intercalate " -> " $ [lhstype, outtype] + rhstypes <- mapM (symboltype opts g) rhs' + let rhs = intercalate " -> " $ rhstypes ++ [outtype] + return (lhs ++ " -> " ++ rhs) + where + Production lhs' rhs' _ _ = lookupProdNo g i + outtype = wrapperType opts ++ " r" + + -- Read and translate the raw action code supplied by the user. Also return whether the action is monadic or not. + customProdCode :: Production -> (String, Bool) + customProdCode (Production _ _ (code, _) _) = case code of + '%':'%':_ -> error "'{%%' actions not supported" + '%':'^':_ -> error "'{%^' actions not supported" + '%':rest -> (adapt rest, True) + _ -> (adapt code, False) + where + adapt code + | code == "no code" = v 1 + | otherwise = replaceHappyVars code + v n = "v" ++ show n + replaceHappyVars = unpack . replace (pack "happy_var_") (pack "v") . 
pack + + + -------------------- TOOLS -------------------- + + -- Insert newlines between the strings; ignore empty strings + newlines :: Int -> [String] -> String + newlines n = intercalate (replicate n '\n') . filter (not . null) + + newline = newlines 1 + + newlineMap prefix f x = newlines 1 $ map ((prefix ++) . f) x + + paren a = "(" ++ a ++ ")" + + rhsAfterDot g item@(Lr0 rule dot) = drop dot $ rhs (prod g item) + rhs (Production _ rhs _ _) = rhs + + hasRank2Type opts g nt = rank2Types opts && case symboltype opts g nt of + Just t -> isInfixOf (forallMatch opts) t + Nothing -> False + + symboltype opts g symbol + | symbol == errorTok = Just (process $ errorTokenType opts) + | elem symbol (non_terminals g) = fmap process $ join (maybelookup (types g) symbol) + | wantsProjection = Nothing -- we don't know the type of the projection + | otherwise = Just (process $ token_type g) + where + process = remNewlines . paren where + remNewlines = map replace + replace '\n' = ' ' + replace x = x + maybelookup arr i = if elem i (indices arr) then Just (arr ! i) else Nothing + wantsProjection = "$$" == (rawToken \\ replaceDollar rawToken "") -- i.e. 
Tokens of form "TokenInt $$" + rawToken = fromJust $ lookup symbol (token_specs g) + replaceDollar a char = maybe a ($ char) (mapDollarDollar a) \ No newline at end of file diff --git a/src/RADStateGen.hs b/src/RADStateGen.hs new file mode 100644 index 00000000..d7ef6921 --- /dev/null +++ b/src/RADStateGen.hs @@ -0,0 +1,634 @@ +module RADStateGen (generateLALRStates, generateRADStates, createXGrammar, artCore, hdiv, plus, RADType(..), RADState(..), LALRState(..), RawRADState(..), LALRDefaultAction(..), RADDefaultAction(..)) where + import Grammar + import Data.Graph.Dom + import First + import Follow + import NameSet (NameSet) + import Data.Graph + import Data.Set (Set, toList, fromList, elemAt) + import qualified Data.IntSet + import LALR + import RADTools (CompletedLr0State, Lr1State, XGrammar(..), complete, showItem, showProd, lhs, core, completion, prod, hasTokenAfterDot, tokenAfterDot, rhsLength', isInDirectCompletion, dotIsAtRightEnd, plus, hdiv, radCompletion, itemsStartingWith, plusRad, completeWithFunction, directCompletion, rhsAfterDot) + import Control.Monad + import Data.List + import Data.Maybe + import Data.Ord + import Data.Function (on) + import GHC.Arr ((!), assocs, listArray, Array(..)) + + + -- Types which are used (both) for LALR and RAD states: + type AcceptAction = (Name) -- On terminal t --> accept + type AnnounceAction = (Name, Int) -- On terminal t --> announce using rule i + type ReduceAction = (Name, Int) -- On terminal t --> reduce using rule i + type ShiftAction = (Name, (Int, [Lr0Item])) -- On terminal t --> goto state S with items I (= A -> B t . C) + type GotoAction = (Name, (Int, [Lr0Item])) -- Nonterminal X <-> goto to state S with items I (= A -> B X . C) such that: + -- g_X v = state_{i+X} (k_i v) + + -------------------- RAD STATE -------------------- + + data RADType = Type1 -- States with the item _ -> |- . NT (here a top-down parse is started) + | Type2 -- States with the item _ -> |- NT . 
(here a top-down parse is accepted) + | Type3 -- Normal states with no artificial item + deriving (Show, Eq) + + data RawRADState = RawRADState { + i :: Int, -- The final index that the completed RADState state will also have + comingFrom :: Int, + radType :: RADType, + state :: LALRState, + nt :: Name, -- The NT which is used by Type1 or Type2 states. Attention: undefined for Type3 states + core' :: [Lr0Item], + completion' :: [Lr0Item] -- contains core + } deriving (Eq +#ifdef DEBUG + , Show +#endif + ) + + createType1State :: XGrammar -> Name -> LALRState -> Int -> Int -> RawRADState + createType1State x nt state i comingFrom = RawRADState { i = i, radType = Type1, state = state, nt = nt, core' = [], completion' = radCompletion x (itemsStartingWith (g x) nt), comingFrom = comingFrom } -- has artifical core item _ -> |- . NT + + createType2State :: XGrammar -> Name -> LALRState -> [Lr0Item] -> Int -> Int -> RawRADState + createType2State x nt state core i comingFrom = RawRADState { i = i, radType = Type2, state = state, nt = nt, core' = core, completion' = radCompletion x core, comingFrom = comingFrom } -- has artifical core item _ -> |- NT . + + createType3State :: XGrammar -> LALRState -> [Lr0Item] -> Int -> Int -> RawRADState + createType3State x state core i comingFrom = RawRADState { i = i, radType = Type3, state = state, nt = undefined, core' = core, completion' = radCompletion x core, comingFrom = comingFrom } + + data RADState = RADState { + announces' :: [AnnounceAction], + gotos' :: [GotoAction], + shifts' :: [ShiftAction], + accepts' :: [AcceptAction], -- ONLY type2 states can have (and always do have) accept actions! + defaultAction' :: RADDefaultAction, + _raw :: RawRADState + } deriving (Eq +#ifdef DEBUG + , Show +#endif + ) + + data RADDefaultAction = ErrorShift' Int -- On errorToken (i.e. default), shift to state X + | Announce' Int -- Announce rule X + | Accept' -- Accept the NT. 
Only in type2-states + | Error' -- Call happyError + deriving (Eq, Show) + + -- The core PLUS the possible artificial item. + -- Artificial items have negative rule numbers - they look like this: "Lr0 (-4) 0" for the item "|- -> . (NT4)". + artCore :: RADState -> [Lr0Item] + artCore state = case (radType . _raw $ state) of + Type1 -> (Lr0 (-nt') 0) : core + Type2 -> (Lr0 (-nt') 1) : core + Type3 -> core + where + core = core' (_raw state) + nt' = nt (_raw state) + + showRadState :: XGrammar -> RADState -> [Char] + showRadState x state = "Raw = " ++ showRaw x (_raw state) ++ "\nShifts = " ++ show (shifts' state) ++ "\nGotos = " ++ show (gotos' state) ++ "\nAnnounces = " ++ show (announces' state) ++ "\nAccepts = " ++ show (accepts' state) ++ "\nDefault = " ++ show (defaultAction' state) ++ "\n\n" + + showRaw :: XGrammar -> RawRADState -> [Char] + showRaw x raw = "RawRADState " ++ show (i raw) ++ ": " ++ show (radType raw) ++ " (orig state: " ++ show (index (state raw)) ++ " " ++ show (map (showItem (g x)) (coreItems (state raw))) ++ ")" ++ + (if radType raw /= Type3 then ". NT = " ++ show (nt raw) ++ " (" ++ ((token_names (g x)) ! (nt raw)) ++ ")" else "") ++ + (if radType raw /= Type1 then ". core = { " ++ intercalate "; " (map (showItem (g x)) (core' raw)) ++ " }" else "") ++ + ". completion = { " ++ intercalate "; " (map (showItem (g x)) (completion' raw)) ++ " }" ++ + " (Coming from state " ++ show (comingFrom raw) ++ ")" + + + -- Create the extended grammar containing information about the recognition points. 
-- Builds the 'XGrammar' for a grammar: one recognition graph is computed per
-- LALR state, the non-free items are derived from those graphs, and from
-- them the recognition point of every rule.
createXGrammar :: Grammar -> [LALRState] -> IO XGrammar
createXGrammar g lalrStates = do
  let graphs  = map (recognitionGraph g) lalrStates
      nonfree = nonfreeItems g graphs
      points  = determineRecognitionPoints g nonfree
      x       = XGrammar { g = g, recognitionPoints = points }

#ifdef DEBUG
  debugPrint "State Graphs:" (showGraph g) graphs
  --debugPrint "Non-Free Items:" (showItem g) nonfree
  debugPrint "All Rules With Their Recognition Points:" (showRecognitionPoint x) [0 .. (length (productions g)) - 1]
#endif

  return x

-- Generate all RAD states from happy's LALR states.
-- The first and follow sets are computed here and passed on to
-- 'lalrToRADStates'.
generateRADStates :: XGrammar -> [LALRState] -> [Int] -> IO [RADState]
generateRADStates x lalrStates unusedRules = do
  let grammar    = RADTools.g x
      firstSets  = mkFirst grammar
      followSets = followArray grammar firstSets
      radStates  = lalrToRADStates x lalrStates unusedRules firstSets followSets

#ifdef DEBUG
  -- debugPrint "LALRStates:" (showState g) lalrStates
  debugPrint "RADStates:" (showRadState x) radStates
#endif

  return radStates

-- Helper function for printing: a title and one line per element, framed by
-- two dashed rules.
debugPrint :: String -> (a -> String) -> [a] -> IO ()
debugPrint title showElem elems =
  putStrLn (concat [gap, rule, "\n", title, gap, body, rule, gap])
  where
    rule = replicate 40 '–'
    gap  = "\n\n"
    body = unlines (map showElem elems)

-- Convert all LALR states to RAD states.
-- Starts from one raw type-1 state per unambiguous nonterminal and completes
-- raw states one after another until none are pending.
lalrToRADStates :: XGrammar -> [LALRState] -> [Int] -> ([Name] -> NameSet) -> Array Name NameSet -> [RADState]
lalrToRADStates x@(XGrammar { g = g, recognitionPoints = recogPoints }) lalrStates unusedRules first follow =
  gen' x [] rawType1States
  where
    rawType1States = zipWith toType1 unambiguousNTs [0..]

    -- Unambiguous NTs are NTs that appear after the recognition point in some rule
    unambiguousNTs = filter (isJust . findGoodRule) (non_terminals g)

    -- First used rule (with its number) that has the NT at or after its
    -- recognition point.
    findGoodRule nt = find usesNT (zip [0..] (productions g))
      where
        usesNT (ruleNo, Production _ rhs _ _) =
          elem nt (drop (recogPoints !! ruleNo) rhs) && notElem ruleNo unusedRules

    -- Find a state with a (completion) item where the dot is immediately before the NT; create a type1-state
    toType1 nt idx = createType1State x nt origin idx (-1)
      where
        (ruleNo, Production _ rhs _ _) = fromJust (findGoodRule nt)
        -- position of the last occurrence of the NT in the right-hand side
        posBeforeNT = (length rhs - 1) - fromJust (elemIndex nt (reverse rhs))
        item = Lr0 ruleNo posBeforeNT
        origin = fromJust (find (elem item . completionItems) lalrStates)

    -- Complete the raw states to full RADStates, possibly yielding new raw states which will be recursively completed.
    gen' :: XGrammar -> [RADState] -> [RawRADState] -> [RADState]
    gen' _ done [] = done
    gen' xg done pending@(raw:rest) = gen' xg (done ++ [fresh]) (rest ++ spawned)
      where
        (fresh, spawned) = completeRaw xg lalrStates raw known first follow (length done + length pending)
        known = rest ++ map _raw done


-- Complete a raw state to a RADState, possibly yielding new raw states.
-- The "new" raw states which are created for shifting/goto can also be existing ones; therefore, the list of all already created raw states is passed around.
+ completeRaw :: XGrammar -> [LALRState] -> RawRADState -> [RawRADState] -> ([Name] -> NameSet) -> Array Name NameSet -> Int -> (RADState, [RawRADState]) + completeRaw x@(XGrammar { g = g }) allStates raw allRawStates first follow stateCount = (radState, newStates) where + radState = RADState { shifts' = shifts', accepts' = accepts'', announces' = announces'', gotos' = gotos', defaultAction' = default'', _raw = raw } + newStates = gotoStates ++ shiftStates ++ (maybe [] return newStateFromTransformedErrorShift) + gotos' = transformedGotos + shifts' = shiftShifts + + announces' = announcesFromReduces ++ shiftAnnounces ++ type1EpsilonAnnounces + announces'' = case default'' of -- If default action is announce, remove unnecessary entries + Announce' rule -> filter ((/=) rule . snd) $ filter ((/=) errorTok . fst) announces' + _ -> filter ((/=) errorTok . fst) announces' + + accepts' = shiftAccepts ++ type2Accepts + accepts'' -- If default action is accept, no need for an accept array + | default'' == Accept' = [] + | otherwise = delete errorTok (rmdups accepts') where rmdups = map head . group . sort + + -- If there is no transformed default action, we choose a suitable default action: + -- Accept for type2 states, or the largest announce action for other states. + -- If there is an accept or announce action on the errorToken, use this as the default action. + default'' + | elem errorTok accepts' = if default' == Error' then Accept' else error ("errorTok is in AcceptActions, but defaultAction is " ++ (show default')) -- check for accept conflict, shouldn't happen + | any ((==) errorTok . 
fst) announces' = case default' of + Error' -> Announce' defaultRule + Announce' rule -> if rule == defaultRule then Announce' defaultRule else + error $ "errorTok wants to announce rule " ++ show defaultRule ++ ", but defaultAction is " ++ (show default') + _ -> error $ "errorTok wants to announce rule " ++ show defaultRule ++ ", but defaultAction is " ++ (show default') + | default' /= Error' = default' -- Keep transformed default action + | radType raw == Type2 = Accept' -- Type2 states accept per default + | not (null announces') = Announce' largestAnnounce + | otherwise = Error' + where + defaultRule = snd $ fromJust $ find ((==) errorTok . fst) announces' + largestAnnounce = head . head $ sortBy (flip (comparing length)) (group (sort (map snd announces'))) + + -- Transform the LALR default action + default' = fromMaybe Error' (transformDefault (defaultAction (state raw))) + transformDefault Error = Just Error' + transformDefault (Reduce rule) = do + (_, rule') <- transformReduce (errorTok, rule) + return $ Announce' rule' + + transformDefault (ErrorShift _) = case (fromJust transformedErrorShift) of + (Just (_, (state, _)), _, _, _) -> Just (ErrorShift' state) + (_, _, Just (_, rule), _) -> Just (Announce' rule) + _ -> Nothing -- The default action could be irrelevant for the RAD state + + transformedErrorShift = case (defaultAction (state raw)) of + ErrorShift state -> Just $ transformShift stateNum (errorTok, (state, undefined)) where stateNum = stateCount + length (gotoStates ++ shiftStates) + _ -> Nothing + newStateFromTransformedErrorShift = maybe Nothing (\(_, s, _, _) -> s) transformedErrorShift + + -- Goto actions and new goto-states: + (transformedGotos, gotoStates) = (catMaybes transformedGotos', catMaybes gotoStates') + (transformedGotos', gotoStates') = unzip $ allGotos' stateCount (gotos (state raw)) [] where + allGotos' :: Int -> [GotoAction] -> [(Maybe GotoAction, Maybe RawRADState)] -> [(Maybe GotoAction, Maybe RawRADState)] + allGotos' _ [] 
result = result + allGotos' nextIndex (goto:gotos) result = case transformGoto nextIndex goto of + res@(_, Just _) -> allGotos' (nextIndex + 1) gotos (result ++ [res]) + res@(_, Nothing) -> allGotos' nextIndex gotos (result ++ [res]) + + -- Transform a normal goto-action into a RAD-goto action to a new type3-state. + -- This function creates both the goto action and the new state. + -- Return Nothing if the goto is not required for RAD. + -- Type-1-specific: The artificial item '_ -> |- . NT' yields a goto action to a new type2(!)-state. + transformGoto :: Int -> GotoAction -> (Maybe GotoAction, Maybe RawRADState) + transformGoto index (tok, (gotoState, _)) + | not isGotoFromType1ToType2 && null gotoItems = (Nothing, Nothing) + | otherwise = (Just newAction, newState) + where + isGotoFromType1ToType2 = radType raw == Type1 && tok == (nt raw) -- type1-specific. May have no goto items as an articifial item is created + gotoItems = plusRad (completion' raw) tok x + + newState + | isGotoFromType1ToType2 = Just $ createType2State x tok (allStates !! gotoState) gotoItems index (i raw) + | null existingState = Just $ createType3State x (allStates !! 
gotoState) gotoItems index (i raw) + | otherwise = Nothing + + newAction = (tok, (index, allItems)) where + allItems = if isGotoFromType1ToType2 then artificial:gotoItems else gotoItems -- gotoItems + artificial item + artificial = Lr0 (-(nt raw)) 1 + index = maybe (i $ fromJust newState) i existingState + + -- an existing (type3) state with the same core can be reused, if existing + existingState + | isGotoFromType1ToType2 = Nothing + | otherwise = find matchesState allRawStates + where + matchesState raw' = gotoItems == core' raw' && radType raw' == Type3 + + + -- Transform shift actions into shift, announce and accept actions: + (shiftShifts, shiftStates, shiftAnnounces, shiftAccepts) = (catMaybes shiftShifts', catMaybes shiftStates', catMaybes shiftAnnounces', catMaybes shiftAccepts') + (shiftShifts', shiftStates', shiftAnnounces', shiftAccepts') = unzip4 $ allShifts' (stateCount + length gotoStates) (shifts (state raw)) [] where + allShifts' :: Int -> [ShiftAction] -> [(Maybe ShiftAction, Maybe RawRADState, Maybe AnnounceAction, Maybe AcceptAction)] -> [(Maybe ShiftAction, Maybe RawRADState, Maybe AnnounceAction, Maybe AcceptAction)] + allShifts' _ [] result = result + allShifts' nextIndex (shift:shifts) result = case transformShift nextIndex shift of + res@(_, Just _, _, _) -> allShifts' (nextIndex + 1) shifts (result ++ [res]) + res@(_, Nothing, _, _) -> allShifts' nextIndex shifts (result ++ [res]) + + -- Transform a normal shift-action into one of the following: + -- A shift action (with a new state), an announce action or an accept action, or nothing if the shift is not relevant for the RAD state. 
+ transformShift :: Int -> ShiftAction -> (Maybe ShiftAction, Maybe RawRADState, Maybe AnnounceAction, Maybe AcceptAction) + transformShift index (tok, (shiftState, _)) + | not (null gotoItems) = (Just shift, newState, Nothing, Nothing) + | not (null announcedRule) = (Nothing, Nothing, Just (tok, rule'), Nothing) + | radType raw == Type2 = (Nothing, Nothing, Nothing, Just tok) -- TODO: sinnvoll? + | otherwise = (Nothing, Nothing, Nothing, Nothing) + where + gotoItems = plusRad (completion' raw) tok x + shift = (tok, (index, gotoItems)) where + index = maybe (i $ fromJust newState) i existingState + + -- an existing (type3) state with the same core can be reused, if existing + existingState = find matchesState allRawStates where + matchesState raw' = gotoItems == core' raw' && radType raw' == Type3 + + newState = case existingState of + Just _ -> Nothing + Nothing -> Just $ createType3State x (allStates !! shiftState) gotoItems index (i raw) + + announcedRule = getAnnouncedRule tok + Just rule' = announcedRule + + -- Announce actions from reduce actions: + announcesFromReduces = mapMaybe transformReduce (reduces (state raw)) + -- Transform a normal reduce-action into a RAD-announce action. + -- Return Nothing if the announce is not relevant for the RAD state. + transformReduce :: ReduceAction -> Maybe AnnounceAction + transformReduce (tok, rule) + | elem (Lr0 rule veryRight) (completion' raw) = Just (tok, rule) + | not (null announcedRule) = Just (tok, rule') + | otherwise = Nothing + where + veryRight = rhsLength' (lookupProdNo g rule) + announcedRule = getAnnouncedRule (lhs g (Lr0 rule 0)) + Just rule' = announcedRule + + -- Accept actions for type-2 states: + -- When a token of the lc-follow-set of NT (on which we accept NT) already has another action, we get an accept conflict. 
+ type2Accepts + | (radType raw) == Type2 = catMaybes $ map toAccept (Data.IntSet.toList $ lcfollow x first follow (nt raw)) + | otherwise = [] + where + toAccept tok + | tok == 0 = Nothing -- epsilon ∈ follow(NT) + | hasOtherAction tok = Nothing -- No accept conflict! This happens e.g. on shift/reduce-conflicts which have been resolved in favor of shift + | otherwise = Just tok + hasOtherAction tok = elem tok otherActions + otherActions = (map fst shifts') ++ (map fst announces') + + -- For a type-1 action: If NT can produce ɛ (either directly, NT -> ɛ or indirectly, NT -> A so that A ->* ɛ) we need a special announce action to announce a related rule. + type1EpsilonAnnounces + | (radType raw) == Type1 && canProduceEpsilon [nt raw] = map toAnnounce validTokens + | otherwise = [] + where + -- All tokens that are in the lc-follow set will produce the special announce action. + validTokens = filter isCandidate allCandidates where + isCandidate tok = tok /= epsilonTok && not (alreadyHasAction tok) + allCandidates = Data.IntSet.toList $ lcfollow x first follow (nt raw) + alreadyHasAction tok -- = elem tok announces || elem tok otherActions where + | elem tok announces = True + | elem tok otherActions = True -- seq (unsafePerformIO (print $ "token " ++ show tok ++ "already has other action, ignoring")) True + | otherwise = False + where + announces = map fst (announcesFromReduces ++ shiftAnnounces) + otherActions = (map fst shifts') ++ (map id accepts') + + toAnnounce tok = (tok, announcedRule) + + -- The question whether the right hand side of a rule can produce epsilon. + canProduceEpsilon = Data.IntSet.member epsilonTok . first + + -- Create the graph consisting of all items in the NT's completion which CAN PRODUCE EPSILON. + -- From these, there should be a way from the NT to a leaf node (X -> .) 
+ core = [Lr0 (-(nt raw)) 0] + + reducedCompletion = filter itemCanProduceEpsilon (completeWithFunction (directCompletion g) core) where + itemCanProduceEpsilon = canProduceEpsilon . rhsAfterDot g + + (_, _rooted, nodes) = recognitionGraph g artState where + artState = LALRState { index = 0, coreItems = core, completionItems = reducedCompletion, shifts = [], gotos = [], reduces = [], defaultAction = Error } + graph = convert _rooted + + -- All vertices reachable from the root node + connectedVertices = delete 0 (reachable graph 0) + + -- Find all reachable leaf vertices of the form X -> . + -- Optimally, there should only be a single one of these. + epsilonItems = filter (isEpsilon . (!!) nodes) connectedVertices where + isEpsilon (Item (Lr0 rule _) _) = rule >= 0 && let (Production _ rhs _ _) = (lookupProdNo g rule) in null rhs + isEpsilon _ = False + + leafNode = case epsilonItems of + [] -> error $ "Cannot happen - there must be an item of the form X -> . in the completion of NT " ++ show ((token_names g) ! (nt raw)) + [item] -> item + _ -> error $ "Multiple leaf nodes X -> . in the completion of NT " ++ show ((token_names g) ! (nt raw)) + + -- All cycle-free paths between 1 (the node _ -> |- . NT) and the leaf node. + allPaths = connect 1 leafNode graph + + connect x y g = helper x y g [x] where -- all cycle-free paths between x and y, from https://stackoverflow.com/questions/11168238 + helper a b g visited + | a == b = [[]] + | otherwise = [(a,c):path | c <- g!a, c `notElem` visited, path <- helper c b g (c:visited)] + + -- Get the single path from the root node 1 to the leaf node + path = case allPaths of + [] -> error $ "Should not happen - there must be a path from the root node to the epsilon node" + [path] -> path + _ -> error $ "Multiple paths from the root node (" ++ showNode (nodes !! 1) ++ ") to epsilon node (" ++ showNode (nodes !! 
leafNode) ++ ")" where + showNode (Item item _) = showItem g item + showNode _ = "" + + -- Find the item / rule to be announced. + -- It is any rule on the path which both: + -- - has the recognition point at the beginning and + -- - is in the rad-completion of the current raw-LC-state. + elements = map snd path -- discard the root node (it cannot be announced as it doesn't correspond to any rule) + validElements = filter (valid . (!!) nodes) elements where + valid (Item item@(Lr0 rule _) _) = (recognitionPoints x) !! rule == 0 && elem item (completion' raw) + valid _ = False + + announcedItem = case validElements of + [] -> error $ "No valid rule to be announced for epsilon-production " ++ showNode (nodes !! leafNode) ++ "in item " ++ showNode (nodes !! 1) + items -> head items + + announcedRule = let (Item (Lr0 rule _) _) = nodes !! announcedItem in rule + + -- This is the traceback of a shift or reduce action to the item and rule through whose recursive completion it was added to the RAD state's completion. + getAnnouncedRule :: Name -> Maybe Int + getAnnouncedRule t = recursiveAnnouncedRule t [] where + -- This version takes a list of inputs that directly return Nothing (i.e. will recurse) to avoid infinte recursion: going in a recursion cycle will return Nothing. + recursiveAnnouncedRule :: Name -> [Name] -> Maybe Int + recursiveAnnouncedRule token nulls + | elem token nulls = Nothing + | otherwise = case directRule of + Just rule -> elemIndex (prod g rule) (productions g) + Nothing -> extendedRule + where + + directRule = find matchingReadyRule (completion' raw) + matchingReadyRule item@(Lr0 rule dot) = (recognitionPoints x) !! rule == dot && hasTokenAfterDot g item && tokenAfterDot g item == token + extendedRule = case length extendedRules of + 0 -> Nothing + 1 -> Just (elemAt 0 extendedRules) + _ -> error $ "Announce conflict! Possible rules: { " ++ intercalate "; " (map (showProd g) (toList extendedRules)) ++ " } in RAD-State " ++ showRaw x raw ++ "!" 
+ extendedRules :: Set Int + extendedRules = mapMaybeSet recursive (fromList $ completionItems (state raw)) + recursive item@(Lr0 _ dot') + | dot' == 0 && hasTokenAfterDot g item && tokenAfterDot g item == token = recursiveAnnouncedRule (lhs g item) (token:nulls) -- avoids infinite recursion + | otherwise = Nothing + + mapMaybeSet :: Ord b => (a -> Maybe b) -> Data.Set.Set a -> Data.Set.Set b + mapMaybeSet f = Data.Set.fromList . Data.Maybe.mapMaybe f . Data.Set.toList + + + -------------------- LALR STATE GENERATION -------------------- + + -- Create LALRStates from happy's action and goto tables, bundling state information in a single datatype. + generateLALRStates :: Grammar -> ActionTable -> GotoTable -> [Lr1State] -> [LALRState] + generateLALRStates g action goto happystates = do + let completed = complete g happystates + let numbered = zip [0..] completed + let lalrStates = map (uncurry $ createState g action goto) numbered + lalrStates + + -- `State` bundles required symbol-item mappings for creating a Hinze-like continuation-based + -- state function for a state. + -- It combines the data from goto and action tables in one coherent data structure. + data LALRState = LALRState { + index :: Int, + coreItems :: [Lr0Item], + completionItems :: [Lr0Item], + shifts :: [ShiftAction], + gotos :: [GotoAction], + reduces :: [ReduceAction], + defaultAction :: LALRDefaultAction -- Do something per default. This action is NOT explicitly mentioned in the shifts/reduces list. + } deriving (Eq +#ifdef DEBUG + , Show +#endif + ) + + data LALRDefaultAction = ErrorShift Int -- On errorToken (i.e. 
default), shift to state X + | Reduce Int -- Reduce rule X + | Error -- Call happyError + deriving (Eq, Show) + + showState :: Grammar -> LALRState -> [Char] + showState g state = "State " ++ show (index state) ++ + " { " ++ unwords (map (showItem g) (coreItems state)) ++ " }" ++ + " – Shifts: " ++ unwords (map showShift (shifts state)) ++ + " – Gotos: " ++ unwords (map showGoto (gotos state)) ++ + " – Reduces: " ++ unwords (map showReduce (reduces state)) ++ + " – Default: " ++ show (defaultAction state) where + showShift (token, (i, items)) = "(on " ++ (token_names g)!token ++ " shift to " ++ show i ++ " with items: " ++ unwords (map (showItem g) items) ++ ")" + showGoto (nt, (i, items)) = "(on " ++ (token_names g)!nt ++ " goto " ++ show i ++ " with items: " ++ unwords (map (showItem g) items) ++ ")" + showReduce (token, rule) = "(on " ++ (token_names g)!token ++ " reduce rule " ++ show rule ++ ")" + + -- Create `State` data from the given `CompletedLr0State` and its index. + createState :: Grammar -> ActionTable -> GotoTable -> Int -> CompletedLr0State -> LALRState + createState g action goto i state = LALRState { index = i, coreItems = core state, completionItems = completion state, shifts = shifts, gotos = gotos, reduces = reduces, defaultAction = defaultAction } where + gotos = map toGoto allGotos + toGoto (nt, Goto toState) = (nt, (toState, items)) where + items = plus (completion state) nt g + allGotos = filter isGoto $ assocs (goto ! i) + isGoto (_, Goto {}) = True + isGoto _ = False + + shifts' = map toShift allShifts + toShift (token, LR'Shift toState _) = (token, (toState, shiftItems token)) + toShift (token, LR'Multiple _ (LR'Shift toState _)) = (token, (toState, shiftItems token)) + shiftItems token = plus (completion state) token g + allShifts = filter isShift $ assocs (action ! 
i) -- all shifts from action table + isShift (_, LR'Shift {}) = True + isShift (_, LR'Multiple _ (LR'Shift {})) = True + isShift _ = False + + reduces' = map toReduce allReduces + toReduce (token, LR'Reduce rule _) = (token, rule) + toReduce (token, LR'Multiple _ (LR'Reduce rule _)) = (token, rule) + toReduce (token, LR'Accept) = let (Lr0 rule _) = head (core state) in (token, rule) + toReduce (token, LR'Multiple _ LR'Accept) = let (Lr0 rule _) = head (core state) in (token, rule) + allReduces = filter isReduce $ assocs (action ! i) -- all reduces from action table + isReduce (_, LR'Reduce {}) = True + isReduce (_, LR'Multiple _ (LR'Reduce {})) = True + isReduce (_, LR'Accept) = True + isReduce (_, LR'Multiple _ LR'Accept) = True + isReduce _ = False + + -- Remove default action (errorShift or reduce) from shifts/reduces + shifts = filter (\(token, _) -> token /= errorTok) shifts' + reduces = filter test reduces' where + test = if defaultReduce then (\(_, rule) -> rule /= defaultReduceRule) else return True + + defaultErrorShift = any isErrorAction shifts' where + errorShiftState = (fst . snd . fromJust) (find isErrorAction shifts') + + isErrorAction (token, _) = token == errorTok + + defaultReduce = not defaultErrorShift && not (null reduces') + defaultReduceRule = fromMaybe largestRule errorRule where + errorRule = (find isErrorAction reduces') >>= Just . snd + + largestRule = (snd . head . head) sortedGroups -- Find reduce rule which is used most often (i.e. by most tokens) + sorted = sortBy (comparing snd) reduces' + grouped = groupBy ((==) `on` snd) sorted + sortedGroups = sortBy (flip (comparing length)) grouped + + defaultAction + | defaultErrorShift = ErrorShift errorShiftState + | defaultReduce = Reduce defaultReduceRule + | otherwise = Error + + + + -------------------- DETERMINING RECOGNITION POINTS -------------------- + + -- Determine the recognition points for each rule from the set of all non-free items. 
+ determineRecognitionPoints :: Grammar -> [Lr0Item] -> [Int] + determineRecognitionPoints g nonfree = map (uncurry recognitionPoint) (zip [0..] (productions g)) where + + -- No priority -> recognition point = first position where all consecutive positions are free + recognitionPoint rule (Production _ rhs _ No) = maybe 0 (+1) $ find isNonfree (reverse [0 .. length rhs-1]) where + isNonfree i = elem (Lr0 rule i) nonfree || (rhs !! i) == errorTok -- recognition point must come after all error tokens + + -- Priority/associativity -> recognition point must be at the very right + recognitionPoint _ (Production _ rhs _ _) = length rhs + + -- Determine all non-free items from all state graphs. + nonfreeItems :: Grammar -> [RecognitionGraph] -> [Lr0Item] + nonfreeItems _ graphs = (toList . fromList . join) nonfrees where -- removing duplicates + nonfrees = map (\((_, g, v), i) -> nonfree g i v) $ zip graphs [0..] + nonfree g i nodes = map lr0 $ filter (not . isFree) [0 .. numNodes g-1] where + dom' = dom g + isFree v = all (dominates dom' v) reachableLeafs where + reachableLeafs = filter (isLeaf . (!!) nodes) (reach g v) + isLeaf (ShiftNode _ _) = True; isLeaf (ReduceNode _ _) = True; isLeaf _ = False + lr0 = lr0' . (!!) nodes where lr0' (Item a _) = a + + -- The number of nodes of a rooted graph. + numNodes :: Rooted -> Int + numNodes = length . toAdj . snd + + -- Convert a Rooted (used for domination) to a Data.Graph.Graph (used for reachability) + convert :: Rooted -> Data.Graph.Graph + convert g = listArray (0, numNodes g-1) (map snd (toAdj (snd g))) + + -- All nodes that can be reached from this node using at least 1 edge. + -- This means a node only reaches itself it participates in a cycle. + reach :: Rooted -> Vertex -> [Vertex] + reach = reach' . convert where + reach' g v = reachWithoutV ++ (if isCycle then [v] else []) where + reachWithoutV = delete v (reachable g v) + isCycle = elem v (g ! 
v) || any reachesV reachWithoutV + reachesV w = elem v (reachable g w) + + -- True if a dominates b. A node never dominates itself. + dominates :: [(Node, Path)] -> Node -> Node -> Bool + dominates dom' a b = contains a pair where + pair = find ((b ==) . fst) dom' + contains a = maybe False (elem a . snd) + + + -------------------- RECOGNITION GRAPH CREATION -------------------- + + data RecognitionNode = Init + | Item Lr0Item Bool -- item, isCore + | ShiftNode Int Name -- shift to state; token which triggers the shift + | ReduceNode Int (Maybe Name) -- rule which is reduced; token which triggers the reduce (can be Nothing -> it is a default reduce) + deriving (Eq +#ifdef DEBUG + , Show +#endif + ) + type RecognitionGraph = (Int, -- Rule number. + Rooted, -- Rooted uses Ints to decode the nodes, + [RecognitionNode]) -- so this is the ordered list of all nodes + + -- Create the rooted state graph for a state which is in turn used to determine the recognition points. + recognitionGraph :: Grammar -> LALRState -> RecognitionGraph + recognitionGraph g state@(LALRState { index = i, coreItems = core, completionItems = completion, shifts = shifts, reduces = reduces, defaultAction = defaultAction }) = graph i allNodes succ where + + -- All nodes of the graph + allNodes = [initNode] ++ coreNodes ++ completionNodes ++ shiftNodes ++ reduceNodes ++ defaultNode where + initNode = Init + coreNodes = map (flip Item True) core + completionNodes = map (flip Item False) (filter (not . 
(flip elem) core) completion) + shiftNodes = map (uncurry toShift) shifts where + toShift tok (state, _) = ShiftNode state tok + + reduceNodes = map (uncurry toReduce) reduces where + toReduce tok rule = ReduceNode rule (Just tok) + + defaultNode = case defaultAction of + ErrorShift state -> [ShiftNode state errorTok] + Reduce rule -> [ReduceNode rule Nothing] + _ -> [] + + -- Successor relation + succ :: RecognitionNode -> RecognitionNode -> Bool + succ Init (Item _ True) = True + succ (Item a _) (Item b _) = isInDirectCompletion g a b + succ (Item item@(Lr0 rule _) _) (ReduceNode rule' _) = rule == rule' && dotIsAtRightEnd g item + succ (Item item@(Lr0 rule _) _) (ShiftNode _ token) = hasTokenAfterDot g item && (tokenAfterDot g item) == token + succ _ _ = False + + -- Create a graph from the nodes and their successor relation. Here, the nodes are encoded as integers. + graph :: Int -> [RecognitionNode] -> (RecognitionNode -> RecognitionNode -> Bool) -> RecognitionGraph + graph i nodes succ = (i, (fromJust $ elemIndex Init nodes, fromAdj adjacency), nodes) where + adjacency = map (ap (,) neighbors) intNodes + neighbors i = filter (intSucc i) intNodes + intNodes = [0 .. (length nodes)-1] + intSucc i j = succ (nodes !! i) (nodes !! j) + + -- Pretty-print a graph. + showGraph :: Grammar -> RecognitionGraph -> String + showGraph g (state, graph, nodes) = header ++ unlines (map showNode (zip [0..] nodes)) where + header = "Graph of state " ++ show state ++ ":\n" + showNode (i, node) = unlines (if (null successors) then [line1] else [line1, line2]) where + successors = snd $ (toAdj (snd graph)) !! i + line1 = " " ++ show i ++ ": " ++ show' node + line2 = " -> " ++ unwords (map show successors) + show' Init = "Init" + show' (Item item isCore) = showItem g item ++ if isCore then " (core)" else "" + show' (ShiftNode state token) = "Shift to state " ++ show state ++ " (on " ++ (token_names g) ! 
token ++ ")" + show' r@(ReduceNode rule token) = "Reduce rule " ++ show rule ++ ": " ++ showProd g rule ++ " (on " ++ maybe "default" ((token_names g) !) token ++ ")" diff --git a/src/RADTools.hs b/src/RADTools.hs new file mode 100644 index 00000000..bf4cf327 --- /dev/null +++ b/src/RADTools.hs @@ -0,0 +1,230 @@ +module RADTools where + import Grammar + import GHC.Arr + import LALR + import Data.Maybe + import Data.List (elemIndex) + import Data.IntSet () + import Data.IntMap () + import qualified Data.Set as Set + import Data.Set (Set, toList, fromList, union, (\\)) + + -- Lr1State as generated from happy + type Lr1State = ([Lr1Item], [(Name, Int)]) + + ----- COMPLETE LR0 STATES ----- + + data CompletedLr0State = CompletedLr0State [Lr0Item] [Lr0Item] + deriving (Eq +#ifdef DEBUG + , Show +#endif + ) + + core :: CompletedLr0State -> [Lr0Item] + core (CompletedLr0State a _) = a + completion :: CompletedLr0State -> [Lr0Item] + completion (CompletedLr0State _ a) = a + + -- Complete all states of the grammar, yielding a core and a completion set for each state. + complete :: Grammar -> [Lr1State] -> [CompletedLr0State] + complete g = map complete' where + complete' state = CompletedLr0State core (completeWithFunction (directCompletion g) core) where + core = (map lr0 $ fst state) + lr0 (Lr1 rule dot _) = Lr0 rule dot + + -- Show a completed Lr0 state. + showCompletedLr0State :: Grammar -> Int -> CompletedLr0State -> String + showCompletedLr0State g i (CompletedLr0State core completion) = + unlines $ ["State " ++ show i ++ ". CORE:"] ++ (map (showItem g) core) ++ ["COMPLETION:"] ++ (map (showItem g) completion) + + + -- Perform a completion with a custom completion function. + completeWithFunction :: (Lr0Item -> [Lr0Item]) -> [Lr0Item] -> [Lr0Item] + completeWithFunction f core = toList $ complete' (fromList core) (fromList core) where + + -- Complete result. It is always newSet ⊆ result. 
+ complete' :: Set Lr0Item -> Set Lr0Item -> Set Lr0Item + complete' result newSet + | Set.null newSet = result + | otherwise = complete' (union result newItems) (newItems \\ result) where + newItems = join $ Set.map f newSet + + join :: Set [Lr0Item] -> Set Lr0Item + join set = Set.fold (flip union . fromList) Set.empty set + + ----- LR0ITEM ----- + + -- Get the number of tokens in the RHS of a rule. + rhsLength :: Grammar -> Lr0Item -> Int + rhsLength = rhsLength' .* prod + rhsLength' :: Production -> Int + rhsLength' (Production _ rhs _ _) = length rhs + + -- Get the production belonging to an item. + prod :: Grammar -> Lr0Item -> Production + prod g (Lr0 rule _) = lookupProdNo g rule + + -- Determine whether the dot is at the very right end of an item. + dotIsAtRightEnd :: Grammar -> Lr0Item -> Bool + dotIsAtRightEnd g item@(Lr0 _ dot) = (rhsLength g item) == dot + + -- Determine whether the dot is at the very right beginning of an item. + dotIsAtLeftEnd :: Grammar -> Lr0Item -> Bool + dotIsAtLeftEnd _ (Lr0 _ dot) = 0 == dot + + -- Determine if the dot is NOT at the very beginning of an item. + -- Iff this returns true you may call tokenBeforeDot. + hasTokenBeforeDot :: Grammar -> Lr0Item -> Bool + hasTokenBeforeDot = not .* dotIsAtLeftEnd + + -- Determine if the dot is NOT at the very right end of an item. + -- Iff this returns true you may call tokenAfterDot. + hasTokenAfterDot :: Grammar -> Lr0Item -> Bool + hasTokenAfterDot = not .* dotIsAtRightEnd + + -- Check whether a rule has a terminal symbol after its dot + hasNonterminalAfterDot :: Grammar -> Lr0Item -> Bool + hasNonterminalAfterDot g item = hasTokenAfterDot g item && isNonterminal g (tokenAfterDot g item) + + -- Get the token immediately after the dot. 
+ -- Diverges if the dot is at the end - call "hasTokenAfterDot" to determine if it is valid to call "tokenAfterDot" + tokenAfterDot :: Grammar -> Lr0Item -> Name + tokenAfterDot g item@(Lr0 _ dot) = tokenAtPosition g item dot + + -- Get the token immediately before the dot. + -- Diverges if the dot is at the end - call "hasTokenBeforeDot" to determine if it is valid to call "tokenAfterDot" + tokenBeforeDot :: Grammar -> Lr0Item -> Name + tokenBeforeDot g item@(Lr0 _ dot) = tokenAtPosition g item (dot-1) + + -- Get the token in the RHS of an item at a certain position. The dot is ignored. + tokenAtPosition :: Grammar -> Lr0Item -> Int -> Name + tokenAtPosition g item pos = tokenAtPosition' (prod g item) pos where + tokenAtPosition' (Production _ rhs _ _) = (rhs !!) + + -- Determine whether a token is a nonterminal + isNonterminal :: Grammar -> Name -> Bool + isNonterminal = flip elem . non_terminals + + -- Get the nonterminal on the left side of an item + lhs :: Grammar -> Lr0Item -> Name + lhs = lhs' .* prod + lhs' :: Production -> Name + lhs' (Production lhs _ _ _) = lhs + + rhsAfterDot :: Grammar -> Lr0Item -> [Name] + rhsAfterDot g item@(Lr0 rule dot) + | rule < 0 = if dot == 0 then [-rule] else [] -- artifical NT handling + | otherwise = drop dot $ rhs (prod g item) + + rhs :: Production -> [Name] + rhs (Production _ rhs _ _) = rhs + + -- Calculate the items which are in the IMEMDIATE completion of an item. + -- For example, "A -> b . C D" has "C -> . D e", in its direct completion, + -- but not the recursive completion "D -> . f" (which would be in the direct completion of "C -> . D e". + -- The item itself may also be in its own completion. + directCompletion :: Grammar -> Lr0Item -> [Lr0Item] + directCompletion g item@(Lr0 rule dot) + | rule < 0 && dot == 0 = itemsStartingWith g (-rule) -- special case: completion of artifical item _ -> |- . 
NT + | hasNonterminalAfterDot g item = itemsStartingWith g (tokenAfterDot g item) + | otherwise = [] + + -- Determine whether item 2 is in the direct completion of item 1, as described above. + -- Therefore, item 2 must be of the form "A -> (DOT) ..." + -- An item may be in their own completion. + isInDirectCompletion :: Grammar -> Lr0Item -> Lr0Item -> Bool + isInDirectCompletion g item1@(Lr0 rule dot) item2@(Lr0 rule' dot') + | rule' < 0 = False +  | rule < 0 = dot == 0 && dot' == 0 && itemStartsWith g item2 (-rule) + | otherwise = + dot' == 0 && + hasNonterminalAfterDot g item1 && + itemStartsWith g item2 (tokenAfterDot g item1) + + -- Get all rules starting with "A -> ...", in form of the item "A -> (DOT) ..." + itemsStartingWith :: Grammar -> Name -> [Lr0Item] + itemsStartingWith g token = map toItem $ filter (startsWith token) $ (productions g) where + toItem prod = Lr0 rule 0 where + rule = fromJust $ elemIndex prod (productions g) + startsWith token (Production token' _ _ _) = token == token' + + -- Determine whether the item starts with the token + itemStartsWith :: Grammar -> Lr0Item -> Name -> Bool + itemStartsWith g item token = startsWith token (prod g item) where + startsWith token (Production token' _ _ _) = token == token' + + -- Convert an Lr0Item to a string, for example "A -> b . C D" + showItem :: Grammar -> Lr0Item -> String + showItem g (Lr0 rule dot) +  | rule < 0 = -- artificial NT handling + let nt = -rule in if dot == 0 then "|- -> . " ++ showToken nt else "|- -> " ++ showToken nt ++ " ." + + | otherwise = showProd (lookupProdNo g rule) where + showProd = unwords . showProdArray + showProdArray (Production from to _ _) = insert "." (dot + 1) ([(showToken from) ++ " ->"] ++ (map showToken to)) + showToken tok = (token_names g) ! 
tok + insert elem pos list = let (ys,zs) = splitAt pos list in ys ++ [elem] ++ zs + + ----- PRODUCTION ----- + + -- Convert a production (represented by its index) to a string, for example "A -> b C D" + showProd :: Grammar -> Int -> String + showProd g i = unwords (showProdArray (lookupProdNo g i)) where + showProdArray (Production from to _ _) = [(showToken from) ++ " ->"] ++ (map showToken to) + showToken tok = (token_names g) ! tok + + -- Convert a production (represented by its index) and its recognition point to a string, for example "A -> b C . D (has priority)" + showRecognitionPoint :: XGrammar -> Int -> String + showRecognitionPoint x rule = showItem (g x) (Lr0 rule point) ++ prioText (lookupProdNo (g x) rule) where + point = (recognitionPoints x) !! rule + prioText (Production _ _ _ No) = "" + prioText _ = " (has priority)" + + ----- RAD-SPECIFIC ----- + + -- Extended grammar containing RAD-relevant data like recognition points. + data XGrammar = XGrammar { + g :: Grammar, + recognitionPoints :: [Int] + } + + -- The rad-completion of a set of core items, defined as follows: + -- Each core item is in the completion. + -- If an item I = A -> b . C d is in the completion, and the dot is before the recognition point of the associated rule, then all items C -> ... are in the completion. + radCompletion :: XGrammar -> [Lr0Item] -> [Lr0Item] + radCompletion x core = completeWithFunction directRadCompletion core where + directRadCompletion item@(Lr0 rule dot) + | rule < 0 = directCompletion (g x) item -- special handling for item _ -> |- . NT + | dot < (recognitionPoints x) !! rule = directCompletion (g x) item + | otherwise = [] + + + -- Perform Q+'X, but only consider items in Q/'X (i.e. where the dot is before the recognition point): + -- Q+'X = { A -> β X . Ɣ | A -> β . X Ɣ elem Q, recog. 
point is after X } + plusRad :: [Lr0Item] -> Name -> XGrammar -> [Lr0Item] + plusRad q y x = filter nonready' (plus q y (g x)) where + nonready' (Lr0 rule dot) = dot <= (recognitionPoints x) !! rule + + -- Perform Q/X, but discard items where the dot is at or after the recognition point: + -- Q/'X = { A -> β . X Ɣ elem Q, recog. point is after X } + hdivRad :: [Lr0Item] -> Name -> XGrammar -> [Lr0Item] + hdivRad q y x = filter nonready (hdiv q y (g x)) where + nonready (Lr0 rule dot) = dot < (recognitionPoints x) !! rule + + -- Perform Q+X as described by Hinze: + -- Q+X = { A -> β X . Ɣ | A -> β . X Ɣ elem Q } + plus :: [Lr0Item] -> Name -> Grammar -> [Lr0Item] + plus q x g = map shiftDot (hdiv q x g) where + shiftDot (Lr0 rule dot) = Lr0 rule (dot+1) + + -- Perform Q/X as described by Hinze: + -- Q/X = { A -> β . X Ɣ elem Q } + hdiv :: [Lr0Item] -> Name -> Grammar -> [Lr0Item] + hdiv q x g = filter matches q where + matches item = hasTokenAfterDot g item && tokenAfterDot g item == x + + ----- MORE ----- + + (.*) :: (c -> d) -> (a -> b -> c) -> (a -> b -> d) + (.*) = (.) . (.) 
\ No newline at end of file From 75fa5f2305b3dec0fe4e3cf1f0462c0d89219392 Mon Sep 17 00:00:00 2001 From: David Knothe Date: Tue, 13 Oct 2020 10:21:58 +0200 Subject: [PATCH 2/9] Add parentheses around eof-token in parseEof --- src/RADCodeGen.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RADCodeGen.hs b/src/RADCodeGen.hs index 6e881d12..50a12e32 100644 --- a/src/RADCodeGen.hs +++ b/src/RADCodeGen.hs @@ -403,7 +403,7 @@ module RADCodeGen where lineLex1 = "parse" ++ show token ++ " k = lexerWrapper $ \\t -> case t of" lineLex2 - | token == eof_term g = " " ++ eof ++ " -> k" + | token == eof_term g = " " ++ paren eof ++ " -> k" | otherwise = " " ++ paren tok ++ " -> k " ++ t where Just (_, eof) = lexer g From c940075ba3d7d42c70544fe824c0269ddfd57115 Mon Sep 17 00:00:00 2001 From: David Knothe Date: Tue, 13 Oct 2020 15:59:15 +0200 Subject: [PATCH 3/9] Change comment punctuation to match the thesis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use • for rules and · for items (instead of . and .). Also use _ instead of |- for the lhs of an artificial item. --- src/RADTools.hs | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/RADTools.hs b/src/RADTools.hs index bf4cf327..0feb6c00 100644 --- a/src/RADTools.hs +++ b/src/RADTools.hs @@ -126,7 +126,7 @@ module RADTools where -- The item itself may also be in its own completion. directCompletion :: Grammar -> Lr0Item -> [Lr0Item] directCompletion g item@(Lr0 rule dot) - | rule < 0 && dot == 0 = itemsStartingWith g (-rule) -- special case: completion of artifical item _ -> |- . NT + | rule < 0 && dot == 0 = itemsStartingWith g (-rule) -- special case: completion of artifical item _ -> . 
NT | hasNonterminalAfterDot g item = itemsStartingWith g (tokenAfterDot g item) | otherwise = [] @@ -154,15 +154,18 @@ module RADTools where itemStartsWith g item token = startsWith token (prod g item) where startsWith token (Production token' _ _ _) = token == token' - -- Convert an Lr0Item to a string, for example "A -> b . C D" + -- Convert an Lr0Item to a string, for example "A -> b · C D" showItem :: Grammar -> Lr0Item -> String - showItem g (Lr0 rule dot) + showItem = showItemWithSeparator "·" + + showItemWithSeparator :: String -> Grammar -> Lr0Item -> String + showItemWithSeparator sep g (Lr0 rule dot)  | rule < 0 = -- artificial NT handling - let nt = -rule in if dot == 0 then "|- -> . " ++ showToken nt else "|- -> " ++ showToken nt ++ " ." + let nt = -rule in if dot == 0 then "_ -> " ++ sep ++ " " ++ showToken nt else "_ -> " ++ showToken nt ++ " " ++ sep | otherwise = showProd (lookupProdNo g rule) where showProd = unwords . showProdArray - showProdArray (Production from to _ _) = insert "." (dot + 1) ([(showToken from) ++ " ->"] ++ (map showToken to)) + showProdArray (Production from to _ _) = insert sep (dot + 1) ([(showToken from) ++ " ->"] ++ (map showToken to)) showToken tok = (token_names g) ! tok insert elem pos list = let (ys,zs) = splitAt pos list in ys ++ [elem] ++ zs @@ -174,12 +177,10 @@ module RADTools where showProdArray (Production from to _ _) = [(showToken from) ++ " ->"] ++ (map showToken to) showToken tok = (token_names g) ! tok - -- Convert a production (represented by its index) and its recognition point to a string, for example "A -> b C . D (has priority)" + -- Convert a production (represented by its index) and its recognition point to a string, for example "A -> b C . 
D" showRecognitionPoint :: XGrammar -> Int -> String - showRecognitionPoint x rule = showItem (g x) (Lr0 rule point) ++ prioText (lookupProdNo (g x) rule) where + showRecognitionPoint x rule = showItemWithSeparator "•" (g x) (Lr0 rule point) where point = (recognitionPoints x) !! rule - prioText (Production _ _ _ No) = "" - prioText _ = " (has priority)" ----- RAD-SPECIFIC ----- @@ -195,7 +196,7 @@ module RADTools where radCompletion :: XGrammar -> [Lr0Item] -> [Lr0Item] radCompletion x core = completeWithFunction directRadCompletion core where directRadCompletion item@(Lr0 rule dot) - | rule < 0 = directCompletion (g x) item -- special handling for item _ -> |- . NT + | rule < 0 = directCompletion (g x) item -- special handling for item _ -> . NT | dot < (recognitionPoints x) !! rule = directCompletion (g x) item | otherwise = [] From 92efa8d852377f604f20783af2d66668ce52805e Mon Sep 17 00:00:00 2001 From: David Knothe Date: Thu, 15 Oct 2020 14:51:19 +0200 Subject: [PATCH 4/9] Implement optimization suggestions of @sgraf812 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When specifying --optims in addition to --cb-rad, some optimizations are applied to the produced code: • All rule functions are marked with INLINE • All applications of goto-functions and k-functions are eta-expanded --- src/Main.lhs | 8 ++++++- src/RADCodeGen.hs | 59 ++++++++++++++++++++++++++++++++--------------- 2 files changed, 48 insertions(+), 19 deletions(-) diff --git a/src/Main.lhs b/src/Main.lhs index 46f4cd74..1db809cc 100644 --- a/src/Main.lhs +++ b/src/Main.lhs @@ -252,6 +252,8 @@ Branch off to continuation-based RAD parser production: > then > > let (isMonad, _, parserType, _, _) = monad g +> +> optimize = OptCB_RAD_Optimizations `elem` cli > > ptype = case (Grammar.lexer g, isMonad) of > (Nothing, False) -> RAD.Normal @@ -269,7 +271,8 @@ Branch off to continuation-based RAD parser production: > RAD.comments = showComments, > RAD.rank2Types = 
rank2Types, > RAD.rulesTupleBased = rulesTupleBased, -> RAD.forallMatch = match +> RAD.forallMatch = match, +> RAD.optimize = optimize > } > > lalrStates = generateLALRStates g action goto items2 @@ -485,6 +488,7 @@ The command line arguments. > > | OptCB_RAD > | OptCB_RAD_TupleBased +> | OptCB_RAD_Optimizations > | OptCB_LALR > | OptCB_ShowTypes > | OptCB_ShowComments @@ -527,6 +531,8 @@ The command line arguments. > "create a continuation-based Recursive Ascent-Descent parser. Not compatible with most other options", > Option [] ["cb-rad-tuple"] (NoArg OptCB_RAD_TupleBased) > "same as cb-rad, but uses tuples instead of continuations inside rule functions", +> Option [] ["optims"] (NoArg OptCB_RAD_Optimizations) +> "add optimizations such as eta-expansions and explicit rule-inlining to a RAD parser", > Option [] ["cb-lalr"] (NoArg OptCB_LALR) > "create a continuation-based LALR parser. Not compatible with most other options", > Option [] ["types"] (NoArg OptCB_ShowTypes) diff --git a/src/RADCodeGen.hs b/src/RADCodeGen.hs index 50a12e32..ad88aefd 100644 --- a/src/RADCodeGen.hs +++ b/src/RADCodeGen.hs @@ -25,7 +25,9 @@ module RADCodeGen where rank2Types :: Bool, -- when True, all functions (including goto functions) which use or enclose a higher-rank-function are annotated with an explicit type. forallMatch :: String, -- the text which determines which types count as rank-2-types. - rulesTupleBased :: Bool, + rulesTupleBased :: Bool, -- actually, this doesn't produce good nor fast code. maybe drop this? 
+ + optimize :: Bool, -- inline all rule functions and eta-expand all applications of local "g" functions header :: String, footer :: String @@ -204,24 +206,24 @@ module RADCodeGen where | mlex opts = common ++ " = lexerWrapper $ \\t -> case t of" | otherwise = common ++ " ts = case ts of" where common = "state" ++ show (raw i state) ++ " " ++ headerKs - headerKs = unwords $ map k (artCore state) + headerKs = unwords $ map (kNoEta "") (artCore state) shift (token, (state, i)) | mlex opts = paren tok ++ " -> state" ++ show state ++ " " ++ kcontent | otherwise = "t@" ++ paren tok ++ ":tr -> state" ++ show state ++ " " ++ kcontent ++ " tr" where i' = map dotleft i tok = replaceDollar rawToken (if wantsProjection then "v" else "_") - kcontent = unwords (map (paren . (++ x) . k) i') where + kcontent = unwords (map (paren . k x) i') where x = if wantsProjection then " v" else " t" rawToken = fromJust $ lookup token (token_specs g) wantsProjection = "$$" == (rawToken \\ replaceDollar rawToken "") -- i.e. 
Tokens of form "TokenInt $$" announce (token, rule) - | mlex opts = paren tokMaybeEof ++ " -> repeatTok t $ rule" ++ show rule ++ " " ++ paren (k item) + | mlex opts = paren tokMaybeEof ++ " -> repeatTok t $ rule" ++ show rule ++ " " ++ paren (k "" item) | otherwise = if token == eof_term g then eofCase else normalCase where - normalCase = paren tok ++ ":tr -> rule" ++ show rule ++ " " ++ paren (k item) ++ " ts" - eofCase = "[] -> rule" ++ show rule ++ " " ++ paren (k item) ++ " ts" + normalCase = paren tok ++ ":tr -> rule" ++ show rule ++ " " ++ paren (k "" item) ++ " ts" + eofCase = "[] -> rule" ++ show rule ++ " " ++ paren (k "" item) ++ " ts" tokMaybeEof = if token == eof_term g then eof else tok Just (_, eof) = lexer g @@ -243,7 +245,7 @@ module RADCodeGen where Just (_, eof) = lexer g removeDollar a = maybe a ($ "_") (mapDollarDollar a) - k' = k (head (artCore state)) + k' = k "" (head (artCore state)) goto (nt, (state, i)) | hasRank2Type opts g nt = catMaybes [gototype, goto] @@ -253,8 +255,11 @@ module RADCodeGen where gototype = case symboltype opts g nt of Just t -> Just $ "g" ++ show nt ++ " :: " ++ t ++ " -> " ++ paren outtype Nothing -> Nothing - goto = Just $ "g" ++ show nt ++ " x = state" ++ show state ++ " " ++ unwords (map (paren . (++ " x") . k) i') + goto + | optimize opts = Just $ "g" ++ show nt ++ " x " ++ ts ++ " = state" ++ show state ++ " " ++ unwords (map (paren . k "x") i') ++ " " ++ ts + | otherwise = Just $ "g" ++ show nt ++ " x = state" ++ show state ++ " " ++ unwords (map (paren . 
k "x") i') outtype = wrapperType opts ++ " r" + ts = if mlex opts then "la" else "ts" defaultAction'' = " " ++ case defaultAction' state of ErrorShift' state -> defaultErrorShift state @@ -263,31 +268,45 @@ module RADCodeGen where Error' -> defaultError defaultErrorShift toState - | mlex opts = "_ -> repeatTok t $ state" ++ show toState ++ " " ++ paren (k item ++ " ErrorToken") - | otherwise = "_ -> state" ++ show toState ++ " " ++ paren (k item ++ " ErrorToken") ++ " ts" where + | mlex opts = "_ -> repeatTok t $ state" ++ show toState ++ " " ++ paren (kNoEta "" item ++ " ErrorToken") + | otherwise = "_ -> state" ++ show toState ++ " " ++ paren (kNoEta "" item ++ " ErrorToken") ++ " ts" where item = head $ hdiv (raw completion' state) errorTok g defaultAnnounce rule - | mlex opts = "_ -> repeatTok t $ rule" ++ show rule ++ " " ++ paren (k item) - | otherwise = "_ -> rule" ++ show rule ++ " " ++ paren (k item) ++ " ts" where + | mlex opts = "_ -> repeatTok t $ rule" ++ show rule ++ " " ++ paren (k "" item) + | otherwise = "_ -> rule" ++ show rule ++ " " ++ paren (k "" item) ++ " ts" where item = fromJust $ find matches (raw completion' state) where -- the item in the completion corresponding to (i.e. of the) rule which is announced. The dot must be at the recognition point. matches (Lr0 rule' dot) = rule == rule' && (recognitionPoints x) !! 
rule == dot defaultAccept | mlex opts = "_ -> repeatTok t $ " ++ k' | otherwise = "_ -> " ++ k' ++ " ts" where - k' = k (head (artCore state)) + k' = k "" (head (artCore state)) defaultError | mlex opts = "_ -> happyErrorWrapper t" | otherwise = "_ -> " ++ happyError ++ " ts" where happyError = fromMaybe "happyError" (error_handler g) - k item@(Lr0 rule dot) = maybe noCore core $ elemIndex item (artCore state) where + k = if optimize opts then kEta else kNoEta + + -- Produce "k1 x" or "action5 g4 x", or without x: + -- "k1" or "action5 g4" + kNoEta x item@(Lr0 rule dot) = maybe noCore core $ elemIndex item (artCore state) where core idx - | (length (artCore state) == 1) = "k" + | (length (artCore state) == 1) = "k " ++ x | otherwise = "k" ++ (show (idx + 1)) - noCore = "action" ++ show rule ++ " g" ++ show (lhs g item) + noCore = "action" ++ show rule ++ " g" ++ show (lhs g item) ++ " " ++ x + + -- Produce "\z -> k1 x z" or "action5 (\y z -> g4 y z) x", or without x: + -- "\z -> k1 z" or "action5 (\y z -> g4 y z)" + kEta x item@(Lr0 rule dot) = maybe noCore core $ elemIndex item (artCore state) where + core idx + | (length (artCore state) == 1) = if kArity idx == 0 then "k " ++ x else "\\z -> k " ++ x ++ " z" + | otherwise = if kArity idx == 0 then "k" ++ (show (idx + 1)) ++ " " ++ x else "\\z -> k" ++ (show (idx + 1)) ++ " " ++ x ++ " z" + noCore = "action" ++ show rule ++ " (\\y z -> g" ++ show (lhs g item) ++ " y z) " ++ x + kArity idx = kArity' idx - (if null x then 0 else 1) + kArity' idx = length $ rhsAfterDot g ((artCore state) !! idx) -- Create the type signature for a state. stateTypeSignature :: GenOptions -> Grammar -> Bool -> RADState -> Maybe String @@ -415,14 +434,18 @@ module RADCodeGen where -- Generate the code for a rule. 
genRule :: GenOptions -> XGrammar -> Int -> String genRule opts x rule - | isTrivial = newline [comment, typedecl, trivialCode] - | otherwise = newline [comment, typedecl, code] + | isTrivial = newline [inline, comment, typedecl, trivialCode] + | otherwise = newline [inline, comment, typedecl, code] where recog = (recognitionPoints x) !! rule rhsAfterDot' = rhsAfterDot (RADTools.g x) (Lr0 rule recog) isTrivial = length rhsAfterDot' <= 1 + inline + | optimize opts = "{-# INLINE rule" ++ show rule ++ " #-}" + | otherwise = "" + comment | comments opts = "-- " ++ showRecognitionPoint x rule | otherwise = "" From e403e9ed38bd5e698400bc2bf2e662adba625984 Mon Sep 17 00:00:00 2001 From: David Knothe Date: Thu, 15 Oct 2020 19:36:46 +0200 Subject: [PATCH 5/9] Change monadic Parser type from `[Token] -> r` to `Maybe Token -> r`, as in the thesis --- src/RADCodeGen.hs | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/RADCodeGen.hs b/src/RADCodeGen.hs index ad88aefd..78b8a4d6 100644 --- a/src/RADCodeGen.hs +++ b/src/RADCodeGen.hs @@ -81,7 +81,7 @@ module RADCodeGen where definition = case ptype opts of Normal -> common ++ paren (checkEof ++ "const") Monad -> common ++ paren (checkEof ++ "const . " ++ returnP) - MonadLexer -> common ++ paren (checkEof ++ "const . " ++ returnP) ++ " []" + MonadLexer -> common ++ paren (checkEof ++ "const . 
" ++ returnP) ++ " Nothing" checkEof | isPartial = "" @@ -109,9 +109,10 @@ module RADCodeGen where where -- type Parser r = [Token] -> P r - parserDecl - | ptype opts == Normal = "type " ++ parser "r" ++ " = [" ++ tokenT ++ "] -> r" - | otherwise = "type " ++ parser "r" ++ " = [" ++ tokenT ++ "] -> " ++ p "r" + parserDecl = case ptype opts of + Normal -> "type " ++ parser "r" ++ " = [" ++ tokenT ++ "] -> r" + Monad -> "type " ++ parser "r" ++ " = [" ++ tokenT ++ "] -> " ++ p "r" + MonadLexer -> "type " ++ parser "r" ++ " = Maybe " ++ paren tokenT ++ " -> " ++ p "r" -- data ErrorToken = ErrorToken errorToken = "data " ++ errorTokenT ++ " = " ++ errorTokenT @@ -124,20 +125,20 @@ module RADCodeGen where definition = name ++ " a f ts = " ++ paren thenP ++ " a (flip f ts)" -- repeatTok :: Token -> Parser a -> Parser a - -- repeatTok tok p = \cur -> p (tok:cur) + -- repeatTok tok p _ = p (Just tok) repeatTok = newline [typedecl, definition] where name = "repeatTok" typedecl = name ++ " :: " ++ tokenT ++ " -> " ++ parser "a" ++ " -> " ++ parser "a" - definition = name ++ " tok p = \\cur -> p (tok:cur)" + definition = name ++ " tok p _ = p (Just tok)" -- lexerWrapper :: (Token -> Parser a) -> Parser a - -- lexerWrapper cont [] = lexer (\tok -> cont tok []) - -- lexerWrapper cont (tok:toks) = cont tok toks + -- lexerWrapper cont Nothing = lexer (\tok -> cont tok Nothing) + -- lexerWrapper cont (Just tok) = cont tok Nothing wrapLexer = newline [typedecl, line1, line2] where name = "lexerWrapper" typedecl = name ++ " :: " ++ paren (tokenT ++ " -> " ++ parser "a") ++ " -> " ++ parser "a" - line1 = name ++ " cont [] = " ++ lexer' ++ " (\\t -> cont t [])" - line2 = name ++ " cont (t:ts) = cont t ts" + line1 = name ++ " cont Nothing = " ++ lexer' ++ " (\\t -> cont t Nothing)" + line2 = name ++ " cont (Just tok) = cont tok Nothing" -- happyErrorWrapper :: Token -> Parser a -- happyErrorWrapper t _ = happyError t From 286f3bcc1270e9783cc1ebb18c7838cf6cc6f52c Mon Sep 17 00:00:00 
2001 From: David Knothe Date: Thu, 15 Oct 2020 23:21:37 +0200 Subject: [PATCH 6/9] Bugfix relating to eta-expansion --- src/RADCodeGen.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/RADCodeGen.hs b/src/RADCodeGen.hs index 78b8a4d6..feb65d6d 100644 --- a/src/RADCodeGen.hs +++ b/src/RADCodeGen.hs @@ -269,8 +269,8 @@ module RADCodeGen where Error' -> defaultError defaultErrorShift toState - | mlex opts = "_ -> repeatTok t $ state" ++ show toState ++ " " ++ paren (kNoEta "" item ++ " ErrorToken") - | otherwise = "_ -> state" ++ show toState ++ " " ++ paren (kNoEta "" item ++ " ErrorToken") ++ " ts" where + | mlex opts = "_ -> repeatTok t $ state" ++ show toState ++ " " ++ paren (k "ErrorToken" item) + | otherwise = "_ -> state" ++ show toState ++ " " ++ paren (k "ErrorToken" item) ++ " ts" where item = head $ hdiv (raw completion' state) errorTok g defaultAnnounce rule @@ -296,7 +296,7 @@ module RADCodeGen where kNoEta x item@(Lr0 rule dot) = maybe noCore core $ elemIndex item (artCore state) where core idx | (length (artCore state) == 1) = "k " ++ x - | otherwise = "k" ++ (show (idx + 1)) + | otherwise = "k" ++ (show (idx + 1)) ++ " " ++ x noCore = "action" ++ show rule ++ " g" ++ show (lhs g item) ++ " " ++ x -- Produce "\z -> k1 x z" or "action5 (\y z -> g4 y z) x", or without x: From a5c67a655803e2411ea12c4ea3c21c9261270335 Mon Sep 17 00:00:00 2001 From: David Knothe Date: Fri, 16 Oct 2020 15:35:14 +0200 Subject: [PATCH 7/9] Calculate and output LL-ness (on #DEBUG) --- src/RADStateGen.hs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/RADStateGen.hs b/src/RADStateGen.hs index d7ef6921..b714b0f3 100644 --- a/src/RADStateGen.hs +++ b/src/RADStateGen.hs @@ -8,7 +8,7 @@ module RADStateGen (generateLALRStates, generateRADStates, createXGrammar, artCo import Data.Set (Set, toList, fromList, elemAt) import qualified Data.IntSet import LALR - import RADTools (CompletedLr0State, Lr1State, XGrammar(..), complete, 
showItem, showProd, lhs, core, completion, prod, hasTokenAfterDot, tokenAfterDot, rhsLength', isInDirectCompletion, dotIsAtRightEnd, plus, hdiv, radCompletion, itemsStartingWith, plusRad, completeWithFunction, directCompletion, rhsAfterDot) + import RADTools (CompletedLr0State, Lr1State, XGrammar(..), complete, showItem, showProd, lhs, core, completion, prod, hasTokenAfterDot, tokenAfterDot, rhsLength', isInDirectCompletion, dotIsAtRightEnd, plus, hdiv, radCompletion, itemsStartingWith, plusRad, completeWithFunction, directCompletion, rhsAfterDot, showRecognitionPoint) import Control.Monad import Data.List import Data.Maybe @@ -110,6 +110,10 @@ module RADStateGen (generateLALRStates, generateRADStates, createXGrammar, artCo debugPrint "State Graphs:" (showGraph g) allGraphs --debugPrint "Non-Free Items:" (showItem g) nonfree debugPrint "All Rules With Their Recognition Points:" (showRecognitionPoint x) [0 .. (length (productions g)) - 1] + + -- LL-ness + let totalLength = sum $ map (\(Production _ rhs _ _) -> length rhs) (productions g) + putStrLn $ "Sum of rec-points: " ++ show (sum recognitionPoints) ++ "; total rule lengths: " ++ show totalLength ++ "\nLL-ness: " ++ show (100 * (1 - (fromIntegral (sum recognitionPoints)) / (fromIntegral totalLength))) ++ "%" #endif return x From 5035c9444f687fda0a0683f9ccf3f659a4e3aa9a Mon Sep 17 00:00:00 2001 From: David Knothe Date: Thu, 29 Oct 2020 15:38:32 +0100 Subject: [PATCH 8/9] Bump version to 1.21.0 --- CHANGES | 10 ++++++++++ happy.cabal | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 92188987..3ab2c02b 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,13 @@ +----------------------------------------------------------------------------- +1.21.0 + + * Add two new backends: + * Continuation-based LALR(1) (recursive ascent) + * Continuation-based RAD(1) (recursive ascent-descent) + * RAD generally produces smaller compiled code using fewer states + * Continuation-based form
introduces a speed advantage over + table-based form + ----------------------------------------------------------------------------- 1.20.0 diff --git a/happy.cabal b/happy.cabal index 70a77fef..14ce6457 100644 --- a/happy.cabal +++ b/happy.cabal @@ -1,5 +1,5 @@ name: happy -version: 1.20.0 +version: 1.21.0 license: BSD2 license-file: LICENSE copyright: (c) Andy Gill, Simon Marlow From 911473a44f0b1766ffb5f97227ac90d5dc1ee6f3 Mon Sep 17 00:00:00 2001 From: David Knothe Date: Wed, 10 Feb 2021 16:23:33 +0100 Subject: [PATCH 9/9] Revert back to original build options --- happy.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/happy.cabal b/happy.cabal index 14ce6457..ef9889e2 100644 --- a/happy.cabal +++ b/happy.cabal @@ -168,7 +168,7 @@ executable happy default-language: Haskell98 default-extensions: CPP, MagicHash, FlexibleContexts - ghc-options: -Wall -Wno-name-shadowing -Wno-missing-signatures -Wno-type-defaults -Wno-incomplete-patterns -Wno-unused-local-binds -Wno-unused-local-binds -Wno-unused-matches + ghc-options: -Wall other-modules: Paths_happy AbsSyn