diff --git a/CHANGES b/CHANGES index 92188987..3ab2c02b 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,13 @@ +----------------------------------------------------------------------------- +1.21.0 + + * Add two new backends: + * Continuation-based LALR(1) (recursive ascent) + * Continuation-based RAD(1) (recursive ascent-descent) + * RAD generally produces smaller compiled code using fewer states + * Continuation-based form introduces a speed advantage over + table-based form + ----------------------------------------------------------------------------- 1.20.0 diff --git a/cabal.project.local b/cabal.project.local new file mode 100644 index 00000000..e69de29b diff --git a/happy.cabal b/happy.cabal index d1e42fb4..ef9889e2 100644 --- a/happy.cabal +++ b/happy.cabal @@ -1,5 +1,5 @@ name: happy -version: 1.20.0 +version: 1.21.0 license: BSD2 license-file: LICENSE copyright: (c) Andy Gill, Simon Marlow @@ -161,6 +161,8 @@ executable happy build-depends: base < 5, array, containers >= 0.4.2, + dom-lt >= 0.2.2, + text, mtl >= 2.2.1 -- mtl-2.2.1 added Control.Monad.Except @@ -186,6 +188,11 @@ executable happy AttrGrammarParser ParamRules PrettyGrammar + RADCodeGen + RADCodeGen_LALR + RADStateGen + RADTools + Follow test-suite tests type: exitcode-stdio-1.0 diff --git a/src/Follow.hs b/src/Follow.hs new file mode 100644 index 00000000..8d009c6b --- /dev/null +++ b/src/Follow.hs @@ -0,0 +1,73 @@ +module Follow where + import Grammar + import RADTools + import Control.Monad + import Control.Monad.ST + import Data.Array.ST + import GHC.Arr + import Data.List (findIndices, tails) + import NameSet (NameSet, empty, fromList, union, unions, delete, member, singleton) + + -- Calculate the follow sets for all nonterminals in the grammar. 
+ -- FOLLOW is computed as a least fixpoint: every set starts out empty, the
+ -- start symbols are seeded with the EOF terminal, and all productions are
+ -- re-scanned until a complete pass leaves every set unchanged.
+ -- The "first" argument maps a sequence of symbols to its FIRST set; the
+ -- presence of epsilonTok in that set is what makes FOLLOW of the left-hand
+ -- side flow into the symbol in front of the sequence.
+ followArray :: Grammar -> ([Name] -> NameSet) -> Array Name NameSet
+ followArray g first = runST $ do
+     table <- newArray ntRange empty
+     mapM_ (seedEOF table) (starts g)
+     saturate table
+     liftM (listArray ntRange) (getElems table)
+   where
+     nts = non_terminals g
+
+     -- The result is indexed from the first to the last nonterminal name.
+     ntRange = (head nts, last nts)
+
+     -- The FOLLOW set of a start symbol is initialised to just the EOF terminal.
+     seedEOF :: STArray s Int NameSet -> (a, Int, b, c) -> ST s ()
+     seedEOF table (_, startNT, _, _) = writeArray table startNT (singleton (eof_term g))
+
+     -- Apply every production once; repeat for as long as a pass changes a set.
+     saturate :: STArray s Int NameSet -> ST s ()
+     saturate table = do
+       before <- getElems table
+       mapM_ (applyProduction table) (productions g)
+       after <- getElems table
+       unless (before == after) (saturate table)
+
+     -- Visit every suffix of the right-hand side, so that each symbol occurrence
+     -- is seen together with the symbols that come after it.
+     applyProduction :: STArray s Int NameSet -> Production -> ST s ()
+     applyProduction table (Production lhsNT rhs _ _) = mapM_ (applySuffix table lhsNT) (tails rhs)
+
+     -- For a suffix "sym rest" whose head is a nonterminal, extend FOLLOW(sym) by
+     -- FIRST(rest) without epsilonTok, plus FOLLOW(lhsNT) when FIRST(rest)
+     -- contains epsilonTok. Terminals and the empty suffix are left alone.
+     applySuffix :: STArray s Int NameSet -> Name -> [Name] -> ST s ()
+     applySuffix table lhsNT (sym:rest)
+       | sym `elem` nts = do
+           followLhs <- readArray table lhsNT
+           current <- readArray table sym
+           let firstRest = first rest
+               inherited = if member epsilonTok firstRest then followLhs else empty
+               addition = union (delete epsilonTok firstRest) inherited
+           writeArray table sym (union current addition)
+     applySuffix _ _ _ = return ()
+
+ -- The lc-follow set of a single nonterminal, given a full "follow" array.
+ -- We only use rules where NT appears after the recognition point. If this is the case, enter into FOLLOW (not into LCFOLLOW),
+ -- i.e. recursive rules are processed as normal, irrespective of their recognition points.
+ -- Every occurrence of "nt" on a right-hand side is inspected together with the
+ -- index of its rule. An occurrence located before the rule's recognition point
+ -- contributes nothing. Any other occurrence contributes FIRST of the symbols
+ -- behind it; if that set contains epsilonTok, epsilonTok is removed and the
+ -- FOLLOW set of the rule's left-hand side is added instead.
+ -- A start symbol additionally receives the EOF terminal.
+ lcfollow :: XGrammar -> ([Name] -> NameSet) -> (Array Name NameSet) -> Name -> NameSet
+ lcfollow x@(XGrammar { g = g }) first follow nt =
+     if member nt startSymbols
+       then union (singleton (eof_term g)) contributions
+       else contributions
+   where
+     startSymbols = fromList [ start | (_, start, _, _) <- starts g ]
+
+     -- One contribution per (rule, occurrence of nt in that rule) pair.
+     contributions = unions
+       [ contribution ruleIndex lhsNT rhs pos
+       | (ruleIndex, Production lhsNT rhs _ _) <- zip [0..] (productions g)
+       , pos <- findIndices (== nt) rhs
+       ]
+
+     contribution :: Int -> Name -> [Name] -> Int -> NameSet
+     contribution ruleIndex lhsNT rhs pos
+       | pos < recognitionPoints x !! ruleIndex = empty
+       | member epsilonTok firstAfter = union (delete epsilonTok firstAfter) (follow ! lhsNT)
+       | otherwise = firstAfter
+       where
+         firstAfter = first (drop (pos + 1) rhs)
\ No newline at end of file diff --git a/src/Grammar.lhs b/src/Grammar.lhs index d3ce625e..c23f549b 100644 --- a/src/Grammar.lhs +++ b/src/Grammar.lhs @@ -36,13 +36,16 @@ Here is our mid-section datatype > data Production > = Production Name [Name] (String,[Int]) Priority +> deriving (Eq #ifdef DEBUG -> deriving Show +> , Show #endif +> ) + > data Grammar > = Grammar { > productions :: [Production], diff --git a/src/LALR.lhs b/src/LALR.lhs index 1dc274cf..6df81617 100644 --- a/src/LALR.lhs +++ b/src/LALR.lhs @@ -8,7 +8,7 @@ Generation of LALR parsing tables. > module LALR > (genActionTable, genGotoTable, genLR0items, precalcClosure0, > propLookaheads, calcLookaheads, mergeLookaheadInfo, countConflicts, -> Lr0Item(..), Lr1Item) +> Lr0Item(..), Lr1Item(..)) > where > import GenUtils @@ -35,15 +35,7 @@ Generation of LALR parsing tables. 
This means rule $a$, with dot at $b$ (all starting at 0) > data Lr0Item = Lr0 {-#UNPACK#-}!Int {-#UNPACK#-}!Int -- (rule, dot) -> deriving (Eq,Ord - -#ifdef DEBUG - -> ,Show - -#endif - -> ) +> deriving (Eq,Ord,Show) > data Lr1Item = Lr1 {-#UNPACK#-}!Int {-#UNPACK#-}!Int NameSet -- (rule, dot, lookahead) diff --git a/src/Main.lhs b/src/Main.lhs index 20c5eb62..1db809cc 100644 --- a/src/Main.lhs +++ b/src/Main.lhs @@ -21,6 +21,9 @@ Path settings auto-generated by Cabal: > import ProduceCode (produceParser) > import ProduceGLRCode > import Info (genInfoFile) +> import qualified RADCodeGen as RAD +> import qualified RADCodeGen_LALR as LALR +> import RADStateGen > import Target (Target(..)) > import System.Console.GetOpt > import Control.Monad ( liftM ) @@ -30,6 +33,7 @@ Path settings auto-generated by Cabal: > import System.IO > import Data.Array( assocs, elems, (!) ) > import Data.List( nub, isSuffixOf ) +> import Data.Maybe (fromMaybe) > import Data.Version ( showVersion ) #if defined(mingw32_HOST_OS) > import Foreign.Marshal.Array @@ -202,6 +206,84 @@ Add any special options or imports required by the parsing machinery. 
> ) > in +------------------------------------- +Branch off to continuation-based LALR parser production: + +> getForallMatch cli >>= \forallMatch -> +> let showTypes = OptCB_ShowTypes `elem` cli || rank2Types +> showComments = OptCB_ShowComments `elem` cli +> rank2Types = maybe False (return True) forallMatch +> match = fromMaybe "" forallMatch +> rulesTupleBased = OptCB_RAD_TupleBased `elem` cli +> in + +> if OptCB_LALR `elem` cli +> then +> let (isMonad, _, parserType, _, _) = monad g +> +> ptype = case (Grammar.lexer g, isMonad) of +> (Nothing, False) -> LALR.Normal +> (Nothing, True) -> LALR.Monad +> (Just _, False) -> error "%lexer without %monad not supported in RAD" +> (Just _, True) -> LALR.MonadLexer +> +> options = LALR.GenOptions { +> LALR.ptype = ptype, +> LALR.wrapperType = if parserType == "Parser" then "HappyP" else "Parser", +> LALR.errorTokenType = "ErrorToken", +> LALR.header = fromMaybe "" hd, +> LALR.footer = fromMaybe "" tl, +> LALR.showTypes = showTypes, +> LALR.comments = showComments, +> LALR.rank2Types = rank2Types, +> LALR.forallMatch = match +> } +> +> lalrStates = generateLALRStates g action goto items2 +> in +> LALR.genCode options g lalrStates action goto >>= +> (if outfilename == "-" then putStr else writeFile outfilename) +> +> else + +Branch off to continuation-based RAD parser production: + +> if OptCB_RAD `elem` cli || OptCB_RAD_TupleBased `elem` cli +> then +> +> let (isMonad, _, parserType, _, _) = monad g +> +> optimize = OptCB_RAD_Optimizations `elem` cli +> +> ptype = case (Grammar.lexer g, isMonad) of +> (Nothing, False) -> RAD.Normal +> (Nothing, True) -> RAD.Monad +> (Just _, False) -> error "%lexer without %monad not supported in RAD" +> (Just _, True) -> RAD.MonadLexer +> +> options = RAD.GenOptions { +> RAD.ptype = ptype, +> RAD.wrapperType = if parserType == "Parser" then "HappyP" else "Parser", +> RAD.errorTokenType = "ErrorToken", +> RAD.header = fromMaybe "" hd, +> RAD.footer = fromMaybe "" tl, +> RAD.showTypes = 
showTypes, +> RAD.comments = showComments, +> RAD.rank2Types = rank2Types, +> RAD.rulesTupleBased = rulesTupleBased, +> RAD.forallMatch = match, +> RAD.optimize = optimize +> } +> +> lalrStates = generateLALRStates g action goto items2 +> in +> createXGrammar g lalrStates >>= \x -> +> generateRADStates x lalrStates unused_rules >>= \radStates -> +> RAD.genCode options x radStates action goto >>= +> (if outfilename == "-" then putStr else writeFile outfilename) +> +> else + %--------------------------------------- Branch off to GLR parser production @@ -403,6 +485,14 @@ The command line arguments. > | OptGLR > | OptGLR_Decode > | OptGLR_Filter +> +> | OptCB_RAD +> | OptCB_RAD_TupleBased +> | OptCB_RAD_Optimizations +> | OptCB_LALR +> | OptCB_ShowTypes +> | OptCB_ShowComments +> | OptCB_ForallMatch (Maybe String) > deriving Eq > argInfo :: [OptDescr CLIFlags] @@ -436,7 +526,21 @@ The command line arguments. > Option ['?'] ["help"] (NoArg DumpHelp) > "display this help and exit", > Option ['V','v'] ["version"] (NoArg DumpVersion) -- ToDo: -v is deprecated -> "output version information and exit" +> "output version information and exit", +> Option [] ["cb-rad"] (NoArg OptCB_RAD) +> "create a continuation-based Recursive Ascent-Descent parser. Not compatible with most other options", +> Option [] ["cb-rad-tuple"] (NoArg OptCB_RAD_TupleBased) +> "same as cb-rad, but uses tuples instead of continuations inside rule functions", +> Option [] ["optims"] (NoArg OptCB_RAD_Optimizations) +> "add optimizations such as eta-expansions and explicit rule-inlining to a RAD parser", +> Option [] ["cb-lalr"] (NoArg OptCB_LALR) +> "create a continuation-based LALR parser. 
Not compatible with most other options", +> Option [] ["types"] (NoArg OptCB_ShowTypes) +> "show function types for continuation-based parsers", +> Option [] ["comments"] (NoArg OptCB_ShowComments) +> "show comments for continuation-based parsers", +> Option [] ["forall"] (OptArg OptCB_ForallMatch "MATCH") +> "a string which is used to detect and handle higher-rank function types" #ifdef DEBUG @@ -572,6 +676,13 @@ Extract various command-line options. > [] -> return Nothing > f:fs -> return (Just (map toLower (last (f:fs)))) +> getForallMatch :: [CLIFlags] -> IO (Maybe String) +> getForallMatch cli +> = case [ s | (OptCB_ForallMatch s) <- cli ] of +> [] -> return Nothing +> [f] -> return f +> _many -> dieHappy "multiple --forall options\n" + > getCoerce :: Target -> [CLIFlags] -> IO Bool > getCoerce _target cli > = if OptUseCoercions `elem` cli diff --git a/src/RADCodeGen.hs b/src/RADCodeGen.hs new file mode 100644 index 00000000..feb65d6d --- /dev/null +++ b/src/RADCodeGen.hs @@ -0,0 +1,557 @@ +module RADCodeGen where + import Grammar + import LALR + import GenUtils (mapDollarDollar) + import RADTools (XGrammar(..), showItem, showProd, lhs, showRecognitionPoint, recognitionPoints, rhsAfterDot) + import RADStateGen + import Control.Monad + import Data.List + import Data.Maybe + import Data.Text (pack, unpack, replace) + import GHC.Arr ((!), indices) + + + data ParserType = Normal | Monad | MonadLexer deriving (Eq, Show) + + data GenOptions = GenOptions { + ptype :: ParserType, + + wrapperType :: String, -- e.g. "Parser" + errorTokenType :: String, -- e.g. "ErrorToken" + + showTypes :: Bool, + comments :: Bool, + + rank2Types :: Bool, -- when True, all functions (including goto functions) which use or enclose a higher-rank-function are annotated with an explicit type. + forallMatch :: String, -- the text which determines which types count as rank-2-types. + + rulesTupleBased :: Bool, -- actually, this doesn't produce good nor fast code. maybe drop this? 
+ + optimize :: Bool, -- inline all rule functions and eta-expand all applications of local "g" functions + + header :: String, + footer :: String + } deriving Show + + mlex opts = ptype opts == MonadLexer + + raw = flip (.) _raw + + dotleft (Lr0 rule dot) = Lr0 rule (dot-1) + + hasNT state = elem (raw radType state) [Type1, Type2] + + replaceDollar a char = maybe a ($ char) (mapDollarDollar a) + + -------------------- GENCODE -------------------- + -- Generate the full code + genCode :: GenOptions -> XGrammar -> [RADState] -> ActionTable -> GotoTable -> IO String + genCode opts x states action goto = do + return $ newlines 3 [languageFeatures, header', entryPoints', definitions', rules', parseNTs', parseTerminals', states', actions', footer'] where + languageFeatures + | rank2Types opts = newline $ map extension ["RankNTypes", "ScopedTypeVariables"] + | otherwise = "" where + extension str = "{-# LANGUAGE " ++ str ++ " #-}" + + g = (RADTools.g x) + header' = header opts + entryPoints' = newlines 2 $ map (entryPoint opts g states) (starts g) + definitions' = definitions opts g + + rules' = newlines 2 $ map (genRule opts x) [0..prods] + parseNTs' = newlines 2 $ catMaybes $ map (genParseNT opts g states) (non_terminals g) + parseTerminals' = newlines 2 $ map (genParseTerminal opts g) (delete errorTok (terminals g)) + + states' = newlines 2 $ map (genState opts x) states + + actions' = newlines 2 $ map (genAction opts g) [0..prods] + prods = length (productions g) - 1 + footer' = footer opts + + + -------------------- ENTRYPOINT -------------------- + entryPoint :: GenOptions -> Grammar -> [RADState] -> (String, Name, Name, Bool) -> String + entryPoint opts g states (name, lhs, rhs, isPartial) = newline [typedecl, definition] where + typedecl + | showTypes opts = fromMaybe "" $ fmap (((name ++ " :: ") ++) . 
correctP) (symboltype opts g rhs) + | otherwise = "" + + correctP = if mlex opts then p else parser + + definition = case ptype opts of + Normal -> common ++ paren (checkEof ++ "const") + Monad -> common ++ paren (checkEof ++ "const . " ++ returnP) + MonadLexer -> common ++ paren (checkEof ++ "const . " ++ returnP) ++ " Nothing" + + checkEof + | isPartial = "" + | otherwise = "parse" ++ show (eof_term g) ++ " . " + + common = name ++ " = rule" ++ show prod ++ " " + + -- Rule LHS -> RHS + prod = fromJust $ find matches [0 .. length (productions g) - 1] where + matches i = matches' (lookupProdNo g i) + matches' (Production lhs' rhs' _ _) = lhs' == lhs && rhs' == [rhs] + + p a = p' ++ " " ++ a + parser a = wrapperType opts ++ " " ++ a + (_, _, p', _, returnP) = monad g + + + -------------------- DEFINITIONS -------------------- + -- Generate definitions such as wrappers, the parser type or more required functions and types + definitions :: GenOptions -> Grammar -> String + definitions opts g = case ptype opts of + Normal -> newlines 2 [parserDecl, errorToken] + Monad -> newlines 2 [parserDecl, errorToken, wrapThen] + MonadLexer -> newlines 2 [parserDecl, errorToken, wrapThen, repeatTok, wrapLexer, wrapError] + where + + -- type Parser r = [Token] -> P r + parserDecl = case ptype opts of + Normal -> "type " ++ parser "r" ++ " = [" ++ tokenT ++ "] -> r" + Monad -> "type " ++ parser "r" ++ " = [" ++ tokenT ++ "] -> " ++ p "r" + MonadLexer -> "type " ++ parser "r" ++ " = Maybe " ++ paren tokenT ++ " -> " ++ p "r" + + -- data ErrorToken = ErrorToken + errorToken = "data " ++ errorTokenT ++ " = " ++ errorTokenT + + -- thenWrapP :: P a -> (a -> Parser b) -> Parser b + -- thenWrapP a f ts = (thenP) a (flip f ts) + wrapThen = newline [typedecl, definition] where + name = "thenWrapP" + typedecl = name ++ " :: " ++ p "a" ++ " -> (a -> " ++ parser "b" ++ ") -> " ++ parser "b" + definition = name ++ " a f ts = " ++ paren thenP ++ " a (flip f ts)" + + -- repeatTok :: Token -> 
Parser a -> Parser a + -- repeatTok tok p _ = p (Just tok) + repeatTok = newline [typedecl, definition] where + name = "repeatTok" + typedecl = name ++ " :: " ++ tokenT ++ " -> " ++ parser "a" ++ " -> " ++ parser "a" + definition = name ++ " tok p _ = p (Just tok)" + + -- lexerWrapper :: (Token -> Parser a) -> Parser a + -- lexerWrapper cont Nothing = lexer (\tok -> cont tok Nothing) + -- lexerWrapper cont (Just tok) = cont tok Nothing + wrapLexer = newline [typedecl, line1, line2] where + name = "lexerWrapper" + typedecl = name ++ " :: " ++ paren (tokenT ++ " -> " ++ parser "a") ++ " -> " ++ parser "a" + line1 = name ++ " cont Nothing = " ++ lexer' ++ " (\\t -> cont t Nothing)" + line2 = name ++ " cont (Just tok) = cont tok Nothing" + + -- happyErrorWrapper :: Token -> Parser a + -- happyErrorWrapper t _ = happyError t + wrapError = newline [typedecl, definition] where + name = "happyErrorWrapper" + typedecl = name ++ " :: " ++ tokenT ++ " -> " ++ parser "a" + definition = name ++ " = const . " ++ happyError + + p a = p' ++ " " ++ a + parser a = wrapperType opts ++ " " ++ a + (_, _, p', thenP, _) = monad g + tokenT = token_type g + errorTokenT = errorTokenType opts + (Just (lexer', _)) = lexer g + happyError = fromMaybe "happyError" (error_handler g) + + + -------------------- GENSTATE ------------------- + -- Generate the code for a single state. 
+ genState :: GenOptions -> XGrammar -> RADState -> String + genState opts x@XGrammar { RADTools.g = g } state + | isTrivialAccept = newline [comment, trivialTypedecl, trivialAcceptHeader] + | isTrivialAnnounce = newline [comment, trivialTypedecl, trivialAnnounceHeader] + | otherwise = newline [comment, typedecl, header, shifts'', announces'', accepts'', defaultAction'', gotos''] where + + hasNoActions = (null $ shifts' state) && (null $ accepts' state) && (null $ announces' state) && length (artCore state) == 1 + hasNoGotos = (null $ gotos' state) + isTrivialAccept = hasNoActions && hasNoGotos && (defaultAction' state == Accept') + isTrivialAnnounce = isAlwaysAnnounce && hasNoGotos + isAlwaysAnnounce = case defaultAction' state of + Announce' _ -> hasNoActions + _ -> False + + hasRank2Goto = any ((hasRank2Type opts g) . fst) (gotos' state) + hasRank2TypeSignature = any (hasRank2Item) (artCore state) + hasRank2Item item = any (hasRank2Type opts g) (rhsAfterDot g item) + + trivialTypedecl + | rank2Types opts && hasRank2Goto = fromMaybe "" (stateTypeSignature opts g True state) + | showTypes opts = fromMaybe "" (stateTypeSignature opts g False state) + | otherwise = "" + + trivialAcceptHeader = "state" ++ show (raw i state) ++ " = id" + + trivialAnnounceHeader = "state" ++ show (raw i state) ++ " = rule" ++ show rule where + Announce' rule = defaultAction' state + + comment + | comments opts = newlineMap "-- " (showItem g) (artCore state) + | otherwise = "" + + typedecl + | rank2Types opts && hasRank2Goto = fromMaybe "" (stateTypeSignature opts g True state) + | rank2Types opts && hasRank2TypeSignature = fromMaybe "" (stateTypeSignature opts g False state) + | showTypes opts = fromMaybe "" (stateTypeSignature opts g False state) + | otherwise = "" + + shifts'' = newlineMap " " shift (shifts' state) + announces'' = newlineMap " " announce (announces' state) + accepts'' = newlineMap " " accept (accepts' state) + gotos'' = where' ++ intercalate "\n" (map (" " ++) lines) 
where + lines = join (map goto (gotos' state)) + where' = if null (gotos' state) then "" else " where\n" + + header + | mlex opts = common ++ " = lexerWrapper $ \\t -> case t of" + | otherwise = common ++ " ts = case ts of" where + common = "state" ++ show (raw i state) ++ " " ++ headerKs + headerKs = unwords $ map (kNoEta "") (artCore state) + + shift (token, (state, i)) + | mlex opts = paren tok ++ " -> state" ++ show state ++ " " ++ kcontent + | otherwise = "t@" ++ paren tok ++ ":tr -> state" ++ show state ++ " " ++ kcontent ++ " tr" where + i' = map dotleft i + tok = replaceDollar rawToken (if wantsProjection then "v" else "_") + kcontent = unwords (map (paren . k x) i') where + x = if wantsProjection then " v" else " t" + rawToken = fromJust $ lookup token (token_specs g) + wantsProjection = "$$" == (rawToken \\ replaceDollar rawToken "") -- i.e. Tokens of form "TokenInt $$" + + announce (token, rule) + | mlex opts = paren tokMaybeEof ++ " -> repeatTok t $ rule" ++ show rule ++ " " ++ paren (k "" item) + | otherwise = if token == eof_term g then eofCase else normalCase + where + normalCase = paren tok ++ ":tr -> rule" ++ show rule ++ " " ++ paren (k "" item) ++ " ts" + eofCase = "[] -> rule" ++ show rule ++ " " ++ paren (k "" item) ++ " ts" + + tokMaybeEof = if token == eof_term g then eof else tok + Just (_, eof) = lexer g + + item = fromJust $ find matches (raw completion' state) where -- the item in the completion corresponding to (i.e. of the) rule which is announced. The dot must be at the recognition point. + matches (Lr0 rule' dot) = rule == rule' && (recognitionPoints x) !! 
rule == dot + tok = replaceDollar rawToken "_" + rawToken = fromJust $ lookup token (token_specs g) + + accept token +   | mlex opts = paren tokMaybeEof ++ " -> repeatTok t $ " ++ k' + | otherwise = if token == eof_term g then eofCase else normalCase + where + normalCase = "t@" ++ paren tok ++ ":tr -> " ++ k' ++ " ts" + eofCase = "[] -> " ++ k' ++ " ts" + + tokMaybeEof = if token == eof_term g then eof else tok + tok = removeDollar $ fromJust (lookup token (token_specs g)) + Just (_, eof) = lexer g + + removeDollar a = maybe a ($ "_") (mapDollarDollar a) + k' = k "" (head (artCore state)) + + goto (nt, (state, i)) + | hasRank2Type opts g nt = catMaybes [gototype, goto] + | otherwise = catMaybes [goto] + where + i' = map dotleft i + gototype = case symboltype opts g nt of + Just t -> Just $ "g" ++ show nt ++ " :: " ++ t ++ " -> " ++ paren outtype + Nothing -> Nothing + goto + | optimize opts = Just $ "g" ++ show nt ++ " x " ++ ts ++ " = state" ++ show state ++ " " ++ unwords (map (paren . k "x") i') ++ " " ++ ts + | otherwise = Just $ "g" ++ show nt ++ " x = state" ++ show state ++ " " ++ unwords (map (paren . 
k "x") i') + outtype = wrapperType opts ++ " r" + ts = if mlex opts then "la" else "ts" + + defaultAction'' = " " ++ case defaultAction' state of + ErrorShift' state -> defaultErrorShift state + Announce' rule -> defaultAnnounce rule + Accept' -> defaultAccept + Error' -> defaultError + + defaultErrorShift toState + | mlex opts = "_ -> repeatTok t $ state" ++ show toState ++ " " ++ paren (k "ErrorToken" item) + | otherwise = "_ -> state" ++ show toState ++ " " ++ paren (k "ErrorToken" item) ++ " ts" where + item = head $ hdiv (raw completion' state) errorTok g + + defaultAnnounce rule + | mlex opts = "_ -> repeatTok t $ rule" ++ show rule ++ " " ++ paren (k "" item) + | otherwise = "_ -> rule" ++ show rule ++ " " ++ paren (k "" item) ++ " ts" where + item = fromJust $ find matches (raw completion' state) where -- the item in the completion corresponding to (i.e. of the) rule which is announced. The dot must be at the recognition point. + matches (Lr0 rule' dot) = rule == rule' && (recognitionPoints x) !! 
rule == dot + + defaultAccept + | mlex opts = "_ -> repeatTok t $ " ++ k' + | otherwise = "_ -> " ++ k' ++ " ts" where + k' = k "" (head (artCore state)) + + defaultError + | mlex opts = "_ -> happyErrorWrapper t" + | otherwise = "_ -> " ++ happyError ++ " ts" where + happyError = fromMaybe "happyError" (error_handler g) + + k = if optimize opts then kEta else kNoEta + + -- Produce "k1 x" or "action5 g4 x", or without x: + -- "k1" or "action5 g4" + kNoEta x item@(Lr0 rule dot) = maybe noCore core $ elemIndex item (artCore state) where + core idx + | (length (artCore state) == 1) = "k " ++ x + | otherwise = "k" ++ (show (idx + 1)) ++ " " ++ x + noCore = "action" ++ show rule ++ " g" ++ show (lhs g item) ++ " " ++ x + + -- Produce "\z -> k1 x z" or "action5 (\y z -> g4 y z) x", or without x: + -- "\z -> k1 z" or "action5 (\y z -> g4 y z)" + kEta x item@(Lr0 rule dot) = maybe noCore core $ elemIndex item (artCore state) where + core idx + | (length (artCore state) == 1) = if kArity idx == 0 then "k " ++ x else "\\z -> k " ++ x ++ " z" + | otherwise = if kArity idx == 0 then "k" ++ (show (idx + 1)) ++ " " ++ x else "\\z -> k" ++ (show (idx + 1)) ++ " " ++ x ++ " z" + noCore = "action" ++ show rule ++ " (\\y z -> g" ++ show (lhs g item) ++ " y z) " ++ x + kArity idx = kArity' idx - (if null x then 0 else 1) + kArity' idx = length $ rhsAfterDot g ((artCore state) !! idx) + + -- Create the type signature for a state. + stateTypeSignature :: GenOptions -> Grammar -> Bool -> RADState -> Maybe String + stateTypeSignature opts g forall_r state = do + let start = "state" ++ show (raw i state) ++ " :: " ++ forall + components <- mapM component (artCore state) + return $ start ++ intercalate " -> " (map paren (components ++ [outtype])) + where + component item@(Lr0 rule dot) + | rule < 0 = if dot == 0 then component' [-rule] else component' [] -- artifical NT + | rule >= 0 = component' (rhsAfterDot g item) + component' rhs = fmap (intercalate " -> " . 
(++ [outtype])) (mapM (symboltype opts g) rhs) + outtype = wrapperType opts ++ " r" + forall = if forall_r then "forall r. " else "" + + + -------------------- GENACTION -------------------- + -- Create the code for a semantic action, i.e. a reduce action. + genAction :: GenOptions -> Grammar -> Int -> String + genAction opts g i = newline [comment, typedecl, code] where + prod@(Production lhs' rhs' _ _) = lookupProdNo g i + + comment + | comments opts = "-- " ++ showProd g i + | otherwise = "" + + typedecl + | showTypes opts || rank2Types opts = typedecl' -- some actions (not further specified) need to be explicitly typed in order for rank-n-types to work + | otherwise = "" + where + typedecl' = fromMaybe "" $ fmap (("action" ++ show i ++ " :: ") ++) (actionTypedecl opts g i) + + code = header ++ (if isMonadic then monadicCode else normalCode) + (customCode, isMonadic) = customProdCode prod + header = "action" ++ show i ++ " g " ++ unwords (map v [1..length rhs']) ++ " = " + normalCode = "g " ++ paren customCode + monadicCode = paren customCode ++ " `thenWrapP` g" + v n = "v" ++ show n + + -- Generate the type signature of a semantic action function. + actionTypedecl :: GenOptions -> Grammar -> Int -> Maybe String + actionTypedecl opts g i = do + lhstype <- symboltype opts g lhs' + let lhs = paren $ intercalate " -> " $ [lhstype, outtype] + rhstypes <- mapM (symboltype opts g) rhs' + let rhs = intercalate " -> " $ rhstypes ++ [outtype] + return (lhs ++ " -> " ++ rhs) + where + Production lhs' rhs' _ _ = lookupProdNo g i + outtype = wrapperType opts ++ " r" + + -- Read and translate the raw action code supplied by the user. Also return whether the action is monadic or not. 
+ customProdCode :: Production -> (String, Bool) + customProdCode (Production _ _ (code, _) _) = case code of + '%':'%':_ -> error "'{%%' actions not supported" + '%':'^':_ -> error "'{%^' actions not supported" + '%':rest -> (adapt rest, True) + _ -> (adapt code, False) + where + adapt code + | code == "no code" = v 1 + | otherwise = replaceHappyVars code + v n = "v" ++ show n + replaceHappyVars = unpack . replace (pack "happy_var_") (pack "v") . pack + + + -------------------- PARSETERMINALS / PARSENTS ------------------- + -- Generate the code for parsing a single nonterminal. + genParseNT :: GenOptions -> Grammar -> [RADState] -> Int -> Maybe String + genParseNT opts g states token = do + state <- find (\s -> (raw radType s == Type1 && raw nt s == token)) states + let line = "parse" ++ show token ++ " = state" ++ show (raw i state) + return (newline [comment, line]) where + comment + | comments opts = "-- " ++ (token_names g) ! token + | otherwise = "" + + + -- Generate the code for parsing a single terminal. + genParseTerminal :: GenOptions -> Grammar -> Int -> String + genParseTerminal opts g token = newline [comment, typedecl, code] where + specialEof = ptype opts /= MonadLexer && token == eof_term g + + comment + | comments opts = "-- " ++ (token_names g) ! 
token + | otherwise = "" + + typedecl + | specialEof && (showTypes opts || rank2Types opts) = typedecl'' + | showTypes opts || rank2Types opts = typedecl' + | otherwise = "" + where + typedecl' = maybe "" (\token' -> "parse" ++ show token ++ " :: " ++ paren (token' ++ " -> " ++ parser) ++ " -> " ++ parser) token' + typedecl'' = "parse" ++ show token ++ " :: " ++ parser ++ " -> " ++ parser + token' = symboltype opts g token + parser = wrapperType opts ++ " r" + + code + | specialEof = newline $ [lineEof1, line2] + | mlex opts = newline $ [lineLex1, lineLex2, lineLex3] + | otherwise = newline $ [line1, line2] + where + lineEof1 = "parse" ++ show token ++ " k [] = k []" + line1 = "parse" ++ show token ++ " k (t@" ++ paren tok ++ ":tr) = k " ++ t ++ " tr" + line2 = "parse" ++ show token ++ " k ts = " ++ happyError ++ " ts" + happyError = fromMaybe "happyError" (error_handler g) + + rawToken = fromJust $ lookup token (token_specs g) + tok = replaceDollar rawToken (if wantsProjection then "v" else "_") + t = if wantsProjection then "v" else "t" + wantsProjection = "$$" == (rawToken \\ replaceDollar rawToken "") -- i.e. Tokens of form "TokenInt $$" + + lineLex1 = "parse" ++ show token ++ " k = lexerWrapper $ \\t -> case t of" + + lineLex2 + | token == eof_term g = " " ++ paren eof ++ " -> k" + | otherwise = " " ++ paren tok ++ " -> k " ++ t + where + Just (_, eof) = lexer g + + lineLex3 = " _ -> happyErrorWrapper t" + + + -------------------- GENRULE ------------------- + -- Generate the code for a rule. + genRule :: GenOptions -> XGrammar -> Int -> String + genRule opts x rule + | isTrivial = newline [inline, comment, typedecl, trivialCode] + | otherwise = newline [inline, comment, typedecl, code] + where + + recog = (recognitionPoints x) !! 
rule + rhsAfterDot' = rhsAfterDot (RADTools.g x) (Lr0 rule recog) + isTrivial = length rhsAfterDot' <= 1 + + inline + | optimize opts = "{-# INLINE rule" ++ show rule ++ " #-}" + | otherwise = "" + + comment + | comments opts = "-- " ++ showRecognitionPoint x rule + | otherwise = "" + + typedecl + | showTypes opts || rank2Types opts = typedecl' + | otherwise = "" + where + typedecl' = fromMaybe "" $ fmap (("rule" ++ show rule ++ " :: ") ++) (ruleTypedecl opts x (rank2Types opts) rule) + + code = case (rulesTupleBased opts, ptype opts) of + (True, Normal) -> tupleBasedCodeNormal + (True, Monad) -> error "TODO" + (True, MonadLexer) -> tupleBasedCodeLexer + (False, Normal) -> continuationBasedCodeNormal + (False, Monad) -> error "TODO" + (False, MonadLexer) -> continuationBasedCodeLexer + + -- There are 3 types how we code generate the code: + -- 1. trivial: 0 or 1 symbols are parsed + -- 2. continuation-based, with optional type annotations for the continuations + -- 3. tuple-based + + trivialCode = case rhsAfterDot' of + [x] -> "rule" ++ show rule ++ " = parse" ++ show x + [] -> "rule" ++ show rule ++ " = id" + + tupleBasedCodeNormal = newline $ firstLine:otherLines where + firstLine = "rule" ++ show rule ++ " k ts0 = " ++ fullk ++ " where" + fullk = "k " ++ (unwords $ map (\x -> "v" ++ show x) [1..length otherLines]) ++ " ts" ++ show (length otherLines) + otherLines = map (uncurry toLine) (zip rhsAfterDot' [1..]) + toLine tok i = " (v" ++ show i ++ ", ts" ++ show i ++ ") = parse" ++ show tok ++ " (,) ts" ++ show (i-1) + + tupleBasedCodeLexer = newline $ firstLine : otherLines ++ [finalLine] where + firstLine = "rule" ++ show rule ++ " k la0 = do" + fullk = "k " ++ (unwords $ map (\x -> "v" ++ show x) [1..length otherLines]) ++ " la" ++ show (length otherLines) + otherLines = map (uncurry toLine) (zip rhsAfterDot' [1..]) + toLine tok i = " (v" ++ show i ++ ", la" ++ show i ++ ") <- parse" ++ show tok ++ " (\\a b -> return (a, b)) la" ++ show (i-1) + finalLine = " " 
++ fullk + + continuationBasedCodeNormal = continuationBasedCode "ts" + continuationBasedCodeLexer = continuationBasedCode "la" + continuationBasedCode ts + | rank2Types opts = newline $ firstLine : (blend lineTypes (otherLines ++ [finalLine])) + | otherwise = newline $ firstLine : (otherLines ++ [finalLine]) + where + firstLine = "rule" ++ show rule ++ " k " ++ ts ++ " = parse" ++ show (head rhsAfterDot') ++ " cont1 " ++ ts ++ " where" + otherLines = map (uncurry toLine) (zip (tail rhsAfterDot') [1..]) + toLine tok i = " cont" ++ show i ++ " " ++ vs i ++ " " ++ ts ++ " = parse" ++ show tok ++ " " ++ paren ("cont" ++ show (i+1) ++ " " ++ vs i) ++ " " ++ ts + vs i = unwords (map (\v -> "v" ++ show v) [1..i]) + finalLine = " cont" ++ show n ++ " " ++ vs n ++ " " ++ ts ++ " = k " ++ vs n ++ " " ++ ts where + n = length rhsAfterDot' + + lineTypes = map toType [1..n] + toType i = fromMaybe "" (toType' i) + toType' i = do + lhs <- mapM (symboltype opts (RADTools.g x)) (take i rhsAfterDot') + let lhsType = intercalate " -> " (lhs ++ [parser]) + return $ " cont" ++ show i ++ " :: " ++ lhsType where + parser = paren $ wrapperType opts ++ " r" + + blend (x:xs) ys = x:(blend ys xs) + blend _ _ = [] + + -- Generate the type signature of a rule function. + ruleTypedecl :: GenOptions -> XGrammar -> Bool -> Int -> Maybe String + ruleTypedecl opts x forall_r rule = do + let g = RADTools.g x + let recog = (recognitionPoints x) !! rule + let lhs' = rhsAfterDot g (Lr0 rule recog) + lhstypes <- mapM (symboltype opts g) lhs' + let lhs = forall ++ (paren $ intercalate " -> " $ lhstypes ++ [parser]) + return (lhs ++ " -> " ++ parser) + where + forall = if forall_r then "forall r. " else "" + parser = paren $ wrapperType opts ++ " r" + + + -------------------- TOOLS -------------------- + + -- Insert newlines between the strings; ignore empty strings + newlines :: Int -> [String] -> String + newlines n = intercalate (replicate n '\n') . filter (not . 
null) + + newline = newlines 1 + + newlineMap prefix f x = newlines 1 $ map ((prefix ++) . f) x + + paren a = "(" ++ a ++ ")" + + hasRank2Type opts g nt = rank2Types opts && case symboltype opts g nt of + Just t -> isInfixOf (forallMatch opts) t + Nothing -> False + + symboltype opts g symbol + | symbol == errorTok = Just (process $ errorTokenType opts) + | symbol == (eof_term g) = Nothing + | elem symbol (non_terminals g) = fmap process $ join (maybelookup (types g) symbol) + | wantsProjection = Nothing -- we don't know the type of the projection + | otherwise = Just (process $ token_type g) + where + process = remNewlines . paren where + remNewlines = map replace + replace '\n' = ' ' + replace x = x + maybelookup arr i = if elem i (indices arr) then Just (arr ! i) else Nothing + wantsProjection = "$$" == (rawToken \\ replaceDollar rawToken "") -- i.e. Tokens of form "TokenInt $$" + rawToken = fromJust $ lookup symbol (token_specs g) \ No newline at end of file diff --git a/src/RADCodeGen_LALR.hs b/src/RADCodeGen_LALR.hs new file mode 100644 index 00000000..61be9a20 --- /dev/null +++ b/src/RADCodeGen_LALR.hs @@ -0,0 +1,362 @@ +module RADCodeGen_LALR where + import Grammar + import LALR + import GenUtils + import RADTools (showItem, showProd, lhs, prod) + import RADStateGen + import Control.Monad + import Data.List + import Data.Maybe + import Data.Text (pack, unpack, replace) + import GHC.Arr ((!), indices) + + + data ParserType = Normal | Monad | MonadLexer deriving (Eq, Show) + + data GenOptions = GenOptions { + ptype :: ParserType, + + wrapperType :: String, -- e.g. "Parser" + errorTokenType :: String, -- e.g. "ErrorToken" + + showTypes :: Bool, + comments :: Bool, + + rank2Types :: Bool, -- when True, all functions (including goto functions) which use or enclose a higher-rank-function are annotated with an explicit type. + forallMatch :: String, -- the text which determines which types count as rank-2-types. 
+ + header :: String, + footer :: String + } deriving Show + + mlex opts = ptype opts == MonadLexer + + dotleft (Lr0 rule dot) = Lr0 rule (dot-1) + + -------------------- GENCODE -------------------- + -- Generate the full code + genCode :: GenOptions -> Grammar -> [LALRState] -> ActionTable -> GotoTable -> IO String + genCode opts g states action goto = do + return $ newlines 3 [languageFeatures, header', entryPoints', definitions', states', actions', footer'] where + languageFeatures + | rank2Types opts = newline $ map extension ["RankNTypes", "ScopedTypeVariables"] + | otherwise = "" where + extension str = "{-# LANGUAGE " ++ str ++ " #-}" + + header' = header opts + entryPoints' = newlines 2 $ map (entryPoint opts g states) (starts g) + definitions' = definitions opts g + + states' = newlines 2 $ map (genState opts g) states + + actions' = newlines 2 $ map (genAction opts g) [1..prods] + prods = length (productions g) - 1 + footer' = footer opts + + + -------------------- ENTRYPOINT -------------------- + entryPoint :: GenOptions -> Grammar -> [LALRState] -> (String, Name, Name, Bool) -> String + entryPoint opts g states (name, lhs, rhs, isPartial) + | isPartial = newline [typedecl, definition] + | otherwise = newline [typedecl, definition, parseEof] + where + typedecl + | showTypes opts = fromMaybe "" $ fmap (((name ++ " :: ") ++) . correctP) (symboltype opts g rhs) + | otherwise = "" + + correctP = if mlex opts then p else parser + + definition = case ptype opts of + Normal -> common ++ paren (checkEof ++ "const") ++ maybeWhere + Monad -> common ++ paren (checkEof ++ "const . " ++ returnP) ++ maybeWhere + MonadLexer -> common ++ paren (checkEof ++ "const . " ++ returnP) ++ " []" ++ maybeWhere + + common = name ++ " = state" ++ show i ++ " " + + -- After finishing, eof must be parsed. This is because the accept-state may accept per default, which means eof still has to be verified. For partial parsers, this is not the case. 
+ maybeWhere = if isPartial then "" else " where" + checkEof = if isPartial then "" else "parseEof . " + parseEof + | mlex opts = newline [lex1, lex2, lex3] + | otherwise = newline [normal1, normal2] + where + lex1 = " parseEof k = lexerWrapper $ \\t -> case t of" + lex2 = " " ++ paren eof ++ " -> k" + lex3 = " _ -> happyErrorWrapper t" + Just (_, eof) = lexer g + normal1 = " parseEof k [] = k []" + normal2 = " parseEof k ts = " ++ happyError ++ " ts" + happyError = fromMaybe "happyError" (error_handler g) + + -- Rule LHS -> RHS + prod = fromJust $ find matches [0 .. length (productions g) - 1] where + matches i = matches' (lookupProdNo g i) + matches' (Production lhs' rhs' _ _) = lhs' == lhs && rhs' == [rhs] + + -- State with item LHS -> . RHS + state = fromJust $ find (matches . coreItems) states where -- state with item LHS -> . RHS + matches items = elem (Lr0 prod 0) items + i = fromJust $ elemIndex state states + + + p a = p' ++ " " ++ a + parser a = wrapperType opts ++ " " ++ a + (_, _, p', _, returnP) = monad g + + + -------------------- DEFINITIONS -------------------- + -- Generate definitions such as wrappers, the parser type or more required functions and types + definitions :: GenOptions -> Grammar -> String + definitions opts g = case ptype opts of + Normal -> newlines 2 [parserDecl, errorToken] + Monad -> newlines 2 [parserDecl, errorToken, wrapThen] + MonadLexer -> newlines 2 [parserDecl, errorToken, wrapThen, repeatTok, wrapLexer, wrapError] + where + + -- type Parser r = [Token] -> P r + parserDecl + | ptype opts == Normal = "type " ++ parser "r" ++ " = [" ++ tokenT ++ "] -> r" + | otherwise = "type " ++ parser "r" ++ " = [" ++ tokenT ++ "] -> " ++ p "r" + + -- data ErrorToken = ErrorToken + errorToken = "data " ++ errorTokenT ++ " = " ++ errorTokenT + + -- thenWrapP :: P a -> (a -> Parser b) -> Parser b + -- thenWrapP a f ts = (thenP) a (flip f ts) + wrapThen = newline [typedecl, definition] where + name = "thenWrapP" + typedecl = name ++ " :: " 
++ p "a" ++ " -> (a -> " ++ parser "b" ++ ") -> " ++ parser "b" + definition = name ++ " a f ts = " ++ paren thenP ++ " a (flip f ts)" + + -- repeatTok :: Token -> Parser a -> Parser a + -- repeatTok tok p = \cur -> p (tok:cur) + repeatTok = newline [typedecl, definition] where + name = "repeatTok" + typedecl = name ++ " :: " ++ tokenT ++ " -> " ++ parser "a" ++ " -> " ++ parser "a" + definition = name ++ " tok p = \\cur -> p (tok:cur)" + + -- lexerWrapper :: (Token -> Parser a) -> Parser a + -- lexerWrapper cont [] = lexer (\tok -> cont tok []) + -- lexerWrapper cont (tok:toks) = cont tok toks + wrapLexer = newline [typedecl, line1, line2] where + name = "lexerWrapper" + typedecl = name ++ " :: " ++ paren (tokenT ++ " -> " ++ parser "a") ++ " -> " ++ parser "a" + line1 = name ++ " cont [] = " ++ lexer' ++ " (\\t -> cont t [])" + line2 = name ++ " cont (t:ts) = cont t ts" + + -- happyErrorWrapper :: Token -> Parser a + -- happyErrorWrapper t _ = happyError t + wrapError = newline [typedecl, definition] where + name = "happyErrorWrapper" + typedecl = name ++ " :: " ++ tokenT ++ " -> " ++ parser "a" + definition = name ++ " = const . " ++ happyError + + p a = p' ++ " " ++ a + parser a = wrapperType opts ++ " " ++ a + (_, _, p', thenP, _) = monad g + tokenT = token_type g + errorTokenT = errorTokenType opts + (Just (lexer', _)) = lexer g + happyError = fromMaybe "happyError" (error_handler g) + + + -------------------- GENSTATE ------------------- + -- Generate the code for a single state. 
+  -- Emits the Haskell source text of one state function ("state<N>") of the
+  -- continuation-based LALR parser.
+  --
+  --   opts  : code generation options (parser flavour, type annotations, comments)
+  --   g     : the grammar the states were built from
+  --   state : the LALR state to translate
+  --
+  -- A state with a single core item, no shifts, gotos or per-token reduces and a
+  -- default reduce is emitted as "state<N> = id". Every other state becomes a
+  -- case expression over the next token: one alternative per shift and reduce
+  -- action, one default alternative, and (if the state has gotos) a where-block
+  -- with one goto function "g<NT>" per nonterminal.
+  --
+  -- NOTE(review): the whitespace-only string literals below are reproduced
+  -- exactly as they appear in this copy of the patch; confirm their width
+  -- against the upstream file, since they control the layout of generated code.
+  genState :: GenOptions -> Grammar -> LALRState -> String
+  genState opts g state
+    | isTrivial = newline [comment, trivialTypedecl, trivialHeader]
+    | otherwise = newline [comment, typedecl, header, shifts', reduces', defaultAction', gotos']
+    where
+
+      isTrivial =
+        length (coreItems state) == 1
+          && null (shifts state)
+          && null (gotos state)
+          && null (reduces state)
+          && isReduce (defaultAction state)
+        where
+          isReduce (Reduce _) = True
+          isReduce _ = False
+
+      hasRank2Goto = any (hasRank2Type opts g . fst) (gotos state)
+      hasRank2TypeSignature = any hasRank2Item (coreItems state)
+      hasRank2Item item = any (hasRank2Type opts g) (rhsAfterDot g item)
+
+      trivialTypedecl
+        | rank2Types opts && hasRank2Goto = fromMaybe "" (stateTypeSignature opts g True state)
+        | otherwise = ""
+
+      trivialHeader = "state" ++ show (index state) ++ " = id"
+
+      -- One "-- <item>" line per core item.
+      comment
+        | comments opts = newlineMap "-- " (showItem g) (coreItems state)
+        | otherwise = ""
+
+      -- A signature with "forall r." is only requested when a goto of this state
+      -- has a rank-2 type; an empty string means "emit no signature".
+      typedecl
+        | rank2Types opts && hasRank2Goto = fromMaybe "" (stateTypeSignature opts g True state)
+        | rank2Types opts && hasRank2TypeSignature = fromMaybe "" (stateTypeSignature opts g False state)
+        | showTypes opts = fromMaybe "" (stateTypeSignature opts g False state)
+        | otherwise = ""
+
+      shifts' = newlineMap " " shift (shifts state)
+      reduces' = newlineMap " " reduce (reduces state)
+      gotos' = where' ++ intercalate "\n" (map (" " ++) gotoLines)
+        where
+          gotoLines = concatMap goto (gotos state)
+          where' = if null (gotos state) then "" else " where\n"
+
+      -- "state<N> k1 .. kn": one continuation parameter per core item.
+      header
+        | mlex opts = common ++ " = lexerWrapper $ \\t -> case t of"
+        | otherwise = common ++ " ts = case ts of"
+        where
+          common = "state" ++ show (index state) ++ " " ++ headerKs
+          headerKs = unwords $ map k (coreItems state)
+
+      -- Case alternative for shifting 'token' into state 'target'.
+      shift (token, (target, items))
+        | mlex opts = paren tok ++ " -> state" ++ show target ++ " " ++ kcontent
+        | otherwise = "t@" ++ paren tok ++ ":tr -> state" ++ show target ++ " " ++ kcontent ++ " tr"
+        where
+          items' = map dotleft items
+          tok = replaceDollar rawToken (if wantsProjection then "v" else "_")
+          replaceDollar a char = maybe a ($ char) (mapDollarDollar a)
+          -- Each continuation is applied to the projected value "v" or to the whole token "t".
+          kcontent = unwords (map (paren . (++ arg) . k) items')
+            where
+              arg = if wantsProjection then " v" else " t"
+          rawToken = fromJust $ lookup token (token_specs g)
+          wantsProjection = "$$" == (rawToken \\ replaceDollar rawToken "") -- i.e. Tokens of form "TokenInt $$"
+
+      -- Case alternative for reducing with 'rule' on lookahead 'token'.
+      reduce (token, rule)
+        | mlex opts = paren tokMaybeEof ++ " -> repeatTok t $ " ++ k'
+        | otherwise = if token == eof_term g then eofCase else normalCase
+        where
+          normalCase = "t@" ++ paren tok ++ ":tr -> " ++ k' ++ " ts"
+          eofCase = "[] -> " ++ k' ++ " ts"
+
+          tokMaybeEof = if token == eof_term g then eof else tok
+          tok = removeDollar $ fromJust (lookup token (token_specs g))
+          Just (_, eof) = lexer g
+
+          removeDollar a = maybe a ($ "_") (mapDollarDollar a)
+          k' = let dot = length (rhs (lookupProdNo g rule)) in k (Lr0 rule dot)
+
+      -- Lines of the goto function "g<nt>": an optional type signature (only for
+      -- rank-2 nonterminals whose type is known) followed by the definition.
+      goto (nt, (target, items))
+        | hasRank2Type opts g nt = catMaybes [signature, definition]
+        | otherwise = catMaybes [definition]
+        where
+          items' = map dotleft items
+          signature = case symboltype opts g nt of
+            Just t -> Just $ "g" ++ show nt ++ " :: " ++ t ++ " -> " ++ paren outtype
+            Nothing -> Nothing
+          definition = Just $ "g" ++ show nt ++ " x = state" ++ show target ++ " " ++ unwords (map (paren . (++ " x") . k) items')
+          outtype = wrapperType opts ++ " r"
+
+      defaultAction' = " " ++ case defaultAction state of
+        ErrorShift target -> defaultErrorShift target
+        Reduce rule -> defaultReduce rule
+        Error -> defaultError
+
+      -- The error token value must be spelled with the configured constructor
+      -- name: 'definitions' declares "data <errorTokenType> = <errorTokenType>",
+      -- so a hard-coded "ErrorToken" only resolves for that one setting.
+      defaultErrorShift toState
+        | mlex opts = "_ -> repeatTok t $ state" ++ show toState ++ " " ++ paren (k item ++ errorTokenArg)
+        | otherwise = "_ -> state" ++ show toState ++ " " ++ paren (k item ++ errorTokenArg) ++ " ts"
+        where
+          errorTokenArg = " " ++ errorTokenType opts
+          item = case hdiv (completionItems state) errorTok g of
+            (first' : _) -> first'
+            [] -> error ("genState: state " ++ show (index state) ++ " has an error-shift default action but hdiv returned no item for the error token")
+
+      defaultReduce rule
+        | mlex opts = "_ -> repeatTok t $ " ++ k'
+        | otherwise = "_ -> " ++ k' ++ " ts"
+        where
+          k' = let dot = length (rhs (lookupProdNo g rule)) in k (Lr0 rule dot)
+
+      defaultError
+        | mlex opts = "_ -> happyErrorWrapper t"
+        | otherwise = "_ -> " ++ happyError ++ " ts"
+        where
+          happyError = fromMaybe "happyError" (error_handler g)
+
+      -- Continuation expression for an item: a core item maps to the state's own
+      -- parameter ("k" if there is a single core item, otherwise "k<position>");
+      -- any other item maps to "action<rule> g<lhs>".
+      k item@(Lr0 rule _) = maybe noCore core $ elemIndex item (coreItems state)
+        where
+          core idx
+            | length (coreItems state) == 1 = "k"
+            | otherwise = "k" ++ show (idx + 1)
+          noCore = "action" ++ show rule ++ " g" ++ show (lhs g item)
+
+  -- Create the type signature for a state.
+  -- The result is "state<N> :: [forall r. ](c1) -> ... -> (cn) -> (<wrapper> r)",
+  -- with one component per core item built from the types of the symbols after
+  -- the item's dot. Returns Nothing as soon as 'symboltype' yields no type for
+  -- one of those symbols. 'forall_r' selects the explicit "forall r. " prefix.
+  stateTypeSignature :: GenOptions -> Grammar -> Bool -> LALRState -> Maybe String
+  stateTypeSignature opts g forall_r state = do
+    let start = "state" ++ show (index state) ++ " :: " ++ forall
+    components <- mapM component (coreItems state)
+    return $ start ++ intercalate " -> " (map paren (components ++ [outtype]))
+    where
+      component item = fmap (intercalate " -> " . (++ [outtype])) (mapM (symboltype opts g) (rhsAfterDot g item))
+      outtype = wrapperType opts ++ " r"
+      forall = if forall_r then "forall r. " else ""
+
+
+  -------------------- GENACTION --------------------
+  -- Create the code for a semantic action, i.e. a reduce action.
+ genAction :: GenOptions -> Grammar -> Int -> String + genAction opts g i = newline [comment, typedecl, code] where + prod@(Production lhs' rhs' _ _) = lookupProdNo g i + + comment + | comments opts = "-- " ++ showProd g i + | otherwise = "" + + typedecl + | showTypes opts || rank2Types opts = typedecl' -- some actions (not further specified) need to be explicitly typed in order for rank-n-types to work + | otherwise = "" + where + typedecl' = fromMaybe "" $ fmap (("action" ++ show i ++ " :: ") ++) (actionTypedecl opts g i) + + code = header ++ (if isMonadic then monadicCode else normalCode) + (customCode, isMonadic) = customProdCode prod + header = "action" ++ show i ++ " g " ++ unwords (map v [1..length rhs']) ++ " = " + normalCode = "g " ++ paren customCode + monadicCode = paren customCode ++ " `thenWrapP` g" + v n = "v" ++ show n + + -- Generate the type signature of a semantic action function. + actionTypedecl :: GenOptions -> Grammar -> Int -> Maybe String + actionTypedecl opts g i = do + lhstype <- symboltype opts g lhs' + let lhs = paren $ intercalate " -> " $ [lhstype, outtype] + rhstypes <- mapM (symboltype opts g) rhs' + let rhs = intercalate " -> " $ rhstypes ++ [outtype] + return (lhs ++ " -> " ++ rhs) + where + Production lhs' rhs' _ _ = lookupProdNo g i + outtype = wrapperType opts ++ " r" + + -- Read and translate the raw action code supplied by the user. Also return whether the action is monadic or not. + customProdCode :: Production -> (String, Bool) + customProdCode (Production _ _ (code, _) _) = case code of + '%':'%':_ -> error "'{%%' actions not supported" + '%':'^':_ -> error "'{%^' actions not supported" + '%':rest -> (adapt rest, True) + _ -> (adapt code, False) + where + adapt code + | code == "no code" = v 1 + | otherwise = replaceHappyVars code + v n = "v" ++ show n + replaceHappyVars = unpack . replace (pack "happy_var_") (pack "v") . 
pack + + + -------------------- TOOLS -------------------- + + -- Insert newlines between the strings; ignore empty strings + newlines :: Int -> [String] -> String + newlines n = intercalate (replicate n '\n') . filter (not . null) + + newline = newlines 1 + + newlineMap prefix f x = newlines 1 $ map ((prefix ++) . f) x + + paren a = "(" ++ a ++ ")" + + rhsAfterDot g item@(Lr0 rule dot) = drop dot $ rhs (prod g item) + rhs (Production _ rhs _ _) = rhs + + hasRank2Type opts g nt = rank2Types opts && case symboltype opts g nt of + Just t -> isInfixOf (forallMatch opts) t + Nothing -> False + + symboltype opts g symbol + | symbol == errorTok = Just (process $ errorTokenType opts) + | elem symbol (non_terminals g) = fmap process $ join (maybelookup (types g) symbol) + | wantsProjection = Nothing -- we don't know the type of the projection + | otherwise = Just (process $ token_type g) + where + process = remNewlines . paren where + remNewlines = map replace + replace '\n' = ' ' + replace x = x + maybelookup arr i = if elem i (indices arr) then Just (arr ! i) else Nothing + wantsProjection = "$$" == (rawToken \\ replaceDollar rawToken "") -- i.e. 
Tokens of form "TokenInt $$" + rawToken = fromJust $ lookup symbol (token_specs g) + replaceDollar a char = maybe a ($ char) (mapDollarDollar a) \ No newline at end of file diff --git a/src/RADStateGen.hs b/src/RADStateGen.hs new file mode 100644 index 00000000..b714b0f3 --- /dev/null +++ b/src/RADStateGen.hs @@ -0,0 +1,638 @@ +module RADStateGen (generateLALRStates, generateRADStates, createXGrammar, artCore, hdiv, plus, RADType(..), RADState(..), LALRState(..), RawRADState(..), LALRDefaultAction(..), RADDefaultAction(..)) where + import Grammar + import Data.Graph.Dom + import First + import Follow + import NameSet (NameSet) + import Data.Graph + import Data.Set (Set, toList, fromList, elemAt) + import qualified Data.IntSet + import LALR + import RADTools (CompletedLr0State, Lr1State, XGrammar(..), complete, showItem, showProd, lhs, core, completion, prod, hasTokenAfterDot, tokenAfterDot, rhsLength', isInDirectCompletion, dotIsAtRightEnd, plus, hdiv, radCompletion, itemsStartingWith, plusRad, completeWithFunction, directCompletion, rhsAfterDot, showRecognitionPoint) + import Control.Monad + import Data.List + import Data.Maybe + import Data.Ord + import Data.Function (on) + import GHC.Arr ((!), assocs, listArray, Array(..)) + + + -- Types which are used (both) for LALR and RAD states: + type AcceptAction = (Name) -- On terminal t --> accept + type AnnounceAction = (Name, Int) -- On terminal t --> announce using rule i + type ReduceAction = (Name, Int) -- On terminal t --> reduce using rule i + type ShiftAction = (Name, (Int, [Lr0Item])) -- On terminal t --> goto state S with items I (= A -> B t . C) + type GotoAction = (Name, (Int, [Lr0Item])) -- Nonterminal X <-> goto to state S with items I (= A -> B X . C) such that: + -- g_X v = state_{i+X} (k_i v) + + -------------------- RAD STATE -------------------- + + data RADType = Type1 -- States with the item _ -> |- . NT (here a top-down parse is started) + | Type2 -- States with the item _ -> |- NT . 
(here a top-down parse is accepted) + | Type3 -- Normal states with no artificial item + deriving (Show, Eq) + + data RawRADState = RawRADState { + i :: Int, -- The final index that the completed RADState state will also have + comingFrom :: Int, + radType :: RADType, + state :: LALRState, + nt :: Name, -- The NT which is used by Type1 or Type2 states. Attention: undefined for Type3 states + core' :: [Lr0Item], + completion' :: [Lr0Item] -- contains core + } deriving (Eq +#ifdef DEBUG + , Show +#endif + ) + + createType1State :: XGrammar -> Name -> LALRState -> Int -> Int -> RawRADState + createType1State x nt state i comingFrom = RawRADState { i = i, radType = Type1, state = state, nt = nt, core' = [], completion' = radCompletion x (itemsStartingWith (g x) nt), comingFrom = comingFrom } -- has artifical core item _ -> |- . NT + + createType2State :: XGrammar -> Name -> LALRState -> [Lr0Item] -> Int -> Int -> RawRADState + createType2State x nt state core i comingFrom = RawRADState { i = i, radType = Type2, state = state, nt = nt, core' = core, completion' = radCompletion x core, comingFrom = comingFrom } -- has artifical core item _ -> |- NT . + + createType3State :: XGrammar -> LALRState -> [Lr0Item] -> Int -> Int -> RawRADState + createType3State x state core i comingFrom = RawRADState { i = i, radType = Type3, state = state, nt = undefined, core' = core, completion' = radCompletion x core, comingFrom = comingFrom } + + data RADState = RADState { + announces' :: [AnnounceAction], + gotos' :: [GotoAction], + shifts' :: [ShiftAction], + accepts' :: [AcceptAction], -- ONLY type2 states can have (and always do have) accept actions! + defaultAction' :: RADDefaultAction, + _raw :: RawRADState + } deriving (Eq +#ifdef DEBUG + , Show +#endif + ) + + data RADDefaultAction = ErrorShift' Int -- On errorToken (i.e. default), shift to state X + | Announce' Int -- Announce rule X + | Accept' -- Accept the NT. 
Only in type2-states + | Error' -- Call happyError + deriving (Eq, Show) + + -- The core PLUS the possible artificial item. + -- Artificial items have negative rule numbers - they look like this: "Lr0 (-4) 0" for the item "|- -> . (NT4)". + artCore :: RADState -> [Lr0Item] + artCore state = case (radType . _raw $ state) of + Type1 -> (Lr0 (-nt') 0) : core + Type2 -> (Lr0 (-nt') 1) : core + Type3 -> core + where + core = core' (_raw state) + nt' = nt (_raw state) + + showRadState :: XGrammar -> RADState -> [Char] + showRadState x state = "Raw = " ++ showRaw x (_raw state) ++ "\nShifts = " ++ show (shifts' state) ++ "\nGotos = " ++ show (gotos' state) ++ "\nAnnounces = " ++ show (announces' state) ++ "\nAccepts = " ++ show (accepts' state) ++ "\nDefault = " ++ show (defaultAction' state) ++ "\n\n" + + showRaw :: XGrammar -> RawRADState -> [Char] + showRaw x raw = "RawRADState " ++ show (i raw) ++ ": " ++ show (radType raw) ++ " (orig state: " ++ show (index (state raw)) ++ " " ++ show (map (showItem (g x)) (coreItems (state raw))) ++ ")" ++ + (if radType raw /= Type3 then ". NT = " ++ show (nt raw) ++ " (" ++ ((token_names (g x)) ! (nt raw)) ++ ")" else "") ++ + (if radType raw /= Type1 then ". core = { " ++ intercalate "; " (map (showItem (g x)) (core' raw)) ++ " }" else "") ++ + ". completion = { " ++ intercalate "; " (map (showItem (g x)) (completion' raw)) ++ " }" ++ + " (Coming from state " ++ show (comingFrom raw) ++ ")" + + + -- Create the extended grammar containing information about the recognition points. 
+  -- Builds the recognition graph of every LALR state, derives the non-free
+  -- items from those graphs and from them the recognition point of each rule,
+  -- and packs grammar and recognition points into an XGrammar.
+  -- With DEBUG defined, the graphs, the rules with their recognition points and
+  -- an "LL-ness" percentage are additionally printed to stdout.
+  createXGrammar :: Grammar -> [LALRState] -> IO XGrammar
+  createXGrammar g lalrStates = do
+    let stateGraphs = map (recognitionGraph g) lalrStates
+        recogPoints = determineRecognitionPoints g (nonfreeItems g stateGraphs)
+        xGrammar = XGrammar { g = g, recognitionPoints = recogPoints }
+
+#ifdef DEBUG
+    debugPrint "State Graphs:" (showGraph g) stateGraphs
+    --debugPrint "Non-Free Items:" (showItem g) (nonfreeItems g stateGraphs)
+    debugPrint "All Rules With Their Recognition Points:" (showRecognitionPoint xGrammar) [0 .. length (productions g) - 1]
+
+    -- LL-ness: share of right-hand-side symbols lying behind a recognition point
+    let ruleLengthSum = sum [length rhs | Production _ rhs _ _ <- productions g]
+        recogSum = sum recogPoints
+        llness = 100 * (1 - fromIntegral recogSum / fromIntegral ruleLengthSum) :: Double
+    putStrLn $ concat
+      [ "Sum of rec-points: ", show recogSum
+      , "; total rule lengths: ", show ruleLengthSum
+      , "\nLL-ness: ", show llness, "%"
+      ]
+#endif
+
+    return xGrammar
+
+  -- Generate all RAD states from happy's LALR states.
+  -- FIRST and FOLLOW information is computed here (mkFirst / followArray) and
+  -- handed to lalrToRADStates; with DEBUG defined the result is also printed.
+  generateRADStates :: XGrammar -> [LALRState] -> [Int] -> IO [RADState]
+  generateRADStates x lalrStates unusedRules = do
+#ifdef DEBUG
+    -- debugPrint "LALRStates:" (showState grammar) lalrStates
+    debugPrint "RADStates:" (showRadState x) radStates
+#endif
+    return radStates
+    where
+      grammar = RADTools.g x
+      firstOf = mkFirst grammar
+      followOf = followArray grammar firstOf
+      radStates = lalrToRADStates x lalrStates unusedRules firstOf followOf
+
+  -- Helper function for printing.
+  -- Writes a titled section to stdout: a ruler line, the title, one line per
+  -- element (rendered with showElem), and a closing ruler line.
+  debugPrint :: String -> (a -> String) -> [a] -> IO ()
+  debugPrint title showElem elems =
+    putStrLn (concat [gap, ruler, "\n", title, gap, body, ruler, gap])
+    where
+      ruler = replicate 40 '–'
+      gap = "\n\n"
+      body = unlines (map showElem elems)
+
+  -- Convert all LALR states to RAD states.
+  -- Seeds the construction with one raw type-1 state per "unambiguous"
+  -- nonterminal and then completes raw states one after another (gen') until
+  -- no raw state is left.
+  lalrToRADStates :: XGrammar -> [LALRState] -> [Int] -> ([Name] -> NameSet) -> Array Name NameSet -> [RADState]
+  lalrToRADStates x@(XGrammar { g = g, recognitionPoints = recognitionPoints }) lalrStates unusedRules first follow =
+    gen' x [] (zipWith toType1 unambiguousNTs [0..])
+    where
+      -- Unambiguous NTs are NTs that appear after the recognition point in some rule
+      unambiguousNTs = [sym | sym <- non_terminals g, isJust (findGoodRule sym)]
+
+      -- First rule (with its number) that is not unused and mentions 'sym' at
+      -- or behind its recognition point.
+      findGoodRule sym = find usable (zip [0..] (productions g))
+        where
+          usable (ruleNo, Production _ body _ _) =
+            sym `elem` drop (recognitionPoints !! ruleNo) body && ruleNo `notElem` unusedRules
+
+      -- Find a state with a (completion) item where the dot is immediately before the NT; create a type1-state
+      toType1 sym radIndex = createType1State x sym host radIndex (-1)
+        where
+          (ruleNo, Production _ body _ _) = fromJust (findGoodRule sym)
+          -- position of the last occurrence of 'sym' in the rule body
+          dotPos = (length body - 1) - fromJust (findIndex (sym ==) (reverse body))
+          wanted = Lr0 ruleNo dotPos
+          host = fromJust (find (elem wanted . completionItems) lalrStates)
+
+      -- Complete the raw states to full RADStates, possibly yielding new raw states which will be recursively completed.
+      gen' :: XGrammar -> [RADState] -> [RawRADState] -> [RADState]
+      gen' _ done [] = done
+      gen' x' done pending@(current : queued) = gen' x' (done ++ [completed]) (queued ++ spawned)
+        where
+          -- finished plus pending states; handed to completeRaw as its state count
+          stateCount = length done + length pending
+          -- every raw state created so far except the one being completed
+          known = queued ++ map _raw done
+          (completed, spawned) = completeRaw x' lalrStates current known first follow stateCount
+
+
+  -- Complete a raw state to a RADState, possibly yielding new raw states.
+  -- The "new" raw states which are created for shifting/goto can also be existing ones; therefore, the list of all already created raw states is passed around.
+ completeRaw :: XGrammar -> [LALRState] -> RawRADState -> [RawRADState] -> ([Name] -> NameSet) -> Array Name NameSet -> Int -> (RADState, [RawRADState]) + completeRaw x@(XGrammar { g = g }) allStates raw allRawStates first follow stateCount = (radState, newStates) where + radState = RADState { shifts' = shifts', accepts' = accepts'', announces' = announces'', gotos' = gotos', defaultAction' = default'', _raw = raw } + newStates = gotoStates ++ shiftStates ++ (maybe [] return newStateFromTransformedErrorShift) + gotos' = transformedGotos + shifts' = shiftShifts + + announces' = announcesFromReduces ++ shiftAnnounces ++ type1EpsilonAnnounces + announces'' = case default'' of -- If default action is announce, remove unnecessary entries + Announce' rule -> filter ((/=) rule . snd) $ filter ((/=) errorTok . fst) announces' + _ -> filter ((/=) errorTok . fst) announces' + + accepts' = shiftAccepts ++ type2Accepts + accepts'' -- If default action is accept, no need for an accept array + | default'' == Accept' = [] + | otherwise = delete errorTok (rmdups accepts') where rmdups = map head . group . sort + + -- If there is no transformed default action, we choose a suitable default action: + -- Accept for type2 states, or the largest announce action for other states. + -- If there is an accept or announce action on the errorToken, use this as the default action. + default'' + | elem errorTok accepts' = if default' == Error' then Accept' else error ("errorTok is in AcceptActions, but defaultAction is " ++ (show default')) -- check for accept conflict, shouldn't happen + | any ((==) errorTok . 
fst) announces' = case default' of + Error' -> Announce' defaultRule + Announce' rule -> if rule == defaultRule then Announce' defaultRule else + error $ "errorTok wants to announce rule " ++ show defaultRule ++ ", but defaultAction is " ++ (show default') + _ -> error $ "errorTok wants to announce rule " ++ show defaultRule ++ ", but defaultAction is " ++ (show default') + | default' /= Error' = default' -- Keep transformed default action + | radType raw == Type2 = Accept' -- Type2 states accept per default + | not (null announces') = Announce' largestAnnounce + | otherwise = Error' + where + defaultRule = snd $ fromJust $ find ((==) errorTok . fst) announces' + largestAnnounce = head . head $ sortBy (flip (comparing length)) (group (sort (map snd announces'))) + + -- Transform the LALR default action + default' = fromMaybe Error' (transformDefault (defaultAction (state raw))) + transformDefault Error = Just Error' + transformDefault (Reduce rule) = do + (_, rule') <- transformReduce (errorTok, rule) + return $ Announce' rule' + + transformDefault (ErrorShift _) = case (fromJust transformedErrorShift) of + (Just (_, (state, _)), _, _, _) -> Just (ErrorShift' state) + (_, _, Just (_, rule), _) -> Just (Announce' rule) + _ -> Nothing -- The default action could be irrelevant for the RAD state + + transformedErrorShift = case (defaultAction (state raw)) of + ErrorShift state -> Just $ transformShift stateNum (errorTok, (state, undefined)) where stateNum = stateCount + length (gotoStates ++ shiftStates) + _ -> Nothing + newStateFromTransformedErrorShift = maybe Nothing (\(_, s, _, _) -> s) transformedErrorShift + + -- Goto actions and new goto-states: + (transformedGotos, gotoStates) = (catMaybes transformedGotos', catMaybes gotoStates') + (transformedGotos', gotoStates') = unzip $ allGotos' stateCount (gotos (state raw)) [] where + allGotos' :: Int -> [GotoAction] -> [(Maybe GotoAction, Maybe RawRADState)] -> [(Maybe GotoAction, Maybe RawRADState)] + allGotos' _ [] 
result = result + allGotos' nextIndex (goto:gotos) result = case transformGoto nextIndex goto of + res@(_, Just _) -> allGotos' (nextIndex + 1) gotos (result ++ [res]) + res@(_, Nothing) -> allGotos' nextIndex gotos (result ++ [res]) + + -- Transform a normal goto-action into a RAD-goto action to a new type3-state. + -- This function creates both the goto action and the new state. + -- Return Nothing if the goto is not required for RAD. + -- Type-1-specific: The artificial item '_ -> |- . NT' yields a goto action to a new type2(!)-state. + transformGoto :: Int -> GotoAction -> (Maybe GotoAction, Maybe RawRADState) + transformGoto index (tok, (gotoState, _)) + | not isGotoFromType1ToType2 && null gotoItems = (Nothing, Nothing) + | otherwise = (Just newAction, newState) + where + isGotoFromType1ToType2 = radType raw == Type1 && tok == (nt raw) -- type1-specific. May have no goto items as an articifial item is created + gotoItems = plusRad (completion' raw) tok x + + newState + | isGotoFromType1ToType2 = Just $ createType2State x tok (allStates !! gotoState) gotoItems index (i raw) + | null existingState = Just $ createType3State x (allStates !! 
gotoState) gotoItems index (i raw) + | otherwise = Nothing + + newAction = (tok, (index, allItems)) where + allItems = if isGotoFromType1ToType2 then artificial:gotoItems else gotoItems -- gotoItems + artificial item + artificial = Lr0 (-(nt raw)) 1 + index = maybe (i $ fromJust newState) i existingState + + -- an existing (type3) state with the same core can be reused, if existing + existingState + | isGotoFromType1ToType2 = Nothing + | otherwise = find matchesState allRawStates + where + matchesState raw' = gotoItems == core' raw' && radType raw' == Type3 + + + -- Transform shift actions into shift, announce and accept actions: + (shiftShifts, shiftStates, shiftAnnounces, shiftAccepts) = (catMaybes shiftShifts', catMaybes shiftStates', catMaybes shiftAnnounces', catMaybes shiftAccepts') + (shiftShifts', shiftStates', shiftAnnounces', shiftAccepts') = unzip4 $ allShifts' (stateCount + length gotoStates) (shifts (state raw)) [] where + allShifts' :: Int -> [ShiftAction] -> [(Maybe ShiftAction, Maybe RawRADState, Maybe AnnounceAction, Maybe AcceptAction)] -> [(Maybe ShiftAction, Maybe RawRADState, Maybe AnnounceAction, Maybe AcceptAction)] + allShifts' _ [] result = result + allShifts' nextIndex (shift:shifts) result = case transformShift nextIndex shift of + res@(_, Just _, _, _) -> allShifts' (nextIndex + 1) shifts (result ++ [res]) + res@(_, Nothing, _, _) -> allShifts' nextIndex shifts (result ++ [res]) + + -- Transform a normal shift-action into one of the following: + -- A shift action (with a new state), an announce action or an accept action, or nothing if the shift is not relevant for the RAD state. 
+ transformShift :: Int -> ShiftAction -> (Maybe ShiftAction, Maybe RawRADState, Maybe AnnounceAction, Maybe AcceptAction) + transformShift index (tok, (shiftState, _)) + | not (null gotoItems) = (Just shift, newState, Nothing, Nothing) + | not (null announcedRule) = (Nothing, Nothing, Just (tok, rule'), Nothing) + | radType raw == Type2 = (Nothing, Nothing, Nothing, Just tok) -- TODO: sinnvoll? + | otherwise = (Nothing, Nothing, Nothing, Nothing) + where + gotoItems = plusRad (completion' raw) tok x + shift = (tok, (index, gotoItems)) where + index = maybe (i $ fromJust newState) i existingState + + -- an existing (type3) state with the same core can be reused, if existing + existingState = find matchesState allRawStates where + matchesState raw' = gotoItems == core' raw' && radType raw' == Type3 + + newState = case existingState of + Just _ -> Nothing + Nothing -> Just $ createType3State x (allStates !! shiftState) gotoItems index (i raw) + + announcedRule = getAnnouncedRule tok + Just rule' = announcedRule + + -- Announce actions from reduce actions: + announcesFromReduces = mapMaybe transformReduce (reduces (state raw)) + -- Transform a normal reduce-action into a RAD-announce action. + -- Return Nothing if the announce is not relevant for the RAD state. + transformReduce :: ReduceAction -> Maybe AnnounceAction + transformReduce (tok, rule) + | elem (Lr0 rule veryRight) (completion' raw) = Just (tok, rule) + | not (null announcedRule) = Just (tok, rule') + | otherwise = Nothing + where + veryRight = rhsLength' (lookupProdNo g rule) + announcedRule = getAnnouncedRule (lhs g (Lr0 rule 0)) + Just rule' = announcedRule + + -- Accept actions for type-2 states: + -- When a token of the lc-follow-set of NT (on which we accept NT) already has another action, we get an accept conflict. 
+ type2Accepts + | (radType raw) == Type2 = catMaybes $ map toAccept (Data.IntSet.toList $ lcfollow x first follow (nt raw)) + | otherwise = [] + where + toAccept tok + | tok == 0 = Nothing -- epsilon ∈ follow(NT) + | hasOtherAction tok = Nothing -- No accept conflict! This happens e.g. on shift/reduce-conflicts which have been resolved in favor of shift + | otherwise = Just tok + hasOtherAction tok = elem tok otherActions + otherActions = (map fst shifts') ++ (map fst announces') + + -- For a type-1 action: If NT can produce ɛ (either directly, NT -> ɛ or indirectly, NT -> A so that A ->* ɛ) we need a special announce action to announce a related rule. + type1EpsilonAnnounces + | (radType raw) == Type1 && canProduceEpsilon [nt raw] = map toAnnounce validTokens + | otherwise = [] + where + -- All tokens that are in the lc-follow set will produce the special announce action. + validTokens = filter isCandidate allCandidates where + isCandidate tok = tok /= epsilonTok && not (alreadyHasAction tok) + allCandidates = Data.IntSet.toList $ lcfollow x first follow (nt raw) + alreadyHasAction tok -- = elem tok announces || elem tok otherActions where + | elem tok announces = True + | elem tok otherActions = True -- seq (unsafePerformIO (print $ "token " ++ show tok ++ "already has other action, ignoring")) True + | otherwise = False + where + announces = map fst (announcesFromReduces ++ shiftAnnounces) + otherActions = (map fst shifts') ++ (map id accepts') + + toAnnounce tok = (tok, announcedRule) + + -- The question whether the right hand side of a rule can produce epsilon. + canProduceEpsilon = Data.IntSet.member epsilonTok . first + + -- Create the graph consisting of all items in the NT's completion which CAN PRODUCE EPSILON. + -- From these, there should be a way from the NT to a leaf node (X -> .) 
+ core = [Lr0 (-(nt raw)) 0] + + reducedCompletion = filter itemCanProduceEpsilon (completeWithFunction (directCompletion g) core) where + itemCanProduceEpsilon = canProduceEpsilon . rhsAfterDot g + + (_, _rooted, nodes) = recognitionGraph g artState where + artState = LALRState { index = 0, coreItems = core, completionItems = reducedCompletion, shifts = [], gotos = [], reduces = [], defaultAction = Error } + graph = convert _rooted + + -- All vertices reachable from the root node + connectedVertices = delete 0 (reachable graph 0) + + -- Find all reachable leaf vertices of the form X -> . + -- Optimally, there should only be a single one of these. + epsilonItems = filter (isEpsilon . (!!) nodes) connectedVertices where + isEpsilon (Item (Lr0 rule _) _) = rule >= 0 && let (Production _ rhs _ _) = (lookupProdNo g rule) in null rhs + isEpsilon _ = False + + leafNode = case epsilonItems of + [] -> error $ "Cannot happen - there must be an item of the form X -> . in the completion of NT " ++ show ((token_names g) ! (nt raw)) + [item] -> item + _ -> error $ "Multiple leaf nodes X -> . in the completion of NT " ++ show ((token_names g) ! (nt raw)) + + -- All cycle-free paths between 1 (the node _ -> |- . NT) and the leaf node. + allPaths = connect 1 leafNode graph + + connect x y g = helper x y g [x] where -- all cycle-free paths between x and y, from https://stackoverflow.com/questions/11168238 + helper a b g visited + | a == b = [[]] + | otherwise = [(a,c):path | c <- g!a, c `notElem` visited, path <- helper c b g (c:visited)] + + -- Get the single path from the root node 1 to the leaf node + path = case allPaths of + [] -> error $ "Should not happen - there must be a path from the root node to the epsilon node" + [path] -> path + _ -> error $ "Multiple paths from the root node (" ++ showNode (nodes !! 1) ++ ") to epsilon node (" ++ showNode (nodes !! 
leafNode) ++ ")" where + showNode (Item item _) = showItem g item + showNode _ = "" + + -- Find the item / rule to be announced. + -- It is any rule on the path which both: + -- - has the recognition point at the beginning and + -- - is in the rad-completion of the current raw-LC-state. + elements = map snd path -- discard the root node (it cannot be announced as it doesn't correspond to any rule) + validElements = filter (valid . (!!) nodes) elements where + valid (Item item@(Lr0 rule _) _) = (recognitionPoints x) !! rule == 0 && elem item (completion' raw) + valid _ = False + + announcedItem = case validElements of + [] -> error $ "No valid rule to be announced for epsilon-production " ++ showNode (nodes !! leafNode) ++ "in item " ++ showNode (nodes !! 1) + items -> head items + + announcedRule = let (Item (Lr0 rule _) _) = nodes !! announcedItem in rule + + -- This is the traceback of a shift or reduce action to the item and rule through whose recursive completion it was added to the RAD state's completion. + getAnnouncedRule :: Name -> Maybe Int + getAnnouncedRule t = recursiveAnnouncedRule t [] where + -- This version takes a list of inputs that directly return Nothing (i.e. will recurse) to avoid infinte recursion: going in a recursion cycle will return Nothing. + recursiveAnnouncedRule :: Name -> [Name] -> Maybe Int + recursiveAnnouncedRule token nulls + | elem token nulls = Nothing + | otherwise = case directRule of + Just rule -> elemIndex (prod g rule) (productions g) + Nothing -> extendedRule + where + + directRule = find matchingReadyRule (completion' raw) + matchingReadyRule item@(Lr0 rule dot) = (recognitionPoints x) !! rule == dot && hasTokenAfterDot g item && tokenAfterDot g item == token + extendedRule = case length extendedRules of + 0 -> Nothing + 1 -> Just (elemAt 0 extendedRules) + _ -> error $ "Announce conflict! Possible rules: { " ++ intercalate "; " (map (showProd g) (toList extendedRules)) ++ " } in RAD-State " ++ showRaw x raw ++ "!" 
+            extendedRules :: Set Int
+            extendedRules = mapMaybeSet recursive (fromList $ completionItems (state raw))
+            recursive item@(Lr0 _ dot')
+              | dot' == 0 && hasTokenAfterDot g item && tokenAfterDot g item == token = recursiveAnnouncedRule (lhs g item) (token:nulls) -- avoids infinite recursion
+              | otherwise = Nothing
+
+  -- Set analogue of Data.Maybe.mapMaybe: apply f to every element and collect the Just results in a new set.
+  mapMaybeSet :: Ord b => (a -> Maybe b) -> Data.Set.Set a -> Data.Set.Set b
+  mapMaybeSet f = Data.Set.fromList . Data.Maybe.mapMaybe f . Data.Set.toList
+
+
+  -------------------- LALR STATE GENERATION --------------------
+
+  -- Create LALRStates from happy's action and goto tables, bundling state information in a single datatype.
+  -- The n-th Lr1State (counting from 0) becomes the LALRState with index n.
+  generateLALRStates :: Grammar -> ActionTable -> GotoTable -> [Lr1State] -> [LALRState]
+  generateLALRStates g action goto happystates =
+    zipWith (createState g action goto) [0..] (complete g happystates)
+
+  -- `LALRState` bundles required symbol-item mappings for creating a Hinze-like continuation-based
+  -- state function for a state.
+  -- It combines the data from goto and action tables in one coherent data structure.
+  data LALRState = LALRState {
+      index :: Int,
+      coreItems :: [Lr0Item],
+      completionItems :: [Lr0Item],
+      shifts :: [ShiftAction],
+      gotos :: [GotoAction],
+      reduces :: [ReduceAction],
+      defaultAction :: LALRDefaultAction -- Do something per default. This action is NOT explicitly mentioned in the shifts/reduces list.
+    } deriving (Eq
+#ifdef DEBUG
+      , Show
+#endif
+      )
+
+  data LALRDefaultAction = ErrorShift Int -- On errorToken (i.e. default), shift to state X
+                         | Reduce Int     -- Reduce rule X
+                         | Error          -- Call happyError
+    deriving (Eq, Show)
+
+  -- Render a state (index, core items and all of its actions) as text.
+  showState :: Grammar -> LALRState -> [Char]
+  showState g state = concat
+      [ "State ", show (index state)
+      , " { ", unwords (map (showItem g) (coreItems state)), " }"
+      , " – Shifts: ", unwords (map (showTransition "shift to") (shifts state))
+      , " – Gotos: ", unwords (map (showTransition "goto") (gotos state))
+      , " – Reduces: ", unwords (map showReduce (reduces state))
+      , " – Default: ", show (defaultAction state)
+      ]
+    where
+      name sym = token_names g ! sym
+      -- Shifts and gotos share one shape: (symbol, (target state, items of the target)).
+      showTransition verb (sym, (target, items)) =
+        "(on " ++ name sym ++ " " ++ verb ++ " " ++ show target ++ " with items: " ++ unwords (map (showItem g) items) ++ ")"
+      showReduce (token, rule) = "(on " ++ name token ++ " reduce rule " ++ show rule ++ ")"
+
+  -- Create the `LALRState` with index `i` from the given `CompletedLr0State`,
+  -- reading row `i` of the action table and row `i` of the goto table.
+  -- The default action is removed from the explicit shift/reduce lists:
+  --  * a shift on errorTok becomes `ErrorShift`;
+  --  * otherwise, if there is any reduce, the rule reduced on errorTok - or else the rule
+  --    used by the most tokens - becomes `Reduce`;
+  --  * otherwise the default is `Error`.
+  createState :: Grammar -> ActionTable -> GotoTable -> Int -> CompletedLr0State -> LALRState
+  createState g action goto i state = LALRState
+      { index = i
+      , coreItems = core state
+      , completionItems = completion state
+      , shifts = shiftActions
+      , gotos = gotoActions
+      , reduces = reduceActions
+      , defaultAction = fallback
+      }
+    where
+      actionRow = assocs (action ! i)
+
+      -- Table entries which are not a `Goto` are skipped by the pattern in the generator.
+      gotoActions = [ (nt, (toState, plus (completion state) nt g)) | (nt, Goto toState) <- assocs (goto ! i) ]
+
+      -- All shifts from the action table (including a shift on errorTok)
+      allShifts = [ (token, (toState, plus (completion state) token g)) | (token, act) <- actionRow, Just toState <- [shiftTarget act] ]
+      shiftTarget (LR'Shift toState _) = Just toState
+      shiftTarget (LR'Multiple _ (LR'Shift toState _)) = Just toState
+      shiftTarget _ = Nothing
+
+      -- All reduces from the action table. Accepting is treated as reducing the rule of the first core item.
+      allReduces = [ (token, rule) | (token, act) <- actionRow, Just rule <- [reducedRule act] ]
+      reducedRule (LR'Reduce rule _) = Just rule
+      reducedRule (LR'Multiple _ (LR'Reduce rule _)) = Just rule
+      reducedRule LR'Accept = Just acceptRule
+      reducedRule (LR'Multiple _ LR'Accept) = Just acceptRule
+      reducedRule _ = Nothing
+      acceptRule = case core state of
+        Lr0 rule _ : _ -> rule
+        [] -> error "createState: accepting state without core items"
+
+      -- Remove default action (errorShift or reduce) from shifts/reduces
+      shiftActions = filter (not . isErrorAction) allShifts
+      reduceActions
+        | defaultReduce = filter ((/= defaultReduceRule) . snd) allReduces
+        | otherwise = allReduces
+
+      isErrorAction (token, _) = token == errorTok
+
+      defaultErrorShift = any isErrorAction allShifts
+      defaultReduce = not defaultErrorShift && not (null allReduces)
+
+      -- Only evaluated when defaultReduce holds, i.e. when allReduces is non-empty.
+      defaultReduceRule = fromMaybe largestRule errorRule where
+        errorRule = fmap snd (find isErrorAction allReduces)
+        largestRule = (snd . head . head) sortedGroups -- Find reduce rule which is used most often (i.e. by most tokens)
+        sortedGroups = sortBy (flip (comparing length)) (groupBy ((==) `on` snd) (sortBy (comparing snd) allReduces))
+
+      fallback = case find isErrorAction allShifts of
+        Just (_, (toState, _)) -> ErrorShift toState
+        Nothing
+          | defaultReduce -> Reduce defaultReduceRule
+          | otherwise -> Error
+
+
+
+  -------------------- DETERMINING RECOGNITION POINTS --------------------
+
+  -- Determine the recognition points for each rule from the set of all non-free items.
+  -- The n-th entry of the result is the recognition point of the n-th production of the grammar.
+  determineRecognitionPoints :: Grammar -> [Lr0Item] -> [Int]
+  determineRecognitionPoints g nonfree = zipWith recognitionPoint [0..] (productions g) where
+
+    -- No priority -> recognition point = first position where all consecutive positions are free
+    recognitionPoint rule (Production _ rhs _ No) = maybe 0 ((+1) . fst) $ find isNonfree (reverse (zip [0..] rhs)) where
+      isNonfree (pos, tok) = elem (Lr0 rule pos) nonfree || tok == errorTok -- recognition point must come after all error tokens
+
+    -- Priority/associativity -> recognition point must be at the very right
+    recognitionPoint _ (Production _ rhs _ _) = length rhs
+
+  -- Determine all non-free items from all state graphs.
+  -- A vertex is free if it dominates every shift/reduce leaf that is reachable from it.
+  nonfreeItems :: Grammar -> [RecognitionGraph] -> [Lr0Item]
+  nonfreeItems _ graphs = (toList . fromList) (concatMap nonfree graphs) where -- removing duplicates
+    nonfree (_, rooted, nodes) = map itemOf (filter (not . isFree) [0 .. numNodes rooted - 1]) where
+      dominators = dom rooted
+      reachFrom = reach rooted
+      isFree v = all (dominates dominators v) (filter (isLeaf . (nodes !!)) (reachFrom v))
+      isLeaf (ShiftNode _ _) = True
+      isLeaf (ReduceNode _ _) = True
+      isLeaf _ = False
+      itemOf v = case nodes !! v of
+        Item item _ -> item
+        _ -> error "nonfreeItems: a non-item node was classified as non-free"
+
+  -- The number of nodes of a rooted graph.
+  numNodes :: Rooted -> Int
+  numNodes = length . toAdj . snd
+
+  -- Convert a Rooted (used for domination) to a Data.Graph.Graph (used for reachability)
+  convert :: Rooted -> Data.Graph.Graph
+  convert g = listArray (0, numNodes g-1) (map snd (toAdj (snd g)))
+
+  -- All nodes that can be reached from this node using at least 1 edge.
+  -- This means a node only reaches itself if it participates in a cycle.
+  reach :: Rooted -> Vertex -> [Vertex]
+  reach = reach' . convert where
+    reach' g v = others ++ [v | onCycle] where
+      others = delete v (reachable g v)
+      -- v lies on a cycle iff one of its direct successors (possibly v itself) leads back to v.
+      onCycle = any (elem v . reachable g) (g ! v)
+
+  -- True if a dominates b. A node never dominates itself.
+  dominates :: [(Node, Path)] -> Node -> Node -> Bool
+  dominates dom' a b = maybe False (elem a) (lookup b dom')
+
+
+  -------------------- RECOGNITION GRAPH CREATION --------------------
+
+  data RecognitionNode = Init
+                       | Item Lr0Item Bool           -- item, isCore
+                       | ShiftNode Int Name          -- shift to state; token which triggers the shift
+                       | ReduceNode Int (Maybe Name) -- rule which is reduced; token which triggers the reduce (can be Nothing -> it is a default reduce)
+    deriving (Eq
+#ifdef DEBUG
+      , Show
+#endif
+      )
+  type RecognitionGraph = (Int,               -- Index of the LALR state the graph belongs to.
+                           Rooted,            -- Rooted uses Ints to decode the nodes,
+                           [RecognitionNode]) -- so this is the ordered list of all nodes
+
+  -- Create the rooted state graph for a state which is in turn used to determine the recognition points.
+  recognitionGraph :: Grammar -> LALRState -> RecognitionGraph
+  recognitionGraph g (LALRState { index = i, coreItems = coreIs, completionItems = completionIs, shifts = shiftActions, reduces = reduceActions, defaultAction = fallback }) = graph i allNodes isSuccessor where
+
+    -- All nodes of the graph
+    allNodes = [Init] ++ coreNodes ++ completionNodes ++ shiftNodes ++ reduceNodes ++ defaultNode where
+      coreNodes = [Item item True | item <- coreIs]
+      completionNodes = [Item item False | item <- completionIs, item `notElem` coreIs]
+      shiftNodes = [ShiftNode target tok | (tok, (target, _)) <- shiftActions]
+      reduceNodes = [ReduceNode rule (Just tok) | (tok, rule) <- reduceActions]
+      defaultNode = case fallback of
+        ErrorShift target -> [ShiftNode target errorTok]
+        Reduce rule -> [ReduceNode rule Nothing]
+        _ -> []
+
+    -- Successor relation
+    isSuccessor :: RecognitionNode -> RecognitionNode -> Bool
+    isSuccessor Init (Item _ True) = True
+    isSuccessor (Item a _) (Item b _) = isInDirectCompletion g a b
+    isSuccessor (Item item@(Lr0 rule _) _) (ReduceNode rule' _) = rule == rule' && dotIsAtRightEnd g item
+    isSuccessor (Item item _) (ShiftNode _ token) = hasTokenAfterDot g item && (tokenAfterDot g item) == token
+    isSuccessor _ _ = False
+
+  -- Create a graph from the nodes and their successor relation. Here, the nodes are encoded as integers
+  -- (their position in the node list).
+  graph :: Int -> [RecognitionNode] -> (RecognitionNode -> RecognitionNode -> Bool) -> RecognitionGraph
+  graph i nodes succ = (i, (fromJust $ elemIndex Init nodes, fromAdj adjacency), nodes) where
+    -- Pair every node with its number once instead of looking nodes up by position for every pair.
+    numbered = zip [0..] nodes
+    adjacency = [ (a, [ b | (b, to) <- numbered, succ from to ]) | (a, from) <- numbered ]
+
+  -- Pretty-print a graph.
+  showGraph :: Grammar -> RecognitionGraph -> String
+  showGraph g (state, rooted, nodes) = header ++ unlines (zipWith3 showNode [0 :: Int ..] nodes successorLists) where
+    header = "Graph of state " ++ show state ++ ":\n"
+    successorLists = map snd (toAdj (snd rooted))
+    showNode i node successors = unlines (line1 : [line2 | not (null successors)]) where
+      line1 = " " ++ show i ++ ": " ++ describe node
+      line2 = " -> " ++ unwords (map show successors)
+    describe Init = "Init"
+    describe (Item item isCore) = showItem g item ++ if isCore then " (core)" else ""
+    describe (ShiftNode target token) = "Shift to state " ++ show target ++ " (on " ++ (token_names g) ! token ++ ")"
+    describe (ReduceNode rule token) = "Reduce rule " ++ show rule ++ ": " ++ showProd g rule ++ " (on " ++ maybe "default" ((token_names g) !) token ++ ")"
diff --git a/src/RADTools.hs b/src/RADTools.hs
new file mode 100644
index 00000000..0feb6c00
--- /dev/null
+++ b/src/RADTools.hs
@@ -0,0 +1,231 @@
+module RADTools where
+  import Grammar
+  import GHC.Arr
+  import LALR
+  import Data.Maybe
+  import Data.List (elemIndex)
+  import Data.IntSet ()
+  import Data.IntMap ()
+  import qualified Data.Set as Set
+  import Data.Set (Set, toList, fromList, union, (\\))
+
+  -- Lr1State as generated from happy
+  type Lr1State = ([Lr1Item], [(Name, Int)])
+
+  ----- COMPLETE LR0 STATES -----
+
+  -- A state given by its core items (first field) and its completion (second field).
+  data CompletedLr0State = CompletedLr0State [Lr0Item] [Lr0Item]
+    deriving (Eq
+#ifdef DEBUG
+      , Show
+#endif
+      )
+
+  core :: CompletedLr0State -> [Lr0Item]
+  core (CompletedLr0State a _) = a
+  completion :: CompletedLr0State -> [Lr0Item]
+  completion (CompletedLr0State _ a) = a
+
+  -- Complete all states of the grammar, yielding a core and a completion set for each state.
+  -- The lookahead sets of the LR(1) items are dropped.
+  complete :: Grammar -> [Lr1State] -> [CompletedLr0State]
+  complete g = map completeState where
+    completeState (lr1Items, _) = CompletedLr0State kernel (completeWithFunction (directCompletion g) kernel) where
+      kernel = [Lr0 rule dot | Lr1 rule dot _ <- lr1Items]
+
+  -- Show a completed Lr0 state.
+  showCompletedLr0State :: Grammar -> Int -> CompletedLr0State -> String
+  showCompletedLr0State g i (CompletedLr0State kernel closure) =
+    unlines $ ["State " ++ show i ++ ". CORE:"] ++ (map (showItem g) kernel) ++ ["COMPLETION:"] ++ (map (showItem g) closure)
+
+
+  -- Perform a completion with a custom completion function:
+  -- starting from the core, repeatedly add f of every newly found item until nothing new appears.
+  -- The result contains the core itself and is returned in ascending order without duplicates.
+  completeWithFunction :: (Lr0Item -> [Lr0Item]) -> [Lr0Item] -> [Lr0Item]
+  completeWithFunction f core = toList (go start start) where
+    start = fromList core
+
+    -- Complete result. It is always frontier ⊆ result.
+    go :: Set Lr0Item -> Set Lr0Item -> Set Lr0Item
+    go result frontier
+      | Set.null frontier = result
+      | otherwise = go (union result newItems) (newItems \\ result) where
+        newItems = Set.unions (map (fromList . f) (toList frontier))
+
+  ----- LR0ITEM -----
+
+  -- Get the number of tokens in the RHS of a rule.
+  rhsLength :: Grammar -> Lr0Item -> Int
+  rhsLength = rhsLength' .* prod
+  rhsLength' :: Production -> Int
+  rhsLength' (Production _ rhs _ _) = length rhs
+
+  -- Get the production belonging to an item.
+  prod :: Grammar -> Lr0Item -> Production
+  prod g (Lr0 rule _) = lookupProdNo g rule
+
+  -- Determine whether the dot is at the very right end of an item.
+  dotIsAtRightEnd :: Grammar -> Lr0Item -> Bool
+  dotIsAtRightEnd g item@(Lr0 _ dot) = (rhsLength g item) == dot
+
+  -- Determine whether the dot is at the very beginning (left end) of an item.
+  dotIsAtLeftEnd :: Grammar -> Lr0Item -> Bool
+  dotIsAtLeftEnd _ (Lr0 _ dot) = 0 == dot
+
+  -- Determine if the dot is NOT at the very beginning of an item.
+  -- Iff this returns true you may call tokenBeforeDot.
+  hasTokenBeforeDot :: Grammar -> Lr0Item -> Bool
+  hasTokenBeforeDot = not .* dotIsAtLeftEnd
+
+  -- Determine if the dot is NOT at the very right end of an item.
+  -- Iff this returns true you may call tokenAfterDot.
+  hasTokenAfterDot :: Grammar -> Lr0Item -> Bool
+  hasTokenAfterDot = not .* dotIsAtRightEnd
+
+  -- Check whether a rule has a nonterminal symbol after its dot
+  hasNonterminalAfterDot :: Grammar -> Lr0Item -> Bool
+  hasNonterminalAfterDot g item = hasTokenAfterDot g item && isNonterminal g (tokenAfterDot g item)
+
+  -- Get the token immediately after the dot.
+  -- Diverges if the dot is at the end - call "hasTokenAfterDot" to determine if it is valid to call "tokenAfterDot"
+  tokenAfterDot :: Grammar -> Lr0Item -> Name
+  tokenAfterDot g item@(Lr0 _ dot) = tokenAtPosition g item dot
+
+  -- Get the token immediately before the dot.
+  -- Diverges if the dot is at the beginning - call "hasTokenBeforeDot" to determine if it is valid to call "tokenBeforeDot"
+  tokenBeforeDot :: Grammar -> Lr0Item -> Name
+  tokenBeforeDot g item@(Lr0 _ dot) = tokenAtPosition g item (dot-1)
+
+  -- Get the token in the RHS of an item at a certain position. The dot is ignored.
+  -- Diverges if the position is outside of the RHS.
+  tokenAtPosition :: Grammar -> Lr0Item -> Int -> Name
+  tokenAtPosition g item pos = rhs (prod g item) !! pos
+
+  -- Determine whether a token is a nonterminal
+  isNonterminal :: Grammar -> Name -> Bool
+  isNonterminal = flip elem . non_terminals
+
+  -- Get the nonterminal on the left side of an item
+  lhs :: Grammar -> Lr0Item -> Name
+  lhs = lhs' .* prod
+  lhs' :: Production -> Name
+  lhs' (Production lhs _ _ _) = lhs
+
+  -- All tokens after the dot. An artificial item (negative rule number -NT) stands for "_ -> NT".
+  rhsAfterDot :: Grammar -> Lr0Item -> [Name]
+  rhsAfterDot g item@(Lr0 rule dot)
+    | rule < 0 = if dot == 0 then [-rule] else [] -- artificial NT handling
+    | otherwise = drop dot $ rhs (prod g item)
+
+  rhs :: Production -> [Name]
+  rhs (Production _ rhs _ _) = rhs
+
+  -- Calculate the items which are in the IMMEDIATE completion of an item.
+  -- For example, "A -> b . C D" has "C -> . D e" in its direct completion,
+  -- but not the recursive completion "D -> . f" (which would be in the direct completion of "C -> . D e").
+  -- The item itself may also be in its own completion.
+  directCompletion :: Grammar -> Lr0Item -> [Lr0Item]
+  directCompletion g item@(Lr0 rule dot)
+    | rule < 0 && dot == 0 = itemsStartingWith g (-rule) -- special case: completion of artificial item _ -> . NT
+    | hasNonterminalAfterDot g item = itemsStartingWith g (tokenAfterDot g item)
+    | otherwise = []
+
+  -- Determine whether item 2 is in the direct completion of item 1, as described above.
+  -- Therefore, item 2 must be of the form "A -> (DOT) ..."
+  -- An item may be in their own completion.
+  isInDirectCompletion :: Grammar -> Lr0Item -> Lr0Item -> Bool
+  isInDirectCompletion g item1@(Lr0 rule dot) item2@(Lr0 rule' dot')
+    | rule' < 0 = False -- an artificial item is never in a completion
+    | rule < 0 = dot == 0 && dot' == 0 && itemStartsWith g item2 (-rule)
+    | otherwise =
+        dot' == 0 &&
+        hasNonterminalAfterDot g item1 &&
+        itemStartsWith g item2 (tokenAfterDot g item1)
+
+  -- Get all rules starting with "A -> ...", in form of the item "A -> (DOT) ..."
+  -- The rule number of a production is its position in `productions g`. It is taken from the
+  -- position directly, so structurally equal productions keep their own rule numbers.
+  itemsStartingWith :: Grammar -> Name -> [Lr0Item]
+  itemsStartingWith g token = [Lr0 rule 0 | (rule, Production from _ _ _) <- zip [0..] (productions g), from == token]
+
+  -- Determine whether the item starts with the token, i.e. whether the token is the left-hand side of its rule
+  itemStartsWith :: Grammar -> Lr0Item -> Name -> Bool
+  itemStartsWith g item token = lhs g item == token
+
+  -- Convert an Lr0Item to a string, for example "A -> b · C D"
+  showItem :: Grammar -> Lr0Item -> String
+  showItem = showItemWithSeparator "·"
+
+  -- Like showItem, but with a custom string marking the position of the dot.
+  showItemWithSeparator :: String -> Grammar -> Lr0Item -> String
+  showItemWithSeparator sep g (Lr0 rule dot)
+    | rule < 0 = -- artificial NT handling
+        let nt = -rule in if dot == 0 then "_ -> " ++ sep ++ " " ++ showToken nt else "_ -> " ++ showToken nt ++ " " ++ sep
+    | otherwise = unwords (withSeparator (lookupProdNo g rule))
+    where
+      -- The "A ->" prefix is the first element, so the separator goes after dot + 1 elements.
+      withSeparator (Production from to _ _) = before ++ [sep] ++ after where
+        (before, after) = splitAt (dot + 1) ((showToken from ++ " ->") : map showToken to)
+      showToken tok = (token_names g) ! tok
+
+  ----- PRODUCTION -----
+
+  -- Convert a production (represented by its index) to a string, for example "A -> b C D"
+  showProd :: Grammar -> Int -> String
+  showProd g i = unwords (pieces (lookupProdNo g i)) where
+    pieces (Production from to _ _) = (showToken from ++ " ->") : map showToken to
+    showToken tok = (token_names g) ! tok
+
+  -- Convert a production (represented by its index) and its recognition point to a string, for example "A -> b C • D"
+  showRecognitionPoint :: XGrammar -> Int -> String
+  showRecognitionPoint x rule = showItemWithSeparator "•" (g x) (Lr0 rule point) where
+    point = (recognitionPoints x) !! rule
+
+  ----- RAD-SPECIFIC -----
+
+  -- Extended grammar containing RAD-relevant data like recognition points.
+  -- recognitionPoints is indexed by rule number.
+  data XGrammar = XGrammar {
+      g :: Grammar,
+      recognitionPoints :: [Int]
+    }
+
+  -- The rad-completion of a set of core items, defined as follows:
+  -- Each core item is in the completion.
+  -- If an item I = A -> b . C d is in the completion, and the dot is before the recognition point of the associated rule, then all items C -> ... are in the completion.
+  radCompletion :: XGrammar -> [Lr0Item] -> [Lr0Item]
+  radCompletion x core = completeWithFunction directRadCompletion core where
+    directRadCompletion item@(Lr0 rule dot)
+      -- rule < 0: special handling for item _ -> . NT (must be tested first, it has no recognition point)
+      | rule < 0 || dot < (recognitionPoints x) !! rule = directCompletion (g x) item
+      | otherwise = []
+
+
+  -- Perform Q+'X, but only consider items in Q/'X (i.e. where the dot is before the recognition point):
+  -- Q+'X = { A -> β X . Ɣ | A -> β . X Ɣ elem Q, recog. point is after X }
+  plusRad :: [Lr0Item] -> Name -> XGrammar -> [Lr0Item]
+  plusRad q y x = filter nonready' (plus q y (g x)) where
+    -- The dot has already been moved over X here, hence "<=" instead of "<".
+    nonready' (Lr0 rule dot) = dot <= (recognitionPoints x) !! rule
+
+  -- Perform Q/X, but discard items where the dot is at or after the recognition point:
+  -- Q/'X = { A -> β . X Ɣ elem Q, recog. point is after X }
+  hdivRad :: [Lr0Item] -> Name -> XGrammar -> [Lr0Item]
+  hdivRad q y x = filter nonready (hdiv q y (g x)) where
+    nonready (Lr0 rule dot) = dot < (recognitionPoints x) !! rule
+
+  -- Perform Q+X as described by Hinze:
+  -- Q+X = { A -> β X . Ɣ | A -> β . X Ɣ elem Q }
+  plus :: [Lr0Item] -> Name -> Grammar -> [Lr0Item]
+  plus q x g = [Lr0 rule (dot+1) | Lr0 rule dot <- hdiv q x g]
+
+  -- Perform Q/X as described by Hinze:
+  -- Q/X = { A -> β . X Ɣ elem Q }
+  hdiv :: [Lr0Item] -> Name -> Grammar -> [Lr0Item]
+  hdiv q x g = filter matches q where
+    matches item = hasTokenAfterDot g item && tokenAfterDot g item == x
+
+  ----- MORE -----
+
+  -- Compose a one-argument function with a two-argument function: (f .* h) a b = f (h a b)
+  (.*) :: (c -> d) -> (a -> b -> c) -> (a -> b -> d)
+  (.*) = (.) . (.)
\ No newline at end of file