Skip to content

Commit

Permalink
feat: Better generation of arbitrary code.
Browse files Browse the repository at this point in the history
It's pretty far from good, but at least it's mostly time-bounded now.
  • Loading branch information
iphydf committed Jan 18, 2024
1 parent f5e40ac commit 9ade206
Show file tree
Hide file tree
Showing 8 changed files with 85 additions and 44 deletions.
2 changes: 1 addition & 1 deletion BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ haskell_library(
"//third_party/haskell:bytestring",
"//third_party/haskell:data-fix",
"//third_party/haskell:file-embed",
"//third_party/haskell:recursion-schemes",
"//third_party/haskell:text",
"//third_party/haskell:transformers-compat",
],
Expand Down Expand Up @@ -97,6 +96,7 @@ haskell_library(
"//third_party/haskell:data-fix",
"//third_party/haskell:extra",
"//third_party/haskell:file-embed",
"//third_party/haskell:mtl",
"//third_party/haskell:text",
"//third_party/haskell:transformers-compat",
],
Expand Down
2 changes: 1 addition & 1 deletion happy-arbitrary.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ library
, data-fix
, extra
, file-embed
, recursion-schemes
, mtl
, text
, transformers-compat

Expand Down
9 changes: 9 additions & 0 deletions src/Language/Happy.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- | Umbrella module: re-exports the AST, lexer and parser modules and
-- defines the 'Grammar' alias used by consumers of the package.
module Language.Happy (module X, Grammar) where

import Data.Text (Text)
import Language.Happy.Ast as X
import Language.Happy.Lexer as X
import Language.Happy.Parser as X


-- | A parsed happy grammar: the root AST 'Node' over 'Text' lexemes.
type Grammar = Node (Lexeme Text)
86 changes: 56 additions & 30 deletions src/Language/Happy/Arbitrary.hs
Original file line number Diff line number Diff line change
@@ -1,47 +1,73 @@
{-# OPTIONS_GHC -Wwarn #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Strict #-}
module Language.Happy.Arbitrary where

import Control.Applicative ((<|>))
import Control.Monad.Extra (concatMapM)
import Data.Fix (foldFix)
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Maybe (fromJust)
import Data.Text (Text)
import qualified Data.Text as Text
import Language.Happy.Ast (Node, NodeF (..))
import Language.Happy.Lexer (Lexeme, lexemeText)
import qualified Test.QuickCheck.Gen as Gen
import Test.QuickCheck.Gen (Gen)

genTokens :: Text -> Node (Lexeme Text) -> Gen [Text]
genTokens start g = do
case Map.lookup start rules of
import Control.Applicative ((<|>))
import Control.Monad.Extra (concatMapM)
import Control.Monad.State.Lazy (State)
import qualified Control.Monad.State.Lazy as State
import Data.Fix (foldFix)
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Maybe (fromJust)
import Data.Text (Text)
import qualified Data.Text as Text
import Debug.Trace (traceM)

-- NOTE: GitHub Actions annotation here (workflow: publish / Publish to Hackage):
-- warning: The import of ‘Debug.Trace’ is redundant.
import Language.Happy.Ast (Node, NodeF (..))
import Language.Happy.Lexer (Lexeme, lexemeText)
import Test.QuickCheck.Arbitrary (arbitrary)
import qualified Test.QuickCheck.Gen as Gen
import Test.QuickCheck.Gen (Gen)

-- | Generator configuration: how to turn a terminal's text, as written
-- in the grammar's @%token@ section, into an actual token value.
newtype Config token = Config
{ parseToken :: Text -> token
}

-- | Build a 'Config' from just the token-parsing function.
defConfig :: (Text -> token) -> Config token
defConfig parseToken = Config{parseToken}

genTokens :: Show token => Config token -> Text -> Node (Lexeme Text) -> Gen [token]
genTokens cfg start g = do
rng <- Gen.scale (*2) arbitrary
-- _ <- traceM $ "============================\nrng = " <> show rng
return $ case Map.lookup start rules of
Nothing -> error $ "no such rule: " <> Text.unpack start
Just r -> expand tokens rules r
Just r -> State.evalState (expand cfg tokens rules r) rng
where
tokens = foldFix terminals g
tokens = foldFix (terminals cfg) g
rules = foldFix nonterminals g

expand :: Map Text Text -> Map Text [[Text]] -> [[Text]] -> Gen [Text]
expand tokens rules r = do
rule <- Gen.elements r
let expanded = map (resolve tokens rules) rule
concatMapM (continue tokens rules) expanded
-- | Expand one non-terminal (a list of alternative productions) into a
-- flat token sequence.  The random-index stream carried in the state
-- decides which alternative is taken at each step.
expand :: Show token => Config token -> Map Text token -> Map Text [[Text]] -> [[Text]] -> State [Int] [token]
expand cfg tokens rules alternatives = do
    production <- select alternatives
    concatMapM (continue cfg tokens rules . resolve tokens rules) production

-- | Pick one element from a non-empty list of alternatives, driven by
-- the stream of random indices held in the state.
--
-- When the random stream is exhausted, fall back to the first
-- alternative; this is what keeps generation time-bounded, assuming the
-- grammar lists a terminating production first.
-- NOTE(review): if the first alternative is itself recursive, the
-- fallback can still diverge — confirm grammars order their productions
-- accordingly.
select :: [a] -> State [Int] a
select [] = error "select: cannot choose from an empty list of alternatives"
select alternatives@(first:_) = do
    rng <- State.get
    case rng of
        [] -> return first
        (i:is) -> do
            State.put is
            -- `mod` keeps any random Int within bounds of the list.
            return $ alternatives !! (i `mod` length alternatives)

continue :: Map Text Text -> Map Text [[Text]] -> Either Text [[Text]] -> Gen [Text]
continue _ _ (Left token) = return [token]
continue tokens rules (Right rule) = expand tokens rules rule
-- | Turn one resolved symbol into tokens: a terminal ('Left') becomes a
-- singleton list, a non-terminal ('Right') is expanded recursively.
continue :: Show token => Config token -> Map Text token -> Map Text [[Text]] -> Either token [[Text]] -> State [Int] [token]
continue cfg tokens rules = either (return . (: [])) (expand cfg tokens rules)

resolve :: Map Text Text -> Map Text [[Text]] -> Text -> Either Text [[Text]]
-- | Look up a grammar symbol: terminals resolve to their token value
-- ('Left'), non-terminals to their list of productions ('Right').
--
-- An unknown symbol indicates a malformed grammar; abort with an error
-- that names the symbol (previously an uninformative 'fromJust'
-- pattern-match failure).
resolve :: Show token => Map Text token -> Map Text [[Text]] -> Text -> Either token [[Text]]
resolve tokens rules sym =
    case (Left <$> Map.lookup sym tokens) <|> (Right <$> Map.lookup sym rules) of
        Just result -> result
        Nothing     -> error $ "resolve: unknown grammar symbol: " <> Text.unpack sym

terminals :: NodeF (Lexeme Text) (Map Text Text) -> Map Text Text
terminals node = case node of
Token k v -> Map.singleton (lexemeText k) (lexemeText v)
-- | Collect the terminal definitions from the grammar AST: maps each
-- token name to its token value, obtained by running 'parseToken' on
-- the token's literal text.  Non-'Token' nodes merge their children.
terminals :: Config token -> NodeF (Lexeme Text) (Map Text token) -> Map Text token
terminals Config{parseToken} node = case node of
Token k v -> Map.singleton (lexemeText k) (parseToken $ lexemeText v)
n -> Map.unions n

nonterminals :: NodeF (Lexeme Text) (Map Text [[Text]]) -> Map Text [[Text]]
Expand Down
1 change: 1 addition & 0 deletions src/Language/Happy/Lexer.x
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ tokens :-
<0> "%lexer" { mkL KwLexer }
<0> "%monad" { mkL KwMonad }
<0> "%name" { mkL KwName }
<0> "%prec" { mkL KwPrec }
<0> "%right" { mkL KwRight }
<0> "%token" { mkL KwToken }
<0> "%tokentype" { mkL KwTokentype }
Expand Down
23 changes: 13 additions & 10 deletions src/Language/Happy/Parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import Language.Happy.Tokens (LexemeClass (..))
%token
ID_NAME { L _ IdName _ }

'{}' { L _ LitCode _ }
'{code}' { L _ LitCode _ }

'%errorhandlertype' { L _ KwErrorhandlertype _ }
'%error' { L _ KwError _ }
Expand All @@ -39,6 +39,7 @@ import Language.Happy.Tokens (LexemeClass (..))
'%lexer' { L _ KwLexer _ }
'%monad' { L _ KwMonad _ }
'%name' { L _ KwName _ }
'%prec' { L _ KwPrec _ }
'%right' { L _ KwRight _ }
'%token' { L _ KwToken _ }
'%tokentype' { L _ KwTokentype _ }
Expand All @@ -59,8 +60,8 @@ Grammar

Code :: { [Term] }
Code
: '{}' { [$1] }
| Code '{}' { $1 ++ [$2] }
: '{code}' { [$1] }
| Code '{code}' { $1 ++ [$2] }

Pragmas :: { [NonTerm] }
Pragmas
Expand All @@ -72,10 +73,10 @@ Pragma
: '%expect' LIT_INTEGER { Fix $ PragmaExpect $2 }
| '%name' ID_NAME ID_NAME { Fix $ PragmaName $2 $3 }
| '%errorhandlertype' ID_NAME { Fix $ PragmaErrorHandlerType $2 }
| '%error' '{}' { Fix $ PragmaError $2 }
| '%lexer' '{}' '{}' { Fix $ PragmaLexer $2 $3 }
| '%monad' '{}' { Fix $ PragmaMonad $2 }
| '%tokentype' '{}' { Fix $ PragmaTokenType $2 }
| '%error' '{code}' { Fix $ PragmaError $2 }
| '%lexer' '{code}' '{code}' { Fix $ PragmaLexer $2 $3 }
| '%monad' '{code}' { Fix $ PragmaMonad $2 }
| '%tokentype' '{code}' { Fix $ PragmaTokenType $2 }
| '%token' Tokens { Fix $ PragmaToken $2 }
| '%left' TokenNames { Fix $ PragmaLeft $2 }
| '%right' TokenNames { Fix $ PragmaRight $2 }
Expand All @@ -92,7 +93,7 @@ Tokens

Token :: { NonTerm }
Token
: TokenName '{}' { Fix $ Token $1 $2 }
: TokenName '{code}' { Fix $ Token $1 $2 }

TokenName :: { Term }
TokenName
Expand All @@ -110,7 +111,7 @@ Rule

RuleType :: { NonTerm }
RuleType
: ID_NAME '::' '{}' { Fix $ RuleType $1 $3 }
: ID_NAME '::' '{code}' { Fix $ RuleType $1 $3 }

RuleDefn :: { NonTerm }
RuleDefn
Expand All @@ -123,7 +124,9 @@ RuleLines

RuleLine :: { NonTerm }
RuleLine
: TokenNames '{}' { Fix $ RuleLine $1 $2 }
: '{code}' { Fix $ RuleLine [] $1 }
| TokenNames '{code}' { Fix $ RuleLine $1 $2 }
| TokenNames '%prec' ID_NAME '{code}' { Fix $ RuleLine $1 $4 }


{
Expand Down
1 change: 1 addition & 0 deletions src/Language/Happy/Tokens.hs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ data LexemeClass
| KwLexer
| KwMonad
| KwName
| KwPrec
| KwRight
| KwToken
| KwTokentype
Expand Down
5 changes: 3 additions & 2 deletions test/Language/Happy/ArbitrarySpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import qualified Data.ByteString.Lazy as LBS
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text
import Language.Happy.Arbitrary (genTokens)
import Language.Happy.Arbitrary (defConfig, genTokens)
import Language.Happy.Ast (Node)
import Language.Happy.Lexer (Lexeme, runAlex)
import Language.Happy.Parser (parseGrammar)
Expand All @@ -29,6 +29,7 @@ sampleToken c = case c of
KwLexer -> "%lexer"
KwMonad -> "%monad"
KwName -> "%name"
KwPrec -> "%prec"
KwRight -> "%right"
KwToken -> "%token"
KwTokentype -> "%tokentype"
Expand Down Expand Up @@ -60,7 +61,7 @@ spec :: Spec
spec = tryParseGrammar $ \g -> do
describe "genTokens" $ do
it "generates sequences that can be parsed again using the same grammar" $
forAll (Text.intercalate " " . map (sampleToken . parseToken) <$> genTokens "Grammar" g) $ \code -> do
forAll (Text.intercalate " " . map sampleToken <$> genTokens (defConfig parseToken) "Grammar" g) $ \code -> do
case runAlex (LBS.fromStrict . Text.encodeUtf8 $ code) parseGrammar of
Left err -> expectationFailure err
Right ok -> print ok

0 comments on commit 9ade206

Please sign in to comment.