-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This will enable caching analysis results in a text format that can be parsed much more efficiently than re-running the entire fixed point analysis.
- Loading branch information
Showing
4 changed files
with
612 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
{-# LANGUAGE OverloadedStrings #-} | ||
-- | The atoms of the concrete syntax for macaw | ||
module Data.Macaw.Syntax.Atom ( | ||
Keyword(..) | ||
, keywords | ||
, AtomName(..) | ||
, Atom(..) | ||
) | ||
where | ||
|
||
import qualified Data.Map.Strict as Map | ||
import qualified Data.Text as T | ||
import Numeric.Natural ( Natural ) | ||
|
||
-- | Macaw syntax keywords | ||
-- | ||
-- These are the keywords for the *base* macaw IR (i.e., without | ||
-- architecture-specific extensions). The architecture-specific operations are | ||
-- parsed as 'AtomName's initially and resolved by architecture-specific parsers | ||
-- at the atom level. | ||
data Keyword = BVAdd | ||
| BVSub | ||
| BVMul | ||
| BVSDiv | ||
| BVUDiv | ||
| Lt | ||
| Le | ||
| Sle | ||
| Slt | ||
-- Syntax | ||
| Assign | ||
-- Statements | ||
| Comment | ||
| InstructionStart | ||
| WriteMemory | ||
| CondWriteMemory | ||
-- Expressions | ||
| ReadMemory | ||
-- Boolean operations | ||
| Not_ | ||
| And_ | ||
| Or_ | ||
| Xor_ | ||
-- Endianness | ||
| BigEndian | ||
| LittleEndian | ||
-- MemRepr | ||
| BVMemRepr | ||
-- Types | ||
| Bool_ | ||
| BV_ | ||
| Float_ | ||
| Tuple_ | ||
| Vec_ | ||
-- Values | ||
| True_ | ||
| False_ | ||
| BV | ||
| Undefined | ||
deriving (Eq, Ord, Show) | ||
|
||
-- | Uninterpreted atoms | ||
newtype AtomName = AtomText T.Text | ||
deriving (Eq, Ord, Show) | ||
|
||
data Atom = Keyword !Keyword -- ^ Keywords include all of the built-in expressions and operators | ||
| AtomName !AtomName -- ^ Non-keyword syntax atoms (to be interpreted at parse time) | ||
| Register !Natural -- ^ A numbered local register (e.g., @r12@) | ||
| AddressWord !Natural -- ^ An arbitrary address rendered in hex ('ArchAddrWord') | ||
| SegmentOffset !Natural -- ^ A segment offset address rendered in hex (validation against the Memory object is required) | ||
| Integer_ !Integer -- ^ Literal integers | ||
| Natural_ !Natural -- ^ Literal naturals | ||
| String_ !T.Text -- ^ Literal strings | ||
deriving (Eq, Ord, Show) | ||
|
||
keywords :: Map.Map T.Text Keyword | ||
keywords = Map.fromList [ ("bv-add", BVAdd) | ||
, ("bv-sub", BVSub) | ||
, ("bv-mul", BVMul) | ||
, ("bv-sdiv", BVSDiv) | ||
, ("bv-udiv", BVUDiv) | ||
, ("<", Lt) | ||
, ("<=", Le) | ||
, ("<$", Slt) | ||
, ("<=$", Sle) | ||
, ("not", Not_) | ||
, ("and", And_) | ||
, ("or", Or_) | ||
, ("xor", Xor_) | ||
, ("Bool", Bool_) | ||
, ("BV", BV_) | ||
, ("Float", Float_) | ||
, ("Tuple", Tuple_) | ||
, ("Vec", Vec_) | ||
, ("true", True_) | ||
, ("false", False_) | ||
, ("bv", BV) | ||
, ("undefined", Undefined) | ||
, ("read-memory", ReadMemory) | ||
, (":=", Assign) | ||
, ("comment", Comment) | ||
, ("instruction-start", InstructionStart) | ||
, ("write-memory", WriteMemory) | ||
, ("cond-write-memory", CondWriteMemory) | ||
, ("big-endian", BigEndian) | ||
, ("little-endian", LittleEndian) | ||
, ("bv-mem", BVMemRepr) | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,269 @@ | ||
{-# LANGUAGE DataKinds #-} | ||
{-# LANGUAGE FlexibleContexts #-} | ||
{-# LANGUAGE GADTs #-} | ||
{-# LANGUAGE RankNTypes #-} | ||
{-# LANGUAGE ScopedTypeVariables #-} | ||
{-# LANGUAGE TypeApplications #-} | ||
{-# LANGUAGE TypeOperators #-} | ||
{-# LANGUAGE ViewPatterns #-} | ||
-- | This module defines a concrete syntax parser for macaw, which is meant to | ||
-- make it easy to persist analysis results and reuse them without recomputing | ||
-- everything | ||
-- | ||
-- The persisted artifact is a zip file with one file per function, plus a | ||
-- manifest file that records metadata about the binary to enable the loader to | ||
-- check to ensure that the loaded macaw IR actually matches the given binary. | ||
module Data.Macaw.Syntax.Parser ( | ||
parseDiscoveryFunInfo | ||
) where | ||
|
||
import Control.Applicative ( (<|>) ) | ||
import qualified Data.ByteString as BS | ||
import qualified Data.Map as Map | ||
import qualified Data.Parameterized.Classes as PC | ||
import qualified Data.Parameterized.NatRepr as PN | ||
import Data.Parameterized.Some ( Some(..) ) | ||
import qualified Data.Text as T | ||
import GHC.TypeLits ( type (<=) ) | ||
import Numeric.Natural ( Natural ) | ||
import qualified Text.Megaparsec as TM | ||
import qualified Text.Megaparsec.Char as TMC | ||
import qualified Text.Megaparsec.Char.Lexer as TMCL | ||
|
||
import qualified Data.Macaw.CFG as MC | ||
import qualified Data.Macaw.Discovery as MD | ||
import qualified Data.Macaw.Discovery.State as MDS | ||
import qualified Data.Macaw.Discovery.ParsedContents as Parsed | ||
import qualified Data.Macaw.Memory as MM | ||
import Data.Macaw.Syntax.Atom | ||
import qualified Data.Macaw.Syntax.SExpr as SExpr | ||
import qualified Data.Macaw.Types as MT | ||
|
||
|
||
parse | ||
:: (SExpr.Syntax Atom -> Either SExpr.MacawSyntaxError a) | ||
-> SExpr.Parser arch ids a | ||
parse asSomething = do | ||
at <- SExpr.sexp parseAtom | ||
case asSomething at of | ||
Left err -> TM.customFailure err | ||
Right r -> return r | ||
|
||
parseSExpr | ||
:: forall arch ids a | ||
. SExpr.Syntax Atom | ||
-> (SExpr.Syntax Atom -> Either SExpr.MacawSyntaxError a) | ||
-> SExpr.Parser arch ids a | ||
parseSExpr sexp asSomething = | ||
case asSomething sexp of | ||
Left err -> TM.customFailure err | ||
Right r -> return r | ||
|
||
data WidthRepr where | ||
WidthRepr :: (1 <= n) => PN.NatRepr n -> WidthRepr | ||
|
||
-- | Attempt to convert a 'Natural' into a non-zero 'PN.NatRepr' | ||
asWidthRepr :: Natural -> Maybe WidthRepr | ||
asWidthRepr nat = | ||
case PN.mkNatRepr nat of | ||
Some nr | ||
| Right PN.LeqProof <- PN.isZeroOrGT1 nr -> Just (WidthRepr nr) | ||
| otherwise -> Nothing | ||
|
||
asEndianness :: SExpr.Syntax Atom -> Either SExpr.MacawSyntaxError MM.Endianness | ||
asEndianness at = | ||
case at of | ||
SExpr.A (Keyword BigEndian) -> Right MM.BigEndian | ||
SExpr.A (Keyword LittleEndian) -> Right MM.LittleEndian | ||
_ -> Left (SExpr.InvalidEndianness at) | ||
|
||
asMemRepr :: SExpr.Syntax Atom -> Either SExpr.MacawSyntaxError (SomeTyped MC.MemRepr) | ||
asMemRepr at = | ||
case at of | ||
SExpr.L [ SExpr.A (Keyword BVMemRepr), SExpr.A (Natural_ w), send ] | ||
| Just (WidthRepr nr) <- asWidthRepr w -> do | ||
end <- asEndianness send | ||
let nr8 = PN.natMultiply (PN.knownNat @8) nr | ||
-- This cannot fail because we already checked that @w@ is >= 1 above | ||
case PN.isZeroOrGT1 nr8 of | ||
Left _ -> error ("Impossible; w was >= 1, so w * 8 must be >= 1: " ++ show nr8) | ||
Right PN.LeqProof -> do | ||
let tyRep = MT.BVTypeRepr nr8 | ||
return (SomeTyped tyRep (MC.BVMemRepr nr end)) | ||
|
||
data SomeTyped f where | ||
SomeTyped :: MT.TypeRepr tp -> f tp -> SomeTyped f | ||
|
||
asValue :: SExpr.Syntax Atom -> SExpr.Parser arch ids (SomeTyped (MC.Value arch ids)) | ||
asValue at = | ||
case at of | ||
SExpr.A (Keyword True_) -> return (SomeTyped MT.BoolTypeRepr (MC.BoolValue True)) | ||
SExpr.A (Keyword False_) -> return (SomeTyped MT.BoolTypeRepr (MC.BoolValue False)) | ||
SExpr.L [SExpr.A (Keyword BV), SExpr.A (Natural_ w), SExpr.A (Integer_ i)] | ||
| Just (WidthRepr nr) <- asWidthRepr w -> return (SomeTyped (MT.BVTypeRepr nr) (MC.BVValue nr i)) | ||
| otherwise -> TM.customFailure SExpr.InvalidZeroWidthBV | ||
SExpr.A (Register rnum) -> do | ||
Some val <- SExpr.valueForRegisterNumber rnum | ||
return (SomeTyped (MT.typeRepr val) val) | ||
|
||
parseIdentifier :: SExpr.Parser arch ids T.Text | ||
parseIdentifier = undefined | ||
|
||
parseKeywordOrAtom :: SExpr.Parser arch ids Atom | ||
parseKeywordOrAtom = do | ||
x <- parseIdentifier | ||
return $! maybe (AtomName (AtomText x)) Keyword (Map.lookup x keywords) | ||
|
||
parseNatural :: SExpr.Parser arch ids Atom | ||
parseNatural = Natural_ <$> TMCL.decimal | ||
|
||
parseInteger :: SExpr.Parser arch ids Atom | ||
parseInteger = TM.try (Integer_ <$> (TMC.char '+' >> TMCL.decimal)) | ||
<|> (Integer_ <$> (TMC.char '-' >> TMCL.decimal)) | ||
|
||
parseAtom :: SExpr.Parser arch ids Atom | ||
parseAtom = TM.try parseKeywordOrAtom | ||
<|> TM.try parseNatural | ||
<|> TM.try parseInteger | ||
|
||
asApp :: SExpr.Syntax Atom -> SExpr.Parser arch ids (SomeTyped (MC.App (MC.Value arch ids))) | ||
asApp a = | ||
case a of | ||
SExpr.L [ SExpr.A (Keyword BVAdd), slhs, srhs ] -> do | ||
SomeTyped lrep lhs <- asValue slhs | ||
SomeTyped rrep rhs <- asValue srhs | ||
case (PC.testEquality lrep rrep, lrep) of | ||
(Just PC.Refl, MT.BVTypeRepr nr) -> return (SomeTyped lrep (MC.BVAdd nr lhs rhs)) | ||
|
||
-- | Parse a single type as a 'MT.TypeRepr' | ||
-- | ||
-- The type forms are: | ||
-- | ||
-- * @Bool@ | ||
-- * @(BV n)@ | ||
-- * @(Vec n ty)@ | ||
asTypeRepr :: SExpr.Syntax Atom -> Either SExpr.MacawSyntaxError (Some MT.TypeRepr) | ||
asTypeRepr at = | ||
case at of | ||
SExpr.A (Keyword Bool_) -> Right (Some MT.BoolTypeRepr) | ||
SExpr.L [SExpr.A (Keyword BV_), SExpr.A (Natural_ w)] | ||
| Just (WidthRepr nr) <- asWidthRepr w -> Right (Some (MT.BVTypeRepr nr)) | ||
| otherwise -> Left SExpr.InvalidZeroWidthBV | ||
SExpr.L [SExpr.A (Keyword Vec_), SExpr.A (Natural_ w), mty] -> | ||
case PN.mkNatRepr w of | ||
Some nr -> | ||
-- Note that zero-width vectors are technically allowed | ||
case asTypeRepr mty of | ||
Right (Some ty) -> Right (Some (MT.VecTypeRepr nr ty)) | ||
Left _errs -> Left (SExpr.InvalidVectorPayload mty) | ||
_ -> Left (SExpr.InvalidType at) | ||
|
||
-- | Parse the right-hand side of an assignment | ||
-- | ||
-- Note that it is important that the EvalApp case is last, as there are many | ||
-- syntactic forms that we might need to dispatch to. | ||
asRHS | ||
:: forall arch ids | ||
. SExpr.Syntax Atom | ||
-> SExpr.Parser arch ids (Some (MC.AssignRhs arch (MC.Value arch ids))) | ||
asRHS a = | ||
case a of | ||
SExpr.L [ SExpr.A (Keyword Undefined), srep ] -> do | ||
Some rep <- parseSExpr srep asTypeRepr | ||
return (Some (MC.SetUndefined rep)) | ||
SExpr.L [ SExpr.A (Keyword ReadMemory), saddr, smemRepr ] -> do | ||
SomeTyped addrTp addr <- asValue saddr | ||
SomeTyped _rep memRepr <- parseSExpr smemRepr asMemRepr | ||
|
||
memWidth <- SExpr.archMemWidth | ||
let addrRepr = MT.BVTypeRepr memWidth | ||
|
||
case PC.testEquality addrTp addrRepr of | ||
Just PC.Refl -> return (Some (MC.ReadMem addr memRepr)) | ||
Nothing -> TM.customFailure (SExpr.InvalidAddressWidth a) | ||
_ -> do | ||
SomeTyped _tp app <- asApp a | ||
return (Some (MC.EvalApp app)) | ||
|
||
-- | Forms: | ||
-- | ||
-- * @(comment "msg")@ | ||
-- * @(instruction-start addr decoded-asm-text)@ | ||
-- * @(write-memory addr mem-rep value)@ | ||
-- * @(cond-write-memory cond addr mem-rep value)@ | ||
-- * @(reg := rhs)@ | ||
parseStmt :: forall arch ids . SExpr.Parser arch ids (MC.Stmt arch ids) | ||
parseStmt = do | ||
at <- SExpr.sexp parseAtom | ||
case at of | ||
SExpr.L [ SExpr.A (Keyword Comment), SExpr.A (String_ txt) ] -> | ||
return (MC.Comment txt) | ||
SExpr.L [ SExpr.A (Keyword InstructionStart), SExpr.A (AddressWord addr), SExpr.A (String_ txt) ] -> | ||
return (MC.InstructionStart (MM.memWord (fromIntegral addr)) txt) | ||
SExpr.L [ SExpr.A (Keyword WriteMemory), stargetAddr, smemRepr, svalue ] -> do | ||
SomeTyped addrTy addr <- asValue stargetAddr | ||
SomeTyped vtp value <- asValue svalue | ||
SomeTyped mtp memRepr <- parseSExpr smemRepr asMemRepr | ||
memWidth <- SExpr.archMemWidth | ||
let addrRepr = MT.BVTypeRepr memWidth | ||
case (PC.testEquality vtp mtp, PC.testEquality addrTy addrRepr) of | ||
(Just PC.Refl, Just PC.Refl) -> do | ||
return (MC.WriteMem addr memRepr value) | ||
-- FIXME: Make a more-specific error | ||
_ -> TM.customFailure (SExpr.InvalidStatement at) | ||
SExpr.L [ SExpr.A (Keyword CondWriteMemory), scond, stargetAddr, smemRepr, svalue ] -> do | ||
SomeTyped condTy cond <- asValue scond | ||
SomeTyped addrTy addr <- asValue stargetAddr | ||
SomeTyped vtp value <- asValue svalue | ||
SomeTyped mtp memRepr <- parseSExpr smemRepr asMemRepr | ||
memWidth <- SExpr.archMemWidth | ||
let addrRepr = MT.BVTypeRepr memWidth | ||
case (PC.testEquality vtp mtp, PC.testEquality addrTy addrRepr, PC.testEquality condTy MT.BoolTypeRepr) of | ||
(Just PC.Refl, Just PC.Refl, Just PC.Refl) -> do | ||
return (MC.CondWriteMem cond addr memRepr value) | ||
-- FIXME: Make a more-specific error | ||
_ -> TM.customFailure (SExpr.InvalidStatement at) | ||
SExpr.L [ SExpr.A (Register r), SExpr.A (Keyword Assign), srhs ] -> do | ||
Some rhs <- asRHS srhs | ||
Some asgn <- SExpr.defineRegister r rhs | ||
return (MC.AssignStmt asgn) | ||
_ -> TM.customFailure (SExpr.InvalidStatement at) | ||
|
||
parseBlock :: SExpr.Parser arch ids (Parsed.ParsedBlock arch ids) | ||
parseBlock = do | ||
|
||
stmts <- TM.many parseStmt | ||
|
||
return Parsed.ParsedBlock { Parsed.pblockAddr = undefined | ||
, Parsed.pblockPrecond = undefined | ||
, Parsed.blockSize = undefined | ||
, Parsed.blockReason = undefined | ||
, Parsed.blockAbstractState = undefined | ||
, Parsed.blockJumpBounds = undefined | ||
, Parsed.pblockStmts = stmts | ||
, Parsed.pblockTermStmt = undefined | ||
} | ||
|
||
parseFunction :: SExpr.Parser arch ids (Some (MD.DiscoveryFunInfo arch)) | ||
parseFunction = do | ||
|
||
blocks <- TM.many parseBlock | ||
|
||
let dfi = MDS.DiscoveryFunInfo { MDS.discoveredFunReason = undefined | ||
, MDS.discoveredFunAddr = undefined | ||
, MDS.discoveredFunSymbol = undefined | ||
, MDS._parsedBlocks = Map.fromList [ (Parsed.pblockAddr pb, pb) | ||
| pb <- blocks | ||
] | ||
, MDS.discoveredClassifyFailureResolutions = undefined | ||
} | ||
return (Some dfi) | ||
|
||
parseDiscoveryFunInfo | ||
:: (MC.ArchConstraints arch) | ||
=> MM.Memory (MC.ArchAddrWidth arch) | ||
-- ^ The memory of the binary we are loading results for | ||
-> BS.ByteString | ||
-- ^ The bytes of the persisted file | ||
-> Either SExpr.MacawParseError (Some (MD.DiscoveryFunInfo arch)) | ||
parseDiscoveryFunInfo mem bytes = SExpr.runParser mem bytes parseFunction |
Oops, something went wrong.