Fix corpus call method resolution bug, improve startup logging (#308)

* Fixed a corpus call method resolution bug.
* Deprecated "methodName" in corpus (lack of support for function overloading). To be removed later, in favor of new "methodSignature" key.
* Improved fuzzer initialization logging.
* Print basic metrics for corpus health on startup.
* Reorder printing to avoid "Creating X workers" message after "fuzz: elapsed[...]" message.
* Update corpus health log for readability.

---------

Co-authored-by: anishnaik <[email protected]>
crytic · Feb 28, 2024 · c0c3718 · c0c3718
1 parent 729b78a
commit c0c3718
Show file tree

Hide file tree

Showing 4 changed files with 79 additions and 19 deletions.
diff --git a/fuzzing/calls/call_message_abi_values.go b/fuzzing/calls/call_message_abi_values.go
@@ -6,6 +6,7 @@ import (
 	"github.com/crytic/medusa/fuzzing/valuegeneration"
 	"github.com/ethereum/go-ethereum/accounts/abi"
 	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/crypto"
 )
 
 // CallMessageDataAbiValues describes a CallMessage Data field which is represented by ABI input argument values.
@@ -22,8 +23,17 @@ type CallMessageDataAbiValues struct {
 
 	// methodName stores the name of Method when decoding from JSON. The Method will be resolved using this internal
 	// reference when Resolve is called.
+	//
+	// TODO: Note, this field is deprecated and should be removed after methodSignature is adopted for some time.
+	//  This will help transition old corpuses in the meantime.
 	methodName string
 
+	// methodSignature stores the function prototype which is used to calculate the method ID. This is human-readable,
+	// and easily editable, so it is used in favor of the method ID derived from it.
+	//
+	// The Method will be resolved using this internal reference when Resolve is called.
+	methodSignature string
+
 	// encodedInputValues stores the raw encoded input values when decoding from JSON. The actual InputValues will be
 	// decoded using this and the resolved Method once Resolve is called.
 	encodedInputValues []any
@@ -32,7 +42,8 @@ type CallMessageDataAbiValues struct {
 // callMessageDataAbiValuesMarshal is used as an internal struct to represent JSON serialized data for
 // CallMessageDataAbiValues.
 type callMessageDataAbiValuesMarshal struct {
-	MethodName         string `json:"methodName"`
+	MethodName         string `json:"methodName,omitempty"`
+	MethodSignature    string `json:"methodSignature"`
 	EncodedInputValues []any  `json:"inputValues"`
 }
 
@@ -43,6 +54,7 @@ func (m *CallMessageDataAbiValues) Clone() (*CallMessageDataAbiValues, error) {
 		Method:             m.Method,
 		InputValues:        nil, // set lower
 		methodName:         m.methodName,
+		methodSignature:    m.methodSignature,
 		encodedInputValues: m.encodedInputValues,
 	}
 
@@ -65,17 +77,32 @@ func (m *CallMessageDataAbiValues) Clone() (*CallMessageDataAbiValues, error) {
 // Resolve takes a previously unmarshalled CallMessageDataAbiValues and resolves all internal data needed for it to be
 // used at runtime by resolving the abi.Method it references from the provided contract ABI.
 func (d *CallMessageDataAbiValues) Resolve(contractAbi abi.ABI) error {
-	// Try to resolve the method from our contract ABI.
-	if resolvedMethod, ok := contractAbi.Methods[d.methodName]; ok {
-		d.Method = &resolvedMethod
-	} else {
-		return fmt.Errorf("could not resolve method '%v' from the given contract ABI", d.methodName)
+	// If we have a method signature, try to resolve it by calculating a method ID from this.
+	d.Method = nil
+	if d.methodSignature != "" {
+		methodId := crypto.Keccak256([]byte(d.methodSignature))[:4]
+		if resolvedMethod, err := contractAbi.MethodById(methodId); err == nil {
+			d.Method = resolvedMethod
+		} else {
+			return fmt.Errorf("could not resolve method signature '%v'", d.methodSignature)
+		}
 	}
 
+	// TODO: Deprecated old way of resolving methods. This is left for compatibility with old corpuses, but should be
+	//  removed at a later date in favor of methodSignature resolution. It resolves a method by name if it has not already been resolved by signature.
+	if d.Method == nil {
+		if resolvedMethod, ok := contractAbi.Methods[d.methodName]; ok {
+			d.Method = &resolvedMethod
+		} else {
+			return fmt.Errorf("could not resolve method name '%v'", d.methodName)
+		}
+	}
+	d.methodSignature = d.Method.Sig
+
 	// Now that we've resolved the method, decode our encoded input values.
 	decodedArguments, err := valuegeneration.DecodeJSONArgumentsFromSlice(d.Method.Inputs, d.encodedInputValues, make(map[string]common.Address))
 	if err != nil {
-		return err
+		return fmt.Errorf("error decoding arguments for method '%v': %v", d.methodSignature, err)
 	}
 
 	// If we've decoded arguments successfully, set them and clear our encoded arguments as they're no longer needed.
@@ -132,7 +159,7 @@ func (d *CallMessageDataAbiValues) MarshalJSON() ([]byte, error) {
 
 	// Now create our outer struct and marshal all the data and return it.
 	marshalData := callMessageDataAbiValuesMarshal{
-		MethodName:         d.Method.Name,
+		MethodSignature:    d.Method.Sig,
 		EncodedInputValues: inputValuesEncoded,
 	}
 	return json.Marshal(marshalData)
@@ -150,6 +177,7 @@ func (d *CallMessageDataAbiValues) UnmarshalJSON(b []byte) error {
 
 	// Set our data in our actual structure now
 	d.methodName = marshalData.MethodName
+	d.methodSignature = marshalData.MethodSignature
 	d.encodedInputValues = marshalData.EncodedInputValues
 	return nil
 }
diff --git a/fuzzing/calls/call_sequence.go b/fuzzing/calls/call_sequence.go
@@ -205,7 +205,17 @@ func (cse *CallSequenceElement) Method() (*abi.Method, error) {
 	if cse.Contract == nil {
 		return nil, nil
 	}
-	return cse.Contract.CompiledContract().Abi.MethodById(cse.Call.Data)
+
+	// If we have a method resolved, return it.
+	if cse.Call != nil && cse.Call.DataAbiValues != nil {
+		if cse.Call.DataAbiValues.Method != nil {
+			return cse.Call.DataAbiValues.Method, nil
+		}
+	}
+
+	// Try to resolve the method by ID from the call data.
+	method, err := cse.Contract.CompiledContract().Abi.MethodById(cse.Call.Data)
+	return method, err
 }
 
 // String returns a displayable string representing the CallSequenceElement.

diff --git a/fuzzing/corpus/corpus.go b/fuzzing/corpus/corpus.go
@@ -189,6 +189,7 @@ func (c *Corpus) initializeSequences(sequenceFiles *corpusDirectory[calls.CallSe
 			if callAbiValues != nil {
 				sequenceInvalidError = callAbiValues.Resolve(currentSequenceElement.Contract.CompiledContract().Abi)
 				if sequenceInvalidError != nil {
+					sequenceInvalidError = fmt.Errorf("error resolving method in contract '%v': %v", currentSequenceElement.Contract.Name(), sequenceInvalidError)
 					return nil, nil
 				}
 			}
@@ -236,7 +237,9 @@ func (c *Corpus) initializeSequences(sequenceFiles *corpusDirectory[calls.CallSe
 
 // Initialize initializes any runtime data needed for a Corpus on startup. Call sequences are replayed on the post-setup
 // (deployment) test chain to calculate coverage, while resolving references to compiled contracts.
-func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions contracts.Contracts) error {
+// Returns the active number of corpus items, total number of corpus items, or an error if one occurred. If an error
+// is returned, then the corpus counts returned will always be zero.
+func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions contracts.Contracts) (int, int, error) {
 	// Acquire our call sequences lock during the duration of this method.
 	c.callSequencesLock.Lock()
 	defer c.callSequencesLock.Unlock()
@@ -273,7 +276,7 @@ func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions
 		return nil
 	})
 	if err != nil {
-		return fmt.Errorf("failed to initialize coverage maps, base test chain cloning encountered error: %v", err)
+		return 0, 0, fmt.Errorf("failed to initialize coverage maps, base test chain cloning encountered error: %v", err)
 	}
 
 	// Set our coverage maps to those collected when replaying all blocks when cloning.
@@ -283,7 +286,7 @@ func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions
 			covMaps := coverage.GetCoverageTracerResults(messageResults)
 			_, _, covErr := c.coverageMaps.Update(covMaps)
 			if covErr != nil {
-				return err
+				return 0, 0, err
 			}
 		}
 	}
@@ -292,18 +295,22 @@ func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions
 	// are added to the corpus for mutations, re-execution, etc.
 	err = c.initializeSequences(c.mutableSequenceFiles, testChain, deployedContracts, true)
 	if err != nil {
-		return err
+		return 0, 0, err
 	}
 	err = c.initializeSequences(c.immutableSequenceFiles, testChain, deployedContracts, false)
 	if err != nil {
-		return err
+		return 0, 0, err
 	}
 	err = c.initializeSequences(c.testResultSequenceFiles, testChain, deployedContracts, false)
 	if err != nil {
-		return err
+		return 0, 0, err
 	}
 
-	return nil
+	// Calculate corpus health metrics
+	corpusSequencesTotal := len(c.mutableSequenceFiles.files) + len(c.immutableSequenceFiles.files) + len(c.testResultSequenceFiles.files)
+	corpusSequencesActive := len(c.unexecutedCallSequences)
+
+	return corpusSequencesActive, corpusSequencesTotal, nil
 }
 
 // addCallSequence adds a call sequence to the corpus in a given corpus directory.

diff --git a/fuzzing/fuzzer.go b/fuzzing/fuzzer.go
@@ -513,8 +513,7 @@ func (f *Fuzzer) spawnWorkersLoop(baseTestChain *chain.TestChain) error {
 	// Define a flag that indicates whether our context has not been cancelled (i.e. whether we should keep working).
 	working := !utils.CheckContextDone(f.ctx)
 
-	// Log that we are about to create the workers and start fuzzing
-	f.logger.Info("Creating ", colors.Bold, f.config.Fuzzing.Workers, colors.Reset, " workers...")
+	// Create workers and start fuzzing.
 	var err error
 	for err == nil && working {
 		// Send an item into our channel to queue up a spot. This will block us if we hit capacity until a worker
@@ -617,6 +616,7 @@ func (f *Fuzzer) Start() error {
 	}
 
 	// Set up the corpus
+	f.logger.Info("Initializing corpus")
 	f.corpus, err = corpus.NewCorpus(f.config.Fuzzing.CorpusDirectory)
 	if err != nil {
 		f.logger.Error("Failed to create the corpus", err)
@@ -640,19 +640,34 @@ func (f *Fuzzer) Start() error {
 	}
 
 	// Set it up with our deployment/setup strategy defined by the fuzzer.
+	f.logger.Info("Setting up base chain")
 	err = f.Hooks.ChainSetupFunc(f, baseTestChain)
 	if err != nil {
 		f.logger.Error("Failed to initialize the test chain", err)
 		return err
 	}
 
 	// Initialize our coverage maps by measuring the coverage we get from the corpus.
-	err = f.corpus.Initialize(baseTestChain, f.contractDefinitions)
+	var corpusActiveSequences, corpusTotalSequences int
+	f.logger.Info("Initializing and validating corpus call sequences")
+	corpusActiveSequences, corpusTotalSequences, err = f.corpus.Initialize(baseTestChain, f.contractDefinitions)
 	if err != nil {
 		f.logger.Error("Failed to initialize the corpus", err)
 		return err
 	}
 
+	// Log corpus health statistics, if we have any existing sequences.
+	if corpusTotalSequences > 0 {
+		f.logger.Info(
+			colors.Bold, "corpus: ", colors.Reset,
+			"health: ", colors.Bold, int(float32(corpusActiveSequences)/float32(corpusTotalSequences)*100.0), "%", colors.Reset, ", ",
+			"sequences: ", colors.Bold, corpusTotalSequences, " (", corpusActiveSequences, " valid, ", corpusTotalSequences-corpusActiveSequences, " invalid)", colors.Reset,
+		)
+	}
+
+	// Log the start of our fuzzing campaign.
+	f.logger.Info("Fuzzing with ", colors.Bold, f.config.Fuzzing.Workers, colors.Reset, " workers")
+
 	// Start our printing loop now that we're about to begin fuzzing.
 	go f.printMetricsLoop()