You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
use std::collections::HashSet;use ethers::types::U256;use heimdall_common::ether::evm::core::{
types::{byte_size_to_type, convert_bitmask},
vm::State,};use tracing::{debug, trace};usecrate::{
core::analyze::{AnalyzerState,AnalyzerType},
interfaces::{AnalyzedFunction,CalldataFrame,TypeHeuristic},
utils::constants::{AND_BITMASK_REGEX,AND_BITMASK_REGEX_2},Error,};pubfnargument_heuristic(function:&mutAnalyzedFunction,state:&State,analyzer_state:&mutAnalyzerState,) -> Result<(),Error>{match state.last_instruction.opcode{// CALLDATALOAD0x35 => {// calculate the argument index, with the 4byte signature padding removed// for example, CALLDATALOAD(4) -> (4-4)/32 = 0// CALLDATALOAD(36) -> (36-4)/32 = 1let arg_index = (state.last_instruction.inputs[0].saturating_sub(U256::from(4)) / 32).try_into().unwrap_or(usize::MAX);// insert only if this argument is not already in the hashmap
function.arguments.entry(arg_index).or_insert_with(|| {debug!("discovered new argument at index {} from CALLDATALOAD({})",
arg_index, state.last_instruction.inputs[0]);CalldataFrame{arg_op: state.last_instruction.input_operations[0].to_string(),mask_size:32,// init to 32 because all CALLDATALOADs are 32 bytesheuristics:HashSet::new(),}});}// CALLDATACOPY0x37 => {// TODO: implement CALLDATACOPY supporttrace!("CALLDATACOPY detected; not implemented");}// AND | OR0x16 | 0x17 => {// if this is a bitwise mask operation on CALLDATALOAD, we can use it to determine the// size (and consequently type) of the variableifletSome(calldataload_op) =
state.last_instruction.input_operations.iter().find(|op| op.opcode.code == 0x35){// this is a bitwise mask, we can use it to determine the size of the variablelet(mask_size_bytes, _potential_types) =
convert_bitmask(state.last_instruction.clone());// yulify the calldataload operation, and find the associated argument index// this MUST exist, as we have already inserted it in the CALLDATALOAD heuristiclet arg_op = calldataload_op.inputs[0].to_string();ifletSome((arg_index, frame)) =
function.arguments.iter_mut().find(|(_, frame)| frame.arg_op == arg_op){debug!("instruction {} ({}) indicates argument {} is masked to {} bytes",
state.last_instruction.instruction,
state.last_instruction.opcode_details.clone().expect("impossible").name,
arg_index,
mask_size_bytes
);
frame.mask_size = mask_size_bytes;}}}// RETURN0xf3 => {// Safely convert U256 to usizelet size:usize = state.last_instruction.inputs[1].try_into().unwrap_or(0);let return_memory_operations = function.get_memory_range(
state.last_instruction.inputs[0],
state.last_instruction.inputs[1],);let return_memory_operations_solidified = return_memory_operations
.iter().map(|x| x.operations.solidify()).collect::<Vec<String>>().join(", ");// add the return statement to the function logicif analyzer_state.analyzer_type == AnalyzerType::Solidity{if return_memory_operations.len() <= 1{
function.logic.push(format!("return {return_memory_operations_solidified};"));}else{
function.logic.push(format!("return abi.encodePacked({return_memory_operations_solidified});"));}}elseif analyzer_state.analyzer_type == AnalyzerType::Yul{
function.logic.push(format!("return({}, {})",
state.last_instruction.input_operations[0].yulify(),
state.last_instruction.input_operations[1].yulify()));}// if we've already determined a return type, we don't want to do it again.// we use bytes32 as a default return typeif function.returns != Some(String::from("bytes32")){returnOk(());}// if the any input op is ISZERO(x), this is a boolean returnif return_memory_operations.iter().any(|x| x.operations.opcode.name == "ISZERO"){
function.returns = Some(String::from("bool"));}// if the size of returndata is > 32, it must be a bytes memory return.// it could be a struct, but we cant really determine that from the bytecodeelseif size > 32{
function.returns = Some(String::from("bytes memory"));}else{// attempt to find a return type within the return memory operationslet byte_size = matchAND_BITMASK_REGEX.find(&return_memory_operations_solidified).ok().flatten(){Some(bitmask) => {let cast = bitmask.as_str();
cast.matches("ff").count()}None => matchAND_BITMASK_REGEX_2.find(&return_memory_operations_solidified).ok().flatten(){Some(bitmask) => {let cast = bitmask.as_str();
cast.matches("ff").count()}None => 32,},};// convert the cast size to a stringlet(_, cast_types) = byte_size_to_type(byte_size);
function.returns = Some(cast_types[0].to_string());}}// integer type heuristics0x02 | 0x04 | 0x05 | 0x06 | 0x07 | 0x08 | 0x09 | 0x0b | 0x10 | 0x11 | 0x12 | 0x13 => {// check if this instruction is operating on a known argument.// if it is, add 'integer' to the list of heuristics// TODO: we probably want to use an enum for heuristicsifletSome((arg_index, frame)) = function.arguments.iter_mut().find(|(_, frame)| {
state
.last_instruction.output_operations.iter().any(|operation| operation.to_string().contains(frame.arg_op.as_str()))}){debug!("instruction {} ({}) indicates argument {} may be a numeric type",
state.last_instruction.instruction,
state.last_instruction.opcode_details.clone().expect("impossible").name,
arg_index
);
frame.heuristics.insert(TypeHeuristic::Numeric);}}// bytes type heuristics0x18 | 0x1a | 0x1b | 0x1c | 0x1d | 0x20 => {// check if this instruction is operating on a known argument.// if it is, add 'bytes' to the list of heuristics// TODO: we probably want to use an enum for heuristicsifletSome((arg_index, frame)) = function.arguments.iter_mut().find(|(_, frame)| {
state
.last_instruction.output_operations.iter().any(|operation| operation.to_string().contains(frame.arg_op.as_str()))}){debug!("instruction {} ({}) indicates argument {} may be a bytes type",
state.last_instruction.instruction,
state.last_instruction.opcode_details.clone().expect("impossible").name,
arg_index
);
frame.heuristics.insert(TypeHeuristic::Bytes);}}// boolean type heuristics0x15 => {// if this is a boolean check on CALLDATALOAD, we can add boolean to the potential typesifletSome(calldataload_op) =
state.last_instruction.input_operations.iter().find(|op| op.opcode.code == 0x35){// yulify the calldataload operation, and find the associated argument index// this MUST exist, as we have already inserted it in the CALLDATALOAD heuristiclet arg_op = calldataload_op.inputs[0].to_string();ifletSome((arg_index, frame)) =
function.arguments.iter_mut().find(|(_, frame)| frame.arg_op == arg_op){debug!("instruction {} ({}) indicates argument {} may be a boolean",
state.last_instruction.instruction,
state.last_instruction.opcode_details.clone().expect("impossible").name,
arg_index
);// NOTE: we don't want to update mask_size here, as we are only adding potential// types
frame.heuristics.insert(TypeHeuristic::Boolean);}}}
_ => {}};Ok(())}
The text was updated successfully, but these errors were encountered:
New Codebase TODO
implement CALLDATACOPY support
Location
heimdall-rs/crates/decompile/src/utils/heuristics/arguments.rs
Line 49 in 7a41ff2
The text was updated successfully, but these errors were encountered: