From 29f94337e93ffff52662cd5980e286aa24422b13 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 17 Jun 2023 19:11:53 -0700 Subject: [PATCH 001/122] wip - IR executor --- boa/environment.py | 152 ++++++++------ boa/interpret.py | 3 +- boa/vm/fork.py | 6 +- boa/vyper/contract.py | 7 + boa/vyper/ir_executor.py | 426 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 530 insertions(+), 64 deletions(-) create mode 100644 boa/vyper/ir_executor.py diff --git a/boa/environment.py b/boa/environment.py index da8e9df2..295b1d54 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -5,7 +5,7 @@ import logging import sys import warnings -from typing import Any, Iterator, Optional, Union +from typing import Any, Iterator, Optional, Union, Tuple import eth.constants as constants import eth.tools.builder.chain as chain @@ -24,6 +24,7 @@ from boa.util.eip1167 import extract_eip1167_address, is_eip1167_contract from boa.vm.fork import AccountDBFork from boa.vm.gas_meters import GasMeter, NoGasMeter, ProfilingGasMeter +from boa.vyper.ir_executor import EvalContext def enable_pyevm_verbose_logging(): @@ -254,6 +255,89 @@ def __call__(self, computation): # ### End section: sha3 tracing +# py-evm uses class instantiaters which need to be classes +# instead of like factories or other easier to use architectures - +# `computation_template` is a class which can be constructed dynamically +class computation_template: + _gas_meter_class = GasMeter + + def __init__(self, *args, **kwargs): + # super() hardcodes CodeStream into the ctor + # so we have to override it here + super().__init__(*args, **kwargs) + self.code = TracingCodeStream( + self.code._raw_code_bytes, + fake_codesize=getattr(self.msg, "_fake_codesize", None), + start_pc=getattr(self.msg, "_start_pc", 0), + ) + global _precompiles + # copy so as not to mess with class state + self._precompiles = self._precompiles.copy() + self._precompiles.update(_precompiles) + + global _opcode_overrides + # copy so as not to mess with class state + self.opcodes = self.opcodes.copy() + self.opcodes.update(_opcode_overrides) + + self._gas_meter = self._gas_meter_class(self.msg.gas) + if hasattr(self._gas_meter, "_set_code"): + self._gas_meter._set_code(self.code) + + self._child_pcs = [] + + def add_child_computation(self, child_computation): + super().add_child_computation(child_computation) + # track PCs of child calls for profiling purposes + self._child_pcs.append(self.code.program_counter) + + # hijack creations to automatically generate blueprints + @classmethod + def apply_create_message(cls, state, msg, tx_ctx): + computation = super().apply_create_message(state, msg, tx_ctx) + + bytecode = msg.code + # cf. eth/vm/logic/system/Create* opcodes + contract_address = msg.storage_address + + if is_eip1167_contract(bytecode): + contract_address = extract_eip1167_address(bytecode) + bytecode = self.vm.state.get_code(contract_address) + + if bytecode in cls.env._code_registry: + target = self._code_registry[bytecode].deployer.at(contract_address) + target.created_from = to_checksum_address(msg.sender) + env.register_contract(contract_address, target) + + return computation + + @classmethod + def apply_computation(cls, state , msg , tx_ctx): + addr = msg.code_address + contract = cls.env.lookup_contract(addr) if addr else None + if contract is None or True: + print("SLOW MODE") + return super().apply_computation(state, msg, tx_ctx) + + print("FAST MODE") + err = None + with cls(state, msg, tx_ctx) as computation: + eval_ctx = EvalContext(computation) + print(contract.ir_executor) + try: + contract.ir_executor.eval(eval_ctx) + except Exception as e: + # grab the exception to raise later - + # unclear why this is getting swallowed by py-evm. + #print(e) + err = e + + from eth.exceptions import VMError, Halt + if err is not None and not isinstance(err, (Halt, VMError)): + #if err is not None: + raise err + return computation + # wrapper class around py-evm which provides a "contract-centric" API class Env: @@ -293,61 +377,7 @@ def _init_vm(self, reset_traces=True): self.vm = self.chain.get_vm() env = self - class OpcodeTracingComputation(self.vm.state.computation_class): - _gas_meter_class = GasMeter - - def __init__(self, *args, **kwargs): - # super() hardcodes CodeStream into the ctor - # so we have to override it here - super().__init__(*args, **kwargs) - self.code = TracingCodeStream( - self.code._raw_code_bytes, - fake_codesize=getattr(self.msg, "_fake_codesize", None), - start_pc=getattr(self.msg, "_start_pc", 0), - ) - global _precompiles - # copy so as not to mess with class state - self._precompiles = self._precompiles.copy() - self._precompiles.update(_precompiles) - - global _opcode_overrides - # copy so as not to mess with class state - self.opcodes = self.opcodes.copy() - self.opcodes.update(_opcode_overrides) - - self._gas_meter = self._gas_meter_class(self.msg.gas) - if hasattr(self._gas_meter, "_set_code"): - self._gas_meter._set_code(self.code) - - self._child_pcs = [] - - def add_child_computation(self, child_computation): - super().add_child_computation(child_computation) - # track PCs of child calls for profiling purposes - self._child_pcs.append(self.code.program_counter) - - # hijack creations to automatically generate blueprints - @classmethod - def apply_create_message(cls, state, msg, tx_ctx): - computation = super().apply_create_message(state, msg, tx_ctx) - - bytecode = msg.code - # cf. eth/vm/logic/system/Create* opcodes - contract_address = msg.storage_address - - if is_eip1167_contract(bytecode): - contract_address = extract_eip1167_address(bytecode) - bytecode = self.vm.state.get_code(contract_address) - - if bytecode in self._code_registry: - target = self._code_registry[bytecode].deployer.at(contract_address) - target.created_from = to_checksum_address(msg.sender) - env.register_contract(contract_address, target) - - return computation - - # TODO make metering toggle-able - c = OpcodeTracingComputation + c = type("TitanoboaComputation", (computation_template, self.vm.state.computation_class), {"env": self}) self.vm.state.computation_class = c @@ -405,18 +435,19 @@ def get_balance(self, addr): return self.vm.state.get_balance(to_canonical_address(addr)) def register_contract(self, address, obj): - self._contracts[to_checksum_address(address)] = obj + addr = to_canonical_address(address) + self._contracts[addr] = obj # also register it in the registry for # create_minimal_proxy_to and create_copy_of - bytecode = self.vm.state.get_code(to_canonical_address(address)) + bytecode = self.vm.state.get_code(addr) self._code_registry[bytecode] = obj def register_blueprint(self, bytecode, obj): self._code_registry[bytecode] = obj def lookup_contract(self, address): - return self._contracts.get(to_checksum_address(address)) + return self._contracts.get(to_canonical_address(address)) def alias(self, address, name): self._aliases[to_checksum_address(address)] = name @@ -484,7 +515,7 @@ def deploy_code( start_pc: int = 0, # override the target address: override_address: Optional[AddressType] = None, - ) -> tuple[AddressType, bytes]: + ) -> Tuple[AddressType, bytes]: if gas is None: gas = self.vm.state.gas_limit sender = self._get_sender(sender) @@ -559,6 +590,7 @@ class FakeMessage(Message): # Message object with settable attrs msg._contract = contract # type: ignore origin = sender # XXX: consider making this parametrizable tx_ctx = BaseTransactionContext(origin=origin, gas_price=self.get_gas_price()) + ret = self.vm.state.computation_class.apply_message(self.vm.state, msg, tx_ctx) if self._coverage_enabled: diff --git a/boa/interpret.py b/boa/interpret.py index c27f12f2..d7a3c2e8 100644 --- a/boa/interpret.py +++ b/boa/interpret.py @@ -1,6 +1,7 @@ import json import textwrap from pathlib import Path +from typing import Union import vyper from vyper.cli.vyper_compile import get_interface_codes @@ -15,7 +16,7 @@ VyperDeployer, ) -_Contract = VyperContract | VyperBlueprint +_Contract = Union[VyperContract, VyperBlueprint] _disk_cache = None diff --git a/boa/vm/fork.py b/boa/vm/fork.py index b9a680b1..170fe7ea 100644 --- a/boa/vm/fork.py +++ b/boa/vm/fork.py @@ -1,5 +1,5 @@ import os -from typing import Any +from typing import Any, Dict, Tuple try: import ujson as json @@ -46,7 +46,7 @@ def __init__(self, url: str, cache_file: str = DEFAULT_CACHE_DIR): # _loaded is a cache for the constructor. # reduces fork time after the first fork. - _loaded: dict[tuple[str, str], "CachingRPC"] = {} + _loaded: Dict[Tuple[str, str], "CachingRPC"] = {} _pid: int = os.getpid() # so we can detect if our fds are bad def _init_mem_db(self): @@ -102,7 +102,7 @@ def fetch_multi(self, payload): # AccountDB which dispatches to an RPC when we don't have the # data locally class AccountDBFork(AccountDB): - _rpc_init_kwargs: dict[str, Any] = {} + _rpc_init_kwargs: Dict[str, Any] = {} def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 695702c0..c15341e9 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -38,6 +38,7 @@ from boa.util.lrudict import lrudict from boa.vm.gas_meters import ProfilingGasMeter from boa.vyper import _METHOD_ID_VAR +from boa.vyper.ir_executor import executor_from_ir from boa.vyper.ast_utils import ast_map_of, get_fn_ancestor_from_node, reason_at from boa.vyper.compiler_utils import ( _compile_vyper_function, @@ -777,6 +778,12 @@ def unoptimized_bytecode(self): ) return s + self.data_section + @cached_property + def ir_executor(self): + ir = self.compiler_data.ir_runtime + opcode_impls = self.env.vm.state.computation_class.opcodes + return executor_from_ir(ir, opcode_impls) + @contextlib.contextmanager def _anchor_source_map(self, source_map): tmp = self._source_map diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py new file mode 100644 index 00000000..b6ef4ffa --- /dev/null +++ b/boa/vyper/ir_executor.py @@ -0,0 +1,426 @@ +from dataclasses import dataclass, field +from functools import cached_property +from typing import Any, Dict, List, Union +import re + +from eth._utils.numeric import ceil32 +from eth.exceptions import Revert + +from vyper.evm.opcodes import OPCODES +import vyper.ir.optimizer + +def debug(*args, **kwargs): + pass + +def _debug(*args, **kwargs): + print(*args, **kwargs) + +if False: + debug = _debug + +@dataclass +class OpcodeInfo: + # model of an opcode from vyper.evm.opcodes + + opcode: int # opcode number ex. 0x01 for ADD + consumes: int # number of stack items this consumes + produces: int # number of stack items this produces, must be 0 or 1 + _gas_estimate: int # in vyper.evm.opcodes but not useful + + def __post_init__(self): + assert self.produces in (0, 1) + + @classmethod + def from_opcode_info(cls, opcode_info): + # info from vyper.evm.opcodes + opcode, consumes, produces, gas_estimate = opcode_info + return cls(opcode, consumes, produces, gas_estimate) + + +@dataclass +class EvalContext: + computation: Any # ComputationAPI + call_frames: List[Dict[str, int]] = field(default_factory=lambda: [{}]) + + @property + def local_vars(self): + return self.call_frames[-1] + + def goto(self, compile_ctx, label, arglist): + if label == "returnpc": # i.e. exitsub + return + + self.call_frames.append({}) + compile_ctx.labels[label].execute_subroutine(self, *arglist) + self.call_frames.pop() + + +class IRBaseExecutor: + def __init__(self, *args): + self.args = args + + @property + def name(self): + return self._name + + def __repr__(self): + ret = self.name + "(" + + show = lambda s: s if isinstance(s, str) else hex(s) if isinstance(s, int) else repr(s) + arg_reprs = [show(arg) for arg in self.args] + arg_reprs = [x.replace("\n", "\n ") for x in arg_reprs] + ret += ",\n ".join(arg_reprs) + ret += ")" + + has_inner_newlines = any("\n" in t for t in arg_reprs) + output_on_one_line = re.sub(r",\n *", ", ", ret).replace("\n", "") + + should_output_single_line = len(output_on_one_line) < 80 and not has_inner_newlines + + if should_output_single_line: + return output_on_one_line + else: + return ret + + def eval(self, context): + debug("ENTER", self.name) + args = self._eval_args(context) + return self._impl(context, *args) + + def _eval_args(self, context): + ret = [_eval_single(arg, context) for arg in reversed(self.args)] + ret.reverse() + return ret + + +def _eval_single(arg, context): + if isinstance(arg, str): + return context.local_vars[arg] + if isinstance(arg, int): + return arg + return arg.eval(context) + +# an IR executor for evm opcodes which dispatches into py-evm +class DefaultIRExecutor(IRBaseExecutor): + def __init__(self, name, opcode_impl, opcode_info, *args): + self.opcode_impl = opcode_impl # py-evm OpcodeAPI + self.opcode_info: OpcodeInfo = opcode_info # info from vyper.evm.opcodes + self._name = "__" + name + "__" + super().__init__(*args) + + @cached_property + def produces(self): + return self.opcode_info.produces + + def eval(self, context): + debug("ENTER", self.name) + evaled_args = self._eval_args(context) + debug(self.name,"args.", evaled_args) + computation = context.computation + for arg in reversed(evaled_args): + if isinstance(arg, int): + computation.stack_push_int(arg) + elif isinstance(arg, bytes): + computation.stack_push_bytes(arg) + #elif isinstance(arg, str) and arg.startswith("_sym_"): + # # it's a returnpc for a function + # pass + else: + raise RuntimeError(f"Not a stack item. {type(arg)} {arg}") + + self.opcode_impl.__call__(computation) + + if self.produces: + return computation.stack_pop1_any() + + +_executors = {} + +# decorator to register an executor class in the _executors dict. +def executor(cls): + _executors[cls._name] = cls + return cls + +StackItem = Union[int, bytes] + +def _to_int(stack_item: StackItem) -> int: + if isinstance(stack_item, int): + return stack_item + return int.from_bytes(stack_item, "big") + + +def _to_bytes(stack_item: StackItem) -> bytes: + if isinstance(stack_item, bytes): + return stack_item + return stack_item.to_bytes(32, "big") + + +def _wrap256(x): + return x % 2**256 + + +def _as_signed(x): + return unsigned_to_signed(x, 256, strict=True) + + +@dataclass +class CompileContext: + labels: Dict[str, IRBaseExecutor] + + +class IRExecutor(IRBaseExecutor): + _sig = None + + def __init__(self, compile_ctx, *args): + self.compile_ctx = compile_ctx + super().__init__(*args) + + def eval(self, context): + debug("ENTER", self.name) + args = self._eval_args(context) + if self.sig_mapper: + assert len(args) == len(self.sig_mapper) + args = (mapper(arg) for (mapper, arg) in zip(self.sig_mapper, args)) + ret = self._impl(context, *args) + debug(f"({self.name} returning {ret})") + return ret + + @cached_property + def sig_mapper(self): + return tuple(_to_int if typ is int else _to_bytes for typ in self._sig) + + +class UnsignedBinopExecutor(IRExecutor): + _sig = (int, int) + + def _impl(self, context, x, y): + return _wrap256(self._op(x, y)) + +class SignedBinopExecutor(UnsignedBinopExecutor): + def _impl(self, x, y): + debug("entered _impl.", self.name) + x = unsigned_to_signed(x, 256, strict=True) + y = unsigned_to_signed(y, 256, strict=True) + return _wrap256(self._op(x, y)) + +# just use routines from vyper optimizer +for opname, (op, _, unsigned) in vyper.ir.optimizer.arith.items(): + base = UnsignedBinopExecutor if unsigned else SignedBinopExecutor + _executors[opname] = type(opname.capitalize(), (base,), {"_op": op, "_name": opname}) + +@executor +class MLoad(IRExecutor): + _name = "mload" + _sig = (int,) + + def _impl(self, context, ptr): + context.computation._memory.extend(ptr, 32) + return context.computation._memory.read_bytes(ptr, 32) + +@executor +class MStore(IRExecutor): + _name = "mstore" + _sig = (int,bytes) + + def _impl(self, context, ptr, val): + context.computation._memory.extend(ptr, 32) + context.computation._memory.write(ptr, 32, val) + + +@executor +class Ceil32(IRExecutor): + _name = "ceil32" + _sig = (int,int) + + def _impl(self, context, x): + return eth._utils.numeric.ceil32(x) + + + +#@executor +class DLoad(IRExecutor): + _name = "dload" + _sig = (int,) + + def _impl(self, context, ptr): + raise RuntimeError("unimplemented") + +#@executor +class DLoadBytes(IRExecutor): + _name = "dloadbytes" + sig = (int,int,int) + def _impl(self, context, dst, src, size): + raise RuntimeError("unimplemented") + + +@executor +class Pass(IRExecutor): + _name = "pass" + + def eval(self, context): + pass + +@executor +class Seq(IRExecutor): + _name = "seq" + + def eval(self, context): + debug("ENTER", self.name) + + for arg in self.args: + lastval = _eval_single(arg, context) + debug(self.name,"evaled",lastval) + + return lastval + +@executor +class Repeat(IRExecutor): + _name = "repeat" + + def eval(self, context): + debug("ENTER", self.name) + + i_name, start, rounds, rounds_bound, body = self.args + + start = _eval_single(start, context) + rounds = _eval_single(rounds, context) + assert rounds <= rounds_bound + + assert i_name not in context.local_vars + + for i in range(start, start + rounds): + context.local_vars[i_name] = i + body.eval(context) + + del context.local_vars[i_name] + + +@executor +class If(IRExecutor): + _name = "if" + + # override `eval()` so we can get the correct lazy behavior + def eval(self, context): + debug("ENTER", self.name) + try: + test, body, orelse = self.args + except ValueError: + test, body = self.args + orelse = None + + test = _to_int(_eval_single(test, context)) + if bool(test): + return _eval_single(body, context) + + elif orelse is not None: + return _eval_single(orelse, context) + + return + + +@executor +class Assert(IRExecutor): + _name = "assert" + _sig = (int,) + + def _impl(self, context, test): + if not bool(test): + context.computation.output = b"" + raise Revert(b"") + +@executor +class VarList(IRExecutor): + _name = "var_list" + + +@executor +class Goto(IRExecutor): + _name = "goto" + + def get_label(self): + label = self.args[0] + if label.startswith("_sym_"): + label = label[len("_sym_"):] + return label + + def eval(self, context): + debug("ENTER", self.name) + label = self.get_label() + args = reversed([_eval_single(arg, context) for arg in reversed(self.args[1:])]) + context.goto(self.compile_ctx, label, args) + + +@executor +class ExitTo(Goto): + # exit_to and goto have pretty much the same semantics as far as we + # are concerned here. + _name = "exit_to" + + +@executor +class Label(IRExecutor): + _name = "label" + + def __init__(self, compile_ctx, name, var_list, body): + self.compile_ctx = compile_ctx + self.var_list = var_list.args + self.body = body + self.labelname = name + + self.args = (name, var_list, body) + + compile_ctx.labels[name] = self + + def eval(self, context): + debug("ENTER", self.name) + pass + + def execute_subroutine(self, context, *args): + assert len(args) == len(self.var_list), (self.labelname, [x for x in args], self.var_list) + for varname, val in zip(self.var_list, args): + context.local_vars[varname] = val + + self.body.eval(context) + +@executor +class With(IRExecutor): + _name = "with" + + def eval(self, context): + debug("ENTER", self.name) + varname, val, body = self.args + + val = _eval_single(val, context) + + backup = context.local_vars.get(varname) + context.local_vars[varname] = val + + ret = body.eval(context) + + if backup is None: + del context.local_vars[varname] + else: + context.local_vars[varname] = backup + + return ret + +def executor_from_ir(ir_node, opcode_impls: Dict[int, Any], compile_ctx = None) -> Any: + instr = ir_node.value + if isinstance(instr, int): + return instr + + if compile_ctx is None: + compile_ctx = CompileContext({}) + + args = (executor_from_ir(arg, opcode_impls, compile_ctx) for arg in ir_node.args) + + if instr in _executors: + return _executors[instr](compile_ctx, *args) + + if instr.upper() in OPCODES: + opcode_info = OpcodeInfo.from_opcode_info(OPCODES[instr.upper()]) + opcode_impl = opcode_impls[opcode_info.opcode] + return DefaultIRExecutor(instr, opcode_impl, opcode_info, *args) + + assert len(ir_node.args) == 0, ir_node + return ir_node.value From 1d7a84fa5d3b114e8396fbf3e1a05c27645ebb73 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 17 Jun 2023 19:47:34 -0700 Subject: [PATCH 002/122] wip --- boa/environment.py | 2 +- boa/vyper/ir_executor.py | 178 +++++++++++++++++++++++++++------------ 2 files changed, 124 insertions(+), 56 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 295b1d54..4c55ce5c 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -315,7 +315,7 @@ def apply_create_message(cls, state, msg, tx_ctx): def apply_computation(cls, state , msg , tx_ctx): addr = msg.code_address contract = cls.env.lookup_contract(addr) if addr else None - if contract is None or True: + if contract is None or False: print("SLOW MODE") return super().apply_computation(state, msg, tx_ctx) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index b6ef4ffa..9763cd77 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -2,6 +2,7 @@ from functools import cached_property from typing import Any, Dict, List, Union import re +import sys from eth._utils.numeric import ceil32 from eth.exceptions import Revert @@ -37,7 +38,7 @@ def from_opcode_info(cls, opcode_info): return cls(opcode, consumes, produces, gas_estimate) -@dataclass +@dataclass(slots=True) class EvalContext: computation: Any # ComputationAPI call_frames: List[Dict[str, int]] = field(default_factory=lambda: [{}]) @@ -56,10 +57,12 @@ def goto(self, compile_ctx, label, arglist): class IRBaseExecutor: + __slots__ = ("args",) + def __init__(self, *args): self.args = args - @property + @cached_property def name(self): return self._name @@ -83,25 +86,38 @@ def __repr__(self): return ret def eval(self, context): - debug("ENTER", self.name) + #debug("ENTER", self.name) args = self._eval_args(context) return self._impl(context, *args) def _eval_args(self, context): - ret = [_eval_single(arg, context) for arg in reversed(self.args)] + ret = [arg.eval(context) for arg in reversed(self.args)] ret.reverse() return ret -def _eval_single(arg, context): - if isinstance(arg, str): - return context.local_vars[arg] - if isinstance(arg, int): - return arg - return arg.eval(context) +@dataclass(slots=True) +class IntExecutor: + _int_value: int + + def __repr__(self): + return repr(self._int_value) + + def eval(self, context): + return self._int_value + +@dataclass(slots=True) +class StringExecutor: + _str_value: str + + def __repr__(self): + return repr(self._str_value) + + def eval(self, context): + return context.local_vars[self._str_value] # an IR executor for evm opcodes which dispatches into py-evm -class DefaultIRExecutor(IRBaseExecutor): +class OpcodeIRExecutor(IRBaseExecutor): def __init__(self, name, opcode_impl, opcode_info, *args): self.opcode_impl = opcode_impl # py-evm OpcodeAPI self.opcode_info: OpcodeInfo = opcode_info # info from vyper.evm.opcodes @@ -113,9 +129,9 @@ def produces(self): return self.opcode_info.produces def eval(self, context): - debug("ENTER", self.name) + #debug("ENTER", self.name) evaled_args = self._eval_args(context) - debug(self.name,"args.", evaled_args) + #debug(self.name,"args.", evaled_args) computation = context.computation for arg in reversed(evaled_args): if isinstance(arg, int): @@ -163,12 +179,49 @@ def _as_signed(x): return unsigned_to_signed(x, 256, strict=True) +@dataclass(slots=True) +class VariableReference: + varname: str + var_slot: int + + @dataclass class CompileContext: labels: Dict[str, IRBaseExecutor] + #n_variable_slots: int = 0 + #varnames: Dict[str, VariableReference] + + +# most memory is aligned. treat it as list of ints, and provide mocking +# for instructions which access it in the slow way +class FastMem: + def __init__(self): + self.mem = [] + self.mem_bytes = bytearray() + + def __len__(self): + # return len in bytes + return len(self.mem) * 32 + + def extend(self, start_position, size_bytes): + new_size_words = ceil32(start_position + size) // 32 + size_difference = new_size_words - len(self.mem) + self.mem.extend([0] * size_difference) + + def read_word(self, start_position): + if start_position % 32 == 0: + return self.mem[start_position // 32] + return _to_int(self.read_bytes(start_position, 32)) + + def read_bytes(self, start_position, size): + pass + +MAX_UINT256 = 2** 256 - 1 class IRExecutor(IRBaseExecutor): + __slots__ = ("args", "compile_ctx") + _sig = None def __init__(self, compile_ctx, *args): @@ -176,13 +229,13 @@ def __init__(self, compile_ctx, *args): super().__init__(*args) def eval(self, context): - debug("ENTER", self.name) + #debug("ENTER", self.name) args = self._eval_args(context) if self.sig_mapper: assert len(args) == len(self.sig_mapper) args = (mapper(arg) for (mapper, arg) in zip(self.sig_mapper, args)) ret = self._impl(context, *args) - debug(f"({self.name} returning {ret})") + #debug(f"({self.name} returning {ret})") return ret @cached_property @@ -191,18 +244,23 @@ def sig_mapper(self): class UnsignedBinopExecutor(IRExecutor): - _sig = (int, int) + __slots__ = ("_name", "_op") - def _impl(self, context, x, y): + def eval(self, context): + x, y = self.args + # note: eval in reverse order. + y = _to_int(y.eval(context)) + x = _to_int(x.eval(context)) return _wrap256(self._op(x, y)) - + class SignedBinopExecutor(UnsignedBinopExecutor): - def _impl(self, x, y): - debug("entered _impl.", self.name) - x = unsigned_to_signed(x, 256, strict=True) - y = unsigned_to_signed(y, 256, strict=True) + def eval(self, context): + x, y = self.args + # note: eval in reverse order. + y = unsigned_to_signed(_to_int(y.eval(context), 256, strict=True)) + x = unsigned_to_signed(_to_int(x.eval(context), 256, strict=True)) return _wrap256(self._op(x, y)) - + # just use routines from vyper optimizer for opname, (op, _, unsigned) in vyper.ir.optimizer.arith.items(): base = UnsignedBinopExecutor if unsigned else SignedBinopExecutor @@ -211,18 +269,19 @@ def _impl(self, x, y): @executor class MLoad(IRExecutor): _name = "mload" - _sig = (int,) - def _impl(self, context, ptr): + def eval(self, context): + ptr = _to_int(self.args[0].eval(context)) context.computation._memory.extend(ptr, 32) return context.computation._memory.read_bytes(ptr, 32) @executor class MStore(IRExecutor): _name = "mstore" - _sig = (int,bytes) - def _impl(self, context, ptr, val): + def eval(self, context): + val = _to_bytes(self.args[1].eval(context)) + ptr = _to_int(self.args[0].eval(context)) context.computation._memory.extend(ptr, 32) context.computation._memory.write(ptr, 32, val) @@ -230,13 +289,12 @@ def _impl(self, context, ptr, val): @executor class Ceil32(IRExecutor): _name = "ceil32" - _sig = (int,int) + _sig = (int,) def _impl(self, context, x): return eth._utils.numeric.ceil32(x) - #@executor class DLoad(IRExecutor): _name = "dload" @@ -265,11 +323,11 @@ class Seq(IRExecutor): _name = "seq" def eval(self, context): - debug("ENTER", self.name) + #debug("ENTER", self.name) for arg in self.args: - lastval = _eval_single(arg, context) - debug(self.name,"evaled",lastval) + lastval = arg.eval(context) + #debug(self.name,"evaled",lastval) return lastval @@ -278,14 +336,15 @@ class Repeat(IRExecutor): _name = "repeat" def eval(self, context): - debug("ENTER", self.name) + #debug("ENTER", self.name) i_name, start, rounds, rounds_bound, body = self.args - start = _eval_single(start, context) - rounds = _eval_single(rounds, context) - assert rounds <= rounds_bound + start = start.eval(context) + rounds = rounds.eval(context) + assert rounds <= rounds_bound._int_value + i_name = i_name._str_value assert i_name not in context.local_vars for i in range(start, start + rounds): @@ -301,19 +360,19 @@ class If(IRExecutor): # override `eval()` so we can get the correct lazy behavior def eval(self, context): - debug("ENTER", self.name) + #debug("ENTER", self.name) try: test, body, orelse = self.args except ValueError: test, body = self.args orelse = None - test = _to_int(_eval_single(test, context)) + test = _to_int(test.eval(context)) if bool(test): - return _eval_single(body, context) + return body.eval(context) elif orelse is not None: - return _eval_single(orelse, context) + return orelse.eval(context) return @@ -338,15 +397,15 @@ class Goto(IRExecutor): _name = "goto" def get_label(self): - label = self.args[0] + label = self.args[0]._str_value if label.startswith("_sym_"): label = label[len("_sym_"):] return label def eval(self, context): - debug("ENTER", self.name) + #debug("ENTER", self.name) label = self.get_label() - args = reversed([_eval_single(arg, context) for arg in reversed(self.args[1:])]) + args = reversed([arg.eval(context) for arg in reversed(self.args[1:])]) context.goto(self.compile_ctx, label, args) @@ -369,16 +428,16 @@ def __init__(self, compile_ctx, name, var_list, body): self.args = (name, var_list, body) - compile_ctx.labels[name] = self + compile_ctx.labels[name._str_value] = self def eval(self, context): - debug("ENTER", self.name) + #debug("ENTER", self.name) pass def execute_subroutine(self, context, *args): assert len(args) == len(self.var_list), (self.labelname, [x for x in args], self.var_list) for varname, val in zip(self.var_list, args): - context.local_vars[varname] = val + context.local_vars[varname._str_value] = val self.body.eval(context) @@ -386,28 +445,37 @@ def execute_subroutine(self, context, *args): class With(IRExecutor): _name = "with" + # accessing local vars is a hotspot, so we translate varnames + # to slots at compile time (something like de-bruijn index) to + # save some dictionary accesses. + #def __init__(self, compile_ctx, varname, val, body): + def eval(self, context): - debug("ENTER", self.name) + #debug("ENTER", self.name) varname, val, body = self.args + varname = varname._str_value + #_, val, body = self.args + #varname = self.varname + + val = val.eval(context) - val = _eval_single(val, context) + shadowed = context.local_vars.pop(varname, None) - backup = context.local_vars.get(varname) context.local_vars[varname] = val ret = body.eval(context) - if backup is None: - del context.local_vars[varname] + if shadowed is not None: + context.local_vars[varname] = shadowed else: - context.local_vars[varname] = backup + del context.local_vars[varname] return ret def executor_from_ir(ir_node, opcode_impls: Dict[int, Any], compile_ctx = None) -> Any: instr = ir_node.value if isinstance(instr, int): - return instr + return IntExecutor(instr) if compile_ctx is None: compile_ctx = CompileContext({}) @@ -420,7 +488,7 @@ def executor_from_ir(ir_node, opcode_impls: Dict[int, Any], compile_ctx = None) if instr.upper() in OPCODES: opcode_info = OpcodeInfo.from_opcode_info(OPCODES[instr.upper()]) opcode_impl = opcode_impls[opcode_info.opcode] - return DefaultIRExecutor(instr, opcode_impl, opcode_info, *args) + return OpcodeIRExecutor(instr, opcode_impl, opcode_info, *args) assert len(ir_node.args) == 0, ir_node - return ir_node.value + return StringExecutor(instr) From 490f236d3055887de11cbcca4498d4cb37657435 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 17 Jun 2023 22:26:41 -0700 Subject: [PATCH 003/122] work on de bruijn indices --- boa/vyper/ir_executor.py | 170 +++++++++++++++++++++++++++------------ 1 file changed, 117 insertions(+), 53 deletions(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 9763cd77..059bd36c 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -1,5 +1,6 @@ from dataclasses import dataclass, field from functools import cached_property +import contextlib from typing import Any, Dict, List, Union import re import sys @@ -41,26 +42,37 @@ def from_opcode_info(cls, opcode_info): @dataclass(slots=True) class EvalContext: computation: Any # ComputationAPI - call_frames: List[Dict[str, int]] = field(default_factory=lambda: [{}]) + call_frames: List[List[Any]] = field(default_factory=lambda: [[]]) @property def local_vars(self): return self.call_frames[-1] + @contextlib.contextmanager + def variables(self, var_list): + for var in var_list: + self.local_vars.append(var) + + yield + + for var in var_list: + self.local_vars.pop() + def goto(self, compile_ctx, label, arglist): if label == "returnpc": # i.e. exitsub return - self.call_frames.append({}) + self.call_frames.append([]) compile_ctx.labels[label].execute_subroutine(self, *arglist) self.call_frames.pop() class IRBaseExecutor: - __slots__ = ("args",) + __slots__ = ("args","compile_ctx") - def __init__(self, *args): + def __init__(self, compile_ctx, *args): self.args = args + self.compile_ctx = compile_ctx @cached_property def name(self): @@ -95,6 +107,45 @@ def _eval_args(self, context): ret.reverse() return ret + def analyze(self): + self.args = [arg.analyze() for arg in self.args] + return self + + +@dataclass +class CompileContext: + labels: Dict[str, IRBaseExecutor] + de_bruijn_indexes: List[int] = field(default_factory=lambda:[0]) + var_slot_mappings: List[Dict[str, int]] = field(default_factory=lambda:[{}]) + + @property + def local_vars(self): + return self.var_slot_mappings[-1] + + @contextlib.contextmanager + def allocate_local_frame(self): + self.var_slot_mappings.append({}) + self.de_bruijn_indexes.append(0) + yield + self.de_bruijn_indexes.pop() + self.var_slot_mappings.pop() + + @contextlib.contextmanager + def variables(self, vars_list): + shadowed = {} + for varname in vars_list: + shadowed[varname] = self.local_vars.get(varname) + self.local_vars[varname] = self.de_bruijn_indexes[-1] + self.de_bruijn_indexes[-1] += 1 + + yield + + for varname in vars_list: + self.de_bruijn_indexes[-1] -= 1 + if shadowed[varname] is None: + del self.local_vars[varname] + else: + self.local_vars[varname] = shadowed[varname] @dataclass(slots=True) class IntExecutor: @@ -106,15 +157,21 @@ def __repr__(self): def eval(self, context): return self._int_value + def analyze(self): + return self + @dataclass(slots=True) class StringExecutor: _str_value: str + compile_ctx: CompileContext def __repr__(self): return repr(self._str_value) - def eval(self, context): - return context.local_vars[self._str_value] + def analyze(self): + de_bruijn_index = self.compile_ctx.local_vars[self._str_value] + return VariableExecutor(self._str_value, de_bruijn_index) + # an IR executor for evm opcodes which dispatches into py-evm class OpcodeIRExecutor(IRBaseExecutor): @@ -180,16 +237,12 @@ def _as_signed(x): @dataclass(slots=True) -class VariableReference: +class VariableExecutor: varname: str var_slot: int - -@dataclass -class CompileContext: - labels: Dict[str, IRBaseExecutor] - #n_variable_slots: int = 0 - #varnames: Dict[str, VariableReference] + def eval(self, context): + return context.local_vars[self.var_slot] # most memory is aligned. treat it as list of ints, and provide mocking @@ -220,14 +273,8 @@ def read_bytes(self, start_position, size): MAX_UINT256 = 2** 256 - 1 class IRExecutor(IRBaseExecutor): - __slots__ = ("args", "compile_ctx") - _sig = None - def __init__(self, compile_ctx, *args): - self.compile_ctx = compile_ctx - super().__init__(*args) - def eval(self, context): #debug("ENTER", self.name) args = self._eval_args(context) @@ -247,6 +294,7 @@ class UnsignedBinopExecutor(IRExecutor): __slots__ = ("_name", "_op") def eval(self, context): + #print("ENTER",self._name,self.args) x, y = self.args # note: eval in reverse order. y = _to_int(y.eval(context)) @@ -338,20 +386,27 @@ class Repeat(IRExecutor): def eval(self, context): #debug("ENTER", self.name) - i_name, start, rounds, rounds_bound, body = self.args + _, start, rounds, rounds_bound, body = self.args start = start.eval(context) rounds = rounds.eval(context) assert rounds <= rounds_bound._int_value - i_name = i_name._str_value - assert i_name not in context.local_vars + with context.variables([-1]): + i_slot = len(context.local_vars) - 1 + for i in range(start, start + rounds): + context.local_vars[i_slot] = i + body.eval(context) - for i in range(start, start + rounds): - context.local_vars[i_name] = i - body.eval(context) - del context.local_vars[i_name] + def analyze(self): + i_name, start, rounds, rounds_bound, body = self.args + start = start.analyze() + rounds = rounds.analyze() + with self.compile_ctx.variables([i_name._str_value]): + body = body.analyze() + self.args = i_name, start, rounds, rounds_bound, body + return self @executor @@ -402,6 +457,11 @@ def get_label(self): label = label[len("_sym_"):] return label + def analyze(self): + for arg in self.args[1:]: + arg = arg.analyze() + return self + def eval(self, context): #debug("ENTER", self.name) label = self.get_label() @@ -430,16 +490,22 @@ def __init__(self, compile_ctx, name, var_list, body): compile_ctx.labels[name._str_value] = self + def analyze(self): + with self.compile_ctx.allocate_local_frame(): + var_list = [var._str_value for var in self.var_list] + with self.compile_ctx.variables(var_list): + self.body = self.body.analyze() + + return self + def eval(self, context): #debug("ENTER", self.name) pass def execute_subroutine(self, context, *args): assert len(args) == len(self.var_list), (self.labelname, [x for x in args], self.var_list) - for varname, val in zip(self.var_list, args): - context.local_vars[varname._str_value] = val - - self.body.eval(context) + with context.variables(args): + self.body.eval(context) @executor class With(IRExecutor): @@ -448,39 +514,37 @@ class With(IRExecutor): # accessing local vars is a hotspot, so we translate varnames # to slots at compile time (something like de-bruijn index) to # save some dictionary accesses. - #def __init__(self, compile_ctx, varname, val, body): + def analyze(self): + varname = self.args[0]._str_value + val = self.args[1].analyze() # analyze before shadowing + with self.compile_ctx.variables([varname]): + variable = self.args[0].analyze() # analyze for debugging + body = self.args[2].analyze() + self.args = (variable, val, body) + + return self def eval(self, context): - #debug("ENTER", self.name) - varname, val, body = self.args - varname = varname._str_value - #_, val, body = self.args - #varname = self.varname + variable, val, body = self.args val = val.eval(context) - shadowed = context.local_vars.pop(varname, None) - - context.local_vars[varname] = val - - ret = body.eval(context) - - if shadowed is not None: - context.local_vars[varname] = shadowed - else: - del context.local_vars[varname] + with context.variables([val]): + #assert len(context.local_vars) == variable.var_slot + 1 # sanity check + ret = body.eval(context) return ret -def executor_from_ir(ir_node, opcode_impls: Dict[int, Any], compile_ctx = None) -> Any: +def executor_from_ir(ir_node, opcode_impls: Dict[int, Any]) -> Any: + ret = _executor_from_ir(ir_node, opcode_impls, CompileContext({})) + return ret.analyze() + +def _executor_from_ir(ir_node, opcode_impls, compile_ctx) -> Any: instr = ir_node.value if isinstance(instr, int): return IntExecutor(instr) - if compile_ctx is None: - compile_ctx = CompileContext({}) - - args = (executor_from_ir(arg, opcode_impls, compile_ctx) for arg in ir_node.args) + args = (_executor_from_ir(arg, opcode_impls, compile_ctx) for arg in ir_node.args) if instr in _executors: return _executors[instr](compile_ctx, *args) @@ -488,7 +552,7 @@ def executor_from_ir(ir_node, opcode_impls: Dict[int, Any], compile_ctx = None) if instr.upper() in OPCODES: opcode_info = OpcodeInfo.from_opcode_info(OPCODES[instr.upper()]) opcode_impl = opcode_impls[opcode_info.opcode] - return OpcodeIRExecutor(instr, opcode_impl, opcode_info, *args) + return OpcodeIRExecutor(instr, opcode_impl, opcode_info, compile_ctx, *args) assert len(ir_node.args) == 0, ir_node - return StringExecutor(instr) + return StringExecutor(instr, compile_ctx) From 64538f8ac3db3b7df93f12a73da94171c6e0b80f Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 18 Jun 2023 10:16:55 -0700 Subject: [PATCH 004/122] get de-bruijn indexes working this gets benchmark time from 0.87s to 0.77s --- boa/environment.py | 4 +- boa/vyper/ir_executor.py | 107 +++++++++++++++++++++++++-------------- 2 files changed, 70 insertions(+), 41 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 4c55ce5c..ff32ee2b 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -322,10 +322,10 @@ def apply_computation(cls, state , msg , tx_ctx): print("FAST MODE") err = None with cls(state, msg, tx_ctx) as computation: - eval_ctx = EvalContext(computation) print(contract.ir_executor) + eval_ctx = EvalContext(contract.ir_executor, computation) try: - contract.ir_executor.eval(eval_ctx) + eval_ctx.run() except Exception as e: # grab the exception to raise later - # unclear why this is getting swallowed by py-evm. diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 059bd36c..9cb07123 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -39,32 +39,53 @@ def from_opcode_info(cls, opcode_info): return cls(opcode, consumes, produces, gas_estimate) -@dataclass(slots=True) +#@dataclass(slots=True) +@dataclass class EvalContext: + ir_executor: Any # IRBaseExecutor computation: Any # ComputationAPI - call_frames: List[List[Any]] = field(default_factory=lambda: [[]]) + call_frames: List[List[Any]] = field(default_factory=list) + + def __post_init__(self): + self._allocate_call_frame([], self.ir_executor._max_var_height) + + def run(self): + #print("ENTER", self.call_frames) + self.ir_executor.eval(self) + return self.computation @property def local_vars(self): return self.call_frames[-1] - @contextlib.contextmanager - def variables(self, var_list): - for var in var_list: - self.local_vars.append(var) + def _allocate_call_frame(self, arglist, max_var_height): + # a sentinel which will cause an exception if somebody tries to use it by accident + oob_int = "uh oh!" + required_dummies = max_var_height + 1 - len(arglist) + frame_vars = list(arglist) + frame_vars.extend([oob_int] * required_dummies) + self.call_frames.append(frame_vars) + @contextlib.contextmanager + def allocate_call_frame(self, arglist, max_var_height): + self._allocate_call_frame(arglist, max_var_height) yield - - for var in var_list: - self.local_vars.pop() + self.call_frames.pop() + #@contextlib.contextmanager + #def variables(self, var_list): + # for var in var_list: + # self.local_vars.append(var) +# +# yield +# +# for var in var_list: +# self.local_vars.pop() def goto(self, compile_ctx, label, arglist): if label == "returnpc": # i.e. exitsub return - self.call_frames.append([]) compile_ctx.labels[label].execute_subroutine(self, *arglist) - self.call_frames.pop() class IRBaseExecutor: @@ -111,41 +132,46 @@ def analyze(self): self.args = [arg.analyze() for arg in self.args] return self +@dataclass +class FrameInfo: + de_bruijn_index: int = 0 + max_db_index: int = 0 + slots: Dict[str, int] = field(default_factory=lambda: {}) @dataclass class CompileContext: labels: Dict[str, IRBaseExecutor] - de_bruijn_indexes: List[int] = field(default_factory=lambda:[0]) - var_slot_mappings: List[Dict[str, int]] = field(default_factory=lambda:[{}]) + frames: List[FrameInfo] = field(default_factory=lambda: [FrameInfo()]) @property def local_vars(self): - return self.var_slot_mappings[-1] + return self.frames[-1].slots @contextlib.contextmanager def allocate_local_frame(self): - self.var_slot_mappings.append({}) - self.de_bruijn_indexes.append(0) - yield - self.de_bruijn_indexes.pop() - self.var_slot_mappings.pop() + frame = FrameInfo() + self.frames.append(frame) + yield frame + self.frames.pop() @contextlib.contextmanager def variables(self, vars_list): shadowed = {} + frame = self.frames[-1] for varname in vars_list: - shadowed[varname] = self.local_vars.get(varname) - self.local_vars[varname] = self.de_bruijn_indexes[-1] - self.de_bruijn_indexes[-1] += 1 + shadowed[varname] = frame.slots.get(varname) + frame.slots[varname] = frame.de_bruijn_index + frame.de_bruijn_index += 1 + frame.max_db_index = max(frame.max_db_index, frame.de_bruijn_index) yield for varname in vars_list: - self.de_bruijn_indexes[-1] -= 1 + frame.de_bruijn_index -= 1 if shadowed[varname] is None: - del self.local_vars[varname] + del frame.slots[varname] else: - self.local_vars[varname] = shadowed[varname] + frame.slots[varname] = shadowed[varname] @dataclass(slots=True) class IntExecutor: @@ -274,6 +300,7 @@ def read_bytes(self, start_position, size): class IRExecutor(IRBaseExecutor): _sig = None + _max_var_height = None def eval(self, context): #debug("ENTER", self.name) @@ -386,17 +413,15 @@ class Repeat(IRExecutor): def eval(self, context): #debug("ENTER", self.name) - _, start, rounds, rounds_bound, body = self.args + i_var, start, rounds, rounds_bound, body = self.args start = start.eval(context) rounds = rounds.eval(context) assert rounds <= rounds_bound._int_value - with context.variables([-1]): - i_slot = len(context.local_vars) - 1 - for i in range(start, start + rounds): - context.local_vars[i_slot] = i - body.eval(context) + for i in range(start, start + rounds): + context.local_vars[i_var.var_slot] = i + body.eval(context) def analyze(self): @@ -404,8 +429,9 @@ def analyze(self): start = start.analyze() rounds = rounds.analyze() with self.compile_ctx.variables([i_name._str_value]): + i_var = i_name.analyze() body = body.analyze() - self.args = i_name, start, rounds, rounds_bound, body + self.args = i_var, start, rounds, rounds_bound, body return self @@ -491,11 +517,14 @@ def __init__(self, compile_ctx, name, var_list, body): compile_ctx.labels[name._str_value] = self def analyze(self): - with self.compile_ctx.allocate_local_frame(): + with self.compile_ctx.allocate_local_frame() as frame_info: var_list = [var._str_value for var in self.var_list] with self.compile_ctx.variables(var_list): self.body = self.body.analyze() + self._max_var_height = frame_info.max_db_index + + print(self._name, self.labelname, self._max_var_height) return self def eval(self, context): @@ -504,7 +533,7 @@ def eval(self, context): def execute_subroutine(self, context, *args): assert len(args) == len(self.var_list), (self.labelname, [x for x in args], self.var_list) - with context.variables(args): + with context.allocate_call_frame(args, self._max_var_height): self.body.eval(context) @executor @@ -528,16 +557,16 @@ def eval(self, context): variable, val, body = self.args val = val.eval(context) - - with context.variables([val]): - #assert len(context.local_vars) == variable.var_slot + 1 # sanity check - ret = body.eval(context) + context.local_vars[variable.var_slot] = val + ret = body.eval(context) return ret def executor_from_ir(ir_node, opcode_impls: Dict[int, Any]) -> Any: ret = _executor_from_ir(ir_node, opcode_impls, CompileContext({})) - return ret.analyze() + ret = ret.analyze() + ret._max_var_height = ret.compile_ctx.frames[0].max_db_index + return ret def _executor_from_ir(ir_node, opcode_impls, compile_ctx) -> Any: instr = ir_node.value From 7888ce50665977b9712f03754cbd210e472af362 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 18 Jun 2023 12:15:04 -0700 Subject: [PATCH 005/122] optimize de bruijn indices, fastmem --- boa/vyper/ir_executor.py | 104 +++++++++++++++++++++++++-------------- 1 file changed, 67 insertions(+), 37 deletions(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 9cb07123..89063325 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -7,6 +7,7 @@ from eth._utils.numeric import ceil32 from eth.exceptions import Revert +from eth.vm.memory import Memory from vyper.evm.opcodes import OPCODES import vyper.ir.optimizer @@ -47,7 +48,8 @@ class EvalContext: call_frames: List[List[Any]] = field(default_factory=list) def __post_init__(self): - self._allocate_call_frame([], self.ir_executor._max_var_height) + self.computation._memory = FastMem() + self._allocate_local_frame([], self.ir_executor._max_var_height) def run(self): #print("ENTER", self.call_frames) @@ -58,7 +60,9 @@ def run(self): def local_vars(self): return self.call_frames[-1] - def _allocate_call_frame(self, arglist, max_var_height): + def _allocate_local_frame(self, arglist, max_var_height): + # pre-allocate variable slots so we don't waste time with append/pop. + # a sentinel which will cause an exception if somebody tries to use it by accident oob_int = "uh oh!" required_dummies = max_var_height + 1 - len(arglist) @@ -67,19 +71,10 @@ def _allocate_call_frame(self, arglist, max_var_height): self.call_frames.append(frame_vars) @contextlib.contextmanager - def allocate_call_frame(self, arglist, max_var_height): - self._allocate_call_frame(arglist, max_var_height) + def allocate_local_frame(self, arglist, max_var_height): + self._allocate_local_frame(arglist, max_var_height) yield self.call_frames.pop() - #@contextlib.contextmanager - #def variables(self, var_list): - # for var in var_list: - # self.local_vars.append(var) -# -# yield -# -# for var in var_list: -# self.local_vars.pop() def goto(self, compile_ctx, label, arglist): if label == "returnpc": # i.e. exitsub @@ -134,9 +129,10 @@ def analyze(self): @dataclass class FrameInfo: - de_bruijn_index: int = 0 - max_db_index: int = 0 + current_slot: int = 0 # basically the de bruijn index slots: Dict[str, int] = field(default_factory=lambda: {}) + # record the largest slot we see, so we know how many local vars to allocate + max_slot: int = 0 @dataclass class CompileContext: @@ -160,14 +156,14 @@ def variables(self, vars_list): frame = self.frames[-1] for varname in vars_list: shadowed[varname] = frame.slots.get(varname) - frame.slots[varname] = frame.de_bruijn_index - frame.de_bruijn_index += 1 - frame.max_db_index = max(frame.max_db_index, frame.de_bruijn_index) + frame.slots[varname] = frame.current_slot + frame.current_slot += 1 + frame.max_slot = max(frame.max_slot, frame.current_slot) yield for varname in vars_list: - frame.de_bruijn_index -= 1 + frame.current_slot -= 1 if shadowed[varname] is None: del frame.slots[varname] else: @@ -273,28 +269,60 @@ def eval(self, context): # most memory is aligned. treat it as list of ints, and provide mocking # for instructions which access it in the slow way -class FastMem: +class FastMem(Memory): + __slots__ = ("mem_cache", "_bytes", "needs_writeback") def __init__(self): - self.mem = [] - self.mem_bytes = bytearray() + self.mem_cache = [] # cached words + + self.needs_writeback = set() # - def __len__(self): - # return len in bytes - return len(self.mem) * 32 + super().__init__() + + _DIRTY = object() def extend(self, start_position, size_bytes): - new_size_words = ceil32(start_position + size) // 32 - size_difference = new_size_words - len(self.mem) - self.mem.extend([0] * size_difference) + # i.e. ceil32(len(self)) // 32 + new_size = (start_position + size_bytes + 31) // 32 + if (size_difference := new_size - len(self.mem_cache)) > 0: + self.mem_cache.extend([self._DIRTY] * size_difference) + super().extend(start_position, size_bytes) + def read_word(self, start_position): if start_position % 32 == 0: - return self.mem[start_position // 32] + if (ret := self.mem_cache[start_position // 32]) is not self._DIRTY: + return ret - return _to_int(self.read_bytes(start_position, 32)) + ret = _to_int(self.read_bytes(start_position, 32)) + self.mem_cache[start_position // 32] = ret + return ret def read_bytes(self, start_position, size): - pass + start = start_position // 32 + end = ceil32(start_position + size) // 32 + for ix in range(start, end): + if ix in self.needs_writeback: + super().write(ix * 32, 32, _to_bytes(self.mem_cache[ix])) + self.needs_writeback.remove(ix) + + return super().read_bytes(start_position, size) + + def write_word(self, start_position, int_val): + if start_position % 32 == 0: + self.mem_cache[start_position // 32] = int_val + + self.needs_writeback.add(start_position // 32) + + # bypass cache dirtying + #super().write(start_position, 32, _to_bytes(int_val)) + + def write(self, start_position, size, value): + start = start_position // 32 + end = (start_position + size + 31) // 32 + for i in range(start, end): + self.mem_cache[i] = self._DIRTY + super().write(start_position, size, value) + MAX_UINT256 = 2** 256 - 1 @@ -348,17 +376,19 @@ class MLoad(IRExecutor): def eval(self, context): ptr = _to_int(self.args[0].eval(context)) context.computation._memory.extend(ptr, 32) - return context.computation._memory.read_bytes(ptr, 32) + #return context.computation._memory.read_bytes(ptr, 32) + return context.computation._memory.read_word(ptr) @executor class MStore(IRExecutor): _name = "mstore" def eval(self, context): - val = _to_bytes(self.args[1].eval(context)) + val = _to_int(self.args[1].eval(context)) ptr = _to_int(self.args[0].eval(context)) context.computation._memory.extend(ptr, 32) - context.computation._memory.write(ptr, 32, val) + #context.computation._memory.write(ptr, 32, val) + context.computation._memory.write_word(ptr, val) @executor @@ -522,7 +552,7 @@ def analyze(self): with self.compile_ctx.variables(var_list): self.body = self.body.analyze() - self._max_var_height = frame_info.max_db_index + self._max_var_height = frame_info.max_slot print(self._name, self.labelname, self._max_var_height) return self @@ -533,7 +563,7 @@ def eval(self, context): def execute_subroutine(self, context, *args): assert len(args) == len(self.var_list), (self.labelname, [x for x in args], self.var_list) - with context.allocate_call_frame(args, self._max_var_height): + with context.allocate_local_frame(args, self._max_var_height): self.body.eval(context) @executor @@ -565,7 +595,7 @@ def eval(self, context): def executor_from_ir(ir_node, opcode_impls: Dict[int, Any]) -> Any: ret = _executor_from_ir(ir_node, opcode_impls, CompileContext({})) ret = ret.analyze() - ret._max_var_height = ret.compile_ctx.frames[0].max_db_index + ret._max_var_height = ret.compile_ctx.frames[0].max_slot return ret def _executor_from_ir(ir_node, opcode_impls, compile_ctx) -> Any: From a78efd2022a8c76e4ec75903eccea6e66f424ced Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 18 Jun 2023 12:43:28 -0700 Subject: [PATCH 006/122] use pycryptodome version of eth-stdlib for pypy --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 973f94df..6083c4f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ classifiers = ["Topic :: Software Development"] # Requirements dependencies = [ "vyper >= 0.3.8", - "eth-stdlib", + "git+https://github.com/charles-cooper/eth-stdlib/pycryptodome.git", "eth-abi", "py-evm>=0.7.0a2", "eth-typing", From b788e99aeb068eb363ed4af20e06c8b9e735e82b Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 18 Jun 2023 13:23:51 -0700 Subject: [PATCH 007/122] fix lint --- boa/environment.py | 30 +++-- boa/vyper/contract.py | 2 +- boa/vyper/ir_executor.py | 247 ++++++++++++++++++++++----------------- pyproject.toml | 2 +- 4 files changed, 159 insertions(+), 122 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index ff32ee2b..6d1a6812 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -5,7 +5,7 @@ import logging import sys import warnings -from typing import Any, Iterator, Optional, Union, Tuple +from typing import Any, Iterator, Optional, Tuple, Union import eth.constants as constants import eth.tools.builder.chain as chain @@ -255,6 +255,7 @@ def __call__(self, computation): # ### End section: sha3 tracing + # py-evm uses class instantiaters which need to be classes # instead of like factories or other easier to use architectures - # `computation_template` is a class which can be constructed dynamically @@ -302,26 +303,26 @@ def apply_create_message(cls, state, msg, tx_ctx): if is_eip1167_contract(bytecode): contract_address = extract_eip1167_address(bytecode) - bytecode = self.vm.state.get_code(contract_address) + bytecode = cls.env.vm.state.get_code(contract_address) if bytecode in cls.env._code_registry: - target = self._code_registry[bytecode].deployer.at(contract_address) + target = cls.env._code_registry[bytecode].deployer.at(contract_address) target.created_from = to_checksum_address(msg.sender) - env.register_contract(contract_address, target) + cls.env.register_contract(contract_address, target) return computation @classmethod - def apply_computation(cls, state , msg , tx_ctx): + def apply_computation(cls, state, msg, tx_ctx): addr = msg.code_address contract = cls.env.lookup_contract(addr) if addr else None - if contract is None or False: + if contract is None or not cls.env._enable_fast_mode: print("SLOW MODE") return super().apply_computation(state, msg, tx_ctx) - print("FAST MODE") err = None with cls(state, msg, tx_ctx) as computation: + print("FAST MODE") print(contract.ir_executor) eval_ctx = EvalContext(contract.ir_executor, computation) try: @@ -329,12 +330,13 @@ def apply_computation(cls, state , msg , tx_ctx): except Exception as e: # grab the exception to raise later - # unclear why this is getting swallowed by py-evm. - #print(e) + # print(e) err = e - from eth.exceptions import VMError, Halt + from eth.exceptions import Halt, VMError + if err is not None and not isinstance(err, (Halt, VMError)): - #if err is not None: + # if err is not None: raise err return computation @@ -344,6 +346,7 @@ class Env: _singleton = None _initial_address_counter = 100 _coverage_enabled = False + _enable_fast_mode = False def __init__(self): self.chain = _make_chain() @@ -375,9 +378,12 @@ def get_gas_price(self): def _init_vm(self, reset_traces=True): self.vm = self.chain.get_vm() - env = self - c = type("TitanoboaComputation", (computation_template, self.vm.state.computation_class), {"env": self}) + c = type( + "TitanoboaComputation", + (computation_template, self.vm.state.computation_class), + {"env": self}, + ) self.vm.state.computation_class = c diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index c15341e9..4396fe91 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -38,7 +38,6 @@ from boa.util.lrudict import lrudict from boa.vm.gas_meters import ProfilingGasMeter from boa.vyper import _METHOD_ID_VAR -from boa.vyper.ir_executor import executor_from_ir from boa.vyper.ast_utils import ast_map_of, get_fn_ancestor_from_node, reason_at from boa.vyper.compiler_utils import ( _compile_vyper_function, @@ -47,6 +46,7 @@ ) from boa.vyper.decoder_utils import ByteAddressableStorage, decode_vyper_object from boa.vyper.event import Event, RawEvent +from boa.vyper.ir_executor import executor_from_ir # error messages for external calls EXTERNAL_CALL_ERRORS = ("external call failed", "returndatasize too small") diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 89063325..73930c96 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -1,30 +1,27 @@ -from dataclasses import dataclass, field -from functools import cached_property import contextlib -from typing import Any, Dict, List, Union import re -import sys +from dataclasses import dataclass, field +from functools import cached_property +from typing import Any, Optional -from eth._utils.numeric import ceil32 +import vyper.ir.optimizer from eth.exceptions import Revert from eth.vm.memory import Memory - from vyper.evm.opcodes import OPCODES -import vyper.ir.optimizer +from vyper.utils import unsigned_to_signed -def debug(*args, **kwargs): - pass -def _debug(*args, **kwargs): +def debug(*args, **kwargs): print(*args, **kwargs) -if False: - debug = _debug + +def ceil32(x): + return (x + 31) & ~31 + @dataclass class OpcodeInfo: # model of an opcode from vyper.evm.opcodes - opcode: int # opcode number ex. 0x01 for ADD consumes: int # number of stack items this consumes produces: int # number of stack items this produces, must be 0 or 1 @@ -40,21 +37,23 @@ def from_opcode_info(cls, opcode_info): return cls(opcode, consumes, produces, gas_estimate) -#@dataclass(slots=True) -@dataclass +@dataclass(slots=True) class EvalContext: - ir_executor: Any # IRBaseExecutor + ir_executor: "IRBaseExecutor" computation: Any # ComputationAPI - call_frames: List[List[Any]] = field(default_factory=list) + call_frames: list[list[Any]] = field(default_factory=list) def __post_init__(self): self.computation._memory = FastMem() - self._allocate_local_frame([], self.ir_executor._max_var_height) def run(self): - #print("ENTER", self.call_frames) - self.ir_executor.eval(self) - return self.computation + try: + self._allocate_local_frame([], self.ir_executor._max_var_height) + self.ir_executor.eval(self) + return self.computation + finally: + # clear all state + self.call_frames = [] @property def local_vars(self): @@ -63,11 +62,14 @@ def local_vars(self): def _allocate_local_frame(self, arglist, max_var_height): # pre-allocate variable slots so we don't waste time with append/pop. - # a sentinel which will cause an exception if somebody tries to use it by accident - oob_int = "uh oh!" required_dummies = max_var_height + 1 - len(arglist) + frame_vars = list(arglist) - frame_vars.extend([oob_int] * required_dummies) + + # a sentinel which will cause an exception if somebody tries to use it by accident + dummy = "uh oh!" + frame_vars.extend([dummy] * required_dummies) + self.call_frames.append(frame_vars) @contextlib.contextmanager @@ -77,67 +79,26 @@ def allocate_local_frame(self, arglist, max_var_height): self.call_frames.pop() def goto(self, compile_ctx, label, arglist): - if label == "returnpc": # i.e. exitsub + # special case to handle how vyper returns from subroutines + if label == "returnpc": return compile_ctx.labels[label].execute_subroutine(self, *arglist) -class IRBaseExecutor: - __slots__ = ("args","compile_ctx") - - def __init__(self, compile_ctx, *args): - self.args = args - self.compile_ctx = compile_ctx - - @cached_property - def name(self): - return self._name - - def __repr__(self): - ret = self.name + "(" - - show = lambda s: s if isinstance(s, str) else hex(s) if isinstance(s, int) else repr(s) - arg_reprs = [show(arg) for arg in self.args] - arg_reprs = [x.replace("\n", "\n ") for x in arg_reprs] - ret += ",\n ".join(arg_reprs) - ret += ")" - - has_inner_newlines = any("\n" in t for t in arg_reprs) - output_on_one_line = re.sub(r",\n *", ", ", ret).replace("\n", "") - - should_output_single_line = len(output_on_one_line) < 80 and not has_inner_newlines - - if should_output_single_line: - return output_on_one_line - else: - return ret - - def eval(self, context): - #debug("ENTER", self.name) - args = self._eval_args(context) - return self._impl(context, *args) - - def _eval_args(self, context): - ret = [arg.eval(context) for arg in reversed(self.args)] - ret.reverse() - return ret - - def analyze(self): - self.args = [arg.analyze() for arg in self.args] - return self - @dataclass class FrameInfo: current_slot: int = 0 # basically the de bruijn index - slots: Dict[str, int] = field(default_factory=lambda: {}) + slots: dict[str, int] = field(default_factory=lambda: {}) + # record the largest slot we see, so we know how many local vars to allocate max_slot: int = 0 + @dataclass class CompileContext: - labels: Dict[str, IRBaseExecutor] - frames: List[FrameInfo] = field(default_factory=lambda: [FrameInfo()]) + labels: dict[str, "IRBaseExecutor"] + frames: list[FrameInfo] = field(default_factory=lambda: [FrameInfo()]) @property def local_vars(self): @@ -152,6 +113,8 @@ def allocate_local_frame(self): @contextlib.contextmanager def variables(self, vars_list): + # allocate variables in vars_list, assigning them each + # a new slot. shadowed = {} frame = self.frames[-1] for varname in vars_list: @@ -169,6 +132,54 @@ def variables(self, vars_list): else: frame.slots[varname] = shadowed[varname] + +class IRBaseExecutor: + __slots__ = ("args", "compile_ctx") + + def __init__(self, compile_ctx, *args): + self.args = args + self.compile_ctx = compile_ctx + + @cached_property + def name(self): + return self._name + + def __repr__(self): + ret = self.name + "(" + + def show(s): + return hex(s) if isinstance(s, int) else repr(s) + + arg_reprs = [show(arg) for arg in self.args] + arg_reprs = [x.replace("\n", "\n ") for x in arg_reprs] + ret += ",\n ".join(arg_reprs) + ret += ")" + + has_inner_newlines = any("\n" in t for t in arg_reprs) + one_line_output = re.sub(r",\n *", ", ", ret).replace("\n", "") + + should_one_line = len(one_line_output) < 80 and not has_inner_newlines + + if should_one_line: + return one_line_output + else: + return ret + + def eval(self, context): + # debug("ENTER", self.name) + args = self._eval_args(context) + return self._impl(context, *args) + + def _eval_args(self, context): + ret = [arg.eval(context) for arg in reversed(self.args)] + ret.reverse() + return ret + + def analyze(self): + self.args = [arg.analyze() for arg in self.args] + return self + + @dataclass(slots=True) class IntExecutor: _int_value: int @@ -182,6 +193,7 @@ def eval(self, context): def analyze(self): return self + @dataclass(slots=True) class StringExecutor: _str_value: str @@ -191,8 +203,8 @@ def __repr__(self): return repr(self._str_value) def analyze(self): - de_bruijn_index = self.compile_ctx.local_vars[self._str_value] - return VariableExecutor(self._str_value, de_bruijn_index) + slot = self.compile_ctx.local_vars[self._str_value] + return VariableExecutor(self._str_value, slot) # an IR executor for evm opcodes which dispatches into py-evm @@ -208,16 +220,16 @@ def produces(self): return self.opcode_info.produces def eval(self, context): - #debug("ENTER", self.name) + # debug("ENTER", self.name) evaled_args = self._eval_args(context) - #debug(self.name,"args.", evaled_args) + # debug(self.name,"args.", evaled_args) computation = context.computation for arg in reversed(evaled_args): if isinstance(arg, int): computation.stack_push_int(arg) elif isinstance(arg, bytes): computation.stack_push_bytes(arg) - #elif isinstance(arg, str) and arg.startswith("_sym_"): + # elif isinstance(arg, str) and arg.startswith("_sym_"): # # it's a returnpc for a function # pass else: @@ -231,12 +243,15 @@ def eval(self, context): _executors = {} + # decorator to register an executor class in the _executors dict. def executor(cls): _executors[cls._name] = cls return cls -StackItem = Union[int, bytes] + +StackItem = int | bytes + def _to_int(stack_item: StackItem) -> int: if isinstance(stack_item, int): @@ -263,6 +278,9 @@ class VariableExecutor: varname: str var_slot: int + def __repr__(self): + return f"var({self.varname})" + def eval(self, context): return context.local_vars[self.var_slot] @@ -271,6 +289,7 @@ def eval(self, context): # for instructions which access it in the slow way class FastMem(Memory): __slots__ = ("mem_cache", "_bytes", "needs_writeback") + def __init__(self): self.mem_cache = [] # cached words @@ -287,7 +306,6 @@ def extend(self, start_position, size_bytes): self.mem_cache.extend([self._DIRTY] * size_difference) super().extend(start_position, size_bytes) - def read_word(self, start_position): if start_position % 32 == 0: if (ret := self.mem_cache[start_position // 32]) is not self._DIRTY: @@ -314,7 +332,7 @@ def write_word(self, start_position, int_val): self.needs_writeback.add(start_position // 32) # bypass cache dirtying - #super().write(start_position, 32, _to_bytes(int_val)) + # super().write(start_position, 32, _to_bytes(int_val)) def write(self, start_position, size, value): start = start_position // 32 @@ -324,20 +342,21 @@ def write(self, start_position, size, value): super().write(start_position, size, value) -MAX_UINT256 = 2** 256 - 1 +MAX_UINT256 = 2**256 - 1 + class IRExecutor(IRBaseExecutor): - _sig = None + _sig = Optional[tuple] _max_var_height = None def eval(self, context): - #debug("ENTER", self.name) + # debug("ENTER", self.name) args = self._eval_args(context) if self.sig_mapper: assert len(args) == len(self.sig_mapper) args = (mapper(arg) for (mapper, arg) in zip(self.sig_mapper, args)) ret = self._impl(context, *args) - #debug(f"({self.name} returning {ret})") + # debug(f"({self.name} returning {ret})") return ret @cached_property @@ -349,13 +368,14 @@ class UnsignedBinopExecutor(IRExecutor): __slots__ = ("_name", "_op") def eval(self, context): - #print("ENTER",self._name,self.args) + # debug("ENTER",self._name,self.args) x, y = self.args # note: eval in reverse order. y = _to_int(y.eval(context)) x = _to_int(x.eval(context)) return _wrap256(self._op(x, y)) - + + class SignedBinopExecutor(UnsignedBinopExecutor): def eval(self, context): x, y = self.args @@ -363,11 +383,15 @@ def eval(self, context): y = unsigned_to_signed(_to_int(y.eval(context), 256, strict=True)) x = unsigned_to_signed(_to_int(x.eval(context), 256, strict=True)) return _wrap256(self._op(x, y)) - + + # just use routines from vyper optimizer for opname, (op, _, unsigned) in vyper.ir.optimizer.arith.items(): base = UnsignedBinopExecutor if unsigned else SignedBinopExecutor - _executors[opname] = type(opname.capitalize(), (base,), {"_op": op, "_name": opname}) + _executors[opname] = type( + opname.capitalize(), (base,), {"_op": op, "_name": opname} + ) + @executor class MLoad(IRExecutor): @@ -376,9 +400,10 @@ class MLoad(IRExecutor): def eval(self, context): ptr = _to_int(self.args[0].eval(context)) context.computation._memory.extend(ptr, 32) - #return context.computation._memory.read_bytes(ptr, 32) + # return context.computation._memory.read_bytes(ptr, 32) return context.computation._memory.read_word(ptr) + @executor class MStore(IRExecutor): _name = "mstore" @@ -387,9 +412,9 @@ def eval(self, context): val = _to_int(self.args[1].eval(context)) ptr = _to_int(self.args[0].eval(context)) context.computation._memory.extend(ptr, 32) - #context.computation._memory.write(ptr, 32, val) + # context.computation._memory.write(ptr, 32, val) context.computation._memory.write_word(ptr, val) - + @executor class Ceil32(IRExecutor): @@ -397,10 +422,10 @@ class Ceil32(IRExecutor): _sig = (int,) def _impl(self, context, x): - return eth._utils.numeric.ceil32(x) + return ceil32(x) -#@executor +# @executor class DLoad(IRExecutor): _name = "dload" _sig = (int,) @@ -408,10 +433,12 @@ class DLoad(IRExecutor): def _impl(self, context, ptr): raise RuntimeError("unimplemented") -#@executor + +# @executor class DLoadBytes(IRExecutor): _name = "dloadbytes" - sig = (int,int,int) + sig = (int, int, int) + def _impl(self, context, dst, src, size): raise RuntimeError("unimplemented") @@ -423,25 +450,25 @@ class Pass(IRExecutor): def eval(self, context): pass + @executor class Seq(IRExecutor): _name = "seq" def eval(self, context): - #debug("ENTER", self.name) - + lastval = None for arg in self.args: lastval = arg.eval(context) - #debug(self.name,"evaled",lastval) return lastval + @executor class Repeat(IRExecutor): _name = "repeat" def eval(self, context): - #debug("ENTER", self.name) + # debug("ENTER", self.name) i_var, start, rounds, rounds_bound, body = self.args @@ -453,7 +480,6 @@ def eval(self, context): context.local_vars[i_var.var_slot] = i body.eval(context) - def analyze(self): i_name, start, rounds, rounds_bound, body = self.args start = start.analyze() @@ -471,7 +497,7 @@ class If(IRExecutor): # override `eval()` so we can get the correct lazy behavior def eval(self, context): - #debug("ENTER", self.name) + # debug("ENTER", self.name) try: test, body, orelse = self.args except ValueError: @@ -498,6 +524,7 @@ def _impl(self, context, test): context.computation.output = b"" raise Revert(b"") + @executor class VarList(IRExecutor): _name = "var_list" @@ -510,7 +537,7 @@ class Goto(IRExecutor): def get_label(self): label = self.args[0]._str_value if label.startswith("_sym_"): - label = label[len("_sym_"):] + label = label[len("_sym_") :] return label def analyze(self): @@ -519,7 +546,7 @@ def analyze(self): return self def eval(self, context): - #debug("ENTER", self.name) + # debug("ENTER", self.name) label = self.get_label() args = reversed([arg.eval(context) for arg in reversed(self.args[1:])]) context.goto(self.compile_ctx, label, args) @@ -554,18 +581,20 @@ def analyze(self): self._max_var_height = frame_info.max_slot - print(self._name, self.labelname, self._max_var_height) + # debug(self._name, self.labelname, self._max_var_height) return self def eval(self, context): - #debug("ENTER", self.name) + # debug("ENTER", self.name) pass def execute_subroutine(self, context, *args): - assert len(args) == len(self.var_list), (self.labelname, [x for x in args], self.var_list) + assert len(args) == len(self.var_list), (list(args), self.var_list) + with context.allocate_local_frame(args, self._max_var_height): self.body.eval(context) + @executor class With(IRExecutor): _name = "with" @@ -592,18 +621,20 @@ def eval(self, context): return ret -def executor_from_ir(ir_node, opcode_impls: Dict[int, Any]) -> Any: + +def executor_from_ir(ir_node, opcode_impls: dict[int, Any]) -> Any: ret = _executor_from_ir(ir_node, opcode_impls, CompileContext({})) ret = ret.analyze() ret._max_var_height = ret.compile_ctx.frames[0].max_slot return ret + def _executor_from_ir(ir_node, opcode_impls, compile_ctx) -> Any: instr = ir_node.value if isinstance(instr, int): return IntExecutor(instr) - args = (_executor_from_ir(arg, opcode_impls, compile_ctx) for arg in ir_node.args) + args = [_executor_from_ir(arg, opcode_impls, compile_ctx) for arg in ir_node.args] if instr in _executors: return _executors[instr](compile_ctx, *args) diff --git a/pyproject.toml b/pyproject.toml index 6083c4f0..d8f86ff9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ classifiers = ["Topic :: Software Development"] # Requirements dependencies = [ "vyper >= 0.3.8", - "git+https://github.com/charles-cooper/eth-stdlib/pycryptodome.git", + "eth-stdlib @ git+https://github.com/charles-cooper/eth-stdlib.git@pycryptodome", "eth-abi", "py-evm>=0.7.0a2", "eth-typing", From 1e47accd98440d8cd052bd6e4b3bb6b89cb5672b Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 18 Jun 2023 13:40:08 -0700 Subject: [PATCH 008/122] polish --- boa/vyper/ir_executor.py | 63 ++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 73930c96..0caa1b1e 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -212,7 +212,7 @@ class OpcodeIRExecutor(IRBaseExecutor): def __init__(self, name, opcode_impl, opcode_info, *args): self.opcode_impl = opcode_impl # py-evm OpcodeAPI self.opcode_info: OpcodeInfo = opcode_info # info from vyper.evm.opcodes - self._name = "__" + name + "__" + self._name = "__" + name + "__" # to differentiate from implemented codes super().__init__(*args) @cached_property @@ -221,17 +221,13 @@ def produces(self): def eval(self, context): # debug("ENTER", self.name) - evaled_args = self._eval_args(context) - # debug(self.name,"args.", evaled_args) computation = context.computation - for arg in reversed(evaled_args): + for arg0 in reversed(self.args): + arg = arg0.eval(context) if isinstance(arg, int): computation.stack_push_int(arg) elif isinstance(arg, bytes): computation.stack_push_bytes(arg) - # elif isinstance(arg, str) and arg.startswith("_sym_"): - # # it's a returnpc for a function - # pass else: raise RuntimeError(f"Not a stack item. {type(arg)} {arg}") @@ -291,9 +287,12 @@ class FastMem(Memory): __slots__ = ("mem_cache", "_bytes", "needs_writeback") def __init__(self): + # XXX: check if this would be faster as dict? self.mem_cache = [] # cached words - self.needs_writeback = set() # + # words which are in the cache but have not been written + # to the backing bytes + self.needs_writeback = set() super().__init__() @@ -342,13 +341,13 @@ def write(self, start_position, size, value): super().write(start_position, size, value) -MAX_UINT256 = 2**256 - 1 - - class IRExecutor(IRBaseExecutor): _sig = Optional[tuple] _max_var_height = None + # a default eval implementation which is not super fast + # but makes it convenient to implement executors. + # for max perf, inline arg casting as in UnsignedBinopExecutor def eval(self, context): # debug("ENTER", self.name) args = self._eval_args(context) @@ -385,12 +384,11 @@ def eval(self, context): return _wrap256(self._op(x, y)) -# just use routines from vyper optimizer +# for binops, just use routines from vyper optimizer for opname, (op, _, unsigned) in vyper.ir.optimizer.arith.items(): base = UnsignedBinopExecutor if unsigned else SignedBinopExecutor - _executors[opname] = type( - opname.capitalize(), (base,), {"_op": op, "_name": opname} - ) + nickname = opname.capitalize() + _executors[opname] = type(nickname, (base,), {"_op": op, "_name": opname}) @executor @@ -398,6 +396,7 @@ class MLoad(IRExecutor): _name = "mload" def eval(self, context): + # perf hotspot. ptr = _to_int(self.args[0].eval(context)) context.computation._memory.extend(ptr, 32) # return context.computation._memory.read_bytes(ptr, 32) @@ -409,6 +408,7 @@ class MStore(IRExecutor): _name = "mstore" def eval(self, context): + # perf hotspot. val = _to_int(self.args[1].eval(context)) ptr = _to_int(self.args[0].eval(context)) context.computation._memory.extend(ptr, 32) @@ -469,7 +469,6 @@ class Repeat(IRExecutor): def eval(self, context): # debug("ENTER", self.name) - i_var, start, rounds, rounds_bound, body = self.args start = start.eval(context) @@ -482,8 +481,11 @@ def eval(self, context): def analyze(self): i_name, start, rounds, rounds_bound, body = self.args + + # analyze start and rounds before shadowing. start = start.analyze() rounds = rounds.analyze() + with self.compile_ctx.variables([i_name._str_value]): i_var = i_name.analyze() body = body.analyze() @@ -534,11 +536,14 @@ class VarList(IRExecutor): class Goto(IRExecutor): _name = "goto" - def get_label(self): - label = self.args[0]._str_value - if label.startswith("_sym_"): - label = label[len("_sym_") :] - return label + @cached_property + # figure out the label to jump to, works for both goto and exit_to + # (why does vyper generate them differently? XXX fix in vyper) + def label(self): + ret = self.args[0]._str_value + if ret.startswith("_sym_"): + ret = ret[len("_sym_") :] + return ret def analyze(self): for arg in self.args[1:]: @@ -547,9 +552,8 @@ def analyze(self): def eval(self, context): # debug("ENTER", self.name) - label = self.get_label() args = reversed([arg.eval(context) for arg in reversed(self.args[1:])]) - context.goto(self.compile_ctx, label, args) + context.goto(self.compile_ctx, self.label, args) @executor @@ -579,18 +583,16 @@ def analyze(self): with self.compile_ctx.variables(var_list): self.body = self.body.analyze() + # grab max slot after analysis self._max_var_height = frame_info.max_slot - # debug(self._name, self.labelname, self._max_var_height) return self def eval(self, context): - # debug("ENTER", self.name) - pass + raise RuntimeError("labels should only be jumped into!") def execute_subroutine(self, context, *args): - assert len(args) == len(self.var_list), (list(args), self.var_list) - + # assert len(args) == len(self.var_list), (list(args), self.var_list) with context.allocate_local_frame(args, self._max_var_height): self.body.eval(context) @@ -605,9 +607,11 @@ class With(IRExecutor): def analyze(self): varname = self.args[0]._str_value val = self.args[1].analyze() # analyze before shadowing + with self.compile_ctx.variables([varname]): - variable = self.args[0].analyze() # analyze for debugging + variable = self.args[0].analyze() body = self.args[2].analyze() + self.args = (variable, val, body) return self @@ -645,4 +649,5 @@ def _executor_from_ir(ir_node, opcode_impls, compile_ctx) -> Any: return OpcodeIRExecutor(instr, opcode_impl, opcode_info, compile_ctx, *args) assert len(ir_node.args) == 0, ir_node + assert isinstance(ir_node.value, str) return StringExecutor(instr, compile_ctx) From ff8e26069c7d1d8d99f3bffbb6a2bc25c84f13c1 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 18 Jun 2023 13:41:16 -0700 Subject: [PATCH 009/122] clean up err handling(?) --- boa/environment.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 6d1a6812..f681b58a 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -323,22 +323,12 @@ def apply_computation(cls, state, msg, tx_ctx): err = None with cls(state, msg, tx_ctx) as computation: print("FAST MODE") - print(contract.ir_executor) + #print(contract.ir_executor) eval_ctx = EvalContext(contract.ir_executor, computation) try: eval_ctx.run() - except Exception as e: - # grab the exception to raise later - - # unclear why this is getting swallowed by py-evm. - # print(e) - err = e - - from eth.exceptions import Halt, VMError - - if err is not None and not isinstance(err, (Halt, VMError)): - # if err is not None: - raise err - return computation + finally: + return computation # wrapper class around py-evm which provides a "contract-centric" API From 99b9736c470412486f2a9eb5125a9f43ff5ee3ed Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 19 Jun 2023 09:22:47 -0700 Subject: [PATCH 010/122] wip - compile to python --- boa/environment.py | 18 +- boa/vyper/contract.py | 7 + boa/vyper/ir_compiler.py | 672 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 693 insertions(+), 4 deletions(-) create mode 100644 boa/vyper/ir_compiler.py diff --git a/boa/environment.py b/boa/environment.py index f681b58a..3b28c9fb 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -6,6 +6,7 @@ import sys import warnings from typing import Any, Iterator, Optional, Tuple, Union +import traceback import eth.constants as constants import eth.tools.builder.chain as chain @@ -323,12 +324,21 @@ def apply_computation(cls, state, msg, tx_ctx): err = None with cls(state, msg, tx_ctx) as computation: print("FAST MODE") - #print(contract.ir_executor) + # print(contract.ir_executor) eval_ctx = EvalContext(contract.ir_executor, computation) try: - eval_ctx.run() - finally: - return computation + #eval_ctx.run() + print("ENTER") + contract.ir_compiler.exec(eval_ctx) + #except PyEVMError as e: + # raise e + except Exception as e: + err = e + traceback.print_exception(e, file=sys.stderr) + + if err is not None: + raise err + return computation # wrapper class around py-evm which provides a "contract-centric" API diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 4396fe91..ae45a97a 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -47,6 +47,7 @@ from boa.vyper.decoder_utils import ByteAddressableStorage, decode_vyper_object from boa.vyper.event import Event, RawEvent from boa.vyper.ir_executor import executor_from_ir +from boa.vyper.ir_compiler import executor_from_ir as compiler_from_ir # error messages for external calls EXTERNAL_CALL_ERRORS = ("external call failed", "returndatasize too small") @@ -784,6 +785,12 @@ def ir_executor(self): opcode_impls = self.env.vm.state.computation_class.opcodes return executor_from_ir(ir, opcode_impls) + @cached_property + def ir_compiler(self): + ir = self.compiler_data.ir_runtime + return compiler_from_ir(ir, self.compiler_data.contract_name) + + @contextlib.contextmanager def _anchor_source_map(self, source_map): tmp = self._source_map diff --git a/boa/vyper/ir_compiler.py b/boa/vyper/ir_compiler.py new file mode 100644 index 00000000..c4e03bfa --- /dev/null +++ b/boa/vyper/ir_compiler.py @@ -0,0 +1,672 @@ +import contextlib +import inspect +import textwrap +from dataclasses import dataclass, field +from functools import cached_property +from typing import Any, Optional +from pathlib import PurePath + +import vyper.ir.optimizer +from eth.exceptions import Revert +from vyper.evm.opcodes import OPCODES +from vyper.utils import unsigned_to_signed, mkalphanum + + +def debug(*args, **kwargs): + print(*args, **kwargs) + + +def ceil32(x): + return (x + 31) & ~31 + + +@dataclass +class _Line: + indentation_level: int + line: str + + def show(self, indenter=" "): + return indenter * self.indentation_level + self.line + + +@dataclass +class PythonBuilder: + cur_indentation_level: int = 0 + lines: list[_Line] = field(default_factory=list) + + def extend(self, source_code): + source_code = textwrap.dedent(source_code) + for line in source_code.splitlines(): + self.append(line) + + def append(self, source_code): + self.lines.append(_Line(self.cur_indentation_level, source_code)) + + def get_output(self): + return "\n".join(line.show() for line in self.lines) + + def get_code(self, filename): + return compile(self.get_output()) + + @contextlib.contextmanager + def block(self, entry): + self.append(entry + ":") + self.cur_indentation_level += 4 + yield + self.cur_indentation_level -= 4 + + +@dataclass +class FrameInfo: + current_slot: int = 0 # basically the de bruijn index + slots: dict[str, int] = field(default_factory=lambda: {}) + + +_global_id = 0 + + +@dataclass +class CompileContext: + contract_path: Optional[str] = "" + uuid: str = field(init=False) + labels: dict[str, "IRExecutor"] = field(default_factory=dict) + frames: list[FrameInfo] = field(default_factory=lambda: [FrameInfo()]) + builder: PythonBuilder = field(default_factory=PythonBuilder) + + def __post_init__(self): + # use a global bc the generated functions need to be unique + global _global_id + self.uuid = str(_global_id) + _global_id += 1 + + + @property + def local_vars(self): + return self.frames[-1].slots + + @cached_property + def contract_name(self): + return mkalphanum(PurePath(self.contract_path).name) + + def translate_label(self, label): + return f"{label}_{self.contract_name}_{self.uuid}" + + + @contextlib.contextmanager + def allocate_local_frame(self): + frame = FrameInfo() + self.frames.append(frame) + yield # frame + self.frames.pop() + + @contextlib.contextmanager + def variables(self, vars_list): + # allocate variables in vars_list, assigning them each + # a new slot. + shadowed = {} + frame = self.frames[-1] + for varname in vars_list: + shadowed[varname] = frame.slots.get(varname) + frame.slots[varname] = frame.current_slot + frame.current_slot += 1 + + yield + + for varname in vars_list: + frame.current_slot -= 1 + if shadowed[varname] is None: + del frame.slots[varname] + else: + frame.slots[varname] = shadowed[varname] + + +StackItem = int | bytes + + +mapper = {int: "_to_int", bytes: "_to_bytes", StackItem: ""} + + +class IRExecutor: + __slots__ = ("args", "compile_ctx", "exec") + + _out_type: Optional[StackItem] = None + + def __init__(self, compile_ctx, *args): + self.args = args + self.compile_ctx = compile_ctx + self.py_bytecode = None + + def get_output(self): + return self.builder.get_output() + + @cached_property + def name(self): + return self._name + + def _compile_args(self, argnames): + assert len(self.args) == len(argnames) == len(self._sig), (self.args, argnames, self._sig) + for out, arg, typ in reversed(list(zip(argnames, self.args, self._sig))): + arg.compile(out=out, out_typ=typ) + + @property + def builder(self): + return self.compile_ctx.builder + + def analyze(self): + self.args = [arg.analyze() for arg in self.args] + return self + + def compile(self, out=None, out_typ=None): + # do a bit of metaprogramming to infer how to compile the args + if hasattr(self, "_argnames"): + argnames = self._argnames + else: + argnames = inspect.getargs(self._compile.__code__).args + assert argnames[0] == "self" + argnames = argnames[1:] + + self._compile_args(argnames) + + res = self._compile(*argnames) + + if res is None: + assert out is None, (type(self), self, out, argnames) + return + + #print("ENTER", type(self), self, out, argnames, res) + res_typ, res = res + + if out is not None: + if res_typ != out_typ: + res = f"{mapper[out_typ]}({res})" + self.builder.append(f"{out} = {res}") + else: + self.builder.append(res) + + def _compile(self, context): + raise RuntimeError("must be overridden in subclass!") + + def compile_main(self, contract_path=""): + self.builder.extend("import vyper.utils\nimport _operator") + + main_name = self.compile_ctx.translate_label("main") + with self.builder.block(f"def {main_name}(CTX)"): + self.compile() + + for func in self.compile_ctx.labels.values(): + self.builder.extend("\n\n") + func.compile_func() + + py_bytecode = compile(self.builder.get_output(), contract_path, "exec") + exec(py_bytecode, globals()) + self.exec = globals()[main_name] + + +@dataclass +class IntExecutor(IRExecutor): + compile_ctx: CompileContext + _int_value: int + + def __post_init__(self): + assert 0 <= self._int_value < 2**256 + self.args = self._sig = () + + def __repr__(self): + return hex(self._int_value) + + def analyze(self): + return self + + def _compile(self): + return int, repr(self) + + +@dataclass +class StringExecutor(IRExecutor): + compile_ctx: CompileContext + _str_value: str + + def __post_init__(self): + self.args = self._sig = () + + def __repr__(self): + return repr(self._str_value) + + def analyze(self): + slot = self.compile_ctx.local_vars[self._str_value] + return VariableExecutor(self.compile_ctx, self._str_value, slot) + + +@dataclass +class VariableExecutor(IRExecutor): + compile_ctx: CompileContext + varname: str + var_slot: int + + def __post_init__(self): + self.args = self._sig = () + + def __repr__(self): + return f"var({self.varname})" + + @cached_property + def out_name(self): + slot = self.var_slot + ret = f"__user_{self.varname}" + if slot > 0: + ret += f"_{slot}" + return ret + + def _compile(self): + return StackItem, self.out_name # XXX: figure out type + +@dataclass +class OpcodeInfo: + # model of an opcode from vyper.evm.opcodes + mnemonic: str + opcode: int # opcode number ex. 0x01 for ADD + consumes: int # number of stack items this consumes + produces: int # number of stack items this produces, must be 0 or 1 + _gas_estimate: int # in vyper.evm.opcodes but probably not useful for us + + def __post_init__(self): + assert 0 <= self.opcode < 256 + assert self.produces in (0, 1) + + @classmethod + def from_opcode_info(cls, mnemonic, opcode_info): + # info from vyper.evm.opcodes + opcode, consumes, produces, gas_estimate = opcode_info + return cls(mnemonic, opcode, consumes, produces, gas_estimate) + + +# an executor for evm opcodes which dispatches into py-evm +class OpcodeIRExecutor(IRExecutor): + def __init__(self, name, opcode_info, *args): + self.opcode_info: OpcodeInfo = opcode_info + + # to differentiate from implemented codes + self._name = "__" + name + "__" + + super().__init__(*args) + + @cached_property + def _sig(self): + return tuple(StackItem for _ in range(self.opcode_info.consumes)) + + @cached_property + def _argnames(self): + def mkargname(i): + return f"__{self.opcode_info.mnemonic.lower()}_arg{i}" + + return tuple(mkargname(i) for i in range(self.opcode_info.consumes)) + + def _compile(self, *args): + opcode = hex(self.opcode_info.opcode) + for arg in reversed(args): + # TODO figure out the type to avoid calling _to_int + self.builder.append(f"CTX.computation.stack_push_int(_to_int({arg}))") + + self.builder.extend( + f""" + # {self._name} + CTX.computation.opcodes[{opcode}].__call__(CTX.computation) + """ + ) + if self.opcode_info.produces: + return StackItem, "CTX.computation.stack_pop1_any()" + + +_executors = {} + + +# decorator to register an executor class in the _executors dict. +def executor(cls): + _executors[cls._name] = cls + return cls + + +def _to_int(stack_item: StackItem) -> int: + if isinstance(stack_item, int): + return stack_item + return int.from_bytes(stack_item, "big") + + +def _to_bytes(stack_item: StackItem) -> bytes: + if isinstance(stack_item, bytes): + return stack_item + return stack_item.to_bytes(32, "big") + + +def _wrap256(x): + return x % 2**256 + + +def _as_signed(x): + return unsigned_to_signed(x, 256, strict=True) + + +class UnsignedBinopExecutor(IRExecutor): + __slots__ = ("_name", "_op") + _sig = int, int + _out_type = int + + @cached_property + def funcname(self): + return self._op.__module__ + "." + self._op.__name__ + + def _compile(self, x, y): + return int, f"_wrap256({self.funcname}({x}, {y}))" + + +class SignedBinopExecutor(UnsignedBinopExecutor): + def _compile(self, x, y): + self.builder.extend( + f""" + x = _as_signed({x}, 256, strict=True)) + y = _as_signed({y}, 256, strict=True)) + """ + ) + return int, f"_wrap256({self._funcname}(x, y))" + + +# for binops, just use routines from vyper optimizer +for opname, (op, _, unsigned) in vyper.ir.optimizer.arith.items(): + base = UnsignedBinopExecutor if unsigned else SignedBinopExecutor + nickname = opname.capitalize() + _executors[opname] = type(nickname, (base,), {"_op": op, "_name": opname}) + + +@executor +class MLoad(IRExecutor): + _name = "mload" + _sig = (int,) + + def _compile(self, ptr): + self.builder.append(f"CTX.computation._memory.extend({ptr}, 32)") + return int, f"CTX.computation._memory.read_word({ptr})" + + +@executor +class MStore(IRExecutor): + _name = "mstore" + _sig = (int, int) + + def _compile(self, val, ptr): + self.builder.extend( + f""" + CTX.computation._memory.extend({ptr}, 32) + CTX.computation._memory.write_word({ptr}, {val}) + """ + ) + + +@executor +class Ceil32(IRExecutor): + _name = "ceil32" + _sig = (int,) + + def _compile(self, x): + return int, f"({x} + 31) & 31" + +@executor +class IsZero(IRExecutor): + _name = "iszero" + _sig = (int,) + + def _compile(self, x): + return int, f"{x} == 0" + + + +# @executor +class DLoad(IRExecutor): + _name = "dload" + _sig = (int,) + + def _impl(self, context, ptr): + raise RuntimeError("unimplemented") + + +# @executor +class DLoadBytes(IRExecutor): + _name = "dloadbytes" + + def _impl(self, context, dst, src, size): + raise RuntimeError("unimplemented") + + +@executor +class Pass(IRExecutor): + _name = "pass" + _sig = () + _argnames = () + + def _compile(self): + self.builder.append("pass") + + +@executor +class Seq(IRExecutor): + _name = "seq" + + def compile(self, out=None, out_typ=None): + for i, arg in enumerate(self.args): + if i + 1 < len(self.args): + # don't accidentally assign + arg.compile(out=None) + else: + return arg.compile(out=out, out_typ=out_typ) + else: + raise RuntimeError("loop should have broken") + + +@executor +class Repeat(IRExecutor): + _name = "repeat" + + def compile(self, out=None): + i_var, start, rounds, rounds_bound, body = self.args + + start.compile("start", int) + rounds.compile("rounds", int) + rounds_bound.compile("rounds_bound", int) + end = "start + rounds" + + self.builder.append(f"assert rounds <= rounds_bound") + with self.builder.block(f"for {i_var.out_name} in range(start, {end})"): + body.compile() + + def analyze(self): + i_name, start, rounds, rounds_bound, body = self.args + + # analyze start and rounds before shadowing. + start = start.analyze() + rounds = rounds.analyze() + + with self.compile_ctx.variables([i_name._str_value]): + i_var = i_name.analyze() + body = body.analyze() + self.args = i_var, start, rounds, rounds_bound, body + return self + + +@executor +class If(IRExecutor): + _name = "if" + + # override `compile()` so we can get the correct lazy behavior + def compile(self, out=None, out_typ=None): + orelse = None + if len(self.args) == 3: + test, body, orelse = self.args + else: + test, body = self.args + + test.compile("test", out_typ=int) + + with self.builder.block("if bool(test)"): + body.compile(out, out_typ) + + if orelse: + with self.builder.block("else"): + orelse.compile(out, out_typ) + + +@executor +class Assert(IRExecutor): + _name = "assert" + _sig = (int,) + + def _compile(self, test): + _ = Revert # linter does not know we are using `Revert`. + self.builder.extend( + """ + if not bool(test): + CTX.computation.output = b"" + raise Revert(b"") + """ + ) + + +@executor +class VarList(IRExecutor): + _name = "var_list" + + +@executor +class Goto(IRExecutor): + _name = "goto" + + def analyze(self): + self.label = self.args[0]._str_value + if self.label.startswith("_sym_"): + self.label = self.label[len("_sym_"):] + + for arg in self.args[1:]: + arg = arg.analyze() + + self.args = self.args[1:] + + return self + + @cached_property + def _argnames(self): + return self.compile_ctx.labels[self.label].param_names + + @cached_property + def _sig(self): + return tuple(StackItem for _ in self._argnames) + + def _compile(self, *args): + label = self.label + + if label == "returnpc": + # i.e. exitsub + assert len(args) == 0 + self.builder.append("return") + return + + argnames = self._argnames + assert len(argnames) == len(self.args) + + args_str = ", ".join(["CTX"] + argnames) + # XXX: figure out type + return StackItem, f"{label}({args_str})" + + +@executor +class ExitTo(Goto): + # exit_to and goto have pretty much the same semantics as far as we + # are concerned here. + _name = "exit_to" + + +@executor +class Label(IRExecutor): + _name = "label" + + def __init__(self, compile_ctx, *args): + self.compile_ctx = compile_ctx + + name, var_list, body = args + + self.var_list = var_list.args + self.body = body + self.labelname = name._str_value + + if name._str_value in compile_ctx.labels: + raise ValueError("duplicated label: {name._str_value}") + compile_ctx.labels[name._str_value] = self + + @cached_property + def param_names(self): + return [param._str_value for param in self.var_list] + + def analyze(self): + with self.compile_ctx.allocate_local_frame(): + with self.compile_ctx.variables(self.param_names): + self.body = self.body.analyze() + + return self + + def compile(self, **kwargs): + pass + + def compile_func(self): + print(self.var_list) + params_str = ", ".join(["CTX"] + self.param_names) + with self.builder.block(f"def {self.labelname}({params_str})"): + self.body.compile() + + +@executor +class With(IRExecutor): + _name = "with" + + # variable names can be shadowed, so we need to do a bit of + # analysis to find unshadowed names + def analyze(self): + varname = self.args[0]._str_value + val = self.args[1].analyze() # analyze before shadowing + + with self.compile_ctx.variables([varname]): + variable = self.args[0].analyze() + body = self.args[2].analyze() + + self.args = (variable, val, body) + + return self + + def compile(self, out=None, out_typ=None): + variable, val, body = self.args + # TODO: infer val typ + val.compile(out=variable.out_name, out_typ=StackItem) + return body.compile(out=out, out_typ=out_typ) + + +def executor_from_ir(ir_node, contract_path = "") -> Any: + ret = _executor_from_ir(ir_node, CompileContext(contract_path)) + + ret = ret.analyze() + ret.compile_main() + return ret + + +def _executor_from_ir(ir_node, compile_ctx) -> Any: + instr = ir_node.value + if isinstance(instr, int): + return IntExecutor(compile_ctx, instr) + + args = [_executor_from_ir(arg, compile_ctx) for arg in ir_node.args] + + if instr in _executors: + return _executors[instr](compile_ctx, *args) + + if (mnemonic := instr.upper()) in OPCODES: + opcode_info = OpcodeInfo.from_opcode_info(mnemonic, OPCODES[mnemonic]) + return OpcodeIRExecutor(instr, opcode_info, compile_ctx, *args) + + assert len(ir_node.args) == 0, ir_node + assert isinstance(ir_node.value, str) + return StringExecutor(compile_ctx, ir_node.value) From 97b7c12454544bb62e83a41360ba72c97e4f22fd Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 19 Jun 2023 21:14:11 -0700 Subject: [PATCH 011/122] works - actually executes --- boa/environment.py | 28 ++++++++---- boa/vm/fast_mem.py | 74 ++++++++++++++++++++++++++++++ boa/vm/utils.py | 14 ++++++ boa/vyper/ir_compiler.py | 28 +++++++----- boa/vyper/ir_executor.py | 98 +++++----------------------------------- 5 files changed, 136 insertions(+), 106 deletions(-) create mode 100644 boa/vm/fast_mem.py create mode 100644 boa/vm/utils.py diff --git a/boa/environment.py b/boa/environment.py index 3b28c9fb..0eb23041 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -12,6 +12,7 @@ import eth.tools.builder.chain as chain import eth.vm.forks.spurious_dragon.computation as spurious_dragon from eth._utils.address import generate_contract_address +from eth.exceptions import Halt, VMError from eth.chains.mainnet import MainnetChain from eth.codecs import abi from eth.db.atomic import AtomicDB @@ -256,6 +257,10 @@ def __call__(self, computation): # ### End section: sha3 tracing +_SLOW = 0 +_FAST = 1 +_LUDICROUS = 2 + # py-evm uses class instantiaters which need to be classes # instead of like factories or other easier to use architectures - @@ -317,26 +322,31 @@ def apply_create_message(cls, state, msg, tx_ctx): def apply_computation(cls, state, msg, tx_ctx): addr = msg.code_address contract = cls.env.lookup_contract(addr) if addr else None - if contract is None or not cls.env._enable_fast_mode: + if contract is None or cls.env._speed == _SLOW: print("SLOW MODE") return super().apply_computation(state, msg, tx_ctx) err = None with cls(state, msg, tx_ctx) as computation: - print("FAST MODE") # print(contract.ir_executor) eval_ctx = EvalContext(contract.ir_executor, computation) try: - #eval_ctx.run() - print("ENTER") - contract.ir_compiler.exec(eval_ctx) - #except PyEVMError as e: - # raise e + if cls.env._speed == _FAST: + print("FAST MODE") + eval_ctx.run() + else: # LUDICROUS + print("LUDICROUS SPEED") + contract.ir_compiler.exec(eval_ctx) + except (Halt, VMError): + pass except Exception as e: + # grab the exception to raise later - + # unclear why this is getting swallowed by py-evm. + # print(e) err = e - traceback.print_exception(e, file=sys.stderr) if err is not None: + # if err is not None: raise err return computation @@ -346,7 +356,7 @@ class Env: _singleton = None _initial_address_counter = 100 _coverage_enabled = False - _enable_fast_mode = False + _speed = _SLOW def __init__(self): self.chain = _make_chain() diff --git a/boa/vm/fast_mem.py b/boa/vm/fast_mem.py new file mode 100644 index 00000000..192d498c --- /dev/null +++ b/boa/vm/fast_mem.py @@ -0,0 +1,74 @@ +from eth.vm.memory import Memory + +import contextlib +import re +from dataclasses import dataclass, field +from functools import cached_property +from typing import Any, Optional + +import vyper.ir.optimizer +from eth.exceptions import Revert +from vyper.evm.opcodes import OPCODES +from vyper.utils import unsigned_to_signed +from boa.vm.utils import to_bytes, to_int, ceil32 + + +# a py-evm eth.vm.Memory compatible implementation of memory. +# most memory is aligned. add a cache which avoids converting +# between bytes and ints where possible +class FastMem(Memory): + __slots__ = ("mem_cache", "_bytes", "needs_writeback") + + def __init__(self): + # XXX: check if this would be faster as dict? + self.mem_cache = [] # cached words + + # words which are in the cache but have not been written + # to the backing bytes + self.needs_writeback = set() + + super().__init__() + + _DIRTY = object() + + def extend(self, start_position, size_bytes): + # i.e. ceil32(len(self)) // 32 + new_size = (start_position + size_bytes + 31) // 32 + if (size_difference := new_size - len(self.mem_cache)) > 0: + self.mem_cache.extend([self._DIRTY] * size_difference) + super().extend(start_position, size_bytes) + + def read_word(self, start_position): + if start_position % 32 == 0: + if (ret := self.mem_cache[start_position // 32]) is not self._DIRTY: + return ret + + ret = to_int(self.read_bytes(start_position, 32)) + self.mem_cache[start_position // 32] = ret + return ret + + def read_bytes(self, start_position, size): + start = start_position // 32 + end = ceil32(start_position + size) // 32 + for ix in range(start, end): + if ix in self.needs_writeback: + super().write(ix * 32, 32, to_bytes(self.mem_cache[ix])) + self.needs_writeback.remove(ix) + + return super().read_bytes(start_position, size) + + def write_word(self, start_position, int_val): + if start_position % 32 == 0: + self.mem_cache[start_position // 32] = int_val + + self.needs_writeback.add(start_position // 32) + + # bypass cache dirtying + # super().write(start_position, 32, to_bytes(int_val)) + + def write(self, start_position, size, value): + start = start_position // 32 + end = (start_position + size + 31) // 32 + for i in range(start, end): + self.mem_cache[i] = self._DIRTY + super().write(start_position, size, value) diff --git a/boa/vm/utils.py b/boa/vm/utils.py new file mode 100644 index 00000000..02c51e2c --- /dev/null +++ b/boa/vm/utils.py @@ -0,0 +1,14 @@ +def ceil32(x): + return (x + 31) & ~31 + +def to_int(stack_item) -> int: + if isinstance(stack_item, int): + return stack_item + return int.from_bytes(stack_item, "big") + +def to_bytes(stack_item) -> bytes: + if isinstance(stack_item, bytes): + return stack_item + return stack_item.to_bytes(32, "big") + + diff --git a/boa/vyper/ir_compiler.py b/boa/vyper/ir_compiler.py index c4e03bfa..4ed36e80 100644 --- a/boa/vyper/ir_compiler.py +++ b/boa/vyper/ir_compiler.py @@ -11,14 +11,13 @@ from vyper.evm.opcodes import OPCODES from vyper.utils import unsigned_to_signed, mkalphanum +from boa.vm.fast_mem import FastMem + def debug(*args, **kwargs): print(*args, **kwargs) -def ceil32(x): - return (x + 31) & ~31 - @dataclass class _Line: @@ -127,7 +126,7 @@ def variables(self, vars_list): class IRExecutor: - __slots__ = ("args", "compile_ctx", "exec") + __slots__ = ("args", "compile_ctx")# "exec") _out_type: Optional[StackItem] = None @@ -199,7 +198,12 @@ def compile_main(self, contract_path=""): py_bytecode = compile(self.builder.get_output(), contract_path, "exec") exec(py_bytecode, globals()) - self.exec = globals()[main_name] + + self._exec = globals()[main_name] + + def exec(self, execution_ctx): + execution_ctx.computation._memory = FastMem() + self._exec(execution_ctx) @dataclass @@ -552,7 +556,7 @@ def analyze(self): @cached_property def _argnames(self): - return self.compile_ctx.labels[self.label].param_names + return self.compile_ctx.labels[self.label].analyzed_param_names @cached_property def _sig(self): @@ -591,7 +595,7 @@ def __init__(self, compile_ctx, *args): name, var_list, body = args - self.var_list = var_list.args + self.var_list = var_list self.body = body self.labelname = name._str_value @@ -600,12 +604,14 @@ def __init__(self, compile_ctx, *args): compile_ctx.labels[name._str_value] = self @cached_property - def param_names(self): - return [param._str_value for param in self.var_list] + def analyzed_param_names(self): + return [param.out_name for param in self.var_list.args] def analyze(self): with self.compile_ctx.allocate_local_frame(): - with self.compile_ctx.variables(self.param_names): + params = [param._str_value for param in self.var_list.args] + with self.compile_ctx.variables(params): + self.var_list = self.var_list.analyze() self.body = self.body.analyze() return self @@ -615,7 +621,7 @@ def compile(self, **kwargs): def compile_func(self): print(self.var_list) - params_str = ", ".join(["CTX"] + self.param_names) + params_str = ", ".join(["CTX"] + self.analyzed_param_names) with self.builder.block(f"def {self.labelname}({params_str})"): self.body.compile() diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 0caa1b1e..9d696ffa 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -6,19 +6,17 @@ import vyper.ir.optimizer from eth.exceptions import Revert -from eth.vm.memory import Memory from vyper.evm.opcodes import OPCODES from vyper.utils import unsigned_to_signed +from boa.vm.fast_mem import FastMem +from boa.vm.utils import to_bytes, to_int, ceil32 + def debug(*args, **kwargs): print(*args, **kwargs) -def ceil32(x): - return (x + 31) & ~31 - - @dataclass class OpcodeInfo: # model of an opcode from vyper.evm.opcodes @@ -249,18 +247,6 @@ def executor(cls): StackItem = int | bytes -def _to_int(stack_item: StackItem) -> int: - if isinstance(stack_item, int): - return stack_item - return int.from_bytes(stack_item, "big") - - -def _to_bytes(stack_item: StackItem) -> bytes: - if isinstance(stack_item, bytes): - return stack_item - return stack_item.to_bytes(32, "big") - - def _wrap256(x): return x % 2**256 @@ -281,66 +267,6 @@ def eval(self, context): return context.local_vars[self.var_slot] -# most memory is aligned. treat it as list of ints, and provide mocking -# for instructions which access it in the slow way -class FastMem(Memory): - __slots__ = ("mem_cache", "_bytes", "needs_writeback") - - def __init__(self): - # XXX: check if this would be faster as dict? - self.mem_cache = [] # cached words - - # words which are in the cache but have not been written - # to the backing bytes - self.needs_writeback = set() - - super().__init__() - - _DIRTY = object() - - def extend(self, start_position, size_bytes): - # i.e. ceil32(len(self)) // 32 - new_size = (start_position + size_bytes + 31) // 32 - if (size_difference := new_size - len(self.mem_cache)) > 0: - self.mem_cache.extend([self._DIRTY] * size_difference) - super().extend(start_position, size_bytes) - - def read_word(self, start_position): - if start_position % 32 == 0: - if (ret := self.mem_cache[start_position // 32]) is not self._DIRTY: - return ret - - ret = _to_int(self.read_bytes(start_position, 32)) - self.mem_cache[start_position // 32] = ret - return ret - - def read_bytes(self, start_position, size): - start = start_position // 32 - end = ceil32(start_position + size) // 32 - for ix in range(start, end): - if ix in self.needs_writeback: - super().write(ix * 32, 32, _to_bytes(self.mem_cache[ix])) - self.needs_writeback.remove(ix) - - return super().read_bytes(start_position, size) - - def write_word(self, start_position, int_val): - if start_position % 32 == 0: - self.mem_cache[start_position // 32] = int_val - - self.needs_writeback.add(start_position // 32) - - # bypass cache dirtying - # super().write(start_position, 32, _to_bytes(int_val)) - - def write(self, start_position, size, value): - start = start_position // 32 - end = (start_position + size + 31) // 32 - for i in range(start, end): - self.mem_cache[i] = self._DIRTY - super().write(start_position, size, value) - - class IRExecutor(IRBaseExecutor): _sig = Optional[tuple] _max_var_height = None @@ -360,7 +286,7 @@ def eval(self, context): @cached_property def sig_mapper(self): - return tuple(_to_int if typ is int else _to_bytes for typ in self._sig) + return tuple(to_int if typ is int else to_bytes for typ in self._sig) class UnsignedBinopExecutor(IRExecutor): @@ -370,8 +296,8 @@ def eval(self, context): # debug("ENTER",self._name,self.args) x, y = self.args # note: eval in reverse order. - y = _to_int(y.eval(context)) - x = _to_int(x.eval(context)) + y = to_int(y.eval(context)) + x = to_int(x.eval(context)) return _wrap256(self._op(x, y)) @@ -379,8 +305,8 @@ class SignedBinopExecutor(UnsignedBinopExecutor): def eval(self, context): x, y = self.args # note: eval in reverse order. - y = unsigned_to_signed(_to_int(y.eval(context), 256, strict=True)) - x = unsigned_to_signed(_to_int(x.eval(context), 256, strict=True)) + y = unsigned_to_signed(to_int(y.eval(context), 256, strict=True)) + x = unsigned_to_signed(to_int(x.eval(context), 256, strict=True)) return _wrap256(self._op(x, y)) @@ -397,7 +323,7 @@ class MLoad(IRExecutor): def eval(self, context): # perf hotspot. - ptr = _to_int(self.args[0].eval(context)) + ptr = to_int(self.args[0].eval(context)) context.computation._memory.extend(ptr, 32) # return context.computation._memory.read_bytes(ptr, 32) return context.computation._memory.read_word(ptr) @@ -409,8 +335,8 @@ class MStore(IRExecutor): def eval(self, context): # perf hotspot. - val = _to_int(self.args[1].eval(context)) - ptr = _to_int(self.args[0].eval(context)) + val = to_int(self.args[1].eval(context)) + ptr = to_int(self.args[0].eval(context)) context.computation._memory.extend(ptr, 32) # context.computation._memory.write(ptr, 32, val) context.computation._memory.write_word(ptr, val) @@ -506,7 +432,7 @@ def eval(self, context): test, body = self.args orelse = None - test = _to_int(test.eval(context)) + test = to_int(test.eval(context)) if bool(test): return body.eval(context) From fbb1887513ff1752ac379698232e41c9d36b46ba Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 19 Jun 2023 21:29:09 -0700 Subject: [PATCH 012/122] fix mstore arg order --- boa/vyper/ir_compiler.py | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/boa/vyper/ir_compiler.py b/boa/vyper/ir_compiler.py index 4ed36e80..33976e87 100644 --- a/boa/vyper/ir_compiler.py +++ b/boa/vyper/ir_compiler.py @@ -12,6 +12,7 @@ from vyper.utils import unsigned_to_signed, mkalphanum from boa.vm.fast_mem import FastMem +from boa.vm.utils import to_bytes, to_int def debug(*args, **kwargs): @@ -122,13 +123,14 @@ def variables(self, vars_list): StackItem = int | bytes -mapper = {int: "_to_int", bytes: "_to_bytes", StackItem: ""} +mapper = {int: "to_int", bytes: "to_bytes", StackItem: ""} class IRExecutor: - __slots__ = ("args", "compile_ctx")# "exec") + __slots__ = ("args", "compile_ctx") - _out_type: Optional[StackItem] = None + # the type produced when executing this node + _type: Optional[type] = None # | int | bytes def __init__(self, compile_ctx, *args): self.args = args @@ -308,8 +310,8 @@ def mkargname(i): def _compile(self, *args): opcode = hex(self.opcode_info.opcode) for arg in reversed(args): - # TODO figure out the type to avoid calling _to_int - self.builder.append(f"CTX.computation.stack_push_int(_to_int({arg}))") + # TODO figure out the type to avoid calling to_int + self.builder.append(f"CTX.computation.stack_push_int(to_int({arg}))") self.builder.extend( f""" @@ -330,18 +332,6 @@ def executor(cls): return cls -def _to_int(stack_item: StackItem) -> int: - if isinstance(stack_item, int): - return stack_item - return int.from_bytes(stack_item, "big") - - -def _to_bytes(stack_item: StackItem) -> bytes: - if isinstance(stack_item, bytes): - return stack_item - return stack_item.to_bytes(32, "big") - - def _wrap256(x): return x % 2**256 @@ -396,7 +386,7 @@ class MStore(IRExecutor): _name = "mstore" _sig = (int, int) - def _compile(self, val, ptr): + def _compile(self, ptr, val): self.builder.extend( f""" CTX.computation._memory.extend({ptr}, 32) From def2bd3e1d66870e42a429f7db50368a352aada7 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 19 Jun 2023 21:34:27 -0700 Subject: [PATCH 013/122] speed up FastMem.write_word --- boa/vm/fast_mem.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/boa/vm/fast_mem.py b/boa/vm/fast_mem.py index 192d498c..ac88a6e7 100644 --- a/boa/vm/fast_mem.py +++ b/boa/vm/fast_mem.py @@ -25,7 +25,7 @@ def __init__(self): # words which are in the cache but have not been written # to the backing bytes - self.needs_writeback = set() + self.needs_writeback = [] super().__init__() @@ -36,6 +36,7 @@ def extend(self, start_position, size_bytes): new_size = (start_position + size_bytes + 31) // 32 if (size_difference := new_size - len(self.mem_cache)) > 0: self.mem_cache.extend([self._DIRTY] * size_difference) + self.needs_writeback.extend([False] * size_difference) super().extend(start_position, size_bytes) def read_word(self, start_position): @@ -51,9 +52,9 @@ def read_bytes(self, start_position, size): start = start_position // 32 end = ceil32(start_position + size) // 32 for ix in range(start, end): - if ix in self.needs_writeback: + if self.needs_writeback[ix]: super().write(ix * 32, 32, to_bytes(self.mem_cache[ix])) - self.needs_writeback.remove(ix) + self.needs_writeback[ix] = False return super().read_bytes(start_position, size) @@ -61,7 +62,7 @@ def write_word(self, start_position, int_val): if start_position % 32 == 0: self.mem_cache[start_position // 32] = int_val - self.needs_writeback.add(start_position // 32) + self.needs_writeback[start_position // 32] = True # bypass cache dirtying # super().write(start_position, 32, to_bytes(int_val)) From 2e88368f079b2ccb7f11ac9deedcfcc8702b7493 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 19 Jun 2023 22:12:35 -0700 Subject: [PATCH 014/122] move stack item type inference into analysis phase --- boa/vyper/ir_compiler.py | 75 +++++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 31 deletions(-) diff --git a/boa/vyper/ir_compiler.py b/boa/vyper/ir_compiler.py index 33976e87..1044f025 100644 --- a/boa/vyper/ir_compiler.py +++ b/boa/vyper/ir_compiler.py @@ -15,11 +15,6 @@ from boa.vm.utils import to_bytes, to_int -def debug(*args, **kwargs): - print(*args, **kwargs) - - - @dataclass class _Line: indentation_level: int @@ -60,6 +55,7 @@ def block(self, entry): class FrameInfo: current_slot: int = 0 # basically the de bruijn index slots: dict[str, int] = field(default_factory=lambda: {}) + types: dict[str, type] = field(default_factory=lambda: {}) _global_id = 0 @@ -174,11 +170,8 @@ def compile(self, out=None, out_typ=None): assert out is None, (type(self), self, out, argnames) return - #print("ENTER", type(self), self, out, argnames, res) - res_typ, res = res - if out is not None: - if res_typ != out_typ: + if self._type != out_typ: res = f"{mapper[out_typ]}({res})" self.builder.append(f"{out} = {res}") else: @@ -212,6 +205,7 @@ def exec(self, execution_ctx): class IntExecutor(IRExecutor): compile_ctx: CompileContext _int_value: int + _type: type = int def __post_init__(self): assert 0 <= self._int_value < 2**256 @@ -224,7 +218,7 @@ def analyze(self): return self def _compile(self): - return int, repr(self) + return repr(self) @dataclass @@ -232,6 +226,10 @@ class StringExecutor(IRExecutor): compile_ctx: CompileContext _str_value: str + @property + def _type(self): + raise RuntimeError("should have been analyzed!") + def __post_init__(self): self.args = self._sig = () @@ -249,6 +247,10 @@ class VariableExecutor(IRExecutor): varname: str var_slot: int + # optimization assumption: most variables that + # will be hotspots need to be ints. + _type: type = int + def __post_init__(self): self.args = self._sig = () @@ -264,7 +266,7 @@ def out_name(self): return ret def _compile(self): - return StackItem, self.out_name # XXX: figure out type + return self.out_name @dataclass class OpcodeInfo: @@ -288,6 +290,7 @@ def from_opcode_info(cls, mnemonic, opcode_info): # an executor for evm opcodes which dispatches into py-evm class OpcodeIRExecutor(IRExecutor): + _type: type = StackItem def __init__(self, name, opcode_info, *args): self.opcode_info: OpcodeInfo = opcode_info @@ -298,7 +301,8 @@ def __init__(self, name, opcode_info, *args): @cached_property def _sig(self): - return tuple(StackItem for _ in range(self.opcode_info.consumes)) + # TODO figure out the type to avoid calling to_int + return tuple(int for _ in range(self.opcode_info.consumes)) @cached_property def _argnames(self): @@ -310,8 +314,7 @@ def mkargname(i): def _compile(self, *args): opcode = hex(self.opcode_info.opcode) for arg in reversed(args): - # TODO figure out the type to avoid calling to_int - self.builder.append(f"CTX.computation.stack_push_int(to_int({arg}))") + self.builder.append(f"CTX.computation.stack_push_int({arg})") self.builder.extend( f""" @@ -320,7 +323,7 @@ def _compile(self, *args): """ ) if self.opcode_info.produces: - return StackItem, "CTX.computation.stack_pop1_any()" + return "CTX.computation.stack_pop1_any()" _executors = {} @@ -343,14 +346,14 @@ def _as_signed(x): class UnsignedBinopExecutor(IRExecutor): __slots__ = ("_name", "_op") _sig = int, int - _out_type = int + _type: type = int @cached_property def funcname(self): return self._op.__module__ + "." + self._op.__name__ def _compile(self, x, y): - return int, f"_wrap256({self.funcname}({x}, {y}))" + return f"_wrap256({self.funcname}({x}, {y}))" class SignedBinopExecutor(UnsignedBinopExecutor): @@ -361,7 +364,7 @@ def _compile(self, x, y): y = _as_signed({y}, 256, strict=True)) """ ) - return int, f"_wrap256({self._funcname}(x, y))" + return f"_wrap256({self._funcname}(x, y))" # for binops, just use routines from vyper optimizer @@ -375,10 +378,11 @@ def _compile(self, x, y): class MLoad(IRExecutor): _name = "mload" _sig = (int,) + _type: type = int def _compile(self, ptr): self.builder.append(f"CTX.computation._memory.extend({ptr}, 32)") - return int, f"CTX.computation._memory.read_word({ptr})" + return f"CTX.computation._memory.read_word({ptr})" @executor @@ -399,18 +403,19 @@ def _compile(self, ptr, val): class Ceil32(IRExecutor): _name = "ceil32" _sig = (int,) + _type: type = int def _compile(self, x): - return int, f"({x} + 31) & 31" + return f"({x} + 31) & 31" @executor class IsZero(IRExecutor): _name = "iszero" _sig = (int,) + _type: type = int def _compile(self, x): - return int, f"{x} == 0" - + return f"({x} == 0)" # @executor @@ -462,9 +467,9 @@ class Repeat(IRExecutor): def compile(self, out=None): i_var, start, rounds, rounds_bound, body = self.args - start.compile("start", int) - rounds.compile("rounds", int) - rounds_bound.compile("rounds_bound", int) + start.compile("start", out_typ=int) + rounds.compile("rounds", out_typ=int) + rounds_bound.compile("rounds_bound", out_typ=int) end = "start + rounds" self.builder.append(f"assert rounds <= rounds_bound") @@ -548,9 +553,14 @@ def analyze(self): def _argnames(self): return self.compile_ctx.labels[self.label].analyzed_param_names + @cached_property + def _type(self): + return self.compile_ctx.labels[self.label]._type + @cached_property def _sig(self): - return tuple(StackItem for _ in self._argnames) + # optimization assumption: they all need to be ints + return tuple(int for _ in self._argnames) def _compile(self, *args): label = self.label @@ -565,8 +575,7 @@ def _compile(self, *args): assert len(argnames) == len(self.args) args_str = ", ".join(["CTX"] + argnames) - # XXX: figure out type - return StackItem, f"{label}({args_str})" + return f"{label}({args_str})" @executor @@ -604,13 +613,14 @@ def analyze(self): self.var_list = self.var_list.analyze() self.body = self.body.analyze() + self._type = self.body._type + return self def compile(self, **kwargs): pass def compile_func(self): - print(self.var_list) params_str = ", ".join(["CTX"] + self.analyzed_param_names) with self.builder.block(f"def {self.labelname}({params_str})"): self.body.compile() @@ -632,12 +642,15 @@ def analyze(self): self.args = (variable, val, body) + self._type = body._type + return self def compile(self, out=None, out_typ=None): variable, val, body = self.args - # TODO: infer val typ - val.compile(out=variable.out_name, out_typ=StackItem) + # optimization assumption: most variables that + # will be hotspots need to be ints. + val.compile(out=variable.out_name, out_typ=int) return body.compile(out=out, out_typ=out_typ) From 57c1a724dca6ff7b04caf51a876cad513594a383 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 19 Jun 2023 22:46:38 -0700 Subject: [PATCH 015/122] debugging --- boa/environment.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 0eb23041..0eb83a73 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -323,7 +323,7 @@ def apply_computation(cls, state, msg, tx_ctx): addr = msg.code_address contract = cls.env.lookup_contract(addr) if addr else None if contract is None or cls.env._speed == _SLOW: - print("SLOW MODE") + #print("REGULAR MODE") return super().apply_computation(state, msg, tx_ctx) err = None @@ -332,10 +332,10 @@ def apply_computation(cls, state, msg, tx_ctx): eval_ctx = EvalContext(contract.ir_executor, computation) try: if cls.env._speed == _FAST: - print("FAST MODE") + #print("FAST MODE") eval_ctx.run() else: # LUDICROUS - print("LUDICROUS SPEED") + #print("LUDICROUS SPEED") contract.ir_compiler.exec(eval_ctx) except (Halt, VMError): pass From ea2fa27154abc82a698c14b7b812d7407ba4f3bd Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 20 Jun 2023 09:50:19 -0700 Subject: [PATCH 016/122] remove old IR interpreter --- boa/vyper/ir_executor.py | 579 --------------------------------------- 1 file changed, 579 deletions(-) delete mode 100644 boa/vyper/ir_executor.py diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py deleted file mode 100644 index 9d696ffa..00000000 --- a/boa/vyper/ir_executor.py +++ /dev/null @@ -1,579 +0,0 @@ -import contextlib -import re -from dataclasses import dataclass, field -from functools import cached_property -from typing import Any, Optional - -import vyper.ir.optimizer -from eth.exceptions import Revert -from vyper.evm.opcodes import OPCODES -from vyper.utils import unsigned_to_signed - -from boa.vm.fast_mem import FastMem -from boa.vm.utils import to_bytes, to_int, ceil32 - - -def debug(*args, **kwargs): - print(*args, **kwargs) - - -@dataclass -class OpcodeInfo: - # model of an opcode from vyper.evm.opcodes - opcode: int # opcode number ex. 0x01 for ADD - consumes: int # number of stack items this consumes - produces: int # number of stack items this produces, must be 0 or 1 - _gas_estimate: int # in vyper.evm.opcodes but not useful - - def __post_init__(self): - assert self.produces in (0, 1) - - @classmethod - def from_opcode_info(cls, opcode_info): - # info from vyper.evm.opcodes - opcode, consumes, produces, gas_estimate = opcode_info - return cls(opcode, consumes, produces, gas_estimate) - - -@dataclass(slots=True) -class EvalContext: - ir_executor: "IRBaseExecutor" - computation: Any # ComputationAPI - call_frames: list[list[Any]] = field(default_factory=list) - - def __post_init__(self): - self.computation._memory = FastMem() - - def run(self): - try: - self._allocate_local_frame([], self.ir_executor._max_var_height) - self.ir_executor.eval(self) - return self.computation - finally: - # clear all state - self.call_frames = [] - - @property - def local_vars(self): - return self.call_frames[-1] - - def _allocate_local_frame(self, arglist, max_var_height): - # pre-allocate variable slots so we don't waste time with append/pop. - - required_dummies = max_var_height + 1 - len(arglist) - - frame_vars = list(arglist) - - # a sentinel which will cause an exception if somebody tries to use it by accident - dummy = "uh oh!" - frame_vars.extend([dummy] * required_dummies) - - self.call_frames.append(frame_vars) - - @contextlib.contextmanager - def allocate_local_frame(self, arglist, max_var_height): - self._allocate_local_frame(arglist, max_var_height) - yield - self.call_frames.pop() - - def goto(self, compile_ctx, label, arglist): - # special case to handle how vyper returns from subroutines - if label == "returnpc": - return - - compile_ctx.labels[label].execute_subroutine(self, *arglist) - - -@dataclass -class FrameInfo: - current_slot: int = 0 # basically the de bruijn index - slots: dict[str, int] = field(default_factory=lambda: {}) - - # record the largest slot we see, so we know how many local vars to allocate - max_slot: int = 0 - - -@dataclass -class CompileContext: - labels: dict[str, "IRBaseExecutor"] - frames: list[FrameInfo] = field(default_factory=lambda: [FrameInfo()]) - - @property - def local_vars(self): - return self.frames[-1].slots - - @contextlib.contextmanager - def allocate_local_frame(self): - frame = FrameInfo() - self.frames.append(frame) - yield frame - self.frames.pop() - - @contextlib.contextmanager - def variables(self, vars_list): - # allocate variables in vars_list, assigning them each - # a new slot. - shadowed = {} - frame = self.frames[-1] - for varname in vars_list: - shadowed[varname] = frame.slots.get(varname) - frame.slots[varname] = frame.current_slot - frame.current_slot += 1 - frame.max_slot = max(frame.max_slot, frame.current_slot) - - yield - - for varname in vars_list: - frame.current_slot -= 1 - if shadowed[varname] is None: - del frame.slots[varname] - else: - frame.slots[varname] = shadowed[varname] - - -class IRBaseExecutor: - __slots__ = ("args", "compile_ctx") - - def __init__(self, compile_ctx, *args): - self.args = args - self.compile_ctx = compile_ctx - - @cached_property - def name(self): - return self._name - - def __repr__(self): - ret = self.name + "(" - - def show(s): - return hex(s) if isinstance(s, int) else repr(s) - - arg_reprs = [show(arg) for arg in self.args] - arg_reprs = [x.replace("\n", "\n ") for x in arg_reprs] - ret += ",\n ".join(arg_reprs) - ret += ")" - - has_inner_newlines = any("\n" in t for t in arg_reprs) - one_line_output = re.sub(r",\n *", ", ", ret).replace("\n", "") - - should_one_line = len(one_line_output) < 80 and not has_inner_newlines - - if should_one_line: - return one_line_output - else: - return ret - - def eval(self, context): - # debug("ENTER", self.name) - args = self._eval_args(context) - return self._impl(context, *args) - - def _eval_args(self, context): - ret = [arg.eval(context) for arg in reversed(self.args)] - ret.reverse() - return ret - - def analyze(self): - self.args = [arg.analyze() for arg in self.args] - return self - - -@dataclass(slots=True) -class IntExecutor: - _int_value: int - - def __repr__(self): - return repr(self._int_value) - - def eval(self, context): - return self._int_value - - def analyze(self): - return self - - -@dataclass(slots=True) -class StringExecutor: - _str_value: str - compile_ctx: CompileContext - - def __repr__(self): - return repr(self._str_value) - - def analyze(self): - slot = self.compile_ctx.local_vars[self._str_value] - return VariableExecutor(self._str_value, slot) - - -# an IR executor for evm opcodes which dispatches into py-evm -class OpcodeIRExecutor(IRBaseExecutor): - def __init__(self, name, opcode_impl, opcode_info, *args): - self.opcode_impl = opcode_impl # py-evm OpcodeAPI - self.opcode_info: OpcodeInfo = opcode_info # info from vyper.evm.opcodes - self._name = "__" + name + "__" # to differentiate from implemented codes - super().__init__(*args) - - @cached_property - def produces(self): - return self.opcode_info.produces - - def eval(self, context): - # debug("ENTER", self.name) - computation = context.computation - for arg0 in reversed(self.args): - arg = arg0.eval(context) - if isinstance(arg, int): - computation.stack_push_int(arg) - elif isinstance(arg, bytes): - computation.stack_push_bytes(arg) - else: - raise RuntimeError(f"Not a stack item. {type(arg)} {arg}") - - self.opcode_impl.__call__(computation) - - if self.produces: - return computation.stack_pop1_any() - - -_executors = {} - - -# decorator to register an executor class in the _executors dict. -def executor(cls): - _executors[cls._name] = cls - return cls - - -StackItem = int | bytes - - -def _wrap256(x): - return x % 2**256 - - -def _as_signed(x): - return unsigned_to_signed(x, 256, strict=True) - - -@dataclass(slots=True) -class VariableExecutor: - varname: str - var_slot: int - - def __repr__(self): - return f"var({self.varname})" - - def eval(self, context): - return context.local_vars[self.var_slot] - - -class IRExecutor(IRBaseExecutor): - _sig = Optional[tuple] - _max_var_height = None - - # a default eval implementation which is not super fast - # but makes it convenient to implement executors. - # for max perf, inline arg casting as in UnsignedBinopExecutor - def eval(self, context): - # debug("ENTER", self.name) - args = self._eval_args(context) - if self.sig_mapper: - assert len(args) == len(self.sig_mapper) - args = (mapper(arg) for (mapper, arg) in zip(self.sig_mapper, args)) - ret = self._impl(context, *args) - # debug(f"({self.name} returning {ret})") - return ret - - @cached_property - def sig_mapper(self): - return tuple(to_int if typ is int else to_bytes for typ in self._sig) - - -class UnsignedBinopExecutor(IRExecutor): - __slots__ = ("_name", "_op") - - def eval(self, context): - # debug("ENTER",self._name,self.args) - x, y = self.args - # note: eval in reverse order. - y = to_int(y.eval(context)) - x = to_int(x.eval(context)) - return _wrap256(self._op(x, y)) - - -class SignedBinopExecutor(UnsignedBinopExecutor): - def eval(self, context): - x, y = self.args - # note: eval in reverse order. - y = unsigned_to_signed(to_int(y.eval(context), 256, strict=True)) - x = unsigned_to_signed(to_int(x.eval(context), 256, strict=True)) - return _wrap256(self._op(x, y)) - - -# for binops, just use routines from vyper optimizer -for opname, (op, _, unsigned) in vyper.ir.optimizer.arith.items(): - base = UnsignedBinopExecutor if unsigned else SignedBinopExecutor - nickname = opname.capitalize() - _executors[opname] = type(nickname, (base,), {"_op": op, "_name": opname}) - - -@executor -class MLoad(IRExecutor): - _name = "mload" - - def eval(self, context): - # perf hotspot. - ptr = to_int(self.args[0].eval(context)) - context.computation._memory.extend(ptr, 32) - # return context.computation._memory.read_bytes(ptr, 32) - return context.computation._memory.read_word(ptr) - - -@executor -class MStore(IRExecutor): - _name = "mstore" - - def eval(self, context): - # perf hotspot. - val = to_int(self.args[1].eval(context)) - ptr = to_int(self.args[0].eval(context)) - context.computation._memory.extend(ptr, 32) - # context.computation._memory.write(ptr, 32, val) - context.computation._memory.write_word(ptr, val) - - -@executor -class Ceil32(IRExecutor): - _name = "ceil32" - _sig = (int,) - - def _impl(self, context, x): - return ceil32(x) - - -# @executor -class DLoad(IRExecutor): - _name = "dload" - _sig = (int,) - - def _impl(self, context, ptr): - raise RuntimeError("unimplemented") - - -# @executor -class DLoadBytes(IRExecutor): - _name = "dloadbytes" - sig = (int, int, int) - - def _impl(self, context, dst, src, size): - raise RuntimeError("unimplemented") - - -@executor -class Pass(IRExecutor): - _name = "pass" - - def eval(self, context): - pass - - -@executor -class Seq(IRExecutor): - _name = "seq" - - def eval(self, context): - lastval = None - for arg in self.args: - lastval = arg.eval(context) - - return lastval - - -@executor -class Repeat(IRExecutor): - _name = "repeat" - - def eval(self, context): - # debug("ENTER", self.name) - i_var, start, rounds, rounds_bound, body = self.args - - start = start.eval(context) - rounds = rounds.eval(context) - assert rounds <= rounds_bound._int_value - - for i in range(start, start + rounds): - context.local_vars[i_var.var_slot] = i - body.eval(context) - - def analyze(self): - i_name, start, rounds, rounds_bound, body = self.args - - # analyze start and rounds before shadowing. - start = start.analyze() - rounds = rounds.analyze() - - with self.compile_ctx.variables([i_name._str_value]): - i_var = i_name.analyze() - body = body.analyze() - self.args = i_var, start, rounds, rounds_bound, body - return self - - -@executor -class If(IRExecutor): - _name = "if" - - # override `eval()` so we can get the correct lazy behavior - def eval(self, context): - # debug("ENTER", self.name) - try: - test, body, orelse = self.args - except ValueError: - test, body = self.args - orelse = None - - test = to_int(test.eval(context)) - if bool(test): - return body.eval(context) - - elif orelse is not None: - return orelse.eval(context) - - return - - -@executor -class Assert(IRExecutor): - _name = "assert" - _sig = (int,) - - def _impl(self, context, test): - if not bool(test): - context.computation.output = b"" - raise Revert(b"") - - -@executor -class VarList(IRExecutor): - _name = "var_list" - - -@executor -class Goto(IRExecutor): - _name = "goto" - - @cached_property - # figure out the label to jump to, works for both goto and exit_to - # (why does vyper generate them differently? XXX fix in vyper) - def label(self): - ret = self.args[0]._str_value - if ret.startswith("_sym_"): - ret = ret[len("_sym_") :] - return ret - - def analyze(self): - for arg in self.args[1:]: - arg = arg.analyze() - return self - - def eval(self, context): - # debug("ENTER", self.name) - args = reversed([arg.eval(context) for arg in reversed(self.args[1:])]) - context.goto(self.compile_ctx, self.label, args) - - -@executor -class ExitTo(Goto): - # exit_to and goto have pretty much the same semantics as far as we - # are concerned here. - _name = "exit_to" - - -@executor -class Label(IRExecutor): - _name = "label" - - def __init__(self, compile_ctx, name, var_list, body): - self.compile_ctx = compile_ctx - self.var_list = var_list.args - self.body = body - self.labelname = name - - self.args = (name, var_list, body) - - compile_ctx.labels[name._str_value] = self - - def analyze(self): - with self.compile_ctx.allocate_local_frame() as frame_info: - var_list = [var._str_value for var in self.var_list] - with self.compile_ctx.variables(var_list): - self.body = self.body.analyze() - - # grab max slot after analysis - self._max_var_height = frame_info.max_slot - - return self - - def eval(self, context): - raise RuntimeError("labels should only be jumped into!") - - def execute_subroutine(self, context, *args): - # assert len(args) == len(self.var_list), (list(args), self.var_list) - with context.allocate_local_frame(args, self._max_var_height): - self.body.eval(context) - - -@executor -class With(IRExecutor): - _name = "with" - - # accessing local vars is a hotspot, so we translate varnames - # to slots at compile time (something like de-bruijn index) to - # save some dictionary accesses. - def analyze(self): - varname = self.args[0]._str_value - val = self.args[1].analyze() # analyze before shadowing - - with self.compile_ctx.variables([varname]): - variable = self.args[0].analyze() - body = self.args[2].analyze() - - self.args = (variable, val, body) - - return self - - def eval(self, context): - variable, val, body = self.args - - val = val.eval(context) - context.local_vars[variable.var_slot] = val - ret = body.eval(context) - - return ret - - -def executor_from_ir(ir_node, opcode_impls: dict[int, Any]) -> Any: - ret = _executor_from_ir(ir_node, opcode_impls, CompileContext({})) - ret = ret.analyze() - ret._max_var_height = ret.compile_ctx.frames[0].max_slot - return ret - - -def _executor_from_ir(ir_node, opcode_impls, compile_ctx) -> Any: - instr = ir_node.value - if isinstance(instr, int): - return IntExecutor(instr) - - args = [_executor_from_ir(arg, opcode_impls, compile_ctx) for arg in ir_node.args] - - if instr in _executors: - return _executors[instr](compile_ctx, *args) - - if instr.upper() in OPCODES: - opcode_info = OpcodeInfo.from_opcode_info(OPCODES[instr.upper()]) - opcode_impl = opcode_impls[opcode_info.opcode] - return OpcodeIRExecutor(instr, opcode_impl, opcode_info, compile_ctx, *args) - - assert len(ir_node.args) == 0, ir_node - assert isinstance(ir_node.value, str) - return StringExecutor(instr, compile_ctx) From 7b25424dc33de10ed2f6a6cf389ecd9ad5926be1 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 20 Jun 2023 15:18:30 -0700 Subject: [PATCH 017/122] get erc20 working --- boa/environment.py | 27 +- boa/vm/fast_mem.py | 6 +- boa/vm/utils.py | 6 +- boa/vyper/contract.py | 12 +- boa/vyper/{ir_compiler.py => ir_executor.py} | 315 ++++++++++++++----- 5 files changed, 253 insertions(+), 113 deletions(-) rename boa/vyper/{ir_compiler.py => ir_executor.py} (68%) diff --git a/boa/environment.py b/boa/environment.py index 0eb83a73..092ea365 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -4,18 +4,18 @@ import contextlib import logging import sys +import traceback import warnings from typing import Any, Iterator, Optional, Tuple, Union -import traceback import eth.constants as constants import eth.tools.builder.chain as chain import eth.vm.forks.spurious_dragon.computation as spurious_dragon from eth._utils.address import generate_contract_address -from eth.exceptions import Halt, VMError from eth.chains.mainnet import MainnetChain from eth.codecs import abi from eth.db.atomic import AtomicDB +from eth.exceptions import Halt, VMError from eth.vm.code_stream import CodeStream from eth.vm.message import Message from eth.vm.opcode_values import STOP @@ -24,9 +24,9 @@ from eth_utils import setup_DEBUG2_logging, to_canonical_address, to_checksum_address from boa.util.eip1167 import extract_eip1167_address, is_eip1167_contract +from boa.vm.fast_mem import FastMem from boa.vm.fork import AccountDBFork from boa.vm.gas_meters import GasMeter, NoGasMeter, ProfilingGasMeter -from boa.vyper.ir_executor import EvalContext def enable_pyevm_verbose_logging(): @@ -264,8 +264,8 @@ def __call__(self, computation): # py-evm uses class instantiaters which need to be classes # instead of like factories or other easier to use architectures - -# `computation_template` is a class which can be constructed dynamically -class computation_template: +# `titanoboa_computation` is a class which can be constructed dynamically +class titanoboa_computation: _gas_meter_class = GasMeter def __init__(self, *args, **kwargs): @@ -323,30 +323,23 @@ def apply_computation(cls, state, msg, tx_ctx): addr = msg.code_address contract = cls.env.lookup_contract(addr) if addr else None if contract is None or cls.env._speed == _SLOW: - #print("REGULAR MODE") + # print("REGULAR MODE") return super().apply_computation(state, msg, tx_ctx) err = None with cls(state, msg, tx_ctx) as computation: - # print(contract.ir_executor) - eval_ctx = EvalContext(contract.ir_executor, computation) try: - if cls.env._speed == _FAST: - #print("FAST MODE") - eval_ctx.run() - else: # LUDICROUS - #print("LUDICROUS SPEED") - contract.ir_compiler.exec(eval_ctx) + # print("LUDICROUS MODE") + contract.ir_executor.exec(computation) except (Halt, VMError): pass except Exception as e: # grab the exception to raise later - # unclear why this is getting swallowed by py-evm. - # print(e) + # print(e) err = e if err is not None: - # if err is not None: raise err return computation @@ -391,7 +384,7 @@ def _init_vm(self, reset_traces=True): c = type( "TitanoboaComputation", - (computation_template, self.vm.state.computation_class), + (titanoboa_computation, self.vm.state.computation_class), {"env": self}, ) diff --git a/boa/vm/fast_mem.py b/boa/vm/fast_mem.py index ac88a6e7..a57c7db8 100644 --- a/boa/vm/fast_mem.py +++ b/boa/vm/fast_mem.py @@ -1,5 +1,3 @@ -from eth.vm.memory import Memory - import contextlib import re from dataclasses import dataclass, field @@ -8,9 +6,11 @@ import vyper.ir.optimizer from eth.exceptions import Revert +from eth.vm.memory import Memory from vyper.evm.opcodes import OPCODES from vyper.utils import unsigned_to_signed -from boa.vm.utils import to_bytes, to_int, ceil32 + +from boa.vm.utils import ceil32, to_bytes, to_int # a py-evm eth.vm.Memory compatible implementation of memory. diff --git a/boa/vm/utils.py b/boa/vm/utils.py index 02c51e2c..a0994fb3 100644 --- a/boa/vm/utils.py +++ b/boa/vm/utils.py @@ -1,14 +1,14 @@ def ceil32(x): return (x + 31) & ~31 + def to_int(stack_item) -> int: if isinstance(stack_item, int): return stack_item return int.from_bytes(stack_item, "big") -def to_bytes(stack_item) -> bytes: + +def to_bytes(stack_item) -> bytes: if isinstance(stack_item, bytes): return stack_item return stack_item.to_bytes(32, "big") - - diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index ae45a97a..e295185d 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -47,7 +47,6 @@ from boa.vyper.decoder_utils import ByteAddressableStorage, decode_vyper_object from boa.vyper.event import Event, RawEvent from boa.vyper.ir_executor import executor_from_ir -from boa.vyper.ir_compiler import executor_from_ir as compiler_from_ir # error messages for external calls EXTERNAL_CALL_ERRORS = ("external call failed", "returndatasize too small") @@ -782,14 +781,7 @@ def unoptimized_bytecode(self): @cached_property def ir_executor(self): ir = self.compiler_data.ir_runtime - opcode_impls = self.env.vm.state.computation_class.opcodes - return executor_from_ir(ir, opcode_impls) - - @cached_property - def ir_compiler(self): - ir = self.compiler_data.ir_runtime - return compiler_from_ir(ir, self.compiler_data.contract_name) - + return executor_from_ir(ir, self.compiler_data) @contextlib.contextmanager def _anchor_source_map(self, source_map): @@ -870,7 +862,6 @@ def func_t(self): @cached_property def ir(self): - # patch compiler_data to have IR for every function global_ctx = self.contract.global_ctx ir = generate_ir_for_function(self.fn_ast, global_ctx, False) @@ -928,6 +919,7 @@ def _prepare_calldata(self, *args, **kwargs): # sig_kwargs = self.func_t.default_args[: len(kwargs)] total_non_base_args = len(kwargs) + len(args) - n_pos_args + # allow things with `.address` to be encode-able args = [getattr(arg, "address", arg) for arg in args] diff --git a/boa/vyper/ir_compiler.py b/boa/vyper/ir_executor.py similarity index 68% rename from boa/vyper/ir_compiler.py rename to boa/vyper/ir_executor.py index 1044f025..8f114d72 100644 --- a/boa/vyper/ir_compiler.py +++ b/boa/vyper/ir_executor.py @@ -3,13 +3,14 @@ import textwrap from dataclasses import dataclass, field from functools import cached_property -from typing import Any, Optional from pathlib import PurePath +from typing import Any, Optional import vyper.ir.optimizer from eth.exceptions import Revert +from vyper.compiler.phases import CompilerData from vyper.evm.opcodes import OPCODES -from vyper.utils import unsigned_to_signed, mkalphanum +from vyper.utils import mkalphanum, unsigned_to_signed from boa.vm.fast_mem import FastMem from boa.vm.utils import to_bytes, to_int @@ -40,9 +41,6 @@ def append(self, source_code): def get_output(self): return "\n".join(line.show() for line in self.lines) - def get_code(self, filename): - return compile(self.get_output()) - @contextlib.contextmanager def block(self, entry): self.append(entry + ":") @@ -63,9 +61,11 @@ class FrameInfo: @dataclass class CompileContext: - contract_path: Optional[str] = "" + # include CompilerData - we need this to get immutable section size + vyper_compiler_data: CompilerData uuid: str = field(init=False) labels: dict[str, "IRExecutor"] = field(default_factory=dict) + unique_symbols: set[str] = field(default_factory=set) frames: list[FrameInfo] = field(default_factory=lambda: [FrameInfo()]) builder: PythonBuilder = field(default_factory=PythonBuilder) @@ -75,18 +75,28 @@ def __post_init__(self): self.uuid = str(_global_id) _global_id += 1 - @property def local_vars(self): return self.frames[-1].slots @cached_property def contract_name(self): - return mkalphanum(PurePath(self.contract_path).name) + return mkalphanum(PurePath(self.vyper_compiler_data.contract_name).name) def translate_label(self, label): return f"{label}_{self.contract_name}_{self.uuid}" + def add_unique_symbol(self, symbol): + if symbol in self.unique_symbols: + raise ValueError( + "duplicated symbol {symbol}, this is likely a bug in vyper!" + ) + self.unique_symbols.add(symbol) + + def add_label(self, labelname, executor): + if labelname in self.labels: + raise ValueError("duplicated label: {labelname}") + self.labels[labelname] = executor @contextlib.contextmanager def allocate_local_frame(self): @@ -122,30 +132,40 @@ def variables(self, vars_list): mapper = {int: "to_int", bytes: "to_bytes", StackItem: ""} +class ExecutionContext: + __slots__ = "computation" + + def __init__(self, computation): + self.computation = computation + + class IRExecutor: - __slots__ = ("args", "compile_ctx") + __slots__ = ("args", "compile_ctx", "_exec", "ir_node") # the type produced when executing this node _type: Optional[type] = None # | int | bytes - def __init__(self, compile_ctx, *args): + def __init__(self, ir_node, compile_ctx, *args): + self.ir_node = ir_node self.args = args self.compile_ctx = compile_ctx - self.py_bytecode = None - - def get_output(self): - return self.builder.get_output() @cached_property def name(self): return self._name def _compile_args(self, argnames): - assert len(self.args) == len(argnames) == len(self._sig), (self.args, argnames, self._sig) + assert len(self.args) == len(argnames) == len(self._sig), ( + type(self), + self.args, + argnames, + self._sig, + self.ir_node, + ) for out, arg, typ in reversed(list(zip(argnames, self.args, self._sig))): arg.compile(out=out, out_typ=typ) - @property + @cached_property def builder(self): return self.compile_ctx.builder @@ -164,6 +184,9 @@ def compile(self, out=None, out_typ=None): self._compile_args(argnames) + if hasattr(self, "_name"): + self.builder.append(f"# {self.name}") + res = self._compile(*argnames) if res is None: @@ -171,6 +194,8 @@ def compile(self, out=None, out_typ=None): return if out is not None: + # squash F401 lint complaint about import + _ = to_int, to_bytes if self._type != out_typ: res = f"{mapper[out_typ]}({res})" self.builder.append(f"{out} = {res}") @@ -185,6 +210,7 @@ def compile_main(self, contract_path=""): main_name = self.compile_ctx.translate_label("main") with self.builder.block(f"def {main_name}(CTX)"): + self.builder.append("VM = CTX.computation") self.compile() for func in self.compile_ctx.labels.values(): @@ -196,8 +222,9 @@ def compile_main(self, contract_path=""): self._exec = globals()[main_name] - def exec(self, execution_ctx): - execution_ctx.computation._memory = FastMem() + def exec(self, computation): + computation._memory = FastMem() + execution_ctx = ExecutionContext(computation) self._exec(execution_ctx) @@ -256,7 +283,7 @@ def __post_init__(self): def __repr__(self): return f"var({self.varname})" - + @cached_property def out_name(self): slot = self.var_slot @@ -265,9 +292,13 @@ def out_name(self): ret += f"_{slot}" return ret + def analyze(self): + raise RuntimeError("Should not appear during analysis!") + def _compile(self): return self.out_name + @dataclass class OpcodeInfo: # model of an opcode from vyper.evm.opcodes @@ -287,10 +318,16 @@ def from_opcode_info(cls, mnemonic, opcode_info): opcode, consumes, produces, gas_estimate = opcode_info return cls(mnemonic, opcode, consumes, produces, gas_estimate) + @classmethod + def from_mnemonic(cls, mnemonic): + mnemonic = mnemonic.upper() + return cls.from_opcode_info(mnemonic, OPCODES[mnemonic]) + # an executor for evm opcodes which dispatches into py-evm class OpcodeIRExecutor(IRExecutor): _type: type = StackItem + def __init__(self, name, opcode_info, *args): self.opcode_info: OpcodeInfo = opcode_info @@ -299,6 +336,10 @@ def __init__(self, name, opcode_info, *args): super().__init__(*args) + def __repr__(self): + args = ",".join(repr(arg) for arg in self.args) + return f"{self.name}({args})" + @cached_property def _sig(self): # TODO figure out the type to avoid calling to_int @@ -312,18 +353,13 @@ def mkargname(i): return tuple(mkargname(i) for i in range(self.opcode_info.consumes)) def _compile(self, *args): - opcode = hex(self.opcode_info.opcode) for arg in reversed(args): - self.builder.append(f"CTX.computation.stack_push_int({arg})") + self.builder.append(f"VM.stack_push_int({arg})") - self.builder.extend( - f""" - # {self._name} - CTX.computation.opcodes[{opcode}].__call__(CTX.computation) - """ - ) + opcode = hex(self.opcode_info.opcode) + self.builder.append(f"VM.opcodes[{opcode}].__call__(CTX.computation)") if self.opcode_info.produces: - return "CTX.computation.stack_pop1_any()" + return "VM.stack_pop1_any()" _executors = {} @@ -381,8 +417,8 @@ class MLoad(IRExecutor): _type: type = int def _compile(self, ptr): - self.builder.append(f"CTX.computation._memory.extend({ptr}, 32)") - return f"CTX.computation._memory.read_word({ptr})" + self.builder.append(f"VM._memory.extend({ptr}, 32)") + return f"VM._memory.read_word({ptr})" @executor @@ -393,10 +429,100 @@ class MStore(IRExecutor): def _compile(self, ptr, val): self.builder.extend( f""" - CTX.computation._memory.extend({ptr}, 32) - CTX.computation._memory.write_word({ptr}, {val}) + VM._memory.extend({ptr}, 32) + VM._memory.write_word({ptr}, {val}) + """ + ) + + +class _CodeLoader(IRExecutor): + @cached_property + def runtime_code_size(self): + return self.compiler_data.global_ctx.immutable_section_bytes + + +@executor +class DLoad(_CodeLoader): + _name = "dload" + _sig = (int,) + _type: type = bytes + + def _compile(self, ptr): + self.builder.extend( + f""" + code_start_position = {ptr} + {self.runtime_code_size} + + with VM.code.seek(code_start_position): + ret = VM.code.read(32) + + return ret.ljust(size, b"\x00") + """ + ) + + +@executor +class DLoadBytes(_CodeLoader): + _name = "dloadbytes" + _sig = (int, int, int) + + def _compile(self, dst, src, size): + # adapted from py-evm codecopy, but without gas metering and + # mess with the start position + self.builder.extend( + f""" + code_start_position = {src} + {self.runtime_code_size} + VM.extend_memory({dst}, {size}) + + with VM.code.seek(code_start_position): + code_bytes = VM.code.read({size}) + + padded_code_bytes = code_bytes.ljust(size, b"\x00") + + VM.memory_write({dst}, {size}, padded_code_bytes) + """ + ) + + +# we call into py-evm for sha3_32 and sha3_64 to allow tracing to still work +class _Sha3_N(IRExecutor): + _type: type = bytes + + @cached_property + def _argnames(self): + return tuple(f"{self.name}.arg{i}" for i in range(len(self._sig))) + + def _compile(self, *args): + assert self.N > 0 and self.N % 32 == 0 + opcode_info = OpcodeInfo.from_mnemonic("SHA3") + self.builder.append(f"VM.extend_memory(0, {self.N})") + for i, val in enumerate(args): + self.builder.append(f"VM.memory_write({i*32}, 32, {val})") + + sha3 = hex(opcode_info.opcode) + self.builder.extend( + f""" + VM.stack_push_int({self.N}) + VM.stack_push_int(0) + VM.opcodes[{sha3}].__call__(VM) """ ) + return "VM.stack_pop1_any()" + + +@executor +class Sha3_64(_Sha3_N): + _name = "sha3_64" + _sig = (bytes, bytes) + _argnames = ("sha3_64_arg0", "sha3_64_arg1") + N = 64 + + +@executor +class Sha3_32(_Sha3_N): + _name = "sha3_32" + _sig = (bytes,) + _argnames = ("sha3_32_arg0",) + N = 32 @executor @@ -408,6 +534,7 @@ class Ceil32(IRExecutor): def _compile(self, x): return f"({x} + 31) & 31" + @executor class IsZero(IRExecutor): _name = "iszero" @@ -418,23 +545,6 @@ def _compile(self, x): return f"({x} == 0)" -# @executor -class DLoad(IRExecutor): - _name = "dload" - _sig = (int,) - - def _impl(self, context, ptr): - raise RuntimeError("unimplemented") - - -# @executor -class DLoadBytes(IRExecutor): - _name = "dloadbytes" - - def _impl(self, context, dst, src, size): - raise RuntimeError("unimplemented") - - @executor class Pass(IRExecutor): _name = "pass" @@ -472,7 +582,7 @@ def compile(self, out=None): rounds_bound.compile("rounds_bound", out_typ=int) end = "start + rounds" - self.builder.append(f"assert rounds <= rounds_bound") + self.builder.append("assert rounds <= rounds_bound") with self.builder.block(f"for {i_var.out_name} in range(start, {end})"): body.compile() @@ -522,7 +632,7 @@ def _compile(self, test): self.builder.extend( """ if not bool(test): - CTX.computation.output = b"" + VM.output = b"" raise Revert(b"") """ ) @@ -539,22 +649,46 @@ class Goto(IRExecutor): def analyze(self): self.label = self.args[0]._str_value + # exit_to labels weird, fixed in GH vyper#3488 if self.label.startswith("_sym_"): - self.label = self.label[len("_sym_"):] + self.label = self.label[len("_sym_") :] + # just get the parameters, leaving the label in self.args + # messes with downstream machinery which tries to analyze the label. + runtime_args = [] for arg in self.args[1:]: - arg = arg.analyze() + if isinstance(arg, StringExecutor): + argval = arg._str_value + # GH vyper#3488 + if argval == "return_pc": + continue + # calling convention wants to push the return pc since evm + # has no subroutines, we are using python function call + # machinery so we don't need to worry about that. + if argval.startswith("_sym_"): + continue + + runtime_args.append(arg.analyze()) - self.args = self.args[1:] + self.args = runtime_args return self + @cached_property + def is_return_stmt(self): + # i.e. we are exiting a subroutine + return self.label == "return_pc" + @cached_property def _argnames(self): + if self.is_return_stmt: + return () return self.compile_ctx.labels[self.label].analyzed_param_names @cached_property def _type(self): + if self.is_return_stmt: + return None return self.compile_ctx.labels[self.label]._type @cached_property @@ -565,9 +699,8 @@ def _sig(self): def _compile(self, *args): label = self.label - if label == "returnpc": - # i.e. exitsub - assert len(args) == 0 + if self.is_return_stmt: + assert len(self.args) == 0 self.builder.append("return") return @@ -589,31 +722,27 @@ class ExitTo(Goto): class Label(IRExecutor): _name = "label" - def __init__(self, compile_ctx, *args): - self.compile_ctx = compile_ctx + @cached_property + def analyzed_param_names(self): + _, var_list, _ = self.args + return [x.out_name for x in var_list.args if x.varname != "return_pc"] - name, var_list, body = args + def analyze(self): + name, var_list, body = self.args - self.var_list = var_list - self.body = body self.labelname = name._str_value - if name._str_value in compile_ctx.labels: - raise ValueError("duplicated label: {name._str_value}") - compile_ctx.labels[name._str_value] = self - - @cached_property - def analyzed_param_names(self): - return [param.out_name for param in self.var_list.args] + self.compile_ctx.add_label(self.labelname, self) - def analyze(self): with self.compile_ctx.allocate_local_frame(): - params = [param._str_value for param in self.var_list.args] + params = [param._str_value for param in var_list.args] with self.compile_ctx.variables(params): - self.var_list = self.var_list.analyze() - self.body = self.body.analyze() + var_list = var_list.analyze() + body = body.analyze() - self._type = self.body._type + self.args = name, var_list, body + + self._type = body._type return self @@ -621,9 +750,26 @@ def compile(self, **kwargs): pass def compile_func(self): + _, _, body = self.args params_str = ", ".join(["CTX"] + self.analyzed_param_names) with self.builder.block(f"def {self.labelname}({params_str})"): - self.body.compile() + self.builder.append("VM = CTX.computation") + body.compile() + + +@executor +class UniqueSymbol(IRExecutor): + _name = "unique_symbol" + + def analyze(self): + # we don't really need to do this analysis since vyper should + # have done it already, but doesn't hurt to be a little paranoid + symbol = self.args[0]._str_value + self.compile_ctx.add_unique_symbol(symbol) + return self + + def compile(self, **kwargs): + pass @executor @@ -654,8 +800,17 @@ def compile(self, out=None, out_typ=None): return body.compile(out=out, out_typ=out_typ) -def executor_from_ir(ir_node, contract_path = "") -> Any: - ret = _executor_from_ir(ir_node, CompileContext(contract_path)) +@executor +class Set(IRExecutor): + _name = "set" + + def compile(self, **kwargs): + variable, val = self.args + val.compile(out=variable.out_name, out_typ=int) + + +def executor_from_ir(ir_node, vyper_compiler_data) -> Any: + ret = _executor_from_ir(ir_node, CompileContext(vyper_compiler_data)) ret = ret.analyze() ret.compile_main() @@ -670,11 +825,11 @@ def _executor_from_ir(ir_node, compile_ctx) -> Any: args = [_executor_from_ir(arg, compile_ctx) for arg in ir_node.args] if instr in _executors: - return _executors[instr](compile_ctx, *args) + return _executors[instr](ir_node, compile_ctx, *args) if (mnemonic := instr.upper()) in OPCODES: - opcode_info = OpcodeInfo.from_opcode_info(mnemonic, OPCODES[mnemonic]) - return OpcodeIRExecutor(instr, opcode_info, compile_ctx, *args) + opcode_info = OpcodeInfo.from_mnemonic(mnemonic) + return OpcodeIRExecutor(instr, opcode_info, ir_node, compile_ctx, *args) assert len(ir_node.args) == 0, ir_node assert isinstance(ir_node.value, str) From e613a965f2973449bf28a01b3262b9a8ca7f5dc3 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 20 Jun 2023 16:01:42 -0700 Subject: [PATCH 018/122] get dload/dloadbytes to work --- boa/environment.py | 2 +- boa/vyper/ir_executor.py | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 092ea365..0aff4810 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -331,7 +331,7 @@ def apply_computation(cls, state, msg, tx_ctx): try: # print("LUDICROUS MODE") contract.ir_executor.exec(computation) - except (Halt, VMError): + except Halt: pass except Exception as e: # grab the exception to raise later - diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 8f114d72..e79c2b18 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -437,8 +437,9 @@ def _compile(self, ptr, val): class _CodeLoader(IRExecutor): @cached_property - def runtime_code_size(self): - return self.compiler_data.global_ctx.immutable_section_bytes + def immutables_size(self): + compiler_data = self.compile_ctx.vyper_compiler_data + return compiler_data.global_ctx.immutable_section_bytes @executor @@ -448,16 +449,17 @@ class DLoad(_CodeLoader): _type: type = bytes def _compile(self, ptr): + assert self.immutables_size > 0 self.builder.extend( f""" - code_start_position = {ptr} + {self.runtime_code_size} + code_start_position = {ptr} - {self.immutables_size} + len(VM.code) with VM.code.seek(code_start_position): ret = VM.code.read(32) - return ret.ljust(size, b"\x00") """ ) + return f"""ret.ljust(32, b"\\x00")""" @executor @@ -466,17 +468,19 @@ class DLoadBytes(_CodeLoader): _sig = (int, int, int) def _compile(self, dst, src, size): - # adapted from py-evm codecopy, but without gas metering and + assert self.immutables_size > 0 + + # adapted from py-evm codecopy, but without gas metering, then # mess with the start position self.builder.extend( f""" - code_start_position = {src} + {self.runtime_code_size} + code_start_position = {src} - {self.immutables_size} + len(VM.code) VM.extend_memory({dst}, {size}) with VM.code.seek(code_start_position): code_bytes = VM.code.read({size}) - padded_code_bytes = code_bytes.ljust(size, b"\x00") + padded_code_bytes = code_bytes.ljust(size, b"\\x00") VM.memory_write({dst}, {size}, padded_code_bytes) """ From 8f54bbff90ce1f4f750573a4941e6761574f8c9d Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 20 Jun 2023 16:44:23 -0700 Subject: [PATCH 019/122] fix ceil32, stomped variables --- boa/vyper/ir_executor.py | 41 ++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index e79c2b18..f8eaf29f 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -13,7 +13,7 @@ from vyper.utils import mkalphanum, unsigned_to_signed from boa.vm.fast_mem import FastMem -from boa.vm.utils import to_bytes, to_int +from boa.vm.utils import to_bytes, to_int, ceil32 @dataclass @@ -68,6 +68,7 @@ class CompileContext: unique_symbols: set[str] = field(default_factory=set) frames: list[FrameInfo] = field(default_factory=lambda: [FrameInfo()]) builder: PythonBuilder = field(default_factory=PythonBuilder) + var_id: int = -1 def __post_init__(self): # use a global bc the generated functions need to be unique @@ -79,6 +80,11 @@ def __post_init__(self): def local_vars(self): return self.frames[-1].slots + + def fresh_var(self, name = ""): + self.var_id += 1 + return f"var_{name}_{self.var_id}" + @cached_property def contract_name(self): return mkalphanum(PurePath(self.vyper_compiler_data.contract_name).name) @@ -182,6 +188,8 @@ def compile(self, out=None, out_typ=None): assert argnames[0] == "self" argnames = argnames[1:] + argnames = [self.compile_ctx.fresh_var(x) for x in argnames] + self._compile_args(argnames) if hasattr(self, "_name"): @@ -480,7 +488,7 @@ def _compile(self, dst, src, size): with VM.code.seek(code_start_position): code_bytes = VM.code.read({size}) - padded_code_bytes = code_bytes.ljust(size, b"\\x00") + padded_code_bytes = code_bytes.ljust({size}, b"\\x00") VM.memory_write({dst}, {size}, padded_code_bytes) """ @@ -536,7 +544,7 @@ class Ceil32(IRExecutor): _type: type = int def _compile(self, x): - return f"({x} + 31) & 31" + return f"ceil32({x})" @executor @@ -581,13 +589,17 @@ class Repeat(IRExecutor): def compile(self, out=None): i_var, start, rounds, rounds_bound, body = self.args - start.compile("start", out_typ=int) - rounds.compile("rounds", out_typ=int) - rounds_bound.compile("rounds_bound", out_typ=int) - end = "start + rounds" + startname = self.compile_ctx.fresh_var("start") + roundsname = self.compile_ctx.fresh_var("rounds") + start.compile(startname, out_typ=int) + rounds.compile(roundsname, out_typ=int) - self.builder.append("assert rounds <= rounds_bound") - with self.builder.block(f"for {i_var.out_name} in range(start, {end})"): + rounds_bound = rounds_bound._int_value + + end = f"{start} + {rounds}" + + self.builder.append("assert {rounds} <= rounds_bound") + with self.builder.block(f"for {i_var.out_name} in range({start}, {end})"): body.compile() def analyze(self): @@ -616,9 +628,10 @@ def compile(self, out=None, out_typ=None): else: test, body = self.args - test.compile("test", out_typ=int) + testname = self.compile_ctx.fresh_var("test") + test.compile(testname, out_typ=int) - with self.builder.block("if bool(test)"): + with self.builder.block(f"if bool({testname})"): body.compile(out, out_typ) if orelse: @@ -634,8 +647,8 @@ class Assert(IRExecutor): def _compile(self, test): _ = Revert # linter does not know we are using `Revert`. self.builder.extend( - """ - if not bool(test): + f""" + if not bool({test}): VM.output = b"" raise Revert(b"") """ @@ -711,7 +724,7 @@ def _compile(self, *args): argnames = self._argnames assert len(argnames) == len(self.args) - args_str = ", ".join(["CTX"] + argnames) + args_str = ", ".join(["CTX"] + list(args)) return f"{label}({args_str})" From dcc8c8eb3e044eef5356a757f91d93313556da9a Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 20 Jun 2023 17:28:46 -0700 Subject: [PATCH 020/122] fix exception handling --- boa/environment.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 0aff4810..c7986c61 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -333,14 +333,7 @@ def apply_computation(cls, state, msg, tx_ctx): contract.ir_executor.exec(computation) except Halt: pass - except Exception as e: - # grab the exception to raise later - - # unclear why this is getting swallowed by py-evm. - # print(e) - err = e - - if err is not None: - raise err + return computation From a79fb0379831831e06d262b84fbab070383a3738 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 20 Jun 2023 18:04:28 -0700 Subject: [PATCH 021/122] handle reverts --- boa/vyper/contract.py | 20 ++++++++++++++------ boa/vyper/ir_executor.py | 23 ++++++++++++++++++++--- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index e295185d..5c2ceca3 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -190,8 +190,8 @@ class ErrorDetail: @classmethod def from_computation(cls, contract, computation): - error_detail = contract.find_error_meta(computation.code) - ast_source = contract.find_source_of(computation.code) + error_detail = contract.find_error_meta(computation) + ast_source = contract.find_source_of(computation) reason = None if ast_source is not None: reason = DevReason.at_source_location( @@ -541,7 +541,7 @@ def ast_map(self): return ast_map_of(self.compiler_data.vyper_module) def _get_fn_from_computation(self, computation): - node = self.find_source_of(computation.code) + node = self.find_source_of(computation) return get_fn_ancestor_from_node(node) def debug_frame(self, computation=None): @@ -581,14 +581,22 @@ def source_map(self): ) return self._source_map - def find_error_meta(self, code_stream): + def find_error_meta(self, computation): + if hasattr(computation, "vyper_error_msg"): + return computation.vyper_error_msg + + code_stream = computation.code_stream error_map = self.source_map.get("error_map", {}) for pc in reversed(code_stream._trace): if pc in error_map: return error_map[pc] return None - def find_source_of(self, code_stream, is_initcode=False): + def find_source_of(self, computation, is_initcode=False): + if hasattr(computation, "vyper_source_pos"): + return self.ast_map.get(computation.vyper_source_pos) + + code_stream = computation.code_stream pc_map = self.source_map["pc_pos_map"] for pc in reversed(code_stream._trace): if pc in pc_map and pc_map[pc] in self.ast_map: @@ -693,7 +701,7 @@ def handle_error(self, computation): def stack_trace(self, computation=None): computation = computation or self._computation ret = StackTrace([ErrorDetail.from_computation(self, computation)]) - error_detail = self.find_error_meta(computation.code) + error_detail = self.find_error_meta(computation) if error_detail not in EXTERNAL_CALL_ERRORS: return ret return _handle_child_trace(computation, self.env, ret) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index f8eaf29f..3aaff3c7 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -7,7 +7,7 @@ from typing import Any, Optional import vyper.ir.optimizer -from eth.exceptions import Revert +from eth.exceptions import Revert as VMRevert from vyper.compiler.phases import CompilerData from vyper.evm.opcodes import OPCODES from vyper.utils import mkalphanum, unsigned_to_signed @@ -645,12 +645,29 @@ class Assert(IRExecutor): _sig = (int,) def _compile(self, test): - _ = Revert # linter does not know we are using `Revert`. self.builder.extend( f""" if not bool({test}): VM.output = b"" - raise Revert(b"") + VM.vyper_source_pos = {repr(self.ir_node.source_pos)} + VM.vyper_error_msg = {repr(self.ir_node.error_msg)} + raise VMRevert("") # venom assert + """ + ) + + +@executor +class _IRRevert(IRExecutor): + _name = "revert" + _sig = (int,int) + + def _compile(self, ptr, size): + self.builder.extend( + f""" + VM.output = VM.memory_read_bytes({ptr}, {size}) + VM.vyper_source_pos = {repr(self.ir_node.source_pos)} + VM.vyper_error_msg = {repr(self.ir_node.error_msg)} + raise VMRevert("") # venom revert """ ) From d7ab2ca569261affaaa30bd3a9fd8822b630c368 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 20 Jun 2023 19:10:26 -0700 Subject: [PATCH 022/122] fix bytes padding --- boa/vm/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boa/vm/utils.py b/boa/vm/utils.py index a0994fb3..7054781e 100644 --- a/boa/vm/utils.py +++ b/boa/vm/utils.py @@ -10,5 +10,5 @@ def to_int(stack_item) -> int: def to_bytes(stack_item) -> bytes: if isinstance(stack_item, bytes): - return stack_item + return stack_item.rjust(32, b"\x00") return stack_item.to_bytes(32, "big") From 71023e6ea6c65a4ab5c8674e0988ff7e5d316dff Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 20 Jun 2023 19:27:52 -0700 Subject: [PATCH 023/122] ensure source pos in ir --- boa/vyper/ir_executor.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 3aaff3c7..b2b81e1d 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -648,7 +648,6 @@ def _compile(self, test): self.builder.extend( f""" if not bool({test}): - VM.output = b"" VM.vyper_source_pos = {repr(self.ir_node.source_pos)} VM.vyper_error_msg = {repr(self.ir_node.error_msg)} raise VMRevert("") # venom assert @@ -843,7 +842,15 @@ def compile(self, **kwargs): val.compile(out=variable.out_name, out_typ=int) +def _ensure_source_pos(ir_node, source_pos=None): + if ir_node.source_pos is None: + ir_node.source_pos = source_pos + for arg in ir_node.args: + _ensure_source_pos(arg, ir_node.source_pos) + + def executor_from_ir(ir_node, vyper_compiler_data) -> Any: + _ensure_source_pos(ir_node) ret = _executor_from_ir(ir_node, CompileContext(vyper_compiler_data)) ret = ret.analyze() From 2db42be82d9ffce45edb96d2ccb0df768e2a6f0c Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 21 Jun 2023 07:30:31 -0700 Subject: [PATCH 024/122] optimize sha3 tracer --- boa/environment.py | 2 +- boa/vyper/contract.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index c7986c61..13ad5732 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -243,7 +243,7 @@ def __init__(self, sstore_op, trace_db): def __call__(self, computation): value, slot = [to_bytes(t) for t in computation._stack.values[-2:]] - account = to_checksum_address(computation.msg.storage_address) + account = computation.msg.storage_address self.trace_db.setdefault(account, set()) # we don't want to deal with snapshots/commits/reverts, so just diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 5c2ceca3..63a508ac 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -144,6 +144,10 @@ def __init__( self.env.register_blueprint(compiler_data.bytecode, self) + @cached_property + def canonical_address(self): + return to_canonical_address(self.address) + @cached_property def deployer(self): return VyperDeployer(self.compiler_data, filename=self.filename) @@ -384,7 +388,7 @@ def _dealias(self, maybe_address): def get(self, truncate_limit=None): if isinstance(self.typ, HashMapT): ret = {} - for k in self.contract.env.sstore_trace.get(self.contract.address, {}): + for k in self.contract.env.sstore_trace.get(self.addr, {}): path = unwrap_storage_key(self.contract.env.sha3_trace, k) if to_int(path[0]) != self.slot: continue From f42f9ad971df0aff502a0c4da0017beb515e754a Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 21 Jun 2023 08:36:19 -0700 Subject: [PATCH 025/122] optimize some ops, 5% improvement return, halt, calldataload, calldatasize, callvalue --- boa/vyper/ir_executor.py | 69 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index b2b81e1d..79c2844e 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -7,7 +7,7 @@ from typing import Any, Optional import vyper.ir.optimizer -from eth.exceptions import Revert as VMRevert +from eth.exceptions import Halt, Revert as VMRevert from vyper.compiler.phases import CompilerData from vyper.evm.opcodes import OPCODES from vyper.utils import mkalphanum, unsigned_to_signed @@ -418,6 +418,42 @@ def _compile(self, x, y): _executors[opname] = type(nickname, (base,), {"_op": op, "_name": opname}) +_NULL_BYTE = repr(b"\x00") + +@executor +class CalldataLoad(IRExecutor): + _name = "calldataload" + _sig = (int,) + _type: type = bytes + + def _compile(self, ptr): + self.builder.extend( + f""" + ret = VM.msg.data_as_bytes[{ptr} : {ptr} + 32] + """) + return f"ret.ljust(32, {_NULL_BYTE})" + +@executor +class CalldataSize(IRExecutor): + _name = "calldatasize" + _sig = () + _type: type = int + + def _compile(self): + return f"len(VM.msg.data)" + +@executor +class CallValue(IRExecutor): + _name = "callvalue" + _sig = () + _type: type = int + + def _compile(self): + return f"VM.msg.value" + + +# XXX: calldatacopy + @executor class MLoad(IRExecutor): _name = "mload" @@ -467,7 +503,7 @@ def _compile(self, ptr): """ ) - return f"""ret.ljust(32, b"\\x00")""" + return f"ret.ljust(32, {_NULL_BYTE})" @executor @@ -488,7 +524,7 @@ def _compile(self, dst, src, size): with VM.code.seek(code_start_position): code_bytes = VM.code.read({size}) - padded_code_bytes = code_bytes.ljust({size}, b"\\x00") + padded_code_bytes = code_bytes.ljust({size}, {_NULL_BYTE}) VM.memory_write({dst}, {size}, padded_code_bytes) """ @@ -670,6 +706,33 @@ def _compile(self, ptr, size): """ ) +@executor +class Return(IRExecutor): + _name = "return" + _sig = (int,int) + + def _compile(self, ptr, size): + self.builder.extend( + f""" + VM.output = VM.memory_read_bytes({ptr}, {size}) + raise Halt("") # return + """ + ) + +@executor +class Stop(IRExecutor): + _name = "stop" + _sig = () + + def _compile(self): + self.builder.extend( + f""" + raise Halt("") # return + """ + ) + + + @executor class VarList(IRExecutor): From 1dd378bc8749679a9f6a7eaaee4d36109d0733cf Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 21 Jun 2023 09:36:12 -0700 Subject: [PATCH 026/122] optimize sha3, another 10% --- boa/environment.py | 63 ++++++++++------------ boa/vyper/contract.py | 10 ++-- boa/vyper/ir_executor.py | 111 +++++++++++++++++++++------------------ 3 files changed, 94 insertions(+), 90 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 13ad5732..c1295f20 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -26,6 +26,7 @@ from boa.util.eip1167 import extract_eip1167_address, is_eip1167_contract from boa.vm.fast_mem import FastMem from boa.vm.fork import AccountDBFork +from boa.vm.utils import to_bytes, to_int from boa.vm.gas_meters import GasMeter, NoGasMeter, ProfilingGasMeter @@ -186,27 +187,14 @@ def __len__(self): # ### section: sha3 preimage tracing -# (TODO: move to dedicated module) -def to_int(value): - if isinstance(value, tuple): - return to_int(value[1]) # how py-evm stores stuff on stack - if isinstance(value, int): - return value - if isinstance(value, bytes): - return int.from_bytes(value, "big") +def _stackitem_to_int(value): + assert isinstance(value, tuple) + return to_int(value[1]) # how py-evm stores stuff on stack - raise ValueError("invalid type %s", type(value)) - -def to_bytes(value): - if isinstance(value, tuple): - return to_bytes(value[1]) # how py-evm stores stuff on stack - if isinstance(value, bytes): - return value - if isinstance(value, int): - return value.to_bytes(32, "big") - - raise ValueError("invalid type %s", type(value)) +def _stackitem_to_bytes(value): + assert isinstance(value, tuple) + return to_bytes(value[1]) # how py-evm stores stuff on stack class Sha3PreimageTracer: @@ -214,12 +202,12 @@ class Sha3PreimageTracer: # trace preimages of sha3 - def __init__(self, sha3_op, preimage_map): - self.preimages = preimage_map + def __init__(self, sha3_op, env): + self.env = env self.sha3 = sha3_op def __call__(self, computation): - size, offset = [to_int(x) for x in computation._stack.values[-2:]] + size, offset = [_stackitem_to_int(x) for x in computation._stack.values[-2:]] # dispatch into py-evm self.sha3(computation) @@ -229,27 +217,23 @@ def __call__(self, computation): preimage = computation._memory.read_bytes(offset, size) - image = to_bytes(computation._stack.values[-1]) + image = _stackitem_to_bytes(computation._stack.values[-1]) - self.preimages[image] = preimage + self.env._trace_sha3_preimage(preimage, image) class SstoreTracer: mnemonic = "SSTORE" - def __init__(self, sstore_op, trace_db): - self.trace_db = trace_db + def __init__(self, sstore_op, env): + self.env = env self.sstore = sstore_op def __call__(self, computation): - value, slot = [to_bytes(t) for t in computation._stack.values[-2:]] + value, slot = [_stackitem_to_int(t) for t in computation._stack.values[-2:]] account = computation.msg.storage_address - self.trace_db.setdefault(account, set()) - # we don't want to deal with snapshots/commits/reverts, so just - # register that the slot was touched and downstream can filter - # zero entries. - self.trace_db[account].add(slot) + self.env._trace_sstore(account, slot) # dispatch into py-evm self.sstore(computation) @@ -390,11 +374,22 @@ def _init_vm(self, reset_traces=True): self.sstore_trace = {} # patch in tracing opcodes - c.opcodes[0x20] = Sha3PreimageTracer(c.opcodes[0x20], self.sha3_trace) - c.opcodes[0x55] = SstoreTracer(c.opcodes[0x55], self.sstore_trace) + c.opcodes[0x20] = Sha3PreimageTracer(c.opcodes[0x20], self) + c.opcodes[0x55] = SstoreTracer(c.opcodes[0x55], self) self.vm.patch = VMPatcher(self.vm) + def _trace_sha3_preimage(self, preimage, image): + self.sha3_trace[image] = preimage + + def _trace_sstore(self, account, slot): + self.sstore_trace.setdefault(account, set()) + # we don't want to deal with snapshots/commits/reverts, so just + # register that the slot was touched and downstream can filter + # zero entries. + self.sstore_trace[account].add(slot) + + def fork(self, url, reset_traces=True, **kwargs): kwargs["url"] = url AccountDBFork._rpc_init_kwargs = kwargs diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 63a508ac..c8402a5a 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -32,11 +32,12 @@ from vyper.semantics.types.function import ContractFunctionT from vyper.utils import method_id -from boa.environment import AddressType, Env, to_int +from boa.environment import AddressType, Env from boa.profiling import LineProfile, cache_gas_used_for_computation from boa.util.exceptions import strip_internal_frames from boa.util.lrudict import lrudict from boa.vm.gas_meters import ProfilingGasMeter +from boa.vm.utils import to_int, to_bytes from boa.vyper import _METHOD_ID_VAR from boa.vyper.ast_utils import ast_map_of, get_fn_ancestor_from_node, reason_at from boa.vyper.compiler_utils import ( @@ -343,8 +344,9 @@ def unwrap_storage_key(sha3_db, k): path = [] def unwrap(k): - if k in sha3_db: - preimage = sha3_db[k] + k_bytes = to_bytes(k) + if k_bytes in sha3_db: + preimage = sha3_db[k_bytes] slot, k = preimage[:32], preimage[32:] unwrap(slot) @@ -402,7 +404,7 @@ def get(self, truncate_limit=None): path_t.append(ty.key_type) ty = ty.value_type - val = self._decode(to_int(k), ty, truncate_limit) + val = self._decode(k, ty, truncate_limit) # set val only if value is nonzero if val: diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 79c2844e..fdfc8036 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -15,6 +15,12 @@ from boa.vm.fast_mem import FastMem from boa.vm.utils import to_bytes, to_int, ceil32 +from eth_hash.auto import keccak + + +def keccak256(x): + return keccak(x) + @dataclass class _Line: @@ -81,7 +87,7 @@ def local_vars(self): return self.frames[-1].slots - def fresh_var(self, name = ""): + def freshvar(self, name = ""): self.var_id += 1 return f"var_{name}_{self.var_id}" @@ -150,6 +156,7 @@ class IRExecutor: # the type produced when executing this node _type: Optional[type] = None # | int | bytes + _is_static = True # can be used from a STATICCALL context def __init__(self, ir_node, compile_ctx, *args): self.ir_node = ir_node @@ -188,7 +195,7 @@ def compile(self, out=None, out_typ=None): assert argnames[0] == "self" argnames = argnames[1:] - argnames = [self.compile_ctx.fresh_var(x) for x in argnames] + argnames = [self.compile_ctx.freshvar(x) for x in argnames] self._compile_args(argnames) @@ -420,19 +427,6 @@ def _compile(self, x, y): _NULL_BYTE = repr(b"\x00") -@executor -class CalldataLoad(IRExecutor): - _name = "calldataload" - _sig = (int,) - _type: type = bytes - - def _compile(self, ptr): - self.builder.extend( - f""" - ret = VM.msg.data_as_bytes[{ptr} : {ptr} + 32] - """) - return f"ret.ljust(32, {_NULL_BYTE})" - @executor class CalldataSize(IRExecutor): _name = "calldatasize" @@ -452,7 +446,36 @@ def _compile(self): return f"VM.msg.value" -# XXX: calldatacopy +@executor +class CalldataLoad(IRExecutor): + _name = "calldataload" + _sig = (int,) + _type: type = bytes + + def _compile(self, ptr): + self.builder.extend( + f""" + val = bytes(VM.msg.data[{ptr} : {ptr} + 32]) + """) + return f"val.ljust(32, {_NULL_BYTE})" + + +@executor +class CalldataCopy(IRExecutor): + _name = "calldatacopy" + _sig = (int, int, int) + + def _compile(self, dst, src, size): + self.builder.extend( + f""" + val = bytes(VM.msg.data[{src} : {src} + {size}]) + val = val.ljust({size}, {_NULL_BYTE}) + + VM.extend_memory({dst}, {size}) + VM.memory_write({dst}, {size}, val) + """ + ) + @executor class MLoad(IRExecutor): @@ -500,7 +523,6 @@ def _compile(self, ptr): with VM.code.seek(code_start_position): ret = VM.code.read(32) - """ ) return f"ret.ljust(32, {_NULL_BYTE})" @@ -531,46 +553,31 @@ def _compile(self, dst, src, size): ) -# we call into py-evm for sha3_32 and sha3_64 to allow tracing to still work -class _Sha3_N(IRExecutor): - _type: type = bytes - - @cached_property - def _argnames(self): - return tuple(f"{self.name}.arg{i}" for i in range(len(self._sig))) - - def _compile(self, *args): - assert self.N > 0 and self.N % 32 == 0 - opcode_info = OpcodeInfo.from_mnemonic("SHA3") - self.builder.append(f"VM.extend_memory(0, {self.N})") - for i, val in enumerate(args): - self.builder.append(f"VM.memory_write({i*32}, 32, {val})") - - sha3 = hex(opcode_info.opcode) - self.builder.extend( - f""" - VM.stack_push_int({self.N}) - VM.stack_push_int(0) - VM.opcodes[{sha3}].__call__(VM) - """ - ) - return "VM.stack_pop1_any()" - - @executor -class Sha3_64(_Sha3_N): +class Sha3_64(IRExecutor): _name = "sha3_64" _sig = (bytes, bytes) - _argnames = ("sha3_64_arg0", "sha3_64_arg1") - N = 64 + + # we need to trace for downstream to reverse engineer mappings + def _compile(self, arg1, arg2): + self.builder.extend(f""" + preimage = {arg1}.rjust(32, {_NULL_BYTE}) + {arg2}.rjust(32, {_NULL_BYTE}) + image = keccak256(preimage) + VM.env._trace_sha3_preimage(preimage, image) + """) + return "image" @executor -class Sha3_32(_Sha3_N): +class Sha3_32(IRExecutor): _name = "sha3_32" _sig = (bytes,) - _argnames = ("sha3_32_arg0",) - N = 32 + + def _compile(self, arg): + self.builder.extend(f""" + preimage = {arg}.rjust(32, {_NULL_BYTE}) + """) + return "keccak256(preimage)" @executor @@ -625,8 +632,8 @@ class Repeat(IRExecutor): def compile(self, out=None): i_var, start, rounds, rounds_bound, body = self.args - startname = self.compile_ctx.fresh_var("start") - roundsname = self.compile_ctx.fresh_var("rounds") + startname = self.compile_ctx.freshvar("start") + roundsname = self.compile_ctx.freshvar("rounds") start.compile(startname, out_typ=int) rounds.compile(roundsname, out_typ=int) @@ -664,7 +671,7 @@ def compile(self, out=None, out_typ=None): else: test, body = self.args - testname = self.compile_ctx.fresh_var("test") + testname = self.compile_ctx.freshvar("test") test.compile(testname, out_typ=int) with self.builder.block(f"if bool({testname})"): From bf6c40efbb43162ec82bc6558cff4cebfc95c862 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 21 Jun 2023 10:07:58 -0700 Subject: [PATCH 027/122] cache recent keccaks, another 3% --- boa/util/lrudict.py | 6 ++++-- boa/vyper/ir_executor.py | 5 ++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/boa/util/lrudict.py b/boa/util/lrudict.py index fbf8a117..90d6ab46 100644 --- a/boa/util/lrudict.py +++ b/boa/util/lrudict.py @@ -16,6 +16,8 @@ def __setitem__(self, k, val): # set based on a lambda def setdefault_lambda(self, k, fn): - if k in self: + try: return self[k] - self[k] = fn() + except KeyError: + self[k] = (ret := fn(k)) + return ret diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index fdfc8036..7007556f 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -14,12 +14,15 @@ from boa.vm.fast_mem import FastMem from boa.vm.utils import to_bytes, to_int, ceil32 +from boa.util.lrudict import lrudict from eth_hash.auto import keccak +_keccak_cache = lrudict(256) + def keccak256(x): - return keccak(x) + return _keccak_cache.setdefault_lambda(x, keccak) @dataclass From b2253382966b648e37dd51e5ade569115a33acea Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 21 Jun 2023 10:13:44 -0700 Subject: [PATCH 028/122] implement sload, sstore, saves 15% --- boa/vyper/ir_executor.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 7007556f..f4fe54ae 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -7,7 +7,7 @@ from typing import Any, Optional import vyper.ir.optimizer -from eth.exceptions import Halt, Revert as VMRevert +from eth.exceptions import Halt, Revert as VMRevert, WriteProtection from vyper.compiler.phases import CompilerData from vyper.evm.opcodes import OPCODES from vyper.utils import mkalphanum, unsigned_to_signed @@ -205,6 +205,14 @@ def compile(self, out=None, out_typ=None): if hasattr(self, "_name"): self.builder.append(f"# {self.name}") + if not self._is_static: + self.builder.extend(f""" + if VM.msg.is_static: + raise WriteProtection( + "Cannot modify state while inside of a STATICCALL context" + ) + """) + res = self._compile(*argnames) if res is None: @@ -556,6 +564,27 @@ def _compile(self, dst, src, size): ) +@executor +class SLoad(IRExecutor): + _name = "sload" + _sig = (int,) + _type = int + + def _compile(self, slot): + return f"""VM.state.get_storage(address=VM.msg.storage_address, slot={slot})""" + + +@executor +class SStore(IRExecutor): + _name = "sstore" + _is_static = False + _sig = (int,int) + _type = int + + def _compile(self, slot, value): + return f"""VM.state.set_storage(address=VM.msg.storage_address, slot={slot}, value={value})""" + + @executor class Sha3_64(IRExecutor): _name = "sha3_64" From 623b9c6ba0b391b41d058e3c8a2e7e345061064a Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 21 Jun 2023 14:15:45 -0700 Subject: [PATCH 029/122] move ceil32/floor32 --- boa/vm/utils.py | 8 ++++++-- boa/vyper/decoder_utils.py | 9 +-------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/boa/vm/utils.py b/boa/vm/utils.py index 7054781e..0cb3ed47 100644 --- a/boa/vm/utils.py +++ b/boa/vm/utils.py @@ -1,5 +1,9 @@ -def ceil32(x): - return (x + 31) & ~31 +def ceil32(n): + return floor32(n + 31) + + +def floor32(n): + return n & ~31 def to_int(stack_item) -> int: diff --git a/boa/vyper/decoder_utils.py b/boa/vyper/decoder_utils.py index 9df25f82..297127d5 100644 --- a/boa/vyper/decoder_utils.py +++ b/boa/vyper/decoder_utils.py @@ -11,14 +11,7 @@ StringT, ) from vyper.utils import unsigned_to_signed - - -def ceil32(n): - return floor32(n + 31) - - -def floor32(n): - return n & ~31 +from boa.vm.utils import ceil32, floor32 # wrap storage in something which looks like memory From c745c7d9cae4b147938ef2be07f994874571ae2f Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 21 Jun 2023 10:25:19 -0700 Subject: [PATCH 030/122] fix memory reads --- boa/vm/fast_mem.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/boa/vm/fast_mem.py b/boa/vm/fast_mem.py index a57c7db8..03901f3f 100644 --- a/boa/vm/fast_mem.py +++ b/boa/vm/fast_mem.py @@ -48,7 +48,7 @@ def read_word(self, start_position): self.mem_cache[start_position // 32] = ret return ret - def read_bytes(self, start_position, size): + def _writeback(self, start_position, size): start = start_position // 32 end = ceil32(start_position + size) // 32 for ix in range(start, end): @@ -56,8 +56,14 @@ def read_bytes(self, start_position, size): super().write(ix * 32, 32, to_bytes(self.mem_cache[ix])) self.needs_writeback[ix] = False + def read_bytes(self, start_position, size): + self._writeback(start_position, size) return super().read_bytes(start_position, size) + def read(self, start_position, size): + self._writeback(start_position, size) + return super().read(start_position, size) + def write_word(self, start_position, int_val): if start_position % 32 == 0: self.mem_cache[start_position // 32] = int_val From 30e609fc7dd3ff003f8459ab444f6acb29f6352d Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 21 Jun 2023 14:26:05 -0700 Subject: [PATCH 031/122] fix int check --- boa/vyper/ir_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index f4fe54ae..559ce870 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -261,7 +261,7 @@ class IntExecutor(IRExecutor): _type: type = int def __post_init__(self): - assert 0 <= self._int_value < 2**256 + assert -(2**255) <= self._int_value < 2**256 self.args = self._sig = () def __repr__(self): From 80efaa7af17454b6029fcbb7b102adb4d510bce0 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 21 Jun 2023 14:29:46 -0700 Subject: [PATCH 032/122] add select opcode --- boa/vyper/ir_executor.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 559ce870..f884c488 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -436,6 +436,15 @@ def _compile(self, x, y): _executors[opname] = type(nickname, (base,), {"_op": op, "_name": opname}) +@executor +class Select(IRExecutor): + _name = "select" + _sig = (int, StackItem, StackItem) + _type: type = StackItem + def _compile(self, test, x, y): + return f"{x} if {test} else {y}" + + _NULL_BYTE = repr(b"\x00") @executor From 028c19f105979c75383254aeb13ef6d5c8261ef8 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 21 Jun 2023 08:23:26 -0700 Subject: [PATCH 033/122] optimize address conversion, saves 9% --- boa/environment.py | 98 ++++++++++++++++++++++++++++--------------- boa/vyper/contract.py | 21 +++++----- 2 files changed, 74 insertions(+), 45 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index c1295f20..0030d409 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -20,10 +20,11 @@ from eth.vm.message import Message from eth.vm.opcode_values import STOP from eth.vm.transaction_context import BaseTransactionContext -from eth_typing import Address +from eth_typing import Address as PYEVM_Address from eth_utils import setup_DEBUG2_logging, to_canonical_address, to_checksum_address from boa.util.eip1167 import extract_eip1167_address, is_eip1167_contract +from boa.util.lrudict import lrudict from boa.vm.fast_mem import FastMem from boa.vm.fork import AccountDBFork from boa.vm.utils import to_bytes, to_int @@ -93,11 +94,37 @@ def anchor(self): setattr(self, attr, snap[attr]) -AddressType = Union[Address, bytes, str] # make mypy happy +# XXX: inherit from bytes directly so that we can pass it to py-evm? +class Address: #(PYEVM_Address): + # converting between checksum and canonical addresses is a hotspot; + # this class contains both and caches recently seen conversions + __slots__ = "checksum_address", "canonical_address", "normalized_address" + _cache = lrudict(1024) + def __new__(cls, address): + if isinstance(address, Address): + return address -def _addr(addr: AddressType) -> Address: - return Address(to_canonical_address(addr)) + try: + return cls._cache[address] + except KeyError: + pass + + self = super().__new__(cls) + self.checksum_address = to_checksum_address(address) + self.canonical_address = to_canonical_address(address) + self.normalized_address = self.checksum_address.lower() + cls._cache[address] = self + return self + + def __repr__(self): + return f"_Address({self.normalized_address})" + + def __str__(self): + return self.checksum_address + +# make mypy happy +_AddressType = Address | str | bytes | PYEVM_Address _opcode_overrides = {} @@ -124,14 +151,14 @@ def register_precompile(*args, **kwargs): def register_raw_precompile(address, fn, force=False): global _precompiles - address = _addr(address) + address = Address(address) if address in _precompiles and not force: raise ValueError(f"Already registered: {address}") _precompiles[address] = fn def deregister_raw_precompile(address, force=True): - address = _addr(address) + address = Address(address) if address not in _precompiles and not force: raise ValueError("Not registered: {address}") _precompiles.pop(address, None) @@ -297,7 +324,7 @@ def apply_create_message(cls, state, msg, tx_ctx): if bytecode in cls.env._code_registry: target = cls.env._code_registry[bytecode].deployer.at(contract_address) - target.created_from = to_checksum_address(msg.sender) + target.created_from = Address(msg.sender) cls.env.register_contract(contract_address, target) return computation @@ -305,15 +332,15 @@ def apply_create_message(cls, state, msg, tx_ctx): @classmethod def apply_computation(cls, state, msg, tx_ctx): addr = msg.code_address - contract = cls.env.lookup_contract(addr) if addr else None + contract = cls.env._lookup_contract_fast(addr) if addr else None if contract is None or cls.env._speed == _SLOW: - # print("REGULAR MODE") + #print("REGULAR MODE") return super().apply_computation(state, msg, tx_ctx) err = None with cls(state, msg, tx_ctx) as computation: try: - # print("LUDICROUS MODE") + #print("LUDICROUS MODE") contract.ir_executor.exec(computation) except Halt: pass @@ -425,14 +452,14 @@ def reset_gas_metering_behavior(self) -> None: # set balance of address in py-evm def set_balance(self, addr, value): - self.vm.state.set_balance(to_canonical_address(addr), value) + self.vm.state.set_balance(Address(addr), value) # get balance of address in py-evm def get_balance(self, addr): - return self.vm.state.get_balance(to_canonical_address(addr)) + return self.vm.state.get_balance(Address(addr)) def register_contract(self, address, obj): - addr = to_canonical_address(address) + addr = Address(address).canonical_address self._contracts[addr] = obj # also register it in the registry for @@ -443,14 +470,17 @@ def register_contract(self, address, obj): def register_blueprint(self, bytecode, obj): self._code_registry[bytecode] = obj - def lookup_contract(self, address): - return self._contracts.get(to_canonical_address(address)) + def _lookup_contract_fast(self, address: PYEVM_Address): + return self._contracts.get(address) + + def lookup_contract(self, address: _AddressType): + return self._contracts.get(Address(address).canonical_address) def alias(self, address, name): - self._aliases[to_checksum_address(address)] = name + self._aliases[Address(address).canonical_address] = name def lookup_alias(self, address): - return self._aliases[to_checksum_address(address)] + return self._aliases[Address(address).canonical_address] # advanced: reset warm/cold counters for addresses and storage def _reset_access_counters(self): @@ -470,7 +500,7 @@ def anchor(self): @contextlib.contextmanager def sender(self, address): tmp = self.eoa - self.eoa = to_checksum_address(address) + self.eoa = Address(address) try: yield finally: @@ -485,40 +515,39 @@ def get_singleton(cls): cls._singleton = cls() return cls._singleton - def generate_address(self, alias: Optional[str] = None) -> AddressType: + def generate_address(self, alias: Optional[str] = None) -> Address: self._address_counter += 1 - t = self._address_counter.to_bytes(length=20, byteorder="big") - # checksum addr easier for humans to debug - ret = to_checksum_address(t) + t = Address(self._address_counter.to_bytes(length=20, byteorder="big")) if alias is not None: - self.alias(ret, alias) + self.alias(t, alias) - return ret + return t # helper fn - def _get_sender(self, sender=None) -> Address: + def _get_sender(self, sender=None) -> PYEVM_Address: if sender is None: sender = self.eoa if self.eoa is None: raise ValueError(f"{self}.eoa not defined!") - return _addr(sender) + return Address(sender).canonical_address def deploy_code( self, - sender: Optional[AddressType] = None, + sender: Optional[_AddressType] = None, gas: Optional[int] = None, value: int = 0, bytecode: bytes = b"", start_pc: int = 0, # override the target address: - override_address: Optional[AddressType] = None, - ) -> Tuple[AddressType, bytes]: + override_address: Optional[_AddressType] = None, + ) -> Tuple[Address, bytes]: if gas is None: gas = self.vm.state.gas_limit + sender = self._get_sender(sender) if override_address is not None: - target_address = _addr(override_address) + target_address = Address(override_address).canonical_address else: nonce = self.vm.state.get_nonce(sender) self.vm.state.increment_nonce(sender) @@ -546,8 +575,8 @@ def deploy_code( def execute_code( self, - to_address: AddressType = constants.ZERO_ADDRESS, - sender: Optional[AddressType] = None, + to_address: _AddressType = constants.ZERO_ADDRESS, + sender: Optional[_AddressType] = None, gas: Optional[int] = None, value: int = 0, data: bytes = b"", @@ -559,12 +588,13 @@ def execute_code( ) -> Any: if gas is None: gas = self.vm.state.gas_limit + sender = self._get_sender(sender) class FakeMessage(Message): # Message object with settable attrs __dict__: dict = {} - to = _addr(to_address) + to = Address(to_address).canonical_address bytecode = override_bytecode if override_bytecode is None: @@ -601,7 +631,7 @@ def _hook_trace_computation(self, computation, contract=None): # loop over pc so that it is available when coverage hooks into it pass for child in computation.children: - child_contract = self.lookup_contract(child.msg.code_address) + child_contract = self._lookup_contract_fast(child.msg.code_address) self._hook_trace_computation(computation, child_contract) # function to time travel diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index c8402a5a..b937f859 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -32,7 +32,7 @@ from vyper.semantics.types.function import ContractFunctionT from vyper.utils import method_id -from boa.environment import AddressType, Env +from boa.environment import Address, Env, to_int from boa.profiling import LineProfile, cache_gas_used_for_computation from boa.util.exceptions import strip_internal_frames from boa.util.lrudict import lrudict @@ -79,8 +79,8 @@ def deploy_as_blueprint(self, *args, **kwargs): self.compiler_data, *args, filename=self.filename, **kwargs ) - def at(self, address: AddressType) -> "VyperContract": - address = to_checksum_address(address) + def at(self, address: Any) -> "VyperContract": + address = Address(address) ret = VyperContract( self.compiler_data, override_address=address, @@ -88,7 +88,7 @@ def at(self, address: AddressType) -> "VyperContract": filename=self.filename, ) vm = ret.env.vm - bytecode = vm.state.get_code(to_canonical_address(address)) + bytecode = vm.state.get_code(address.canonical_address) ret._set_bytecode(bytecode) @@ -141,14 +141,10 @@ def __init__( bytecode=deploy_bytecode, override_address=override_address ) - self.address = to_checksum_address(addr) + self.address = Address(addr) self.env.register_blueprint(compiler_data.bytecode, self) - @cached_property - def canonical_address(self): - return to_canonical_address(self.address) - @cached_property def deployer(self): return VyperDeployer(self.compiler_data, filename=self.filename) @@ -766,6 +762,10 @@ def override_vyper_namespace(self): finally: self._vyper_namespace["self"].typ.members.pop("__boa_debug__", None) + @cached_property + def checksum_address(self): + return self.address.checksum_address + # for eval(), we need unoptimized assembly, since the dead code # eliminator might prune a dead function (which we want to eval) @cached_property @@ -934,8 +934,7 @@ def _prepare_calldata(self, *args, **kwargs): total_non_base_args = len(kwargs) + len(args) - n_pos_args - # allow things with `.address` to be encode-able - args = [getattr(arg, "address", arg) for arg in args] + args = [getattr(arg, "checksum_address", arg) for arg in args] method_id, args_abi_type = self.args_abi_type(total_non_base_args) encoded_args = abi.encode(args_abi_type, args) From 742b48644f9bc04b639b2599eb69351cc10edef6 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 21 Jun 2023 15:59:17 -0700 Subject: [PATCH 034/122] optimize shr, shl, sar, saves 10% --- boa/vyper/ir_executor.py | 53 ++++++++++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index f884c488..6d060edd 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -405,6 +405,7 @@ def _as_signed(x): return unsigned_to_signed(x, 256, strict=True) +# XXX: some of these do not need to be wrapped class UnsignedBinopExecutor(IRExecutor): __slots__ = ("_name", "_op") _sig = int, int @@ -420,13 +421,9 @@ def _compile(self, x, y): class SignedBinopExecutor(UnsignedBinopExecutor): def _compile(self, x, y): - self.builder.extend( - f""" - x = _as_signed({x}, 256, strict=True)) - y = _as_signed({y}, 256, strict=True)) - """ - ) - return f"_wrap256({self._funcname}(x, y))" + return f"_wrap256({self._funcname}(_as_signed({x}), _as_signed({y})))" + + # for binops, just use routines from vyper optimizer @@ -436,6 +433,37 @@ def _compile(self, x, y): _executors[opname] = type(nickname, (base,), {"_op": op, "_name": opname}) +# shift instructions have opposite operand order from operator.*shift functions. +@executor +class Shr(IRExecutor): + _name = "shr" + _sig = (int,int) + _type: type = int + + def _compile(self, bits, val): + return f"{val} >> {bits}" + +@executor +class Sar(IRExecutor): + _name = "sar" + _sig = (int,int) + _type: type = int + + def _compile(self, bits, val): + # wrap256 to get back into unsigned land + return f"_wrap256(_as_signed({val}) >> {bits})" + +@executor +class Shl(IRExecutor): + _name = "shl" + _sig = (int,int) + _type: type = int + + def _compile(self, bits, val): + return f"{val} >> {bits}" + + + @executor class Select(IRExecutor): _name = "select" @@ -447,6 +475,17 @@ def _compile(self, test, x, y): _NULL_BYTE = repr(b"\x00") + +@executor +class Caller(IRExecutor): + _name = "caller" + _sig = () + _type: type = bytes + + def _compile(self): + return f"VM.msg.sender" + + @executor class CalldataSize(IRExecutor): _name = "calldatasize" From ecf8694e57d35140794efe0922d9790813e6dc04 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 21 Jun 2023 16:36:11 -0700 Subject: [PATCH 035/122] add log instructions --- boa/environment.py | 11 ++-- boa/vyper/contract.py | 2 +- boa/vyper/decoder_utils.py | 1 + boa/vyper/ir_executor.py | 100 +++++++++++++++++++++++++------------ 4 files changed, 76 insertions(+), 38 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 0030d409..921c0210 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -27,8 +27,8 @@ from boa.util.lrudict import lrudict from boa.vm.fast_mem import FastMem from boa.vm.fork import AccountDBFork -from boa.vm.utils import to_bytes, to_int from boa.vm.gas_meters import GasMeter, NoGasMeter, ProfilingGasMeter +from boa.vm.utils import to_bytes, to_int def enable_pyevm_verbose_logging(): @@ -95,12 +95,13 @@ def anchor(self): # XXX: inherit from bytes directly so that we can pass it to py-evm? -class Address: #(PYEVM_Address): +class Address: # (PYEVM_Address): # converting between checksum and canonical addresses is a hotspot; # this class contains both and caches recently seen conversions __slots__ = "checksum_address", "canonical_address", "normalized_address" _cache = lrudict(1024) + def __new__(cls, address): if isinstance(address, Address): return address @@ -123,6 +124,7 @@ def __repr__(self): def __str__(self): return self.checksum_address + # make mypy happy _AddressType = Address | str | bytes | PYEVM_Address @@ -334,13 +336,13 @@ def apply_computation(cls, state, msg, tx_ctx): addr = msg.code_address contract = cls.env._lookup_contract_fast(addr) if addr else None if contract is None or cls.env._speed == _SLOW: - #print("REGULAR MODE") + # print("REGULAR MODE") return super().apply_computation(state, msg, tx_ctx) err = None with cls(state, msg, tx_ctx) as computation: try: - #print("LUDICROUS MODE") + # print("LUDICROUS MODE") contract.ir_executor.exec(computation) except Halt: pass @@ -416,7 +418,6 @@ def _trace_sstore(self, account, slot): # zero entries. self.sstore_trace[account].add(slot) - def fork(self, url, reset_traces=True, **kwargs): kwargs["url"] = url AccountDBFork._rpc_init_kwargs = kwargs diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index b937f859..5f0af76c 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -37,7 +37,7 @@ from boa.util.exceptions import strip_internal_frames from boa.util.lrudict import lrudict from boa.vm.gas_meters import ProfilingGasMeter -from boa.vm.utils import to_int, to_bytes +from boa.vm.utils import to_bytes, to_int from boa.vyper import _METHOD_ID_VAR from boa.vyper.ast_utils import ast_map_of, get_fn_ancestor_from_node, reason_at from boa.vyper.compiler_utils import ( diff --git a/boa/vyper/decoder_utils.py b/boa/vyper/decoder_utils.py index 297127d5..9e5a8f61 100644 --- a/boa/vyper/decoder_utils.py +++ b/boa/vyper/decoder_utils.py @@ -11,6 +11,7 @@ StringT, ) from vyper.utils import unsigned_to_signed + from boa.vm.utils import ceil32, floor32 diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 6d060edd..e73c820e 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -7,20 +7,21 @@ from typing import Any, Optional import vyper.ir.optimizer -from eth.exceptions import Halt, Revert as VMRevert, WriteProtection +from eth.exceptions import Halt +from eth.exceptions import Revert as VMRevert +from eth.exceptions import WriteProtection +from eth_hash.auto import keccak from vyper.compiler.phases import CompilerData from vyper.evm.opcodes import OPCODES from vyper.utils import mkalphanum, unsigned_to_signed -from boa.vm.fast_mem import FastMem -from boa.vm.utils import to_bytes, to_int, ceil32 from boa.util.lrudict import lrudict - -from eth_hash.auto import keccak - +from boa.vm.fast_mem import FastMem +from boa.vm.utils import ceil32, to_bytes, to_int _keccak_cache = lrudict(256) + def keccak256(x): return _keccak_cache.setdefault_lambda(x, keccak) @@ -89,8 +90,7 @@ def __post_init__(self): def local_vars(self): return self.frames[-1].slots - - def freshvar(self, name = ""): + def freshvar(self, name=""): self.var_id += 1 return f"var_{name}_{self.var_id}" @@ -206,12 +206,14 @@ def compile(self, out=None, out_typ=None): self.builder.append(f"# {self.name}") if not self._is_static: - self.builder.extend(f""" + self.builder.extend( + f""" if VM.msg.is_static: raise WriteProtection( "Cannot modify state while inside of a STATICCALL context" ) - """) + """ + ) res = self._compile(*argnames) @@ -424,8 +426,6 @@ def _compile(self, x, y): return f"_wrap256({self._funcname}(_as_signed({x}), _as_signed({y})))" - - # for binops, just use routines from vyper optimizer for opname, (op, _, unsigned) in vyper.ir.optimizer.arith.items(): base = UnsignedBinopExecutor if unsigned else SignedBinopExecutor @@ -437,38 +437,40 @@ def _compile(self, x, y): @executor class Shr(IRExecutor): _name = "shr" - _sig = (int,int) + _sig = (int, int) _type: type = int def _compile(self, bits, val): return f"{val} >> {bits}" + @executor class Sar(IRExecutor): _name = "sar" - _sig = (int,int) + _sig = (int, int) _type: type = int def _compile(self, bits, val): # wrap256 to get back into unsigned land return f"_wrap256(_as_signed({val}) >> {bits})" + @executor class Shl(IRExecutor): _name = "shl" - _sig = (int,int) + _sig = (int, int) _type: type = int def _compile(self, bits, val): return f"{val} >> {bits}" - @executor class Select(IRExecutor): _name = "select" _sig = (int, StackItem, StackItem) _type: type = StackItem + def _compile(self, test, x, y): return f"{x} if {test} else {y}" @@ -495,6 +497,7 @@ class CalldataSize(IRExecutor): def _compile(self): return f"len(VM.msg.data)" + @executor class CallValue(IRExecutor): _name = "callvalue" @@ -515,7 +518,8 @@ def _compile(self, ptr): self.builder.extend( f""" val = bytes(VM.msg.data[{ptr} : {ptr} + 32]) - """) + """ + ) return f"val.ljust(32, {_NULL_BYTE})" @@ -533,7 +537,7 @@ def _compile(self, dst, src, size): VM.extend_memory({dst}, {size}) VM.memory_write({dst}, {size}, val) """ - ) + ) @executor @@ -626,7 +630,7 @@ def _compile(self, slot): class SStore(IRExecutor): _name = "sstore" _is_static = False - _sig = (int,int) + _sig = (int, int) _type = int def _compile(self, slot, value): @@ -640,11 +644,13 @@ class Sha3_64(IRExecutor): # we need to trace for downstream to reverse engineer mappings def _compile(self, arg1, arg2): - self.builder.extend(f""" + self.builder.extend( + f""" preimage = {arg1}.rjust(32, {_NULL_BYTE}) + {arg2}.rjust(32, {_NULL_BYTE}) image = keccak256(preimage) VM.env._trace_sha3_preimage(preimage, image) - """) + """ + ) return "image" @@ -654,12 +660,43 @@ class Sha3_32(IRExecutor): _sig = (bytes,) def _compile(self, arg): - self.builder.extend(f""" + self.builder.extend( + f""" preimage = {arg}.rjust(32, {_NULL_BYTE}) - """) + """ + ) return "keccak256(preimage)" +class _LogN(IRExecutor): + @cached_property + def _argnames(self): + return ("ofst", "size") + tuple(f"log_arg{i}" for i in range(self.N)) + + @cached_property + def _sig(self): + return (int, int) + tuple(int for _ in range(self.N)) + + def _compile(self, ofst, size, *topics): + self.builder.extend( + f""" + VM.extend_memory({ofst}, {size}) + log_data = VM.memory_read_bytes({ofst}, {size}) + VM.add_log_entry( + account=VM.msg.storage_address, + topics=({", ".join(topics)}), + data=log_data, + ) + """ + ) + + +# generate log0..log4 +for i in (0, 1, 2, 3, 4): + opname = f"log{i}" + _executors[opname] = type(opname.capitalize(), (_LogN,), {"N": i, "_name": opname}) + + @executor class Ceil32(IRExecutor): _name = "ceil32" @@ -667,6 +704,7 @@ class Ceil32(IRExecutor): _type: type = int def _compile(self, x): + _ = ceil32 # typing hint return f"ceil32({x})" @@ -717,12 +755,10 @@ def compile(self, out=None): start.compile(startname, out_typ=int) rounds.compile(roundsname, out_typ=int) - rounds_bound = rounds_bound._int_value - - end = f"{start} + {rounds}" + end = f"{startname} + {roundsname}" - self.builder.append("assert {rounds} <= rounds_bound") - with self.builder.block(f"for {i_var.out_name} in range({start}, {end})"): + self.builder.append(f"assert {roundsname} <= {rounds_bound}") + with self.builder.block(f"for {i_var.out_name} in range({startname}, {end})"): body.compile() def analyze(self): @@ -781,7 +817,7 @@ def _compile(self, test): @executor class _IRRevert(IRExecutor): _name = "revert" - _sig = (int,int) + _sig = (int, int) def _compile(self, ptr, size): self.builder.extend( @@ -793,10 +829,11 @@ def _compile(self, ptr, size): """ ) + @executor class Return(IRExecutor): _name = "return" - _sig = (int,int) + _sig = (int, int) def _compile(self, ptr, size): self.builder.extend( @@ -806,6 +843,7 @@ def _compile(self, ptr, size): """ ) + @executor class Stop(IRExecutor): _name = "stop" @@ -819,8 +857,6 @@ def _compile(self): ) - - @executor class VarList(IRExecutor): _name = "var_list" From 4068cf301de9eaf749dbef739e6e11ef9dcef83e Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 21 Jun 2023 17:03:32 -0700 Subject: [PATCH 036/122] propagate IR error messages --- boa/vyper/ir_executor.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index e73c820e..1250045e 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -1028,11 +1028,13 @@ def compile(self, **kwargs): val.compile(out=variable.out_name, out_typ=int) -def _ensure_source_pos(ir_node, source_pos=None): +def _ensure_source_pos(ir_node, source_pos=None, error_msg=None): if ir_node.source_pos is None: ir_node.source_pos = source_pos + if ir_node.error_msg is None: + ir_node.error_msg = error_msg for arg in ir_node.args: - _ensure_source_pos(arg, ir_node.source_pos) + _ensure_source_pos(arg, ir_node.source_pos, ir_node.error_msg) def executor_from_ir(ir_node, vyper_compiler_data) -> Any: From 1aeb0e148349c57668c274a290ca3b1d0d63747c Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 21 Jun 2023 17:10:45 -0700 Subject: [PATCH 037/122] fix some address/typing issues --- boa/environment.py | 22 +++++++++++---------- boa/vyper/contract.py | 45 +++++++++++++++++++++---------------------- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 921c0210..594c31c4 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -95,11 +95,11 @@ def anchor(self): # XXX: inherit from bytes directly so that we can pass it to py-evm? -class Address: # (PYEVM_Address): +class Address(str): # (PYEVM_Address): # converting between checksum and canonical addresses is a hotspot; # this class contains both and caches recently seen conversions - __slots__ = "checksum_address", "canonical_address", "normalized_address" + __slots__ = ("canonical_address",) _cache = lrudict(1024) def __new__(cls, address): @@ -111,19 +111,21 @@ def __new__(cls, address): except KeyError: pass - self = super().__new__(cls) - self.checksum_address = to_checksum_address(address) + checksum_address = to_checksum_address(address) + self = super().__new__(cls, checksum_address) self.canonical_address = to_canonical_address(address) - self.normalized_address = self.checksum_address.lower() cls._cache[address] = self return self + #def __hash__(self): + # return hash(self.checksum_address) + + #def __eq__(self, other): + # return super().__eq__(self, other) + def __repr__(self): return f"_Address({self.normalized_address})" - def __str__(self): - return self.checksum_address - # make mypy happy _AddressType = Address | str | bytes | PYEVM_Address @@ -453,11 +455,11 @@ def reset_gas_metering_behavior(self) -> None: # set balance of address in py-evm def set_balance(self, addr, value): - self.vm.state.set_balance(Address(addr), value) + self.vm.state.set_balance(Address(addr).canonical_address, value) # get balance of address in py-evm def get_balance(self, addr): - return self.vm.state.get_balance(Address(addr)) + return self.vm.state.get_balance(Address(addr).canonical_address) def register_contract(self, address, obj): addr = Address(address).canonical_address diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 5f0af76c..20027f92 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -17,7 +17,6 @@ from eth.codecs import abi from eth.exceptions import VMError from eth_typing import Address -from eth_utils import to_canonical_address, to_checksum_address from vyper.ast.utils import parse_to_ast from vyper.codegen.core import calculate_type_for_external_return from vyper.codegen.function_definitions import generate_ir_for_function @@ -109,6 +108,10 @@ def __init__(self, compiler_data, env=None, filename=None): self.filename = filename + @property + def address(self): + return self._address + # create a blueprint for use with `create_from_blueprint`. # uses a ERC5202 preamble, when calling `create_from_blueprint` will @@ -141,7 +144,7 @@ def __init__( bytecode=deploy_bytecode, override_address=override_address ) - self.address = Address(addr) + self._address = Address(addr) self.env.register_blueprint(compiler_data.bytecode, self) @@ -364,7 +367,7 @@ def setpath(lens, path, val): class VarModel: def __init__(self, contract, slot, typ): self.contract = contract - self.addr = to_canonical_address(self.contract.address) + self.addr = self.contract._address.canonical_address self.accountdb = contract.env.vm.state._account_db self.slot = slot self.typ = typ @@ -469,9 +472,9 @@ def __init__( self._ctor = VyperFunction(external_fns.pop("__init__"), self) if skip_initcode: - self.address = to_checksum_address(override_address) + self._address = Address(override_address) else: - self.address = self._run_init(*args, override_address=override_address) + self._address = self._run_init(*args, override_address=override_address) for fn_name, fn in external_fns.items(): setattr(self, fn_name, VyperFunction(fn, self)) @@ -489,7 +492,7 @@ def __init__( self._source_map = None self._computation = None - self.env.register_contract(self.address, self) + self.env.register_contract(self._address, self) def _run_init(self, *args, override_address=None): encoded_args = b"" @@ -500,7 +503,7 @@ def _run_init(self, *args, override_address=None): addr, self.bytecode = self.env.deploy_code( bytecode=initcode, override_address=override_address ) - return to_checksum_address(addr) + return Address(addr) # manually set the runtime bytecode, instead of using deploy def _set_bytecode(self, bytecode: bytes) -> None: @@ -516,7 +519,7 @@ def _set_bytecode(self, bytecode: bytes) -> None: def __repr__(self): ret = ( - f"<{self.compiler_data.contract_name} at {to_checksum_address(self.address)}, " + f"<{self.compiler_data.contract_name} at {self.address}, " f"compiled with vyper-{vyper.__version__}+{vyper.__commit__}>" ) @@ -587,7 +590,7 @@ def find_error_meta(self, computation): if hasattr(computation, "vyper_error_msg"): return computation.vyper_error_msg - code_stream = computation.code_stream + code_stream = computation.code error_map = self.source_map.get("error_map", {}) for pc in reversed(code_stream._trace): if pc in error_map: @@ -598,7 +601,7 @@ def find_source_of(self, computation, is_initcode=False): if hasattr(computation, "vyper_source_pos"): return self.ast_map.get(computation.vyper_source_pos) - code_stream = computation.code_stream + code_stream = computation.code pc_map = self.source_map["pc_pos_map"] for pc in reversed(code_stream._trace): if pc in pc_map and pc_map[pc] in self.ast_map: @@ -643,7 +646,7 @@ def event_for(self): def decode_log(self, e): log_id, address, topics, data = e - assert to_canonical_address(self.address) == address + assert self._address.canonical_address == address event_hash = topics[0] event_t = self.event_for[event_hash] @@ -667,7 +670,7 @@ def decode_log(self, e): args = abi.decode(tuple_typ.abi_type.selector_name(), data) - return Event(log_id, self.address, event_t, decoded_topics, args) + return Event(log_id, self._address, event_t, decoded_topics, args) def marshal_to_python(self, computation, vyper_typ): self._computation = computation # for further inspection @@ -762,10 +765,6 @@ def override_vyper_namespace(self): finally: self._vyper_namespace["self"].typ.members.pop("__boa_debug__", None) - @cached_property - def checksum_address(self): - return self.address.checksum_address - # for eval(), we need unoptimized assembly, since the dead code # eliminator might prune a dead function (which we want to eval) @cached_property @@ -825,7 +824,7 @@ def eval( with self._anchor_source_map(source_map): method_id = b"dbug" # note dummy method id, doesn't get validated c = self.env.execute_code( - to_address=self.address, + to_address=self._address, sender=sender, data=method_id, value=value, @@ -934,7 +933,7 @@ def _prepare_calldata(self, *args, **kwargs): total_non_base_args = len(kwargs) + len(args) - n_pos_args - args = [getattr(arg, "checksum_address", arg) for arg in args] + args = [getattr(arg, "address", arg) for arg in args] method_id, args_abi_type = self.args_abi_type(total_non_base_args) encoded_args = abi.encode(args_abi_type, args) @@ -949,7 +948,7 @@ def __call__(self, *args, value=0, gas=None, sender=None, **kwargs): override_bytecode = getattr(self, "override_bytecode", None) with self.contract._anchor_source_map(self._source_map): computation = self.env.execute_code( - to_address=self.contract.address, + to_address=self.contract._address, sender=sender, data=calldata_bytes, value=value, @@ -1006,7 +1005,7 @@ def __init__(self, name, functions, events, address, created_from=None, env=None for func_t in self._functions: setattr(self, func_t.name, ABIFunction(func_t, self)) - self.address = to_checksum_address(address) + self._address = Address(address) self.created_from = created_from self._source_map = {"pc_pos_map": {}} # override @@ -1033,7 +1032,7 @@ def deployer(self): return ABIContractFactory(self._name, self._functions, self._events) def __repr__(self): - ret = f"<{self._name} interface at {to_checksum_address(self.address)}>" + ret = f"<{self._name} interface at {self.address}>" if self.created_from is not None: ret += f" (created by {self.created_from})" @@ -1079,11 +1078,11 @@ def from_abi_dict(cls, abi, name=None): return cls(name, functions, events) def at(self, address) -> ABIContract: - address = to_checksum_address(address) + address = Address(address) ret = ABIContract(self._name, self._functions, self._events, address) - bytecode = ret.env.vm.state.get_code(to_canonical_address(address)) + bytecode = ret.env.vm.state.get_code(address.canonical_address) if bytecode == b"": warnings.warn( "requested {ret} but there is no bytecode at that address!", From c168aca2902684e87d22e46cc31123ed0ef2d484 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 21 Jun 2023 17:53:29 -0700 Subject: [PATCH 038/122] change the api a bit --- boa/environment.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 594c31c4..bde1cfb1 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -272,10 +272,6 @@ def __call__(self, computation): # ### End section: sha3 tracing -_SLOW = 0 -_FAST = 1 -_LUDICROUS = 2 - # py-evm uses class instantiaters which need to be classes # instead of like factories or other easier to use architectures - @@ -337,7 +333,7 @@ def apply_create_message(cls, state, msg, tx_ctx): def apply_computation(cls, state, msg, tx_ctx): addr = msg.code_address contract = cls.env._lookup_contract_fast(addr) if addr else None - if contract is None or cls.env._speed == _SLOW: + if contract is None or not cls.env._enable_fast_mode: # print("REGULAR MODE") return super().apply_computation(state, msg, tx_ctx) @@ -357,7 +353,7 @@ class Env: _singleton = None _initial_address_counter = 100 _coverage_enabled = False - _speed = _SLOW + _enable_fast_mode = False def __init__(self): self.chain = _make_chain() From c02a858b682b88f0bf4730af366cd3bd75d7142a Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 21 Jun 2023 18:12:30 -0700 Subject: [PATCH 039/122] fix exit_to, add cleanup_repeat and break --- boa/vyper/ir_executor.py | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 1250045e..29380d63 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -423,7 +423,7 @@ def _compile(self, x, y): class SignedBinopExecutor(UnsignedBinopExecutor): def _compile(self, x, y): - return f"_wrap256({self._funcname}(_as_signed({x}), _as_signed({y})))" + return f"_wrap256({self.funcname}(_as_signed({x}), _as_signed({y})))" # for binops, just use routines from vyper optimizer @@ -932,10 +932,43 @@ def _compile(self, *args): @executor class ExitTo(Goto): - # exit_to and goto have pretty much the same semantics as far as we - # are concerned here. + # exit_to is similar but it is known to end execution of this subroutine _name = "exit_to" + def _compile(self, *args): + subroutine_call = super()._compile(*args) + return f"return {subroutine_call}" + +@executor +class CleanupRepeat(IRExecutor): + # a no-op from our perspective + _name = "cleanup_repeat" + _sig = () + _argnames = () + + def _compile(self): + self.builder.append("pass") + + +@executor +class Break(IRExecutor): + _name = "break" + _sig = () + _argnames = () + + def _compile(self): + self.builder.append("break") + + +@executor +class Continue(IRExecutor): + _name = "continue" + _sig = () + _argnames = () + + def _compile(self): + self.builder.append("continue") + @executor class Label(IRExecutor): From 3a38e6da4568a22c00c244136db3d33a81be5cf7 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 21 Jun 2023 18:52:51 -0700 Subject: [PATCH 040/122] skip computation.extend_memory saves about 5% --- boa/vyper/ir_executor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 29380d63..4b2b6328 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -534,7 +534,7 @@ def _compile(self, dst, src, size): val = bytes(VM.msg.data[{src} : {src} + {size}]) val = val.ljust({size}, {_NULL_BYTE}) - VM.extend_memory({dst}, {size}) + VM._memory.extend({dst}, {size}) VM.memory_write({dst}, {size}, val) """ ) @@ -604,7 +604,7 @@ def _compile(self, dst, src, size): self.builder.extend( f""" code_start_position = {src} - {self.immutables_size} + len(VM.code) - VM.extend_memory({dst}, {size}) + VM._memory.extend({dst}, {size}) with VM.code.seek(code_start_position): code_bytes = VM.code.read({size}) @@ -680,7 +680,7 @@ def _sig(self): def _compile(self, ofst, size, *topics): self.builder.extend( f""" - VM.extend_memory({ofst}, {size}) + VM._memory.extend({ofst}, {size}) log_data = VM.memory_read_bytes({ofst}, {size}) VM.add_log_entry( account=VM.msg.storage_address, From 23d4bb27985428a77be6ece7e72e1c62f0159010 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 21 Jun 2023 19:03:47 -0700 Subject: [PATCH 041/122] fix perf issue in writeback saves about 5% --- boa/vm/fast_mem.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/boa/vm/fast_mem.py b/boa/vm/fast_mem.py index 03901f3f..30ffa79b 100644 --- a/boa/vm/fast_mem.py +++ b/boa/vm/fast_mem.py @@ -53,7 +53,9 @@ def _writeback(self, start_position, size): end = ceil32(start_position + size) // 32 for ix in range(start, end): if self.needs_writeback[ix]: - super().write(ix * 32, 32, to_bytes(self.mem_cache[ix])) + word = self.mem_cache[ix] + assert ix + 32 <= len(self._bytes) + self._bytes[ix * 32 : ix * 32 + 32] = to_bytes(word) self.needs_writeback[ix] = False def read_bytes(self, start_position, size): @@ -78,4 +80,5 @@ def write(self, start_position, size, value): end = (start_position + size + 31) // 32 for i in range(start, end): self.mem_cache[i] = self._DIRTY + super().write(start_position, size, value) From 26a770b11252cddd4a20039f7c432cdfc3504708 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 22 Jun 2023 12:13:12 -0700 Subject: [PATCH 042/122] fix shl implementation --- boa/vyper/ir_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 4b2b6328..1aaabb2c 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -462,7 +462,7 @@ class Shl(IRExecutor): _type: type = int def _compile(self, bits, val): - return f"{val} >> {bits}" + return f"_wrap256({val} << {bits})" @executor From bd9bb72b6a9b2c5d324b367e44df30b38b65020d Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 22 Jun 2023 13:15:05 -0700 Subject: [PATCH 043/122] cache abi encoder schema parsing, saves 5-6% --- boa/environment.py | 6 +++--- boa/precompile.py | 10 +++++----- boa/vyper/contract.py | 15 +++++++++------ 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index bde1cfb1..da8ab9af 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -13,7 +13,6 @@ import eth.vm.forks.spurious_dragon.computation as spurious_dragon from eth._utils.address import generate_contract_address from eth.chains.mainnet import MainnetChain -from eth.codecs import abi from eth.db.atomic import AtomicDB from eth.exceptions import Halt, VMError from eth.vm.code_stream import CodeStream @@ -25,6 +24,7 @@ from boa.util.eip1167 import extract_eip1167_address, is_eip1167_contract from boa.util.lrudict import lrudict +from boa.util.abi import abi_decode from boa.vm.fast_mem import FastMem from boa.vm.fork import AccountDBFork from boa.vm.gas_meters import GasMeter, NoGasMeter, ProfilingGasMeter @@ -170,8 +170,8 @@ def deregister_raw_precompile(address, force=True): def console_log(computation): msgdata = computation.msg.data_as_bytes - schema, payload = abi.decode("(string,bytes)", msgdata[4:]) - data = abi.decode(schema, payload) + schema, payload = abi_decode("(string,bytes)", msgdata[4:]) + data = abi_decode(schema, payload) print(*data, file=sys.stderr) return computation diff --git a/boa/precompile.py b/boa/precompile.py index 9ebaea87..b2529c7c 100644 --- a/boa/precompile.py +++ b/boa/precompile.py @@ -1,12 +1,11 @@ from typing import Any -from eth.codecs import abi from vyper.ast import parse_to_ast from vyper.builtins._signatures import BuiltinFunction from vyper.builtins.functions import ( DISPATCH_TABLE, STMT_DISPATCH_TABLE, - abi_encode, + abi_encode as abi_encode_ir, ir_tuple_from_args, process_inputs, ) @@ -19,6 +18,7 @@ from vyper.utils import keccak256 from boa.environment import register_raw_precompile +from boa.util.abi import abi_encode, abi_decode class PrecompileBuiltin(BuiltinFunction): @@ -44,7 +44,7 @@ def build_IR(self, expr, args, kwargs, context): ret = ["seq"] # store abi-encoded argument at buf - args_len = abi_encode( + args_len = abi_encode_ir( args_buf, args_as_tuple, context, args_abi_t.size_bound(), returns_len=True ) ret_len = self._return_type.abi_type.size_bound() @@ -71,7 +71,7 @@ def decorator(func): def wrapper(computation): # Decode input arguments from message data msg_data = computation.msg.data_as_bytes - arg_values = abi.decode(args_t.abi_type.selector_name(), msg_data) + arg_values = abi_decode(args_t.abi_type.selector_name(), msg_data) # Call the original function with decoded input arguments res = func(*arg_values) @@ -85,7 +85,7 @@ def wrapper(computation): return_t = TupleT((return_t,)) ret_abi_t = return_t.abi_type.selector_name() - computation.output = abi.encode(ret_abi_t, res) + computation.output = abi_encode(ret_abi_t, res) return computation diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 20027f92..8bef9184 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -14,7 +14,7 @@ import vyper.ir.compile_ir as compile_ir import vyper.semantics.analysis as analysis import vyper.semantics.namespace as vy_ns -from eth.codecs import abi + from eth.exceptions import VMError from eth_typing import Address from vyper.ast.utils import parse_to_ast @@ -35,6 +35,7 @@ from boa.profiling import LineProfile, cache_gas_used_for_computation from boa.util.exceptions import strip_internal_frames from boa.util.lrudict import lrudict +from boa.util.abi import abi_encode, abi_decode from boa.vm.gas_meters import ProfilingGasMeter from boa.vm.utils import to_bytes, to_int from boa.vyper import _METHOD_ID_VAR @@ -222,7 +223,7 @@ def pretty_vm_reason(self): # decode error msg if it's "Error(string)" # b"\x08\xc3y\xa0" == method_id("Error(string)") if isinstance(err.args[0], bytes) and err.args[0][:4] == b"\x08\xc3y\xa0": - return abi.decode("(string)", err.args[0][4:])[0] + return abi_decode("(string)", err.args[0][4:])[0] return repr(err) @@ -663,12 +664,12 @@ def decode_log(self, e): # convert to bytes for abi decoder encoded_topic = t.to_bytes(32, "big") decoded_topics.append( - abi.decode(typ.abi_type.selector_name(), encoded_topic) + abi_decode(typ.abi_type.selector_name(), encoded_topic) ) tuple_typ = TupleT(arg_typs) - args = abi.decode(tuple_typ.abi_type.selector_name(), data) + args = abi_decode(tuple_typ.abi_type.selector_name(), data) return Event(log_id, self._address, event_t, decoded_topics, args) @@ -687,7 +688,7 @@ def marshal_to_python(self, computation, vyper_typ): return None return_typ = calculate_type_for_external_return(vyper_typ) - ret = abi.decode(return_typ.abi_type.selector_name(), computation.output) + ret = abi_decode(return_typ.abi_type.selector_name(), computation.output) # unwrap the tuple if needed if not isinstance(vyper_typ, TupleT): @@ -900,6 +901,7 @@ def bytecode(self): def args_abi_type(self, num_kwargs): if not hasattr(self, "_signature_cache"): self._signature_cache = {} + if num_kwargs in self._signature_cache: return self._signature_cache[num_kwargs] @@ -910,6 +912,7 @@ def args_abi_type(self, num_kwargs): "(" + ",".join(arg.typ.abi_type.selector_name() for arg in sig_args) + ")" ) abi_sig = self.func_t.name + args_abi_type + _method_id = method_id(abi_sig) self._signature_cache[num_kwargs] = (_method_id, args_abi_type) @@ -936,7 +939,7 @@ def _prepare_calldata(self, *args, **kwargs): args = [getattr(arg, "address", arg) for arg in args] method_id, args_abi_type = self.args_abi_type(total_non_base_args) - encoded_args = abi.encode(args_abi_type, args) + encoded_args = abi_encode(args_abi_type, args) if self.func_t.is_constructor or self.func_t.is_fallback: return encoded_args From ccadd630c603f68b3096248d3cf8f7c8c2097a49 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 22 Jun 2023 13:50:00 -0700 Subject: [PATCH 044/122] fix log0 --- boa/environment.py | 1 + boa/vyper/ir_executor.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/boa/environment.py b/boa/environment.py index da8ab9af..04a8932d 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -283,6 +283,7 @@ def __init__(self, *args, **kwargs): # super() hardcodes CodeStream into the ctor # so we have to override it here super().__init__(*args, **kwargs) + self.code = TracingCodeStream( self.code._raw_code_bytes, fake_codesize=getattr(self.msg, "_fake_codesize", None), diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 1aaabb2c..d90ef0a8 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -678,13 +678,15 @@ def _sig(self): return (int, int) + tuple(int for _ in range(self.N)) def _compile(self, ofst, size, *topics): + # write out tuple strings correctly, always need trailing comma + topics = [f"{topic}," for topic in topics] self.builder.extend( f""" VM._memory.extend({ofst}, {size}) log_data = VM.memory_read_bytes({ofst}, {size}) VM.add_log_entry( account=VM.msg.storage_address, - topics=({", ".join(topics)}), + topics=({" ".join(topics)}), data=log_data, ) """ From dd8225a1b16f0233a352a3c14b5796e8e8b68a34 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 22 Jun 2023 13:51:55 -0700 Subject: [PATCH 045/122] fix repeat --- boa/vyper/ir_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index d90ef0a8..91cb676e 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -749,7 +749,7 @@ def compile(self, out=None, out_typ=None): class Repeat(IRExecutor): _name = "repeat" - def compile(self, out=None): + def compile(self, out=None, out_typ=None): i_var, start, rounds, rounds_bound, body = self.args startname = self.compile_ctx.freshvar("start") From 58968a63f2779fb8f3315875e9a5c9fa38330ea6 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 22 Jun 2023 13:59:33 -0700 Subject: [PATCH 046/122] fix fastmem writeback --- boa/vm/fast_mem.py | 1 + 1 file changed, 1 insertion(+) diff --git a/boa/vm/fast_mem.py b/boa/vm/fast_mem.py index 30ffa79b..c63033c1 100644 --- a/boa/vm/fast_mem.py +++ b/boa/vm/fast_mem.py @@ -80,5 +80,6 @@ def write(self, start_position, size, value): end = (start_position + size + 31) // 32 for i in range(start, end): self.mem_cache[i] = self._DIRTY + self.needs_writeback[i] = False super().write(start_position, size, value) From 2877ca0244d64b4fb579115d4309ae6598f9d9c2 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 22 Jun 2023 14:01:18 -0700 Subject: [PATCH 047/122] fix Address.__repr__ --- boa/environment.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/boa/environment.py b/boa/environment.py index 04a8932d..6b8e84b9 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -124,7 +124,8 @@ def __new__(cls, address): # return super().__eq__(self, other) def __repr__(self): - return f"_Address({self.normalized_address})" + checksum_addr = super().__repr__() + return f"_Address({checksum_addr})" # make mypy happy From dd4da06bb96e5bb7abc274bfaefb128693e28a0d Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 22 Jun 2023 20:14:55 -0700 Subject: [PATCH 048/122] forgot to commit abi util file, should squash with bd9bb72b6a9b2c5d324b367e44df30b38b65020d --- boa/util/abi.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 boa/util/abi.py diff --git a/boa/util/abi.py b/boa/util/abi.py new file mode 100644 index 00000000..4fc79067 --- /dev/null +++ b/boa/util/abi.py @@ -0,0 +1,23 @@ +# wrapper module around whatever encoder we are using + +from eth.codecs.abi.decoder import Decoder +from eth.codecs.abi.encoder import Encoder +from eth.codecs.abi.parser import Parser + +_parsers = {} + + +def _get_parser(schema): + try: + return _parsers[schema] + except KeyError: + _parsers[schema] = (ret := Parser.parse(schema)) + return ret + + +def abi_encode(schema, data): + return Encoder.encode(_get_parser(schema), data) + + +def abi_decode(schema, data): + return Decoder.decode(_get_parser(schema), data) From 5fcba4317dd0768ca5c1024391e60d54e9b6a280 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 23 Jun 2023 12:40:45 -0700 Subject: [PATCH 049/122] fix unaligned writes when writeback is needed --- boa/environment.py | 2 +- boa/vm/fast_mem.py | 13 +++++++------ boa/vyper/ir_executor.py | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 6b8e84b9..5181d06b 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -342,7 +342,7 @@ def apply_computation(cls, state, msg, tx_ctx): err = None with cls(state, msg, tx_ctx) as computation: try: - # print("LUDICROUS MODE") + # print("FAST MODE") contract.ir_executor.exec(computation) except Halt: pass diff --git a/boa/vm/fast_mem.py b/boa/vm/fast_mem.py index c63033c1..a8dc377f 100644 --- a/boa/vm/fast_mem.py +++ b/boa/vm/fast_mem.py @@ -69,17 +69,18 @@ def read(self, start_position, size): def write_word(self, start_position, int_val): if start_position % 32 == 0: self.mem_cache[start_position // 32] = int_val - - self.needs_writeback[start_position // 32] = True - - # bypass cache dirtying - # super().write(start_position, 32, to_bytes(int_val)) + self.needs_writeback[start_position // 32] = True + else: + self.write(start_position, 32, to_bytes(int_val)) def write(self, start_position, size, value): start = start_position // 32 end = (start_position + size + 31) // 32 + + # need to write back, in case this is not an aligned write. + self._writeback(start_position, size) for i in range(start, end): self.mem_cache[i] = self._DIRTY - self.needs_writeback[i] = False + assert self.needs_writeback[i] == False super().write(start_position, size, value) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 91cb676e..b8e5167c 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -827,7 +827,7 @@ def _compile(self, ptr, size): VM.output = VM.memory_read_bytes({ptr}, {size}) VM.vyper_source_pos = {repr(self.ir_node.source_pos)} VM.vyper_error_msg = {repr(self.ir_node.error_msg)} - raise VMRevert("") # venom revert + raise VMRevert(VM.output) # venom revert """ ) From 6d2e8d8a9d62958a581e20b9675e8f774db1de74 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 29 Jun 2023 17:15:37 -0700 Subject: [PATCH 050/122] use latest vyper calling convention code --- boa/vyper/ir_executor.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index b8e5167c..2674a393 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -870,25 +870,22 @@ class Goto(IRExecutor): def analyze(self): self.label = self.args[0]._str_value - # exit_to labels weird, fixed in GH vyper#3488 - if self.label.startswith("_sym_"): - self.label = self.label[len("_sym_") :] # just get the parameters, leaving the label in self.args # messes with downstream machinery which tries to analyze the label. runtime_args = [] for arg in self.args[1:]: if isinstance(arg, StringExecutor): - argval = arg._str_value - # GH vyper#3488 - if argval == "return_pc": - continue - # calling convention wants to push the return pc since evm - # has no subroutines, we are using python function call - # machinery so we don't need to worry about that. - if argval.startswith("_sym_"): + if arg._str_value == "return_pc": + # we don't need to deal with return pc on the way out. continue + argval = arg._str_value + + if isinstance(arg, Symbol): + # we don't need to push the return pc on the way in. + continue + runtime_args.append(arg.analyze()) self.args = runtime_args @@ -1010,6 +1007,12 @@ def compile_func(self): self.builder.append("VM = CTX.computation") body.compile() +@executor +class Symbol(IRExecutor): + # in IR, a "symbol" is a label which needs to be pushed to the + # stack for the calling convention. + _name = "symbol" + @executor class UniqueSymbol(IRExecutor): From 144cb339a5bb1c3471723a5f588425f7be6dc3a4 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 30 Jun 2023 11:37:21 -0700 Subject: [PATCH 051/122] don't allow IR executor for <0.3.10 vyper due to IR fixes in vyper/c1f0bd5a87e2f7fa10. --- boa/vyper/ir_executor.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 2674a393..4ab382ef 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -1076,6 +1076,10 @@ def _ensure_source_pos(ir_node, source_pos=None, error_msg=None): def executor_from_ir(ir_node, vyper_compiler_data) -> Any: + import vyper.version + if vyper.version.__version_tuple__ < (0, 3, 10): + raise RuntimeError("IR executor requires vyper 0.3.10 or above") + _ensure_source_pos(ir_node) ret = _executor_from_ir(ir_node, CompileContext(vyper_compiler_data)) From 014152b30a84f2aaf7686716a956704768342779 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 30 Jun 2023 23:12:56 +0000 Subject: [PATCH 052/122] fix: wrap for signed integers in IR --- boa/vyper/ir_executor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 4ab382ef..99618344 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -264,6 +264,7 @@ class IntExecutor(IRExecutor): def __post_init__(self): assert -(2**255) <= self._int_value < 2**256 + self._int_value = _wrap256(self._int_value) # wrap, could be negative self.args = self._sig = () def __repr__(self): From 22fbf5d5adc997069122cccacad517932e421dc2 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 1 Jul 2023 16:44:00 +0000 Subject: [PATCH 053/122] fix some lint --- boa/environment.py | 14 +++++++------- boa/precompile.py | 12 ++++-------- boa/util/abi.py | 2 +- boa/vm/fast_mem.py | 12 +----------- boa/vyper/contract.py | 2 +- boa/vyper/ir_executor.py | 34 +++++++++++++++++++++++----------- 6 files changed, 37 insertions(+), 39 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index d147cc44..0b3ba626 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -6,7 +6,7 @@ import sys import traceback import warnings -from typing import Any, Iterator, Optional, Tuple, Union +from typing import Annotated, Any, Iterator, Optional, Tuple import eth.constants as constants import eth.tools.builder.chain as chain @@ -19,7 +19,7 @@ from eth.vm.message import Message from eth.vm.opcode_values import STOP from eth.vm.transaction_context import BaseTransactionContext -from eth_typing import Address as PYEVM_Address +from eth_typing import Address as PYEVM_Address # it's just bytes. from eth_utils import setup_DEBUG2_logging, to_canonical_address, to_checksum_address from boa.util.eip1167 import extract_eip1167_address, is_eip1167_contract @@ -98,10 +98,11 @@ def anchor(self): class Address(str): # (PYEVM_Address): # converting between checksum and canonical addresses is a hotspot; # this class contains both and caches recently seen conversions - __slots__ = ("canonical_address",) _cache = lrudict(1024) + canonical_address: Annotated[PYEVM_Address, "canonical address"] + def __new__(cls, address): if isinstance(address, Address): return address @@ -339,7 +340,6 @@ def apply_computation(cls, state, msg, tx_ctx): # print("REGULAR MODE") return super().apply_computation(state, msg, tx_ctx) - err = None with cls(state, msg, tx_ctx) as computation: try: # print("FAST MODE") @@ -548,11 +548,11 @@ def deploy_code( sender = self._get_sender(sender) if override_address is not None: - target_address = Address(override_address).canonical_address + target_address = Address(override_address) else: nonce = self.vm.state.get_nonce(sender) self.vm.state.increment_nonce(sender) - target_address = generate_contract_address(sender, nonce) + target_address = Address(generate_contract_address(sender, nonce)) msg = Message( to=constants.CREATE_CONTRACT_ADDRESS, # i.e., b"" @@ -560,7 +560,7 @@ def deploy_code( gas=gas, value=value, code=bytecode, - create_address=target_address, + create_address=target_address.canonical_address, data=b"", ) origin = sender # XXX: consider making this parametrizable diff --git a/boa/precompile.py b/boa/precompile.py index b2529c7c..7baaebb0 100644 --- a/boa/precompile.py +++ b/boa/precompile.py @@ -2,13 +2,9 @@ from vyper.ast import parse_to_ast from vyper.builtins._signatures import BuiltinFunction -from vyper.builtins.functions import ( - DISPATCH_TABLE, - STMT_DISPATCH_TABLE, - abi_encode as abi_encode_ir, - ir_tuple_from_args, - process_inputs, -) +from vyper.builtins.functions import DISPATCH_TABLE, STMT_DISPATCH_TABLE +from vyper.builtins.functions import abi_encode as abi_encode_ir +from vyper.builtins.functions import ir_tuple_from_args, process_inputs from vyper.codegen.core import IRnode, needs_external_call_wrap from vyper.evm.address_space import MEMORY from vyper.semantics.analysis.base import VarInfo @@ -18,7 +14,7 @@ from vyper.utils import keccak256 from boa.environment import register_raw_precompile -from boa.util.abi import abi_encode, abi_decode +from boa.util.abi import abi_decode, abi_encode class PrecompileBuiltin(BuiltinFunction): diff --git a/boa/util/abi.py b/boa/util/abi.py index 4fc79067..2ccd8c6b 100644 --- a/boa/util/abi.py +++ b/boa/util/abi.py @@ -4,7 +4,7 @@ from eth.codecs.abi.encoder import Encoder from eth.codecs.abi.parser import Parser -_parsers = {} +_parsers: dict[str, Parser] = {} def _get_parser(schema): diff --git a/boa/vm/fast_mem.py b/boa/vm/fast_mem.py index a8dc377f..84dd76c8 100644 --- a/boa/vm/fast_mem.py +++ b/boa/vm/fast_mem.py @@ -1,14 +1,4 @@ -import contextlib -import re -from dataclasses import dataclass, field -from functools import cached_property -from typing import Any, Optional - -import vyper.ir.optimizer -from eth.exceptions import Revert from eth.vm.memory import Memory -from vyper.evm.opcodes import OPCODES -from vyper.utils import unsigned_to_signed from boa.vm.utils import ceil32, to_bytes, to_int @@ -81,6 +71,6 @@ def write(self, start_position, size, value): self._writeback(start_position, size) for i in range(start, end): self.mem_cache[i] = self._DIRTY - assert self.needs_writeback[i] == False + assert self.needs_writeback[i] is False super().write(start_position, size, value) diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 8bef9184..a3590e90 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -31,7 +31,7 @@ from vyper.semantics.types.function import ContractFunctionT from vyper.utils import method_id -from boa.environment import Address, Env, to_int +from boa.environment import Address, Env from boa.profiling import LineProfile, cache_gas_used_for_computation from boa.util.exceptions import strip_internal_frames from boa.util.lrudict import lrudict diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 99618344..2c866710 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -206,8 +206,9 @@ def compile(self, out=None, out_typ=None): self.builder.append(f"# {self.name}") if not self._is_static: + _ = WriteProtection # make flake8 happy self.builder.extend( - f""" + """ if VM.msg.is_static: raise WriteProtection( "Cannot modify state while inside of a STATICCALL context" @@ -355,7 +356,7 @@ def from_mnemonic(cls, mnemonic): # an executor for evm opcodes which dispatches into py-evm class OpcodeIRExecutor(IRExecutor): - _type: type = StackItem + _type: type = StackItem # type: ignore def __init__(self, name, opcode_info, *args): self.opcode_info: OpcodeInfo = opcode_info @@ -470,7 +471,13 @@ def _compile(self, bits, val): class Select(IRExecutor): _name = "select" _sig = (int, StackItem, StackItem) - _type: type = StackItem + + @cached_property + def _type(self): + _, x, y = self.args + if x._type == y._type: + return x._type + return StackItem def _compile(self, test, x, y): return f"{x} if {test} else {y}" @@ -486,7 +493,7 @@ class Caller(IRExecutor): _type: type = bytes def _compile(self): - return f"VM.msg.sender" + return "VM.msg.sender" @executor @@ -496,7 +503,7 @@ class CalldataSize(IRExecutor): _type: type = int def _compile(self): - return f"len(VM.msg.data)" + return "len(VM.msg.data)" @executor @@ -506,7 +513,7 @@ class CallValue(IRExecutor): _type: type = int def _compile(self): - return f"VM.msg.value" + return "VM.msg.value" @executor @@ -635,7 +642,9 @@ class SStore(IRExecutor): _type = int def _compile(self, slot, value): - return f"""VM.state.set_storage(address=VM.msg.storage_address, slot={slot}, value={value})""" + return f""" + VM.state.set_storage(address=VM.msg.storage_address, slot={slot}, value={value}) + """.strip() @executor @@ -807,6 +816,7 @@ class Assert(IRExecutor): _sig = (int,) def _compile(self, test): + _ = VMRevert # make flake8 happy self.builder.extend( f""" if not bool({test}): @@ -839,6 +849,7 @@ class Return(IRExecutor): _sig = (int, int) def _compile(self, ptr, size): + _ = Halt # make flake8 happy self.builder.extend( f""" VM.output = VM.memory_read_bytes({ptr}, {size}) @@ -854,9 +865,9 @@ class Stop(IRExecutor): def _compile(self): self.builder.extend( - f""" + """ raise Halt("") # return - """ + """ ) @@ -881,8 +892,6 @@ def analyze(self): # we don't need to deal with return pc on the way out. continue - argval = arg._str_value - if isinstance(arg, Symbol): # we don't need to push the return pc on the way in. continue @@ -939,6 +948,7 @@ def _compile(self, *args): subroutine_call = super()._compile(*args) return f"return {subroutine_call}" + @executor class CleanupRepeat(IRExecutor): # a no-op from our perspective @@ -1008,6 +1018,7 @@ def compile_func(self): self.builder.append("VM = CTX.computation") body.compile() + @executor class Symbol(IRExecutor): # in IR, a "symbol" is a label which needs to be pushed to the @@ -1078,6 +1089,7 @@ def _ensure_source_pos(ir_node, source_pos=None, error_msg=None): def executor_from_ir(ir_node, vyper_compiler_data) -> Any: import vyper.version + if vyper.version.__version_tuple__ < (0, 3, 10): raise RuntimeError("IR executor requires vyper 0.3.10 or above") From b8c771dd1b93728449f881926380e4ce8b266132 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 1 Jul 2023 16:44:00 +0000 Subject: [PATCH 054/122] override ir_executor for dynamic code injection e.g. contract.eval(), InternalVyperFunction, InjectVyperFunction all override bytecode (and this provides a way to handle all of the above) --- boa/environment.py | 55 +++++++++++++++++++++++++++---------- boa/vyper/compiler_utils.py | 24 +++++++++++----- boa/vyper/contract.py | 50 +++++++++++++++++++++------------ 3 files changed, 89 insertions(+), 40 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 0b3ba626..d99d0339 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -4,7 +4,6 @@ import contextlib import logging import sys -import traceback import warnings from typing import Annotated, Any, Iterator, Optional, Tuple @@ -14,7 +13,7 @@ from eth._utils.address import generate_contract_address from eth.chains.mainnet import MainnetChain from eth.db.atomic import AtomicDB -from eth.exceptions import Halt, VMError +from eth.exceptions import Halt from eth.vm.code_stream import CodeStream from eth.vm.message import Message from eth.vm.opcode_values import STOP @@ -22,10 +21,9 @@ from eth_typing import Address as PYEVM_Address # it's just bytes. from eth_utils import setup_DEBUG2_logging, to_canonical_address, to_checksum_address +from boa.util.abi import abi_decode from boa.util.eip1167 import extract_eip1167_address, is_eip1167_contract from boa.util.lrudict import lrudict -from boa.util.abi import abi_decode -from boa.vm.fast_mem import FastMem from boa.vm.fork import AccountDBFork from boa.vm.gas_meters import GasMeter, NoGasMeter, ProfilingGasMeter from boa.vm.utils import to_bytes, to_int @@ -118,10 +116,10 @@ def __new__(cls, address): cls._cache[address] = self return self - #def __hash__(self): + # def __hash__(self): # return hash(self.checksum_address) - #def __eq__(self, other): + # def __eq__(self, other): # return super().__eq__(self, other) def __repr__(self): @@ -336,20 +334,48 @@ def apply_create_message(cls, state, msg, tx_ctx): def apply_computation(cls, state, msg, tx_ctx): addr = msg.code_address contract = cls.env._lookup_contract_fast(addr) if addr else None + #print("ENTER", Address(msg.code_address or bytes([0]*20)), contract) if contract is None or not cls.env._enable_fast_mode: - # print("REGULAR MODE") + #print("SLOW MODE") return super().apply_computation(state, msg, tx_ctx) with cls(state, msg, tx_ctx) as computation: try: - # print("FAST MODE") - contract.ir_executor.exec(computation) + if getattr(msg, "_ir_executor", None) is not None: + #print("MSG HAS IR EXECUTOR") + # this happens when bytecode is overridden, e.g. + # for injected functions. note ir_executor is (correctly) + # used for the outer computation only because on subcalls + # a clean message is constructed for the child computation + msg._ir_executor.exec(computation) + else: + #print("REGULAR FAST MODE") + contract.ir_executor.exec(computation) except Halt: pass return computation +# Message object with extra attrs we can use to thread things through +# the execution context. +class FakeMessage(Message): + def __init__( + self, + *args, + ir_executor=None, + fake_codesize=None, + start_pc=0, + contract=None, + **kwargs, + ): + super().__init__(*args, **kwargs) + self._ir_executor = ir_executor + self._fake_codesize = fake_codesize + self._start_pc = start_pc + self._contract = contract + + # wrapper class around py-evm which provides a "contract-centric" API class Env: _singleton = None @@ -582,6 +608,7 @@ def execute_code( value: int = 0, data: bytes = b"", override_bytecode: Optional[bytes] = None, + ir_executor: Any = None, is_modifying: bool = True, start_pc: int = 0, fake_codesize: Optional[int] = None, @@ -592,9 +619,6 @@ def execute_code( sender = self._get_sender(sender) - class FakeMessage(Message): # Message object with settable attrs - __dict__: dict = {} - to = Address(to_address).canonical_address bytecode = override_bytecode @@ -611,11 +635,12 @@ class FakeMessage(Message): # Message object with settable attrs code=bytecode, # type: ignore data=data, is_static=is_static, + fake_codesize=fake_codesize, + start_pc=start_pc, + ir_executor=ir_executor, + contract=contract, ) - msg._fake_codesize = fake_codesize # type: ignore - msg._start_pc = start_pc # type: ignore - msg._contract = contract # type: ignore origin = sender # XXX: consider making this parametrizable tx_ctx = BaseTransactionContext(origin=origin, gas_price=self.get_gas_price()) diff --git a/boa/vyper/compiler_utils.py b/boa/vyper/compiler_utils.py index 58eb8330..9b00193c 100644 --- a/boa/vyper/compiler_utils.py +++ b/boa/vyper/compiler_utils.py @@ -6,14 +6,15 @@ from vyper.codegen.function_definitions import generate_ir_for_function from vyper.codegen.ir_node import IRnode from vyper.exceptions import InvalidType -from vyper.ir import compile_ir as compile_ir +from vyper.ir import compile_ir, optimizer from vyper.semantics.analysis.utils import get_exact_type_from_node from vyper.utils import method_id_int from boa.vyper import _METHOD_ID_VAR +from boa.vyper.ir_executor import executor_from_ir -def _compile_vyper_function(vyper_function, contract): +def compile_vyper_function(vyper_function, contract): """Compiles a vyper function and appends it to the top of the IR of a contract. This is useful for vyper `eval` and internal functions, where the runtime bytecode must be changed to add more runtime functionality @@ -37,16 +38,25 @@ def _compile_vyper_function(vyper_function, contract): base_signature = func_t.abi_signature_for_kwargs([]) ir = IRnode.from_list(["with", _METHOD_ID_VAR, method_id_int(base_signature), ir]) - assembly = compile_ir.compile_to_assembly(ir, no_optimize=True) + ir = optimizer.optimize(ir) - # extend IR with contract's unoptimized assembly + # extend IR with contract's unoptimized assembly to avoid stripping + # labels at first (and then optimize all together) + assembly = compile_ir.compile_to_assembly(ir, no_optimize=True) assembly.extend(contract.unoptimized_assembly) compile_ir._optimize_assembly(assembly) bytecode, source_map = compile_ir.assembly_to_evm(assembly) bytecode += contract.data_section typ = func_t.return_type - return ast, bytecode, source_map, typ + # generate the IR executor + # first mush it with the rest of the IR in the contract to ensure + # all labels are present + ir = IRnode.from_list(["seq", ir, contract.compiler_data.ir_runtime]) + # now compile. + ir_executor = executor_from_ir(ir, compiler_data) + + return ast, ir_executor, bytecode, source_map, typ def generate_bytecode_for_internal_fn(fn): @@ -84,7 +94,7 @@ def __boa_private_{fn_name}__({fn_sig}){return_sig}: {fn_call} """ ) - return _compile_vyper_function(wrapper_code, contract)[1:] + return compile_vyper_function(wrapper_code, contract) def generate_bytecode_for_arbitrary_stmt(source_code, contract): @@ -115,4 +125,4 @@ def __boa_debug__() {return_sig}: {debug_body} """ ) - return _compile_vyper_function(wrapper_code, contract)[1:] + return compile_vyper_function(wrapper_code, contract) diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index a3590e90..d08316d8 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -14,9 +14,7 @@ import vyper.ir.compile_ir as compile_ir import vyper.semantics.analysis as analysis import vyper.semantics.namespace as vy_ns - from eth.exceptions import VMError -from eth_typing import Address from vyper.ast.utils import parse_to_ast from vyper.codegen.core import calculate_type_for_external_return from vyper.codegen.function_definitions import generate_ir_for_function @@ -33,15 +31,15 @@ from boa.environment import Address, Env from boa.profiling import LineProfile, cache_gas_used_for_computation +from boa.util.abi import abi_decode, abi_encode from boa.util.exceptions import strip_internal_frames from boa.util.lrudict import lrudict -from boa.util.abi import abi_encode, abi_decode from boa.vm.gas_meters import ProfilingGasMeter from boa.vm.utils import to_bytes, to_int from boa.vyper import _METHOD_ID_VAR from boa.vyper.ast_utils import ast_map_of, get_fn_ancestor_from_node, reason_at from boa.vyper.compiler_utils import ( - _compile_vyper_function, + compile_vyper_function, generate_bytecode_for_arbitrary_stmt, generate_bytecode_for_internal_fn, ) @@ -589,6 +587,7 @@ def source_map(self): def find_error_meta(self, computation): if hasattr(computation, "vyper_error_msg"): + # this is set by ir executor currently. return computation.vyper_error_msg code_stream = computation.code @@ -600,6 +599,7 @@ def find_error_meta(self, computation): def find_source_of(self, computation, is_initcode=False): if hasattr(computation, "vyper_source_pos"): + # this is set by ir executor currently. return self.ast_map.get(computation.vyper_source_pos) code_stream = computation.code @@ -760,11 +760,17 @@ def _cache_namespace(self, namespace): def override_vyper_namespace(self): # ensure self._vyper_namespace is computed m = self._ast_module # noqa: F841 + contract_members = self._vyper_namespace["self"].typ.members try: + to_keep = set(contract_members.keys()) with vy_ns.override_global_namespace(self._vyper_namespace): yield finally: - self._vyper_namespace["self"].typ.members.pop("__boa_debug__", None) + # drop all keys which were added while yielding + keys = list(contract_members.keys()) + for k in keys: + if k not in to_keep: + contract_members.pop(k) # for eval(), we need unoptimized assembly, since the dead code # eliminator might prune a dead function (which we want to eval) @@ -816,11 +822,9 @@ def eval( """eval vyper code in the context of this contract""" # this method is super slow so we cache compilation results - if stmt in self._eval_cache: - bytecode, source_map, typ = self._eval_cache[stmt] - else: - bytecode, source_map, typ = generate_bytecode_for_arbitrary_stmt(stmt, self) - self._eval_cache[stmt] = (bytecode, source_map, typ) + if stmt not in self._eval_cache: + self._eval_cache[stmt] = generate_bytecode_for_arbitrary_stmt(stmt, self) + _, ir_executor, bytecode, source_map, typ = self._eval_cache[stmt] with self._anchor_source_map(source_map): method_id = b"dbug" # note dummy method id, doesn't get validated @@ -832,6 +836,7 @@ def eval( gas=gas, contract=self, override_bytecode=bytecode, + ir_executor=ir_executor, ) ret = self.marshal_to_python(c, typ) @@ -948,7 +953,8 @@ def _prepare_calldata(self, *args, **kwargs): def __call__(self, *args, value=0, gas=None, sender=None, **kwargs): calldata_bytes = self._prepare_calldata(*args, **kwargs) - override_bytecode = getattr(self, "override_bytecode", None) + override_bytecode = getattr(self, "_override_bytecode", None) + ir_executor = getattr(self, "_ir_executor", None) with self.contract._anchor_source_map(self._source_map): computation = self.env.execute_code( to_address=self.contract._address, @@ -958,6 +964,7 @@ def __call__(self, *args, value=0, gas=None, sender=None, **kwargs): gas=gas, is_modifying=self.func_t.is_mutable, override_bytecode=override_bytecode, + ir_executor=ir_executor, contract=self.contract, ) @@ -978,14 +985,19 @@ def _compiled(self): # OVERRIDE so that __call__ uses the specially crafted bytecode @cached_property - def override_bytecode(self): - bytecode, _, _ = self._compiled + def _override_bytecode(self): + _, _, bytecode, _, _ = self._compiled return bytecode + @cached_property + def _ir_executor(self): + _, ir_executor, _, _, _ = self._compiled + return ir_executor + # OVERRIDE so that __call__ uses corresponding source map @cached_property def _source_map(self): - _, source_map, _ = self._compiled + _, _, _, source_map, _ = self._compiled return source_map @@ -1119,12 +1131,14 @@ def _source_map(self): class _InjectVyperFunction(VyperFunction): def __init__(self, contract, fn_source): - ast, bytecode, source_map, _ = _compile_vyper_function(fn_source, contract) + ast, ir_executor, bytecode, source_map, _ = compile_vyper_function( + fn_source, contract + ) super().__init__(ast, contract) - self.override_bytecode = bytecode - - # OVERRIDE so that __call__ uses special source map + # OVERRIDES so that __call__ does the right thing + self._override_bytecode = bytecode + self._ir_executor = ir_executor self._source_map = source_map From f5bc9e368426bc9493dde0b4289adfcbeae92d27 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 1 Jul 2023 22:18:20 +0000 Subject: [PATCH 055/122] clean up storage dump in traceback --- boa/vyper/contract.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index d08316d8..17461eeb 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -188,7 +188,6 @@ class ErrorDetail: error_detail: str # compiler provided error detail dev_reason: DevReason frame_detail: FrameDetail - storage_detail: Optional[FrameDetail] ast_source: vy_ast.VyperNode @classmethod @@ -203,7 +202,6 @@ def from_computation(cls, contract, computation): ast_source.end_lineno, ) frame_detail = contract.debug_frame(computation) - storage_detail = contract._storage.dump() return cls( vm_error=computation.error, @@ -211,7 +209,6 @@ def from_computation(cls, contract, computation): error_detail=error_detail, dev_reason=reason, frame_detail=frame_detail, - storage_detail=storage_detail, ast_source=ast_source, ) @@ -240,11 +237,6 @@ def __str__(self): if len(self.frame_detail) > 0: msg += f" {self.frame_detail}" - if self.storage_detail is not None: - self.storage_detail.fn_name = "storage" # override displayed name - if len(self.storage_detail) > 0: - msg += f"\n {self.storage_detail}" - return msg From b93f6c532eb55d10a8f56c0f487e1ee044cb2dd2 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 18 Jul 2023 10:02:55 -0700 Subject: [PATCH 056/122] wip: implement fast accountdb --- boa/environment.py | 3 +++ boa/vm/fast_accountdb.py | 6 ++++++ 2 files changed, 9 insertions(+) create mode 100644 boa/vm/fast_accountdb.py diff --git a/boa/environment.py b/boa/environment.py index d99d0339..4ea6399a 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -25,6 +25,7 @@ from boa.util.eip1167 import extract_eip1167_address, is_eip1167_contract from boa.util.lrudict import lrudict from boa.vm.fork import AccountDBFork +from boa.vm.fast_accountdb import FastAccountDB from boa.vm.gas_meters import GasMeter, NoGasMeter, ProfilingGasMeter from boa.vm.utils import to_bytes, to_int @@ -421,6 +422,8 @@ def _init_vm(self, reset_traces=True): ) self.vm.state.computation_class = c + # TODO: enable this with fast mode + # self.vm.state.account_db_class = FastAccountDB # we usually want to reset the trace data structures # but sometimes don't, give caller the option. diff --git a/boa/vm/fast_accountdb.py b/boa/vm/fast_accountdb.py new file mode 100644 index 00000000..f948c542 --- /dev/null +++ b/boa/vm/fast_accountdb.py @@ -0,0 +1,6 @@ +from eth.db.account import AccountDB + +class FastAccountDB(AccountDB): + # this is a hotspot in super(). + def touch_account(self, address): + self._accessed_accounts.add(address) From 00500c68720a1692a2588481146227e576b0fb79 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 8 Sep 2023 12:55:02 -0400 Subject: [PATCH 057/122] switch to skellet0r eth-stdlib --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 697c0e5f..9c96b145 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ classifiers = ["Topic :: Software Development"] # Requirements dependencies = [ "vyper >= 0.3.8", - "eth-stdlib @ git+https://github.com/charles-cooper/eth-stdlib.git@pycryptodome", + "eth-stdlib @ git+https://github.com/skellet0r/eth-stdlib.git", "eth-abi", "py-evm>=0.7.0a4", "eth-typing", From 523db9f5f91907d1176f265b9b98229161430ec7 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 8 Sep 2023 13:43:56 -0400 Subject: [PATCH 058/122] fix an api thing --- boa/environment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boa/environment.py b/boa/environment.py index 941f82d3..83ae600d 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -607,7 +607,7 @@ def deploy_code( def raw_call( self, to_address, - sender: Optional[AddressType] = None, + sender: Optional[_AddressType] = None, gas: Optional[int] = None, value: int = 0, data: bytes = b"", From f777382d9e7798482a212e79362a2544b99a3fc0 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 12 Sep 2023 15:13:30 -0400 Subject: [PATCH 059/122] add version bounds for eth-stdlib pycryptodome fix has been pushed as of 0.2.7 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9c96b145..dfd9538d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ classifiers = ["Topic :: Software Development"] # Requirements dependencies = [ "vyper >= 0.3.8", - "eth-stdlib @ git+https://github.com/skellet0r/eth-stdlib.git", + "eth-stdlib>=0.2.7,<0.3.0", "eth-abi", "py-evm>=0.7.0a4", "eth-typing", From eec91e6658eaacbe8f33a5101f40ded4b793c49b Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 12 Sep 2023 16:04:24 -0400 Subject: [PATCH 060/122] add contract name --- boa/vyper/ir_executor.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 2c866710..ea299d97 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -1097,7 +1097,10 @@ def executor_from_ir(ir_node, vyper_compiler_data) -> Any: ret = _executor_from_ir(ir_node, CompileContext(vyper_compiler_data)) ret = ret.analyze() - ret.compile_main() + + # TODO: rename this, this is "something.vy", but we maybe want + # "something.py " + ret.compile_main(vyper_compiler_data.contract_name) return ret From 291c8f87a07b49e6430701046e8de932657c89c4 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 12 Sep 2023 16:05:04 -0400 Subject: [PATCH 061/122] remove restriction on vyper version for ir executor it's already in the dependency spec --- boa/vyper/ir_executor.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index ea299d97..19df379c 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -1090,9 +1090,6 @@ def _ensure_source_pos(ir_node, source_pos=None, error_msg=None): def executor_from_ir(ir_node, vyper_compiler_data) -> Any: import vyper.version - if vyper.version.__version_tuple__ < (0, 3, 10): - raise RuntimeError("IR executor requires vyper 0.3.10 or above") - _ensure_source_pos(ir_node) ret = _executor_from_ir(ir_node, CompileContext(vyper_compiler_data)) From 106ff8210179ac911c660d067ad126005efb69a1 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 12 Sep 2023 15:26:42 -0400 Subject: [PATCH 062/122] turn off optimization for selector table can't handle the new dynjumps (well, maybe can, but too much effort at this time) --- boa/vyper/contract.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 63bfd282..5f0b4ec8 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -16,7 +16,8 @@ import vyper.semantics.namespace as vy_ns from eth.exceptions import VMError from vyper.ast.utils import parse_to_ast -from vyper.codegen.core import calculate_type_for_external_return +from vyper.compiler.settings import OptimizationLevel +from vyper.codegen.core import calculate_type_for_external_return, anchor_opt_level from vyper.codegen.function_definitions import generate_ir_for_function from vyper.codegen.global_context import GlobalContext from vyper.codegen.ir_node import IRnode @@ -820,8 +821,9 @@ def unoptimized_bytecode(self): @cached_property def ir_executor(self): - ir = self.compiler_data.ir_runtime - return executor_from_ir(ir, self.compiler_data) + with anchor_opt_level(OptimizationLevel.NONE): + _, ir_runtime = generate_ir_for_module(self.compiler_data.global_ctx) + return executor_from_ir(ir_runtime, self.compiler_data) @contextlib.contextmanager def _anchor_source_map(self, source_map): From a665580b517181ea21f6695202127445aefbbadf Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 12 Sep 2023 16:17:58 -0400 Subject: [PATCH 063/122] handle empty seqs --- boa/vyper/ir_executor.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 19df379c..e7b62b1a 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -189,6 +189,7 @@ def analyze(self): self.args = [arg.analyze() for arg in self.args] return self + # compile an IR "expr". called for its side effects on builder def compile(self, out=None, out_typ=None): # do a bit of metaprogramming to infer how to compile the args if hasattr(self, "_argnames"): @@ -745,6 +746,9 @@ class Seq(IRExecutor): _name = "seq" def compile(self, out=None, out_typ=None): + if len(self.args) == 0: + self.builder.append("pass") + return for i, arg in enumerate(self.args): if i + 1 < len(self.args): # don't accidentally assign From c02df654ca35bea042cd2e7d1c980b9b5615c041 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 12 Sep 2023 17:14:50 -0400 Subject: [PATCH 064/122] fix vyper version bounds --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3a4cc049..dc60b85d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ classifiers = ["Topic :: Software Development"] # Requirements dependencies = [ - "vyper > 0.3.9", + "vyper >=0.3.10rc3", "eth-stdlib>=0.2.7,<0.3.0", "eth-abi", "py-evm>=0.7.0a4", From 86d7017dbe5060a6c78aa2feaff4ac50227848c7 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 12 Sep 2023 17:22:34 -0400 Subject: [PATCH 065/122] inline wrap256 --- boa/vyper/ir_executor.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index e7b62b1a..571274d6 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -405,6 +405,9 @@ def executor(cls): def _wrap256(x): return x % 2**256 +def wrap256(x_str): + return f"(({x_str}) % 2**256)" + def _as_signed(x): return unsigned_to_signed(x, 256, strict=True) @@ -421,12 +424,12 @@ def funcname(self): return self._op.__module__ + "." + self._op.__name__ def _compile(self, x, y): - return f"_wrap256({self.funcname}({x}, {y}))" + return wrap256(f"{self.funcname}({x}, {y})") class SignedBinopExecutor(UnsignedBinopExecutor): def _compile(self, x, y): - return f"_wrap256({self.funcname}(_as_signed({x}), _as_signed({y})))" + return wrap256(f"{self.funcname}(_as_signed({x}), _as_signed({y}))") # for binops, just use routines from vyper optimizer @@ -455,7 +458,7 @@ class Sar(IRExecutor): def _compile(self, bits, val): # wrap256 to get back into unsigned land - return f"_wrap256(_as_signed({val}) >> {bits})" + return wrap256(f"_as_signed({val}) >> {bits}") @executor @@ -465,7 +468,7 @@ class Shl(IRExecutor): _type: type = int def _compile(self, bits, val): - return f"_wrap256({val} << {bits})" + return wrap256(f"{val} << {bits}") @executor From 11c11c3fd875948e2d98a28b2851008333b81733 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 12 Sep 2023 17:52:03 -0400 Subject: [PATCH 066/122] patch touch_account in fast mode --- boa/environment.py | 19 ++++++++++++++----- boa/vm/fast_accountdb.py | 24 ++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 83ae600d..c3ee343f 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -25,7 +25,7 @@ from boa.util.eip1167 import extract_eip1167_address, is_eip1167_contract from boa.util.lrudict import lrudict from boa.vm.fork import AccountDBFork -from boa.vm.fast_accountdb import FastAccountDB +from boa.vm.fast_accountdb import FastAccountDB, patch_pyevm_state_object, unpatch_pyevm_state_object from boa.vm.gas_meters import GasMeter, NoGasMeter, ProfilingGasMeter from boa.vm.utils import to_bytes, to_int @@ -336,7 +336,7 @@ def apply_computation(cls, state, msg, tx_ctx): addr = msg.code_address contract = cls.env._lookup_contract_fast(addr) if addr else None #print("ENTER", Address(msg.code_address or bytes([0]*20)), contract) - if contract is None or not cls.env._enable_fast_mode: + if contract is None or not cls.env._fast_mode_enabled: #print("SLOW MODE") return super().apply_computation(state, msg, tx_ctx) @@ -382,7 +382,7 @@ class Env: _singleton = None _initial_address_counter = 100 _coverage_enabled = False - _enable_fast_mode = False + _fast_mode_enabled = False def __init__(self): self.chain = _make_chain() @@ -414,6 +414,7 @@ def get_gas_price(self): def _init_vm(self, reset_traces=True): self.vm = self.chain.get_vm() + self.vm.patch = VMPatcher(self.vm) c = type( @@ -422,9 +423,10 @@ def _init_vm(self, reset_traces=True): {"env": self}, ) + if self._fast_mode_enabled: + self.vm._state_class.account_db_class = FastAccountDB + self.vm.state.computation_class = c - # TODO: enable this with fast mode - # self.vm.state.account_db_class = FastAccountDB # we usually want to reset the trace data structures # but sometimes don't, give caller the option. @@ -446,6 +448,13 @@ def _trace_sstore(self, account, slot): # zero entries. self.sstore_trace[account].add(slot) + def enable_fast_mode(self, flag: bool = True): + self._fast_mode_enabled = flag + if flag: + patch_pyevm_state_object(self.vm.state) + else: + unpatch_pyevm_state_object(self.vm.state) + def fork(self, url, reset_traces=True, **kwargs): kwargs["url"] = url AccountDBFork._rpc_init_kwargs = kwargs diff --git a/boa/vm/fast_accountdb.py b/boa/vm/fast_accountdb.py index f948c542..e4d10385 100644 --- a/boa/vm/fast_accountdb.py +++ b/boa/vm/fast_accountdb.py @@ -1,6 +1,30 @@ from eth.db.account import AccountDB class FastAccountDB(AccountDB): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) # this is a hotspot in super(). def touch_account(self, address): self._accessed_accounts.add(address) + + +def _touch_account_patcher(self, address): + self._accessed_accounts.add(address) + +_BOA_PATCHED = object() + +def patch_pyevm_state_object(state_object): + if getattr(state_object, "__boa_patched__", None) == _BOA_PATCHED: + return + accountdb = state_object._account_db + accountdb._restore_touch_account = accountdb.touch_account + accountdb.touch_account = _touch_account_patcher.__get__(accountdb, AccountDB) + state_object.__boa_patched__ = True + +def unpatch_pyevm_state_object(state_object): + if not getattr(state_object, "__boa_patched__", None) == _BOA_PATCHED: + return + accountdb = state_object._account_db + accountdb.touch_account = accountdb._restore_touch_account + delattr(accountdb, "_restore_touch_account") + delattr(state_object, "__boa_patched__") From 5095452d0b7b065e9fd50abfc3dc849349fef2b3 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 12 Sep 2023 19:02:45 -0400 Subject: [PATCH 067/122] rename wrap256 --- boa/vyper/ir_executor.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 571274d6..df5b5eb8 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -405,7 +405,8 @@ def executor(cls): def _wrap256(x): return x % 2**256 -def wrap256(x_str): + +def wrap256_str(x_str): return f"(({x_str}) % 2**256)" @@ -424,12 +425,12 @@ def funcname(self): return self._op.__module__ + "." + self._op.__name__ def _compile(self, x, y): - return wrap256(f"{self.funcname}({x}, {y})") + return wrap256_str(f"{self.funcname}({x}, {y})") class SignedBinopExecutor(UnsignedBinopExecutor): def _compile(self, x, y): - return wrap256(f"{self.funcname}(_as_signed({x}), _as_signed({y}))") + return wrap256_str(f"{self.funcname}(_as_signed({x}), _as_signed({y}))") # for binops, just use routines from vyper optimizer @@ -458,7 +459,7 @@ class Sar(IRExecutor): def _compile(self, bits, val): # wrap256 to get back into unsigned land - return wrap256(f"_as_signed({val}) >> {bits}") + return wrap256_str(f"_as_signed({val}) >> {bits}") @executor @@ -468,7 +469,7 @@ class Shl(IRExecutor): _type: type = int def _compile(self, bits, val): - return wrap256(f"{val} << {bits}") + return wrap256_str(f"{val} << {bits}") @executor From 0915084325c2be82681594a547d4193b1edec0c2 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 12 Sep 2023 19:17:14 -0400 Subject: [PATCH 068/122] fix .internal functions --- boa/vyper/compiler_utils.py | 11 ++++++++--- boa/vyper/contract.py | 8 ++++++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/boa/vyper/compiler_utils.py b/boa/vyper/compiler_utils.py index 4a9e905b..bfd1120b 100644 --- a/boa/vyper/compiler_utils.py +++ b/boa/vyper/compiler_utils.py @@ -20,6 +20,7 @@ def compile_vyper_function(vyper_function, contract): contract. This is useful for vyper `eval` and internal functions, where the runtime bytecode must be changed to add more runtime functionality (such as eval, and calling internal functions) + (performance note: this function is very very slow!) """ compiler_data = contract.compiler_data @@ -38,8 +39,10 @@ def compile_vyper_function(vyper_function, contract): external_func_info = generate_ir_for_function(ast, global_ctx, False) ir = external_func_info.common_ir - base_signature = func_t.abi_signature_for_kwargs([]) - ir = IRnode.from_list(["with", _METHOD_ID_VAR, method_id_int(base_signature), ir]) + ir = ["seq", ["goto", func_t._ir_info.external_function_base_entry_label], ir] + + # use a dummy method id + ir = IRnode.from_list(["with", _METHOD_ID_VAR, 0, ir]) ir = optimizer.optimize(ir) # extend IR with contract's unoptimized assembly to avoid stripping @@ -54,7 +57,9 @@ def compile_vyper_function(vyper_function, contract): # generate the IR executor # first mush it with the rest of the IR in the contract to ensure # all labels are present - ir = IRnode.from_list(["seq", ir, contract.compiler_data.ir_runtime]) + # (use unoptimized IR as ir_executor can't handle optimized selector tables) + _, contract_runtime = contract.unoptimized_ir + ir = IRnode.from_list(["seq", ir, contract_runtime]) # now compile. ir_executor = executor_from_ir(ir, compiler_data) diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 2536eeb6..448c0b8f 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -821,9 +821,13 @@ def unoptimized_bytecode(self): return s + self.data_section @cached_property - def ir_executor(self): + def unoptimized_ir(self): with anchor_opt_level(OptimizationLevel.NONE): - _, ir_runtime = generate_ir_for_module(self.compiler_data.global_ctx) + return generate_ir_for_module(self.compiler_data.global_ctx) + + @cached_property + def ir_executor(self): + _, ir_runtime = self.unoptimized_ir return executor_from_ir(ir_runtime, self.compiler_data) @contextlib.contextmanager From 8db8783fa2e24669713c3aaa5dcc7aaa35719e7f Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 12 Sep 2023 19:52:10 -0400 Subject: [PATCH 069/122] small refactor of compile_vyper_function --- boa/vyper/compiler_utils.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/boa/vyper/compiler_utils.py b/boa/vyper/compiler_utils.py index bfd1120b..c1f19767 100644 --- a/boa/vyper/compiler_utils.py +++ b/boa/vyper/compiler_utils.py @@ -25,9 +25,9 @@ def compile_vyper_function(vyper_function, contract): compiler_data = contract.compiler_data global_ctx = contract.global_ctx - ifaces = compiler_data.interface_codes - ast = parse_to_ast(vyper_function, ifaces) + ast = parse_to_ast(vyper_function) + ifaces = compiler_data.interface_codes # override namespace and add wrapper code at the top with contract.override_vyper_namespace(): analysis.add_module_namespace(ast, ifaces) @@ -39,28 +39,26 @@ def compile_vyper_function(vyper_function, contract): external_func_info = generate_ir_for_function(ast, global_ctx, False) ir = external_func_info.common_ir - ir = ["seq", ["goto", func_t._ir_info.external_function_base_entry_label], ir] + entry_label = func_t._ir_info.external_function_base_entry_label + + ir = ["seq", ["goto", entry_label], ir] # use a dummy method id - ir = IRnode.from_list(["with", _METHOD_ID_VAR, 0, ir]) + ir = ["with", _METHOD_ID_VAR, 0, ir] + + # first mush it with the rest of the IR in the contract to ensure + # all labels are present, and then optimize all together + # (use unoptimized IR, ir_executor can't handle optimized selector tables) + _, contract_runtime = contract.unoptimized_ir + ir = IRnode.from_list(["seq", ir, contract_runtime]) ir = optimizer.optimize(ir) - # extend IR with contract's unoptimized assembly to avoid stripping - # labels at first (and then optimize all together) - assembly = compile_ir.compile_to_assembly(ir, optimize=OptimizationLevel.NONE) - assembly.extend(contract.unoptimized_assembly) - compile_ir._optimize_assembly(assembly) + assembly = compile_ir.compile_to_assembly(ir) bytecode, source_map = compile_ir.assembly_to_evm(assembly) bytecode += contract.data_section typ = func_t.return_type # generate the IR executor - # first mush it with the rest of the IR in the contract to ensure - # all labels are present - # (use unoptimized IR as ir_executor can't handle optimized selector tables) - _, contract_runtime = contract.unoptimized_ir - ir = IRnode.from_list(["seq", ir, contract_runtime]) - # now compile. ir_executor = executor_from_ir(ir, compiler_data) return ast, ir_executor, bytecode, source_map, typ From 161d9e7df4728f47270cf23ece22c9317f8c3c86 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 12 Sep 2023 20:48:31 -0400 Subject: [PATCH 070/122] fix a typo introduced in a merge conflict --- boa/vyper/contract.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 448c0b8f..278eb87c 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -988,8 +988,8 @@ def __call__(self, *args, value=0, gas=None, sender=None, **kwargs): ir_executor = self._ir_executor override_bytecode = None - if hasattr(self, "override_bytecode"): - override_bytecode = self.override_bytecode + if hasattr(self, "_override_bytecode"): + override_bytecode = self._override_bytecode with self.contract._anchor_source_map(self._source_map): computation = self.env.execute_code( From e2957791a17fa5d3a33dc8c629caac79c3d96929 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 13 Sep 2023 21:19:12 -0400 Subject: [PATCH 071/122] add a couple notes --- boa/environment.py | 2 ++ boa/vyper/contract.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/boa/environment.py b/boa/environment.py index c3ee343f..39281f05 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -355,6 +355,8 @@ def apply_computation(cls, state, msg, tx_ctx): except Halt: pass + # return computation outside of with block; computation.__exit__ + # swallows exceptions (including Revert). return computation diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 278eb87c..7b74bca7 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -268,6 +268,10 @@ def _handle_child_trace(computation, env, return_trace): if not computation.children[-1].is_error: return return_trace child = computation.children[-1] + + # TODO: maybe should be: + # child_obj = env.lookup_contract(child.msg.code_address) or env._code_registry.get(child.msg.code) + child_obj = env.lookup_contract(child.msg.code_address) if child_obj is None: child_trace = trace_for_unknown_contract(child, env) From 255a1780d9654434bc6a540867e3a9cc2af66c7a Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 13 Sep 2023 23:12:34 -0400 Subject: [PATCH 072/122] add sstore trace --- boa/vyper/ir_executor.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index df5b5eb8..c9519f30 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -647,9 +647,12 @@ class SStore(IRExecutor): _type = int def _compile(self, slot, value): - return f""" + self.builder.extend( + f""" + VM.env._trace_sstore(VM.msg.storage_address, {slot}) VM.state.set_storage(address=VM.msg.storage_address, slot={slot}, value={value}) - """.strip() + """ + ) @executor From 90c8d969082ff9f81bd25f84eec336cc5d4a006e Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 14 Sep 2023 10:45:12 -0400 Subject: [PATCH 073/122] fix cross-module name collisions between labels --- boa/vyper/ir_executor.py | 67 +++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 21 deletions(-) diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index c9519f30..6754b528 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -99,7 +99,7 @@ def contract_name(self): return mkalphanum(PurePath(self.vyper_compiler_data.contract_name).name) def translate_label(self, label): - return f"{label}_{self.contract_name}_{self.uuid}" + return f"{self.contract_name}_{self.uuid}_{label}" def add_unique_symbol(self, symbol): if symbol in self.unique_symbols: @@ -247,7 +247,14 @@ def compile_main(self, contract_path=""): self.builder.extend("\n\n") func.compile_func() - py_bytecode = compile(self.builder.get_output(), contract_path, "exec") + + py_file = contract_path + str(self.compile_ctx.uuid) + ".py" + + # uncomment for debugging the python code: + #with open(py_file, "w") as f: + # print(self.builder.get_output(), file=f) + + py_bytecode = compile(self.builder.get_output(), py_file, "exec") exec(py_bytecode, globals()) self._exec = globals()[main_name] @@ -891,18 +898,15 @@ class VarList(IRExecutor): class Goto(IRExecutor): _name = "goto" + is_return_stmt = False + def analyze(self): - self.label = self.args[0]._str_value + self.label = self.compile_ctx.translate_label(self.args[0]._str_value) # just get the parameters, leaving the label in self.args # messes with downstream machinery which tries to analyze the label. runtime_args = [] for arg in self.args[1:]: - if isinstance(arg, StringExecutor): - if arg._str_value == "return_pc": - # we don't need to deal with return pc on the way out. - continue - if isinstance(arg, Symbol): # we don't need to push the return pc on the way in. continue @@ -913,11 +917,6 @@ def analyze(self): return self - @cached_property - def is_return_stmt(self): - # i.e. we are exiting a subroutine - return self.label == "return_pc" - @cached_property def _argnames(self): if self.is_return_stmt: @@ -936,18 +935,11 @@ def _sig(self): return tuple(int for _ in self._argnames) def _compile(self, *args): - label = self.label - - if self.is_return_stmt: - assert len(self.args) == 0 - self.builder.append("return") - return - argnames = self._argnames assert len(argnames) == len(self.args) args_str = ", ".join(["CTX"] + list(args)) - return f"{label}({args_str})" + return f"{self.label}({args_str})" @executor @@ -955,7 +947,37 @@ class ExitTo(Goto): # exit_to is similar but it is known to end execution of this subroutine _name = "exit_to" + def analyze(self): + # small helper function + def _is_return_pc(arg): + return isinstance(arg, StringExecutor) and arg._str_value == "return_pc" + + if _is_return_pc(self.args[0]): + assert len(self.args) == 1 + self.is_return_stmt = True + + # strip out return_pc args, we don't need actually need to + # generate any code for it + self.args = [arg for arg in self.args if not _is_return_pc(arg)] + + if len(self.args) == 0: + # it's not really a goto, it's a return statement + # skip super.analyze() as it will choke on no args + assert self.is_return_stmt + return self + + return super().analyze() + def _compile(self, *args): + if self.is_return_stmt: + # straight return + # skip super._compile() as it will choke on no args + assert len(self.args) == 0 + self.builder.append("return") + return + + # execute the subroutine and then return + # (probably not necessary to return the result but may as well) subroutine_call = super()._compile(*args) return f"return {subroutine_call}" @@ -1003,6 +1025,9 @@ def analyzed_param_names(self): def analyze(self): name, var_list, body = self.args + # use translate_label to ensure no collisions across compilations + name._str_value = self.compile_ctx.translate_label(name._str_value) + self.labelname = name._str_value self.compile_ctx.add_label(self.labelname, self) From 8cb5736a3894fe716c28931585eb490b617fdaed Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 12 Oct 2023 13:02:09 -0400 Subject: [PATCH 074/122] add requirements.txt for rtd --- .readthedocs.yaml | 5 +++++ docs/requirements.txt | 1 + 2 files changed, 6 insertions(+) create mode 100644 docs/requirements.txt diff --git a/.readthedocs.yaml b/.readthedocs.yaml index ad1ccd07..2baf85d5 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -15,6 +15,11 @@ build: sphinx: configuration: docs/source/conf.py +# Explicitly set the version of Python and its requirements +python: + install: + - requirements: docs/requirements.txt + # If using Sphinx, optionally build your docs in additional formats such as PDF # formats: # - pdf diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..9304738c --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1 @@ +sphinx-rtd-theme>=1.3.0,<1.4.0 From a147cbece45edaa4020039e22785b3aa739a9103 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 12 Oct 2023 19:03:15 -0400 Subject: [PATCH 075/122] add a note --- boa/environment.py | 1 + 1 file changed, 1 insertion(+) diff --git a/boa/environment.py b/boa/environment.py index cd6cf88a..13e96010 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -95,6 +95,7 @@ def anchor(self): # XXX: inherit from bytes directly so that we can pass it to py-evm? +# inherit from `str` so that ABI encoder / decoder can work without failing class Address(str): # (PYEVM_Address): # converting between checksum and canonical addresses is a hotspot; # this class contains both and caches recently seen conversions From 20dbf4c259ee766b20253b353bf7fffd8884d963 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 16 Oct 2023 12:03:13 -0400 Subject: [PATCH 076/122] fix: handle alchemy free tier in network mode --- boa/network.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/boa/network.py b/boa/network.py index f8e7fbbc..0564bca8 100644 --- a/boa/network.py +++ b/boa/network.py @@ -285,7 +285,9 @@ def _tracer(self): call_tracer = {"tracer": "callTracer", "onlyTopCall": True} self._rpc.fetch("debug_traceTransaction", [txn_hash, call_tracer]) except RPCError as e: - if e.code == -32601: + # -32600 is alchemy unpaid tier error message + # -32601 is infura error message (if i recall correctly) + if e.code in (-32601, -32600): warnings.warn( "debug_traceTransaction not available! " "titanoboa will try hard to interact with the network, but " From 7005178fd4fc948c3b565261942e770e1cbc287d Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 16 Oct 2023 12:06:32 -0400 Subject: [PATCH 077/122] bump BASE_FEE_ESTIMATOR_CONSTANT this works out to ~1.6x the base fee estimate --- boa/network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boa/network.py b/boa/network.py index 0564bca8..03b78be1 100644 --- a/boa/network.py +++ b/boa/network.py @@ -114,7 +114,7 @@ def get_gas_price(self) -> int: # but can be tweaked if you get errors like # `boa.rpc.RPCError: -32000: err: max fee per gas less than block base fee` - BASE_FEE_ESTIMATOR_CONSTANT = 0 + BASE_FEE_ESTIMATOR_CONSTANT = 4 def get_fee_info(self) -> tuple[str, str, str, str]: # returns: base_fee, max_fee, max_priority_fee From d468fffd5cad0510c6a63e7b92735223a2d5afe4 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 16 Oct 2023 12:24:08 -0400 Subject: [PATCH 078/122] factor out base fee constant and timeout into TransactionSettings object --- boa/network.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/boa/network.py b/boa/network.py index 03b78be1..298aa391 100644 --- a/boa/network.py +++ b/boa/network.py @@ -49,6 +49,21 @@ class _EstimateGasFailed(Exception): pass +@dataclass +class TransactionSettings: + # when calculating the base fee, the number of blocks N ahead + # to compute a cap for the Nth block. + # defaults to 4 (4 blocks ahead, pending block's baseFee * ~1.6) + # but can be tweaked. if you get errors like + # `boa.rpc.RPCError: -32000: err: max fee per gas less than block base fee` + # try increasing the constant. + # do not recommend setting below 0. + base_fee_estimator_constant: int = 4 + + # amount of time to wait, in seconds before giving up on a transaction + poll_timeout: float = 240.0 + + class NetworkEnv(Env): """ An Env object which can be swapped in via `boa.set_env()`. @@ -69,6 +84,8 @@ def __init__(self, rpc_url, accounts=None): self._gas_price = None + self.tx_settings = TransactionSettings() + @cached_property def _rpc_has_snapshot(self): try: @@ -108,14 +125,6 @@ def get_gas_price(self) -> int: return self._gas_price return to_int(self._rpc.fetch("eth_gasPrice", [])) - # when calculating the base fee, the number of blocks N ahead - # to compute a cap for the Nth block. - # defaults to 0 (no blocks ahead, just use pending block's baseFee) - # but can be tweaked if you get errors like - # `boa.rpc.RPCError: -32000: err: max fee per gas less than block base fee` - - BASE_FEE_ESTIMATOR_CONSTANT = 4 - def get_fee_info(self) -> tuple[str, str, str, str]: # returns: base_fee, max_fee, max_priority_fee reqs = [ @@ -129,7 +138,7 @@ def get_fee_info(self) -> tuple[str, str, str, str]: # Each block increases the base fee by 1/8 at most. # here we have the next block's base fee, compute a cap for the # next N blocks here. - blocks_ahead = self.BASE_FEE_ESTIMATOR_CONSTANT + blocks_ahead = self.tx_settings.base_fee_estimator_constant base_fee_estimate = ceil(to_int(base_fee) * (9 / 8) ** blocks_ahead) max_fee = to_hex(base_fee_estimate + to_int(max_priority_fee)) @@ -261,8 +270,11 @@ def deploy_code(self, sender=None, gas=None, value=0, bytecode=b"", **kwargs): return create_address, deployed_bytecode - def _wait_for_tx_trace(self, tx_hash, timeout=60, poll_latency=0.25): + def _wait_for_tx_trace(self, tx_hash, poll_latency=0.25): start = time.time() + + timeout = self.tx_settings.poll_timeout + while True: receipt = self._rpc.fetch("eth_getTransactionReceipt", [tx_hash]) if receipt is not None: From 1c301300a19683de7592132e9bf72f8139a743f9 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 16 Oct 2023 12:26:33 -0400 Subject: [PATCH 079/122] fix lint, bad type annotation --- boa/environment.py | 16 ++++++++++------ boa/network.py | 1 + boa/vm/fast_accountdb.py | 5 +++++ boa/vyper/compiler_utils.py | 2 -- boa/vyper/contract.py | 10 ++++++---- boa/vyper/ir_executor.py | 5 +---- 6 files changed, 23 insertions(+), 16 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 13e96010..2e7baa9e 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -25,8 +25,12 @@ from boa.util.abi import abi_decode from boa.util.eip1167 import extract_eip1167_address, is_eip1167_contract from boa.util.lrudict import lrudict +from boa.vm.fast_accountdb import ( + FastAccountDB, + patch_pyevm_state_object, + unpatch_pyevm_state_object, +) from boa.vm.fork import AccountDBFork -from boa.vm.fast_accountdb import FastAccountDB, patch_pyevm_state_object, unpatch_pyevm_state_object from boa.vm.gas_meters import GasMeter, NoGasMeter, ProfilingGasMeter from boa.vm.utils import to_bytes, to_int @@ -337,22 +341,22 @@ def apply_create_message(cls, state, msg, tx_ctx): def apply_computation(cls, state, msg, tx_ctx): addr = msg.code_address contract = cls.env._lookup_contract_fast(addr) if addr else None - #print("ENTER", Address(msg.code_address or bytes([0]*20)), contract) + # print("ENTER", Address(msg.code_address or bytes([0]*20)), contract) if contract is None or not cls.env._fast_mode_enabled: - #print("SLOW MODE") + # print("SLOW MODE") return super().apply_computation(state, msg, tx_ctx) with cls(state, msg, tx_ctx) as computation: try: if getattr(msg, "_ir_executor", None) is not None: - #print("MSG HAS IR EXECUTOR") + # print("MSG HAS IR EXECUTOR") # this happens when bytecode is overridden, e.g. # for injected functions. note ir_executor is (correctly) # used for the outer computation only because on subcalls # a clean message is constructed for the child computation msg._ir_executor.exec(computation) else: - #print("REGULAR FAST MODE") + # print("REGULAR FAST MODE") contract.ir_executor.exec(computation) except Halt: pass @@ -560,7 +564,7 @@ def get_singleton(cls): cls._singleton = cls() return cls._singleton - def generate_address(self, alias: Optional[str] = None) -> AddressType: + def generate_address(self, alias: Optional[str] = None) -> _AddressType: t = Address(self._random.randbytes(20)) if alias is not None: self.alias(t, alias) diff --git a/boa/network.py b/boa/network.py index 298aa391..149ca0b6 100644 --- a/boa/network.py +++ b/boa/network.py @@ -2,6 +2,7 @@ import contextlib import time import warnings +from dataclasses import dataclass from functools import cached_property from math import ceil diff --git a/boa/vm/fast_accountdb.py b/boa/vm/fast_accountdb.py index e4d10385..63ba7226 100644 --- a/boa/vm/fast_accountdb.py +++ b/boa/vm/fast_accountdb.py @@ -1,8 +1,10 @@ from eth.db.account import AccountDB + class FastAccountDB(AccountDB): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + # this is a hotspot in super(). def touch_account(self, address): self._accessed_accounts.add(address) @@ -11,8 +13,10 @@ def touch_account(self, address): def _touch_account_patcher(self, address): self._accessed_accounts.add(address) + _BOA_PATCHED = object() + def patch_pyevm_state_object(state_object): if getattr(state_object, "__boa_patched__", None) == _BOA_PATCHED: return @@ -21,6 +25,7 @@ def patch_pyevm_state_object(state_object): accountdb.touch_account = _touch_account_patcher.__get__(accountdb, AccountDB) state_object.__boa_patched__ = True + def unpatch_pyevm_state_object(state_object): if not getattr(state_object, "__boa_patched__", None) == _BOA_PATCHED: return diff --git a/boa/vyper/compiler_utils.py b/boa/vyper/compiler_utils.py index 9d2f5a0c..545d4e70 100644 --- a/boa/vyper/compiler_utils.py +++ b/boa/vyper/compiler_utils.py @@ -5,11 +5,9 @@ from vyper.ast.utils import parse_to_ast from vyper.codegen.function_definitions import generate_ir_for_function from vyper.codegen.ir_node import IRnode -from vyper.compiler.settings import OptimizationLevel from vyper.exceptions import InvalidType from vyper.ir import compile_ir, optimizer from vyper.semantics.analysis.utils import get_exact_type_from_node -from vyper.utils import method_id_int from boa.vyper import _METHOD_ID_VAR from boa.vyper.ir_executor import executor_from_ir diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 81bab41f..a78daf42 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -16,8 +16,7 @@ import vyper.semantics.namespace as vy_ns from eth.exceptions import VMError from vyper.ast.utils import parse_to_ast -from vyper.compiler.settings import OptimizationLevel -from vyper.codegen.core import calculate_type_for_external_return, anchor_opt_level +from vyper.codegen.core import anchor_opt_level, calculate_type_for_external_return from vyper.codegen.function_definitions import generate_ir_for_function from vyper.codegen.function_definitions.common import ExternalFuncIR, InternalFuncIR from vyper.codegen.global_context import GlobalContext @@ -271,9 +270,12 @@ def _handle_child_trace(computation, env, return_trace): child = computation.children[-1] # TODO: maybe should be: - # child_obj = env.lookup_contract(child.msg.code_address) or env._code_registry.get(child.msg.code) - + # child_obj = ( + # env.lookup_contract(child.msg.code_address) + # or env._code_registry.get(child.msg.code) + # ) child_obj = env.lookup_contract(child.msg.code_address) + if child_obj is None: child_trace = trace_for_unknown_contract(child, env) else: diff --git a/boa/vyper/ir_executor.py b/boa/vyper/ir_executor.py index 6754b528..30f1657d 100644 --- a/boa/vyper/ir_executor.py +++ b/boa/vyper/ir_executor.py @@ -247,11 +247,10 @@ def compile_main(self, contract_path=""): self.builder.extend("\n\n") func.compile_func() - py_file = contract_path + str(self.compile_ctx.uuid) + ".py" # uncomment for debugging the python code: - #with open(py_file, "w") as f: + # with open(py_file, "w") as f: # print(self.builder.get_output(), file=f) py_bytecode = compile(self.builder.get_output(), py_file, "exec") @@ -1124,8 +1123,6 @@ def _ensure_source_pos(ir_node, source_pos=None, error_msg=None): def executor_from_ir(ir_node, vyper_compiler_data) -> Any: - import vyper.version - _ensure_source_pos(ir_node) ret = _executor_from_ir(ir_node, CompileContext(vyper_compiler_data)) From d555f3d3e0d147ae294edd37581035f0b1e35ac5 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 16 Oct 2023 12:31:29 -0400 Subject: [PATCH 080/122] fix up things with new Address type --- boa/network.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/boa/network.py b/boa/network.py index 149ca0b6..936c03c3 100644 --- a/boa/network.py +++ b/boa/network.py @@ -7,9 +7,8 @@ from math import ceil from eth_account import Account -from eth_utils import to_canonical_address, to_checksum_address -from boa.environment import Env +from boa.environment import Address, Env from boa.rpc import EthereumRPC, RPCError, to_bytes, to_hex, to_int @@ -148,7 +147,7 @@ def get_fee_info(self) -> tuple[str, str, str, str]: def _check_sender(self, address): if address is None: raise ValueError("No sender!") - return to_checksum_address(address) + return address # OVERRIDES def execute_code( @@ -249,7 +248,7 @@ def deploy_code(self, sender=None, gas=None, value=0, bytecode=b"", **kwargs): from_=sender, value=value, gas=gas, data=bytecode ) - create_address = to_canonical_address(receipt["contractAddress"]) + create_address = Address(receipt["contractAddress"]) deployed_bytecode = local_bytecode @@ -267,7 +266,7 @@ def deploy_code(self, sender=None, gas=None, value=0, bytecode=b"", **kwargs): raise RuntimeError(f"uh oh! {local_address} != {create_address}") # TODO get contract info in here - print(f"contract deployed at {to_checksum_address(create_address)}") + print(f"contract deployed at {create_address}") return create_address, deployed_bytecode From 3f250a5256321bea2e7f11c1c483ebb7d315f0ae Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 16 Oct 2023 12:34:47 -0400 Subject: [PATCH 081/122] fix check_sender --- boa/network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boa/network.py b/boa/network.py index 936c03c3..1496674b 100644 --- a/boa/network.py +++ b/boa/network.py @@ -147,7 +147,7 @@ def get_fee_info(self) -> tuple[str, str, str, str]: def _check_sender(self, address): if address is None: raise ValueError("No sender!") - return address + return Address(address) # OVERRIDES def execute_code( From c2205550debc60ed57b6a7a4776f049e27166c31 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 16 Oct 2023 12:44:25 -0400 Subject: [PATCH 082/122] fix another small api thing --- boa/network.py | 1 + 1 file changed, 1 insertion(+) diff --git a/boa/network.py b/boa/network.py index 1496674b..3d104c55 100644 --- a/boa/network.py +++ b/boa/network.py @@ -160,6 +160,7 @@ def execute_code( override_bytecode=None, contract=None, is_modifying=True, + ir_executor=None, # maybe just have **kwargs to collect extra kwargs ): # call execute_code for tracing side effects computation = super().execute_code( From 2493d753d4665001031208f2e38b3f5982fa8c81 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 16 Oct 2023 14:17:53 -0400 Subject: [PATCH 083/122] add ExternalAccount support this allows interacting with any RPC which has accounts (ex. frame.sh) --- boa/network.py | 31 +++++++++++++++++++++++++++++++ boa/rpc.py | 10 ++++++++++ 2 files changed, 41 insertions(+) diff --git a/boa/network.py b/boa/network.py index 3d104c55..99d65952 100644 --- a/boa/network.py +++ b/boa/network.py @@ -64,6 +64,20 @@ class TransactionSettings: poll_timeout: float = 240.0 +@dataclass +class ExternalAccount: + address: Address + _rpc: EthereumRPC + + def __post_init__(self): + self.address = Address(self.address) + + def send_transaction(self, tx_data): + txhash = self._rpc.fetch("eth_sendTransaction", [tx_data]) + # format to be the same as what BrowserSigner returns + return {"hash": txhash} + + class NetworkEnv(Env): """ An Env object which can be swapped in via `boa.set_env()`. @@ -111,11 +125,28 @@ def anchor(self): # wipe forked state self._reset_fork(blkid) + # add account, or "Account-like" object. MUST expose + # `sign_transaction` or `send_transaction` method! def add_account(self, account: Account, force_eoa=False): self._accounts[account.address] = account # type: ignore if self.eoa is None or force_eoa: self.eoa = account.address # type: ignore + def add_accounts_from_rpc(self, rpc: str | EthereumRPC) -> None: + if isinstance(rpc, str): + rpc = EthereumRPC(rpc) + + # address strings, ex. ["0x0e5437b1b3448d22c07caed31e5bcdc4ec5284a9"] + addresses = rpc.fetch("eth_accounts", []) + if not addresses: + # strip out content in the URL which might not want to get into logs + warnings.warn( + f"No accounts fetched from <{rpc.url_base}>! (URL partially masked for privacy)", + stacklevel=2, + ) + for address in addresses: + self.add_account(ExternalAccount(_rpc=rpc, address=address)) # type: ignore + def set_eoa(self, eoa: Account) -> None: self.add_account(eoa, force_eoa=True) diff --git a/boa/rpc.py b/boa/rpc.py index 77d090c0..359619ed 100644 --- a/boa/rpc.py +++ b/boa/rpc.py @@ -1,3 +1,5 @@ +from urllib.parse import urlparse + import requests try: @@ -49,6 +51,14 @@ def __init__(self, url: str): self._rpc_url = url self._session = requests.Session() + @property + def url_base(self): + # return a version of the URL which has everything past the "base" + # url stripped out (content which you might not want to end up + # in logs) + parse_result = urlparse(self._rpc_url) + return f"{parse_result.scheme}://{parse_result.netloc}" + def fetch(self, method, params): # the obvious thing to do here is dispatch into fetch_multi. # but some providers (alchemy) can't handle batched requests From 3201bf318d10c238071365be833ef8e3c7eaaef2 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 16 Oct 2023 14:26:00 -0400 Subject: [PATCH 084/122] declare self to frame.sh --- boa/rpc.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/boa/rpc.py b/boa/rpc.py index 359619ed..a0544e6e 100644 --- a/boa/rpc.py +++ b/boa/rpc.py @@ -51,6 +51,9 @@ def __init__(self, url: str): self._rpc_url = url self._session = requests.Session() + # declare app name to frame.sh + self._session.headers["Origin"] = "Titanoboa" + @property def url_base(self): # return a version of the URL which has everything past the "base" From 7171aee25c4d25fc1626a361a8c972e9316fd383 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 18 Oct 2023 10:47:18 -0400 Subject: [PATCH 085/122] fix: handle evm_version in compiler settings ensure evm_version gets properly passed in through compiler passes --- boa/vyper/compiler_utils.py | 61 +++++++++++++++++++------------------ boa/vyper/contract.py | 56 +++++++++++++++++++++------------- 2 files changed, 66 insertions(+), 51 deletions(-) diff --git a/boa/vyper/compiler_utils.py b/boa/vyper/compiler_utils.py index 545d4e70..c639aedb 100644 --- a/boa/vyper/compiler_utils.py +++ b/boa/vyper/compiler_utils.py @@ -5,6 +5,7 @@ from vyper.ast.utils import parse_to_ast from vyper.codegen.function_definitions import generate_ir_for_function from vyper.codegen.ir_node import IRnode +from vyper.evm.opcodes import anchor_evm_version from vyper.exceptions import InvalidType from vyper.ir import compile_ir, optimizer from vyper.semantics.analysis.utils import get_exact_type_from_node @@ -22,45 +23,47 @@ def compile_vyper_function(vyper_function, contract): """ compiler_data = contract.compiler_data - global_ctx = contract.global_ctx - ifaces = compiler_data.interface_codes - ast = parse_to_ast(vyper_function, ifaces) - vy_ast.folding.fold(ast) - # override namespace and add wrapper code at the top - with contract.override_vyper_namespace(): - analysis.add_module_namespace(ast, ifaces) - analysis.validate_functions(ast) + with anchor_evm_version(compiler_data.settings.evm_version): + global_ctx = contract.global_ctx + ifaces = compiler_data.interface_codes + ast = parse_to_ast(vyper_function, ifaces) + vy_ast.folding.fold(ast) - ast = ast.body[0] - func_t = ast._metadata["type"] + # override namespace and add wrapper code at the top + with contract.override_vyper_namespace(): + analysis.add_module_namespace(ast, ifaces) + analysis.validate_functions(ast) + + ast = ast.body[0] + func_t = ast._metadata["type"] - external_func_info = generate_ir_for_function(ast, global_ctx, False) - ir = external_func_info.common_ir + external_func_info = generate_ir_for_function(ast, global_ctx, False) + ir = external_func_info.common_ir - entry_label = func_t._ir_info.external_function_base_entry_label + entry_label = func_t._ir_info.external_function_base_entry_label - ir = ["seq", ["goto", entry_label], ir] + ir = ["seq", ["goto", entry_label], ir] - # use a dummy method id - ir = ["with", _METHOD_ID_VAR, 0, ir] + # use a dummy method id + ir = ["with", _METHOD_ID_VAR, 0, ir] - # first mush it with the rest of the IR in the contract to ensure - # all labels are present, and then optimize all together - # (use unoptimized IR, ir_executor can't handle optimized selector tables) - _, contract_runtime = contract.unoptimized_ir - ir = IRnode.from_list(["seq", ir, contract_runtime]) - ir = optimizer.optimize(ir) + # first mush it with the rest of the IR in the contract to ensure + # all labels are present, and then optimize all together + # (use unoptimized IR, ir_executor can't handle optimized selector tables) + _, contract_runtime = contract.unoptimized_ir + ir = IRnode.from_list(["seq", ir, contract_runtime]) + ir = optimizer.optimize(ir) - assembly = compile_ir.compile_to_assembly(ir) - bytecode, source_map = compile_ir.assembly_to_evm(assembly) - bytecode += contract.data_section - typ = func_t.return_type + assembly = compile_ir.compile_to_assembly(ir) + bytecode, source_map = compile_ir.assembly_to_evm(assembly) + bytecode += contract.data_section + typ = func_t.return_type - # generate the IR executor - ir_executor = executor_from_ir(ir, compiler_data) + # generate the IR executor + ir_executor = executor_from_ir(ir, compiler_data) - return ast, ir_executor, bytecode, source_map, typ + return ast, ir_executor, bytecode, source_map, typ def generate_bytecode_for_internal_fn(fn): diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index a78daf42..54d908cb 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -24,6 +24,7 @@ from vyper.codegen.module import generate_ir_for_module from vyper.compiler import output as compiler_output from vyper.compiler.settings import OptimizationLevel +from vyper.evm.opcodes import anchor_evm_version from vyper.exceptions import VyperException from vyper.ir.optimizer import optimize from vyper.semantics.analysis.data_positions import set_data_positions @@ -64,7 +65,8 @@ def __init__(self, compiler_data, filename=None): # force compilation so that if there are any errors in the contract, # we fail at load rather than at deploy time. - _ = compiler_data.bytecode + with anchor_evm_version(compiler_data.settings.evm_version): + _ = compiler_data.bytecode self.filename = filename @@ -609,9 +611,10 @@ def global_ctx(self): @property def source_map(self): if self._source_map is None: - _, self._source_map = compile_ir.assembly_to_evm( - self.compiler_data.assembly_runtime - ) + with anchor_evm_version(self.compiler_data.settings.evm_version): + _, self._source_map = compile_ir.assembly_to_evm( + self.compiler_data.assembly_runtime + ) return self._source_map def find_error_meta(self, computation): @@ -756,24 +759,27 @@ def _ast_module(self): module = copy.deepcopy(self.compiler_data.vyper_module) # do the same thing as vyper_module_folded but skip getter expansion - vy_ast.folding.fold(module) - with vy_ns.get_namespace().enter_scope(): - analysis.add_module_namespace(module, self.compiler_data.interface_codes) - analysis.validate_functions(module) - # we need to cache the namespace right here(!). - # set_data_positions will modify the type definitions in place. - self._cache_namespace(vy_ns.get_namespace()) + with anchor_evm_version(self.compiler_data.settings.evm_version): + vy_ast.folding.fold(module) + with vy_ns.get_namespace().enter_scope(): + analysis.add_module_namespace( + module, self.compiler_data.interface_codes + ) + analysis.validate_functions(module) + # we need to cache the namespace right here(!). + # set_data_positions will modify the type definitions in place. + self._cache_namespace(vy_ns.get_namespace()) - vy_ast.expansion.remove_unused_statements(module) - # calculate slots for all storage variables, tagging - # the types in the namespace. - set_data_positions(module, storage_layout_overrides=None) + vy_ast.expansion.remove_unused_statements(module) + # calculate slots for all storage variables, tagging + # the types in the namespace. + set_data_positions(module, storage_layout_overrides=None) - # ensure _ir_info is generated for all functions in this copied/shadow - # namespace - _ = generate_ir_for_module(GlobalContext(module)) + # ensure _ir_info is generated for all functions in this copied/shadow + # namespace + _ = generate_ir_for_module(GlobalContext(module)) - return module + return module # the global namespace is expensive to compute, so cache it def _cache_namespace(self, namespace): @@ -805,8 +811,11 @@ def override_vyper_namespace(self): # eliminator might prune a dead function (which we want to eval) @cached_property def unoptimized_assembly(self): - runtime = self.compiler_data.ir_runtime - return compile_ir.compile_to_assembly(runtime, optimize=OptimizationLevel.NONE) + with anchor_evm_version(self.compiler_data.settings.evm_version): + runtime = self.compiler_data.ir_runtime + return compile_ir.compile_to_assembly( + runtime, optimize=OptimizationLevel.NONE + ) @cached_property def data_section_size(self): @@ -829,12 +838,15 @@ def unoptimized_bytecode(self): @cached_property def unoptimized_ir(self): - with anchor_opt_level(OptimizationLevel.NONE): + with anchor_opt_level(OptimizationLevel.NONE), anchor_evm_version( + self.compiler_data.settings.evm_version + ): return generate_ir_for_module(self.compiler_data.global_ctx) @cached_property def ir_executor(self): _, ir_runtime = self.unoptimized_ir + # TODO: check if this needs anchor_evm_version return executor_from_ir(ir_runtime, self.compiler_data) @contextlib.contextmanager From 7fcca390a0c4d94fe78e19221df618d264a609f1 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 24 Oct 2023 10:14:18 -0400 Subject: [PATCH 086/122] fix: non eip-1559 transactions for some reason, the exception-throwing call to get_fee_info was not in the try/except block. --- boa/network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boa/network.py b/boa/network.py index 99d65952..f8e87ebe 100644 --- a/boa/network.py +++ b/boa/network.py @@ -365,9 +365,9 @@ def _send_txn(self, from_, to=None, gas=None, value=None, data=None): {"from": from_, "to": to, "gas": gas, "value": value, "data": data} ) - base_fee, max_priority_fee, max_fee, chain_id = self.get_fee_info() try: # eip-1559 txn + base_fee, max_priority_fee, max_fee, chain_id = self.get_fee_info() tx_data["maxPriorityFeePerGas"] = max_priority_fee tx_data["maxFeePerGas"] = max_fee tx_data["chainId"] = chain_id From 514a6b014b86bd8793846553d76bc344f2ac817b Mon Sep 17 00:00:00 2001 From: bout3fiddy <11488427+bout3fiddy@users.noreply.github.com> Date: Wed, 25 Oct 2023 12:57:20 +0200 Subject: [PATCH 087/122] catch missing baseFee --- boa/network.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/boa/network.py b/boa/network.py index f8e87ebe..ce75082d 100644 --- a/boa/network.py +++ b/boa/network.py @@ -156,7 +156,7 @@ def get_gas_price(self) -> int: return self._gas_price return to_int(self._rpc.fetch("eth_gasPrice", [])) - def get_fee_info(self) -> tuple[str, str, str, str]: + def estimate_eip1559_fee(self) -> tuple[str, str, str, str]: # returns: base_fee, max_fee, max_priority_fee reqs = [ ("eth_getBlockByNumber", ["pending", False]), @@ -367,11 +367,11 @@ def _send_txn(self, from_, to=None, gas=None, value=None, data=None): try: # eip-1559 txn - base_fee, max_priority_fee, max_fee, chain_id = self.get_fee_info() + base_fee, max_priority_fee, max_fee, chain_id = self.estimate_eip1559_fee() tx_data["maxPriorityFeePerGas"] = max_priority_fee tx_data["maxFeePerGas"] = max_fee tx_data["chainId"] = chain_id - except RPCError: + except (RPCError, KeyError): tx_data["gasPrice"] = to_hex(self.get_gas_price()) tx_data["nonce"] = self._get_nonce(from_) From c68148c9b8dcca75ef5c774bfe221cdb9b4370b1 Mon Sep 17 00:00:00 2001 From: bout3fiddy <11488427+bout3fiddy@users.noreply.github.com> Date: Wed, 25 Oct 2023 13:23:06 +0200 Subject: [PATCH 088/122] add chainId to static fee info --- boa/network.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/boa/network.py b/boa/network.py index ce75082d..bafb38f4 100644 --- a/boa/network.py +++ b/boa/network.py @@ -156,7 +156,7 @@ def get_gas_price(self) -> int: return self._gas_price return to_int(self._rpc.fetch("eth_gasPrice", [])) - def estimate_eip1559_fee(self) -> tuple[str, str, str, str]: + def get_eip1559_fee(self) -> tuple[str, str, str, str]: # returns: base_fee, max_fee, max_priority_fee reqs = [ ("eth_getBlockByNumber", ["pending", False]), @@ -175,6 +175,9 @@ def estimate_eip1559_fee(self) -> tuple[str, str, str, str]: max_fee = to_hex(base_fee_estimate + to_int(max_priority_fee)) return to_hex(base_fee_estimate), max_priority_fee, max_fee, chain_id + def get_static_fee(self) -> tuple[str, str]: + return self._rpc.fetch_multi([("eth_gasPrice", []), ("eth_chainId", [])]) + def _check_sender(self, address): if address is None: raise ValueError("No sender!") @@ -367,12 +370,14 @@ def _send_txn(self, from_, to=None, gas=None, value=None, data=None): try: # eip-1559 txn - base_fee, max_priority_fee, max_fee, chain_id = self.estimate_eip1559_fee() + (base_fee, max_priority_fee, max_fee, chain_id) = self.get_eip1559_fee() tx_data["maxPriorityFeePerGas"] = max_priority_fee tx_data["maxFeePerGas"] = max_fee tx_data["chainId"] = chain_id except (RPCError, KeyError): - tx_data["gasPrice"] = to_hex(self.get_gas_price()) + gas_price, chain_id = self.get_static_fee() + tx_data["gasPrice"] = gas_price + tx_data["chainId"] = chain_id tx_data["nonce"] = self._get_nonce(from_) From 1924f4888e2ef261efb2367fce2cf467de16f529 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 25 Oct 2023 17:34:33 -0400 Subject: [PATCH 089/122] add a comment --- boa/network.py | 1 + 1 file changed, 1 insertion(+) diff --git a/boa/network.py b/boa/network.py index bafb38f4..ed540d75 100644 --- a/boa/network.py +++ b/boa/network.py @@ -176,6 +176,7 @@ def get_eip1559_fee(self) -> tuple[str, str, str, str]: return to_hex(base_fee_estimate), max_priority_fee, max_fee, chain_id def get_static_fee(self) -> tuple[str, str]: + # non eip-1559 transaction return self._rpc.fetch_multi([("eth_gasPrice", []), ("eth_chainId", [])]) def _check_sender(self, address): From e823fdac19ed9f89df4b6a3d0b77988c2a93f396 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 26 Oct 2023 15:31:28 -0400 Subject: [PATCH 090/122] fix patch_pyevm_account_db --- boa/vm/fast_accountdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boa/vm/fast_accountdb.py b/boa/vm/fast_accountdb.py index 63ba7226..ed081a85 100644 --- a/boa/vm/fast_accountdb.py +++ b/boa/vm/fast_accountdb.py @@ -23,7 +23,7 @@ def patch_pyevm_state_object(state_object): accountdb = state_object._account_db accountdb._restore_touch_account = accountdb.touch_account accountdb.touch_account = _touch_account_patcher.__get__(accountdb, AccountDB) - state_object.__boa_patched__ = True + state_object.__boa_patched__ = _BOA_PATCHED def unpatch_pyevm_state_object(state_object): From 68e58155c8120a7ceced77561d15e6ae2a0d69e5 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 26 Oct 2023 15:44:37 -0400 Subject: [PATCH 091/122] remove FastAccountDB we only need accountdb patcher --- boa/environment.py | 3 +-- boa/vm/fast_accountdb.py | 9 --------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 2e7baa9e..20c4f9c3 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -26,7 +26,6 @@ from boa.util.eip1167 import extract_eip1167_address, is_eip1167_contract from boa.util.lrudict import lrudict from boa.vm.fast_accountdb import ( - FastAccountDB, patch_pyevm_state_object, unpatch_pyevm_state_object, ) @@ -433,7 +432,7 @@ def _init_vm(self, reset_traces=True): ) if self._fast_mode_enabled: - self.vm._state_class.account_db_class = FastAccountDB + patch_pyevm_state_object(self.vm.state) self.vm.state.computation_class = c diff --git a/boa/vm/fast_accountdb.py b/boa/vm/fast_accountdb.py index ed081a85..728cd47a 100644 --- a/boa/vm/fast_accountdb.py +++ b/boa/vm/fast_accountdb.py @@ -1,15 +1,6 @@ from eth.db.account import AccountDB -class FastAccountDB(AccountDB): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - # this is a hotspot in super(). - def touch_account(self, address): - self._accessed_accounts.add(address) - - def _touch_account_patcher(self, address): self._accessed_accounts.add(address) From 373de9d27dd182f27783ea077314ed42466c4ccb Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 26 Oct 2023 17:05:05 -0400 Subject: [PATCH 092/122] feat: speed up fork mode try to call `debug_traceCall` with prestateTracer to get the prestate for a call. if it succeeds, seed the vm with the fetched state. --- boa/environment.py | 13 ++++--- boa/network.py | 20 ++++------- boa/rpc.py | 19 +++++++--- boa/vm/fork.py | 88 ++++++++++++++++++++++++++++++++++++++-------- 4 files changed, 103 insertions(+), 37 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 20c4f9c3..ebf27fbe 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -25,10 +25,7 @@ from boa.util.abi import abi_decode from boa.util.eip1167 import extract_eip1167_address, is_eip1167_contract from boa.util.lrudict import lrudict -from boa.vm.fast_accountdb import ( - patch_pyevm_state_object, - unpatch_pyevm_state_object, -) +from boa.vm.fast_accountdb import patch_pyevm_state_object, unpatch_pyevm_state_object from boa.vm.fork import AccountDBFork from boa.vm.gas_meters import GasMeter, NoGasMeter, ProfilingGasMeter from boa.vm.utils import to_bytes, to_int @@ -390,6 +387,7 @@ class Env: _random = random.Random("titanoboa") # something reproducible _coverage_enabled = False _fast_mode_enabled = False + _fork_mode = False def __init__(self): self.chain = _make_chain() @@ -466,6 +464,7 @@ def enable_fast_mode(self, flag: bool = True): def fork(self, url, reset_traces=True, **kwargs): kwargs["url"] = url AccountDBFork._rpc_init_kwargs = kwargs + self._fork_mode = True self.vm.__class__._state_class.account_db_class = AccountDBFork self._init_vm(reset_traces=reset_traces) block_info = self.vm.state._account_db._block_info @@ -609,6 +608,10 @@ def deploy_code( create_address=target_address.canonical_address, data=b"", ) + + if self._fork_mode: + self.vm.state._account_db.try_prefetch_state(msg) + origin = sender # XXX: consider making this parametrizable tx_ctx = BaseTransactionContext(origin=origin, gas_price=self.get_gas_price()) c = self.vm.state.computation_class.apply_create_message( @@ -682,6 +685,8 @@ def execute_code( ir_executor=ir_executor, contract=contract, ) + if self._fork_mode: + self.vm.state._account_db.try_prefetch_state(msg) origin = sender # XXX: consider making this parametrizable tx_ctx = BaseTransactionContext(origin=origin, gas_price=self.get_gas_price()) diff --git a/boa/network.py b/boa/network.py index ed540d75..954b4e83 100644 --- a/boa/network.py +++ b/boa/network.py @@ -9,7 +9,7 @@ from eth_account import Account from boa.environment import Address, Env -from boa.rpc import EthereumRPC, RPCError, to_bytes, to_hex, to_int +from boa.rpc import EthereumRPC, RPCError, fixup_dict, to_bytes, to_hex, to_int, trim_dict class TraceObject: @@ -37,14 +37,6 @@ def is_error(self): return "error" in self.raw_trace -def trim_dict(kv): - return {k: v for (k, v) in kv.items() if bool(v)} - - -def _fixup_dict(kv): - return {k: to_hex(v) for (k, v) in trim_dict(kv).items()} - - class _EstimateGasFailed(Exception): pass @@ -210,12 +202,12 @@ def execute_code( sender = self._check_sender(self._get_sender(sender)) - data = to_hex(data) + hexdata = to_hex(data) if is_modifying: try: receipt, trace = self._send_txn( - from_=sender, to=to_address, value=value, gas=gas, data=data + from_=sender, to=to_address, value=value, gas=gas, data=hexdata ) except _EstimateGasFailed: # no need to actually run the txn. @@ -239,13 +231,13 @@ def execute_code( ) else: - args = _fixup_dict( + args = fixup_dict( { "from": sender, "to": to_address, "gas": gas, "value": value, - "data": data, + "data": hexdata, } ) returnvalue = self._rpc.fetch("eth_call", [args, "latest"]) @@ -365,7 +357,7 @@ def _reset_fork(self, block_identifier="latest"): self.vm.state._account_db._rpc._init_mem_db() def _send_txn(self, from_, to=None, gas=None, value=None, data=None): - tx_data = _fixup_dict( + tx_data = fixup_dict( {"from": from_, "to": to, "gas": gas, "value": value, "data": data} ) diff --git a/boa/rpc.py b/boa/rpc.py index a0544e6e..82af8c86 100644 --- a/boa/rpc.py +++ b/boa/rpc.py @@ -13,6 +13,14 @@ # some utility functions +def trim_dict(kv): + return {k: v for (k, v) in kv.items() if bool(v)} + + +def fixup_dict(kv): + return {k: to_hex(v) for (k, v) in trim_dict(kv).items()} + + def to_hex(s: int) -> str: if isinstance(s, int): return hex(s) @@ -62,10 +70,7 @@ def url_base(self): parse_result = urlparse(self._rpc_url) return f"{parse_result.scheme}://{parse_result.netloc}" - def fetch(self, method, params): - # the obvious thing to do here is dispatch into fetch_multi. - # but some providers (alchemy) can't handle batched requests - # for certain endpoints (debug_traceTransaction). + def _raw_fetch_single(self, method, params): req = {"jsonrpc": "2.0", "method": method, "params": params, "id": 0} # print(req) res = self._session.post(self._rpc_url, json=req, timeout=TIMEOUT) @@ -76,6 +81,12 @@ def fetch(self, method, params): raise RPCError.from_json(res["error"]) return res["result"] + def fetch(self, method, params): + # the obvious thing to do here is dispatch into fetch_multi. + # but some providers (alchemy) can't handle batched requests + # for certain endpoints (debug_traceTransaction). + return self._raw_fetch_single(method, params) + def fetch_multi(self, payloads): reqs = [(i, m, p) for i, (m, p) in enumerate(payloads)] res = self._raw_fetch_multi(reqs) diff --git a/boa/vm/fork.py b/boa/vm/fork.py index 93f0813f..d2707eb8 100644 --- a/boa/vm/fork.py +++ b/boa/vm/fork.py @@ -12,9 +12,10 @@ from eth.db.cache import CacheDB from eth.rlp.accounts import Account from eth.vm.interrupt import MissingBytecode +from eth.vm.message import Message from eth_utils import int_to_big_endian, to_checksum_address -from boa.rpc import EthereumRPC, to_bytes, to_hex, to_int +from boa.rpc import EthereumRPC, RPCError, fixup_dict, to_bytes, to_hex, to_int from boa.util.lrudict import lrudict TIMEOUT = 60 # default timeout for http requests in seconds @@ -71,6 +72,7 @@ def get_rpc(cls, url, cache_file=DEFAULT_CACHE_DIR): def _mk_key(self, method: str, params: Any) -> Any: return json.dumps({"method": method, "params": params}).encode("utf-8") + # note: overrides super().fetch! def fetch(self, method, params): # dispatch into fetch_multi for caching behavior. (res,) = self.fetch_multi([(method, params)]) @@ -117,9 +119,9 @@ def __init__(self, *args, **kwargs): block_identifier = to_hex(block_identifier) # do not cache - use raw_fetch - self._block_info = self._rpc._raw_fetch_multi( - [(0, "eth_getBlockByNumber", [block_identifier, False])] - )[0] + self._block_info = self._rpc._raw_fetch_single( + "eth_getBlockByNumber", [block_identifier, False] + ) self._block_number = to_int(self._block_info["number"]) @property @@ -129,20 +131,26 @@ def _block_id(self): def _has_account(self, address, from_journal=True): return super()._get_encoded_account(address, from_journal) != _EMPTY - def _get_account(self, address, from_journal=True): + def _get_account_helper(self, address, from_journal=True): # cf. super impl of _get_account - # we need to override this in order so that internal uses of - # _set_account() work correctly - if from_journal and address in self._account_cache: return self._account_cache[address] rlp_account = self._get_encoded_account(address, from_journal) if rlp_account: - account = rlp.decode(rlp_account, sedes=Account) + return rlp.decode(rlp_account, sedes=Account) else: + return None + + def _get_account(self, address, from_journal=True): + # we need to override this in order so that internal uses of + # _set_account() work correctly + account = self._get_account_helper(address, from_journal) + + if account is None: account = self._get_account_rpc(address) + if from_journal: self._account_cache[address] = account @@ -163,6 +171,47 @@ def _get_account_rpc(self, address): return Account(nonce=nonce, balance=balance, code_hash=code_hash) + # try call debug_traceCall to get the ostensible prestate for this call + def try_prefetch_state(self, msg: Message): + args = fixup_dict( + { + "from": msg.sender, + "to": msg.to, + "gas": msg.gas, + "value": msg.value, + "data": msg.data, + } + ) + try: + tracer = {"tracer": "prestateTracer"} + res = self._rpc._raw_fetch_single( + "debug_traceCall", [args, self._block_id, tracer] + ) + except RPCError: + return + + # everything is returned in hex + for address, v in res.items(): + address = to_bytes(address) + + # set account if we don't already have it + if self._get_account_helper(address) is None: + balance = to_int(v.get("balance", "0x")) + code = to_bytes(v.get("code", "0x")) + nonce = v.get("nonce", 0) # already an int + self._set_account(address, Account(nonce=nonce, balance=balance)) + self.set_code(address, code) + + storage = v.get("storage", dict()) + + account_store = super()._get_address_store(address) + for hexslot, hexvalue in storage.items(): + slot = to_int(hexslot) + value = to_int(hexvalue) + # set storage if we don't already have it + if not self._helper_have_storage(address, slot): + account_store.set(slot, value) + def get_code(self, address): try: return super().get_code(address) @@ -172,19 +221,28 @@ def get_code(self, address): ) return to_bytes(ret) - def get_storage(self, address, slot, from_journal=True): - # call super to get address warming semantics - s = super().get_storage(address, slot, from_journal) - - # cf. AccountStorageDB.get() + # helper to determine if something is in the storage db + # or we need to get from RPC + def _helper_have_storage(self, address, slot, from_journal=True): + # we have the storage locally in the VM already store = super()._get_address_store(address) key = int_to_big_endian(slot) db = store._journal_storage if from_journal else store._locked_changes try: if db[key] != _EMPTY: - return s + return True except KeyError: # (it was deleted in the journal.) + return True + + return False + + def get_storage(self, address, slot, from_journal=True): + # call super to get address warming semantics + s = super().get_storage(address, slot, from_journal) + + # cf. AccountStorageDB.get() + if self._helper_have_storage(address, slot, from_journal=from_journal): return s addr = to_checksum_address(address) From 5d81e02ed66a84ebf0f6e9fb3eccc485ce169533 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 26 Oct 2023 17:14:50 -0400 Subject: [PATCH 093/122] add flag to toggle state prefetcher --- boa/environment.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/boa/environment.py b/boa/environment.py index ebf27fbe..eb28edf6 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -388,6 +388,7 @@ class Env: _coverage_enabled = False _fast_mode_enabled = False _fork_mode = False + _fork_try_prefetch_state = False def __init__(self): self.chain = _make_chain() @@ -609,7 +610,7 @@ def deploy_code( data=b"", ) - if self._fork_mode: + if self._fork_mode and self._fork_try_prefetch_state: self.vm.state._account_db.try_prefetch_state(msg) origin = sender # XXX: consider making this parametrizable From d94bc4ac17e73df94e91eb90f6364151436571fc Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 26 Oct 2023 17:32:33 -0400 Subject: [PATCH 094/122] make try_prefetch_state the default --- boa/environment.py | 2 +- boa/vm/fork.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/boa/environment.py b/boa/environment.py index eb28edf6..1a2ede32 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -388,7 +388,7 @@ class Env: _coverage_enabled = False _fast_mode_enabled = False _fork_mode = False - _fork_try_prefetch_state = False + _fork_try_prefetch_state = True def __init__(self): self.chain = _make_chain() diff --git a/boa/vm/fork.py b/boa/vm/fork.py index d2707eb8..9d294b2f 100644 --- a/boa/vm/fork.py +++ b/boa/vm/fork.py @@ -182,6 +182,8 @@ def try_prefetch_state(self, msg: Message): "data": msg.data, } ) + # TODO: skip debug_traceCall if we have seen these specific + # arguments with this specific block before try: tracer = {"tracer": "prestateTracer"} res = self._rpc._raw_fetch_single( From e6b22a22000326569f26a8d07fbaee35aab75b34 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 26 Oct 2023 18:18:33 -0400 Subject: [PATCH 095/122] fix perf issue try_prefetch_state was not writing slots with value=0x00 correctly (in a way that _helper_have_storage would detect that we do in fact have the storage). --- boa/vm/fork.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/boa/vm/fork.py b/boa/vm/fork.py index 9d294b2f..ff62cef9 100644 --- a/boa/vm/fork.py +++ b/boa/vm/fork.py @@ -13,7 +13,7 @@ from eth.rlp.accounts import Account from eth.vm.interrupt import MissingBytecode from eth.vm.message import Message -from eth_utils import int_to_big_endian, to_checksum_address +from eth_utils import int_to_big_endian, to_canonical_address, to_checksum_address from boa.rpc import EthereumRPC, RPCError, fixup_dict, to_bytes, to_hex, to_int from boa.util.lrudict import lrudict @@ -194,7 +194,7 @@ def try_prefetch_state(self, msg: Message): # everything is returned in hex for address, v in res.items(): - address = to_bytes(address) + address = to_canonical_address(address) # set account if we don't already have it if self._get_account_helper(address) is None: @@ -210,9 +210,13 @@ def try_prefetch_state(self, msg: Message): for hexslot, hexvalue in storage.items(): slot = to_int(hexslot) value = to_int(hexvalue) - # set storage if we don't already have it + # set storage if we don't already have it. + # see AccountStorageDB.get() + # note we explicitly write 0s, so that they appear + # in the journal later when called by get_storage + key = int_to_big_endian(slot) if not self._helper_have_storage(address, slot): - account_store.set(slot, value) + account_store._journal_storage[key] = rlp.encode(value) # type: ignore def get_code(self, address): try: @@ -227,23 +231,17 @@ def get_code(self, address): # or we need to get from RPC def _helper_have_storage(self, address, slot, from_journal=True): # we have the storage locally in the VM already + # cf. AccountStorageDB.get() store = super()._get_address_store(address) key = int_to_big_endian(slot) db = store._journal_storage if from_journal else store._locked_changes - try: - if db[key] != _EMPTY: - return True - except KeyError: - # (it was deleted in the journal.) - return True - return False + return key in db and db[key] != _EMPTY def get_storage(self, address, slot, from_journal=True): # call super to get address warming semantics s = super().get_storage(address, slot, from_journal) - # cf. AccountStorageDB.get() if self._helper_have_storage(address, slot, from_journal=from_journal): return s From 0fbb34fed4600784f6792432b318c6896a8d46a6 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 27 Oct 2023 09:07:57 -0400 Subject: [PATCH 096/122] disable hypothesis deadline globally --- tests/conftest.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 03d3806e..115ea1ba 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,11 @@ import os import pytest +import hypothesis + +# disable hypothesis deadline globally +hypothesis.settings.register_profile("ci", deadline=None) +hypothesis.settings.load_profile("ci") @pytest.fixture(scope="module") From b5e9fb96d1424ed5cc5a6af03391d885439c83e5 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 27 Oct 2023 09:31:35 -0400 Subject: [PATCH 097/122] set prefetch state default to true only in network mode default to false in regular fork mode (it introduces an overhead on every call which might be worse than just fetching storage lazily) --- boa/environment.py | 4 ++-- boa/network.py | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 1a2ede32..3589a794 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -388,7 +388,7 @@ class Env: _coverage_enabled = False _fast_mode_enabled = False _fork_mode = False - _fork_try_prefetch_state = True + _fork_try_prefetch_state = False def __init__(self): self.chain = _make_chain() @@ -686,7 +686,7 @@ def execute_code( ir_executor=ir_executor, contract=contract, ) - if self._fork_mode: + if self._fork_mode and self._fork_try_prefetch_state: self.vm.state._account_db.try_prefetch_state(msg) origin = sender # XXX: consider making this parametrizable diff --git a/boa/network.py b/boa/network.py index 954b4e83..eb3be100 100644 --- a/boa/network.py +++ b/boa/network.py @@ -77,6 +77,9 @@ class NetworkEnv(Env): mutable functions and contract creation via eth_sendRawTransaction. """ + # always prefetch state in network mode + _fork_try_prefetch_state = True + def __init__(self, rpc_url, accounts=None): super().__init__() @@ -190,6 +193,8 @@ def execute_code( ir_executor=None, # maybe just have **kwargs to collect extra kwargs ): # call execute_code for tracing side effects + # note: we could get a perf improvement if we ran this in + # the background while waiting on RPC network calls computation = super().execute_code( to_address=to_address, sender=sender, From b10aea249373a7841b718a67ff2b50ed63375945 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 12 Nov 2023 00:55:47 +0300 Subject: [PATCH 098/122] fix for py-evm 0.8.x --- boa/environment.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 3589a794..8ee269d3 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -222,13 +222,17 @@ def __len__(self): # ### section: sha3 preimage tracing def _stackitem_to_int(value): - assert isinstance(value, tuple) - return to_int(value[1]) # how py-evm stores stuff on stack + if isinstance(value, tuple): + return to_int(value[1]) # how py-evm<=0.8.0b1 stores stuff on stack + else: + return to_int(value) def _stackitem_to_bytes(value): - assert isinstance(value, tuple) - return to_bytes(value[1]) # how py-evm stores stuff on stack + if isinstance(value, tuple): + return to_bytes(value[1]) # how py-evm<=0.8.0b1 stores stuff on stack + else: + return to_bytes(value) class Sha3PreimageTracer: From 2038461ce5bc9f39a7932605582852e35c68a78f Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 15 Nov 2023 10:50:01 +0300 Subject: [PATCH 099/122] catch HTTPError in try prefetch state --- boa/vm/fork.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/boa/vm/fork.py b/boa/vm/fork.py index ff62cef9..ef81f73a 100644 --- a/boa/vm/fork.py +++ b/boa/vm/fork.py @@ -1,5 +1,6 @@ import os from typing import Any, Dict, Tuple +from requests import HTTPError try: import ujson as json @@ -189,7 +190,7 @@ def try_prefetch_state(self, msg: Message): res = self._rpc._raw_fetch_single( "debug_traceCall", [args, self._block_id, tracer] ) - except RPCError: + except (RPCError, HTTPError): return # everything is returned in hex From b4e66665612ea0b27c1a286dd598bb02a6a973ff Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 16 Nov 2023 10:47:01 +0300 Subject: [PATCH 100/122] quick fix: don't overwrite gas --- boa/network.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/boa/network.py b/boa/network.py index eb3be100..add60004 100644 --- a/boa/network.py +++ b/boa/network.py @@ -379,13 +379,14 @@ def _send_txn(self, from_, to=None, gas=None, value=None, data=None): tx_data["nonce"] = self._get_nonce(from_) - try: - tx_data["gas"] = self._rpc.fetch("eth_estimateGas", [tx_data]) - except RPCError as e: - if e.code == 3: - # execution failed at estimateGas, probably the txn reverted - raise _EstimateGasFailed() - raise e from e + if gas is None: + try: + tx_data["gas"] = self._rpc.fetch("eth_estimateGas", [tx_data]) + except RPCError as e: + if e.code == 3: + # execution failed at estimateGas, probably the txn reverted + raise _EstimateGasFailed() + raise e from e if from_ not in self._accounts: raise ValueError(f"Account not available: {from_}") From ce6c65ac8d4c7c208a06cb2a06f07e65d4ce9f47 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 27 Nov 2023 07:23:55 -0800 Subject: [PATCH 101/122] fix: precompile type --- boa/environment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/boa/environment.py b/boa/environment.py index 8ee269d3..e1f3915c 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -161,11 +161,11 @@ def register_raw_precompile(address, fn, force=False): address = Address(address) if address in _precompiles and not force: raise ValueError(f"Already registered: {address}") - _precompiles[address] = fn + _precompiles[address.canonical_address] = fn def deregister_raw_precompile(address, force=True): - address = Address(address) + address = Address(address).canonical_address if address not in _precompiles and not force: raise ValueError("Not registered: {address}") _precompiles.pop(address, None) From e927974d70ae1d89f36a2a2170e89fc34b726dd6 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 1 Dec 2023 21:50:28 -0500 Subject: [PATCH 102/122] protect unoptimized_bytecode with evm_version --- boa/vm/fork.py | 1 + boa/vyper/contract.py | 9 +++++---- tests/conftest.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/boa/vm/fork.py b/boa/vm/fork.py index ef81f73a..8dd6debc 100644 --- a/boa/vm/fork.py +++ b/boa/vm/fork.py @@ -1,5 +1,6 @@ import os from typing import Any, Dict, Tuple + from requests import HTTPError try: diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 54d908cb..999f4df4 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -831,10 +831,11 @@ def data_section(self): @cached_property def unoptimized_bytecode(self): - s, _ = compile_ir.assembly_to_evm( - self.unoptimized_assembly, insert_vyper_signature=True - ) - return s + self.data_section + with anchor_evm_version(self.compiler_data.settings.evm_version): + s, _ = compile_ir.assembly_to_evm( + self.unoptimized_assembly, insert_vyper_signature=True + ) + return s + self.data_section @cached_property def unoptimized_ir(self): diff --git a/tests/conftest.py b/tests/conftest.py index 115ea1ba..1068f8c6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,7 @@ import os -import pytest import hypothesis +import pytest # disable hypothesis deadline globally hypothesis.settings.register_profile("ci", deadline=None) From aa9a10c39d8ce31382a856deb48d12f4dd9d2071 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 1 Dec 2023 22:20:16 -0500 Subject: [PATCH 103/122] implement from_etherscan --- boa/__init__.py | 11 ++++++++++- boa/explorer.py | 27 +++++++++++++++++++++++++++ boa/interpret.py | 12 +++++++++++- boa/network.py | 1 + 4 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 boa/explorer.py diff --git a/boa/__init__.py b/boa/__init__.py index 565ca4e6..1fdef431 100644 --- a/boa/__init__.py +++ b/boa/__init__.py @@ -5,7 +5,16 @@ from boa.debugger import BoaDebug from boa.environment import Env, enable_pyevm_verbose_logging, patch_opcode -from boa.interpret import BoaError, load, load_abi, load_partial, loads, loads_abi, loads_partial +from boa.interpret import ( + BoaError, + from_etherscan, + load, + load_abi, + load_partial, + loads, + loads_abi, + loads_partial, +) from boa.precompile import precompile from boa.test.strategies import fuzz from boa.vyper.contract import check_boa_error_matches diff --git a/boa/explorer.py b/boa/explorer.py new file mode 100644 index 00000000..44779b46 --- /dev/null +++ b/boa/explorer.py @@ -0,0 +1,27 @@ +import json + +import requests + +from boa.environment import Address + +SESSION = requests.Session() + + +def fetch_abi_from_etherscan( + address: str, uri: str = "https://api.etherscan.io/api", api_key: str = None +): + address = Address(address) + + params = dict(module="contract", action="getabi", address=address) + if api_key is not None: + params["apikey"] = api_key + + res = SESSION.get(uri, params=params) + res.raise_for_status() + + data = res.json() + + if int(data["status"]) != 1: + raise ValueError(f"Failed to retrieve data from API: {data}") + + return json.loads(data["result"].strip()) diff --git a/boa/interpret.py b/boa/interpret.py index d3cd3906..a2150adb 100644 --- a/boa/interpret.py +++ b/boa/interpret.py @@ -1,12 +1,14 @@ import json import textwrap from pathlib import Path -from typing import Union +from typing import Any, Union import vyper from vyper.cli.vyper_compile import get_interface_codes from vyper.compiler.phases import CompilerData +from boa.environment import Address +from boa.explorer import fetch_abi_from_etherscan from boa.util.disk_cache import DiskCache from boa.vyper.contract import ( ABIContractFactory, @@ -114,4 +116,12 @@ def load_partial(filename: str, compiler_args=None) -> VyperDeployer: # type: i ) +def from_etherscan( + address: Any, name=None, uri="https://api.etherscan.io/api", api_key=None +): + addr = Address(address) + abi = fetch_abi_from_etherscan(addr, uri, api_key) + return ABIContractFactory.from_abi_dict(abi, name=name).at(addr) + + __all__ = ["BoaError"] diff --git a/boa/network.py b/boa/network.py index add60004..824aad80 100644 --- a/boa/network.py +++ b/boa/network.py @@ -391,6 +391,7 @@ def _send_txn(self, from_, to=None, gas=None, value=None, data=None): if from_ not in self._accounts: raise ValueError(f"Account not available: {from_}") account = self._accounts[from_] + if hasattr(account, "sign_transaction"): signed = account.sign_transaction(tx_data) From 5860aa429041bedfdb41c946b67cb2d80e99a0df Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 1 Dec 2023 22:36:59 -0500 Subject: [PATCH 104/122] add docs for from_etherscan (and load_abi/loads_abi) --- docs/source/api.rst | 58 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/docs/source/api.rst b/docs/source/api.rst index 6fac31e3..e21454f7 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -112,6 +112,64 @@ High-Level Functionality >>> boa.loads_partial(src, "Foo") + +.. function:: load_abi(filename: str, name: str = None) -> ABIContractFactory + + Return a :py:class:`ABIContractFactory` from an ABI file (.json) + + :param filename: The file containing the ABI as a JSON string (something like ``my_abi.json``) + :param name: The name of the contract. + :returns: A :py:class:`ABIContractFactory` factory instance. + + .. rubric:: Example + + .. code-block:: python + + >>> import boa + >>> filename = "foo.json" + >>> boa.load_abi(src, name="Foo") + + + +.. function:: loads_abi(json_str: str, name: str = None) -> ABIContractFactory + + Return a :py:class:`ABIContractFactory` from an ABI string + + :param json_str: The ABI as a JSON string (something which can be passed to ``json.loads()``) + :param name: The name of the contract. + :returns: A :py:class:`ABIContractFactory` factory instance. + + .. rubric:: Example + + .. code-block:: python + + >>> import boa + >>> src = """[{"stateMutability": "nonpayable", "type": "function", "name": "foo", "inputs": [{"name": "", "type": "bytes"}], "outputs": [{"name": "", "type": "bytes"}]}]""" + >>> boa.loads_abi(src, name="Foo") + + + +.. function:: from_etherscan(address: str | bytes | Address, name: str = None, uri: str = "https://api.etherscan.io/api", api_key: str = None) -> ABIContract + + Fetch the ABI for an address from etherscan and return an :py:class:`ABIContract` + + :param address: The address. Can be str, bytes or Address + :param name: (Optional) The name of the contract. + :returns: A :py:class:`ABIContract` instance. + + .. rubric:: Example + + .. code-block:: python + + >>> import boa, os + >>> boa.env.fork(os.environ["ALCHEMY_MAINNET_ENDPOINT"]) + >>> crvusd = boa.from_etherscan("0xf939E0A03FB07F59A73314E73794Be0E57ac1b4E", name="crvUSD") + >>> crvusd + + >>> crvusd.totalSupply() + 730773174461124520709282012 + + .. function:: eval(statement: str) -> Any Evaluate a Vyper statement in the context of a contract with no state. From 61333e891941cc6344419d9d0cd8609ad2ef517d Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 7 Dec 2023 12:28:21 -0500 Subject: [PATCH 105/122] fix compiler optimizer settings for source map --- boa/vyper/compiler_utils.py | 11 ++++++++++- boa/vyper/contract.py | 16 ++++++++++------ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/boa/vyper/compiler_utils.py b/boa/vyper/compiler_utils.py index c639aedb..71e43b1c 100644 --- a/boa/vyper/compiler_utils.py +++ b/boa/vyper/compiler_utils.py @@ -1,8 +1,10 @@ +import contextlib import textwrap import vyper.ast as vy_ast import vyper.semantics.analysis as analysis from vyper.ast.utils import parse_to_ast +from vyper.codegen.core import anchor_opt_level from vyper.codegen.function_definitions import generate_ir_for_function from vyper.codegen.ir_node import IRnode from vyper.evm.opcodes import anchor_evm_version @@ -14,6 +16,13 @@ from boa.vyper.ir_executor import executor_from_ir +@contextlib.contextmanager +def anchor_compiler_settings(compiler_data): + settings = compiler_data.settings + with anchor_opt_level(settings.optimize), anchor_evm_version(settings.evm_version): + yield + + def compile_vyper_function(vyper_function, contract): """Compiles a vyper function and appends it to the top of the IR of a contract. This is useful for vyper `eval` and internal functions, where @@ -24,7 +33,7 @@ def compile_vyper_function(vyper_function, contract): compiler_data = contract.compiler_data - with anchor_evm_version(compiler_data.settings.evm_version): + with anchor_compiler_settings(compiler_data): global_ctx = contract.global_ctx ifaces = compiler_data.interface_codes ast = parse_to_ast(vyper_function, ifaces) diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 999f4df4..34536f37 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -42,6 +42,7 @@ from boa.vyper import _METHOD_ID_VAR from boa.vyper.ast_utils import ast_map_of, get_fn_ancestor_from_node, reason_at from boa.vyper.compiler_utils import ( + anchor_compiler_settings, compile_vyper_function, generate_bytecode_for_arbitrary_stmt, generate_bytecode_for_internal_fn, @@ -65,7 +66,7 @@ def __init__(self, compiler_data, filename=None): # force compilation so that if there are any errors in the contract, # we fail at load rather than at deploy time. - with anchor_evm_version(compiler_data.settings.evm_version): + with anchor_compiler_settings(self.compiler_data): _ = compiler_data.bytecode self.filename = filename @@ -108,6 +109,9 @@ class _BaseContract: def __init__(self, compiler_data, env=None, filename=None): self.compiler_data = compiler_data + with anchor_compiler_settings(self.compiler_data): + _ = compiler_data.bytecode + if env is None: env = Env.get_singleton() @@ -611,7 +615,7 @@ def global_ctx(self): @property def source_map(self): if self._source_map is None: - with anchor_evm_version(self.compiler_data.settings.evm_version): + with anchor_compiler_settings(self.compiler_data): _, self._source_map = compile_ir.assembly_to_evm( self.compiler_data.assembly_runtime ) @@ -759,7 +763,7 @@ def _ast_module(self): module = copy.deepcopy(self.compiler_data.vyper_module) # do the same thing as vyper_module_folded but skip getter expansion - with anchor_evm_version(self.compiler_data.settings.evm_version): + with anchor_compiler_settings(self.compiler_data): vy_ast.folding.fold(module) with vy_ns.get_namespace().enter_scope(): analysis.add_module_namespace( @@ -812,7 +816,7 @@ def override_vyper_namespace(self): @cached_property def unoptimized_assembly(self): with anchor_evm_version(self.compiler_data.settings.evm_version): - runtime = self.compiler_data.ir_runtime + runtime = self.unoptimized_ir[1] return compile_ir.compile_to_assembly( runtime, optimize=OptimizationLevel.NONE ) @@ -847,8 +851,8 @@ def unoptimized_ir(self): @cached_property def ir_executor(self): _, ir_runtime = self.unoptimized_ir - # TODO: check if this needs anchor_evm_version - return executor_from_ir(ir_runtime, self.compiler_data) + with anchor_evm_version(self.compiler_data.settings.evm_version): + return executor_from_ir(ir_runtime, self.compiler_data) @contextlib.contextmanager def _anchor_source_map(self, source_map): From 04e7ac65d24ca5cc213c36127a90bd06be48334c Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 7 Dec 2023 13:33:02 -0500 Subject: [PATCH 106/122] force bytecode_runtime to be generated --- boa/interpret.py | 3 ++- boa/vyper/contract.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/boa/interpret.py b/boa/interpret.py index a2150adb..95006936 100644 --- a/boa/interpret.py +++ b/boa/interpret.py @@ -50,7 +50,8 @@ def _ifaces(): def func(): ifaces = _ifaces() ret = CompilerData(source_code, contract_name, interface_codes=ifaces, **kwargs) - _ = ret.bytecode_runtime # force compilation to happen + with anchor_compiler_settings(ret) + _ = ret.bytecode, ret.bytecode_runtime # force compilation to happen return ret return _disk_cache.caching_lookup(str((kwargs, source_code)), func) diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 34536f37..9b52644c 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -67,7 +67,7 @@ def __init__(self, compiler_data, filename=None): # force compilation so that if there are any errors in the contract, # we fail at load rather than at deploy time. with anchor_compiler_settings(self.compiler_data): - _ = compiler_data.bytecode + _ = compiler_data.bytecode, compiler_data.bytecode_runtime self.filename = filename @@ -110,7 +110,7 @@ def __init__(self, compiler_data, env=None, filename=None): self.compiler_data = compiler_data with anchor_compiler_settings(self.compiler_data): - _ = compiler_data.bytecode + _ = compiler_data.bytecode, compiler_data.bytecode_runtime if env is None: env = Env.get_singleton() From 03949fe9e3b1c15b8d88dd169b4f5e44fb64fae0 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 7 Dec 2023 13:35:35 -0500 Subject: [PATCH 107/122] fix a bad import --- boa/interpret.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/boa/interpret.py b/boa/interpret.py index 95006936..2c076ff9 100644 --- a/boa/interpret.py +++ b/boa/interpret.py @@ -10,6 +10,7 @@ from boa.environment import Address from boa.explorer import fetch_abi_from_etherscan from boa.util.disk_cache import DiskCache +from boa.vyper.compiler_utils import anchor_compiler_settings from boa.vyper.contract import ( ABIContractFactory, BoaError, @@ -50,7 +51,7 @@ def _ifaces(): def func(): ifaces = _ifaces() ret = CompilerData(source_code, contract_name, interface_codes=ifaces, **kwargs) - with anchor_compiler_settings(ret) + with anchor_compiler_settings(ret): _ = ret.bytecode, ret.bytecode_runtime # force compilation to happen return ret From 8ce2a962a6eabf49c101323dec0780e1809c97dd Mon Sep 17 00:00:00 2001 From: Daniel Schiavini Date: Thu, 7 Dec 2023 20:56:39 +0100 Subject: [PATCH 108/122] Avoid KeyError for unspecified ABI traces --- boa/vyper/contract.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 9b52644c..4d45ecc4 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -1102,7 +1102,8 @@ def method_id_map(self): def stack_trace(self, computation=None): computation = computation or self._computation calldata_method_id = bytes(computation.msg.data[:4]) - abi_sig = self.method_id_map[calldata_method_id] + # when the method isn't specified in the ABI, it's not present in the map + abi_sig = self.method_id_map.get(calldata_method_id, "?") ret = StackTrace([f" (unknown location in {self}.{abi_sig})"]) return _handle_child_trace(computation, self.env, ret) From 767423bf88382ae0ab6b0dd19bf1d33873bb3224 Mon Sep 17 00:00:00 2001 From: Daniel Schiavini Date: Thu, 7 Dec 2023 21:06:17 +0100 Subject: [PATCH 109/122] Update fallback string to give the method ID --- boa/vyper/contract.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 4d45ecc4..73eb9392 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -1103,7 +1103,7 @@ def stack_trace(self, computation=None): computation = computation or self._computation calldata_method_id = bytes(computation.msg.data[:4]) # when the method isn't specified in the ABI, it's not present in the map - abi_sig = self.method_id_map.get(calldata_method_id, "?") + abi_sig = self.method_id_map.get(calldata_method_id, f"({calldata_method_id})") ret = StackTrace([f" (unknown location in {self}.{abi_sig})"]) return _handle_child_trace(computation, self.env, ret) From a326e930f6af8a438427ac8f014e662014d90c76 Mon Sep 17 00:00:00 2001 From: Daniel Schiavini Date: Fri, 8 Dec 2023 09:26:43 +0100 Subject: [PATCH 110/122] Improve error message --- boa/vyper/contract.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 73eb9392..e7e02213 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -1102,10 +1102,15 @@ def method_id_map(self): def stack_trace(self, computation=None): computation = computation or self._computation calldata_method_id = bytes(computation.msg.data[:4]) - # when the method isn't specified in the ABI, it's not present in the map - abi_sig = self.method_id_map.get(calldata_method_id, f"({calldata_method_id})") - ret = StackTrace([f" (unknown location in {self}.{abi_sig})"]) - return _handle_child_trace(computation, self.env, ret) + return_trace = StackTrace( + [ + f" (unknown location in {self}.{self.method_id_map[calldata_method_id]})" + if calldata_method_id in self.method_id_map + # Method might not be specified in the ABI + else f" (unknown method {calldata_method_id} in {self})" + ] + ) + return _handle_child_trace(computation, self.env, return_trace) @property def deployer(self): From e85ab24e644591ebf235218a267724f90fed6118 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 9 Dec 2023 20:05:45 -0500 Subject: [PATCH 111/122] pretty up method ID --- boa/vyper/contract.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index e7e02213..b7b79284 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -1102,14 +1102,14 @@ def method_id_map(self): def stack_trace(self, computation=None): computation = computation or self._computation calldata_method_id = bytes(computation.msg.data[:4]) - return_trace = StackTrace( - [ - f" (unknown location in {self}.{self.method_id_map[calldata_method_id]})" - if calldata_method_id in self.method_id_map - # Method might not be specified in the ABI - else f" (unknown method {calldata_method_id} in {self})" - ] - ) + + if calldata_method_id in self.method_id_map: + msg = f" (unknown location in {self}.{self.method_id_map[calldata_method_id]})" + else: + # Method might not be specified in the ABI + msg = f" (unknown method id {self}.0x{calldata_method_id.hex()})" + + return_trace = StackTrace([msg]) return _handle_child_trace(computation, self.env, return_trace) @property From 6d1faa3ba5b6a5c7823a7313b74b73dd551aebe4 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 13 Dec 2023 13:54:34 -0500 Subject: [PATCH 112/122] add gas tracker allows you to track overall gas used in the environment use `env.reset_gas_used()` to reset gas used to 0! --- boa/environment.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/boa/environment.py b/boa/environment.py index e1f3915c..edca1502 100644 --- a/boa/environment.py +++ b/boa/environment.py @@ -412,6 +412,8 @@ def __init__(self): self._cached_line_profiles = {} self._coverage_data = {} + self._gas_tracker = 0 + self.sha3_trace = {} self.sstore_trace = {} @@ -538,6 +540,13 @@ def lookup_alias(self, address): def _reset_access_counters(self): self.vm.state._account_db._reset_access_counters() + def get_gas_used(self): + return self._gas_tracker + + def reset_gas_used(self): + self._gas_tracker = 0 + self._reset_access_counters() + # context manager which snapshots the state and reverts # to the snapshot on exiting the with statement @contextlib.contextmanager @@ -582,6 +591,9 @@ def _get_sender(self, sender=None) -> PYEVM_Address: raise ValueError(f"{self}.eoa not defined!") return Address(sender).canonical_address + def _update_gas_used(self, gas_used: int): + self._gas_tracker += gas_used + def deploy_code( self, sender: Optional[_AddressType] = None, @@ -623,6 +635,9 @@ def deploy_code( self.vm.state, msg, tx_ctx ) + if c._gas_meter_class != NoGasMeter: + self._update_gas_used(c.get_gas_used()) + if c.is_error: raise c.error @@ -701,6 +716,9 @@ def execute_code( if self._coverage_enabled: self._hook_trace_computation(ret, contract) + if ret._gas_meter_class != NoGasMeter: + self._update_gas_used(ret.get_gas_used()) + return ret def _hook_trace_computation(self, computation, contract=None): From 878da481a5adc3963b07b22355c80a85a9cfecb7 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 15 Dec 2023 07:15:06 -0500 Subject: [PATCH 113/122] catch HTTPError when calculating `NetworkEnv._tracer` and unknown RPCErrors --- boa/network.py | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/boa/network.py b/boa/network.py index 824aad80..a77d3aa0 100644 --- a/boa/network.py +++ b/boa/network.py @@ -7,6 +7,7 @@ from math import ceil from eth_account import Account +from requests.exceptions import HTTPError from boa.environment import Address, Env from boa.rpc import EthereumRPC, RPCError, fixup_dict, to_bytes, to_hex, to_int, trim_dict @@ -323,28 +324,39 @@ def _wait_for_tx_trace(self, tx_hash, poll_latency=0.25): @cached_property def _tracer(self): + def _warn_no_tracer(): + warnings.warn( + "debug_traceTransaction not available! " + "titanoboa will try hard to interact with the network, but " + "this means that titanoboa is not able to do certain " + "safety checks at runtime. it is recommended to switch " + "to a node or provider with debug_traceTransaction.", + stacklevel=3, + ) + try: txn_hash = "0x" + "00" * 32 # alchemy only can do callTracer, plus it has lowest # overhead. call_tracer = {"tracer": "callTracer", "onlyTopCall": True} self._rpc.fetch("debug_traceTransaction", [txn_hash, call_tracer]) + except RPCError as e: - # -32600 is alchemy unpaid tier error message - # -32601 is infura error message (if i recall correctly) - if e.code in (-32601, -32600): - warnings.warn( - "debug_traceTransaction not available! " - "titanoboa will try hard to interact with the network, but " - "this means that titanoboa is not able to do certain " - "safety checks at runtime. it is recommended to switch " - "to a node or provider with debug_traceTransaction.", - stacklevel=2, - ) - return None # can't handle callTracer, use default (i.e. structLogs) if e.code == -32602: return {} + + # catchall - just don't have a tracer + # note on error codes: + # -32600 is alchemy unpaid tier error message + # -32601 is infura error message (if i recall correctly) + _warn_no_tracer() + return None + + except HTTPError: + _warn_no_tracer() + return None + return call_tracer def _get_nonce(self, addr): From 9754a1423ec21c922e692e96127ac4afb14a05c1 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 16 Dec 2023 08:56:31 -0500 Subject: [PATCH 114/122] fix: missing not None check --- boa/vyper/contract.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index b7b79284..bd2fc424 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -307,8 +307,8 @@ def _check(cond, msg=""): _check( err == frame.pretty_vm_reason or err == frame.error_detail - or err == frame.dev_reason.reason_str, - "does not match {args}", + or (frame.dev_reason and err == frame.dev_reason.reason_str), + f"does not match {args}", ) return From cb0d4fbdb17455635ed84716a5a13e05edb80236 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 16 Dec 2023 09:12:12 -0500 Subject: [PATCH 115/122] fix formatting of an error --- boa/vyper/contract.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index bd2fc424..62e41a05 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -326,7 +326,7 @@ def _check(cond, msg=""): _check( frame.error_detail == "user revert with reason" and v == frame.pretty_vm_reason, - f"{frame.vm_error} != {v}", + f"{frame.pretty_vm_reason} != {v}", ) # assume it is a dev reason string else: From a2f50a887059e58aeb9c64659dabc7823bb0cae5 Mon Sep 17 00:00:00 2001 From: Daniel Schiavini Date: Mon, 18 Dec 2023 17:36:09 +0100 Subject: [PATCH 116/122] Fix pre-commit checks --- .pre-commit-config.yaml | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b746b6c5..13b11e6a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,26 +2,28 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.1.0 + rev: v4.5.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer - id: check-yaml - - repo: https://github.com/psf/black - rev: 22.3.0 + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 23.12.0 hooks: - id: black args: - - -C - - --target-version=py38 + - --skip-magic-trailing-comma + - --target-version=py310 - repo: https://github.com/PyCQA/flake8 - rev: 3.9.2 + rev: 6.1.0 hooks: - id: flake8 - - repo: https://github.com/pre-commit/mirrors-isort - rev: v5.10.1 + - repo: https://github.com/PyCQA/isort + rev: 5.13.2 hooks: - id: isort + # profile and line-length to avoid clashes with black + args: ["--profile=black", "--line-length=88"] default_language_version: python: python3.10 From de827d03203c73927381198ac4de35e5d4b53667 Mon Sep 17 00:00:00 2001 From: Daniel Schiavini Date: Mon, 18 Dec 2023 17:40:06 +0100 Subject: [PATCH 117/122] Run pre-commit for all files --- boa/network.py | 10 +++++++++- docs/source/coverage.rst | 7 +++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/boa/network.py b/boa/network.py index a77d3aa0..b4b647a7 100644 --- a/boa/network.py +++ b/boa/network.py @@ -10,7 +10,15 @@ from requests.exceptions import HTTPError from boa.environment import Address, Env -from boa.rpc import EthereumRPC, RPCError, fixup_dict, to_bytes, to_hex, to_int, trim_dict +from boa.rpc import ( + EthereumRPC, + RPCError, + fixup_dict, + to_bytes, + to_hex, + to_int, + trim_dict, +) class TraceObject: diff --git a/docs/source/coverage.rst b/docs/source/coverage.rst index 01637936..04a5372f 100644 --- a/docs/source/coverage.rst +++ b/docs/source/coverage.rst @@ -11,9 +11,9 @@ To use, add the following to ``.coveragerc``: plugins = boa.coverage (for more information see https://coverage.readthedocs.io/en/latest/config.html) - + Then, run with ``coverage run ...`` - + To run with pytest, it can be invoked in either of two ways, .. code-block:: @@ -29,6 +29,5 @@ or, `pytest-cov `_ is a wrapper around ``coverage.py`` for using with pytest; using it is recommended because it smooths out some quirks of using ``coverage.py`` with pytest. Finally, ``coverage.py`` saves coverage data to a file named ``.coverage`` in the directory it is run in. To view the formatted coverage data, you typically want to use ``coverage report`` or ``coverage html``. See more options at https://coverage.readthedocs.io/en/latest/cmd.html. - -Coverage is experimental and there may be odd corner cases! If so, please report them on github or in the ``#titanoboa-interpreter`` channel of the `Vyper discord `_. +Coverage is experimental and there may be odd corner cases! If so, please report them on github or in the ``#titanoboa-interpreter`` channel of the `Vyper discord `_. From 0edeb87b6a31dd11ffb663c7428edb0ba233e07b Mon Sep 17 00:00:00 2001 From: Daniel Schiavini Date: Wed, 20 Dec 2023 16:37:25 +0100 Subject: [PATCH 118/122] Create PR template --- .github/PULL_REQUEST_TEMPLATE.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 .github/PULL_REQUEST_TEMPLATE.md diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..baa8deca --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,15 @@ +### What I did + +### How I did it + +### How to verify it + +### Commit message + +Commit message for the final, squashed PR. (Optional, but reviewers will appreciate it! Please see [our commit message style guide](../../master/docs/style-guide.rst#best-practices-1) for what we would ideally like to see in a commit message.) + +### Description for the changelog + +### Cute Animal Picture + +![Put a link to a cute animal picture inside the parenthesis-->]() From b0099d522fe6f924a761d014d1c261f5406289fc Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 21 Dec 2023 12:22:46 -0500 Subject: [PATCH 119/122] improve some metadata on VyperFunction improves how it looks in ipython help (ex. `contract.function?`) --- boa/vyper/contract.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index 62e41a05..a1b947e3 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -919,9 +919,15 @@ def __init__(self, fn_ast, contract): self.contract = contract self.env = contract.env + self.__doc__ = fn_ast.doc_string.value + self.__module__ = self.contract.compiler_data.contract_name + def __repr__(self): return f"{self.contract.compiler_data.contract_name}.{self.fn_ast.name}" + def __str__(self): + return repr(self.func_t) + @cached_property def _source_map(self): return self.contract.source_map From 7640b1b1c263321bff48d15c0e1e010a37cf5fc3 Mon Sep 17 00:00:00 2001 From: Daniel Schiavini Date: Fri, 22 Dec 2023 09:10:24 +0100 Subject: [PATCH 120/122] Check if there is a doc_string --- boa/vyper/contract.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/boa/vyper/contract.py b/boa/vyper/contract.py index a1b947e3..fdd13726 100644 --- a/boa/vyper/contract.py +++ b/boa/vyper/contract.py @@ -919,7 +919,9 @@ def __init__(self, fn_ast, contract): self.contract = contract self.env = contract.env - self.__doc__ = fn_ast.doc_string.value + self.__doc__ = ( + fn_ast.doc_string.value if hasattr(fn_ast, "doc_string") else None + ) self.__module__ = self.contract.compiler_data.contract_name def __repr__(self): From f3d9cf0b6df70cd7aa0ec9a5eb737ef3630900d3 Mon Sep 17 00:00:00 2001 From: Daniel Schiavini Date: Fri, 22 Dec 2023 09:11:07 +0100 Subject: [PATCH 121/122] Remove commit message since commits aren't squashed --- .github/PULL_REQUEST_TEMPLATE.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index baa8deca..40c2e5f5 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -4,10 +4,6 @@ ### How to verify it -### Commit message - -Commit message for the final, squashed PR. (Optional, but reviewers will appreciate it! Please see [our commit message style guide](../../master/docs/style-guide.rst#best-practices-1) for what we would ideally like to see in a commit message.) - ### Description for the changelog ### Cute Animal Picture From 4d75e780382a9cf4cb537847e403e8790482dbe5 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 5 Jan 2024 09:25:58 -0500 Subject: [PATCH 122/122] fix decoder for strings stop was getting reduced by floor32(start - floor32(start)) instead of floor32(start). --- boa/vyper/decoder_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/boa/vyper/decoder_utils.py b/boa/vyper/decoder_utils.py index 050219d2..7f19d828 100644 --- a/boa/vyper/decoder_utils.py +++ b/boa/vyper/decoder_utils.py @@ -34,8 +34,9 @@ def __getitem__(self, subscript): ret += self.db.get_storage(self.address, i).to_bytes(32, "big") i += 1 - start -= floor32(start) - stop -= floor32(start) + start_ofst = floor32(start) + start -= start_ofst + stop -= start_ofst return memoryview(ret[start:stop]) else: raise Exception("Must slice {self}")