From 608feda9910ac04da10f2b3fc089d880068cecfa Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 27 May 2024 17:04:03 +0300 Subject: [PATCH 01/53] feat[venom]: improve unused variable removal pass (#4055) This commit improves the variable elimination pass by efficiently removing all instructions that produce unused output. This new algorithm removes all unused variables in one run. Additionally, it adds the utility functions `pop()` and `addmany()` to the `OrderedSet()` class. The `DFTAnalysis` is also augmented with a method to remove uses: `remove_use(self, op: IRVariable, inst: IRInstruction)` --- vyper/utils.py | 7 +++ vyper/venom/__init__.py | 1 - vyper/venom/analysis/dfg.py | 4 ++ vyper/venom/passes/remove_unused_variables.py | 63 +++++++++---------- 4 files changed, 40 insertions(+), 35 deletions(-) diff --git a/vyper/utils.py b/vyper/utils.py index a1fed4087c..2b95485f4e 100644 --- a/vyper/utils.py +++ b/vyper/utils.py @@ -45,9 +45,16 @@ def __len__(self): def first(self): return next(iter(self)) + def pop(self): + return self._data.popitem()[0] + def add(self, item: _T) -> None: self._data[item] = None + def addmany(self, iterable): + for item in iterable: + self._data[item] = None + def remove(self, item: _T) -> None: del self._data[item] diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index 1abd240b5f..6bbcedaade 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -6,7 +6,6 @@ from vyper.codegen.ir_node import IRnode from vyper.compiler.settings import OptimizationLevel from vyper.venom.analysis.analysis import IRAnalysesCache -from vyper.venom.analysis.liveness import LivenessAnalysis from vyper.venom.context import IRContext from vyper.venom.function import IRFunction from vyper.venom.ir_node_to_venom import ir_node_to_venom diff --git a/vyper/venom/analysis/dfg.py b/vyper/venom/analysis/dfg.py index dc7076d5de..2fb172a979 100644 --- a/vyper/venom/analysis/dfg.py +++ b/vyper/venom/analysis/dfg.py @@ -22,6 +22,10 
@@ def get_uses(self, op: IRVariable) -> list[IRInstruction]: def get_producing_instruction(self, op: IRVariable) -> Optional[IRInstruction]: return self._dfg_outputs.get(op) + def remove_use(self, op: IRVariable, inst: IRInstruction): + uses = self._dfg_inputs.get(op, []) + uses.remove(inst) + @property def outputs(self) -> dict[IRVariable, IRInstruction]: return self._dfg_outputs diff --git a/vyper/venom/passes/remove_unused_variables.py b/vyper/venom/passes/remove_unused_variables.py index 8307bdae40..653bab57d6 100644 --- a/vyper/venom/passes/remove_unused_variables.py +++ b/vyper/venom/passes/remove_unused_variables.py @@ -1,47 +1,42 @@ +from vyper.utils import OrderedSet from vyper.venom.analysis.dfg import DFGAnalysis -from vyper.venom.analysis.liveness import LivenessAnalysis -from vyper.venom.basicblock import IRBasicBlock +from vyper.venom.basicblock import IRInstruction from vyper.venom.passes.base_pass import IRPass class RemoveUnusedVariablesPass(IRPass): - def run_pass(self): - self.analyses_cache.request_analysis(LivenessAnalysis) - - for bb in self.function.get_basic_blocks(): - self._remove_unused_variables(bb) + """ + This pass removes instructions that produce output that is never used. + """ - self.analyses_cache.invalidate_analysis(DFGAnalysis) + dfg: DFGAnalysis + work_list: OrderedSet[IRInstruction] - def _remove_unused_variables(self, bb: IRBasicBlock): - """ - Remove the instructions of a basicblock that produce output that is never used. 
- """ - i = 0 - while i < len(bb.instructions) - 1: - inst = bb.instructions[i] - i += 1 + def run_pass(self): + self.dfg = self.analyses_cache.request_analysis(DFGAnalysis) - # Skip volatile instructions - if inst.volatile: - continue + work_list = OrderedSet() + self.work_list = work_list - # Skip instructions without output - if inst.output is None: - continue + uses = self.dfg.outputs.values() + work_list.addmany(uses) - # Skip instructions that produce output that is used - next_liveness = bb.instructions[i].liveness - if inst.output in next_liveness: - continue + while len(work_list) > 0: + inst = work_list.pop() + self._process_instruction(inst) - # Remove the rest - del bb.instructions[i - 1] + def _process_instruction(self, inst): + if inst.output is None: + return + if inst.volatile: + return + uses = self.dfg.get_uses(inst.output) + if len(uses) > 0: + return - # backtrack to the *previous* instruction, in case we removed - # an instruction which had prevented the previous instruction - # from being removed - i -= 2 + for operand in inst.get_inputs(): + self.dfg.remove_use(operand, inst) + new_uses = self.dfg.get_uses(operand) + self.work_list.addmany(new_uses) - # don't go beyond 0 though - i = max(0, i) + inst.parent.remove_instruction(inst) From 96a83842facde6f1bc75b534ad4689ea82d29abd Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 28 May 2024 04:48:43 -0700 Subject: [PATCH 02/53] refactor[tool]: refactor storage layout export (#3789) refactor storage layout allocator. separate concerns of allocating the storage layout and exporting the storage layout into separate functions. 
this is intended to make it easier to add features to the storage layout export in the future fix several bugs in storage layout overrides, including: - handle stateful modules - add a sanity check that the override file roundtrips - ignore non-storage variables in override files - set nonreentrant lock properly for all functions instead of panicking misc: add `n_slots` to each storage layout item in the export --- tests/unit/cli/storage_layout/__init__.py | 0 .../cli/storage_layout/test_storage_layout.py | 80 ++--- .../test_storage_layout_overrides.py | 296 +++++++++++++++++- tests/unit/cli/storage_layout/utils.py | 17 + vyper/compiler/phases.py | 5 +- vyper/semantics/analysis/base.py | 6 +- vyper/semantics/analysis/data_positions.py | 247 ++++++++++----- 7 files changed, 499 insertions(+), 152 deletions(-) create mode 100644 tests/unit/cli/storage_layout/__init__.py create mode 100644 tests/unit/cli/storage_layout/utils.py diff --git a/tests/unit/cli/storage_layout/__init__.py b/tests/unit/cli/storage_layout/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/cli/storage_layout/test_storage_layout.py b/tests/unit/cli/storage_layout/test_storage_layout.py index ece2743b81..d490d2008f 100644 --- a/tests/unit/cli/storage_layout/test_storage_layout.py +++ b/tests/unit/cli/storage_layout/test_storage_layout.py @@ -1,21 +1,6 @@ from vyper.compiler import compile_code -from vyper.evm.opcodes import version_check - -def _adjust_storage_layout_for_cancun(layout): - def _go(layout): - for _varname, item in layout.items(): - if "slot" in item and isinstance(item["slot"], int): - item["slot"] -= 1 - else: - # recurse to submodule - _go(item) - - if version_check(begin="cancun"): - layout["transient_storage_layout"] = { - "$.nonreentrant_key": layout["storage_layout"].pop("$.nonreentrant_key") - } - _go(layout["storage_layout"]) +from .utils import adjust_storage_layout_for_cancun def test_storage_layout(): @@ -55,19 +40,18 @@ def 
public_foo3(): pass """ - out = compile_code(code, output_formats=["layout"]) - expected = { "storage_layout": { - "$.nonreentrant_key": {"slot": 0, "type": "nonreentrant lock"}, - "foo": {"slot": 1, "type": "HashMap[address, uint256]"}, - "arr": {"slot": 2, "type": "DynArray[uint256, 3]"}, - "baz": {"slot": 6, "type": "Bytes[65]"}, - "bar": {"slot": 10, "type": "uint256"}, + "$.nonreentrant_key": {"slot": 0, "type": "nonreentrant lock", "n_slots": 1}, + "foo": {"slot": 1, "type": "HashMap[address, uint256]", "n_slots": 1}, + "arr": {"slot": 2, "type": "DynArray[uint256, 3]", "n_slots": 4}, + "baz": {"slot": 6, "type": "Bytes[65]", "n_slots": 4}, + "bar": {"slot": 10, "type": "uint256", "n_slots": 1}, } } - _adjust_storage_layout_for_cancun(expected) + adjust_storage_layout_for_cancun(expected) + out = compile_code(code, output_formats=["layout"]) assert out["layout"] == expected @@ -88,12 +72,9 @@ def __init__(): "SYMBOL": {"length": 64, "offset": 0, "type": "String[32]"}, "DECIMALS": {"length": 32, "offset": 64, "type": "uint8"}, }, - "storage_layout": { - "$.nonreentrant_key": {"slot": 0, "type": "nonreentrant lock"}, - "name": {"slot": 1, "type": "String[32]"}, - }, + "storage_layout": {"name": {"slot": 1, "type": "String[32]", "n_slots": 2}}, } - _adjust_storage_layout_for_cancun(expected_layout) + adjust_storage_layout_for_cancun(expected_layout) out = compile_code(code, output_formats=["layout"]) assert out["layout"] == expected_layout @@ -137,13 +118,12 @@ def __init__(): }, }, "storage_layout": { - "$.nonreentrant_key": {"slot": 0, "type": "nonreentrant lock"}, - "counter": {"slot": 1, "type": "uint256"}, - "counter2": {"slot": 2, "type": "uint256"}, - "a_library": {"supply": {"slot": 3, "type": "uint256"}}, + "counter": {"slot": 1, "type": "uint256", "n_slots": 1}, + "counter2": {"slot": 2, "type": "uint256", "n_slots": 1}, + "a_library": {"supply": {"slot": 3, "type": "uint256", "n_slots": 1}}, }, } - _adjust_storage_layout_for_cancun(expected_layout) + 
adjust_storage_layout_for_cancun(expected_layout) out = compile_code(code, input_bundle=input_bundle, output_formats=["layout"]) assert out["layout"] == expected_layout @@ -187,13 +167,12 @@ def __init__(): }, }, "storage_layout": { - "$.nonreentrant_key": {"slot": 0, "type": "nonreentrant lock"}, - "counter": {"slot": 1, "type": "uint256"}, - "a_library": {"supply": {"slot": 2, "type": "uint256"}}, - "counter2": {"slot": 3, "type": "uint256"}, + "counter": {"slot": 1, "type": "uint256", "n_slots": 1}, + "a_library": {"supply": {"slot": 2, "type": "uint256", "n_slots": 1}}, + "counter2": {"slot": 3, "type": "uint256", "n_slots": 1}, }, } - _adjust_storage_layout_for_cancun(expected_layout) + adjust_storage_layout_for_cancun(expected_layout) out = compile_code(code, input_bundle=input_bundle, output_formats=["layout"]) assert out["layout"] == expected_layout @@ -271,14 +250,14 @@ def bar(): }, }, "storage_layout": { - "$.nonreentrant_key": {"slot": 0, "type": "nonreentrant lock"}, - "counter": {"slot": 1, "type": "uint256"}, - "lib2": {"storage_variable": {"slot": 2, "type": "uint256"}}, - "counter2": {"slot": 3, "type": "uint256"}, - "a_library": {"supply": {"slot": 4, "type": "uint256"}}, + "$.nonreentrant_key": {"slot": 0, "type": "nonreentrant lock", "n_slots": 1}, + "counter": {"slot": 1, "type": "uint256", "n_slots": 1}, + "lib2": {"storage_variable": {"slot": 2, "type": "uint256", "n_slots": 1}}, + "counter2": {"slot": 3, "type": "uint256", "n_slots": 1}, + "a_library": {"supply": {"slot": 4, "type": "uint256", "n_slots": 1}}, }, } - _adjust_storage_layout_for_cancun(expected_layout) + adjust_storage_layout_for_cancun(expected_layout) out = compile_code(code, input_bundle=input_bundle, output_formats=["layout"]) assert out["layout"] == expected_layout @@ -351,16 +330,15 @@ def foo() -> uint256: }, }, "storage_layout": { - "$.nonreentrant_key": {"slot": 0, "type": "nonreentrant lock"}, - "counter": {"slot": 1, "type": "uint256"}, + "counter": {"slot": 1, 
"type": "uint256", "n_slots": 1}, "lib2": { - "lib1": {"supply": {"slot": 2, "type": "uint256"}}, - "storage_variable": {"slot": 3, "type": "uint256"}, + "lib1": {"supply": {"slot": 2, "type": "uint256", "n_slots": 1}}, + "storage_variable": {"slot": 3, "type": "uint256", "n_slots": 1}, }, - "counter2": {"slot": 4, "type": "uint256"}, + "counter2": {"slot": 4, "type": "uint256", "n_slots": 1}, }, } - _adjust_storage_layout_for_cancun(expected_layout) + adjust_storage_layout_for_cancun(expected_layout) out = compile_code(code, input_bundle=input_bundle, output_formats=["layout"]) assert out["layout"] == expected_layout diff --git a/tests/unit/cli/storage_layout/test_storage_layout_overrides.py b/tests/unit/cli/storage_layout/test_storage_layout_overrides.py index 707c94c3fc..f02a8471e2 100644 --- a/tests/unit/cli/storage_layout/test_storage_layout_overrides.py +++ b/tests/unit/cli/storage_layout/test_storage_layout_overrides.py @@ -3,6 +3,7 @@ import pytest from vyper.compiler import compile_code +from vyper.evm.opcodes import version_check from vyper.exceptions import StorageLayoutException @@ -12,11 +13,11 @@ def test_storage_layout_overrides(): b: uint256""" storage_layout_overrides = { - "a": {"type": "uint256", "slot": 1}, - "b": {"type": "uint256", "slot": 0}, + "a": {"type": "uint256", "slot": 1, "n_slots": 1}, + "b": {"type": "uint256", "slot": 0, "n_slots": 1}, } - expected_output = {"storage_layout": storage_layout_overrides, "code_layout": {}} + expected_output = {"storage_layout": storage_layout_overrides} out = compile_code( code, output_formats=["layout"], storage_layout_override=storage_layout_overrides @@ -61,18 +62,26 @@ def public_foo3(): """ storage_layout_override = { - "$.nonreentrant_key": {"type": "nonreentrant lock", "slot": 8}, - "foo": {"type": "HashMap[address, uint256]", "slot": 1}, - "baz": {"type": "Bytes[65]", "slot": 2}, - "bar": {"type": "uint256", "slot": 6}, + "$.nonreentrant_key": {"type": "nonreentrant lock", "slot": 8, 
"n_slots": 1}, + "foo": {"type": "HashMap[address, uint256]", "slot": 1, "n_slots": 1}, + "baz": {"type": "Bytes[65]", "slot": 2, "n_slots": 4}, + "bar": {"type": "uint256", "slot": 6, "n_slots": 1}, } + if version_check(begin="cancun"): + del storage_layout_override["$.nonreentrant_key"] - expected_output = {"storage_layout": storage_layout_override, "code_layout": {}} + expected_output = {"storage_layout": storage_layout_override} out = compile_code( code, output_formats=["layout"], storage_layout_override=storage_layout_override ) + # adjust transient storage layout + if version_check(begin="cancun"): + expected_output["transient_storage_layout"] = { + "$.nonreentrant_key": {"n_slots": 1, "slot": 0, "type": "nonreentrant lock"} + } + assert out["layout"] == expected_output @@ -118,16 +127,55 @@ def test_override_nonreentrant_slot(): def foo(): pass """ - storage_layout_override = {"$.nonreentrant_key": {"slot": 2**256, "type": "nonreentrant key"}} - exception_regex = re.escape( - f"Invalid storage slot for var $.nonreentrant_key, out of bounds: {2**256}" - ) - with pytest.raises(StorageLayoutException, match=exception_regex): - compile_code( - code, output_formats=["layout"], storage_layout_override=storage_layout_override + if version_check(begin="cancun"): + del storage_layout_override["$.nonreentrant_key"] + assert ( + compile_code( + code, output_formats=["layout"], storage_layout_override=storage_layout_override + ) + is not None + ) + + else: + exception_regex = re.escape( + f"Invalid storage slot for var $.nonreentrant_key, out of bounds: {2**256}" ) + with pytest.raises(StorageLayoutException, match=exception_regex): + compile_code( + code, output_formats=["layout"], storage_layout_override=storage_layout_override + ) + + +def test_override_missing_nonreentrant_key(): + code = """ +@nonreentrant +@external +def foo(): + pass + """ + + storage_layout_override = {} + + if version_check(begin="cancun"): + assert ( + compile_code( + code, 
output_formats=["layout"], storage_layout_override=storage_layout_override + ) + is not None + ) + # in cancun, nonreentrant key is allocated in transient storage and can't be overridden + return + else: + exception_regex = re.escape( + "Could not find storage slot for $.nonreentrant_key." + " Have you used the correct storage layout file?" + ) + with pytest.raises(StorageLayoutException, match=exception_regex): + compile_code( + code, output_formats=["layout"], storage_layout_override=storage_layout_override + ) def test_incomplete_overrides(): @@ -139,9 +187,225 @@ def test_incomplete_overrides(): with pytest.raises( StorageLayoutException, - match="Could not find storage_slot for symbol. " + match="Could not find storage slot for symbol. " "Have you used the correct storage layout file?", ): compile_code( code, output_formats=["layout"], storage_layout_override=storage_layout_override ) + + +@pytest.mark.requires_evm_version("cancun") +def test_override_with_immutables_and_transient(): + code = """ +some_local: transient(uint256) +some_immutable: immutable(uint256) +name: public(String[64]) + +@deploy +def __init__(): + some_immutable = 5 + """ + + storage_layout_override = {"name": {"slot": 10, "type": "String[64]", "n_slots": 3}} + + out = compile_code( + code, output_formats=["layout"], storage_layout_override=storage_layout_override + ) + + expected_output = { + "storage_layout": storage_layout_override, + "transient_storage_layout": {"some_local": {"slot": 1, "type": "uint256", "n_slots": 1}}, + "code_layout": {"some_immutable": {"offset": 0, "type": "uint256", "length": 32}}, + } + + assert out["layout"] == expected_output + + +def test_override_modules(make_input_bundle): + # test module storage layout, with initializes in an imported module + # note code repetition with test_storage_layout.py; maybe refactor to + # some fixtures + lib1 = """ +supply: uint256 +SYMBOL: immutable(String[32]) +DECIMALS: immutable(uint8) + +@deploy +def __init__(): + SYMBOL = 
"VYPR" + DECIMALS = 18 + """ + lib2 = """ +import lib1 + +initializes: lib1 + +counter: uint256 +storage_variable: uint256 +immutable_variable: immutable(uint256) + +@deploy +def __init__(s: uint256): + immutable_variable = s + lib1.__init__() + +@internal +def decimals() -> uint8: + return lib1.DECIMALS + """ + code = """ +import lib1 as a_library +import lib2 + +counter: uint256 # test shadowing +some_immutable: immutable(DynArray[uint256, 10]) + +# for fun: initialize lib2 in front of lib1 +initializes: lib2 + +counter2: uint256 + +uses: a_library + +@deploy +def __init__(): + some_immutable = [1, 2, 3] + + lib2.__init__(17) + +@external +def foo() -> uint256: + return a_library.supply + """ + input_bundle = make_input_bundle({"lib1.vy": lib1, "lib2.vy": lib2}) + + override = { + "counter": {"slot": 5, "type": "uint256", "n_slots": 1}, + "lib2": { + "lib1": {"supply": {"slot": 12, "type": "uint256", "n_slots": 1}}, + "storage_variable": {"slot": 34, "type": "uint256", "n_slots": 1}, + "counter": {"slot": 15, "type": "uint256", "n_slots": 1}, + }, + "counter2": {"slot": 171, "type": "uint256", "n_slots": 1}, + } + out = compile_code( + code, output_formats=["layout"], input_bundle=input_bundle, storage_layout_override=override + ) + + expected_output = { + "storage_layout": override, + "code_layout": { + "some_immutable": {"length": 352, "offset": 0, "type": "DynArray[uint256, 10]"}, + "lib2": { + "lib1": { + "SYMBOL": {"length": 64, "offset": 352, "type": "String[32]"}, + "DECIMALS": {"length": 32, "offset": 416, "type": "uint8"}, + }, + "immutable_variable": {"length": 32, "offset": 448, "type": "uint256"}, + }, + }, + } + + assert out["layout"] == expected_output + + +def test_module_collision(make_input_bundle): + # test collisions between modules which are "siblings" in the import tree + # some fixtures + lib1 = """ +supply: uint256 + """ + lib2 = """ +counter: uint256 + """ + code = """ +import lib1 as a_library +import lib2 + +# for fun: initialize lib2 in 
front of lib1 +initializes: lib2 +initializes: a_library + """ + input_bundle = make_input_bundle({"lib1.vy": lib1, "lib2.vy": lib2}) + + override = { + "lib2": {"counter": {"slot": 15, "type": "uint256", "n_slots": 1}}, + "a_library": {"supply": {"slot": 15, "type": "uint256", "n_slots": 1}}, + } + + with pytest.raises( + StorageLayoutException, + match="Storage collision! Tried to assign 'a_library.supply' to" + " slot 15 but it has already been reserved by 'lib2.counter'", + ): + compile_code( + code, + output_formats=["layout"], + input_bundle=input_bundle, + storage_layout_override=override, + ) + + +def test_module_collision2(make_input_bundle): + # test "parent-child" collisions + lib1 = """ +supply: uint256 + """ + code = """ +import lib1 + +counter: uint256 + +initializes: lib1 + """ + input_bundle = make_input_bundle({"lib1.vy": lib1}) + + override = { + "counter": {"slot": 15, "type": "uint256", "n_slots": 1}, + "lib1": {"supply": {"slot": 15, "type": "uint256", "n_slots": 1}}, + } + + with pytest.raises( + StorageLayoutException, + match="Storage collision! Tried to assign 'lib1.supply' to" + " slot 15 but it has already been reserved by 'counter'", + ): + compile_code( + code, + output_formats=["layout"], + input_bundle=input_bundle, + storage_layout_override=override, + ) + + +def test_module_overlap(make_input_bundle): + # test a collision which only overlaps on one word + lib1 = """ +supply: uint256[2] + """ + code = """ +import lib1 + +counter: uint256[2] + +initializes: lib1 + """ + input_bundle = make_input_bundle({"lib1.vy": lib1}) + + override = { + "counter": {"slot": 15, "type": "uint256[2]", "n_slots": 2}, + "lib1": {"supply": {"slot": 16, "type": "uint256[2]", "n_slots": 2}}, + } + + with pytest.raises( + StorageLayoutException, + match="Storage collision! 
Tried to assign 'lib1.supply' to" + " slot 16 but it has already been reserved by 'counter'", + ): + compile_code( + code, + output_formats=["layout"], + input_bundle=input_bundle, + storage_layout_override=override, + ) diff --git a/tests/unit/cli/storage_layout/utils.py b/tests/unit/cli/storage_layout/utils.py new file mode 100644 index 0000000000..6e67886b0d --- /dev/null +++ b/tests/unit/cli/storage_layout/utils.py @@ -0,0 +1,17 @@ +from vyper.evm.opcodes import version_check + + +def adjust_storage_layout_for_cancun(layout): + def _go(layout): + for _varname, item in layout.items(): + if "slot" in item and isinstance(item["slot"], int): + item["slot"] -= 1 + else: + # recurse to submodule + _go(item) + + if version_check(begin="cancun"): + nonreentrant = layout["storage_layout"].pop("$.nonreentrant_key", None) + if nonreentrant is not None: + layout["transient_storage_layout"] = {"$.nonreentrant_key": nonreentrant} + _go(layout["storage_layout"]) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 10b4833e67..6f437395c6 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -12,6 +12,7 @@ from vyper.compiler.settings import OptimizationLevel, Settings, anchor_settings, merge_settings from vyper.ir import compile_ir, optimizer from vyper.semantics import analyze_module, set_data_positions, validate_compilation_target +from vyper.semantics.analysis.data_positions import generate_layout_export from vyper.semantics.types.function import ContractFunctionT from vyper.semantics.types.module import ModuleT from vyper.typing import StorageLayout @@ -180,7 +181,9 @@ def compilation_target(self): @cached_property def storage_layout(self) -> StorageLayout: module_ast = self.compilation_target - return set_data_positions(module_ast, self.storage_layout_override) + set_data_positions(module_ast, self.storage_layout_override) + + return generate_layout_export(module_ast) @property def global_ctx(self) -> ModuleT: diff --git 
a/vyper/semantics/analysis/base.py b/vyper/semantics/analysis/base.py index 718581c20c..026e0626e7 100644 --- a/vyper/semantics/analysis/base.py +++ b/vyper/semantics/analysis/base.py @@ -194,7 +194,7 @@ def getter_ast(self) -> Optional[vy_ast.VyperNode]: def set_position(self, position: VarOffset) -> None: if self.position is not None: - raise CompilerPanic("Position was already assigned") + raise CompilerPanic(f"Position was already assigned: {self}") assert isinstance(position, VarOffset) # sanity check self.position = position @@ -207,6 +207,10 @@ def is_state_variable(self): def get_size(self) -> int: return self.typ.get_size_in(self.location) + @property + def is_storage(self): + return self.location == DataLocation.STORAGE + @property def is_transient(self): return self.location == DataLocation.TRANSIENT diff --git a/vyper/semantics/analysis/data_positions.py b/vyper/semantics/analysis/data_positions.py index e5e8b998ca..5f6702668f 100644 --- a/vyper/semantics/analysis/data_positions.py +++ b/vyper/semantics/analysis/data_positions.py @@ -1,5 +1,6 @@ +import json from collections import defaultdict -from typing import Generic, TypeVar +from typing import Generic, Optional, TypeVar from vyper import ast as vy_ast from vyper.evm.opcodes import version_check @@ -11,7 +12,7 @@ def set_data_positions( vyper_module: vy_ast.Module, storage_layout_overrides: StorageLayout = None -) -> StorageLayout: +) -> None: """ Parse the annotated Vyper AST, determine data positions for all variables, and annotate the AST nodes with the position data. @@ -22,14 +23,19 @@ def set_data_positions( Top-level Vyper AST node that has already been annotated with type data. 
""" if storage_layout_overrides is not None: - # extract code layout with no overrides - code_offsets = _allocate_layout_r(vyper_module, immutables_only=True)["code_layout"] - storage_slots = set_storage_slots_with_overrides(vyper_module, storage_layout_overrides) - return {"storage_layout": storage_slots, "code_layout": code_offsets} + # allocate code layout with no overrides + _allocate_layout_r(vyper_module, no_storage=True) + _allocate_with_overrides(vyper_module, storage_layout_overrides) - ret = _allocate_layout_r(vyper_module) - assert isinstance(ret, defaultdict) - return dict(ret) # convert back to dict + # sanity check that generated layout file is the same as the input. + roundtrip = generate_layout_export(vyper_module).get(_LAYOUT_KEYS[DataLocation.STORAGE], {}) + if roundtrip != storage_layout_overrides: + msg = "Computed storage layout does not match override file!\n" + msg += f"expected: {json.dumps(storage_layout_overrides)}\n\n" + msg += f"got:\n{json.dumps(roundtrip)}" + raise CompilerPanic(msg) + else: + _allocate_layout_r(vyper_module) _T = TypeVar("_T") @@ -45,6 +51,7 @@ def __setitem__(self, k, v): # some name that the user cannot assign to a variable GLOBAL_NONREENTRANT_KEY = "$.nonreentrant_key" +NONREENTRANT_KEY_SIZE = 1 class SimpleAllocator: @@ -55,7 +62,7 @@ def __init__(self, max_slot: int = 2**256, starting_slot: int = 0): self._slot = starting_slot self._max_slot = max_slot - def allocate_slot(self, n, var_name, node=None): + def allocate_slot(self, n, node=None): ret = self._slot if self._slot + n >= self._max_slot: raise StorageLayoutException( @@ -67,7 +74,7 @@ def allocate_slot(self, n, var_name, node=None): return ret def allocate_global_nonreentrancy_slot(self): - slot = self.allocate_slot(1, GLOBAL_NONREENTRANT_KEY) + slot = self.allocate_slot(NONREENTRANT_KEY_SIZE) assert slot == self._starting_slot return slot @@ -141,74 +148,105 @@ def _reserve_slot(self, slot: int, var_name: str) -> None: self.occupied_slots[slot] = 
var_name -def set_storage_slots_with_overrides( - vyper_module: vy_ast.Module, storage_layout_overrides: StorageLayout -) -> StorageLayout: +def _fetch_path(path: list[str], layout: StorageLayout, node: vy_ast.VyperNode): + tmp = layout + qualified_path = ".".join(path) + + for segment in path: + if segment not in tmp: + raise StorageLayoutException( + f"Could not find storage slot for {qualified_path}. " + "Have you used the correct storage layout file?", + node, + ) + tmp = tmp[segment] + + try: + ret = tmp["slot"] + except KeyError as e: + raise StorageLayoutException(f"no storage slot for {qualified_path}", node) from e + + return ret + + +def _allocate_with_overrides(vyper_module: vy_ast.Module, layout: StorageLayout): """ Set storage layout given a layout override file. - Returns the layout as a dict of variable name -> variable info - (Doesn't handle modules, or transient storage) """ - ret: InsertableOnceDict[str, dict] = InsertableOnceDict() - reserved_slots = OverridingStorageAllocator() + allocator = OverridingStorageAllocator() + + nonreentrant_slot = None + if GLOBAL_NONREENTRANT_KEY in layout: + nonreentrant_slot = layout[GLOBAL_NONREENTRANT_KEY]["slot"] + + _allocate_with_overrides_r(vyper_module, layout, allocator, nonreentrant_slot, []) + +def _allocate_with_overrides_r( + vyper_module: vy_ast.Module, + layout: StorageLayout, + allocator: OverridingStorageAllocator, + global_nonreentrant_slot: Optional[int], + path: list[str], +): # Search through function definitions to find non-reentrant functions for node in vyper_module.get_children(vy_ast.FunctionDef): - type_ = node._metadata["func_type"] + fn_t = node._metadata["func_type"] # Ignore functions without non-reentrant - if not type_.nonreentrant: + if not fn_t.nonreentrant: continue - variable_name = GLOBAL_NONREENTRANT_KEY - - # re-entrant key was already identified - if variable_name in ret: + # if reentrancy keys get allocated in transient storage, we don't + # override them + if 
get_reentrancy_key_location() == DataLocation.TRANSIENT: continue # Expect to find this variable within the storage layout override - if variable_name in storage_layout_overrides: - reentrant_slot = storage_layout_overrides[variable_name]["slot"] - # Ensure that this slot has not been used, and prevents other storage variables - # from using the same slot - reserved_slots.reserve_slot_range(reentrant_slot, 1, variable_name) - - type_.set_reentrancy_key_position(VarOffset(reentrant_slot)) - - ret[variable_name] = {"type": "nonreentrant lock", "slot": reentrant_slot} - else: + if global_nonreentrant_slot is None: raise StorageLayoutException( - f"Could not find storage_slot for {variable_name}. " + f"Could not find storage slot for {GLOBAL_NONREENTRANT_KEY}. " "Have you used the correct storage layout file?", node, ) - # Iterate through variables - for node in vyper_module.get_children(vy_ast.VariableDecl): - # Ignore immutable parameters - if node.get("annotation.func.id") == "immutable": + # prevent other storage variables from using the same slot + if allocator.occupied_slots.get(global_nonreentrant_slot) != GLOBAL_NONREENTRANT_KEY: + allocator.reserve_slot_range( + global_nonreentrant_slot, NONREENTRANT_KEY_SIZE, GLOBAL_NONREENTRANT_KEY + ) + + fn_t.set_reentrancy_key_position(VarOffset(global_nonreentrant_slot)) + + for node in _get_allocatable(vyper_module): + if isinstance(node, vy_ast.InitializesDecl): + module_info = node._metadata["initializes_info"].module_info + + sub_path = [*path, module_info.alias] + _allocate_with_overrides_r( + module_info.module_node, layout, allocator, global_nonreentrant_slot, sub_path + ) continue + # Iterate through variables + # Ignore immutables and transient variables varinfo = node.target._metadata["varinfo"] + if not varinfo.is_storage: + continue + # Expect to find this variable within the storage layout overrides - if node.target.id in storage_layout_overrides: - var_slot = storage_layout_overrides[node.target.id]["slot"] 
- storage_length = varinfo.typ.storage_size_in_words - # Ensure that all required storage slots are reserved, and prevents other variables - # from using these slots - reserved_slots.reserve_slot_range(var_slot, storage_length, node.target.id) - varinfo.set_position(VarOffset(var_slot)) - - ret[node.target.id] = {"type": str(varinfo.typ), "slot": var_slot} - else: - raise StorageLayoutException( - f"Could not find storage_slot for {node.target.id}. " - "Have you used the correct storage layout file?", - node, - ) + varname = node.target.id + varpath = [*path, varname] + qualified_varname = ".".join(varpath) - return ret + var_slot = _fetch_path(varpath, layout, node) + + storage_length = varinfo.typ.storage_size_in_words + # Ensure that all required storage slots are reserved, and + # prevent other variables from using these slots + allocator.reserve_slot_range(var_slot, storage_length, qualified_varname) + varinfo.set_position(VarOffset(var_slot)) def _get_allocatable(vyper_module: vy_ast.Module) -> list[vy_ast.VyperNode]: @@ -229,7 +267,7 @@ def get_reentrancy_key_location() -> DataLocation: } -def _allocate_nonreentrant_keys(vyper_module, allocators): +def _set_nonreentrant_keys(vyper_module, allocators): SLOT = allocators.get_global_nonreentrant_key_slot() for node in vyper_module.get_children(vy_ast.FunctionDef): @@ -244,73 +282,116 @@ def _allocate_nonreentrant_keys(vyper_module, allocators): def _allocate_layout_r( - vyper_module: vy_ast.Module, allocators: Allocators = None, immutables_only=False -) -> StorageLayout: + vyper_module: vy_ast.Module, allocators: Allocators = None, no_storage=False +): """ Parse module-level Vyper AST to calculate the layout of storage variables. 
Returns the layout as a dict of variable name -> variable info """ - global_ = False if allocators is None: - global_ = True allocators = Allocators() # always allocate nonreentrancy slot, so that adding or removing # reentrancy protection from a contract does not change its layout allocators.allocate_global_nonreentrancy_slot() - ret: defaultdict[str, InsertableOnceDict[str, dict]] = defaultdict(InsertableOnceDict) - # tag functions with the global nonreentrant key - if not immutables_only: - _allocate_nonreentrant_keys(vyper_module, allocators) - - layout_key = _LAYOUT_KEYS[get_reentrancy_key_location()] - # TODO this could have better typing but leave it untyped until - # we nail down the format better - if global_ and GLOBAL_NONREENTRANT_KEY not in ret[layout_key]: - slot = allocators.get_global_nonreentrant_key_slot() - ret[layout_key][GLOBAL_NONREENTRANT_KEY] = {"type": "nonreentrant lock", "slot": slot} + if not no_storage or get_reentrancy_key_location() == DataLocation.TRANSIENT: + _set_nonreentrant_keys(vyper_module, allocators) for node in _get_allocatable(vyper_module): if isinstance(node, vy_ast.InitializesDecl): module_info = node._metadata["initializes_info"].module_info - module_layout = _allocate_layout_r(module_info.module_node, allocators) - module_alias = module_info.alias - for layout_key in module_layout.keys(): - assert layout_key in _LAYOUT_KEYS.values() - ret[layout_key][module_alias] = module_layout[layout_key] + _allocate_layout_r(module_info.module_node, allocators, no_storage) continue assert isinstance(node, vy_ast.VariableDecl) - # skip non-state variables varinfo = node.target._metadata["varinfo"] + + # skip things we don't need to allocate, like constants if not varinfo.is_state_variable(): continue - location = varinfo.location - if immutables_only and location != DataLocation.CODE: + if no_storage and varinfo.is_storage: continue - allocator = allocators.get_allocator(location) + allocator = 
allocators.get_allocator(varinfo.location) size = varinfo.get_size() # CMC 2021-07-23 note that HashMaps get assigned a slot here # using the same allocator (even though there is not really # any risk of physical overlap) - offset = allocator.allocate_slot(size, node.target.id, node) - + offset = allocator.allocate_slot(size, node) varinfo.set_position(VarOffset(offset)) + +# get the layout for export +def generate_layout_export(vyper_module: vy_ast.Module): + return _generate_layout_export_r(vyper_module) + + +def _generate_layout_export_r(vyper_module): + ret: defaultdict[str, InsertableOnceDict[str, dict]] = defaultdict(InsertableOnceDict) + + for node in _get_allocatable(vyper_module): + if isinstance(node, vy_ast.InitializesDecl): + module_info = node._metadata["initializes_info"].module_info + module_layout = _generate_layout_export_r(module_info.module_node) + module_alias = module_info.alias + for layout_key in module_layout.keys(): + assert layout_key in _LAYOUT_KEYS.values() + + # lift the nonreentrancy key (if any) into the outer dict + # note that lifting can leave the inner dict empty, which + # should be filtered (below) for cleanliness + nonreentrant = module_layout[layout_key].pop(GLOBAL_NONREENTRANT_KEY, None) + if nonreentrant is not None and GLOBAL_NONREENTRANT_KEY not in ret[layout_key]: + ret[layout_key][GLOBAL_NONREENTRANT_KEY] = nonreentrant + + # add the module as a nested dict, but only if it is non-empty + if len(module_layout[layout_key]) != 0: + ret[layout_key][module_alias] = module_layout[layout_key] + + continue + + assert isinstance(node, vy_ast.VariableDecl) + varinfo = node.target._metadata["varinfo"] + # skip non-state variables + if not varinfo.is_state_variable(): + continue + + location = varinfo.location layout_key = _LAYOUT_KEYS[location] type_ = varinfo.typ + size = varinfo.get_size() + offset = varinfo.position.position + # this could have better typing but leave it untyped until # we understand the use case better if 
location == DataLocation.CODE: item = {"type": str(type_), "length": size, "offset": offset} elif location in (DataLocation.STORAGE, DataLocation.TRANSIENT): - item = {"type": str(type_), "slot": offset} + item = {"type": str(type_), "n_slots": size, "slot": offset} else: # pragma: nocover raise CompilerPanic("unreachable") ret[layout_key][node.target.id] = item + for fn in vyper_module.get_children(vy_ast.FunctionDef): + fn_t = fn._metadata["func_type"] + if not fn_t.nonreentrant: + continue + + location = get_reentrancy_key_location() + layout_key = _LAYOUT_KEYS[location] + + if GLOBAL_NONREENTRANT_KEY in ret[layout_key]: + break + + slot = fn_t.reentrancy_key_position.position + ret[layout_key][GLOBAL_NONREENTRANT_KEY] = { + "type": "nonreentrant lock", + "slot": slot, + "n_slots": NONREENTRANT_KEY_SIZE, + } + break + return ret From a7a647f5a865a335f607f27b4d280434770c4c22 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 28 May 2024 16:49:32 +0300 Subject: [PATCH 03/53] feat[venom]: optimize branching (#4049) This commit introduces a new pass called `BranchOptimizationPass` that optimizes inefficient branches. More specifically, when a branch is led with a logic inversion `ISZERO` we remove the `ISZERO` and invert the branch targets. 
--- .../compiler/venom/test_branch_optimizer.py | 54 +++++++++++++++++++ vyper/venom/__init__.py | 2 + vyper/venom/analysis/dfg.py | 4 ++ vyper/venom/passes/branch_optimization.py | 30 +++++++++++ 4 files changed, 90 insertions(+) create mode 100644 tests/unit/compiler/venom/test_branch_optimizer.py create mode 100644 vyper/venom/passes/branch_optimization.py diff --git a/tests/unit/compiler/venom/test_branch_optimizer.py b/tests/unit/compiler/venom/test_branch_optimizer.py new file mode 100644 index 0000000000..b6e806e217 --- /dev/null +++ b/tests/unit/compiler/venom/test_branch_optimizer.py @@ -0,0 +1,54 @@ +from vyper.venom.analysis.analysis import IRAnalysesCache +from vyper.venom.analysis.dfg import DFGAnalysis +from vyper.venom.basicblock import IRBasicBlock, IRLabel +from vyper.venom.context import IRContext +from vyper.venom.passes.branch_optimization import BranchOptimizationPass +from vyper.venom.passes.make_ssa import MakeSSA + + +def test_simple_jump_case(): + ctx = IRContext() + fn = ctx.create_function("_global") + + bb = fn.get_basic_block() + + br1 = IRBasicBlock(IRLabel("then"), fn) + fn.append_basic_block(br1) + br2 = IRBasicBlock(IRLabel("else"), fn) + fn.append_basic_block(br2) + + p1 = bb.append_instruction("param") + op1 = bb.append_instruction("store", p1) + op2 = bb.append_instruction("store", 64) + op3 = bb.append_instruction("add", op1, op2) + jnz_input = bb.append_instruction("iszero", op3) + bb.append_instruction("jnz", jnz_input, br1.label, br2.label) + + br1.append_instruction("add", op3, 10) + br1.append_instruction("stop") + br2.append_instruction("add", op3, p1) + br2.append_instruction("stop") + + term_inst = bb.instructions[-1] + + ac = IRAnalysesCache(fn) + MakeSSA(ac, fn).run_pass() + + old_dfg = ac.request_analysis(DFGAnalysis) + assert term_inst not in old_dfg.get_uses(op3), "jnz not using the old condition" + assert term_inst in old_dfg.get_uses(jnz_input), "jnz using the new condition" + + BranchOptimizationPass(ac, 
fn).run_pass() + + # Test that the jnz targets are inverted and + # the jnz condition updated + assert term_inst.opcode == "jnz" + assert term_inst.operands[0] == op3 + assert term_inst.operands[1] == br2.label + assert term_inst.operands[2] == br1.label + + # Test that the dfg is updated correctly + dfg = ac.request_analysis(DFGAnalysis) + assert dfg is old_dfg, "DFG should not be invalidated by BranchOptimizationPass" + assert term_inst in dfg.get_uses(op3), "jnz not using the new condition" + assert term_inst not in dfg.get_uses(jnz_input), "jnz still using the old condition" diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index 6bbcedaade..82901126bc 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -9,6 +9,7 @@ from vyper.venom.context import IRContext from vyper.venom.function import IRFunction from vyper.venom.ir_node_to_venom import ir_node_to_venom +from vyper.venom.passes.branch_optimization import BranchOptimizationPass from vyper.venom.passes.dft import DFTPass from vyper.venom.passes.make_ssa import MakeSSA from vyper.venom.passes.mem2var import Mem2Var @@ -49,6 +50,7 @@ def _run_passes(fn: IRFunction, optimize: OptimizationLevel) -> None: SCCP(ac, fn).run_pass() StoreElimination(ac, fn).run_pass() SimplifyCFGPass(ac, fn).run_pass() + BranchOptimizationPass(ac, fn).run_pass() RemoveUnusedVariablesPass(ac, fn).run_pass() DFTPass(ac, fn).run_pass() diff --git a/vyper/venom/analysis/dfg.py b/vyper/venom/analysis/dfg.py index 2fb172a979..c64fb07fc2 100644 --- a/vyper/venom/analysis/dfg.py +++ b/vyper/venom/analysis/dfg.py @@ -22,6 +22,10 @@ def get_uses(self, op: IRVariable) -> list[IRInstruction]: def get_producing_instruction(self, op: IRVariable) -> Optional[IRInstruction]: return self._dfg_outputs.get(op) + def add_use(self, op: IRVariable, inst: IRInstruction): + uses = self._dfg_inputs.setdefault(op, []) + uses.append(inst) + def remove_use(self, op: IRVariable, inst: IRInstruction): uses = self._dfg_inputs.get(op, 
[]) uses.remove(inst) diff --git a/vyper/venom/passes/branch_optimization.py b/vyper/venom/passes/branch_optimization.py new file mode 100644 index 0000000000..354aab7900 --- /dev/null +++ b/vyper/venom/passes/branch_optimization.py @@ -0,0 +1,30 @@ +from vyper.venom.analysis.dfg import DFGAnalysis +from vyper.venom.passes.base_pass import IRPass + + +class BranchOptimizationPass(IRPass): + """ + This pass optimizes branches inverting jnz instructions where appropriate + """ + + def _optimize_branches(self) -> None: + fn = self.function + for bb in fn.get_basic_blocks(): + term_inst = bb.instructions[-1] + if term_inst.opcode != "jnz": + continue + + prev_inst = self.dfg.get_producing_instruction(term_inst.operands[0]) + if prev_inst.opcode == "iszero": + new_cond = prev_inst.operands[0] + term_inst.operands = [new_cond, term_inst.operands[2], term_inst.operands[1]] + + # Since the DFG update is simple we do in place to avoid invalidating the DFG + # and having to recompute it (which is expensive(er)) + self.dfg.remove_use(prev_inst.output, term_inst) + self.dfg.add_use(new_cond, term_inst) + + def run_pass(self): + self.dfg = self.analyses_cache.request_analysis(DFGAnalysis) + + self._optimize_branches() From fe7d86bfff1d60fa664916caf9687f4e283aa76f Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 28 May 2024 07:58:34 -0700 Subject: [PATCH 04/53] feat[ci]: add codecov configuration (#4057) set threshold for reporting codecov failure at -0.5%, there is usually some noise in the coverage (+-0.1% or so) from fuzzing. 
--------- Co-authored-by: Daniel Schiavini --- codecov.yml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 codecov.yml diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000000..c3b0793c77 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,7 @@ +# https://docs.codecov.com/docs/codecovyml-reference +coverage: + status: + project: + default: + # set threshold given noise in the coverage from fuzzing + threshold: 0.5% From d6b300df136d6380c9a5e6a55a4f42e4b058d832 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 28 May 2024 19:40:22 +0300 Subject: [PATCH 05/53] feat[venom]: avoid last `swap` for commutative ops (#4048) This commit implements a simple "last swap" avoidance for commutative ops. Additionally, it renames the `get_inputs()` method to `get_input_variables()` in `IRInstruction` for clarity and consistency --- vyper/venom/analysis/dfg.py | 2 +- vyper/venom/analysis/dup_requirements.py | 2 +- vyper/venom/analysis/liveness.py | 2 +- vyper/venom/basicblock.py | 4 +-- vyper/venom/passes/dft.py | 2 +- vyper/venom/passes/remove_unused_variables.py | 2 +- vyper/venom/venom_to_assembly.py | 35 ++++++++++++++----- 7 files changed, 34 insertions(+), 15 deletions(-) diff --git a/vyper/venom/analysis/dfg.py b/vyper/venom/analysis/dfg.py index c64fb07fc2..ef16e1b357 100644 --- a/vyper/venom/analysis/dfg.py +++ b/vyper/venom/analysis/dfg.py @@ -43,7 +43,7 @@ def analyze(self): # dfg_inputs of %15 is all the instructions which *use* %15, ex. [(%16 = iszero %15), ...] 
for bb in self.function.get_basic_blocks(): for inst in bb.instructions: - operands = inst.get_inputs() + operands = inst.get_input_variables() res = inst.get_outputs() for op in operands: diff --git a/vyper/venom/analysis/dup_requirements.py b/vyper/venom/analysis/dup_requirements.py index 3452bc2e0f..7afb315035 100644 --- a/vyper/venom/analysis/dup_requirements.py +++ b/vyper/venom/analysis/dup_requirements.py @@ -8,7 +8,7 @@ def analyze(self): last_liveness = bb.out_vars for inst in reversed(bb.instructions): inst.dup_requirements = OrderedSet() - ops = inst.get_inputs() + ops = inst.get_input_variables() for op in ops: if op in last_liveness: inst.dup_requirements.add(op) diff --git a/vyper/venom/analysis/liveness.py b/vyper/venom/analysis/liveness.py index 5e78aa4ff3..ac06ff4dae 100644 --- a/vyper/venom/analysis/liveness.py +++ b/vyper/venom/analysis/liveness.py @@ -36,7 +36,7 @@ def _calculate_liveness(self, bb: IRBasicBlock) -> bool: orig_liveness = bb.instructions[0].liveness.copy() liveness = bb.out_vars.copy() for instruction in reversed(bb.instructions): - ins = instruction.get_inputs() + ins = instruction.get_input_variables() outs = instruction.get_outputs() if ins or outs: diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 91faca03be..c979f33fbb 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -236,7 +236,7 @@ def get_non_label_operands(self) -> Iterator[IROperand]: """ return (op for op in self.operands if not isinstance(op, IRLabel)) - def get_inputs(self) -> Iterator[IRVariable]: + def get_input_variables(self) -> Iterator[IRVariable]: """ Get all input operands for instruction. 
""" @@ -477,7 +477,7 @@ def get_assignments(self): def get_uses(self) -> dict[IRVariable, OrderedSet[IRInstruction]]: uses: dict[IRVariable, OrderedSet[IRInstruction]] = {} for inst in self.instructions: - for op in inst.get_inputs(): + for op in inst.get_input_variables(): if op not in uses: uses[op] = OrderedSet() uses[op].add(inst) diff --git a/vyper/venom/passes/dft.py b/vyper/venom/passes/dft.py index 06366e4336..8429c19711 100644 --- a/vyper/venom/passes/dft.py +++ b/vyper/venom/passes/dft.py @@ -40,7 +40,7 @@ def _process_instruction_r(self, bb: IRBasicBlock, inst: IRInstruction, offset: self.inst_order[inst] = 0 return - for op in inst.get_inputs(): + for op in inst.get_input_variables(): target = self.dfg.get_producing_instruction(op) assert target is not None, f"no producing instruction for {op}" if target.parent != inst.parent or target.fence_id != inst.fence_id: diff --git a/vyper/venom/passes/remove_unused_variables.py b/vyper/venom/passes/remove_unused_variables.py index 653bab57d6..53b0505024 100644 --- a/vyper/venom/passes/remove_unused_variables.py +++ b/vyper/venom/passes/remove_unused_variables.py @@ -34,7 +34,7 @@ def _process_instruction(self, inst): if len(uses) > 0: return - for operand in inst.get_inputs(): + for operand in inst.get_input_variables(): self.dfg.remove_use(operand, inst) new_uses = self.dfg.get_uses(operand) self.work_list.addmany(new_uses) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 2eb556b086..beb530a42c 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -102,6 +102,9 @@ ] ) +COMMUTATIVE_INSTRUCTIONS = frozenset(["add", "mul", "smul", "or", "xor", "and", "eq"]) + + _REVERT_POSTAMBLE = ["_sym___revert", "JUMPDEST", *PUSH(0), "DUP1", "REVERT"] @@ -195,8 +198,14 @@ def generate_evm(self, no_optimize: bool = False) -> list[str]: return top_asm def _stack_reorder( - self, assembly: list, stack: StackModel, stack_ops: list[IRVariable] - ) -> None: + 
self, assembly: list, stack: StackModel, stack_ops: list[IROperand], dry_run: bool = False + ) -> int: + cost = 0 + + if dry_run: + assert len(assembly) == 0, "Dry run should not work on assembly" + stack = stack.copy() + stack_ops_count = len(stack_ops) counts = Counter(stack_ops) @@ -216,8 +225,10 @@ def _stack_reorder( if op == stack.peek(final_stack_depth): continue - self.swap(assembly, stack, depth) - self.swap(assembly, stack, final_stack_depth) + cost += self.swap(assembly, stack, depth) + cost += self.swap(assembly, stack, final_stack_depth) + + return cost def _emit_input_operands( self, assembly: list, inst: IRInstruction, ops: list[IROperand], stack: StackModel @@ -376,7 +387,7 @@ def _generate_evm_for_instruction( if opcode == "phi": ret = inst.get_outputs()[0] - phis = list(inst.get_inputs()) + phis = list(inst.get_input_variables()) depth = stack.get_phi_depth(phis) # collapse the arguments to the phi node in the stack. # example, for `%56 = %label1 %13 %label2 %14`, we will @@ -406,9 +417,16 @@ def _generate_evm_for_instruction( target_stack_list = list(target_stack) self._stack_reorder(assembly, stack, target_stack_list) + if opcode in COMMUTATIVE_INSTRUCTIONS: + cost_no_swap = self._stack_reorder([], stack, operands, dry_run=True) + operands[-1], operands[-2] = operands[-2], operands[-1] + cost_with_swap = self._stack_reorder([], stack, operands, dry_run=True) + if cost_with_swap > cost_no_swap: + operands[-1], operands[-2] = operands[-2], operands[-1] + # final step to get the inputs to this instruction ordered # correctly on the stack - self._stack_reorder(assembly, stack, operands) # type: ignore + self._stack_reorder(assembly, stack, operands) # some instructions (i.e. 
invoke) need to do stack manipulations # with the stack model containing the return value(s), so we fiddle @@ -533,13 +551,14 @@ def pop(self, assembly, stack, num=1): stack.pop(num) assembly.extend(["POP"] * num) - def swap(self, assembly, stack, depth): + def swap(self, assembly, stack, depth) -> int: # Swaps of the top is no op if depth == 0: - return + return 0 stack.swap(depth) assembly.append(_evm_swap_for(depth)) + return 1 def dup(self, assembly, stack, depth): stack.dup(depth) From 003d0c6424c6595c4dded63389bd3cd8f0be7d8a Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 28 May 2024 10:26:11 -0700 Subject: [PATCH 06/53] fix[tool]: compile multiple files (#4053) fix compilation for multiple files where they initialize the same module. the analysis was getting cached between compilations, leading to a compiler panic on allocating the storage layout (because the module was previously touched by the allocator). this was not caught in previous testing because the pattern in the test suite is to run a single compilation per test, with a fresh input bundle. 
--- .../cli/storage_layout/test_storage_layout.py | 38 +++++++++++++++++++ tests/unit/compiler/test_compile_code.py | 34 +++++++++++++++-- vyper/compiler/__init__.py | 1 + vyper/compiler/phases.py | 3 ++ vyper/semantics/analysis/module.py | 4 -- 5 files changed, 73 insertions(+), 7 deletions(-) diff --git a/tests/unit/cli/storage_layout/test_storage_layout.py b/tests/unit/cli/storage_layout/test_storage_layout.py index d490d2008f..1aad3aad11 100644 --- a/tests/unit/cli/storage_layout/test_storage_layout.py +++ b/tests/unit/cli/storage_layout/test_storage_layout.py @@ -1,4 +1,5 @@ from vyper.compiler import compile_code +from vyper.evm.opcodes import version_check from .utils import adjust_storage_layout_for_cancun @@ -342,3 +343,40 @@ def foo() -> uint256: out = compile_code(code, input_bundle=input_bundle, output_formats=["layout"]) assert out["layout"] == expected_layout + + +def test_multiple_compile_codes(make_input_bundle): + # test calling compile_code multiple times with the same library allocated + # in different locations + lib = """ +x: uint256 + """ + input_bundle = make_input_bundle({"lib.vy": lib}) + + main1 = """ +import lib + +initializes: lib +t: uint256 + """ + main2 = """ +import lib + +t: uint256 +initializes: lib + """ + out1 = compile_code(main1, input_bundle=input_bundle, output_formats=["layout"])["layout"] + out2 = compile_code(main2, input_bundle=input_bundle, output_formats=["layout"])["layout"] + + layout1 = out1["storage_layout"]["lib"] + layout2 = out2["storage_layout"]["lib"] + + assert layout1 != layout2 # for clarity + + if version_check(begin="cancun"): + start_slot = 0 + else: + start_slot = 1 + + assert layout1 == {"x": {"slot": start_slot, "type": "uint256", "n_slots": 1}} + assert layout2 == {"x": {"slot": start_slot + 1, "type": "uint256", "n_slots": 1}} diff --git a/tests/unit/compiler/test_compile_code.py b/tests/unit/compiler/test_compile_code.py index 7af133e362..dc5a743e72 100644 --- 
a/tests/unit/compiler/test_compile_code.py +++ b/tests/unit/compiler/test_compile_code.py @@ -1,14 +1,42 @@ +import random + import pytest import vyper -def test_contract_size_exceeded(): - code = """ +@pytest.fixture +def huge_bytestring(): + r = random.Random(b"vyper") + + return bytes([r.getrandbits(8) for _ in range(0x6001)]) + + +def test_contract_size_exceeded(huge_bytestring): + code = f""" @external def a() -> bool: - q: Bytes[24577] = b"" # noqa: E501 + q: Bytes[24577] = {huge_bytestring} return True """ with pytest.warns(vyper.warnings.ContractSizeLimitWarning): vyper.compile_code(code, output_formats=["bytecode_runtime"]) + + +# test that each compilation run gets a fresh analysis and storage allocator +def test_shared_modules_allocation(make_input_bundle): + lib1 = """ +x: uint256 + """ + main1 = """ +import lib1 +initializes: lib1 + """ + main2 = """ +import lib1 +initializes: lib1 + """ + input_bundle = make_input_bundle({"lib1.vy": lib1}) + + vyper.compile_code(main1, input_bundle=input_bundle) + vyper.compile_code(main2, input_bundle=input_bundle) diff --git a/vyper/compiler/__init__.py b/vyper/compiler/__init__.py index e4c5bc49eb..0345c24931 100644 --- a/vyper/compiler/__init__.py +++ b/vyper/compiler/__init__.py @@ -103,6 +103,7 @@ def compile_from_file_input( output_formats = ("bytecode",) # make IR output the same between runs + # TODO: move this to CompilerData.__init__() codegen.reset_names() compiler_data = CompilerData( diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 6f437395c6..147af24d67 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -94,6 +94,9 @@ def __init__( self.input_bundle = input_bundle or FilesystemInputBundle([Path(".")]) self.expected_integrity_sum = integrity_sum + # ast cache, hitchhike onto the input_bundle object + self.input_bundle._cache._ast_of: dict[int, vy_ast.Module] = {} # type: ignore + @cached_property def source_code(self): return self.file_input.source_code diff 
--git a/vyper/semantics/analysis/module.py b/vyper/semantics/analysis/module.py index dcaf27d661..d0b019db7a 100644 --- a/vyper/semantics/analysis/module.py +++ b/vyper/semantics/analysis/module.py @@ -199,10 +199,6 @@ def __init__( self.module_t: Optional[ModuleT] = None - # ast cache, hitchhike onto the input_bundle object - if not hasattr(self.input_bundle._cache, "_ast_of"): - self.input_bundle._cache._ast_of: dict[int, vy_ast.Module] = {} # type: ignore - def analyze_module_body(self): # generate a `ModuleT` from the top-level node # note: also validates unique method ids From 21376526e22dd4fc889cb16699041fd9e3025dbe Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 28 May 2024 10:36:37 -0700 Subject: [PATCH 07/53] chore[docs]: add docs for v0.4.0 features (#3947) document new v0.4.0 features, including: - module system - internal function decorator now optional - `extcall` and `staticcall` keywords - testing (remove eth-tester references) - `@deploy` visibility - search path - archives, integrity hash - type annotations required for loop variables - new CLI options --------- Co-authored-by: tserg <8017125+tserg@users.noreply.github.com> Co-authored-by: Daniel Schiavini Co-authored-by: El De-dog-lo <3859395+fubuloubu@users.noreply.github.com> --- .gitignore | 2 +- docs/compiling-a-contract.rst | 65 +++++++-- docs/constants-and-vars.rst | 2 +- docs/control-structures.rst | 105 +++++++++------ docs/deploying-contracts.rst | 15 ++- docs/index.rst | 4 +- docs/interfaces.rst | 135 ++++++------------- docs/resources.rst | 2 +- docs/scoping-and-declarations.rst | 13 +- docs/statements.rst | 4 +- docs/structure-of-a-contract.rst | 111 ++++++++++++++- docs/style-guide.rst | 2 +- docs/testing-contracts-ethtester.rst | 81 ----------- docs/testing-contracts-titanoboa.rst | 6 + docs/testing-contracts.rst | 4 +- docs/toctree.rst | 1 + docs/types.rst | 5 +- docs/using-modules.rst | 194 +++++++++++++++++++++++++++ vyper/cli/vyper_compile.py | 14 +- 19 files changed, 
510 insertions(+), 255 deletions(-) delete mode 100644 docs/testing-contracts-ethtester.rst create mode 100644 docs/testing-contracts-titanoboa.rst create mode 100644 docs/using-modules.rst diff --git a/.gitignore b/.gitignore index 89b1e920f0..56215b8493 100644 --- a/.gitignore +++ b/.gitignore @@ -49,4 +49,4 @@ vyper/vyper_git_commithash.txt *.spec # mac -.DS_Store \ No newline at end of file +.DS_Store diff --git a/docs/compiling-a-contract.rst b/docs/compiling-a-contract.rst index 3e0e48213f..751af980b2 100644 --- a/docs/compiling-a-contract.rst +++ b/docs/compiling-a-contract.rst @@ -6,17 +6,19 @@ Command-Line Compiler Tools Vyper includes the following command-line scripts for compiling contracts: -* ``vyper``: Compiles vyper contract files into ``IR`` or bytecode +* ``vyper``: Compiles vyper contract or archive files * ``vyper-json``: Provides a JSON interface to the compiler .. note:: The ``--help`` flag gives verbose explanations of how to use each of these scripts. +.. _vyper-cli-command: + vyper ----- -``vyper`` provides command-line access to the compiler. It can generate various outputs including simple binaries, ASTs, interfaces and source mappings. +``vyper`` provides CLI access to the compiler. It can generate various outputs including simple binaries, ASTs, interfaces and source mappings. To compile a contract: @@ -29,7 +31,7 @@ Include the ``-f`` flag to specify which output formats to return. Use ``vyper - .. 
code:: shell - $ vyper -f abi,abi_python,bytecode,bytecode_runtime,interface,external_interface,ast,annotated_ast,ir,ir_json,ir_runtime,hex-ir,asm,opcodes,opcodes_runtime,source_map,method_identifiers,userdoc,devdoc,metadata,combined_json,layout yourFileName.vy + $ vyper -f abi,abi_python,bytecode,bytecode_runtime,blueprint_bytecode,interface,external_interface,ast,annotated_ast,integrity,ir,ir_json,ir_runtime,asm,opcodes,opcodes_runtime,source_map,source_map_runtime,archive,solc_json,method_identifiers,userdoc,devdoc,metadata,combined_json,layout yourFileName.vy .. note:: The ``opcodes`` and ``opcodes_runtime`` output of the compiler has been returning incorrect opcodes since ``0.2.0`` due to a lack of 0 padding (patched via `PR 3735 `_). If you rely on these functions for debugging, please use the latest patched versions. @@ -95,7 +97,6 @@ Importing Interfaces 1. Interfaces defined in the ``interfaces`` field of the input JSON. 2. Derived interfaces generated from contracts in the ``sources`` field of the input JSON. -3. (Optional) The local filesystem, if a root path was explicitly declared via the ``-p`` flag. See :ref:`searching_for_imports` for more information on Vyper's import system. @@ -121,7 +122,7 @@ Remix IDE Compiler Optimization Modes =========================== -The vyper CLI tool accepts an optimization mode ``"none"``, ``"codesize"``, or ``"gas"`` (default). It can be set using the ``--optimize`` flag. For example, invoking ``vyper --optimize codesize MyContract.vy`` will compile the contract, optimizing for code size. As a rough summary of the differences between gas and codesize mode, in gas optimized mode, the compiler will try to generate bytecode which minimizes gas (up to a point), including: +The Vyper CLI tool accepts an optimization mode ``"none"``, ``"codesize"``, or ``"gas"`` (default). It can be set using the ``--optimize`` flag. 
For example, invoking ``vyper --optimize codesize MyContract.vy`` will compile the contract, optimizing for code size. As a rough summary of the differences between gas and codesize mode, in gas optimized mode, the compiler will try to generate bytecode which minimizes gas (up to a point), including: * using a sparse selector table which optimizes for gas over codesize * inlining some constants, and @@ -192,11 +193,50 @@ The following is a list of supported EVM versions, and changes in the compiler i - Functions marked with ``@nonreentrant`` are protected with TLOAD/TSTORE instead of SLOAD/SSTORE - The ``MCOPY`` opcode will be generated automatically by the compiler for most memory operations. +.. _integrity-hash: + +Integrity Hash +============== + +To help tooling detect whether two builds are the same, Vyper provides the ``-f integrity`` output, which outputs the integrity hash of a contract. The integrity hash is recursively defined as the sha256 of the source code with the integrity hashes of its dependencies (imports). + +.. _vyper-archives: + +Vyper Archives +============== + +A Vyper archive is a compileable bundle of input sources and settings. Technically, it is a `ZIP file `_, with a special structure to make it useable as input to the compiler. It can use any suffix, but the convention is to use a ``.zip`` suffix or ``.vyz`` suffix. It must contain a ``MANIFEST/`` folder, with the following directory structure. + +:: + + MANIFEST + ├── cli_settings.txt + ├── compilation_targets + ├── compiler_version + ├── integrity + ├── searchpaths + └── settings.json + +* ``cli_settings.txt`` is a text representation of the settings that were used on the compilation run that generated this archive. +* ``compilation_targets`` is a newline separated list of compilation targets. 
Currently only one compilation is supported +* ``compiler_version`` is a text representation of the compiler version used to generate this archive +* ``integrity`` is the :ref:`integrity hash ` of the input contract +* ``searchpaths`` is a newline-separated list of the search paths used on this compilation run +* ``settings.json`` is a json representation of the settings used on this compilation run. It is 1:1 with ``cli_settings.txt``, but both are provided as they are convenient for different workflows (typically, manually vs automated). + +A Vyper archive file can be produced by requesting the ``-f archive`` output format. The compiler can also produce the archive in base64 encoded form using the ``--base64`` flag. The Vyper compiler can accept both ``.vyz`` and base64-encoded Vyper archives directly as input. + +.. code-block:: bash + + $ vyper -f archive my_contract.vy -o my_contract.vyz # write the archive to my_contract.vyz + $ vyper -f archive my_contract.vy --base64 > my_contract.vyz.b64 # write the archive, as base64-encoded text + $ vyper my_contract.vyz # compile my_contract.vyz + $ vyper my_contract.vyz.b64 # compile my_contract.vyz.b64 Compiler Input and Output JSON Description ========================================== -Especially when dealing with complex or automated setups, the recommended way to compile is to use :ref:`vyper-json` and the JSON-input-output interface. +JSON input/output is provided for compatibility with solidity, however, the recommended way is to use the aforementioned :ref:`Vyper archives `. So-called "standard json" input can be generated from a contract using the ``vyper -f solc_json`` output format. Where possible, the Vyper JSON compiler formats follow those of `Solidity `_. @@ -205,7 +245,7 @@ Where possible, the Vyper JSON compiler formats follow those of `Solidity `. Functions may accept input arguments and return variables in order to pass values between them. -.. 
_function-visibility: - Visibility ---------- -All functions must include exactly one visibility decorator. +.. _function-visibility: + +You can optionally declare a function's visibility by using a :ref:`decorator `. There are three visibility levels in Vyper: + + * ``@external``: exposed in the selector table, can be called by an external call into this contract + * ``@internal`` (default): can be invoked only from within this contract. Not available to external callers + * ``@deploy``: constructor code. This is code which is invoked once in the lifetime of a contract, upon its deploy. It is not available at runtime to either external callers or internal call invocations. At this time, only the :ref:`__init__() function ` may be marked as ``@deploy``. + External Functions ****************** @@ -50,20 +55,50 @@ A Vyper contract cannot call directly between two external functions. If you mus Internal Functions ****************** -Internal functions (marked with the ``@internal`` decorator) are only accessible from other functions within the same contract. They are called via the :ref:`self` object: +Internal functions (optionally marked with the ``@internal`` decorator) are only accessible from other functions within the same contract. They are invoked via the :ref:`self` object: .. code-block:: vyper - @internal - def _times_two(amount: uint256, two: uint256 = 2) -> uint256: - return amount * two + def _times_two(amount: uint256) -> uint256: + return amount * 2 @external def calculate(amount: uint256) -> uint256: return self._times_two(amount) +Or for internal functions which are defined in :ref:`imported modules `, they are invoked by prefixing the name of the module to the function name: + +.. code-block:: vyper + import calculator_library + + @external + def calculate(amount: uint256) -> uint256: + return calculator_library._times_two(amount) + +.. note:: + As of v0.4.0, the ``@internal`` decorator is optional. 
That is, functions with no visibility decorator default to being ``internal``. + .. note:: - Since calling an ``internal`` function is realized by jumping to its entry label, the internal function dispatcher ensures the correctness of the jumps. Please note that for ``internal`` functions which use more than one default parameter, Vyper versions ``>=0.3.8`` are strongly recommended due to the security advisory `GHSA-ph9x-4vc9-m39g `_. + Please note that for ``internal`` functions which use more than one default parameter, Vyper versions ``>=0.3.8`` are recommended due to the security advisory `GHSA-ph9x-4vc9-m39g `_. + + +The ``__init__`` Function +------------------------- + +.. _init-function: + +The ``__init__()`` function, also known as the constructor, is a special initialization function that is only called at the time of deploying a contract. It can be used to set initial values for storage or immutable variables. It must be declared with the ``@deploy`` decorator. A common use case is to set an ``owner`` variable with the creator of the contract: + +.. code-block:: vyper + + owner: address + + @deploy + def __init__(): + self.owner = msg.sender + +Additionally, :ref:`immutable variables ` may only be set within the constructor. + Mutability ---------- @@ -72,10 +107,10 @@ Mutability You can optionally declare a function's mutability by using a :ref:`decorator `. There are four mutability levels: - * **Pure**: does not read from the contract state or any environment variables. - * **View**: may read from the contract state, but does not alter it. - * **Nonpayable**: may read from and write to the contract state, but cannot receive Ether. - * **Payable**: may read from and write to the contract state, and can receive Ether. + * ``@pure``: does not read from the contract state or any environment variables. + * ``@view``: may read from the contract state, but does not alter it. 
+ * ``@nonpayable`` (default): may read from and write to the contract state, but cannot receive Ether. + * ``@payable``: may read from and write to the contract state, and can receive Ether. .. code-block:: vyper @@ -151,7 +186,7 @@ If the function is annotated as ``@payable``, this function is executed whenever Considerations ************** -Just as in Solidity, Vyper generates a default function if one isn't found, in the form of a ``REVERT`` call. Note that this still `generates an exception `_ and thus will not succeed in receiving funds. +Just as in Solidity, Vyper generates a default function if one isn't found, in the form of a ``REVERT`` call. Note that this rolls back state changes, and thus will not succeed in receiving funds. Ethereum specifies that the operations will be rolled back if the contract runs out of gas in execution. ``send`` calls to the contract come with a free stipend of 2300 gas, which does not leave much room to perform other operations except basic logging. **However**, if the sender includes a higher gas amount through a ``call`` instead of ``send``, then more complex functionality can be run. @@ -168,33 +203,17 @@ Lastly, although the default function receives no arguments, it can still access * the amount of ETH sent (``msg.value``) * the gas provided (``msg.gas``). -The ``__init__`` Function -------------------------- - -``__init__`` is a special initialization function that may only be called at the time of deploying a contract. It can be used to set initial values for storage variables. A common use case is to set an ``owner`` variable with the creator the contract: - -.. code-block:: vyper - - owner: address - - @external - def __init__(): - self.owner = msg.sender - -You cannot call to other contract functions from the initialization function. - .. _function-decorators: Decorators Reference -------------------- -All functions must include one :ref:`visibility ` decorator (``@external`` or ``@internal``). 
The remaining decorators are optional. - =============================== =========================================================== Decorator Description =============================== =========================================================== -``@external`` Function can only be called externally +``@external`` Function can only be called externally, it is part of the runtime selector table ``@internal`` Function can only be called within current contract +``@deploy`` Function is called only at deploy time ``@pure`` Function does not read contract state or environment variables ``@view`` Function does not alter contract state ``@payable`` Function is able to receive Ether @@ -233,7 +252,7 @@ The ``for`` statement is a control flow construct used to iterate over a value: .. code-block:: vyper - for i in <ITERABLE>: + for i: <TYPE> in <ITERABLE>: ... The iterated value can be a static array, a dynamic array, or generated from the built-in ``range`` function. @@ -246,16 +265,16 @@ You can use ``for`` to iterate through the values of any array variable: .. code-block:: vyper foo: int128[3] = [4, 23, 42] - for i in foo: + for i: int128 in foo: ... In the above, example, the loop executes three times with ``i`` assigned the values of ``4``, ``23``, and then ``42``. -You can also iterate over a literal array, as long as a common type can be determined for each item in the array: +You can also iterate over a literal array, as long as the annotated type is valid for each item in the array: .. code-block:: vyper - for i in [4, 23, 42]: + for i: int128 in [4, 23, 42]: ... Some restrictions: @@ -270,32 +289,32 @@ Ranges are created using the ``range`` function. The following examples are vali .. code-block:: vyper - for i in range(STOP): + for i: uint256 in range(STOP): ... -``STOP`` is a literal integer greater than zero. ``i`` begins as zero and increments by one until it is equal to ``STOP``. +``STOP`` is a literal integer greater than zero.
``i`` begins as zero and increments by one until it is equal to ``STOP``. ``i`` must be of the same type as ``STOP``. .. code-block:: vyper - for i in range(stop, bound=N): + for i: uint256 in range(stop, bound=N): ... -Here, ``stop`` can be a variable with integer type, greater than zero. ``N`` must be a compile-time constant. ``i`` begins as zero and increments by one until it is equal to ``stop``. If ``stop`` is larger than ``N``, execution will revert at runtime. In certain cases, you may not have a guarantee that ``stop`` is less than ``N``, but still want to avoid the possibility of runtime reversion. To accomplish this, use the ``bound=`` keyword in combination with ``min(stop, N)`` as the argument to ``range``, like ``range(min(stop, N), bound=N)``. This is helpful for use cases like chunking up operations on larger arrays across multiple transactions. +Here, ``stop`` can be a variable with integer type, greater than zero. ``N`` must be a compile-time constant. ``i`` begins as zero and increments by one until it is equal to ``stop``. If ``stop`` is larger than ``N``, execution will revert at runtime. In certain cases, you may not have a guarantee that ``stop`` is less than ``N``, but still want to avoid the possibility of runtime reversion. To accomplish this, use the ``bound=`` keyword in combination with ``min(stop, N)`` as the argument to ``range``, like ``range(min(stop, N), bound=N)``. This is helpful for use cases like chunking up operations on larger arrays across multiple transactions. ``i``, ``stop`` and ``N`` must be of the same type. Another use of range can be with ``START`` and ``STOP`` bounds. .. code-block:: vyper - for i in range(START, STOP): + for i: uint256 in range(START, STOP): ... -Here, ``START`` and ``STOP`` are literal integers, with ``STOP`` being a greater value than ``START``. ``i`` begins as ``START`` and increments by one until it is equal to ``STOP``. 
+Here, ``START`` and ``STOP`` are literal integers, with ``STOP`` being a greater value than ``START``. ``i`` begins as ``START`` and increments by one until it is equal to ``STOP``. ``i``, ``START`` and ``STOP`` must be of the same type. Finally, it is possible to use ``range`` with runtime `start` and `stop` values as long as a constant `bound` value is provided. In this case, Vyper checks at runtime that `end - start <= bound`. -``N`` must be a compile-time constant. +``N`` must be a compile-time constant. ``i``, ``start``, ``end`` and ``N`` must be of the same type. .. code-block:: vyper - for i in range(start, end, bound=N): + for i: uint256 in range(start, end, bound=N): ... diff --git a/docs/deploying-contracts.rst b/docs/deploying-contracts.rst index d4fa4cec3f..4954ea8cec 100644 --- a/docs/deploying-contracts.rst +++ b/docs/deploying-contracts.rst @@ -21,7 +21,16 @@ Once you are ready to deploy your contract to a public test net or the main net, vyper -f abi yourFileName.vy # returns ABI -* Use the remote compiler provided by the `Remix IDE `_ to compile and deploy your contract on your net of choice. Remix also provides a JavaScript VM to test deploy your contract. +* Use `Titanoboa `_: -.. note:: - While the vyper version of the Remix IDE compiler is updated on a regular basis it might be a bit behind the latest version found in the master branch of the repository. Make sure the byte code matches the output from your local compiler. +.. code-block:: python + + import boa + boa.set_network_env() + from eth_account import Account + # in a real codebase, always load private keys safely from an encrypted store! + boa.env.add_account(Account()) + deployer = boa.load_partial("yourFileName.vy") + deployer.deploy() + +* Use the development environment provided at https://try.vyperlang.org to compile and deploy your contract on your net of choice. try.vyperlang.org comes "batteries-included", with Titanoboa pre-installed, and browser signer integration as well.
diff --git a/docs/index.rst b/docs/index.rst index a7b0fbb4f2..5baaebb339 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -23,7 +23,7 @@ Because of this Vyper provides the following features: * **Strong typing** * **Clean and understandable compiler code** * **Support for pure functions**: Anything marked ``pure`` is not allowed to change the state. -* **Code reuse through composition**: Vyper supports code reuse through composition, and to help auditors, requires syntactic marking of dependencies which potentially modify state. +* **Code reuse through composition**: Vyper supports code reuse through composition, and requires syntactic marking of dependencies which potentially modify state. Following the principles and goals, Vyper **does not** provide the following features: @@ -35,5 +35,3 @@ Following the principles and goals, Vyper **does not** provide the following fea * **Recursive calling**: Recursive calling makes it impossible to set an upper bound on gas limits, opening the door for gas limit attacks. * **Infinite-length loops**: Similar to recursive calling, infinite-length loops make it impossible to set an upper bound on gas limits, opening the door for gas limit attacks. * **Binary fixed point**: Decimal fixed point is better, because any decimal fixed point value written as a literal in code has an exact representation, whereas with binary fixed point approximations are often required (e.g. (0.2)\ :sub:`10` = (0.001100110011...)\ :sub:`2`, which needs to be truncated), leading to unintuitive results, e.g. in Python 0.3 + 0.3 + 0.3 + 0.1 != 1. - -Vyper **does not** strive to be a 100% replacement for everything that can be done in Solidity; it will deliberately forbid things or make things harder if it deems fit to do so for the goal of increasing security. 
diff --git a/docs/interfaces.rst b/docs/interfaces.rst index b22facf030..acc0ce91f3 100644 --- a/docs/interfaces.rst +++ b/docs/interfaces.rst @@ -24,7 +24,11 @@ The defined interface can then be used to make external calls, given a contract @external def test(foobar: FooBar): - foobar.calculate() + extcall foobar.test1() + + @external + def test2(foobar: FooBar) -> uint256: + return staticcall foobar.calculate() The interface name can also be used as a type annotation for storage variables. You then assign an address value to the variable to access that interface. Note that casting an address to an interface is possible, e.g. ``FooBar()``: @@ -32,15 +36,21 @@ The interface name can also be used as a type annotation for storage variables. foobar_contract: FooBar - @external + @deploy def __init__(foobar_address: address): self.foobar_contract = FooBar(foobar_address) @external def test(): - self.foobar_contract.calculate() + extcall self.foobar_contract.test1() + +Specifying ``payable`` or ``nonpayable`` annotation in the interface indicates that the call made to the external contract will be able to alter storage, whereas ``view`` and ``pure`` calls will use a ``STATICCALL`` ensuring no storage can be altered during execution. Additionally, ``payable`` allows non-zero value to be sent along with the call. -Specifying ``payable`` or ``nonpayable`` annotation indicates that the call made to the external contract will be able to alter storage, whereas the ``view`` ``pure`` call will use a ``STATICCALL`` ensuring no storage can be altered during execution. Additionally, ``payable`` allows non-zero value to be sent along with the call. +Either the ``extcall`` or ``staticcall`` keyword is required to precede the external call to distinguish it from internal calls. The keyword must match the mutability of the function: ``staticcall`` for ``pure`` and ``view`` functions, and ``extcall`` for ``payable`` and ``nonpayable`` functions.
Additionally, the output of a ``staticcall`` must be assigned to a result. + +.. warning:: + + If the signature in an interface does not match the actual signature of the called contract, you can get runtime errors or undefined behavior. For instance, if you accidentally mark a ``nonpayable`` function as ``view``, calling that function may result in the EVM reverting execution in the called contract. .. code-block:: vyper @@ -52,10 +62,10 @@ Specifying ``payable`` or ``nonpayable`` annotation indicates that the call made @external def test(foobar: FooBar): - foobar.calculate() # cannot change storage - foobar.query() # cannot change storage, but reads itself - foobar.update() # storage can be altered - foobar.pay(value=1) # storage can be altered, and value can be sent + s: uint256 = staticcall foobar.calculate() # cannot change storage + s = staticcall foobar.query() # cannot change storage, but reads itself + extcall foobar.update() # storage can be altered + extcall foobar.pay(value=1) # storage can be altered, and value can be sent Vyper offers the option to set the following additional keyword arguments when making external calls: @@ -72,95 +82,17 @@ The ``default_return_value`` parameter can be used to handle ERC20 tokens affect .. code-block:: vyper - IERC20(USDT).transfer(msg.sender, 1, default_return_value=True) # returns True - IERC20(USDT).transfer(msg.sender, 1) # reverts because nothing returned + extcall IERC20(USDT).transfer(msg.sender, 1, default_return_value=True) # returns True + extcall IERC20(USDT).transfer(msg.sender, 1) # reverts because nothing returned .. warning:: When ``skip_contract_check=True`` is used and the called function returns data (ex.: ``x: uint256 = SomeContract.foo(skip_contract_check=True)``, no guarantees are provided by the compiler as to the validity of the returned value. In other words, it is undefined behavior what happens if the called contract did not exist. 
In particular, the returned value might point to garbage memory. It is therefore recommended to only use ``skip_contract_check=True`` to call contracts which have been manually ensured to exist at the time of the call. -Importing Interfaces -==================== - -Interfaces are imported with ``import`` or ``from ... import`` statements. - -Imported interfaces are written using standard Vyper syntax. The body of each function is ignored when the interface is imported. If you are defining a standalone interface, it is normally specified by using a ``pass`` statement: - -.. code-block:: vyper - - @external - def test1(): - pass - - @external - def calculate() -> uint256: - pass - -You can also import a fully implemented contract and Vyper will automatically convert it to an interface. It is even possible for a contract to import itself to gain access to its own interface. - -.. code-block:: vyper - - import greeter as Greeter - - name: public(String[10]) - - @external - def __init__(_name: String[10]): - self.name = _name - - @view - @external - def greet() -> String[16]: - return concat("Hello ", Greeter(msg.sender).name()) - -Imports via ``import`` ----------------------- - -With absolute ``import`` statements, you **must** include an alias as a name for the imported package. In the following example, failing to include ``as Foo`` will raise a compile error: - -.. code-block:: vyper - - import contract.foo as Foo - -Imports via ``from ... import`` -------------------------------- - -Using ``from`` you can perform both absolute and relative imports. You may optionally include an alias - if you do not, the name of the interface will be the same as the file. - -.. code-block:: vyper - - # without an alias - from contract import foo - - # with an alias - from contract import foo as Foo - -Relative imports are possible by prepending dots to the contract name. A single leading dot indicates a relative import starting with the current package. 
Two leading dots indicate a relative import from the parent of the current package: - -.. code-block:: vyper - - from . import foo - from ..interfaces import baz - -.. _searching_for_imports: - -Searching For Interface Files ------------------------------ - -When looking for a file to import, Vyper will first search relative to the same folder as the contract being compiled. For absolute imports, it also searches relative to the root path for the project. Vyper checks for the file name with a ``.vy`` suffix first, then ``.json``. - -When using the command line compiler, the root path defaults to the current working directory. You can change it with the ``-p`` flag: - -:: - - $ vyper my_project/contracts/my_contract.vy -p my_project - -In the above example, the ``my_project`` folder is set as the root path. A contract cannot perform a relative import that goes beyond the top-level folder. - Built-in Interfaces =================== -Vyper includes common built-in interfaces such as `ERC20 `_ and `ERC721 `_. These are imported from ``ethereum.ercs``: +Vyper includes common built-in interfaces such as `IERC20 `_ and `IERC721 `_. These are imported from ``ethereum.ercs``: .. code-block:: vyper @@ -182,7 +114,7 @@ You can define an interface for your contract with the ``implements`` statement: implements: FooBarInterface -This imports the defined interface from the vyper file at ``an_interface.vy`` (or ``an_interface.json`` if using ABI json interface type) and ensures your current contract implements all the necessary external functions. If any interface functions are not included in the contract, it will fail to compile. This is especially useful when developing contracts around well-defined standards such as ERC20. +This imports the defined interface from the vyper file at ``an_interface.vyi`` (or ``an_interface.json`` if using ABI json interface type) and ensures your current contract implements all the necessary external functions. 
If any interface functions are not included in the contract, it will fail to compile. This is especially useful when developing contracts around well-defined standards such as ERC20. .. note:: @@ -192,11 +124,26 @@ This imports the defined interface from the vyper file at ``an_interface.vy`` (o Prior to v0.4.0, ``implements`` required that events defined in an interface were re-defined in the "implementing" contract. As of v0.4.0, this is no longer required because events can be used just by importing them. Any events used in a contract will automatically be exported in the ABI output. +Standalone Interfaces +===================== + +Standalone interfaces are written using a variant of standard Vyper syntax. The body of each function must be an ellipsis (``...``). Interface files must have a ``.vyi`` suffix in order to be found by an import statement. + +.. code-block:: vyper + # ISomeInterface.vyi + + @external + def test1(): + ... + + @external + def calculate() -> uint256: + ... Extracting Interfaces ===================== -Vyper has a built-in format option to allow you to make your own Vyper interfaces easily. +Vyper has a built-in format option to allow you to easily export a Vyper interface from a pre-existing contract. :: @@ -207,11 +154,11 @@ Vyper has a built-in format option to allow you to make your own Vyper interface @view @external def delegated(addr: address) -> bool: - pass + ... # ... -If you want to do an external call to another contract, Vyper provides an external interface extract utility as well. +If you want to export it as an inline interface, Vyper provides a utility to extract that as well. :: @@ -225,4 +172,4 @@ If you want to do an external call to another contract, Vyper provides an extern def forwardWeight(delegate_with_weight_to_forward: address): nonpayable # ... -The output can then easily be copy-pasted to be consumed. +The output can then easily be copy-pasted directly in a regular vyper file. 
diff --git a/docs/resources.rst b/docs/resources.rst index c2b0e3e427..977df2b3eb 100644 --- a/docs/resources.rst +++ b/docs/resources.rst @@ -18,7 +18,7 @@ General Frameworks and tooling ---------------------- -- `Titanoboa – An experimental Vyper interpreter with pretty tracebacks, forking, debugging features and more `_ +- `Titanoboa – A Vyper interpreter with pretty tracebacks, forking, debugging features and more `_ - `ApeWorX – The Ethereum development framework for Python Developers, Data Scientists, and Security Professionals `_ - `VyperDeployer – A helper smart contract to compile and test Vyper contracts in Foundry `_ - `🐍 snekmate – Vyper smart contract building blocks `_ diff --git a/docs/scoping-and-declarations.rst b/docs/scoping-and-declarations.rst index 838720c25b..9b59cb8c4f 100644 --- a/docs/scoping-and-declarations.rst +++ b/docs/scoping-and-declarations.rst @@ -33,6 +33,8 @@ The compiler automatically creates getter functions for all public storage varia For public arrays, you can only retrieve a single element via the generated getter. This mechanism exists to avoid high gas costs when returning an entire array. The getter will accept an argument to specify which element to return, for example ``data(0)``. +.. _immutable-variables: + Declaring Immutable Variables ----------------------------- @@ -42,7 +44,7 @@ Variables can be marked as ``immutable`` during declaration: DATA: immutable(uint256) - @external + @deploy def __init__(_data: uint256): DATA = _data @@ -123,8 +125,6 @@ Module Scope Variables and other items declared outside of a code block (functions, constants, event and struct definitions, ...), are visible even before they were declared. This means you can use module-scoped items before they are declared. -An exception to this rule is that you can only call functions that have already been declared. 
- Accessing Module Scope from Functions ************************************* @@ -161,9 +161,10 @@ It is not permitted for a memory or calldata variable to shadow the name of an i a: immutable(bool) - @external + @deploy def __init__(): a = True + @external def foo(a:bool) -> bool: # input argument cannot have the same name as a constant or immutable variable @@ -230,7 +231,7 @@ In a ``for`` statement, the target variable exists within the scope of the loop. @external def foo(a: bool) -> int128: - for i in [1, 2, 3]: + for i: int128 in [1, 2, 3]: pass i: bool = False @@ -240,6 +241,6 @@ The following contract fails to compile because ``a`` has not been declared outs @external def foo(a: bool) -> int128: - for i in [1, 2, 3]: + for i: int128 in [1, 2, 3]: a: int128 = i a += 3 diff --git a/docs/statements.rst b/docs/statements.rst index 34f15828a1..801eb36ae5 100644 --- a/docs/statements.rst +++ b/docs/statements.rst @@ -15,7 +15,7 @@ The ``break`` statement terminates the nearest enclosing ``for`` loop. .. code-block:: vyper - for i in [1, 2, 3, 4, 5]: + for i: uint256 in [1, 2, 3, 4, 5]: if i == a: break @@ -28,7 +28,7 @@ The ``continue`` statement begins the next cycle of the nearest enclosing ``for` .. code-block:: vyper - for i in [1, 2, 3, 4, 5]: + for i: uint256 in [1, 2, 3, 4, 5]: if i != a: continue ... diff --git a/docs/structure-of-a-contract.rst b/docs/structure-of-a-contract.rst index 561f3000dd..fc817cf4b6 100644 --- a/docs/structure-of-a-contract.rst +++ b/docs/structure-of-a-contract.rst @@ -54,6 +54,72 @@ EVM Version The EVM version can be set with the ``evm-version`` pragma, which is documented in :ref:`evm-version`. +Imports +======= + +Import statements allow you to import :ref:`modules` or :ref:`interfaces` with the ``import`` or ``from ... import`` syntax. + +Imports via ``import`` +---------------------- + +You may import modules (defined in ``.vy`` files) and interfaces (defined in ``.vyi`` or ``.json`` files) via ``import`` statements. 
You may use plain or ``as`` variants. + +.. code-block:: vyper + + # without an alias + import foo + + # with an alias + import my_package.foo as bar + +Imports via ``from ... import`` +------------------------------- + +Using ``from`` you can perform both absolute and relative imports. You may optionally include an alias - if you do not, the name of the interface will be the same as the file. + +.. code-block:: vyper + + # without an alias + from my_package import foo + + # with an alias + from my_package import foo as bar + +Relative imports are possible by prepending dots to the contract name. A single leading dot indicates a relative import starting with the current package. Two leading dots indicate a relative import from the parent of the current package: + +.. code-block:: vyper + + from . import foo + from ..interfaces import baz + +Further higher directories can be accessed with ``...``, ``....`` etc., as in Python. + +.. _searching_for_imports: + +Searching For Imports +----------------------------- + +When looking for a file to import, Vyper will first search relative to the same folder as the contract being compiled. It then checks for the file in the provided search paths, in the precedence provided. Vyper checks for the file name with a ``.vy`` suffix first, then ``.vyi``, then ``.json``. + +When using the :ref:`vyper CLI `, the search path defaults to the current working directory, plus the python `syspath `_. You can append to the search path with the ``-p`` flag, e.g.: + +:: + + $ vyper my_project/contracts/my_contract.vy -p ../path/to/other_project + +In the above example, ``../path/to/other_project`` is appended to the search path. + +.. note:: + + Including the python syspath on the search path means that any Vyper module in the current ``virtualenv`` is discoverable by the Vyper compiler, and Vyper packages can be published to and installed from PyPI and accessed via ``import`` statements with no additional configuration.
Keep in mind that best practice is always to install packages *within* a ``virtualenv`` and not globally! + +You can additionally disable the behavior of adding the syspath to the search path with the CLI flag ``--disable-sys-path``: + +:: + + $ vyper --disable-sys-path my_project/my_contract.vy + +When compiling from a :ref:`.vyz archive file ` or :ref:`standard json input `, the search path is already part of the bundle, so it cannot be changed from the command line. .. _structure-state-variables: @@ -91,6 +157,47 @@ Functions may be called internally or externally depending on their :ref:`visibi See the :ref:`Functions ` documentation for more information. +.. _modules: + +Modules +========== + +A module is a set of function definitions and variable declarations which enables code reuse. Vyper favors code reuse through composition, rather than inheritance. + +Broadly speaking, a module contains: + +* function definitions +* state variable declarations +* type definitions + +Therefore, a module encapsulates + +* functionality (types and functions), and +* state (variables), which may be tightly coupled with that functionality + +Modules can be added to contracts by importing them from a ``.vy`` file. Any ``.vy`` file is a valid module which can be imported into another contract! This is a very powerful feature which allows you to assemble contracts via other contracts as building blocks. + +.. code-block:: vyper + # my_module.vy + + def perform_some_computation() -> uint256: + return 5 + + @external + def some_external_function() -> uint256: + return 6 + +.. code-block:: vyper + import my_module + + exports: my_module.some_external_function + + @external + def foo() -> uint256: + return my_module.perform_some_computation() + +Modules are opt-in by design. That is, any operations involving state or exposing external functions must be explicitly opted into using the ``exports``, ``uses`` or ``initializes`` keywords.
See the :ref:`Modules ` documentation for more information. + Events ====== @@ -112,12 +219,14 @@ Events provide an interface for the EVM's logging facilities. Events may be logg See the :ref:`Event ` documentation for more information. +.. _interfaces: + Interfaces ========== An interface is a set of function definitions used to enable calls between smart contracts. A contract interface defines all of that contract's externally available functions. By importing the interface, your contract now knows how to call these functions in other contracts. -Interfaces can be added to contracts either through inline definition, or by importing them from a separate file. +Interfaces can be added to contracts either through inline definition, or by importing them from a separate ``.vyi`` file. .. code-block:: vyper diff --git a/docs/style-guide.rst b/docs/style-guide.rst index 10869076eb..3c9e8681ae 100644 --- a/docs/style-guide.rst +++ b/docs/style-guide.rst @@ -152,7 +152,7 @@ Type Annotations Tests ===== -We use the `pytest `_ framework for testing, and :ref:`eth-tester` for our local development chain. +We use the `pytest `_ framework for testing. Best Practices -------------- diff --git a/docs/testing-contracts-ethtester.rst b/docs/testing-contracts-ethtester.rst deleted file mode 100644 index 92522a1eca..0000000000 --- a/docs/testing-contracts-ethtester.rst +++ /dev/null @@ -1,81 +0,0 @@ -.. _testing-contracts-ethtester: - -Testing with Ethereum Tester -############################ - -`Ethereum Tester `_ is a tool suite for testing Ethereum based applications. - -This section provides a quick overview of testing with ``eth-tester``. To learn more, you can view the documentation at the `Github repo `_ or join the `Gitter `_ channel. - -Getting Started -=============== - -Prior to testing, the Vyper specific contract conversion and the blockchain related fixtures need to be set up. 
These fixtures will be used in every test file and should therefore be defined in `conftest.py `_. - -.. note:: - - Since the testing is done in the pytest framework, you can make use of `pytest.ini, tox.ini and setup.cfg `_ and you can use most IDEs' pytest plugins. - -.. literalinclude:: ../tests/conftest.py - :caption: conftest.py - :language: python - :linenos: - -The final two fixtures are optional and will be discussed later. The rest of this chapter assumes that you have this code set up in your ``conftest.py`` file. - -Alternatively, you can import the fixtures to ``conftest.py`` or use `pytest plugins `_. - -Writing a Basic Test -==================== - -Assume the following simple contract ``storage.vy``. It has a single integer variable and a function to set that value. - -.. literalinclude:: ../examples/storage/storage.vy - :caption: storage.vy - :linenos: - :language: vyper - -We create a test file ``test_storage.py`` where we write our tests in pytest style. - -.. literalinclude:: ../tests/functional/examples/storage/test_storage.py - :caption: test_storage.py - :linenos: - :language: python - -First we create a fixture for the contract which will compile our contract and set up a Web3 contract object. We then use this fixture for our test functions to interact with the contract. - -.. note:: - To run the tests, call ``pytest`` or ``python -m pytest`` from your project directory. - -Events and Failed Transactions -============================== - -To test events and failed transactions we expand our simple storage contract to include an event and two conditions for a failed transaction: ``advanced_storage.vy`` - -.. literalinclude:: ../examples/storage/advanced_storage.vy - :caption: advanced_storage.vy - :linenos: - :language: vyper - -Next, we take a look at the two fixtures that will allow us to read the event logs and to check for failed transactions. - -.. 
literalinclude:: ../tests/conftest.py - :caption: conftest.py - :language: python - :pyobject: tx_failed - -The fixture to assert failed transactions defaults to check for a ``TransactionFailed`` exception, but can be used to check for different exceptions too, as shown below. Also note that the chain gets reverted to the state before the failed transaction. - -.. literalinclude:: ../tests/conftest.py - :caption: conftest.py - :language: python - :pyobject: get_logs - -This fixture will return a tuple with all the logs for a certain event and transaction. The length of the tuple equals the number of events (of the specified type) logged and should be checked first. - -Finally, we create a new file ``test_advanced_storage.py`` where we use the new fixtures to test failed transactions and events. - -.. literalinclude:: ../tests/functional/examples/storage/test_advanced_storage.py - :caption: test_advanced_storage.py - :linenos: - :language: python diff --git a/docs/testing-contracts-titanoboa.rst b/docs/testing-contracts-titanoboa.rst new file mode 100644 index 0000000000..2dfbb5e630 --- /dev/null +++ b/docs/testing-contracts-titanoboa.rst @@ -0,0 +1,6 @@ +.. _testing-contracts-titanoboa: + +Testing with Titanoboa +###################### + +Titanoboa is a Vyper interpreter which is fast and provides a "swiss-army knife" toolkit for developing vyper applications. The best place to start is at `the official docs `_, and skip down to the `testing reference `_ for an overview of testing strategies. Finally, a more detailed API reference is available in the `API reference subsection `_. 
diff --git a/docs/testing-contracts.rst b/docs/testing-contracts.rst index 043af416fb..3e92819957 100644 --- a/docs/testing-contracts.rst +++ b/docs/testing-contracts.rst @@ -5,13 +5,13 @@ Testing a Contract For testing Vyper contracts we recommend the use of `pytest `_ along with one of the following packages: + * `Titanoboa `_: A Vyper interpreter, pretty tracebacks, forking, debugging and deployment features. Maintained by the Vyper team. * `Brownie `_: A development and testing framework for smart contracts targeting the Ethereum Virtual Machine - * `Ethereum Tester `_: A tool suite for testing ethereum applications Example usage for each package is provided in the sections listed below. .. toctree:: :maxdepth: 2 + testing-contracts-titanoboa.rst testing-contracts-brownie.rst - testing-contracts-ethtester.rst diff --git a/docs/toctree.rst b/docs/toctree.rst index e3583db56b..65bbe3ab9b 100644 --- a/docs/toctree.rst +++ b/docs/toctree.rst @@ -25,6 +25,7 @@ Vyper control-structures.rst scoping-and-declarations.rst built-in-functions.rst + using-modules.rst interfaces.rst event-logging.rst natspec.rst diff --git a/docs/types.rst b/docs/types.rst index f82153b1b9..752e06b14f 100644 --- a/docs/types.rst +++ b/docs/types.rst @@ -230,7 +230,8 @@ Decimals **Keyword:** ``decimal`` -A decimal is a type to store a decimal fixed point value. +A decimal is a type to store a decimal fixed point value. As of v0.4.0, decimals must be enabled with the CLI flag ``--enable-decimals``. + Values ****** @@ -562,7 +563,7 @@ Dynamic arrays represent bounded arrays whose length can be modified at runtime, .. code-block:: vyper - for item in self.my_array: + for item: uint256 in self.my_array: self.my_array[0] = item In the ABI, they are represented as ``_Type[]``. For instance, ``DynArray[int128, 3]`` gets represented as ``int128[]``, and ``DynArray[DynArray[int128, 3], 3]`` gets represented as ``int128[][]``. 
diff --git a/docs/using-modules.rst b/docs/using-modules.rst new file mode 100644 index 0000000000..4a8af1a7d9 --- /dev/null +++ b/docs/using-modules.rst @@ -0,0 +1,194 @@ +.. _modules: + +Modules +####### + +A module is a set of function definitions and variable declarations which enables code reuse. Vyper favors code reuse through composition, rather than inheritance. A module encapsulates everything needed for code reuse, from type and function declarations to state. It is important to note that functions which make use of defined state must be initialized in order to use that state, whereas functions that are "pure" do not require this. + +Declaring and using modules +=========================== + +The simplest way to define a module is to write a contract. In Vyper, any contract is a valid module! For example, the following contract is also a valid module. + +.. code-block:: vyper + + # ownable.vy + + owner: address + + @deploy + def __init__(): + self.owner = msg.sender + + def _check_owner(): + assert self.owner == msg.sender + + @pure + def _times_two(x: uint256) -> uint256: + return x * 2 + + @external + def update_owner(new_owner: address): + self._check_owner() + + self.owner = new_owner + +This contract basically has two bits of functionality which can be reused upon import, the ``_check_owner()`` function and the ``update_owner()`` function. The ``_check_owner()`` is an internal function which can be used as a helper to check ownership in importing modules, while the ``update_owner()`` is an external function which an importing module can itself :ref:`export ` as an externally facing piece of functionality. + +You can use this module's functionality simply by importing it, however any functionality that you do not use from a module will not be included in the final compilation target. For example, if you don't use the ``initializes`` statement to declare a module's location in the storage layout, you cannot use its state. 
Similarly, if you don't explicitly ``export`` an external function from a module, it will not appear in the runtime code. + +Importing a module +================== + +A module can be imported using ``import`` or ``from ... import`` statements. The following are all equivalent ways to import the above module: + +.. code-block:: vyper + + import ownable # accessible as `ownable` + import ownable as ow # accessible as `ow` + from . import ownable # accessible as `ownable` + from . import ownable as ow # accessible as `ow` + +When importing using the ``as`` keyword, the module will be referred to by its alias in the rest of the contract. + +The ``_times_two()`` helper function in the above module can be immediately used without any further work since it is "pure" and doesn't depend on initialized state. + +.. code-block:: vyper + + import ownable as helper + @external + def my_function(x: uint256) -> uint256: + return helper._times_two(x) + +The other functions cannot be used yet, because they touch the ``ownable`` module's state. There are two ways to declare a module so that its state can be used. + +Initializing a module +===================== + +In order to use a module's state, it must be "initialized". A module can be initialized with the ``initializes`` keyword. This declares the module's location in the contract's :ref:`Storage Layout `. It also creates a requirement to invoke the module's :ref:`__init__() function `, if it has one. This is a well-formedness requirement, since it does not make sense to access a module's state unless its ``__init__()`` function has been called. + +.. code-block:: vyper + + import ownable + + initializes: ownable + + @deploy + def __init__(): + ownable.__init__() + + @external + def my_access_controlled_function(): + ownable._check_owner() # reverts unless msg.sender == ownable.owner + + ... # do things that only the owner can do + +It is a compile-time error to invoke a module's ``__init__()`` function more than once! 
+ +A module's state can be directly accessed just by prefixing the name of a variable with the module's alias, as follows: + +.. code-block:: vyper + + @external + def get_owner() -> address: + return ownable.owner + + +The ``uses`` statement +====================== + +Another way of using a contract's state without directly initializing it is to use the ``uses`` keyword. This is a more advanced usage which is expected to be mostly utilized by library designers. The ``uses`` statement allows a module to use another module's state but defer its initialization to another module in the compilation tree (most likely a user of the library in question). + +This is best illustrated with an example: + +.. code-block:: vyper + + # ownable_2step.vy + import ownable + + uses: ownable + + # does not export ownable.transfer_ownership! + + pending_owner: address # the pending owner in the 2-step transfer process + + @deploy + def __init__(): + self.pending_owner = empty(address) + + @external + def begin_transfer(new_owner: address): + ownable._check_owner() + + self.pending_owner = new_owner + + @external + def accept_transfer(): + assert msg.sender == self.pending_owner + + ownable.owner = self.pending_owner + self.pending_owner = empty(address) + +Here, the ``ownable_2step`` module does not want to seal off access to calling the ``ownable`` module's ``__init__()`` function. So, it utilizes the ``uses: ownable`` statement to get access to the ``ownable`` module's state, without the requirement to initialize it. Note that this is a valid module, but it is not a valid contract (that is, it cannot produce bytecode) because it does not initialize the ``ownable`` module. To make a valid contract, the user of the ``ownable_2step`` module would be responsible for initializing the ``ownable`` module themselves (as in the next section: :ref:`initializing dependencies `). + +Whether to ``use`` or ``initialize`` a module is a choice which is left up to the library designer.
+ +Technical notes on the design +----------------------------- + +This section contains some notes on the design from a language design perspective. It can be safely skipped if you are just interested in how to use modules, and not necessarily in programming language theory. + +The design of the module system takes inspiration from (but is not directly related to) the rust language's `borrow checker `_. In the language of type systems, module initialization is modeled as an affine constraint which is promoted to a linear constraint if the module's state is touched in the compilation target. In practice, what this means is: + +* A module must be "used" or "initialized" before its state can be accessed in an import +* A module may be "used" many times +* A module which is "used" or its state touched must be "initialized" exactly once + +.. _init-dependencies: + +Initializing a module with dependencies +======================================= + +Sometimes, you may encounter a module which itself ``uses`` other modules. Vyper's module system is designed to allow this, but it requires you make explicit the access to the imported module's state. The above ``ownable_2step.vy`` contract is an example of this. If you wanted to initialize the ``ownable_2step`` module, it would use the special ``:=`` (aka "walrus") syntax, and look something like this: + +.. code-block:: vyper + + import ownable + import ownable_2step + + initializes: ownable + + # ownable is explicitly declared as a state dependency of `ownable_2step` + initializes: ownable_2step[ownable := ownable] + + @deploy + def __init__(): + ownable.__init__() + ownable_2step.__init__() + + # export all external functions from ownable_2step + exports: ownable_2step.__interface__ + +.. _exporting-functions: + +Exporting functions +=================== + +In Vyper, ``@external`` functions are not automatically exposed (i.e., included in the runtime code) in the importing contract. 
This is a safety feature, it means that any externally facing functionality must be explicitly defined in the top-level of the compilation target. + +So, exporting external functions from modules is accomplished using the ``exports`` keyword. In Vyper, functions can be exported individually, or, a wholesale export of all the functions in a module can be done. The following are all ways of exporting functions from an imported module. + +.. code-block:: vyper + + # export a single function from `ownable_2step` + exports: ownable_2step.transfer_ownership + + # export multiple functions from `ownable_2step`, being explicit about + # which specific functions are being exported + exports: ( + ownable_2step.transfer_ownership, + ownable_2step.accept_ownership, + ) + + # export all external functions from `ownable_2step` + exports: ownable_2step.__interface__ diff --git a/vyper/cli/vyper_compile.py b/vyper/cli/vyper_compile.py index 226a76242a..b7e664b975 100755 --- a/vyper/cli/vyper_compile.py +++ b/vyper/cli/vyper_compile.py @@ -42,6 +42,7 @@ ir_json - Intermediate representation in JSON format ir_runtime - Intermediate representation of runtime bytecode in list format asm - Output the EVM assembly of the deployable bytecode +integrity - Output the integrity hash of the source code archive - Output the build as an archive file solc_json - Output the build in solc json format """ @@ -163,8 +164,16 @@ def _parse_args(argv): "--hex-ir", help="Represent integers as hex values in the IR", action="store_true" ) parser.add_argument( - "--path", "-p", help="Set the root path for contract imports", action="append", dest="paths" + "--path", + "-p", + help="Add a path to the compiler's search path", + action="append", + dest="paths", ) + parser.add_argument( + "--disable-sys-path", help="Disable the use of sys.path", action="store_true" + ) + parser.add_argument("-o", help="Set the output path", dest="output_path") parser.add_argument( "--experimental-codegen", @@ -173,9 +182,6 @@ 
def _parse_args(argv): dest="experimental_codegen", ) parser.add_argument("--enable-decimals", help="Enable decimals", action="store_true") - parser.add_argument( - "--disable-sys-path", help="Disable the use of sys.path", action="store_true" - ) args = parser.parse_args(argv) From eb011367cc769d62a084deff62153825e626f87a Mon Sep 17 00:00:00 2001 From: cyberthirst Date: Tue, 28 May 2024 19:48:11 +0200 Subject: [PATCH 08/53] fix[codegen]: fix `_abi_decode` buffer overflow (#3925) this commit fixes two related bugs in the ABI decoder. the first is that the ABI decoder does not have a buffer overflow check for "head" (aka, dynamic offsets) pointers. this can result in a toctou, where the result of ABI decoding a bytearray can depend on the value allocated after the bytearray in memory, and a change is observable if there is a memory write between two ABI decodes of the same buffer: ```vyper def foo(xs: Bytes[1024]): y: Bytes[32] = b"foo" x: Bytes[1024] = xs # the `head` element of `xs` points outside of the `xs` buffer, # to `y` z1: Bytes[32] = _abi_decode(y, Bytes[32]) y = b"bar" # `z1` != `x2` z2: Bytes[32] = _abi_decode(y, Bytes[32]) ``` the second is that the "head" pointer can point within the allocated buffer but not within the payload. for instance, we might allocate 1024 bytes as an upper bound for the payload, but the payload could be just 128 bytes at runtime. if the "head" pointer points to 160, then the decoder might read dirty memory. we ban this behavior to prevent introspection of dirty memory. both of these are only considered security vulnerabilities when the payload is in memory. if the payload is in calldata (or returndata - although that is not currently applicable since we do not ABI decode directly from returndata at this time), the payload is user-controlled, and the worst they can do is force reads of zero bytes. therefore, the fix here only does the overflow check for decodes from memory. 
an alternative implementation strategy was considered, which is to decode ABI payloads more "strictly" - requiring that each "head" pointer is equal to the previous "head" plus the length of that item, but this was scrapped as it would require a larger change to the decoder. --------- Co-authored-by: Charles Cooper Co-authored-by: Daniel Schiavini --- .../builtins/codegen/test_abi_decode.py | 609 ++++++++++++++++++ .../functional/builtins/codegen/test_empty.py | 23 +- vyper/builtins/functions.py | 27 +- vyper/codegen/core.py | 117 +++- vyper/codegen/external_call.py | 9 +- 5 files changed, 732 insertions(+), 53 deletions(-) diff --git a/tests/functional/builtins/codegen/test_abi_decode.py b/tests/functional/builtins/codegen/test_abi_decode.py index a580ba12a0..fad6ce889c 100644 --- a/tests/functional/builtins/codegen/test_abi_decode.py +++ b/tests/functional/builtins/codegen/test_abi_decode.py @@ -4,6 +4,7 @@ from tests.evm_backends.base_env import EvmError, ExecutionReverted from tests.utils import decimal_to_int from vyper.exceptions import ArgumentException, StructureException +from vyper.utils import method_id TEST_ADDR = "0x" + b"".join(chr(i).encode("utf-8") for i in range(20)).hex() @@ -471,3 +472,611 @@ def foo(x: Bytes[32]): @pytest.mark.parametrize("bad_code,exception", FAIL_LIST) def test_abi_decode_length_mismatch(get_contract, assert_compile_failed, bad_code, exception): assert_compile_failed(lambda: get_contract(bad_code), exception) + + +def _abi_payload_from_tuple(payload: tuple[int | bytes, ...]) -> bytes: + return b"".join(p.to_bytes(32, "big") if isinstance(p, int) else p for p in payload) + + +def _replicate(value: int, count: int) -> tuple[int, ...]: + return (value,) * count + + +def test_abi_decode_arithmetic_overflow(env, tx_failed, get_contract): + # test based on GHSA-9p8r-4xp4-gw5w: + # https://github.com/vyperlang/vyper/security/advisories/GHSA-9p8r-4xp4-gw5w#advisory-comment-91841 + # buf + head causes arithmetic overflow + code = """
+@external +def f(x: Bytes[32 * 3]): + a: Bytes[32] = b"foo" + y: Bytes[32 * 3] = x + + decoded_y1: Bytes[32] = _abi_decode(y, Bytes[32]) + a = b"bar" + decoded_y2: Bytes[32] = _abi_decode(y, Bytes[32]) + + assert decoded_y1 != decoded_y2 + """ + c = get_contract(code) + + data = method_id("f(bytes)") + payload = ( + 0x20, # tuple head + 0x60, # parent array length + # parent payload - this word will be considered as the head of the abi-encoded inner array + # and it will be added to base ptr leading to an arithmetic overflow + 2**256 - 0x60, + ) + data += _abi_payload_from_tuple(payload) + + with tx_failed(): + env.message_call(c.address, data=data) + + +def test_abi_decode_nonstrict_head(env, tx_failed, get_contract): + # data isn't strictly encoded - head is 0x21 instead of 0x20 + # but the head + length is still within runtime bounds of the parent buffer + code = """ +@external +def f(x: Bytes[32 * 5]): + y: Bytes[32 * 5] = x + a: Bytes[32] = b"a" + decoded_y1: DynArray[uint256, 3] = _abi_decode(y, DynArray[uint256, 3]) + a = b"aaaa" + decoded_y1 = _abi_decode(y, DynArray[uint256, 3]) + """ + c = get_contract(code) + + data = method_id("f(bytes)") + + payload = ( + 0x20, # tuple head + 0xA0, # parent array length + # head should be 0x20 but is 0x21 thus the data isn't strictly encoded + 0x21, + # we don't want to revert on invalid length, so set this to 0 + # the first byte of payload will be considered as the length + 0x00, + (0x01).to_bytes(1, "big"), # will be considered as the length=1 + (0x00).to_bytes(31, "big"), + *_replicate(0x03, 2), + ) + + data += _abi_payload_from_tuple(payload) + + env.message_call(c.address, data=data) + + +def test_abi_decode_child_head_points_to_parent(tx_failed, get_contract): + # data isn't strictly encoded and the head for the inner array + # skips the corresponding payload and points to another valid section of the parent buffer + code = """ +@external +def run(x: Bytes[14 * 32]): + y: Bytes[14 * 32] = x + decoded_y1:
DynArray[DynArray[DynArray[uint256, 2], 1], 2] = _abi_decode( + y, + DynArray[DynArray[DynArray[uint256, 2], 1], 2] + ) + """ + c = get_contract(code) + # encode [[[1, 1]], [[2, 2]]] and modify the head for [1, 1] + # to actually point to [2, 2] + payload = ( + 0x20, # top-level array head + 0x02, # top-level array length + 0x40, # head of DAr[DAr[DAr, uint256]]][0] + 0xE0, # head of DAr[DAr[DAr, uint256]]][1] + 0x01, # DAr[DAr[DAr, uint256]]][0] length + # head of DAr[DAr[DAr, uint256]]][0][0] + # points to DAr[DAr[DAr, uint256]]][1][0] + 0x20 * 6, + 0x02, # DAr[DAr[DAr, uint256]]][0][0] length + 0x01, # DAr[DAr[DAr, uint256]]][0][0][0] + 0x01, # DAr[DAr[DAr, uint256]]][0][0][1] + 0x01, # DAr[DAr[DAr, uint256]]][1] length + 0x20, # DAr[DAr[DAr, uint256]]][1][0] head + 0x02, # DAr[DAr[DAr, uint256]]][1][0] length + 0x02, # DAr[DAr[DAr, uint256]]][1][0][0] + 0x02, # DAr[DAr[DAr, uint256]]][1][0][1] + ) + + data = _abi_payload_from_tuple(payload) + + c.run(data) + + +def test_abi_decode_nonstrict_head_oob(tx_failed, get_contract): + # data isn't strictly encoded and (non_strict_head + len(DynArray[..][2])) > parent_static_sz + # thus decoding the data pointed to by the head would cause an OOB read + # non_strict_head + length == parent + parent_static_sz + 1 + code = """ +@external +def run(x: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4]): + y: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4] = x + decoded_y1: DynArray[Bytes[32 * 3], 3] = _abi_decode(y, DynArray[Bytes[32 * 3], 3]) + """ + c = get_contract(code) + + payload = ( + 0x20, # DynArray head + 0x03, # DynArray length + # non_strict_head - if the length pointed to by this head is 0x60 (which is valid + # length for the Bytes[32*3] buffer), the decoding function would decode + # 1 byte over the end of the buffer + # we define the non_strict_head as: skip the remaining heads, 1st and 2nd tail + # to the third tail + 1B + 0x20 * 8 + 0x20 * 3 + 0x01, # inner array0 head + 0x20 * 4 + 0x20 * 3, # inner array1 head + 0x20 * 8 + 0x20 * 3, 
# inner array2 head + 0x60, # DynArray[Bytes[96], 3][0] length + *_replicate(0x01, 3), # DynArray[Bytes[96], 3][0] data + 0x60, # DynArray[Bytes[96], 3][1] length + *_replicate(0x01, 3), # DynArray[Bytes[96], 3][1] data + # the invalid head points here + 1B (thus the length is 0x60) + # we don't revert because of invalid length, but because head+length is OOB + 0x00, # DynArray[Bytes[96], 3][2] length + (0x60).to_bytes(1, "big"), + (0x00).to_bytes(31, "big"), + *_replicate(0x03, 2), + ) + + data = _abi_payload_from_tuple(payload) + + with tx_failed(): + c.run(data) + + +def test_abi_decode_nonstrict_head_oob2(tx_failed, get_contract): + # same principle as in Test_abi_decode_nonstrict_head_oob + # but adapted for dynarrays + code = """ +@external +def run(x: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4]): + y: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4] = x + decoded_y1: DynArray[DynArray[uint256, 3], 3] = _abi_decode( + y, + DynArray[DynArray[uint256, 3], 3] + ) + """ + c = get_contract(code) + + payload = ( + 0x20, # DynArray head + 0x03, # DynArray length + (0x20 * 8 + 0x20 * 3 + 0x01), # inner array0 head + (0x20 * 4 + 0x20 * 3), # inner array1 head + (0x20 * 8 + 0x20 * 3), # inner array2 head + 0x03, # DynArray[..][0] length + *_replicate(0x01, 3), # DynArray[..][0] data + 0x03, # DynArray[..][1] length + *_replicate(0x01, 3), # DynArray[..][1] data + 0x00, # DynArray[..][2] length + (0x03).to_bytes(1, "big"), + (0x00).to_bytes(31, "big"), + *_replicate(0x01, 2), # DynArray[..][2] data + ) + + data = _abi_payload_from_tuple(payload) + + with tx_failed(): + c.run(data) + + +def test_abi_decode_head_pointing_outside_buffer(tx_failed, get_contract): + # the head points completely outside the buffer + code = """ +@external +def run(x: Bytes[3 * 32]): + y: Bytes[3 * 32] = x + decoded_y1: Bytes[32] = _abi_decode(y, Bytes[32]) + """ + c = get_contract(code) + + payload = (0x80, 0x20, 0x01) + data = _abi_payload_from_tuple(payload) + + with tx_failed(): + c.run(data) + + +def 
test_abi_decode_bytearray_clamp(tx_failed, get_contract): + # data has valid encoding, but the length of DynArray[Bytes[96], 3][0] is set to 0x61 + # and thus the decoding should fail on bytestring clamp + code = """ +@external +def run(x: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4]): + y: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4] = x + decoded_y1: DynArray[Bytes[32 * 3], 3] = _abi_decode(y, DynArray[Bytes[32 * 3], 3]) + """ + c = get_contract(code) + + payload = ( + 0x20, # DynArray head + 0x03, # DynArray length + 0x20 * 3, # inner array0 head + 0x20 * 4 + 0x20 * 3, # inner array1 head + 0x20 * 8 + 0x20 * 3, # inner array2 head + # invalid length - should revert on bytestring clamp + 0x61, # DynArray[Bytes[96], 3][0] length + *_replicate(0x01, 3), # DynArray[Bytes[96], 3][0] data + 0x60, # DynArray[Bytes[96], 3][1] length + *_replicate(0x01, 3), # DynArray[Bytes[96], 3][1] data + 0x60, # DynArray[Bytes[96], 3][2] length + *_replicate(0x01, 3), # DynArray[Bytes[96], 3][2] data + ) + + data = _abi_payload_from_tuple(payload) + + with tx_failed(): + c.run(data) + + +def test_abi_decode_runtimesz_oob(tx_failed, get_contract, env): + # provide enough data, but set the runtime size to be smaller than the actual size + # so after y: [..] = x, y will have the incorrect size set and only part of the + # original data will be copied. 
This will cause oob read outside the + # runtime sz (but still within static size of the buffer) + code = """ +@external +def f(x: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4]): + y: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4] = x + decoded_y1: DynArray[Bytes[32 * 3], 3] = _abi_decode(y, DynArray[Bytes[32 * 3], 3]) + """ + c = get_contract(code) + + data = method_id("f(bytes)") + + payload = ( + 0x20, # tuple head + # the correct size is 0x220 (2*32+3*32+4*3*32) + # therefore we will decode after the end of runtime size (but still within the buffer) + 0x01E4, # top-level bytes array length + 0x20, # DynArray head + 0x03, # DynArray length + 0x20 * 3, # inner array0 head + 0x20 * 4 + 0x20 * 3, # inner array1 head + 0x20 * 8 + 0x20 * 3, # inner array2 head + 0x60, # DynArray[Bytes[96], 3][0] length + *_replicate(0x01, 3), # DynArray[Bytes[96], 3][0] data + 0x60, # DynArray[Bytes[96], 3][1] length + *_replicate(0x01, 3), # DynArray[Bytes[96], 3][1] data + 0x60, # DynArray[Bytes[96], 3][2] length + *_replicate(0x01, 3), # DynArray[Bytes[96], 3][2] data + ) + + data += _abi_payload_from_tuple(payload) + + with tx_failed(): + env.message_call(c.address, data=data) + + +def test_abi_decode_runtimesz_oob2(tx_failed, get_contract, env): + # same principle as in test_abi_decode_runtimesz_oob + # but adapted for dynarrays + code = """ +@external +def f(x: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4]): + y: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4] = x + decoded_y1: DynArray[DynArray[uint256, 3], 3] = _abi_decode( + y, + DynArray[DynArray[uint256, 3], 3] + ) + """ + c = get_contract(code) + + data = method_id("f(bytes)") + + payload = ( + 0x20, # tuple head + 0x01E4, # top-level bytes array length + 0x20, # DynArray head + 0x03, # DynArray length + 0x20 * 3, # inner array0 head + 0x20 * 4 + 0x20 * 3, # inner array1 head + 0x20 * 8 + 0x20 * 3, # inner array2 head + 0x03, # DynArray[..][0] length + *_replicate(0x01, 3), # DynArray[..][0] data + 0x03, # DynArray[..][1] length + *_replicate(0x01, 3), # 
DynArray[..][1] data + 0x03, # DynArray[..][2] length + *_replicate(0x01, 3), # DynArray[..][2] data + ) + + data += _abi_payload_from_tuple(payload) + + with tx_failed(): + env.message_call(c.address, data=data) + + +def test_abi_decode_head_roundtrip(tx_failed, get_contract, env): + # top-level head in the y2 buffer points to the y1 buffer + # and y1 contains intermediate heads pointing to the inner arrays + # which are in turn in the y2 buffer + # NOTE: the test is memory allocator dependent - we assume that y1 and y2 + # have the 800 & 960 addresses respectively + code = """ +@external +def run(x1: Bytes[4 * 32], x2: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4]): + y1: Bytes[4*32] = x1 # addr: 800 + y2: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4] = x2 # addr: 960 + decoded_y1: DynArray[DynArray[uint256, 3], 3] = _abi_decode( + y2, + DynArray[DynArray[uint256, 3], 3] + ) + """ + c = get_contract(code) + + payload = ( + 0x03, # DynArray length + # distance to y2 from y1 is 160 + 160 + 0x20 + 0x20 * 3, # points to DynArray[..][0] length + 160 + 0x20 + 0x20 * 4 + 0x20 * 3, # points to DynArray[..][1] length + 160 + 0x20 + 0x20 * 8 + 0x20 * 3, # points to DynArray[..][2] length + ) + + data1 = _abi_payload_from_tuple(payload) + + payload = ( + # (960 + (2**256 - 160)) % 2**256 == 800, ie will roundtrip to y1 + 2**256 - 160, # points to y1 + 0x03, # DynArray length (not used) + 0x20 * 3, # inner array0 head + 0x20 * 4 + 0x20 * 3, # inner array1 head + 0x20 * 8 + 0x20 * 3, # inner array2 head + 0x03, # DynArray[..][0] length + *_replicate(0x01, 3), # DynArray[..][0] data + 0x03, # DynArray[..][1] length + *_replicate(0x02, 3), # DynArray[..][1] data + 0x03, # DynArray[..][2] length + *_replicate(0x03, 3), # DynArray[..][2] data + ) + + data2 = _abi_payload_from_tuple(payload) + + with tx_failed(): + c.run(data1, data2) + + +def test_abi_decode_merge_head_and_length(get_contract): + # compress head and length into 33B + code = """ +@external +def run(x: Bytes[32 * 2 + 8 * 32]) -> 
uint256: + y: Bytes[32 * 2 + 8 * 32] = x + decoded_y1: Bytes[256] = _abi_decode(y, Bytes[256]) + return len(decoded_y1) + """ + c = get_contract(code) + + payload = (0x01, (0x00).to_bytes(1, "big"), *_replicate(0x00, 8)) + + data = _abi_payload_from_tuple(payload) + + length = c.run(data) + + assert length == 256 + + +def test_abi_decode_extcall_invalid_head(tx_failed, get_contract): + # the head returned from the extcall is set to invalid value of 480 + code = """ +@external +def bar() -> (uint256, uint256, uint256): + return (480, 0, 0) + +interface A: + def bar() -> String[32]: nonpayable + +@external +def foo(): + x:String[32] = extcall A(self).bar() + """ + c = get_contract(code) + with tx_failed(): + c.foo() + + +def test_abi_decode_extcall_oob(tx_failed, get_contract): + # the head returned from the extcall is 1 byte bigger than expected + # thus we'll take the last 31 0-bytes from tuple[1] and the 1st byte from tuple[2] + # and consider this the length - thus the length is 2**5 + # and thus we'll read 1B over the buffer end (33 + 32 + 32) + code = """ +@external +def bar() -> (uint256, uint256, uint256): + return (33, 0, 2**(5+248)) + +interface A: + def bar() -> String[32]: nonpayable + +@external +def foo(): + x:String[32] = extcall A(self).bar() + """ + c = get_contract(code) + with tx_failed(): + c.foo() + + +def test_abi_decode_extcall_runtimesz_oob(tx_failed, get_contract): + # the runtime size (33) is bigger than the actual payload (32 bytes) + # thus we'll read 1B over the runtime size - but still within the static size of the buffer + code = """ +@external +def bar() -> (uint256, uint256, uint256): + return (32, 33, 0) + +interface A: + def bar() -> String[64]: nonpayable + +@external +def foo(): + x:String[64] = extcall A(self).bar() + """ + c = get_contract(code) + with tx_failed(): + c.foo() + + +def test_abi_decode_extcall_truncate_returndata(get_contract): + # return more data than expected + # the truncated data is still valid + code = """ 
+@external +def bar() -> (uint256, uint256, uint256, uint256): + return (32, 32, 36, 36) + +interface A: + def bar() -> Bytes[32]: nonpayable + +@external +def foo(): + x:Bytes[32] = extcall A(self).bar() + """ + c = get_contract(code) + c.foo() + + +def test_abi_decode_extcall_truncate_returndata2(tx_failed, get_contract): + # return more data than expected + # after truncation the data is invalid because the length is too big + # wrt to the static size of the buffer + code = """ +@external +def bar() -> (uint256, uint256, uint256, uint256): + return (32, 33, 36, 36) + +interface A: + def bar() -> Bytes[32]: nonpayable + +@external +def foo(): + x:Bytes[32] = extcall A(self).bar() + """ + c = get_contract(code) + with tx_failed(): + c.foo() + + +def test_abi_decode_extcall_return_nodata(tx_failed, get_contract): + code = """ +@external +def bar(): + return + +interface A: + def bar() -> Bytes[32]: nonpayable + +@external +def foo(): + x:Bytes[32] = extcall A(self).bar() + """ + c = get_contract(code) + with tx_failed(): + c.foo() + + +def test_abi_decode_extcall_array_oob(tx_failed, get_contract): + # same as in test_abi_decode_extcall_oob + # DynArray[..][1] head isn't strict and points 1B over + # thus the 1st B of 2**(5+248) is considered as the length (32) + # thus we try to decode 1B over the buffer end + code = """ +@external +def bar() -> (uint256, uint256, uint256, uint256, uint256, uint256, uint256, uint256): + return ( + 32, # DynArray head + 2, # DynArray length + 32 * 2, # DynArray[..][0] head + 32 * 2 + 32 * 2 + 1, # DynArray[..][1] head + 32, # DynArray[..][0] length + 0, # DynArray[..][0] data + 0, # DynArray[..][1] length + 2**(5+248) # DynArray[..][1] length (and data) + ) + +interface A: + def bar() -> DynArray[Bytes[32], 2]: nonpayable + +@external +def run(): + x: DynArray[Bytes[32], 2] = extcall A(self).bar() + """ + c = get_contract(code) + + with tx_failed(): + c.run() + + +def test_abi_decode_extcall_array_oob_with_truncate(tx_failed, 
get_contract): + # same as in test_abi_decode_extcall_oob but we also return more data than expected + # DynArray[..][1] head isn't strict and points 1B over + # thus the 1st B of 2**(5+248) is considered as the length (32) + # thus we try to decode 1B over the buffer end + code = """ +@external +def bar() -> (uint256, uint256, uint256, uint256, uint256, uint256, uint256, uint256, uint256): + return ( + 32, # DynArray head + 2, # DynArray length + 32 * 2, # DynArray[..][0] head + 32 * 2 + 32 * 2 + 1, # DynArray[..][1] head + 32, # DynArray[..][0] length + 0, # DynArray[..][0] data + 0, # DynArray[..][1] length + 2**(5+248), # DynArray[..][1] length (and data) + 0 # extra data + ) + +interface A: + def bar() -> DynArray[Bytes[32], 2]: nonpayable + +@external +def run(): + x: DynArray[Bytes[32], 2] = extcall A(self).bar() + """ + c = get_contract(code) + + with tx_failed(): + c.run() + + +def test_abi_decode_extcall_zero_len_array(get_contract): + code = """ +@external +def bar() -> (uint256, uint256): + return 32, 0 + +interface A: + def bar() -> DynArray[Bytes[32], 2]: nonpayable + +@external +def run(): + x: DynArray[Bytes[32], 2] = extcall A(self).bar() + """ + c = get_contract(code) + + c.run() + + +def test_abi_decode_extcall_zero_len_array2(get_contract): + code = """ +@external +def bar() -> (uint256, uint256): + return 0, 0 + +interface A: + def bar() -> DynArray[Bytes[32], 2]: nonpayable + +@external +def run() -> uint256: + x: DynArray[Bytes[32], 2] = extcall A(self).bar() + return len(x) + """ + c = get_contract(code) + + length = c.run() + + assert length == 0 diff --git a/tests/functional/builtins/codegen/test_empty.py b/tests/functional/builtins/codegen/test_empty.py index c8e6fc374e..dd6c5c7cc1 100644 --- a/tests/functional/builtins/codegen/test_empty.py +++ b/tests/functional/builtins/codegen/test_empty.py @@ -619,24 +619,17 @@ def test_clear_typecheck(contract, get_contract, assert_compile_failed): assert_compile_failed(lambda: 
get_contract(contract), TypeMismatch) +_33_bytes = b"\x01" * 33 +_65_bytes = b"\x01" * 65 + + @pytest.mark.parametrize( "a,b,expected", [ - ("empty(Bytes[65])", "b'hello'", (b"hello", b"")), - ("b'hello'", "empty(Bytes[33])", (b"", b"hello")), - ( - "empty(Bytes[65])", - "b'thirty three bytes long baby!!!!!'", - (b"thirty three bytes long baby!!!!!", b""), - ), - ( - "b'thirty three bytes long baby!!!aathirty three bytes long baby!!!a'", - "b'thirty three bytes long baby!!!aa'", - ( - b"thirty three bytes long baby!!!aa", - b"thirty three bytes long baby!!!aathirty three bytes long baby!!!a", - ), - ), + ("empty(Bytes[65])", b"hello", (b"hello", b"")), + (b"hello", "empty(Bytes[33])", (b"", b"hello")), + ("empty(Bytes[65])", _33_bytes, (_33_bytes, b"")), + (_65_bytes, _33_bytes, (_33_bytes, _65_bytes)), ], ) def test_empty_as_func_arg(get_contract, a, b, expected): diff --git a/vyper/builtins/functions.py b/vyper/builtins/functions.py index 280aaea266..d4c83b2bda 100644 --- a/vyper/builtins/functions.py +++ b/vyper/builtins/functions.py @@ -14,6 +14,7 @@ add_ofst, bytes_data_ptr, calculate_type_for_external_return, + check_buffer_overflow_ir, check_external_call, clamp, clamp2, @@ -232,18 +233,6 @@ def build_IR(self, expr, context): ADHOC_SLICE_NODE_MACROS = ["~calldata", "~selfcode", "~extcode"] -# make sure we don't overrun the source buffer, checking for overflow: -# valid inputs satisfy: -# `assert !(start+length > src_len || start+length < start` -def _make_slice_bounds_check(start, length, src_len): - with start.cache_when_complex("start") as (b1, start): - with add_ofst(start, length).cache_when_complex("end") as (b2, end): - arithmetic_overflow = ["lt", end, start] - buffer_oob = ["gt", end, src_len] - ok = ["iszero", ["or", arithmetic_overflow, buffer_oob]] - return b1.resolve(b2.resolve(["assert", ok])) - - def _build_adhoc_slice_node(sub: IRnode, start: IRnode, length: IRnode, context: Context) -> IRnode: assert length.is_literal, "typechecker failed" 
assert isinstance(length.value, int) # mypy hint @@ -257,7 +246,7 @@ def _build_adhoc_slice_node(sub: IRnode, start: IRnode, length: IRnode, context: if sub.value == "~calldata": node = [ "seq", - _make_slice_bounds_check(start, length, "calldatasize"), + check_buffer_overflow_ir(start, length, "calldatasize"), ["mstore", buf, length], ["calldatacopy", add_ofst(buf, 32), start, length], buf, @@ -267,7 +256,7 @@ def _build_adhoc_slice_node(sub: IRnode, start: IRnode, length: IRnode, context: elif sub.value == "~selfcode": node = [ "seq", - _make_slice_bounds_check(start, length, "codesize"), + check_buffer_overflow_ir(start, length, "codesize"), ["mstore", buf, length], ["codecopy", add_ofst(buf, 32), start, length], buf, @@ -282,7 +271,7 @@ def _build_adhoc_slice_node(sub: IRnode, start: IRnode, length: IRnode, context: sub.args[0], [ "seq", - _make_slice_bounds_check(start, length, ["extcodesize", "_extcode_address"]), + check_buffer_overflow_ir(start, length, ["extcodesize", "_extcode_address"]), ["mstore", buf, length], ["extcodecopy", "_extcode_address", add_ofst(buf, 32), start, length], buf, @@ -456,7 +445,7 @@ def build_IR(self, expr, args, kwargs, context): ret = [ "seq", - _make_slice_bounds_check(start, length, src_len), + check_buffer_overflow_ir(start, length, src_len), do_copy, ["mstore", dst, length], # set length dst, # return pointer to dst @@ -2539,7 +2528,11 @@ def build_IR(self, expr, args, kwargs, context): # sanity check buffer size for wrapped output type will not buffer overflow assert wrapped_typ.memory_bytes_required == output_typ.memory_bytes_required - ret.append(make_setter(output_buf, to_decode)) + + # pass a buffer bound to make_setter so appropriate oob + # validation is performed + buf_bound = add_ofst(data_ptr, data_len) + ret.append(make_setter(output_buf, to_decode, hi=buf_bound)) ret.append(output_buf) # finalize. set the type and location for the return buffer. 
diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index 7c932994d7..29831909c1 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -194,7 +194,7 @@ def dynarray_data_ptr(ptr): return add_ofst(ptr, ptr.location.word_scale) -def _dynarray_make_setter(dst, src): +def _dynarray_make_setter(dst, src, hi=None): assert isinstance(src.typ, DArrayT) assert isinstance(dst.typ, DArrayT) @@ -208,6 +208,9 @@ def _dynarray_make_setter(dst, src): # before we clobber the length word. if src.value == "multi": + # validation is only performed on unsafe data, but we are dealing with + # a literal here. + assert hi is None ret = ["seq"] # handle literals @@ -258,6 +261,7 @@ def _dynarray_make_setter(dst, src): loop_body = make_setter( get_element_ptr(dst, i, array_bounds_check=False), get_element_ptr(src, i, array_bounds_check=False), + hi=hi, ) loop_body.annotation = f"{dst}[i] = {src}[i]" @@ -449,7 +453,7 @@ def _mul(x, y): # Resolve pointer locations for ABI-encoded data -def _getelemptr_abi_helper(parent, member_t, ofst, clamp=True): +def _getelemptr_abi_helper(parent, member_t, ofst): member_abi_t = member_t.abi_type # ABI encoding has length word and then pretends length is not there @@ -462,9 +466,10 @@ def _getelemptr_abi_helper(parent, member_t, ofst, clamp=True): if member_abi_t.is_dynamic(): # double dereference, according to ABI spec - # TODO optimize special case: first dynamic item - # offset is statically known. 
ofst_ir = add_ofst(parent, unwrap_location(ofst_ir)) + if _dirty_read_risk(ofst_ir): + # check no arithmetic overflow + ofst_ir = ["seq", ["assert", ["ge", ofst_ir, parent]], ofst_ir] return IRnode.from_list( ofst_ir, @@ -476,7 +481,7 @@ def _getelemptr_abi_helper(parent, member_t, ofst, clamp=True): # TODO simplify this code, especially the ABI decoding -def _get_element_ptr_tuplelike(parent, key): +def _get_element_ptr_tuplelike(parent, key, hi=None): typ = parent.typ assert is_tuple_like(typ) @@ -487,7 +492,7 @@ def _get_element_ptr_tuplelike(parent, key): index = attrs.index(key) annotation = key else: - # TupleT + assert isinstance(typ, TupleT) assert isinstance(key, int) subtype = typ.member_types[key] attrs = list(typ.tuple_keys()) @@ -872,10 +877,41 @@ def needs_clamp(t, encoding): raise CompilerPanic("unreachable") # pragma: nocover +# when abi encoded data is user provided and lives in memory, +# we risk either reading oob of the buffer or oob of the payload data. +# in these cases, we need additional validation. +def _dirty_read_risk(ir_node): + return ir_node.encoding == Encoding.ABI and ir_node.location == MEMORY + + +# child elements which have dynamic length, and could overflow the buffer +# even if the start of the item is in-bounds. +def _abi_payload_size(ir_node): + SCALE = ir_node.location.word_scale + assert SCALE == 32 # we must be in some byte-addressable region, like memory + + OFFSET = DYNAMIC_ARRAY_OVERHEAD * SCALE + + if isinstance(ir_node.typ, DArrayT): + return ["add", OFFSET, ["mul", get_dyn_array_count(ir_node), SCALE]] + + if isinstance(ir_node.typ, _BytestringT): + return ["add", OFFSET, get_bytearray_length(ir_node)] + + raise CompilerPanic("unreachable") # pragma: nocover + + # Create an x=y statement, where the types may be compound -def make_setter(left, right): +def make_setter(left, right, hi=None): check_assign(left, right) + # we need bounds checks when decoding from memory, otherwise we can + # get oob reads. 
+ # + # the caller is responsible for calculating the bound; + # sanity check that there is a bound if there is dirty read risk + assert (hi is not None) == _dirty_read_risk(right) + # For types which occupy just one word we can use single load/store if left.typ._is_prim_word: enc = right.encoding # unwrap_location butchers encoding @@ -892,7 +928,7 @@ def make_setter(left, right): if needs_clamp(right.typ, right.encoding): with right.cache_when_complex("bs_ptr") as (b, right): copier = make_byte_array_copier(left, right) - ret = b.resolve(["seq", clamp_bytestring(right), copier]) + ret = b.resolve(["seq", clamp_bytestring(right, hi=hi), copier]) else: ret = make_byte_array_copier(left, right) @@ -907,8 +943,8 @@ def make_setter(left, right): # TODO rethink/streamline the clamp_basetype logic if needs_clamp(right.typ, right.encoding): with right.cache_when_complex("arr_ptr") as (b, right): - copier = _dynarray_make_setter(left, right) - ret = b.resolve(["seq", clamp_dyn_array(right), copier]) + copier = _dynarray_make_setter(left, right, hi=hi) + ret = b.resolve(["seq", clamp_dyn_array(right, hi=hi), copier]) else: ret = _dynarray_make_setter(left, right) @@ -917,7 +953,7 @@ def make_setter(left, right): # Complex Types assert isinstance(left.typ, (SArrayT, TupleT, StructT)) - return _complex_make_setter(left, right) + return _complex_make_setter(left, right, hi=hi) # locations with no dedicated copy opcode @@ -929,7 +965,7 @@ def copy_opcode_available(left, right): return left.location == MEMORY and right.location.has_copy_opcode -def _complex_make_setter(left, right): +def _complex_make_setter(left, right, hi=None): if right.value == "~empty" and left.location == MEMORY: # optimized memzero return mzero(left, left.typ.memory_bytes_required) @@ -1013,13 +1049,14 @@ def _complex_make_setter(left, right): for k in keys: l_i = get_element_ptr(left, k, array_bounds_check=False) r_i = get_element_ptr(right, k, array_bounds_check=False) - ret.append(make_setter(l_i, 
r_i)) + ret.append(make_setter(l_i, r_i, hi=hi)) return b1.resolve(b2.resolve(IRnode.from_list(ret))) def ensure_in_memory(ir_var, context): - """Ensure a variable is in memory. This is useful for functions + """ + Ensure a variable is in memory. This is useful for functions which expect to operate on memory variables. """ if ir_var.location == MEMORY: @@ -1085,19 +1122,47 @@ def sar(bits, x): return ["sar", bits, x] -def clamp_bytestring(ir_node): +def clamp_bytestring(ir_node, hi=None): t = ir_node.typ if not isinstance(t, _BytestringT): # pragma: nocover raise CompilerPanic(f"{t} passed to clamp_bytestring") - ret = ["assert", ["le", get_bytearray_length(ir_node), t.maxlen]] - return IRnode.from_list(ret, error_msg=f"{ir_node.typ} bounds check") + # check if byte array length is within type max + with get_bytearray_length(ir_node).cache_when_complex("length") as (b1, length): + len_check = ["assert", ["le", length, t.maxlen]] + + assert (hi is not None) == _dirty_read_risk(ir_node) + if hi is not None: + assert t.maxlen < 2**64 # sanity check -def clamp_dyn_array(ir_node): + # note: this add does not risk arithmetic overflow because + # length is bounded by maxlen. 
+ item_end = add_ofst(ir_node, _abi_payload_size(ir_node)) + + len_check = ["seq", ["assert", ["le", item_end, hi]], len_check] + + return IRnode.from_list(b1.resolve(len_check), error_msg=f"{ir_node.typ} bounds check") + + +def clamp_dyn_array(ir_node, hi=None): t = ir_node.typ assert isinstance(t, DArrayT) - ret = ["assert", ["le", get_dyn_array_count(ir_node), t.count]] - return IRnode.from_list(ret, error_msg=f"{ir_node.typ} bounds check") + + len_check = ["assert", ["le", get_dyn_array_count(ir_node), t.count]] + + assert (hi is not None) == _dirty_read_risk(ir_node) + + # if the subtype is dynamic, the check will be performed in the recursion + if hi is not None and not t.abi_type.subtyp.is_dynamic(): + assert t.count < 2**64 # sanity check + + # note: this add does not risk arithmetic overflow because + # length is bounded by count * elemsize. + item_end = add_ofst(ir_node, _abi_payload_size(ir_node)) + + len_check = ["seq", ["assert", ["le", item_end, hi]], len_check] + + return IRnode.from_list(len_check, error_msg=f"{ir_node.typ} bounds check") # clampers for basetype @@ -1211,3 +1276,15 @@ def clamp2(lo, arg, hi, signed): LE = "sle" if signed else "le" ret = ["seq", ["assert", ["and", [GE, arg, lo], [LE, arg, hi]]], arg] return IRnode.from_list(b1.resolve(ret), typ=arg.typ) + + +# make sure we don't overrun the source buffer, checking for overflow: +# valid inputs satisfy: +# `assert !(start+length > src_len || start+length < start)` +def check_buffer_overflow_ir(start, length, src_len): + with start.cache_when_complex("start") as (b1, start): + with add_ofst(start, length).cache_when_complex("end") as (b2, end): + arithmetic_overflow = ["lt", end, start] + buffer_oob = ["gt", end, src_len] + ok = ["iszero", ["or", arithmetic_overflow, buffer_oob]] + return b1.resolve(b2.resolve(["assert", ok])) diff --git a/vyper/codegen/external_call.py b/vyper/codegen/external_call.py index 607872b052..f3cd4e7a44 100644 --- a/vyper/codegen/external_call.py +++ 
b/vyper/codegen/external_call.py @@ -119,7 +119,14 @@ def _unpack_returndata(buf, fn_type, call_kwargs, contract_address, context, exp return_buf = context.new_internal_variable(wrapped_return_t) # note: make_setter does ABI decoding and clamps - unpacker.append(make_setter(return_buf, buf)) + + payload_bound = IRnode.from_list( + ["select", ["lt", ret_len, "returndatasize"], ret_len, "returndatasize"] + ) + with payload_bound.cache_when_complex("payload_bound") as (b1, payload_bound): + unpacker.append( + b1.resolve(make_setter(return_buf, buf, hi=add_ofst(buf, payload_bound))) + ) else: return_buf = buf From 81b48b7a67487793b08e42dcd10f15f4bfc87149 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 29 May 2024 09:53:37 -0700 Subject: [PATCH 09/53] fix[test]: fix call graph stability fuzzer (#4064) the call graph stability fuzzer could generate an internal function named `foo()`, which is the same name as the entry function in the test contract. --- tests/functional/codegen/test_call_graph_stability.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/functional/codegen/test_call_graph_stability.py b/tests/functional/codegen/test_call_graph_stability.py index e14843de40..f64ae4b8ac 100644 --- a/tests/functional/codegen/test_call_graph_stability.py +++ b/tests/functional/codegen/test_call_graph_stability.py @@ -11,6 +11,11 @@ def _valid_identifier(attr): + if attr == "foo": + # the entry point to the test is named foo(), + # skip it to avoid collision + return False + return attr not in RESERVED_KEYWORDS From 1b335c5e224a94ccdaa0cc7f685da7ddf6991cf4 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 29 May 2024 22:29:38 +0300 Subject: [PATCH 10/53] fix[venom]: remove liveness requests (#4058) This commit sanitizes the requests/invalidations of liveness analysis in the various passes. 
Additionally it renames the property `in_vars` to `liveness_in_vars` to make it clear that this property is liveness dependent --- vyper/venom/analysis/dfg.py | 4 ++++ vyper/venom/basicblock.py | 2 +- vyper/venom/passes/make_ssa.py | 6 +++++- vyper/venom/passes/mem2var.py | 1 - vyper/venom/passes/remove_unused_variables.py | 3 +++ 5 files changed, 13 insertions(+), 3 deletions(-) diff --git a/vyper/venom/analysis/dfg.py b/vyper/venom/analysis/dfg.py index ef16e1b357..328ed47c72 100644 --- a/vyper/venom/analysis/dfg.py +++ b/vyper/venom/analysis/dfg.py @@ -1,6 +1,7 @@ from typing import Optional from vyper.venom.analysis.analysis import IRAnalysesCache, IRAnalysis +from vyper.venom.analysis.liveness import LivenessAnalysis from vyper.venom.basicblock import IRInstruction, IRVariable from vyper.venom.function import IRFunction @@ -67,5 +68,8 @@ def as_graph(self) -> str: lines.append("}") return "\n".join(lines) + def invalidate(self): + self.analyses_cache.invalidate_analysis(LivenessAnalysis) + def __repr__(self) -> str: return self.as_graph() diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index c979f33fbb..19e8801663 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -509,7 +509,7 @@ def is_terminal(self) -> bool: return len(self.cfg_out) == 0 @property - def in_vars(self) -> OrderedSet[IRVariable]: + def liveness_in_vars(self) -> OrderedSet[IRVariable]: for inst in self.instructions: if inst.opcode != "phi": return inst.liveness diff --git a/vyper/venom/passes/make_ssa.py b/vyper/venom/passes/make_ssa.py index 0ea3a20884..a803514d8b 100644 --- a/vyper/venom/passes/make_ssa.py +++ b/vyper/venom/passes/make_ssa.py @@ -19,6 +19,8 @@ def run_pass(self): self.analyses_cache.request_analysis(CFGAnalysis) self.dom = self.analyses_cache.request_analysis(DominatorTreeAnalysis) + + # Request liveness analysis so the `liveness_in_vars` field is valid self.analyses_cache.request_analysis(LivenessAnalysis) self._add_phi_nodes() @@ 
-28,6 +30,8 @@ def run_pass(self): self._rename_vars(fn.entry) self._remove_degenerate_phis(fn.entry) + self.analyses_cache.invalidate_analysis(LivenessAnalysis) + def _add_phi_nodes(self): """ Add phi nodes to the function. @@ -54,7 +58,7 @@ def _add_phi_nodes(self): defs.append(dom) def _place_phi(self, var: IRVariable, basic_block: IRBasicBlock): - if var not in basic_block.in_vars: + if var not in basic_block.liveness_in_vars: return args: list[IROperand] = [] diff --git a/vyper/venom/passes/mem2var.py b/vyper/venom/passes/mem2var.py index eb9a6d52c5..f4a37f5abb 100644 --- a/vyper/venom/passes/mem2var.py +++ b/vyper/venom/passes/mem2var.py @@ -19,7 +19,6 @@ class Mem2Var(IRPass): def run_pass(self): self.analyses_cache.request_analysis(CFGAnalysis) dfg = self.analyses_cache.request_analysis(DFGAnalysis) - self.analyses_cache.request_analysis(LivenessAnalysis) self.var_name_count = 0 for var, inst in dfg.outputs.items(): diff --git a/vyper/venom/passes/remove_unused_variables.py b/vyper/venom/passes/remove_unused_variables.py index 53b0505024..1ce5c141d9 100644 --- a/vyper/venom/passes/remove_unused_variables.py +++ b/vyper/venom/passes/remove_unused_variables.py @@ -1,5 +1,6 @@ from vyper.utils import OrderedSet from vyper.venom.analysis.dfg import DFGAnalysis +from vyper.venom.analysis.liveness import LivenessAnalysis from vyper.venom.basicblock import IRInstruction from vyper.venom.passes.base_pass import IRPass @@ -25,6 +26,8 @@ def run_pass(self): inst = work_list.pop() self._process_instruction(inst) + self.analyses_cache.invalidate_analysis(LivenessAnalysis) + def _process_instruction(self, inst): if inst.output is None: return From dcec25789bc70da57cc15d55f0bdf80f669427a6 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 29 May 2024 23:30:02 +0300 Subject: [PATCH 11/53] feat[venom]: add algebraic optimization pass (#4054) Add a new venom pass to do algebraic optimizations. Currently optimizes `iszero` chains. 
--------- Co-authored-by: Charles Cooper --- .../venom/test_algebraic_optimizer.py | 129 ++++++++++++++++++ vyper/venom/__init__.py | 2 + vyper/venom/passes/algebraic_optimization.py | 67 +++++++++ 3 files changed, 198 insertions(+) create mode 100644 tests/unit/compiler/venom/test_algebraic_optimizer.py create mode 100644 vyper/venom/passes/algebraic_optimization.py diff --git a/tests/unit/compiler/venom/test_algebraic_optimizer.py b/tests/unit/compiler/venom/test_algebraic_optimizer.py new file mode 100644 index 0000000000..e0368d4197 --- /dev/null +++ b/tests/unit/compiler/venom/test_algebraic_optimizer.py @@ -0,0 +1,129 @@ +import pytest + +from vyper.venom.analysis.analysis import IRAnalysesCache +from vyper.venom.basicblock import IRBasicBlock, IRLabel +from vyper.venom.context import IRContext +from vyper.venom.passes.algebraic_optimization import AlgebraicOptimizationPass +from vyper.venom.passes.make_ssa import MakeSSA +from vyper.venom.passes.remove_unused_variables import RemoveUnusedVariablesPass + + +@pytest.mark.parametrize("iszero_count", range(5)) +def test_simple_jump_case(iszero_count): + ctx = IRContext() + fn = ctx.create_function("_global") + + bb = fn.get_basic_block() + + br1 = IRBasicBlock(IRLabel("then"), fn) + fn.append_basic_block(br1) + br2 = IRBasicBlock(IRLabel("else"), fn) + fn.append_basic_block(br2) + + p1 = bb.append_instruction("param") + op1 = bb.append_instruction("store", p1) + op2 = bb.append_instruction("store", 64) + op3 = bb.append_instruction("add", op1, op2) + jnz_input = op3 + + for _ in range(iszero_count): + jnz_input = bb.append_instruction("iszero", jnz_input) + + bb.append_instruction("jnz", jnz_input, br1.label, br2.label) + + br1.append_instruction("add", op3, 10) + br1.append_instruction("stop") + br2.append_instruction("add", op3, p1) + br2.append_instruction("stop") + + ac = IRAnalysesCache(fn) + MakeSSA(ac, fn).run_pass() + AlgebraicOptimizationPass(ac, fn).run_pass() + RemoveUnusedVariablesPass(ac, 
fn).run_pass() + + iszeros = [inst for inst in bb.instructions if inst.opcode == "iszero"] + removed_iszeros = iszero_count - len(iszeros) + + assert removed_iszeros % 2 == 0 + assert len(iszeros) == iszero_count % 2 + + +@pytest.mark.parametrize("iszero_count", range(1, 5)) +def test_simple_bool_cast_case(iszero_count): + ctx = IRContext() + fn = ctx.create_function("_global") + + bb = fn.get_basic_block() + + br1 = IRBasicBlock(IRLabel("then"), fn) + fn.append_basic_block(br1) + + p1 = bb.append_instruction("param") + op1 = bb.append_instruction("store", p1) + op2 = bb.append_instruction("store", 64) + op3 = bb.append_instruction("add", op1, op2) + jnz_input = op3 + + for _ in range(iszero_count): + jnz_input = bb.append_instruction("iszero", jnz_input) + + bb.append_instruction("mstore", jnz_input, p1) + bb.append_instruction("jmp", br1.label) + + br1.append_instruction("add", op3, 10) + br1.append_instruction("stop") + + ac = IRAnalysesCache(fn) + MakeSSA(ac, fn).run_pass() + AlgebraicOptimizationPass(ac, fn).run_pass() + RemoveUnusedVariablesPass(ac, fn).run_pass() + + iszeros = [inst for inst in bb.instructions if inst.opcode == "iszero"] + removed_iszeros = iszero_count - len(iszeros) + + assert removed_iszeros % 2 == 0 + assert len(iszeros) in [1, 2] + assert len(iszeros) % 2 == iszero_count % 2 + + +@pytest.mark.parametrize("interleave_point", range(1, 5)) +def test_interleaved_case(interleave_point): + iszeros_after_interleave_point = interleave_point // 2 + ctx = IRContext() + fn = ctx.create_function("_global") + + bb = fn.get_basic_block() + + br1 = IRBasicBlock(IRLabel("then"), fn) + fn.append_basic_block(br1) + br2 = IRBasicBlock(IRLabel("else"), fn) + fn.append_basic_block(br2) + + p1 = bb.append_instruction("param") + op1 = bb.append_instruction("store", p1) + op2 = bb.append_instruction("store", 64) + op3 = bb.append_instruction("add", op1, op2) + op3_inv = bb.append_instruction("iszero", op3) + jnz_input = op3_inv + for _ in 
range(interleave_point): + jnz_input = bb.append_instruction("iszero", jnz_input) + bb.append_instruction("mstore", jnz_input, p1) + for _ in range(iszeros_after_interleave_point): + jnz_input = bb.append_instruction("iszero", jnz_input) + bb.append_instruction("jnz", jnz_input, br1.label, br2.label) + + br1.append_instruction("add", op3, 10) + br1.append_instruction("stop") + br2.append_instruction("add", op3, p1) + br2.append_instruction("stop") + + ac = IRAnalysesCache(fn) + MakeSSA(ac, fn).run_pass() + AlgebraicOptimizationPass(ac, fn).run_pass() + RemoveUnusedVariablesPass(ac, fn).run_pass() + + assert bb.instructions[-1].opcode == "jnz" + if (interleave_point + iszeros_after_interleave_point) % 2 == 0: + assert bb.instructions[-1].operands[0] == op3_inv + else: + assert bb.instructions[-1].operands[0] == op3 diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index 82901126bc..cd981cd462 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -9,6 +9,7 @@ from vyper.venom.context import IRContext from vyper.venom.function import IRFunction from vyper.venom.ir_node_to_venom import ir_node_to_venom +from vyper.venom.passes.algebraic_optimization import AlgebraicOptimizationPass from vyper.venom.passes.branch_optimization import BranchOptimizationPass from vyper.venom.passes.dft import DFTPass from vyper.venom.passes.make_ssa import MakeSSA @@ -50,6 +51,7 @@ def _run_passes(fn: IRFunction, optimize: OptimizationLevel) -> None: SCCP(ac, fn).run_pass() StoreElimination(ac, fn).run_pass() SimplifyCFGPass(ac, fn).run_pass() + AlgebraicOptimizationPass(ac, fn).run_pass() BranchOptimizationPass(ac, fn).run_pass() RemoveUnusedVariablesPass(ac, fn).run_pass() DFTPass(ac, fn).run_pass() diff --git a/vyper/venom/passes/algebraic_optimization.py b/vyper/venom/passes/algebraic_optimization.py new file mode 100644 index 0000000000..4094219a6d --- /dev/null +++ b/vyper/venom/passes/algebraic_optimization.py @@ -0,0 +1,67 @@ +from 
vyper.venom.analysis.dfg import DFGAnalysis +from vyper.venom.analysis.liveness import LivenessAnalysis +from vyper.venom.basicblock import IRInstruction, IROperand +from vyper.venom.passes.base_pass import IRPass + + +class AlgebraicOptimizationPass(IRPass): + """ + This pass reduces algebraic evaluatable expressions. + + It currently optimizes: + * iszero chains + """ + + def _optimize_iszero_chains(self) -> None: + fn = self.function + for bb in fn.get_basic_blocks(): + for inst in bb.instructions: + if inst.opcode != "iszero": + continue + + iszero_chain = self._get_iszero_chain(inst.operands[0]) + iszero_count = len(iszero_chain) + if iszero_count == 0: + continue + + for use_inst in self.dfg.get_uses(inst.output): + opcode = use_inst.opcode + + if opcode == "iszero": + # We keep iszero instuctions as is + continue + if opcode in ("jnz", "assert"): + # instructions that accept a truthy value as input: + # we can remove up to all the iszero instructions + keep_count = 1 - iszero_count % 2 + else: + # all other instructions: + # we need to keep at least one or two iszero instructions + keep_count = 1 + iszero_count % 2 + + if keep_count >= iszero_count: + continue + + out_var = iszero_chain[keep_count].operands[0] + use_inst.replace_operands({inst.output: out_var}) + + def _get_iszero_chain(self, op: IROperand) -> list[IRInstruction]: + chain: list[IRInstruction] = [] + + while True: + inst = self.dfg.get_producing_instruction(op) + if inst is None or inst.opcode != "iszero": + break + op = inst.operands[0] + chain.append(inst) + + chain.reverse() + return chain + + def run_pass(self): + self.dfg = self.analyses_cache.request_analysis(DFGAnalysis) + + self._optimize_iszero_chains() + + self.analyses_cache.invalidate_analysis(DFGAnalysis) + self.analyses_cache.invalidate_analysis(LivenessAnalysis) From d9f9fdadd81a148cbc68f02dbbbcdc0c92fad652 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 30 May 2024 05:22:47 -0700 Subject: [PATCH 12/53] fix[ir]: fix a 
latent bug in `sha3_64` codegen (#4063) this commit fixes a latent bug in `sha3_64` asm generation, where the second argument to `sha3_64` is generated with the wrong stack height note the bug cannot currently be triggered from vyper source code (hence, it is latent), because there is nowhere in IR generation that the second item is accessed as a witharg. patches GHSA-6845-xw22-ffxv --- vyper/ir/compile_ir.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 472d28f4fb..4c68aa2c8f 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -580,7 +580,9 @@ def _height_of(witharg): # SHA3 a 64 byte value elif code.value == "sha3_64": o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height) - o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height)) + o.extend( + _compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height + 1) + ) o.extend( [ *PUSH(MemoryPositions.FREE_VAR_SPACE2), From 9745d44daacd6228ab56f8751940fc3df1b039fb Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 30 May 2024 05:30:37 -0700 Subject: [PATCH 13/53] fix[codegen]: fix `make_setter` overlap in `dynarray_append` (#4059) `make_setter` can potentially run into overlap when called from `dynarray_append`; this commit copies to a temporary intermediate buffer (as is done in `AnnAssign`) before copying to the destination if the condition is detected. this is a variant of the bugs fixed in ad9c10b0b98e2d and 1c8349e867b2b. this commit also adds a util function to detect overlap, and adds an assertion directly in `make_setter` so that future variants panic instead of generating bad code. 
--- .../codegen/types/test_dynamic_array.py | 22 +++++++++++ vyper/codegen/context.py | 1 - vyper/codegen/core.py | 23 +++++++++++ vyper/codegen/expr.py | 38 ++++++++++++++----- vyper/codegen/stmt.py | 6 +-- 5 files changed, 76 insertions(+), 14 deletions(-) diff --git a/tests/functional/codegen/types/test_dynamic_array.py b/tests/functional/codegen/types/test_dynamic_array.py index e475b79be1..5f26e05839 100644 --- a/tests/functional/codegen/types/test_dynamic_array.py +++ b/tests/functional/codegen/types/test_dynamic_array.py @@ -1865,3 +1865,25 @@ def test_dynarray_length_no_clobber(get_contract, tx_failed, code): c = get_contract(code) with tx_failed(): c.should_revert() + + +def test_dynarray_make_setter_overlap(get_contract): + # GH 4056, variant of GH 3503 + code = """ +a: DynArray[DynArray[uint256, 10], 10] + +@external +def foo() -> DynArray[uint256, 10]: + self.a.append([1, 2, self.boo(), 4]) + return self.a[0] # returns [11, 12, 3, 4] + +@internal +def boo() -> uint256: + self.a.append([11, 12, 13, 14, 15, 16]) + self.a.pop() + # it should now be impossible to read any of [11, 12, 13, 14, 15, 16] + return 3 + """ + + c = get_contract(code) + assert c.foo() == [1, 2, 3, 4] diff --git a/vyper/codegen/context.py b/vyper/codegen/context.py index 42488f06da..f49914ac78 100644 --- a/vyper/codegen/context.py +++ b/vyper/codegen/context.py @@ -67,7 +67,6 @@ def as_ir_node(self): mutable=self.mutable, location=self.location, ) - ret._referenced_variables = {self} if self.alloca is not None: ret.passthrough_metadata["alloca"] = self.alloca return ret diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index 29831909c1..3c81778660 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -901,10 +901,33 @@ def _abi_payload_size(ir_node): raise CompilerPanic("unreachable") # pragma: nocover +def potential_overlap(left, right): + """ + Return true if make_setter(left, right) could potentially trample + src or dst during evaluation. 
+ """ + if left.typ._is_prim_word and right.typ._is_prim_word: + return False + + if len(left.referenced_variables & right.referenced_variables) > 0: + return True + + if len(left.referenced_variables) > 0 and right.contains_risky_call: + return True + + if left.contains_risky_call and len(right.referenced_variables) > 0: + return True + + return False + + # Create an x=y statement, where the types may be compound def make_setter(left, right, hi=None): check_assign(left, right) + if potential_overlap(left, right): + raise CompilerPanic("overlap between src and dst!") + # we need bounds checks when decoding from memory, otherwise we can # get oob reads. # diff --git a/vyper/codegen/expr.py b/vyper/codegen/expr.py index 5ed0107c79..65df5a0930 100644 --- a/vyper/codegen/expr.py +++ b/vyper/codegen/expr.py @@ -18,7 +18,9 @@ is_flag_type, is_numeric_type, is_tuple_like, + make_setter, pop_dyn_array, + potential_overlap, sar, shl, shr, @@ -165,17 +167,24 @@ def parse_NameConstant(self): # Variable names def parse_Name(self): - if self.expr.id == "self": + varname = self.expr.id + + if varname == "self": return IRnode.from_list(["address"], typ=AddressT()) - elif self.expr.id in self.context.vars: - return self.context.lookup_var(self.expr.id).as_ir_node() - elif (varinfo := self.expr._expr_info.var_info) is not None: - if varinfo.is_constant: - return Expr.parse_value_expr(varinfo.decl_node.value, self.context) + varinfo = self.expr._expr_info.var_info + assert varinfo is not None - assert varinfo.is_immutable, "not an immutable!" 
+ # local variable + if varname in self.context.vars: + ret = self.context.lookup_var(varname).as_ir_node() + ret._referenced_variables = {varinfo} + return ret + if varinfo.is_constant: + return Expr.parse_value_expr(varinfo.decl_node.value, self.context) + + if varinfo.is_immutable: mutable = self.context.is_ctor_context location = data_location_to_address_space( @@ -186,12 +195,14 @@ def parse_Name(self): varinfo.position.position, typ=varinfo.typ, location=location, - annotation=self.expr.id, + annotation=varname, mutable=mutable, ) ret._referenced_variables = {varinfo} return ret + raise CompilerPanic("unreachable") # pragma: nocover + # x.y or x[5] def parse_Attribute(self): typ = self.expr._metadata["type"] @@ -691,7 +702,16 @@ def parse_Call(self): check_assign( dummy_node_for_type(darray.typ.value_type), dummy_node_for_type(arg.typ) ) - return append_dyn_array(darray, arg) + + ret = ["seq"] + if potential_overlap(darray, arg): + tmp = self.context.new_internal_variable(arg.typ) + tmp = IRnode.from_list(tmp, typ=arg.typ, location=MEMORY) + ret.append(make_setter(tmp, arg)) + arg = tmp + + ret.append(append_dyn_array(darray, arg)) + return IRnode.from_list(ret) assert isinstance(func_t, ContractFunctionT) assert func_t.is_internal or func_t.is_constructor diff --git a/vyper/codegen/stmt.py b/vyper/codegen/stmt.py index 947de2dcde..830f2f923d 100644 --- a/vyper/codegen/stmt.py +++ b/vyper/codegen/stmt.py @@ -13,6 +13,7 @@ get_element_ptr, get_type_for_exact_size, make_setter, + potential_overlap, wrap_value_for_external_return, writeable, ) @@ -70,10 +71,7 @@ def parse_Assign(self): dst = self._get_target(self.stmt.target) ret = ["seq"] - overlap = len(dst.referenced_variables & src.referenced_variables) > 0 - overlap |= len(dst.referenced_variables) > 0 and src.contains_risky_call - overlap |= dst.contains_risky_call and len(src.referenced_variables) > 0 - if overlap and not dst.typ._is_prim_word: + if potential_overlap(dst, src): # there is overlap between 
the lhs and rhs, and the type is # complex - i.e., it spans multiple words. for safety, we # copy to a temporary buffer before copying to the destination. From 33719560a5d13089fcd527681d170556824bf8d1 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 30 May 2024 13:17:21 -0700 Subject: [PATCH 14/53] fix[venom]: fix list of volatile instructions (#4065) `create` and `create2` are volatile instructions that were missing. the other ones were already handled by the case for `output is None` in `removed_unused_variables`. misc: - rename `volatile` to `is_volatile` for uniformity - use `is_bb_terminator` instead of `in BB_TERMINATORS` --------- Co-authored-by: Harry Kalogirou --- vyper/venom/analysis/cfg.py | 6 ++--- vyper/venom/basicblock.py | 24 ++++++++++++++++--- vyper/venom/passes/dft.py | 6 ++--- vyper/venom/passes/remove_unused_variables.py | 2 +- vyper/venom/venom_to_assembly.py | 2 +- 5 files changed, 28 insertions(+), 12 deletions(-) diff --git a/vyper/venom/analysis/cfg.py b/vyper/venom/analysis/cfg.py index 6bd7e538e9..bd2ae34b68 100644 --- a/vyper/venom/analysis/cfg.py +++ b/vyper/venom/analysis/cfg.py @@ -1,6 +1,6 @@ from vyper.utils import OrderedSet from vyper.venom.analysis.analysis import IRAnalysis -from vyper.venom.basicblock import BB_TERMINATORS, CFG_ALTERING_INSTRUCTIONS +from vyper.venom.basicblock import CFG_ALTERING_INSTRUCTIONS class CFGAnalysis(IRAnalysis): @@ -18,9 +18,7 @@ def analyze(self) -> None: for bb in fn.get_basic_blocks(): assert len(bb.instructions) > 0, "Basic block should not be empty" last_inst = bb.instructions[-1] - assert ( - last_inst.opcode in BB_TERMINATORS - ), f"Last instruction should be a terminator {bb}" + assert last_inst.is_bb_terminator, f"Last instruction should be a terminator {bb}" for inst in bb.instructions: if inst.opcode in CFG_ALTERING_INSTRUCTIONS: diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 19e8801663..d6fb9560cd 100644 --- a/vyper/venom/basicblock.py +++ 
b/vyper/venom/basicblock.py @@ -12,6 +12,8 @@ "call", "staticcall", "delegatecall", + "create", + "create2", "invoke", "sload", "sstore", @@ -34,6 +36,15 @@ "ret", "jmp", "jnz", + "djmp", + "log", + "selfdestruct", + "invalid", + "revert", + "assert", + "assert_unreachable", + "stop", + "exit", ] ) @@ -41,7 +52,6 @@ [ "mstore", "sstore", - "dstore", "istore", "tstore", "dloadbytes", @@ -67,6 +77,10 @@ ] ) +assert VOLATILE_INSTRUCTIONS.issuperset(NO_OUTPUT_INSTRUCTIONS), ( + NO_OUTPUT_INSTRUCTIONS - VOLATILE_INSTRUCTIONS +) + CFG_ALTERING_INSTRUCTIONS = frozenset(["jmp", "djmp", "jnz"]) if TYPE_CHECKING: @@ -221,9 +235,13 @@ def __init__( self.error_msg = None @property - def volatile(self) -> bool: + def is_volatile(self) -> bool: return self.opcode in VOLATILE_INSTRUCTIONS + @property + def is_bb_terminator(self) -> bool: + return self.opcode in BB_TERMINATORS + def get_label_operands(self) -> Iterator[IRLabel]: """ Get all labels in instruction. @@ -499,7 +517,7 @@ def is_terminated(self) -> bool: # if we can/need to append instructions to the basic block. 
if len(self.instructions) == 0: return False - return self.instructions[-1].opcode in BB_TERMINATORS + return self.instructions[-1].is_bb_terminator @property def is_terminal(self) -> bool: diff --git a/vyper/venom/passes/dft.py b/vyper/venom/passes/dft.py index 8429c19711..f45a60079c 100644 --- a/vyper/venom/passes/dft.py +++ b/vyper/venom/passes/dft.py @@ -1,6 +1,6 @@ from vyper.utils import OrderedSet from vyper.venom.analysis.dfg import DFGAnalysis -from vyper.venom.basicblock import BB_TERMINATORS, IRBasicBlock, IRInstruction, IRVariable +from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRVariable from vyper.venom.function import IRFunction from vyper.venom.passes.base_pass import IRPass @@ -30,7 +30,7 @@ def _process_instruction_r(self, bb: IRBasicBlock, inst: IRInstruction, offset: self.visited_instructions.add(inst) self.inst_order_num += 1 - if inst.opcode in BB_TERMINATORS: + if inst.is_bb_terminator: offset = len(bb.instructions) if inst.opcode == "phi": @@ -55,7 +55,7 @@ def _process_basic_block(self, bb: IRBasicBlock) -> None: for inst in bb.instructions: inst.fence_id = self.fence_id - if inst.volatile: + if inst.is_volatile: self.fence_id += 1 # We go throught the instructions and calculate the order in which they should be executed diff --git a/vyper/venom/passes/remove_unused_variables.py b/vyper/venom/passes/remove_unused_variables.py index 1ce5c141d9..be9c1ed535 100644 --- a/vyper/venom/passes/remove_unused_variables.py +++ b/vyper/venom/passes/remove_unused_variables.py @@ -31,7 +31,7 @@ def run_pass(self): def _process_instruction(self, inst): if inst.output is None: return - if inst.volatile: + if inst.is_volatile or inst.is_bb_terminator: return uses = self.dfg.get_uses(inst.output) if len(uses) > 0: diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index beb530a42c..51fac10134 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -305,7 +305,7 @@ def 
_clean_unused_params(self, asm: list, bb: IRBasicBlock, stack: StackModel) - for i, inst in enumerate(bb.instructions): if inst.opcode != "param": break - if inst.volatile and i + 1 < len(bb.instructions): + if inst.is_volatile and i + 1 < len(bb.instructions): liveness = bb.instructions[i + 1].liveness if inst.output is not None and inst.output not in liveness: depth = stack.get_depth(inst.output) From 7b0ee5f631dc7ef2ababb6728c7d75ab655422d2 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 31 May 2024 15:22:02 +0300 Subject: [PATCH 15/53] fix[venom]: remove dominator tree invalidation for store elimination pass (#4069) Each pass has the responsibility of invalidating analyses that it affects according to the changes it introduces. The `StoreEliminationPass` should not invalidate the `DominatorTree` --- vyper/venom/passes/store_elimination.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vyper/venom/passes/store_elimination.py b/vyper/venom/passes/store_elimination.py index fe3d0f7900..17b9ce995a 100644 --- a/vyper/venom/passes/store_elimination.py +++ b/vyper/venom/passes/store_elimination.py @@ -1,6 +1,5 @@ from vyper.venom.analysis.cfg import CFGAnalysis from vyper.venom.analysis.dfg import DFGAnalysis -from vyper.venom.analysis.dominators import DominatorTreeAnalysis from vyper.venom.analysis.liveness import LivenessAnalysis from vyper.venom.basicblock import IRVariable from vyper.venom.passes.base_pass import IRPass @@ -27,7 +26,6 @@ def run_pass(self): continue self._process_store(dfg, inst, var, inst.operands[0]) - self.analyses_cache.invalidate_analysis(DominatorTreeAnalysis) self.analyses_cache.invalidate_analysis(LivenessAnalysis) self.analyses_cache.invalidate_analysis(DFGAnalysis) From bb9129aaa42b2f810fe48bec0c29a552db3f105c Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 31 May 2024 07:59:28 -0700 Subject: [PATCH 16/53] feat[venom]: add `extract_literals` pass (#4067) extract `IRLiterals` which are instruction arguments; this 
reduces pressure on the stack scheduler because `_emit_input_operands` can cause stack storms when we hit `_stack_reorder`. by extracting them, we allow `DFTPass` to reorder literal emission in a more optimized way before even getting to `_emit_input_operands` --- vyper/venom/__init__.py | 2 ++ vyper/venom/passes/extract_literals.py | 37 ++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 vyper/venom/passes/extract_literals.py diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index cd981cd462..afd79fc44f 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -12,6 +12,7 @@ from vyper.venom.passes.algebraic_optimization import AlgebraicOptimizationPass from vyper.venom.passes.branch_optimization import BranchOptimizationPass from vyper.venom.passes.dft import DFTPass +from vyper.venom.passes.extract_literals import ExtractLiteralsPass from vyper.venom.passes.make_ssa import MakeSSA from vyper.venom.passes.mem2var import Mem2Var from vyper.venom.passes.remove_unused_variables import RemoveUnusedVariablesPass @@ -53,6 +54,7 @@ def _run_passes(fn: IRFunction, optimize: OptimizationLevel) -> None: SimplifyCFGPass(ac, fn).run_pass() AlgebraicOptimizationPass(ac, fn).run_pass() BranchOptimizationPass(ac, fn).run_pass() + ExtractLiteralsPass(ac, fn).run_pass() RemoveUnusedVariablesPass(ac, fn).run_pass() DFTPass(ac, fn).run_pass() diff --git a/vyper/venom/passes/extract_literals.py b/vyper/venom/passes/extract_literals.py new file mode 100644 index 0000000000..b8e042b357 --- /dev/null +++ b/vyper/venom/passes/extract_literals.py @@ -0,0 +1,37 @@ +from vyper.venom.analysis.dfg import DFGAnalysis +from vyper.venom.analysis.liveness import LivenessAnalysis +from vyper.venom.basicblock import IRInstruction, IRLiteral +from vyper.venom.passes.base_pass import IRPass + + +class ExtractLiteralsPass(IRPass): + """ + This pass extracts literals so that they can be reordered by the DFT pass + """ + + def run_pass(self): + for 
bb in self.function.get_basic_blocks(): + self._process_bb(bb) + + self.analyses_cache.invalidate_analysis(DFGAnalysis) + self.analyses_cache.invalidate_analysis(LivenessAnalysis) + + def _process_bb(self, bb): + i = 0 + while i < len(bb.instructions): + inst = bb.instructions[i] + if inst.opcode == "store": + i += 1 + continue + + for j, op in enumerate(inst.operands): + # first operand to log is magic + if inst.opcode == "log" and j == 0: + continue + + if isinstance(op, IRLiteral): + var = self.function.get_next_variable() + to_insert = IRInstruction("store", [op], var) + bb.insert_instruction(to_insert, index=i) + inst.operands[j] = var + i += 1 From 80dcee211ac7a9ed874a6ddae04f3a013d23e28d Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 1 Jun 2024 08:41:05 -0700 Subject: [PATCH 17/53] feat[docs]: add more detail to modules docs (#4087) - update technical notes in using modules (reference the VIP) - note about linearization check - exports of interfaces --- docs/using-modules.rst | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/docs/using-modules.rst b/docs/using-modules.rst index 4a8af1a7d9..7d63eb6617 100644 --- a/docs/using-modules.rst +++ b/docs/using-modules.rst @@ -144,6 +144,8 @@ The design of the module system takes inspiration from (but is not directly rela * A module may be "used" many times * A module which is "used" or its state touched must be "initialized" exactly once +To read more about the design background of Vyper's module system, please see its original `design document `_. + .. _init-dependencies: Initializing a module with dependencies @@ -169,6 +171,9 @@ Sometimes, you may encounter a module which itself ``uses`` other modules. Vyper # export all external functions from ownable_2step exports: ownable_2step.__interface__ +.. warning:: + In normal usage, you should make sure that ``__init__()`` functions are called in dependency order. 
In the above example, you can get unexpected behavior if ``ownable_2step.__init__()`` is called before ``ownable.__init__()``! The compiler may enforce this behavior in the future. + .. _exporting-functions: Exporting functions @@ -176,7 +181,9 @@ Exporting functions In Vyper, ``@external`` functions are not automatically exposed (i.e., included in the runtime code) in the importing contract. This is a safety feature, it means that any externally facing functionality must be explicitly defined in the top-level of the compilation target. -So, exporting external functions from modules is accomplished using the ``exports`` keyword. In Vyper, functions can be exported individually, or, a wholesale export of all the functions in a module can be done. The following are all ways of exporting functions from an imported module. +So, exporting external functions from modules is accomplished using the ``exports`` keyword. In Vyper, functions can be exported individually, or, a wholesale export of all the functions in an interface can be done. The special interface ``module.__interface__`` is a compiler-defined interface, which automatically includes all the functions in a module. + +The following are all ways of exporting functions from an imported module. .. code-block:: vyper @@ -190,5 +197,11 @@ So, exporting external functions from modules is accomplished using the ``export ownable_2step.accept_ownership, ) + # export all IERC20 functions from `base_token` + exports: base_token.IERC20 + # export all external functions from `ownable_2step` exports: ownable_2step.__interface__ + +.. note:: + Any exported interfaces must be implemented by the module. For example, in the above example, ``base_token`` must contain ``implements: IERC20``, or else the compiler will raise an error. 
From d9f10d7ea0be07baa35e2f72317a00ddaaeb5aef Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 2 Jun 2024 06:17:59 -0700 Subject: [PATCH 18/53] fix[lang]: prevent modules as storage variables (#4088) prevent modules being initialized as storage variables. --- .../test_instantiation_exception.py | 33 +++++++++++++++++-- vyper/semantics/types/module.py | 1 + 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/tests/functional/syntax/exceptions/test_instantiation_exception.py b/tests/functional/syntax/exceptions/test_instantiation_exception.py index 4dd0bf6e02..f693846f81 100644 --- a/tests/functional/syntax/exceptions/test_instantiation_exception.py +++ b/tests/functional/syntax/exceptions/test_instantiation_exception.py @@ -1,5 +1,6 @@ import pytest +from vyper.compiler import compile_code from vyper.exceptions import InstantiationException invalid_list = [ @@ -77,5 +78,33 @@ def __init__(): @pytest.mark.parametrize("bad_code", invalid_list) -def test_instantiation_exception(bad_code, get_contract, assert_compile_failed): - assert_compile_failed(lambda: get_contract(bad_code), InstantiationException) +def test_instantiation_exception(bad_code): + with pytest.raises(InstantiationException): + compile_code(bad_code) + + +def test_instantiation_exception_module(make_input_bundle): + main = """ +# main.vy +import lib + +initializes: lib + +x:lib + +@external +def foo() -> (uint256, uint256): + return (self.x.bar(), self.x.bar()) + """ + lib = """ +# lib.vy +a:uint256 + +@internal +def bar()->uint256: + self.a += 1 + return self.a + """ + input_bundle = make_input_bundle({"lib.vy": lib}) + with pytest.raises(InstantiationException): + compile_code(main, input_bundle=input_bundle) diff --git a/vyper/semantics/types/module.py b/vyper/semantics/types/module.py index b3e3f2ef2b..e55c4d145f 100644 --- a/vyper/semantics/types/module.py +++ b/vyper/semantics/types/module.py @@ -272,6 +272,7 @@ class ModuleT(VyperType): _attribute_in_annotation = True 
_invalid_locations = ( + DataLocation.STORAGE, DataLocation.CALLDATA, DataLocation.CODE, DataLocation.MEMORY, From e52241afb1209c5a649b2587e9f343d567e60156 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 2 Jun 2024 06:27:23 -0700 Subject: [PATCH 19/53] fix[lang]: fix a hint in global initializer check (#4089) the hint is bad because the invalid `uses:` statement might not be in the main contract. --- .../syntax/modules/test_initializers.py | 34 +++++++++++++++++++ vyper/semantics/analysis/global_.py | 14 ++++---- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/tests/functional/syntax/modules/test_initializers.py b/tests/functional/syntax/modules/test_initializers.py index 29d611d54a..624904e45f 100644 --- a/tests/functional/syntax/modules/test_initializers.py +++ b/tests/functional/syntax/modules/test_initializers.py @@ -1383,3 +1383,37 @@ def foo(): hint = f"add `uses: {lib}` or `initializes: {lib}` as a top-level statement to your contract" assert e.value._hint == hint assert e.value.annotations[0].lineno == 6 + + +def test_global_initialize_missed_import_hint(make_input_bundle, chdir_tmp_path): + lib1 = """ +import lib2 +import lib3 + +initializes: lib2[ + lib3 := lib3 +] + """ + lib2 = """ +import lib3 + +uses: lib3 + +@external +def set_some_mod(): + a: uint256 = lib3.var + """ + lib3 = """ +var: uint256 + """ + main = """ +import lib1 + +initializes: lib1 + """ + + input_bundle = make_input_bundle({"lib1.vy": lib1, "lib2.vy": lib2, "lib3.vy": lib3}) + with pytest.raises(InitializerException) as e: + compile_code(main, input_bundle=input_bundle) + assert e.value._message == "module `lib3.vy` is used but never initialized!" 
+ assert e.value._hint is None diff --git a/vyper/semantics/analysis/global_.py b/vyper/semantics/analysis/global_.py index 8b3e0544a5..23b45a1114 100644 --- a/vyper/semantics/analysis/global_.py +++ b/vyper/semantics/analysis/global_.py @@ -58,6 +58,10 @@ def _validate_global_initializes_constraint(module_t: ModuleT): for u, uses in all_used_modules.items(): if u not in all_initialized_modules: + msg = f"module `{u}` is used but never initialized!" + + # construct a hint if the module is in scope + hint = None found_module = module_t.find_module_info(u) if found_module is not None: # TODO: do something about these constants @@ -66,13 +70,7 @@ def _validate_global_initializes_constraint(module_t: ModuleT): else: module_str = f"`{module_t}`" hint = f"add `initializes: {found_module.alias}` to {module_str}" - else: - # CMC 2024-02-06 is this actually reachable? - hint = f"ensure `{module_t}` is imported in your main contract!" - err_list.append( - InitializerException( - f"module `{u}` is used but never initialized!", *uses, hint=hint - ) - ) + + err_list.append(InitializerException(msg, *uses, hint=hint)) err_list.raise_if_not_empty() From 1f6b9433fbd52447d0737fb2eee16f42a01308f4 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 2 Jun 2024 08:54:59 -0700 Subject: [PATCH 20/53] fix[codegen]: zero-length dynarray `abi_decode` validation (#4060) fix an edge case in `abi_decode` dynarray validation. when the child type is dynamic and the runtime length is zero, the check that the offset pointer is valid (points within the payload) was skipped. skipping the offset pointer check is valid any time the runtime length is nonzero, because the pointer is bounded by the checks in the recursive runtime loop in `_dynarray_make_setter`. however, it is invalid to skip the check when the runtime length of the dynarray is zero, because then the recursive loop does not get run. 
the impact of this can be seen in the included test cases, particularly `test_abi_decode_top_level_head_oob`. although as of eb011367cc769d6 it is impossible to convince the decoder to *copy* oob data since the validation is only skipped when the length is zero, a payload can be crafted which will revert depending on if some value outside of the buffer is nonzero (i.e. the runtime behavior can be influenced by some data outside of the payload). this commit fixes this issue by _unconditionally_ checking that the offset pointer is valid. note that the check is now always performed, even when the runtime length is nonzero and therefore the check is redundant (because, as stated, the checks within the loop already bound the offset pointer). a more efficient implementation is possible, since the check only needs to be run in the case that the runtime length is 0, which theoretically can be merged into the same basic block with the 0-case in the `repeat` loop. however, this commit leaves that to future optimizer work; the optimization here is it just avoids the multiplication when the child type is dynamic (because the result of the multiplication is always 0). this commit also fixes another bug in dynarray recursion; the calculation in `_abi_payload_size` was not correct when the size of the child type is larger than 32. misc: - add additional tests for abi_decode validation. 
--------- Co-authored-by: cyberthirst Co-authored-by: Robert Chen --- .../builtins/codegen/test_abi_decode.py | 249 +++++++++++++++++- vyper/codegen/core.py | 17 +- 2 files changed, 259 insertions(+), 7 deletions(-) diff --git a/tests/functional/builtins/codegen/test_abi_decode.py b/tests/functional/builtins/codegen/test_abi_decode.py index fad6ce889c..36b87137b9 100644 --- a/tests/functional/builtins/codegen/test_abi_decode.py +++ b/tests/functional/builtins/codegen/test_abi_decode.py @@ -495,8 +495,8 @@ def f(x: Bytes[32 * 3]): decoded_y1: Bytes[32] = _abi_decode(y, Bytes[32]) a = b"bar" decoded_y2: Bytes[32] = _abi_decode(y, Bytes[32]) - - assert decoded_y1 != decoded_y2 + # original POC: + # assert decoded_y1 != decoded_y2 """ c = get_contract(code) @@ -1043,7 +1043,7 @@ def run(): c.run() -def test_abi_decode_extcall_zero_len_array(get_contract): +def test_abi_decode_extcall_empty_array(get_contract): code = """ @external def bar() -> (uint256, uint256): @@ -1061,6 +1061,59 @@ def run(): c.run() +def test_abi_decode_extcall_complex_empty_dynarray(get_contract): + # 5th word of the payload points to the last word of the payload + # which is considered the length of the Point.y array + # because the length is 0, the decoding should succeed + code = """ +struct Point: + x: uint256 + y: DynArray[uint256, 2] + z: uint256 + +@external +def bar() -> (uint256, uint256, uint256, uint256, uint256, uint256): + return 32, 1, 32, 1, 64, 0 + +interface A: + def bar() -> DynArray[Point, 2]: nonpayable + +@external +def run(): + x: DynArray[Point, 2] = extcall A(self).bar() + assert len(x) == 1 and len(x[0].y) == 0 + """ + c = get_contract(code) + + c.run() + + +def test_abi_decode_extcall_complex_empty_dynarray2(tx_failed, get_contract): + # top-level head points 1B over the runtime buffer end + # thus the decoding should fail although the length is 0 + code = """ +struct Point: + x: uint256 + y: DynArray[uint256, 2] + z: uint256 + +@external +def bar() -> (uint256, 
uint256): + return 33, 0 + +interface A: + def bar() -> DynArray[Point, 2]: nonpayable + +@external +def run(): + x: DynArray[Point, 2] = extcall A(self).bar() + """ + c = get_contract(code) + + with tx_failed(): + c.run() + + def test_abi_decode_extcall_zero_len_array2(get_contract): code = """ @external @@ -1080,3 +1133,193 @@ def run() -> uint256: length = c.run() assert length == 0 + + +def test_abi_decode_top_level_head_oob(tx_failed, get_contract): + code = """ +@external +def run(x: Bytes[256], y: uint256): + player_lost: bool = empty(bool) + + if y == 1: + player_lost = True + + decoded: DynArray[Bytes[1], 2] = empty(DynArray[Bytes[1], 2]) + decoded = _abi_decode(x, DynArray[Bytes[1], 2]) + """ + c = get_contract(code) + + # head points over the buffer end + payload = (0x0100, *_replicate(0x00, 7)) + + data = _abi_payload_from_tuple(payload) + + with tx_failed(): + c.run(data, 1) + + with tx_failed(): + c.run(data, 0) + + +def test_abi_decode_dynarray_complex_insufficient_data(env, tx_failed, get_contract): + code = """ +struct Point: + x: uint256 + y: uint256 + +@external +def run(x: Bytes[32 * 8]): + y: Bytes[32 * 8] = x + decoded_y1: DynArray[Point, 3] = _abi_decode(y, DynArray[Point, 3]) + """ + c = get_contract(code) + + # runtime buffer has insufficient size - we decode 3 points, but provide only + # 3 * 32B of payload + payload = (0x20, 0x03, *_replicate(0x03, 3)) + + data = _abi_payload_from_tuple(payload) + + with tx_failed(): + c.run(data) + + +def test_abi_decode_dynarray_complex2(env, tx_failed, get_contract): + # point head to the 1st 0x01 word (ie the length) + # but size of the point is 3 * 32B, thus we'd decode 2B over the buffer end + code = """ +struct Point: + x: uint256 + y: uint256 + z: uint256 + + +@external +def run(x: Bytes[32 * 8]): + y: Bytes[32 * 11] = x + decoded_y1: DynArray[Point, 2] = _abi_decode(y, DynArray[Point, 2]) + """ + c = get_contract(code) + + payload = ( + 0xC0, # points to the 1st 0x01 word (ie the length) + 
*_replicate(0x03, 5), + *_replicate(0x01, 2), + ) + + data = _abi_payload_from_tuple(payload) + + with tx_failed(): + c.run(data) + + +def test_abi_decode_complex_empty_dynarray(env, tx_failed, get_contract): + # point head to the last word of the payload + # this will be the length, but because it's set to 0, the decoding should succeed + code = """ +struct Point: + x: uint256 + y: DynArray[uint256, 2] + z: uint256 + + +@external +def run(x: Bytes[32 * 16]): + y: Bytes[32 * 16] = x + decoded_y1: DynArray[Point, 2] = _abi_decode(y, DynArray[Point, 2]) + assert len(decoded_y1) == 1 and len(decoded_y1[0].y) == 0 + """ + c = get_contract(code) + + payload = ( + 0x20, + 0x01, + 0x20, + 0x01, + 0xA0, # points to the last word of the payload + 0x04, + 0x02, + 0x02, + 0x00, # length is 0, so decoding should succeed + ) + + data = _abi_payload_from_tuple(payload) + + c.run(data) + + +def test_abi_decode_complex_arithmetic_overflow(tx_failed, get_contract): + # inner head roundtrips due to arithmetic overflow + code = """ +struct Point: + x: uint256 + y: DynArray[uint256, 2] + z: uint256 + + +@external +def run(x: Bytes[32 * 16]): + y: Bytes[32 * 16] = x + decoded_y1: DynArray[Point, 2] = _abi_decode(y, DynArray[Point, 2]) + """ + c = get_contract(code) + + payload = ( + 0x20, + 0x01, + 0x20, + 0x01, # both Point.x and Point.y length + 2**256 - 0x20, # points to the "previous" word of the payload + 0x04, + 0x02, + 0x02, + 0x00, + ) + + data = _abi_payload_from_tuple(payload) + + with tx_failed(): + c.run(data) + + +def test_abi_decode_empty_toplevel_dynarray(get_contract): + code = """ +@external +def run(x: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4]): + y: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4] = x + assert len(y) == 2 * 32 + decoded_y1: DynArray[DynArray[uint256, 3], 3] = _abi_decode( + y, + DynArray[DynArray[uint256, 3], 3] + ) + assert len(decoded_y1) == 0 + """ + c = get_contract(code) + + payload = (0x20, 0x00) # DynArray head, DynArray length + + data = 
_abi_payload_from_tuple(payload) + + c.run(data) + + +def test_abi_decode_invalid_toplevel_dynarray_head(tx_failed, get_contract): + # head points 1B over the bounds of the runtime buffer + code = """ +@external +def run(x: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4]): + y: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4] = x + decoded_y1: DynArray[DynArray[uint256, 3], 3] = _abi_decode( + y, + DynArray[DynArray[uint256, 3], 3] + ) + """ + c = get_contract(code) + + # head points 1B over the bounds of the runtime buffer + payload = (0x21, 0x00) # DynArray head, DynArray length + + data = _abi_payload_from_tuple(payload) + + with tx_failed(): + c.run(data) diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index 3c81778660..5d4621518f 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -889,11 +889,17 @@ def _dirty_read_risk(ir_node): def _abi_payload_size(ir_node): SCALE = ir_node.location.word_scale assert SCALE == 32 # we must be in some byte-addressable region, like memory - OFFSET = DYNAMIC_ARRAY_OVERHEAD * SCALE if isinstance(ir_node.typ, DArrayT): - return ["add", OFFSET, ["mul", get_dyn_array_count(ir_node), SCALE]] + # the amount of size each value occupies in static section + # (the amount of size it occupies in the dynamic section is handled in + # make_setter recursion) + item_size = ir_node.typ.value_type.abi_type.static_size() + if item_size == 0: + # manual optimization; the mload cannot currently be optimized out + return ["add", OFFSET, 0] + return ["add", OFFSET, ["mul", get_dyn_array_count(ir_node), item_size]] if isinstance(ir_node.typ, _BytestringT): return ["add", OFFSET, get_bytearray_length(ir_node)] @@ -1175,14 +1181,17 @@ def clamp_dyn_array(ir_node, hi=None): assert (hi is not None) == _dirty_read_risk(ir_node) - # if the subtype is dynamic, the check will be performed in the recursion - if hi is not None and not t.abi_type.subtyp.is_dynamic(): + if hi is not None: assert t.count < 2**64 # sanity check # note: this add does not risk 
arithmetic overflow because # length is bounded by count * elemsize. item_end = add_ofst(ir_node, _abi_payload_size(ir_node)) + # if the subtype is dynamic, the length check is performed in + # the recursion, UNLESS the count is zero. here we perform the + # check all the time, but it could maybe be optimized out in the + # make_setter loop (in the common case that runtime count > 0). len_check = ["seq", ["assert", ["le", item_end, hi]], len_check] return IRnode.from_list(len_check, error_msg=f"{ir_node.typ} bounds check") From 24cfe0bcc10ec9418054c8def5cf4eeaa3ed0164 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 7 Jun 2024 14:49:51 -0700 Subject: [PATCH 21/53] fix[tool]: star option in `outputSelection` (#4094) vyper will accept either `"*"` or `["*"]` for `outputSelection`, but some verifiers expect it to always be a list. make `solc_json` output choose the common formatting. --- vyper/compiler/output_bundle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/compiler/output_bundle.py b/vyper/compiler/output_bundle.py index b93ecbd015..92494e3a70 100644 --- a/vyper/compiler/output_bundle.py +++ b/vyper/compiler/output_bundle.py @@ -194,7 +194,7 @@ def write_integrity(self, integrity_sum: str): def write_compilation_target(self, targets: list[str]): for target in targets: - self._output["settings"]["outputSelection"][target] = "*" + self._output["settings"]["outputSelection"][target] = ["*"] def write_version(self, version): self._output["compiler_version"] = version From 4d1bacd5b7ef3e2986393fd7f23b0ac24bbe1227 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 10 Jun 2024 11:16:53 -0700 Subject: [PATCH 22/53] fix[venom]: move loop invariant assertion to entry block (#4098) loop invariant bound check was in the body of the loop, not the entry block. move it up to the entry so we don't re-check the same assertion every loop iteration. 
--- tests/functional/syntax/test_for_range.py | 4 ++-- vyper/venom/ir_node_to_venom.py | 20 ++++++++------------ 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/tests/functional/syntax/test_for_range.py b/tests/functional/syntax/test_for_range.py index 1de32108c5..97e77f32f7 100644 --- a/tests/functional/syntax/test_for_range.py +++ b/tests/functional/syntax/test_for_range.py @@ -368,14 +368,14 @@ def foo(): """ @external def foo(): - x: int128 = 5 + x: int128 = 4 for i: int128 in range(x, bound=4): pass """, """ @external def foo(): - x: int128 = 5 + x: int128 = 4 for i: int128 in range(0, x, bound=4): pass """, diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index 61b3c081ff..2c99cf5668 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -468,14 +468,7 @@ def emit_body_blocks(): start, end, _ = _convert_ir_bb_list(fn, ir.args[1:4], symbols) assert ir.args[3].is_literal, "repeat bound expected to be literal" - bound = ir.args[3].value - if ( - isinstance(end, IRLiteral) - and isinstance(start, IRLiteral) - and end.value + start.value <= bound - ): - bound = None body = ir.args[4] @@ -491,9 +484,15 @@ def emit_body_blocks(): counter_var = entry_block.append_instruction("store", start) symbols[sym.value] = counter_var + + if bound is not None: + # assert le end bound + invalid_end = entry_block.append_instruction("gt", bound, end) + valid_end = entry_block.append_instruction("iszero", invalid_end) + entry_block.append_instruction("assert", valid_end) + end = entry_block.append_instruction("add", start, end) - if bound: - bound = entry_block.append_instruction("add", start, bound) + entry_block.append_instruction("jmp", cond_block.label) xor_ret = cond_block.append_instruction("xor", counter_var, end) @@ -501,9 +500,6 @@ def emit_body_blocks(): fn.append_basic_block(cond_block) fn.append_basic_block(body_block) - if bound: - xor_ret = body_block.append_instruction("xor", counter_var, 
bound) - body_block.append_instruction("assert", xor_ret) emit_body_blocks() body_end = fn.get_basic_block() From 7c8862ae9da3267c3c65ce06f2f0b1c59d082704 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 11 Jun 2024 06:23:26 -0700 Subject: [PATCH 23/53] feat[lang]: rename `_abi_encode` and `_abi_decode` (#4097) rename to `abi_encode` and `abi_decode` respectively leave the old ones in, but with deprecation warnings --- docs/built-in-functions.rst | 18 ++++-- .../builtins/codegen/test_abi_decode.py | 12 ++-- .../builtins/codegen/test_abi_encode.py | 62 +++++++++---------- vyper/ast/grammar.lark | 2 +- vyper/builtins/functions.py | 32 ++++++++-- 5 files changed, 78 insertions(+), 48 deletions(-) diff --git a/docs/built-in-functions.rst b/docs/built-in-functions.rst index 2e2f38ab74..367a08d80d 100644 --- a/docs/built-in-functions.rst +++ b/docs/built-in-functions.rst @@ -264,7 +264,7 @@ Vyper has three built-ins for contract creation; all three contract creation bui x: uint256 = 123 success, response = raw_call( _target, - _abi_encode(x, method_id=method_id("someMethodName(uint256)")), + abi_encode(x, method_id=method_id("someMethodName(uint256)")), max_outsize=32, value=msg.value, revert_on_failure=False @@ -1023,7 +1023,7 @@ Utilities >>> ExampleContract.foo() 0xa9059cbb -.. py:function:: _abi_encode(*args, ensure_tuple: bool = True) -> Bytes[] +.. py:function:: abi_encode(*args, ensure_tuple: bool = True) -> Bytes[] Takes a variable number of args as input, and returns the ABIv2-encoded bytestring. Used for packing arguments to raw_call, EIP712 and other cases where a consistent and efficient serialization method is needed. Once this function has seen more use we provisionally plan to put it into the ``ethereum.abi`` namespace. @@ -1041,7 +1041,7 @@ Utilities def foo() -> Bytes[132]: x: uint256 = 1 y: Bytes[32] = b"234" - return _abi_encode(x, y, method_id=method_id("foo()")) + return abi_encode(x, y, method_id=method_id("foo()")) .. 
code-block:: vyper @@ -1052,15 +1052,18 @@ Utilities "0000000000000000000000000000000000000000000000000000000000000003" "3233340000000000000000000000000000000000000000000000000000000000" + .. note:: + Prior to v0.4.0, this function was named ``_abi_encode``. + -.. py:function:: _abi_decode(b: Bytes, output_type: type_, unwrap_tuple: bool = True) -> Any +.. py:function:: abi_decode(b: Bytes, output_type: type_, unwrap_tuple: bool = True) -> Any Takes a byte array as input, and returns the decoded values according to the specified output types. Used for unpacking ABIv2-encoded values. Once this function has seen more use we provisionally plan to put it into the ``ethereum.abi`` namespace. * ``b``: A byte array of a length that is between the minimum and maximum ABIv2 size bounds of the ``output type``. * ``output_type``: Name of the output type, or tuple of output types, to be decoded. - * ``unwrap_tuple``: If set to True, the input is decoded as a tuple even if only one output type is specified. In other words, ``_abi_decode(b, Bytes[32])`` gets decoded as ``(Bytes[32],)``. This is the convention for ABIv2-encoded values generated by Vyper and Solidity functions. Except for very specific use cases, this should be set to True. Must be a literal. + * ``unwrap_tuple``: If set to True, the input is decoded as a tuple even if only one output type is specified. In other words, ``abi_decode(b, Bytes[32])`` gets decoded as ``(Bytes[32],)``. This is the convention for ABIv2-encoded values generated by Vyper and Solidity functions. Except for very specific use cases, this should be set to True. Must be a literal. Returns the decoded value(s), with type as specified by `output_type`. @@ -1071,9 +1074,12 @@ Utilities def foo(someInput: Bytes[128]) -> (uint256, Bytes[32]): x: uint256 = empty(uint256) y: Bytes[32] = empty(Bytes[32]) - x, y = _abi_decode(someInput, (uint256, Bytes[32])) + x, y = abi_decode(someInput, (uint256, Bytes[32])) return x, y + .. 
note:: + Prior to v0.4.0, this function was named ``_abi_decode``. + .. py:function:: print(*args, hardhat_compat=False) -> None diff --git a/tests/functional/builtins/codegen/test_abi_decode.py b/tests/functional/builtins/codegen/test_abi_decode.py index 36b87137b9..d77bb1b5ae 100644 --- a/tests/functional/builtins/codegen/test_abi_decode.py +++ b/tests/functional/builtins/codegen/test_abi_decode.py @@ -31,7 +31,7 @@ def abi_decode(x: Bytes[160]) -> (address, int128, bool, decimal, bytes32): c: bool = False d: decimal = 0.0 e: bytes32 = 0x0000000000000000000000000000000000000000000000000000000000000000 - a, b, c, d, e = _abi_decode(x, (address, int128, bool, decimal, bytes32)) + a, b, c, d, e = abi_decode(x, (address, int128, bool, decimal, bytes32)) return a, b, c, d, e @external @@ -48,7 +48,7 @@ def abi_decode_struct(x: Bytes[544]) -> Human: metadata=0x0000000000000000000000000000000000000000000000000000000000000000 ) ) - human = _abi_decode(x, Human) + human = abi_decode(x, Human) return human """ @@ -97,7 +97,7 @@ def test_abi_decode_single(get_contract, expected, input_len, output_typ, abi_ty contract = f""" @external def foo(x: Bytes[{input_len}]) -> {output_typ}: - a: {output_typ} = _abi_decode(x, {output_typ}, unwrap_tuple={unwrap_tuple}) + a: {output_typ} = abi_decode(x, {output_typ}, unwrap_tuple={unwrap_tuple}) return a """ c = get_contract(contract) @@ -135,7 +135,7 @@ def test_abi_decode_double( def foo(x: Bytes[{input_len}]) -> ({output_typ1}, {output_typ2}): a: {output_typ1} = empty({output_typ1}) b: {output_typ2} = empty({output_typ2}) - a, b = _abi_decode(x, ({output_typ1}, {output_typ2}), unwrap_tuple={unwrap_tuple}) + a, b = abi_decode(x, ({output_typ1}, {output_typ2}), unwrap_tuple={unwrap_tuple}) return a, b """ @@ -173,7 +173,7 @@ def test_abi_decode_nested_dynarray(get_contract, args, unwrap_tuple): @external def abi_decode(x: Bytes[{len}]) -> DynArray[DynArray[uint256, 3], 3]: a: DynArray[DynArray[uint256, 3], 3] = [] - a = _abi_decode(x, 
DynArray[DynArray[uint256, 3], 3], unwrap_tuple={unwrap_tuple}) + a = abi_decode(x, DynArray[DynArray[uint256, 3], 3], unwrap_tuple={unwrap_tuple}) return a """ @@ -213,7 +213,7 @@ def test_abi_decode_nested_dynarray2(get_contract, args, unwrap_tuple): @external def abi_decode(x: Bytes[{len}]) -> DynArray[DynArray[DynArray[uint256, 3], 3], 3]: a: DynArray[DynArray[DynArray[uint256, 3], 3], 3] = [] - a = _abi_decode( + a = abi_decode( x, DynArray[DynArray[DynArray[uint256, 3], 3], 3], unwrap_tuple={unwrap_tuple} diff --git a/tests/functional/builtins/codegen/test_abi_encode.py b/tests/functional/builtins/codegen/test_abi_encode.py index 7acf00e0b6..123a3898bb 100644 --- a/tests/functional/builtins/codegen/test_abi_encode.py +++ b/tests/functional/builtins/codegen/test_abi_encode.py @@ -48,38 +48,38 @@ def abi_encode( ) if ensure_tuple: if not include_method_id: - return _abi_encode(human) # default ensure_tuple=True - return _abi_encode(human, method_id=0xdeadbeef) + return abi_encode(human) # default ensure_tuple=True + return abi_encode(human, method_id=0xdeadbeef) else: if not include_method_id: - return _abi_encode(human, ensure_tuple=False) - return _abi_encode(human, ensure_tuple=False, method_id=0xdeadbeef) + return abi_encode(human, ensure_tuple=False) + return abi_encode(human, ensure_tuple=False, method_id=0xdeadbeef) @external def abi_encode2(name: String[32], ensure_tuple: bool, include_method_id: bool) -> Bytes[100]: if ensure_tuple: if not include_method_id: - return _abi_encode(name) # default ensure_tuple=True - return _abi_encode(name, method_id=0xdeadbeef) + return abi_encode(name) # default ensure_tuple=True + return abi_encode(name, method_id=0xdeadbeef) else: if not include_method_id: - return _abi_encode(name, ensure_tuple=False) - return _abi_encode(name, ensure_tuple=False, method_id=0xdeadbeef) + return abi_encode(name, ensure_tuple=False) + return abi_encode(name, ensure_tuple=False, method_id=0xdeadbeef) @external def abi_encode3(x: 
uint256, ensure_tuple: bool, include_method_id: bool) -> Bytes[36]: if ensure_tuple: if not include_method_id: - return _abi_encode(x) # default ensure_tuple=True + return abi_encode(x) # default ensure_tuple=True - return _abi_encode(x, method_id=0xdeadbeef) + return abi_encode(x, method_id=0xdeadbeef) else: if not include_method_id: - return _abi_encode(x, ensure_tuple=False) + return abi_encode(x, ensure_tuple=False) - return _abi_encode(x, ensure_tuple=False, method_id=0xdeadbeef) + return abi_encode(x, ensure_tuple=False, method_id=0xdeadbeef) """ c = get_contract(code) @@ -129,7 +129,7 @@ def test_abi_encode_length_failing(get_contract, assert_compile_failed, type, va @internal def foo(): x: WrappedBytes = WrappedBytes(bs={value}) - y: {type}[96] = _abi_encode(x, ensure_tuple=True) # should be Bytes[128] + y: {type}[96] = abi_encode(x, ensure_tuple=True) # should be Bytes[128] """ assert_compile_failed(lambda: get_contract(code)) @@ -141,12 +141,12 @@ def test_abi_encode_dynarray(get_contract): def abi_encode(d: DynArray[uint256, 3], ensure_tuple: bool, include_method_id: bool) -> Bytes[164]: if ensure_tuple: if not include_method_id: - return _abi_encode(d) # default ensure_tuple=True - return _abi_encode(d, method_id=0xdeadbeef) + return abi_encode(d) # default ensure_tuple=True + return abi_encode(d, method_id=0xdeadbeef) else: if not include_method_id: - return _abi_encode(d, ensure_tuple=False) - return _abi_encode(d, ensure_tuple=False, method_id=0xdeadbeef) + return abi_encode(d, ensure_tuple=False) + return abi_encode(d, ensure_tuple=False, method_id=0xdeadbeef) """ c = get_contract(code) @@ -185,12 +185,12 @@ def abi_encode( ) -> Bytes[548]: if ensure_tuple: if not include_method_id: - return _abi_encode(d) # default ensure_tuple=True - return _abi_encode(d, method_id=0xdeadbeef) + return abi_encode(d) # default ensure_tuple=True + return abi_encode(d, method_id=0xdeadbeef) else: if not include_method_id: - return _abi_encode(d, ensure_tuple=False) - 
return _abi_encode(d, ensure_tuple=False, method_id=0xdeadbeef) + return abi_encode(d, ensure_tuple=False) + return abi_encode(d, ensure_tuple=False, method_id=0xdeadbeef) """ c = get_contract(code) @@ -236,12 +236,12 @@ def abi_encode( ) -> Bytes[1700]: if ensure_tuple: if not include_method_id: - return _abi_encode(d) # default ensure_tuple=True - return _abi_encode(d, method_id=0xdeadbeef) + return abi_encode(d) # default ensure_tuple=True + return abi_encode(d, method_id=0xdeadbeef) else: if not include_method_id: - return _abi_encode(d, ensure_tuple=False) - return _abi_encode(d, ensure_tuple=False, method_id=0xdeadbeef) + return abi_encode(d, ensure_tuple=False) + return abi_encode(d, ensure_tuple=False, method_id=0xdeadbeef) """ c = get_contract(code) @@ -281,7 +281,7 @@ def get_counter() -> (uint256, String[6]): nonpayable @external def foo(addr: address) -> Bytes[164]: - return _abi_encode(extcall Foo(addr).get_counter(), method_id=0xdeadbeef) + return abi_encode(extcall Foo(addr).get_counter(), method_id=0xdeadbeef) """ c2 = get_contract(contract_2) @@ -300,7 +300,7 @@ def test_abi_encode_private(get_contract): bytez: Bytes[96] @internal def _foo(bs: Bytes[32]): - self.bytez = _abi_encode(bs) + self.bytez = abi_encode(bs) @external def foo(bs: Bytes[32]) -> (uint256, Bytes[96]): @@ -318,7 +318,7 @@ def test_abi_encode_private_dynarray(get_contract): bytez: Bytes[160] @internal def _foo(bs: DynArray[uint256, 3]): - self.bytez = _abi_encode(bs) + self.bytez = abi_encode(bs) @external def foo(bs: DynArray[uint256, 3]) -> (uint256, Bytes[160]): dont_clobber_me: uint256 = max_value(uint256) @@ -335,7 +335,7 @@ def test_abi_encode_private_nested_dynarray(get_contract): bytez: Bytes[1696] @internal def _foo(bs: DynArray[DynArray[DynArray[uint256, 3], 3], 3]): - self.bytez = _abi_encode(bs) + self.bytez = abi_encode(bs) @external def foo(bs: DynArray[DynArray[DynArray[uint256, 3], 3], 3]) -> (uint256, Bytes[1696]): @@ -358,9 +358,9 @@ def 
test_abi_encode_empty_string(get_contract, empty_literal): @external def foo(ensure_tuple: bool) -> Bytes[96]: if ensure_tuple: - return _abi_encode({empty_literal}) # default ensure_tuple=True + return abi_encode({empty_literal}) # default ensure_tuple=True else: - return _abi_encode({empty_literal}, ensure_tuple=False) + return abi_encode({empty_literal}, ensure_tuple=False) """ c = get_contract(code) diff --git a/vyper/ast/grammar.lark b/vyper/ast/grammar.lark index 3feb4df92f..97f9f70e24 100644 --- a/vyper/ast/grammar.lark +++ b/vyper/ast/grammar.lark @@ -297,7 +297,7 @@ call: atom_expr "(" [arguments] ")" empty: "empty" "(" type ")" // special rule to handle types as "arguments" (for `_abi_decode` builtin) -abi_decode: "_abi_decode" "(" arg "," type ( "," kwarg )* ")" +abi_decode: ("_abi_decode" | "abi_decode") "(" arg "," type ( "," kwarg )* ")" special_builtins: empty | abi_decode diff --git a/vyper/builtins/functions.py b/vyper/builtins/functions.py index d4c83b2bda..1944d32125 100644 --- a/vyper/builtins/functions.py +++ b/vyper/builtins/functions.py @@ -2331,7 +2331,7 @@ def build_IR(self, expr, args, kwargs, context): class ABIEncode(BuiltinFunctionT): - _id = "_abi_encode" # TODO prettier to rename this to abi.encode + _id = "abi_encode" # signature: *, ensure_tuple= -> Bytes[] # explanation of ensure_tuple: # default is to force even a single value into a tuple, @@ -2452,7 +2452,7 @@ def build_IR(self, expr, args, kwargs, context): class ABIDecode(BuiltinFunctionT): - _id = "_abi_decode" + _id = "abi_decode" _inputs = [("data", BytesT.any()), ("output_type", TYPE_T.any())] _kwargs = {"unwrap_tuple": KwargSettings(BoolT(), True, require_literal=True)} @@ -2541,6 +2541,28 @@ def build_IR(self, expr, args, kwargs, context): return b1.resolve(ret) +class OldABIEncode(ABIEncode): + _warned = False + _id = "_abi_encode" + + def _try_fold(self, node): + if not self.__class__._warned: + vyper_warn(f"`{self._id}()` is deprecated! 
Please use `{super()._id}()` instead.", node) + self.__class__._warned = True + super()._try_fold(node) + + +class OldABIDecode(ABIDecode): + _warned = False + _id = "_abi_decode" + + def _try_fold(self, node): + if not self.__class__._warned: + vyper_warn(f"`{self._id}()` is deprecated! Please use `{super()._id}()` instead.", node) + self.__class__._warned = True + super()._try_fold(node) + + class _MinMaxValue(TypenameFoldedFunctionT): def _try_fold(self, node): self._validate_arg_types(node) @@ -2593,8 +2615,10 @@ def _try_fold(self, node): DISPATCH_TABLE = { - "_abi_encode": ABIEncode(), - "_abi_decode": ABIDecode(), + "abi_encode": ABIEncode(), + "abi_decode": ABIDecode(), + "_abi_encode": OldABIEncode(), + "_abi_decode": OldABIDecode(), "floor": Floor(), "ceil": Ceil(), "convert": Convert(), From 21f7172274e551c721e9e35ab3c9d8322a2455d0 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 11 Jun 2024 14:04:33 -0700 Subject: [PATCH 24/53] fix[codegen]: recursive dynarray oob check (#4091) this commit fixes more edge cases in `abi_decode` dynarray validation. these are bugs which were missed (or regressions) in 1f6b9433fbd524, which itself was a continuation of eb011367cc769. there are multiple fixes contained in this commit. - similar conceptual error as in 1f6b9433fbd524. when the length word is out-of-bounds and its runtime value is zero, `make_setter` does not enter recursion and therefore there is no oob check. an example payload which demonstrates this is in `test_nested_invalid_dynarray_head()`. the fix is to check the size of the static section ("embedded static size") before entering the recursion, rather than child_type.static_size (which could be zero). essentially, this checks that the end of the static section is in bounds, rather than the beginning. - the fallback case in `complex_make_setter` could be referring to a tuple of dynamic types, which makes the tuple itself dynamic, so there needs to be an oob check there as well.
- `static_size()` is more appropriate than `min_size()` for abi payload validation, because you can have "valid" ABI payloads where the runtime length of the dynamic section is zero, because the heads in the static section all point back into the static section. this commit replaces the `min_size()` check with a `static_size()` check, everywhere. - remove `returndatasize` check in external calls, because it gets checked anyways during `make_setter` oob checks. - add a comment clarifying that payloads larger than `size_bound()` get rejected by `abi_decode` but not calldata decoding. tests for each case, contributed by @trocher --------- Co-authored-by: trocher --- .../builtins/codegen/test_abi_decode.py | 98 +++++++++++++++++++ .../test_external_contract_calls.py | 8 +- vyper/builtins/functions.py | 6 +- vyper/codegen/core.py | 15 ++- vyper/codegen/external_call.py | 12 +-- .../function_definitions/external_function.py | 2 +- 6 files changed, 119 insertions(+), 22 deletions(-) diff --git a/tests/functional/builtins/codegen/test_abi_decode.py b/tests/functional/builtins/codegen/test_abi_decode.py index d77bb1b5ae..5773636add 100644 --- a/tests/functional/builtins/codegen/test_abi_decode.py +++ b/tests/functional/builtins/codegen/test_abi_decode.py @@ -1323,3 +1323,101 @@ def run(x: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4]): with tx_failed(): c.run(data) + + +def test_nested_invalid_dynarray_head(get_contract, tx_failed): + code = """ +@nonpayable +@external +def foo(x:Bytes[320]): + if True: + a: Bytes[320-32] = b'' + + # make the word following the buffer x_mem dirty to make a potential + # OOB revert + fake_head: uint256 = 32 + x_mem: Bytes[320] = x + + y: DynArray[DynArray[uint256, 2], 2] = _abi_decode(x_mem,DynArray[DynArray[uint256, 2], 2]) + +@nonpayable +@external +def bar(x:Bytes[320]): + x_mem: Bytes[320] = x + + y:DynArray[DynArray[uint256, 2], 2] = _abi_decode(x_mem,DynArray[DynArray[uint256, 2], 2]) + """ + c = get_contract(code) + + encoded = (0x20, 0x02) #
head of the dynarray # len of outer + inner = ( + 0x0, # head1 + # 0x0, # head2 + ) + + encoded = _abi_payload_from_tuple(encoded + inner) + with tx_failed(): + c.foo(encoded) # revert + with tx_failed(): + c.bar(encoded) # return [[],[]] + + +def test_static_outer_type_invalid_heads(get_contract, tx_failed): + code = """ +@nonpayable +@external +def foo(x:Bytes[320]): + x_mem: Bytes[320] = x + y:DynArray[uint256, 2][2] = _abi_decode(x_mem,DynArray[uint256, 2][2]) + +@nonpayable +@external +def bar(x:Bytes[320]): + if True: + a: Bytes[160] = b'' + # write stuff here to make the call revert in case decode do + # an out of bound access: + fake_head: uint256 = 32 + x_mem: Bytes[320] = x + y:DynArray[uint256, 2][2] = _abi_decode(x_mem,DynArray[uint256, 2][2]) + """ + c = get_contract(code) + + encoded = (0x20,) # head of the static array + inner = ( + 0x00, # head of the first dynarray + # 0x00, # head of the second dynarray + ) + + encoded = _abi_payload_from_tuple(encoded + inner) + + with tx_failed(): + c.foo(encoded) + with tx_failed(): + c.bar(encoded) + + +def test_abi_decode_max_size(get_contract, tx_failed): + # test case where the payload is "too large" than the max size + # of abi encoding the type. 
this can happen when the payload is + # "sparse" and has garbage bytes in between the static and dynamic + # sections + code = """ +@external +def foo(a:Bytes[1000]): + v: DynArray[uint256, 1] = _abi_decode(a,DynArray[uint256, 1]) + """ + c = get_contract(code) + + payload = ( + 0xA0, # head + 0x00, # garbage + 0x00, # garbage + 0x00, # garbage + 0x00, # garbage + 0x01, # len + 0x12, # elem1 + ) + + with tx_failed(): + c.foo(_abi_payload_from_tuple(payload)) diff --git a/tests/functional/codegen/calling_convention/test_external_contract_calls.py b/tests/functional/codegen/calling_convention/test_external_contract_calls.py index 2192e6b6ab..e6616a1481 100644 --- a/tests/functional/codegen/calling_convention/test_external_contract_calls.py +++ b/tests/functional/codegen/calling_convention/test_external_contract_calls.py @@ -2519,13 +2519,13 @@ def foo(a: DynArray[{typ}, 3], b: String[5]): encoded = abi.encode(f"({typ}[],string)", val).hex() data = f"0x{sig}{encoded}" - # Dynamic size is short by 1 byte - malformed = data[:264] + # Static size is short by 1 byte + malformed = data[:136] with tx_failed(): env.message_call(c1.address, data=malformed) - # Dynamic size is at least minimum (132 bytes * 2 + 2 (for 0x) = 266) - valid = data[:266] + # Static size is at least minimum ((4 + 64) bytes * 2 + 2 (for 0x) = 138) + valid = data[:138] env.message_call(c1.address, data=valid) diff --git a/vyper/builtins/functions.py b/vyper/builtins/functions.py index 1944d32125..2564329b65 100644 --- a/vyper/builtins/functions.py +++ b/vyper/builtins/functions.py @@ -2482,7 +2482,7 @@ def build_IR(self, expr, args, kwargs, context): wrapped_typ = calculate_type_for_external_return(output_typ) abi_size_bound = wrapped_typ.abi_type.size_bound() - abi_min_size = wrapped_typ.abi_type.min_size() + abi_min_size = wrapped_typ.abi_type.static_size() # Get the size of data input_max_len = data.typ.maxlen @@ -2506,6 +2506,10 @@ def build_IR(self, expr, args, kwargs, context): ret = ["seq"] + # 
NOTE: we could replace these 4 lines with + # `[assert [le, abi_min_size, data_len]]`. it depends on + # what we consider a "valid" payload. + # cf. test_abi_decode_max_size() if abi_min_size == abi_size_bound: ret.append(["assert", ["eq", abi_min_size, data_len]]) else: diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index 5d4621518f..ff0f801d74 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -895,10 +895,7 @@ def _abi_payload_size(ir_node): # the amount of size each value occupies in static section # (the amount of size it occupies in the dynamic section is handled in # make_setter recursion) - item_size = ir_node.typ.value_type.abi_type.static_size() - if item_size == 0: - # manual optimization; the mload cannot currently be optimized out - return ["add", OFFSET, 0] + item_size = ir_node.typ.value_type.abi_type.embedded_static_size() return ["add", OFFSET, ["mul", get_dyn_array_count(ir_node), item_size]] if isinstance(ir_node.typ, _BytestringT): @@ -982,7 +979,15 @@ def make_setter(left, right, hi=None): # Complex Types assert isinstance(left.typ, (SArrayT, TupleT, StructT)) - return _complex_make_setter(left, right, hi=hi) + with right.cache_when_complex("c_right") as (b1, right): + ret = ["seq"] + if hi is not None: + item_end = add_ofst(right, right.typ.abi_type.static_size()) + len_check = ["assert", ["le", item_end, hi]] + ret.append(len_check) + + ret.append(_complex_make_setter(left, right, hi=hi)) + return b1.resolve(IRnode.from_list(ret)) # locations with no dedicated copy opcode diff --git a/vyper/codegen/external_call.py b/vyper/codegen/external_call.py index f3cd4e7a44..b6ac180722 100644 --- a/vyper/codegen/external_call.py +++ b/vyper/codegen/external_call.py @@ -86,9 +86,8 @@ def _unpack_returndata(buf, fn_type, call_kwargs, contract_address, context, exp abi_return_t = wrapped_return_t.abi_type - min_return_size = abi_return_t.min_size() max_return_size = abi_return_t.size_bound() - assert 0 < min_return_size <= 
max_return_size + assert 0 <= max_return_size ret_ofst = buf ret_len = max_return_size @@ -103,15 +102,6 @@ def _unpack_returndata(buf, fn_type, call_kwargs, contract_address, context, exp unpacker = ["seq"] - # revert when returndatasize is not in bounds - # (except when return_override is provided.) - if not call_kwargs.skip_contract_check: - assertion = IRnode.from_list( - ["assert", ["ge", "returndatasize", min_return_size]], - error_msg="returndatasize too small", - ) - unpacker.append(assertion) - assert isinstance(wrapped_return_t, TupleT) # unpack strictly diff --git a/vyper/codegen/function_definitions/external_function.py b/vyper/codegen/function_definitions/external_function.py index a9b4a93025..4c733ee851 100644 --- a/vyper/codegen/function_definitions/external_function.py +++ b/vyper/codegen/function_definitions/external_function.py @@ -84,7 +84,7 @@ def handler_for(calldata_kwargs, default_kwargs): # ensure calldata is at least of minimum length args_abi_t = calldata_args_t.abi_type - calldata_min_size = args_abi_t.min_size() + 4 + calldata_min_size = args_abi_t.static_size() + 4 # TODO optimize make_setter by using # TupleT(list(arg.typ for arg in calldata_kwargs + default_kwargs)) From 7770967ff8346430d699325fed3c02744e0100a9 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 12 Jun 2024 05:35:55 -0700 Subject: [PATCH 25/53] fix[codegen]: `make_setter` overlap in the presence of `staticcall` (#4128) this commit fixes another overlap bug in `make_setter`. this is a variant of the fixes in ad9c10b0b98e2d and 1c8349e867b2b3, specifically fixing an oversight in ad9c10b0b98e2d - when there is a `staticcall` contained inside of `make_setter`, there can still be src/dst overlap, due to read-only re-entrancy(!). this commit adds `staticcall` to the list of "risky call" opcodes, and adds a poc test case (contributed by @trocher). 
--------- Co-authored-by: trocher --- .../test_external_contract_calls.py | 35 +++++++++++++++++++ vyper/codegen/ir_node.py | 2 +- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/tests/functional/codegen/calling_convention/test_external_contract_calls.py b/tests/functional/codegen/calling_convention/test_external_contract_calls.py index e6616a1481..f9252f0a99 100644 --- a/tests/functional/codegen/calling_convention/test_external_contract_calls.py +++ b/tests/functional/codegen/calling_convention/test_external_contract_calls.py @@ -2582,3 +2582,38 @@ def boo(): c = get_contract(code) assert c.foo() == [1, 2, 3, 4] + + +def test_make_setter_staticcall(get_contract): + # variant of GH #3503 + code = """ +interface A: + def boo() -> uint256 : view +interface B: + def boo() -> uint256 : nonpayable + +a: DynArray[uint256, 10] + +@external +def foo() -> DynArray[uint256, 10]: + self.a = [3, 0, 0] + self.a = [1, 2, staticcall A(self).boo(), 4] + return self.a # bug returns [1, 2, 1, 4] + +@external +def bar() -> DynArray[uint256, 10]: + self.a = [3, 0, 0] + self.a = [1, 2, extcall B(self).boo(), 4] + return self.a # returns [1, 2, 3, 4] + + +@external +@view +# @nonpayable +def boo() -> uint256: + return self.a[0] + """ + c = get_contract(code) + + assert c.foo() == [1, 2, 3, 4] + assert c.bar() == [1, 2, 3, 4] diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index 3a9540f2cc..9d39ebd033 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -468,7 +468,7 @@ def referenced_variables(self): @cached_property def contains_risky_call(self): - ret = self.value in ("call", "delegatecall", "create", "create2") + ret = self.value in ("call", "delegatecall", "staticcall", "create", "create2") for arg in self.args: ret |= arg.contains_risky_call From 153262bfb690cdb3c367d5961c0b70a670a646a8 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 12 Jun 2024 23:44:42 +0300 Subject: [PATCH 26/53] fix[venom]: clear `out_vars` during 
calculation (#4129) Clear `out_vars` set at start of calculations When computing `out_vars` for each basic block, we perform set unions of variables expected by all successor basic blocks of the current block. This commit clears the `out_vars` set at the beginning of the calculations. --------- Co-authored-by: Charles Cooper --- vyper/venom/analysis/liveness.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vyper/venom/analysis/liveness.py b/vyper/venom/analysis/liveness.py index ac06ff4dae..5d1ac488f1 100644 --- a/vyper/venom/analysis/liveness.py +++ b/vyper/venom/analysis/liveness.py @@ -54,7 +54,8 @@ def _calculate_out_vars(self, bb: IRBasicBlock) -> bool: Compute out_vars of basic block. Returns True if out_vars changed """ - out_vars = bb.out_vars.copy() + out_vars = bb.out_vars + bb.out_vars = OrderedSet() for out_bb in bb.cfg_out: target_vars = self.input_vars_from(bb, out_bb) bb.out_vars = bb.out_vars.union(target_vars) From e9e9d78a2bb8eeb693709b56c8152df5f8af2efa Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 13 Jun 2024 08:29:10 -0700 Subject: [PATCH 27/53] fix[ux]: improve initializer hint for unimported modules (#4145) improve initializer hint in the case that the needed module is not already imported --- tests/functional/syntax/modules/test_initializers.py | 5 +++-- vyper/semantics/analysis/module.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/functional/syntax/modules/test_initializers.py b/tests/functional/syntax/modules/test_initializers.py index 624904e45f..ead0fbcf6b 100644 --- a/tests/functional/syntax/modules/test_initializers.py +++ b/tests/functional/syntax/modules/test_initializers.py @@ -1273,7 +1273,7 @@ def foo(): assert e.value._hint == "add `lib3 := lib3` to its initializer list" -def test_hint_for_missing_initializer_when_no_import(make_input_bundle): +def test_hint_for_missing_initializer_when_no_import(make_input_bundle, chdir_tmp_path): lib1 = """ counter: uint256 """ @@ 
-1297,7 +1297,8 @@ def foo(): with pytest.raises(InitializerException) as e: compile_code(main, input_bundle=input_bundle) assert e.value._message == "`lib2` uses `lib1`, but it is not initialized with `lib1`" - assert e.value._hint == "try importing lib1 first" + hint = "try importing `lib1` first (located at `lib1.vy`)" + assert e.value._hint == hint @pytest.fixture diff --git a/vyper/semantics/analysis/module.py b/vyper/semantics/analysis/module.py index d0b019db7a..d6bbea1b48 100644 --- a/vyper/semantics/analysis/module.py +++ b/vyper/semantics/analysis/module.py @@ -514,7 +514,8 @@ def visit_InitializesDecl(self, node): break if rhs is None: - hint = f"try importing {item.alias} first" + hint = f"try importing `{item.alias}` first " + hint += f"(located at `{item.module_t._module.path}`)" elif not isinstance(annotation, vy_ast.Subscript): # it's `initializes: foo` instead of `initializes: foo[...]` hint = f"did you mean {module_ref.id}[{lhs} := {rhs}]?" From 44bb281ccaac89dc3bd66030702473c386bceae6 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 13 Jun 2024 09:22:58 -0700 Subject: [PATCH 28/53] fix[codegen]: add back in `returndatasize` check (#4144) add back in `returndatasize` check for external calls in the case that `make_setter()` is not called (i.e. when `needs_clamp()` is `True`). the check was removed (i.e. 
there was a regression) in 21f7172274e test case and poc contributed by @cyberthirst --------- Co-authored-by: cyberthirst --- .../builtins/codegen/test_abi_decode.py | 20 +++++++++++ vyper/abi_types.py | 25 -------------- vyper/codegen/external_call.py | 33 ++++++++++++++++--- 3 files changed, 48 insertions(+), 30 deletions(-) diff --git a/tests/functional/builtins/codegen/test_abi_decode.py b/tests/functional/builtins/codegen/test_abi_decode.py index 5773636add..9ae869c9cc 100644 --- a/tests/functional/builtins/codegen/test_abi_decode.py +++ b/tests/functional/builtins/codegen/test_abi_decode.py @@ -1421,3 +1421,23 @@ def foo(a:Bytes[1000]): with tx_failed(): c.foo(_abi_payload_from_tuple(payload)) + + +# returndatasize check for uint256 +def test_returndatasize_check(get_contract, tx_failed): + code = """ +@external +def bar(): + pass + +interface A: + def bar() -> uint256: nonpayable + +@external +def run() -> uint256: + return extcall A(self).bar() + """ + c = get_contract(code) + + with tx_failed(): + c.run() diff --git a/vyper/abi_types.py b/vyper/abi_types.py index 24d6fe866a..a95930b16d 100644 --- a/vyper/abi_types.py +++ b/vyper/abi_types.py @@ -24,11 +24,6 @@ def embedded_dynamic_size_bound(self): return 0 return self.size_bound() - def embedded_min_dynamic_size(self): - if not self.is_dynamic(): - return 0 - return self.min_size() - # size (in bytes) of the static section def static_size(self): raise NotImplementedError("ABIType.static_size") @@ -42,14 +37,6 @@ def dynamic_size_bound(self): def size_bound(self): return self.static_size() + self.dynamic_size_bound() - def min_size(self): - return self.static_size() + self.min_dynamic_size() - - def min_dynamic_size(self): - if not self.is_dynamic(): - return 0 - raise NotImplementedError("ABIType.min_dynamic_size") - # The canonical name of the type for calculating the function selector def selector_name(self): raise NotImplementedError("ABIType.selector_name") @@ -158,9 +145,6 @@ def static_size(self): 
def dynamic_size_bound(self): return self.m_elems * self.subtyp.embedded_dynamic_size_bound() - def min_dynamic_size(self): - return self.m_elems * self.subtyp.embedded_min_dynamic_size() - def selector_name(self): return f"{self.subtyp.selector_name()}[{self.m_elems}]" @@ -187,9 +171,6 @@ def dynamic_size_bound(self): # length word + data return 32 + ceil32(self.bytes_bound) - def min_dynamic_size(self): - return 32 - def selector_name(self): return "bytes" @@ -222,9 +203,6 @@ def dynamic_size_bound(self): # length + size of embedded children return 32 + subtyp_size * self.elems_bound - def min_dynamic_size(self): - return 32 - def selector_name(self): return f"{self.subtyp.selector_name()}[]" @@ -245,9 +223,6 @@ def static_size(self): def dynamic_size_bound(self): return sum([t.embedded_dynamic_size_bound() for t in self.subtyps]) - def min_dynamic_size(self): - return sum([t.embedded_min_dynamic_size() for t in self.subtyps]) - def is_complex_type(self): return True diff --git a/vyper/codegen/external_call.py b/vyper/codegen/external_call.py index b6ac180722..72fff5378f 100644 --- a/vyper/codegen/external_call.py +++ b/vyper/codegen/external_call.py @@ -86,8 +86,9 @@ def _unpack_returndata(buf, fn_type, call_kwargs, contract_address, context, exp abi_return_t = wrapped_return_t.abi_type + min_return_size = abi_return_t.static_size() max_return_size = abi_return_t.size_bound() - assert 0 <= max_return_size + assert 0 < min_return_size <= max_return_size ret_ofst = buf ret_len = max_return_size @@ -105,11 +106,35 @@ def _unpack_returndata(buf, fn_type, call_kwargs, contract_address, context, exp assert isinstance(wrapped_return_t, TupleT) # unpack strictly - if needs_clamp(wrapped_return_t, encoding): + if not needs_clamp(wrapped_return_t, encoding): + # revert when returndatasize is not in bounds, except when + # skip_contract_check is enabled. 
+ # NOTE: there is an optimization here: when needs_clamp is True, + # make_setter (implicitly) checks returndatasize during abi + # decoding. + # since make_setter is not called in this branch, we need to check + # returndatasize here, but we avoid a redundant check by only doing + # the returndatasize check inside of this branch (and not in the + # `needs_clamp==True` branch). + # in the future, this check could be moved outside of the branch, and + # instead rely on the optimizer to optimize out the redundant check, + # it would need the optimizer to do algebraic reductions (along the + # lines of `a>b and b>c and a>c` reduced to `a>b and b>c`). + # another thing we could do instead once we have the machinery is to + # simply always use make_setter instead of having this assertion, and + # rely on memory analyser to optimize out the memory movement. + if not call_kwargs.skip_contract_check: + assertion = IRnode.from_list( + ["assert", ["ge", "returndatasize", min_return_size]], + error_msg="returndatasize too small", + ) + unpacker.append(assertion) + return_buf = buf + + else: return_buf = context.new_internal_variable(wrapped_return_t) # note: make_setter does ABI decoding and clamps - payload_bound = IRnode.from_list( ["select", ["lt", ret_len, "returndatasize"], ret_len, "returndatasize"] ) @@ -117,8 +142,6 @@ def _unpack_returndata(buf, fn_type, call_kwargs, contract_address, context, exp unpacker.append( b1.resolve(make_setter(return_buf, buf, hi=add_ofst(buf, payload_bound))) ) - else: - return_buf = buf if call_kwargs.default_return_value is not None: # if returndatasize == 0: From 69e5c0541a9b23ad1b085e0a89b545124716b516 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 14 Jun 2024 13:31:45 -0700 Subject: [PATCH 29/53] feat[test]: implement `abi_decode` spec test (#4095) this commit implements a spec-based differential fuzzer for `abi_decode`. 
it introduces several components: - a "spec" implementation of `abi_decode`, which is how vyper's abi_decode should behave on a given payload, implemented in python - a hypothesis strategy to draw vyper types - hypothesis strategy to create valid data for a given vyper type - a hypothesis strategy to _mutate_ a given payload which is designed to introduce faults in the decoder. testing indicated splicing pointers into the payload - either valid pointers or "nearly" valid pointers - had the highest success rate for finding bugs in the decoder. the intuition here is that the most difficult part of the decoder is validating out-of-bound pointers in the payload, so pointers represent "semantically high-value" data to the fuzzer. - some hypothesis tuning to ensure a good distribution of types over several days of testing+tuning, this fuzzer independently found the bugs fixed in 44bb281ccaa and 21f7172274e (which were originally found by manual review). --- tests/conftest.py | 2 +- tests/evm_backends/base_env.py | 12 +- tests/evm_backends/revm_env.py | 2 + .../functional/builtins/codegen/abi_decode.py | 148 +++++++ .../builtins/codegen/test_abi_decode_fuzz.py | 416 ++++++++++++++++++ vyper/codegen/core.py | 12 +- vyper/semantics/types/subscriptable.py | 5 +- 7 files changed, 592 insertions(+), 5 deletions(-) create mode 100644 tests/functional/builtins/codegen/abi_decode.py create mode 100644 tests/functional/builtins/codegen/test_abi_decode_fuzz.py diff --git a/tests/conftest.py b/tests/conftest.py index 4b3d90f65a..31c72246bd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -196,7 +196,7 @@ def env(gas_limit, evm_version, evm_backend, tracing, account_keys) -> BaseEnv: ) -@pytest.fixture +@pytest.fixture(scope="module") def get_contract_from_ir(env, optimize): def ir_compiler(ir, *args, **kwargs): ir = IRnode.from_list(ir) diff --git a/tests/evm_backends/base_env.py b/tests/evm_backends/base_env.py index a8ab4d2367..1ea3dba328 100644 --- 
a/tests/evm_backends/base_env.py +++ b/tests/evm_backends/base_env.py @@ -30,7 +30,7 @@ class ExecutionResult: gas_used: int -class EvmError(RuntimeError): +class EvmError(Exception): """Exception raised when a call fails.""" @@ -205,6 +205,16 @@ def out_of_gas_error(self) -> str: """Expected error message when user runs out of gas""" raise NotImplementedError # must be implemented by subclasses + @property + def contract_size_limit_error(self) -> str: + """Expected error message when contract is over codesize limit""" + raise NotImplementedError # must be implemented by subclasses + + @property + def initcode_size_limit_error(self) -> str: + """Expected error message when contract is over codesize limit""" + raise NotImplementedError # must be implemented by subclasses + def _compile( source_code: str, diff --git a/tests/evm_backends/revm_env.py b/tests/evm_backends/revm_env.py index 5c8b8aba08..d5a7570f96 100644 --- a/tests/evm_backends/revm_env.py +++ b/tests/evm_backends/revm_env.py @@ -11,6 +11,8 @@ class RevmEnv(BaseEnv): invalid_opcode_error = "InvalidFEOpcode" out_of_gas_error = "OutOfGas" + contract_size_limit_error = "CreateContractSizeLimit" + initcode_size_limit_error = "CreateInitCodeSizeLimit" def __init__( self, diff --git a/tests/functional/builtins/codegen/abi_decode.py b/tests/functional/builtins/codegen/abi_decode.py new file mode 100644 index 0000000000..9e10b862d5 --- /dev/null +++ b/tests/functional/builtins/codegen/abi_decode.py @@ -0,0 +1,148 @@ +from typing import TYPE_CHECKING, Iterable + +from eth_utils import to_checksum_address + +from vyper.abi_types import ( + ABI_Address, + ABI_Bool, + ABI_Bytes, + ABI_BytesM, + ABI_DynamicArray, + ABI_GIntM, + ABI_StaticArray, + ABI_String, + ABI_Tuple, + ABIType, +) +from vyper.utils import int_bounds, unsigned_to_signed + +if TYPE_CHECKING: + from vyper.semantics.types import VyperType + + +class DecodeError(Exception): + pass + + +def _strict_slice(payload, start, length): + if start < 0: + raise 
DecodeError(f"OOB {start}") + + end = start + length + if end > len(payload): + raise DecodeError(f"OOB {start} + {length} (=={end}) > {len(payload)}") + return payload[start:end] + + +def _read_int(payload, ofst): + return int.from_bytes(_strict_slice(payload, ofst, 32)) + + +# vyper abi_decode spec implementation +def spec_decode(typ: "VyperType", payload: bytes): + abi_t = typ.abi_type + + lo, hi = abi_t.static_size(), abi_t.size_bound() + if not (lo <= len(payload) <= hi): + raise DecodeError(f"bad payload size {lo}, {len(payload)}, {hi}") + + return _decode_r(abi_t, 0, payload) + + +def _decode_r(abi_t: ABIType, current_offset: int, payload: bytes): + if isinstance(abi_t, ABI_Tuple): + return tuple(_decode_multi_r(abi_t.subtyps, current_offset, payload)) + + if isinstance(abi_t, ABI_StaticArray): + n = abi_t.m_elems + subtypes = [abi_t.subtyp] * n + return _decode_multi_r(subtypes, current_offset, payload) + + if isinstance(abi_t, ABI_DynamicArray): + bound = abi_t.elems_bound + + n = _read_int(payload, current_offset) + if n > bound: + raise DecodeError("Dynarray too large") + + # offsets in dynarray start from after the length word + current_offset += 32 + subtypes = [abi_t.subtyp] * n + return _decode_multi_r(subtypes, current_offset, payload) + + # sanity check + assert not abi_t.is_complex_type() + + if isinstance(abi_t, ABI_Bytes): + bound = abi_t.bytes_bound + length = _read_int(payload, current_offset) + if length > bound: + raise DecodeError("bytes too large") + + current_offset += 32 # size of length word + ret = _strict_slice(payload, current_offset, length) + + # abi string doesn't actually define string decoder, so we + # just bytecast the output + if isinstance(abi_t, ABI_String): + # match eth-stdlib, since that's what we check against + ret = ret.decode(errors="surrogateescape") + + return ret + + # sanity check + assert not abi_t.is_dynamic() + + if isinstance(abi_t, ABI_GIntM): + ret = _read_int(payload, current_offset) + + # handle 
signedness + if abi_t.signed: + ret = unsigned_to_signed(ret, 256, strict=True) + + # bounds check + lo, hi = int_bounds(signed=abi_t.signed, bits=abi_t.m_bits) + if not (lo <= ret <= hi): + u = "" if abi_t.signed else "u" + raise DecodeError(f"invalid {u}int{abi_t.m_bits}") + + if isinstance(abi_t, ABI_Address): + return to_checksum_address(ret.to_bytes(20, "big")) + + if isinstance(abi_t, ABI_Bool): + if ret not in (0, 1): + raise DecodeError("invalid bool") + return ret + + return ret + + if isinstance(abi_t, ABI_BytesM): + ret = _strict_slice(payload, current_offset, 32) + m = abi_t.m_bytes + assert 1 <= m <= 32 # internal sanity check + # BytesM is right-padded with zeroes + if ret[m:] != b"\x00" * (32 - m): + raise DecodeError(f"invalid bytes{m}") + return ret[:m] + + raise RuntimeError("unreachable") + + +def _decode_multi_r(types: Iterable[ABIType], outer_offset: int, payload: bytes) -> list: + ret = [] + static_ofst = outer_offset + + for sub_t in types: + if sub_t.is_dynamic(): + # "head" terminology from abi spec + head = _read_int(payload, static_ofst) + ofst = outer_offset + head + else: + ofst = static_ofst + + item = _decode_r(sub_t, ofst, payload) + + ret.append(item) + static_ofst += sub_t.embedded_static_size() + + return ret diff --git a/tests/functional/builtins/codegen/test_abi_decode_fuzz.py b/tests/functional/builtins/codegen/test_abi_decode_fuzz.py new file mode 100644 index 0000000000..d12b2cde7e --- /dev/null +++ b/tests/functional/builtins/codegen/test_abi_decode_fuzz.py @@ -0,0 +1,416 @@ +from dataclasses import dataclass + +import hypothesis as hp +import hypothesis.strategies as st +import pytest +from eth.codecs import abi + +from tests.evm_backends.base_env import EvmError +from vyper.codegen.core import calculate_type_for_external_return, needs_external_call_wrap +from vyper.semantics.types import ( + AddressT, + BoolT, + BytesM_T, + BytesT, + DArrayT, + DecimalT, + HashMapT, + IntegerT, + SArrayT, + StringT, + TupleT, + VyperType, 
+ _get_primitive_types, + _get_sequence_types, +) +from vyper.semantics.types.shortcuts import UINT256_T + +from .abi_decode import DecodeError, spec_decode + +pytestmark = pytest.mark.fuzzing + +type_ctors = [] +for t in _get_primitive_types().values(): + if t == HashMapT or t == DecimalT(): + continue + if isinstance(t, VyperType): + t = t.__class__ + if t in type_ctors: + continue + type_ctors.append(t) + +complex_static_ctors = [SArrayT, TupleT] +complex_dynamic_ctors = [DArrayT] +leaf_ctors = [t for t in type_ctors if t not in _get_sequence_types().values()] +static_leaf_ctors = [t for t in leaf_ctors if t._is_prim_word] +dynamic_leaf_ctors = [BytesT, StringT] + +MAX_MUTATIONS = 33 + + +@st.composite +# max type nesting +def vyper_type(draw, nesting=3, skip=None): + assert nesting >= 0 + + skip = skip or [] + + st_leaves = st.one_of(st.sampled_from(dynamic_leaf_ctors), st.sampled_from(static_leaf_ctors)) + st_complex = st.one_of( + st.sampled_from(complex_dynamic_ctors), st.sampled_from(complex_static_ctors) + ) + + if nesting == 0: + st_type = st_leaves + else: + st_type = st.one_of(st_complex, st_leaves) + + # filter here is a bit of a kludge, would be better to improve sampling + t = draw(st_type.filter(lambda t: t not in skip)) + + # note: maybe st.deferred is good here, we could define it with + # mutual recursion + def _go(skip=skip): + return draw(vyper_type(nesting=nesting - 1, skip=skip)) + + if t in (BytesT, StringT): + # arbitrary max_value + bound = draw(st.integers(min_value=1, max_value=1024)) + return t(bound) + + if t == SArrayT: + subtype = _go(skip=[TupleT, BytesT, StringT]) + bound = draw(st.integers(min_value=1, max_value=6)) + return t(subtype, bound) + if t == DArrayT: + subtype = _go(skip=[TupleT]) + bound = draw(st.integers(min_value=1, max_value=16)) + return t(subtype, bound) + + if t == TupleT: + # zero-length tuples are not allowed in vyper + n = draw(st.integers(min_value=1, max_value=6)) + subtypes = [_go() for _ in range(n)] + 
return TupleT(subtypes) + + if t in (BoolT, AddressT): + return t() + + if t == IntegerT: + signed = draw(st.booleans()) + bits = 8 * draw(st.integers(min_value=1, max_value=32)) + return t(signed, bits) + + if t == BytesM_T: + m = draw(st.integers(min_value=1, max_value=32)) + return t(m) + + raise RuntimeError("unreachable") + + +@st.composite +def data_for_type(draw, typ): + def _go(t): + return draw(data_for_type(t)) + + if isinstance(typ, TupleT): + return tuple(_go(item_t) for item_t in typ.member_types) + + if isinstance(typ, SArrayT): + return [_go(typ.value_type) for _ in range(typ.length)] + + if isinstance(typ, DArrayT): + n = draw(st.integers(min_value=0, max_value=typ.length)) + return [_go(typ.value_type) for _ in range(n)] + + if isinstance(typ, StringT): + # technically the ABI spec doesn't say string has to be valid utf-8, + # but eth-stdlib won't encode invalid utf-8 + return draw(st.text(max_size=typ.length)) + + if isinstance(typ, BytesT): + return draw(st.binary(max_size=typ.length)) + + if isinstance(typ, IntegerT): + lo, hi = typ.ast_bounds + return draw(st.integers(min_value=lo, max_value=hi)) + + if isinstance(typ, BytesM_T): + return draw(st.binary(min_size=typ.length, max_size=typ.length)) + + if isinstance(typ, BoolT): + return draw(st.booleans()) + + if isinstance(typ, AddressT): + ret = draw(st.binary(min_size=20, max_size=20)) + return "0x" + ret.hex() + + raise RuntimeError("unreachable") + + +def _sort2(x, y): + if x > y: + return y, x + return x, y + + +@st.composite +def _mutate(draw, payload, max_mutations=MAX_MUTATIONS): + # do point+bulk mutations, + # add/edit/delete/splice/flip up to max_mutations. + if len(payload) == 0: + return + + ret = bytearray(payload) + + # for add/edit, the new byte is any character, but we bias it towards + # bytes already in the payload. 
+ st_any_byte = st.integers(min_value=0, max_value=255) + payload_nonzeroes = list(x for x in payload if x != 0) + if len(payload_nonzeroes) > 0: + st_existing_byte = st.sampled_from(payload) + st_byte = st.one_of(st_existing_byte, st_any_byte) + else: + st_byte = st_any_byte + + # add, edit, delete, word, splice, flip + possible_actions = "adwww" + actions = draw(st.lists(st.sampled_from(possible_actions), max_size=MAX_MUTATIONS)) + + for action in actions: + if len(ret) == 0: + # bail out. could we maybe be smarter, like only add here? + break + + # for the mutation position, we can use any index in the payload, + # but we bias it towards indices of nonzero bytes. + st_any_ix = st.integers(min_value=0, max_value=len(ret) - 1) + nonzero_indexes = [i for i, s in enumerate(ret) if s != 0] + if len(nonzero_indexes) > 0: + st_nonzero_ix = st.sampled_from(nonzero_indexes) + st_ix = st.one_of(st_any_ix, st_nonzero_ix) + else: + st_ix = st_any_ix + + ix = draw(st_ix) + + if action == "a": + ret.insert(ix, draw(st_byte)) + elif action == "e": + ret[ix] = draw(st_byte) + elif action == "d": + ret.pop(ix) + elif action == "w": + # splice word + st_uint256 = st.integers(min_value=0, max_value=2**256 - 1) + + # valid pointers, but maybe *just* out of bounds + st_poison = st.integers(min_value=-2 * len(ret), max_value=2 * len(ret)).map( + lambda x: x % (2**256) + ) + word = draw(st.one_of(st_poison, st_uint256)) + ret[ix - 31 : ix + 1] = word.to_bytes(32) + elif action == "s": + ix2 = draw(st_ix) + ix, ix2 = _sort2(ix, ix2) + ix2 += 1 + # max splice is 64 bytes, due to MAX_BUFFER_SIZE limitation in st.binary + ix2 = ix + (ix2 % 64) + length = ix2 - ix + substr = draw(st.binary(min_size=length, max_size=length)) + ret[ix:ix2] = substr + elif action == "f": + ix2 = draw(st_ix) + ix, ix2 = _sort2(ix, ix2) + ix2 += 1 + for i in range(ix, ix2): + # flip the bits in the byte + ret[i] = 255 ^ ret[i] + else: + raise RuntimeError("unreachable") + + return bytes(ret) + + +@st.composite 
+def payload_from(draw, typ): + data = draw(data_for_type(typ)) + schema = typ.abi_type.selector_name() + payload = abi.encode(schema, data) + + return draw(_mutate(payload)) + + +_settings = dict( + report_multiple_bugs=False, + # verbosity=hp.Verbosity.verbose, + suppress_health_check=( + hp.HealthCheck.data_too_large, + hp.HealthCheck.too_slow, + hp.HealthCheck.large_base_example, + ), + phases=( + hp.Phase.explicit, + hp.Phase.reuse, + hp.Phase.generate, + hp.Phase.target, + # Phase.shrink, # can force long waiting for examples + # Phase.explain, # not helpful here + ), +) + + +@dataclass(frozen=True) +class _TypeStats: + nesting: int = 0 + num_dynamic_types: int = 0 # number of dynamic types in the type + breadth: int = 0 # e.g. int16[50] has higher breadth than int16[1] + width: int = 0 # size of type + + +def _type_stats(typ: VyperType) -> _TypeStats: + def _finalize(): # little trick to save re-typing the arguments + width = typ.memory_bytes_required + return _TypeStats( + nesting=nesting, num_dynamic_types=num_dynamic_types, breadth=breadth, width=width + ) + + if typ._is_prim_word: + nesting = 0 + breadth = 1 + num_dynamic_types = 0 + return _finalize() + + if isinstance(typ, (BytesT, StringT)): + nesting = 0 + breadth = 1 # idk + num_dynamic_types = 1 + return _finalize() + + if isinstance(typ, TupleT): + substats = [_type_stats(t) for t in typ.member_types] + nesting = 1 + max(s.nesting for s in substats) + breadth = max(typ.length, *[s.breadth for s in substats]) + num_dynamic_types = sum(s.num_dynamic_types for s in substats) + return _finalize() + + if isinstance(typ, DArrayT): + substat = _type_stats(typ.value_type) + nesting = 1 + substat.nesting + breadth = max(typ.count, substat.breadth) + num_dynamic_types = 1 + substat.num_dynamic_types + return _finalize() + + if isinstance(typ, SArrayT): + substat = _type_stats(typ.value_type) + nesting = 1 + substat.nesting + breadth = max(typ.count, substat.breadth) + num_dynamic_types = 
substat.num_dynamic_types + return _finalize() + + raise RuntimeError("unreachable") + + +@pytest.fixture(scope="module") +def payload_copier(get_contract_from_ir): + # some contract which will return the buffer passed to it + # note: hardcode the location of the bytestring + ir = [ + "with", + "length", + ["calldataload", 36], + ["seq", ["calldatacopy", 0, 68, "length"], ["return", 0, "length"]], + ] + return get_contract_from_ir(["deploy", 0, ir, 0]) + + +PARALLELISM = 1 # increase on fuzzer box + + +# NOTE: this is a heavy test. 100 types * 100 payloads per type can take +# 3-4minutes on a regular CPU core. +@pytest.mark.parametrize("_n", list(range(PARALLELISM))) +@hp.given(typ=vyper_type()) +@hp.settings(max_examples=100, **_settings) +@hp.example(typ=DArrayT(DArrayT(UINT256_T, 2), 2)) +def test_abi_decode_fuzz(_n, typ, get_contract, tx_failed, payload_copier): + # import time + # t0 = time.time() + # print("ENTER", typ) + + wrapped_type = calculate_type_for_external_return(typ) + + stats = _type_stats(typ) + # for k, v in asdict(stats).items(): + # event(k, v) + hp.target(stats.num_dynamic_types) + # hp.target(typ.abi_type.is_dynamic() + typ.abi_type.is_complex_type())) + + # add max_mutations bytes worth of padding so we don't just get caught + # by bytes length check at function entry + type_bound = wrapped_type.abi_type.size_bound() + buffer_bound = type_bound + MAX_MUTATIONS + type_str = repr(typ) # annotation in vyper code + # TODO: intrinsic decode from staticcall/extcall + # TODO: _abi_decode from other sources (staticcall/extcall?) 
+ # TODO: dirty the buffer + # TODO: check unwrap_tuple=False + code = f""" +@external +def run(xs: Bytes[{buffer_bound}]) -> {type_str}: + ret: {type_str} = abi_decode(xs, {type_str}) + return ret + +interface Foo: + def foo(xs: Bytes[{buffer_bound}]) -> {type_str}: view # STATICCALL + def bar(xs: Bytes[{buffer_bound}]) -> {type_str}: nonpayable # CALL + +@external +def run2(xs: Bytes[{buffer_bound}], copier: Foo) -> {type_str}: + assert len(xs) <= {type_bound} + return staticcall copier.foo(xs) + +@external +def run3(xs: Bytes[{buffer_bound}], copier: Foo) -> {type_str}: + assert len(xs) <= {type_bound} + return (extcall copier.bar(xs)) + """ + c = get_contract(code) + + @hp.given(data=payload_from(wrapped_type)) + @hp.settings(max_examples=100, **_settings) + def _fuzz(data): + hp.note(f"type: {typ}") + hp.note(f"abi_t: {wrapped_type.abi_type.selector_name()}") + hp.note(code) + hp.note(data.hex()) + + try: + expected = spec_decode(wrapped_type, data) + + # unwrap if necessary + if needs_external_call_wrap(typ): + assert isinstance(expected, tuple) + (expected,) = expected + + hp.note(f"expected {expected}") + assert expected == c.run(data) + assert expected == c.run2(data, payload_copier.address) + assert expected == c.run3(data, payload_copier.address) + + except DecodeError: + # note EvmError includes reverts *and* exceptional halts. 
+ # we can get OOG during abi decoding due to how + # `_abi_payload_size()` works + hp.note("expect failure") + with tx_failed(EvmError): + c.run(data) + with tx_failed(EvmError): + c.run2(data, payload_copier.address) + with tx_failed(EvmError): + c.run3(data, payload_copier.address) + + _fuzz() + + # t1 = time.time() + # print(f"elapsed {t1 - t0}s") diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index ff0f801d74..9a0a08097c 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -1169,8 +1169,12 @@ def clamp_bytestring(ir_node, hi=None): if hi is not None: assert t.maxlen < 2**64 # sanity check - # note: this add does not risk arithmetic overflow because + # NOTE: this add does not risk arithmetic overflow because # length is bounded by maxlen. + # however(!) _abi_payload_size can OOG, since it loads the word + # at `ir_node` to find the length of the bytearray, which could + # be out-of-bounds. + # if we didn't get OOG, we could overflow in `add`. item_end = add_ofst(ir_node, _abi_payload_size(ir_node)) len_check = ["seq", ["assert", ["le", item_end, hi]], len_check] @@ -1189,8 +1193,12 @@ def clamp_dyn_array(ir_node, hi=None): if hi is not None: assert t.count < 2**64 # sanity check - # note: this add does not risk arithmetic overflow because + # NOTE: this add does not risk arithmetic overflow because # length is bounded by count * elemsize. + # however(!) _abi_payload_size can OOG, since it loads the word + # at `ir_node` to find the length of the bytearray, which could + # be out-of-bounds. + # if we didn't get OOG, we could overflow in `add`. 
item_end = add_ofst(ir_node, _abi_payload_size(ir_node)) # if the subtype is dynamic, the length check is performed in diff --git a/vyper/semantics/types/subscriptable.py b/vyper/semantics/types/subscriptable.py index c392ff48b1..4068d815d2 100644 --- a/vyper/semantics/types/subscriptable.py +++ b/vyper/semantics/types/subscriptable.py @@ -334,7 +334,10 @@ def __init__(self, member_types: Tuple[VyperType, ...]) -> None: self.key_type = UINT256_T # API Compatibility def __repr__(self): - return "(" + ", ".join(repr(t) for t in self.member_types) + ")" + if len(self.member_types) == 1: + (t,) = self.member_types + return f"({t},)" + return "(" + ", ".join(f"{t}" for t in self.member_types) + ")" @property def length(self): From 2d82a74937edeed5e9d4c0c8cecd78a0d70530fa Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 17 Jun 2024 04:10:01 -0700 Subject: [PATCH 30/53] feat[test]: add more coverage to `abi_decode` fuzzer tests (#4153) fuzz with `unwrap_tuple=False` add fuzzing for structs follow up to 69e5c0541a9b23 --- .../builtins/codegen/test_abi_decode_fuzz.py | 124 +++++++++++++++--- vyper/semantics/types/user.py | 11 +- 2 files changed, 115 insertions(+), 20 deletions(-) diff --git a/tests/functional/builtins/codegen/test_abi_decode_fuzz.py b/tests/functional/builtins/codegen/test_abi_decode_fuzz.py index d12b2cde7e..e215002446 100644 --- a/tests/functional/builtins/codegen/test_abi_decode_fuzz.py +++ b/tests/functional/builtins/codegen/test_abi_decode_fuzz.py @@ -18,12 +18,12 @@ IntegerT, SArrayT, StringT, + StructT, TupleT, VyperType, _get_primitive_types, _get_sequence_types, ) -from vyper.semantics.types.shortcuts import UINT256_T from .abi_decode import DecodeError, spec_decode @@ -39,7 +39,7 @@ continue type_ctors.append(t) -complex_static_ctors = [SArrayT, TupleT] +complex_static_ctors = [SArrayT, TupleT, StructT] complex_dynamic_ctors = [DArrayT] leaf_ctors = [t for t in type_ctors if t not in _get_sequence_types().values()] static_leaf_ctors = [t 
for t in leaf_ctors if t._is_prim_word] @@ -50,10 +50,12 @@ @st.composite # max type nesting -def vyper_type(draw, nesting=3, skip=None): +def vyper_type(draw, nesting=3, skip=None, source_fragments=None): assert nesting >= 0 skip = skip or [] + if source_fragments is None: + source_fragments = [] st_leaves = st.one_of(st.sampled_from(dynamic_leaf_ctors), st.sampled_from(static_leaf_ctors)) st_complex = st.one_of( @@ -71,39 +73,52 @@ def vyper_type(draw, nesting=3, skip=None): # note: maybe st.deferred is good here, we could define it with # mutual recursion def _go(skip=skip): - return draw(vyper_type(nesting=nesting - 1, skip=skip)) + _, typ = draw(vyper_type(nesting=nesting - 1, skip=skip, source_fragments=source_fragments)) + return typ + + def finalize(typ): + return source_fragments, typ if t in (BytesT, StringT): # arbitrary max_value bound = draw(st.integers(min_value=1, max_value=1024)) - return t(bound) + return finalize(t(bound)) if t == SArrayT: subtype = _go(skip=[TupleT, BytesT, StringT]) bound = draw(st.integers(min_value=1, max_value=6)) - return t(subtype, bound) + return finalize(t(subtype, bound)) if t == DArrayT: subtype = _go(skip=[TupleT]) bound = draw(st.integers(min_value=1, max_value=16)) - return t(subtype, bound) + return finalize(t(subtype, bound)) if t == TupleT: # zero-length tuples are not allowed in vyper n = draw(st.integers(min_value=1, max_value=6)) subtypes = [_go() for _ in range(n)] - return TupleT(subtypes) + return finalize(TupleT(subtypes)) + + if t == StructT: + n = draw(st.integers(min_value=1, max_value=6)) + subtypes = {f"x{i}": _go() for i in range(n)} + _id = len(source_fragments) # poor man's unique id + name = f"MyStruct{_id}" + typ = StructT(name, subtypes) + source_fragments.append(typ.def_source_str()) + return finalize(StructT(name, subtypes)) if t in (BoolT, AddressT): - return t() + return finalize(t()) if t == IntegerT: signed = draw(st.booleans()) bits = 8 * draw(st.integers(min_value=1, max_value=32)) - 
return t(signed, bits) + return finalize(t(signed, bits)) if t == BytesM_T: m = draw(st.integers(min_value=1, max_value=32)) - return t(m) + return finalize(t(m)) raise RuntimeError("unreachable") @@ -116,6 +131,9 @@ def _go(t): if isinstance(typ, TupleT): return tuple(_go(item_t) for item_t in typ.member_types) + if isinstance(typ, StructT): + return tuple(_go(item_t) for item_t in typ.tuple_members()) + if isinstance(typ, SArrayT): return [_go(typ.value_type) for _ in range(typ.length)] @@ -294,6 +312,13 @@ def _finalize(): # little trick to save re-typing the arguments num_dynamic_types = sum(s.num_dynamic_types for s in substats) return _finalize() + if isinstance(typ, StructT): + substats = [_type_stats(t) for t in typ.tuple_members()] + nesting = 1 + max(s.nesting for s in substats) + breadth = max(len(typ.member_types), *[s.breadth for s in substats]) + num_dynamic_types = sum(s.num_dynamic_types for s in substats) + return _finalize() + if isinstance(typ, DArrayT): substat = _type_stats(typ.value_type) nesting = 1 + substat.nesting @@ -332,8 +357,8 @@ def payload_copier(get_contract_from_ir): @pytest.mark.parametrize("_n", list(range(PARALLELISM))) @hp.given(typ=vyper_type()) @hp.settings(max_examples=100, **_settings) -@hp.example(typ=DArrayT(DArrayT(UINT256_T, 2), 2)) -def test_abi_decode_fuzz(_n, typ, get_contract, tx_failed, payload_copier): +def test_abi_decode_fuzz(_n, typ, get_contract, tx_failed, payload_copier, env): + source_fragments, typ = typ # import time # t0 = time.time() # print("ENTER", typ) @@ -350,12 +375,13 @@ def test_abi_decode_fuzz(_n, typ, get_contract, tx_failed, payload_copier): # by bytes length check at function entry type_bound = wrapped_type.abi_type.size_bound() buffer_bound = type_bound + MAX_MUTATIONS - type_str = repr(typ) # annotation in vyper code - # TODO: intrinsic decode from staticcall/extcall - # TODO: _abi_decode from other sources (staticcall/extcall?) 
- # TODO: dirty the buffer - # TODO: check unwrap_tuple=False + + preamble = "\n\n".join(source_fragments) + type_str = str(typ) # annotation in vyper code + code = f""" +{preamble} + @external def run(xs: Bytes[{buffer_bound}]) -> {type_str}: ret: {type_str} = abi_decode(xs, {type_str}) @@ -375,6 +401,13 @@ def run3(xs: Bytes[{buffer_bound}], copier: Foo) -> {type_str}: assert len(xs) <= {type_bound} return (extcall copier.bar(xs)) """ + try: + c = get_contract(code) + except EvmError as e: + if env.contract_size_limit_error in str(e): + hp.assume(False) + # print(code) + hp.note(code) c = get_contract(code) @hp.given(data=payload_from(wrapped_type)) @@ -382,7 +415,6 @@ def run3(xs: Bytes[{buffer_bound}], copier: Foo) -> {type_str}: def _fuzz(data): hp.note(f"type: {typ}") hp.note(f"abi_t: {wrapped_type.abi_type.selector_name()}") - hp.note(code) hp.note(data.hex()) try: @@ -414,3 +446,57 @@ def _fuzz(data): # t1 = time.time() # print(f"elapsed {t1 - t0}s") + + +@pytest.mark.parametrize("_n", list(range(PARALLELISM))) +@hp.given(typ=vyper_type()) +@hp.settings(max_examples=100, **_settings) +def test_abi_decode_no_wrap_fuzz(_n, typ, get_contract, tx_failed, env): + source_fragments, typ = typ + # import time + # t0 = time.time() + # print("ENTER", typ) + + stats = _type_stats(typ) + hp.target(stats.num_dynamic_types) + + # add max_mutations bytes worth of padding so we don't just get caught + # by bytes length check at function entry + type_bound = typ.abi_type.size_bound() + buffer_bound = type_bound + MAX_MUTATIONS + + type_str = str(typ) # annotation in vyper code + preamble = "\n\n".join(source_fragments) + + code = f""" +{preamble} + +@external +def run(xs: Bytes[{buffer_bound}]) -> {type_str}: + ret: {type_str} = abi_decode(xs, {type_str}, unwrap_tuple=False) + return ret + """ + try: + c = get_contract(code) + except EvmError as e: + if env.contract_size_limit_error in str(e): + hp.assume(False) + + @hp.given(data=payload_from(typ)) + 
@hp.settings(max_examples=100, **_settings) + def _fuzz(data): + hp.note(code) + hp.note(data.hex()) + try: + expected = spec_decode(typ, data) + hp.note(f"expected {expected}") + assert expected == c.run(data) + except DecodeError: + hp.note("expect failure") + with tx_failed(EvmError): + c.run(data) + + _fuzz() + + # t1 = time.time() + # print(f"elapsed {t1 - t0}s") diff --git a/vyper/semantics/types/user.py b/vyper/semantics/types/user.py index a6ee646e62..ca8e99bc92 100644 --- a/vyper/semantics/types/user.py +++ b/vyper/semantics/types/user.py @@ -371,8 +371,11 @@ def from_StructDef(cls, base_node: vy_ast.StructDef) -> "StructT": return cls(struct_name, members, ast_def=base_node) + def __str__(self): + return f"{self._id}" + def __repr__(self): - return f"{self._id} declaration object" + return f"{self._id} {self.members}" def _try_fold(self, node): if len(node.args) != 1: @@ -384,6 +387,12 @@ def _try_fold(self, node): # it can't be reduced, but this lets upstream code know it's constant return node + def def_source_str(self): + ret = f"struct {self._id}:\n" + for k, v in self.member_types.items(): + ret += f" {k}: {v}\n" + return ret + @property def size_in_bytes(self): return sum(i.size_in_bytes for i in self.member_types.values()) From c79c0b658ba34d7b161048d0d80ebd207ff5247b Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 17 Jun 2024 08:22:50 -0700 Subject: [PATCH 31/53] fix[venom]: alloca for default arguments (#4155) this commit fixes an `ir_node_to_venom` translation bug. when there is a default argument to an external function, it can generate multiple allocas, because the entry points allocate separate symbol tables, but actually they should all correspond to the same alloca. 
for instance, `external 1 foo(uint256)12345` and `external 1 foo()67890` both feed into the same `external 1 foo()__common`, but the current translator mistakenly creates different symbol tables for the two "feeder" entry points, resulting in separate allocas for the same logical variable. this commit fixes the bug by fusing the symbol tables for multiple entry points to the same external function. --- vyper/venom/ir_node_to_venom.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index 2c99cf5668..4fca95be90 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -107,14 +107,16 @@ NOOP_INSTRUCTIONS = frozenset(["pass", "cleanup_repeat", "var_list", "unique_symbol"]) SymbolTable = dict[str, Optional[IROperand]] -_global_symbols: SymbolTable = {} +_global_symbols: SymbolTable = None # type: ignore MAIN_ENTRY_LABEL_NAME = "__main_entry" +_external_functions: dict[int, SymbolTable] = None # type: ignore # convert IRnode directly to venom def ir_node_to_venom(ir: IRnode) -> IRContext: - global _global_symbols + global _global_symbols, _external_functions _global_symbols = {} + _external_functions = {} ctx = IRContext() fn = ctx.create_function(MAIN_ENTRY_LABEL_NAME) @@ -214,10 +216,6 @@ def _convert_ir_bb_list(fn, ir, symbols): return ret -current_func = None -var_list: list[str] = [] - - def pop_source_on_return(func): @functools.wraps(func) def pop_source(*args, **kwargs): @@ -232,7 +230,10 @@ def pop_source(*args, **kwargs): @pop_source_on_return def _convert_ir_bb(fn, ir, symbols): assert isinstance(ir, IRnode), ir - global _break_target, _continue_target, current_func, var_list, _global_symbols + # TODO: refactor these to not be globals + global _break_target, _continue_target, _global_symbols, _external_functions + + # keep a map from external functions to all possible entry points ctx = fn.ctx fn.push_source(ir) @@ -274,7 
+275,6 @@ def _convert_ir_bb(fn, ir, symbols): return ret elif is_external: - _global_symbols = {} ret = _convert_ir_bb(fn, ir.args[0], symbols) _append_return_args(fn) else: @@ -382,6 +382,13 @@ def _convert_ir_bb(fn, ir, symbols): data = _convert_ir_bb(fn, c, symbols) ctx.append_data("db", [data]) # type: ignore elif ir.value == "label": + function_id_pattern = r"external (\d+)" + function_name = ir.args[0].value + m = re.match(function_id_pattern, function_name) + if m is not None: + function_id = m.group(1) + _global_symbols = _external_functions.setdefault(function_id, {}) + label = IRLabel(ir.args[0].value, True) bb = fn.get_basic_block() if not bb.is_terminated: From a72488ce68125a65813199f9b1188ce60a987feb Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 17 Jun 2024 14:23:30 -0700 Subject: [PATCH 32/53] fix[venom]: add `unique_symbols` check to venom pipeline (#4149) when `-Onone` is specified along with `--experimental-codegen`, the unique symbols check does not get run. this calculates the `ir_node.unique_symbols` property, which implicitly runs the unique symbols check. 
also, change an assertion to a proper panic exception --- vyper/codegen/ir_node.py | 3 ++- vyper/venom/ir_node_to_venom.py | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index 9d39ebd033..97d9c45fb6 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -405,7 +405,8 @@ def unique_symbols(self): for arg in children: s = arg.unique_symbols non_uniques = ret.intersection(s) - assert len(non_uniques) == 0, f"non-unique symbols {non_uniques}" + if len(non_uniques) != 0: # pragma: nocover + raise CompilerPanic(f"non-unique symbols {non_uniques}") ret |= s return ret diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index 4fca95be90..85172c70e1 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -114,6 +114,8 @@ # convert IRnode directly to venom def ir_node_to_venom(ir: IRnode) -> IRContext: + _ = ir.unique_symbols # run unique symbols check + global _global_symbols, _external_functions _global_symbols = {} _external_functions = {} From d92cd344add84aa17434baefed24a6c548471cc2 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 18 Jun 2024 10:23:37 -0700 Subject: [PATCH 33/53] chore[docs]: add evaluation order warning for builtins (#4158) some builtins have undefined order of evaluation of arguments; make a note in the docs --- docs/built-in-functions.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/built-in-functions.rst b/docs/built-in-functions.rst index 367a08d80d..a0e424adb4 100644 --- a/docs/built-in-functions.rst +++ b/docs/built-in-functions.rst @@ -1090,3 +1090,6 @@ Utilities .. note:: Issuing of the static call is *NOT* mode-dependent (that is, it is not removed from production code), although the compiler will issue a warning whenever ``print`` is used. + +.. warning:: + In Vyper, as of v0.4.0, the order of argument evaluation of builtins is not defined. 
That means that the compiler may choose to reorder evaluation of arguments. For example, ``extract32(x(), y())`` may yield unexpected results if ``x()`` and ``y()`` both touch the same data. For this reason, it is best to avoid calling functions with side-effects inside of builtins. For more information, see `GHSA-g2xh-c426-v8mf <https://github.com/vyperlang/vyper/security/advisories/GHSA-g2xh-c426-v8mf>`_ and `issue #4019 <https://github.com/vyperlang/vyper/issues/4019>`_. From 3d9c537142fb99b2672f21e2057f5f202cde194f Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 18 Jun 2024 12:49:36 -0700 Subject: [PATCH 34/53] fix[codegen]: panic on potential eval order issue for some builtins (#4157) `extract32()` and `slice()` have an evaluation order issue when the arguments touch the same data. specifically, the length and data evaluation are interleaved with the index/start/length evaluations. in unusual situations (such as those in the included test cases), this can result in "invalid" reads where the data and length reads appear out of order. this commit conservatively blocks compilation if the preconditions for the interleaved evaluation are detected.
--------- Co-authored-by: trocher Co-authored-by: cyberthirst --- .../builtins/codegen/test_extract32.py | 48 +++++++++++++++++ .../functional/builtins/codegen/test_slice.py | 52 ++++++++++++++++++- vyper/builtins/functions.py | 7 +++ 3 files changed, 106 insertions(+), 1 deletion(-) diff --git a/tests/functional/builtins/codegen/test_extract32.py b/tests/functional/builtins/codegen/test_extract32.py index 8a92adbb07..f8db51ee36 100644 --- a/tests/functional/builtins/codegen/test_extract32.py +++ b/tests/functional/builtins/codegen/test_extract32.py @@ -1,6 +1,7 @@ import pytest from vyper.evm.opcodes import version_check +from vyper.exceptions import CompilerPanic @pytest.mark.parametrize("location", ["storage", "transient"]) @@ -98,3 +99,50 @@ def foq(inp: Bytes[32]) -> address: with tx_failed(): c.foq(b"crow" * 8) + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_extract32_order_of_eval(get_contract): + extract32_code = """ +var:DynArray[Bytes[96], 1] + +@internal +def bar() -> uint256: + self.var[0] = b'hellohellohellohellohellohellohello' + self.var.pop() + return 3 + +@external +def foo() -> bytes32: + self.var = [b'abcdefghijklmnopqrstuvwxyz123456789'] + return extract32(self.var[0], self.bar(), output_type=bytes32) + """ + + c = get_contract(extract32_code) + assert c.foo() == b"defghijklmnopqrstuvwxyz123456789" + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_extract32_order_of_eval_extcall(get_contract): + slice_code = """ +var:DynArray[Bytes[96], 1] + +interface Bar: + def bar() -> uint256: payable + +@external +def bar() -> uint256: + self.var[0] = b'hellohellohellohellohellohellohello' + self.var.pop() + return 3 + +@external +def foo() -> bytes32: + self.var = [b'abcdefghijklmnopqrstuvwxyz123456789'] + return extract32(self.var[0], extcall Bar(self).bar(), output_type=bytes32) + """ + + c = get_contract(slice_code) + assert c.foo() == 
b"defghijklmnopqrstuvwxyz123456789" diff --git a/tests/functional/builtins/codegen/test_slice.py b/tests/functional/builtins/codegen/test_slice.py index 08800e7a8c..d5d1efca0f 100644 --- a/tests/functional/builtins/codegen/test_slice.py +++ b/tests/functional/builtins/codegen/test_slice.py @@ -5,7 +5,7 @@ from vyper.compiler import compile_code from vyper.compiler.settings import OptimizationLevel, Settings from vyper.evm.opcodes import version_check -from vyper.exceptions import ArgumentException, TypeMismatch +from vyper.exceptions import ArgumentException, CompilerPanic, TypeMismatch _fun_bytes32_bounds = [(0, 32), (3, 29), (27, 5), (0, 5), (5, 3), (30, 2)] @@ -562,3 +562,53 @@ def foo(cs: String[64]) -> uint256: c = get_contract(code) # ensure that counter was incremented only once assert c.foo(arg) == 1 + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_slice_order_of_eval(get_contract): + slice_code = """ +var:DynArray[Bytes[96], 1] + +interface Bar: + def bar() -> uint256: payable + +@external +def bar() -> uint256: + self.var[0] = b'hellohellohellohellohellohellohello' + self.var.pop() + return 32 + +@external +def foo() -> Bytes[96]: + self.var = [b'abcdefghijklmnopqrstuvwxyz123456789'] + return slice(self.var[0], 3, extcall Bar(self).bar()) + """ + + c = get_contract(slice_code) + assert c.foo() == b"defghijklmnopqrstuvwxyz123456789" + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_slice_order_of_eval2(get_contract): + slice_code = """ +var:DynArray[Bytes[96], 1] + +interface Bar: + def bar() -> uint256: payable + +@external +def bar() -> uint256: + self.var[0] = b'hellohellohellohellohellohellohello' + self.var.pop() + return 3 + +@external +def foo() -> Bytes[96]: + self.var = [b'abcdefghijklmnopqrstuvwxyz123456789'] + return slice(self.var[0], extcall Bar(self).bar(), 32) + """ + + c = get_contract(slice_code) + assert c.foo() == 
b"defghijklmnopqrstuvwxyz123456789" diff --git a/vyper/builtins/functions.py b/vyper/builtins/functions.py index 2564329b65..672d978455 100644 --- a/vyper/builtins/functions.py +++ b/vyper/builtins/functions.py @@ -29,6 +29,7 @@ get_type_for_exact_size, ir_tuple_from_args, make_setter, + potential_overlap, promote_signed_int, sar, shl, @@ -357,6 +358,9 @@ def build_IR(self, expr, args, kwargs, context): assert is_bytes32, src src = ensure_in_memory(src, context) + if potential_overlap(src, start) or potential_overlap(src, length): + raise CompilerPanic("risky overlap") + with src.cache_when_complex("src") as (b1, src), start.cache_when_complex("start") as ( b2, start, @@ -862,6 +866,9 @@ def build_IR(self, expr, args, kwargs, context): bytez, index = args ret_type = kwargs["output_type"] + if potential_overlap(bytez, index): + raise CompilerPanic("risky overlap") + def finalize(ret): annotation = "extract32" ret = IRnode.from_list(ret, typ=ret_type, annotation=annotation) From 4594f8badf13a583875f8891698cd3bbefb1c787 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 19 Jun 2024 12:38:33 -0700 Subject: [PATCH 35/53] fix[codegen]: panic on potential subscript eval order issue (#4159) subscript expressions have an evaluation order issue when evaluation of the index (i.e. `node.index`) modifies the parent (i.e. `node.value`). because the evaluation of the parent is interleaved with evaluation of the index, it can result in "invalid" reads where the length check occurs before evaluation of the index, but the data read occurs afterwards. if evaluation of the index results in modification of the container size for instance, the data read from the container can happen on a dangling reference. another variant of this issue would be accessing `self.nested_array.pop().append(...)`; however, this currently happens to be blocked by a panic in the frontend. this commit conservatively blocks compilation if the preconditions for the interleaved evaluation are detected. 
POC tests that the appropriate panics are generated are included as well. --------- Co-authored-by: trocher Co-authored-by: Hubert Ritzdorf Co-authored-by: cyberthirst --- .../codegen/types/test_array_indexing.py | 77 +++++++++++++++++++ .../codegen/types/test_dynamic_array.py | 16 ++++ vyper/ast/nodes.pyi | 1 + vyper/codegen/core.py | 20 +++++ vyper/codegen/expr.py | 7 ++ vyper/codegen/ir_node.py | 12 +++ vyper/semantics/analysis/utils.py | 15 +++- 7 files changed, 147 insertions(+), 1 deletion(-) diff --git a/tests/functional/codegen/types/test_array_indexing.py b/tests/functional/codegen/types/test_array_indexing.py index 45e777d919..7f5c0d0e21 100644 --- a/tests/functional/codegen/types/test_array_indexing.py +++ b/tests/functional/codegen/types/test_array_indexing.py @@ -1,5 +1,9 @@ # TODO: rewrite the tests in type-centric way, parametrize array and indices types +import pytest + +from vyper.exceptions import CompilerPanic + def test_negative_ix_access(get_contract, tx_failed): # Arrays can't be accessed with negative indices @@ -130,3 +134,76 @@ def foo(): c.foo() for i in range(10): assert c.arr(i) == i + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_array_index_overlap(get_contract): + code = """ +a: public(DynArray[DynArray[Bytes[96], 5], 5]) + +@external +def foo() -> Bytes[96]: + self.a.append([b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx']) + return self.a[0][self.bar()] + + +@internal +def bar() -> uint256: + self.a[0] = [b'yyy'] + self.a.pop() + return 0 + """ + c = get_contract(code) + # tricky to get this right, for now we just panic instead of generating code + assert c.foo() == b"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_array_index_overlap_extcall(get_contract): + code = """ + +interface Bar: + def bar() -> uint256: payable + +a: public(DynArray[DynArray[Bytes[96], 
5], 5]) + +@external +def foo() -> Bytes[96]: + self.a.append([b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx']) + return self.a[0][extcall Bar(self).bar()] + + +@external +def bar() -> uint256: + self.a[0] = [b'yyy'] + self.a.pop() + return 0 + """ + c = get_contract(code) + assert c.foo() == b"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_array_index_overlap_extcall2(get_contract): + code = """ +interface B: + def calculate_index() -> uint256: nonpayable + +a: HashMap[uint256, DynArray[uint256, 5]] + +@external +def bar() -> uint256: + self.a[0] = [2] + return self.a[0][extcall B(self).calculate_index()] + +@external +def calculate_index() -> uint256: + self.a[0] = [1] + return 0 + """ + c = get_contract(code) + + assert c.bar() == 1 diff --git a/tests/functional/codegen/types/test_dynamic_array.py b/tests/functional/codegen/types/test_dynamic_array.py index 5f26e05839..2a0f4e77e5 100644 --- a/tests/functional/codegen/types/test_dynamic_array.py +++ b/tests/functional/codegen/types/test_dynamic_array.py @@ -8,6 +8,7 @@ from vyper.exceptions import ( ArgumentException, ArrayIndexException, + CompilerPanic, ImmutableViolation, OverflowException, StackTooDeep, @@ -1887,3 +1888,18 @@ def boo() -> uint256: c = get_contract(code) assert c.foo() == [1, 2, 3, 4] + + +@pytest.mark.xfail(raises=CompilerPanic) +def test_dangling_reference(get_contract, tx_failed): + code = """ +a: DynArray[DynArray[uint256, 5], 5] + +@external +def foo(): + self.a = [[1]] + self.a.pop().append(2) + """ + c = get_contract(code) + with tx_failed(): + c.foo() diff --git a/vyper/ast/nodes.pyi b/vyper/ast/nodes.pyi index 1c7aaf55ee..58c7d0b2e4 100644 --- a/vyper/ast/nodes.pyi +++ b/vyper/ast/nodes.pyi @@ -23,6 +23,7 @@ class VyperNode: end_col_offset: int = ... _metadata: dict = ... _original_node: Optional[VyperNode] = ... + _children: list[VyperNode] = ... 
def __init__(self, parent: Optional[VyperNode] = ..., **kwargs: Any) -> None: ... def __hash__(self) -> Any: ... def __eq__(self, other: Any) -> Any: ... diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index 9a0a08097c..25a6d06fbf 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -924,6 +924,26 @@ def potential_overlap(left, right): return False +# similar to `potential_overlap()`, but compares left's _reads_ vs +# right's _writes_. +# TODO: `potential_overlap()` can probably be replaced by this function, +# but all the cases need to be checked. +def read_write_overlap(left, right): + if not isinstance(left, IRnode) or not isinstance(right, IRnode): + return False + + if left.typ._is_prim_word and right.typ._is_prim_word: + return False + + if len(left.referenced_variables & right.variable_writes) > 0: + return True + + if len(left.referenced_variables) > 0 and right.contains_risky_call: + return True + + return False + + # Create an x=y statement, where the types may be compound def make_setter(left, right, hi=None): check_assign(left, right) diff --git a/vyper/codegen/expr.py b/vyper/codegen/expr.py index 65df5a0930..f28a068be6 100644 --- a/vyper/codegen/expr.py +++ b/vyper/codegen/expr.py @@ -21,6 +21,7 @@ make_setter, pop_dyn_array, potential_overlap, + read_write_overlap, sar, shl, shr, @@ -40,6 +41,7 @@ UnimplementedException, tag_exceptions, ) +from vyper.semantics.analysis.utils import get_expr_writes from vyper.semantics.types import ( AddressT, BoolT, @@ -86,6 +88,9 @@ def __init__(self, node, context, is_stmt=False): self.ir_node = fn() assert isinstance(self.ir_node, IRnode), self.ir_node + writes = set(access.variable for access in get_expr_writes(self.expr)) + self.ir_node._writes = writes + self.ir_node.annotation = self.expr.get("node_source_code") self.ir_node.ast_source = self.expr @@ -352,6 +357,8 @@ def parse_Subscript(self): elif is_array_like(sub.typ): index = Expr.parse_value_expr(self.expr.slice, self.context) + if 
read_write_overlap(sub, index): + raise CompilerPanic("risky overlap") elif is_tuple_like(sub.typ): # should we annotate expr.slice in the frontend with the diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index 97d9c45fb6..6f9eb0359b 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -467,6 +467,18 @@ def referenced_variables(self): return ret + @cached_property + def variable_writes(self): + ret = getattr(self, "_writes", set()) + + for arg in self.args: + ret |= arg.variable_writes + + if getattr(self, "is_self_call", False): + ret |= self.invoked_function_ir.func_ir.variable_writes + + return ret + @cached_property def contains_risky_call(self): ret = self.value in ("call", "delegatecall", "staticcall", "create", "create2") diff --git a/vyper/semantics/analysis/utils.py b/vyper/semantics/analysis/utils.py index be323b1d13..d30eee79e0 100644 --- a/vyper/semantics/analysis/utils.py +++ b/vyper/semantics/analysis/utils.py @@ -24,7 +24,7 @@ from vyper.semantics.types.bytestrings import BytesT, StringT from vyper.semantics.types.primitives import AddressT, BoolT, BytesM_T, IntegerT from vyper.semantics.types.subscriptable import DArrayT, SArrayT, TupleT -from vyper.utils import checksum_encode, int_to_fourbytes +from vyper.utils import OrderedSet, checksum_encode, int_to_fourbytes def _validate_op(node, types_list, validation_fn_name): @@ -681,3 +681,16 @@ def check_modifiability(node: vy_ast.ExprNode, modifiability: Modifiability) -> info = get_expr_info(node) return info.modifiability <= modifiability + + +# TODO: move this into part of regular analysis in `local.py` +def get_expr_writes(node: vy_ast.VyperNode) -> OrderedSet[VarAccess]: + if "writes_r" in node._metadata: + return node._metadata["writes_r"] + ret: OrderedSet = OrderedSet() + if isinstance(node, vy_ast.ExprNode) and node._expr_info is not None: + ret = node._expr_info._writes + for c in node._children: + ret |= get_expr_writes(c) + node._metadata["writes_r"] = 
ret + return ret From e9db8d9f7486eae38f5b86531629019ad28f514e Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 20 Jun 2024 09:27:25 -0700 Subject: [PATCH 36/53] feat[docs]: v0.4.0 release (#4152) add release notes for v0.4.0 release slight update to wording of front matter --- docs/index.rst | 7 +- docs/release-notes.rst | 310 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 311 insertions(+), 6 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 5baaebb339..6c36b5fd7c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,14 +6,17 @@ Vyper ##### -Vyper is a contract-oriented, pythonic programming language that targets the `Ethereum Virtual Machine (EVM) `_. +Vyper is a contract-oriented, Pythonic programming language that targets the `Ethereum Virtual Machine (EVM) `_. +It prioritizes user safety, encourages clear coding practices via language design and efficient execution. In other words, Vyper code is safe, clear and efficient! Principles and Goals ==================== * **Security**: It should be possible and natural to build secure smart-contracts in Vyper. * **Language and compiler simplicity**: The language and the compiler implementation should strive to be simple. -* **Auditability**: Vyper code should be maximally human-readable. Furthermore, it should be maximally difficult to write misleading code. Simplicity for the reader is more important than simplicity for the writer, and simplicity for readers with low prior experience with Vyper (and low prior experience with programming in general) is particularly important. +* **Auditability**: Vyper code should be maximally human-readable. + Furthermore, it should be maximally difficult to write misleading code. + Simplicity for the reader is more important than simplicity for the writer, and simplicity for readers with low prior experience with Vyper (and low prior experience with programming in general) is particularly important. 
Because of this Vyper provides the following features: diff --git a/docs/release-notes.rst b/docs/release-notes.rst index c107ee5554..fa17ef4f7b 100644 --- a/docs/release-notes.rst +++ b/docs/release-notes.rst @@ -11,17 +11,319 @@ Release Notes :'<,'>s/\v(https:\/\/github.com\/vyperlang\/vyper\/pull\/)(\d+)/(`#\2 <\1\2>`_)/g ex. in: https://github.com/vyperlang/vyper/pull/3373 ex. out: (`#3373 <https://github.com/vyperlang/vyper/pull/3373>`_) + remove authorship slugs (leave them on github release page; they have no meaning outside of github though) + :'<,'>s/by @\S\+ //c for advisory links: :'<,'>s/\v(https:\/\/github.com\/vyperlang\/vyper\/security\/advisories\/)([-A-Za-z0-9]+)/(`\2 <\1\2>`_)/g -v0.4.0b1 ("Nagini") -******************* +v0.4.0 ("Nagini") +***************** -Date released: TBD -================== +Date released: 2024-06-20 +========================= v0.4.0 represents a major overhaul to the Vyper language. Notably, it overhauls the import system and adds support for code reuse. It also adds a new, experimental backend to Vyper which lays the foundation for improved analysis, optimization and integration with third party tools.
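+Breaking Changes +---------------- +* feat[tool]!: make cancun the default evm version (`#4029 <https://github.com/vyperlang/vyper/pull/4029>`_) +* feat[lang]: remove named reentrancy locks (`#3769 <https://github.com/vyperlang/vyper/pull/3769>`_) +* feat[lang]!: change the signature of ``block.prevrandao`` (`#3879 <https://github.com/vyperlang/vyper/pull/3879>`_) +* feat[lang]!: change ABI type of ``decimal`` to ``int168`` (`#3696 <https://github.com/vyperlang/vyper/pull/3696>`_) +* feat[lang]: rename ``_abi_encode`` and ``_abi_decode`` (`#4097 <https://github.com/vyperlang/vyper/pull/4097>`_) +* feat[lang]!: add feature flag for decimals (`#3930 <https://github.com/vyperlang/vyper/pull/3930>`_) +* feat[lang]!: make internal decorator optional (`#4040 <https://github.com/vyperlang/vyper/pull/4040>`_) +* feat[lang]: protect external calls with keyword (`#2938 <https://github.com/vyperlang/vyper/pull/2938>`_) +* introduce floordiv, ban regular div for integers (`#2937 <https://github.com/vyperlang/vyper/pull/2937>`_) +* feat[lang]: use keyword arguments for struct instantiation (`#3777 <https://github.com/vyperlang/vyper/pull/3777>`_) +* feat: require type annotations for loop variables (`#3596 <https://github.com/vyperlang/vyper/pull/3596>`_) +* feat: replace ``enum`` with ``flag`` keyword (`#3697 <https://github.com/vyperlang/vyper/pull/3697>`_) +* feat: remove builtin constants (`#3350 <https://github.com/vyperlang/vyper/pull/3350>`_) +* feat: drop istanbul and berlin support (`#3843 <https://github.com/vyperlang/vyper/pull/3843>`_) +* feat: allow range with two arguments and bound (`#3679 <https://github.com/vyperlang/vyper/pull/3679>`_) +* fix[codegen]: range bound check for signed integers (`#3814 <https://github.com/vyperlang/vyper/pull/3814>`_) +* feat: default code offset = 3 (`#3454 <https://github.com/vyperlang/vyper/pull/3454>`_) +* feat: rename ``vyper.interfaces`` to ``ethereum.ercs`` (`#3741 <https://github.com/vyperlang/vyper/pull/3741>`_) +* chore: add prefix to ERC interfaces (`#3804 <https://github.com/vyperlang/vyper/pull/3804>`_) +* chore[ux]: compute natspec as part of standard pipeline (`#3946 <https://github.com/vyperlang/vyper/pull/3946>`_) +* feat: deprecate ``vyper-serve`` (`#3666 <https://github.com/vyperlang/vyper/pull/3666>`_) + +Module system +------------- +* refactor: internal handling of imports (`#3655 <https://github.com/vyperlang/vyper/pull/3655>`_) +* feat: implement "stateless" modules (`#3663 <https://github.com/vyperlang/vyper/pull/3663>`_) +* feat[lang]: export interfaces (`#3919 <https://github.com/vyperlang/vyper/pull/3919>`_) +* feat[lang]: singleton modules with ownership hierarchy (`#3729 <https://github.com/vyperlang/vyper/pull/3729>`_) +* feat[lang]: implement function exports (`#3786 <https://github.com/vyperlang/vyper/pull/3786>`_) +* feat[lang]: auto-export events in ABI (`#3808 <https://github.com/vyperlang/vyper/pull/3808>`_) +* fix: allow using interface defs from imported modules (`#3725 <https://github.com/vyperlang/vyper/pull/3725>`_) +* feat: add support for constants in imported modules (`#3726 <https://github.com/vyperlang/vyper/pull/3726>`_) +* fix[lang]: prevent modules as storage variables (`#4088 <https://github.com/vyperlang/vyper/pull/4088>`_) +* fix[ux]: improve initializer hint for unimported modules (`#4145 <https://github.com/vyperlang/vyper/pull/4145>`_) +* feat: add python ``sys.path`` to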
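vyper path (`#3763 <https://github.com/vyperlang/vyper/pull/3763>`_) +* feat[ux]: improve error message for importing ERC20 (`#3816 <https://github.com/vyperlang/vyper/pull/3816>`_) +* fix[lang]: fix importing of flag types (`#3871 <https://github.com/vyperlang/vyper/pull/3871>`_) +* feat: search path resolution for cli (`#3694 <https://github.com/vyperlang/vyper/pull/3694>`_) +* fix[lang]: transitive exports (`#3888 <https://github.com/vyperlang/vyper/pull/3888>`_) +* fix[ux]: error messages relating to initializer issues (`#3831 <https://github.com/vyperlang/vyper/pull/3831>`_) +* fix[lang]: recursion in ``uses`` analysis for nonreentrant functions (`#3971 <https://github.com/vyperlang/vyper/pull/3971>`_) +* fix[ux]: fix ``uses`` error message (`#3926 <https://github.com/vyperlang/vyper/pull/3926>`_) +* fix[lang]: fix ``uses`` analysis for nonreentrant functions (`#3927 <https://github.com/vyperlang/vyper/pull/3927>`_) +* fix[lang]: fix a hint in global initializer check (`#4089 <https://github.com/vyperlang/vyper/pull/4089>`_) +* fix[lang]: builtin type comparisons (`#3956 <https://github.com/vyperlang/vyper/pull/3956>`_) +* fix[tool]: fix ``combined_json`` output for CLI (`#3901 <https://github.com/vyperlang/vyper/pull/3901>`_) +* fix[tool]: compile multiple files (`#4053 <https://github.com/vyperlang/vyper/pull/4053>`_) +* refactor: reimplement AST folding (`#3669 <https://github.com/vyperlang/vyper/pull/3669>`_) +* refactor: constant folding (`#3719 <https://github.com/vyperlang/vyper/pull/3719>`_) +* fix[lang]: typecheck hashmap indexes with folding (`#4007 <https://github.com/vyperlang/vyper/pull/4007>`_) +* fix[lang]: fix array index checks when the subscript is folded (`#3924 <https://github.com/vyperlang/vyper/pull/3924>`_) +* fix[lang]: pure access analysis (`#3895 <https://github.com/vyperlang/vyper/pull/3895>`_) + +Venom +----- +* feat: implement new IR for vyper (venom IR) (`#3659 <https://github.com/vyperlang/vyper/pull/3659>`_) +* feat[ir]: add ``make_ssa`` pass to venom pipeline (`#3825 <https://github.com/vyperlang/vyper/pull/3825>`_) +* feat[venom]: implement ``mem2var`` and ``sccp`` passes (`#3941 <https://github.com/vyperlang/vyper/pull/3941>`_) +* feat[venom]: add store elimination pass (`#4021 <https://github.com/vyperlang/vyper/pull/4021>`_) +* feat[venom]: add ``extract_literals`` pass (`#4067 <https://github.com/vyperlang/vyper/pull/4067>`_) +* feat[venom]: optimize branching (`#4049 <https://github.com/vyperlang/vyper/pull/4049>`_) +* feat[venom]: avoid last ``swap`` for commutative ops (`#4048 <https://github.com/vyperlang/vyper/pull/4048>`_) +* feat[venom]: "pickaxe" stack scheduler optimization (`#3951 <https://github.com/vyperlang/vyper/pull/3951>`_) +* feat[venom]: add algebraic optimization pass (`#4054 <https://github.com/vyperlang/vyper/pull/4054>`_) +* feat: Implement target constrained venom jump instruction (`#3687 <https://github.com/vyperlang/vyper/pull/3687>`_) +* feat: remove ``deploy`` instruction from venom (`#3703 <https://github.com/vyperlang/vyper/pull/3703>`_) +* fix[venom]: liveness analysis in some loops (`#3732 <https://github.com/vyperlang/vyper/pull/3732>`_) +* feat: add more venom instructions (`#3733 <https://github.com/vyperlang/vyper/pull/3733>`_) +* refactor[venom]: use venom pass instances (`#3908 <https://github.com/vyperlang/vyper/pull/3908>`_) +* refactor[venom]: refactor venom operand classes (`#3915 <https://github.com/vyperlang/vyper/pull/3915>`_) +* refactor[venom]: introduce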
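``IRContext`` and ``IRAnalysisCache`` (`#3983 <https://github.com/vyperlang/vyper/pull/3983>`_) +* feat: add utility functions to ``OrderedSet`` (`#3833 <https://github.com/vyperlang/vyper/pull/3833>`_) +* feat[venom]: optimize ``get_basic_block()`` (`#4002 <https://github.com/vyperlang/vyper/pull/4002>`_) +* fix[venom]: fix branch eliminator cases in sccp (`#4003 <https://github.com/vyperlang/vyper/pull/4003>`_) +* fix[codegen]: same symbol jumpdest merge (`#3982 <https://github.com/vyperlang/vyper/pull/3982>`_) +* fix[venom]: fix eval of ``exp`` in sccp (`#4009 <https://github.com/vyperlang/vyper/pull/4009>`_) +* refactor[venom]: remove unused method in ``make_ssa.py`` (`#4012 <https://github.com/vyperlang/vyper/pull/4012>`_) +* fix[venom]: fix return opcode handling in mem2var (`#4011 <https://github.com/vyperlang/vyper/pull/4011>`_) +* fix[venom]: fix ``cfg`` output format (`#4010 <https://github.com/vyperlang/vyper/pull/4010>`_) +* chore[venom]: fix output formatting of data segment in ``IRContext`` (`#4016 <https://github.com/vyperlang/vyper/pull/4016>`_) +* feat[venom]: optimize mem2var and store/variable elimination pass sequences (`#4032 <https://github.com/vyperlang/vyper/pull/4032>`_) +* fix[venom]: fix some sccp evaluations (`#4028 <https://github.com/vyperlang/vyper/pull/4028>`_) +* fix[venom]: add ``unique_symbols`` check to venom pipeline (`#4149 <https://github.com/vyperlang/vyper/pull/4149>`_) +* feat[venom]: remove redundant store elimination pass (`#4036 <https://github.com/vyperlang/vyper/pull/4036>`_) +* fix[venom]: remove some dead code in ``venom_to_assembly`` (`#4042 <https://github.com/vyperlang/vyper/pull/4042>`_) +* feat[venom]: improve unused variable removal pass (`#4055 <https://github.com/vyperlang/vyper/pull/4055>`_) +* fix[venom]: remove liveness requests (`#4058 <https://github.com/vyperlang/vyper/pull/4058>`_) +* fix[venom]: fix list of volatile instructions (`#4065 <https://github.com/vyperlang/vyper/pull/4065>`_) +* fix[venom]: remove dominator tree invalidation for store elimination pass (`#4069 <https://github.com/vyperlang/vyper/pull/4069>`_) +* fix[venom]: move loop invariant assertion to entry block (`#4098 <https://github.com/vyperlang/vyper/pull/4098>`_) +* fix[venom]: clear ``out_vars`` during calculation (`#4129 <https://github.com/vyperlang/vyper/pull/4129>`_) +* fix[venom]: alloca for default arguments (`#4155 <https://github.com/vyperlang/vyper/pull/4155>`_) +* Refactor ctx.add_instruction() and friends (`#3685 <https://github.com/vyperlang/vyper/pull/3685>`_) +* fix: type annotation of helper function (`#3702 <https://github.com/vyperlang/vyper/pull/3702>`_) +* feat[ir]: emit ``djump`` in dense selector table (`#3849 <https://github.com/vyperlang/vyper/pull/3849>`_) +* chore: move venom tests to ``tests/unit/compiler`` (`#3684 <https://github.com/vyperlang/vyper/pull/3684>`_) + +Other new features +------------------ +* feat[lang]: add ``blobhash()`` builtin (`#3962 <https://github.com/vyperlang/vyper/pull/3962>`_) +* feat[lang]: support ``block.blobbasefee`` (`#3945 <https://github.com/vyperlang/vyper/pull/3945>`_) +* feat[lang]: add ``revert_on_failure`` kwarg for create builtins (`#3844 <https://github.com/vyperlang/vyper/pull/3844>`_) +* feat[lang]: allow downcasting of bytestrings (`#3832 <https://github.com/vyperlang/vyper/pull/3832>`_) + +Docs +---- +*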
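chore[docs]: add docs for v0.4.0 features (`#3947 <https://github.com/vyperlang/vyper/pull/3947>`_) +* chore[docs]: ``implements`` does not check event declarations (`#4052 <https://github.com/vyperlang/vyper/pull/4052>`_) +* docs: adopt a new theme: ``shibuya`` (`#3754 <https://github.com/vyperlang/vyper/pull/3754>`_) +* chore[docs]: add evaluation order warning for builtins (`#4158 <https://github.com/vyperlang/vyper/pull/4158>`_) +* Update ``FUNDING.yml`` (`#3636 <https://github.com/vyperlang/vyper/pull/3636>`_) +* docs: fix nit in v0.3.10 release notes (`#3638 <https://github.com/vyperlang/vyper/pull/3638>`_) +* docs: add note on ``pragma`` parsing (`#3640 <https://github.com/vyperlang/vyper/pull/3640>`_) +* docs: retire security@vyperlang.org (`#3660 <https://github.com/vyperlang/vyper/pull/3660>`_) +* feat[docs]: add more detail to modules docs (`#4087 <https://github.com/vyperlang/vyper/pull/4087>`_) +* docs: update resources section (`#3656 <https://github.com/vyperlang/vyper/pull/3656>`_) +* docs: add script to help working on the compiler (`#3674 <https://github.com/vyperlang/vyper/pull/3674>`_) +* docs: add warnings at the top of all example token contracts (`#3676 <https://github.com/vyperlang/vyper/pull/3676>`_) +* docs: typo in ``on_chain_market_maker.vy`` (`#3677 <https://github.com/vyperlang/vyper/pull/3677>`_) +* docs: clarify ``address.codehash`` for empty account (`#3711 <https://github.com/vyperlang/vyper/pull/3711>`_) +* docs: indexed arguments for events are limited (`#3715 <https://github.com/vyperlang/vyper/pull/3715>`_) +* docs: Fix typos (`#3747 <https://github.com/vyperlang/vyper/pull/3747>`_) +* docs: Upgrade dependencies and fixes (`#3745 <https://github.com/vyperlang/vyper/pull/3745>`_) +* docs: add missing cli flags (`#3736 <https://github.com/vyperlang/vyper/pull/3736>`_) +* chore: fix formatting and docs for new struct instantiation syntax (`#3792 <https://github.com/vyperlang/vyper/pull/3792>`_) +* docs: floordiv (`#3797 <https://github.com/vyperlang/vyper/pull/3797>`_) +* docs: add missing ``annotated_ast`` flag (`#3813 <https://github.com/vyperlang/vyper/pull/3813>`_) +* docs: update logo in readme, remove competition reference (`#3837 <https://github.com/vyperlang/vyper/pull/3837>`_) +* docs: add rationale for floordiv rounding behavior (`#3845 <https://github.com/vyperlang/vyper/pull/3845>`_) +* chore[docs]: amend ``revert_on_failure`` kwarg docs for create builtins (`#3921 <https://github.com/vyperlang/vyper/pull/3921>`_) +* fix[docs]: fix clipped ``endAuction`` method in example section (`#3969 <https://github.com/vyperlang/vyper/pull/3969>`_) +* refactor[docs]: refactor security policy (`#3981 <https://github.com/vyperlang/vyper/pull/3981>`_) +* fix: edit link to style guide (`#3658 <https://github.com/vyperlang/vyper/pull/3658>`_) +* Add Vyper online compiler tooling (`#3680 <https://github.com/vyperlang/vyper/pull/3680>`_) +* chore: fix typos (`#3749 <https://github.com/vyperlang/vyper/pull/3749>`_) + +Bugfixes +-------- +* fix[codegen]: fix ``raw_log()`` when topics are non-literals (`#3977 <https://github.com/vyperlang/vyper/pull/3977>`_) +* fix[codegen]: fix transient codegen for ``slice`` and ``extract32`` (`#3874 <https://github.com/vyperlang/vyper/pull/3874>`_) +* fix[codegen]: bounds check for signed index accesses (`#3817 <https://github.com/vyperlang/vyper/pull/3817>`_) +* fix: disallow ``value=`` passing for delegate and static raw_calls (`#3755 <https://github.com/vyperlang/vyper/pull/3755>`_) +*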
fix[codegen]: fix double evals in sqrt, slice, blueprint (`#3976 `_) +* fix[codegen]: fix double eval in dynarray append/pop (`#4030 `_) +* fix[codegen]: fix double eval of start in range expr (`#4033 `_) +* fix[codegen]: overflow check in ``slice()`` (`#3818 `_) +* fix: concat buffer bug (`#3738 `_) +* fix[codegen]: fix ``make_setter`` overlap with internal calls (`#4037 `_) +* fix[codegen]: fix ``make_setter`` overlap in ``dynarray_append`` (`#4059 `_) +* fix[codegen]: ``make_setter`` overlap in the presence of ``staticcall`` (`#4128 `_) +* fix[codegen]: fix ``_abi_decode`` buffer overflow (`#3925 `_) +* fix[codegen]: zero-length dynarray ``abi_decode`` validation (`#4060 `_) +* fix[codegen]: recursive dynarray oob check (`#4091 `_) +* fix[codegen]: add back in ``returndatasize`` check (`#4144 `_) +* fix: block memory allocation overflow (`#3639 `_) +* fix[codegen]: panic on potential eval order issue for some builtins (`#4157 `_) +* fix[codegen]: panic on potential subscript eval order issue (`#4159 `_) +* add comptime check for uint2str input (`#3671 `_) +* fix: dead code analysis inside for loops (`#3731 `_) +* fix[ir]: fix a latent bug in ``sha3_64`` codegen (`#4063 `_) +* fix: ``opcodes`` and ``opcodes_runtime`` outputs (`#3735 `_) +* fix: bad assertion in expr.py (`#3758 `_) +* fix: iterator modification analysis (`#3764 `_) +* feat: allow constant interfaces (`#3718 `_) +* fix: assembly dead code eliminator (`#3791 `_) +* fix: prevent range over decimal (`#3798 `_) +* fix: mutability check for interface implements (`#3805 `_) +* fix[codegen]: fix non-memory reason strings (`#3877 `_) +* fix[ux]: fix compiler hang for large exponentiations (`#3893 `_) +* fix[lang]: allow type expressions inside pure functions (`#3906 `_) +* fix[ux]: raise ``VersionException`` with source info (`#3920 `_) +* fix[lang]: fix ``pow`` folding when args are not literals (`#3949 `_) +* fix[codegen]: fix some hardcoded references to ``STORAGE`` location (`#4015 `_) + +Patched 
security advisories (GHSAs) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Bounds check on built-in ``slice()`` function can be overflowed (`GHSA-9x7f-gwxq-6f2c `_) +* ``concat`` built-in can corrupt memory (`GHSA-2q8v-3gqq-4f8p `_) +* ``raw_call`` ``value=`` kwargs not disabled for static and delegate calls (`GHSA-x2c2-q32w-4w6m `_) +* negative array index bounds checks (`GHSA-52xq-j7v9-v4v2 `_) +* ``range(start, start + N)`` reverts for negative numbers (`GHSA-ppx5-q359-pvwj `_) +* incorrect topic logging in ``raw_log`` (`GHSA-xchq-w5r3-4wg3 `_) +* double eval of the ``slice`` start/length args in certain cases (`GHSA-r56x-j438-vw5m `_) +* multiple eval of ``sqrt()`` built in argument (`GHSA-5jrj-52x8-m64h `_) +* double eval of raw_args in ``create_from_blueprint`` (`GHSA-3whq-64q2-qfj6 `_) +* ``sha3`` codegen bug (`GHSA-6845-xw22-ffxv `_) +* ``extract32`` can read dirty memory (`GHSA-4hwq-4cpm-8vmx `_) +* ``_abi_decode`` Memory Overflow (`GHSA-9p8r-4xp4-gw5w `_) +* External calls can overflow return data to return input buffer (`GHSA-gp3w-2v2m-p686 `_) + +Tooling +------- +* feat[tool]: archive format (`#3891 `_) +* feat[tool]: add source map for constructors (`#4008 `_) +* feat: add short options ``-v`` and ``-O`` to the CLI (`#3695 `_) +* feat: Add ``bb`` and ``bb_runtime`` output options (`#3700 `_) +* fix: remove hex-ir from format cli options list (`#3657 `_) +* fix: pickleability of ``CompilerData`` (`#3803 `_) +* feat[tool]: validate AST nodes early in the pipeline (`#3809 `_) +* feat[tool]: delay global constraint check (`#3810 `_) +* feat[tool]: export variable read/write access (`#3790 `_) +* feat[tool]: improvements to AST annotation (`#3829 `_) +* feat[tool]: add ``node_id`` map to source map (`#3811 `_) +* chore[tool]: add help text for ``hex-ir`` CLI flag (`#3942 `_) +* refactor[tool]: refactor storage layout export (`#3789 `_) +* fix[tool]: fix cross-compilation issues, add windows CI (`#4014 `_) +* fix[tool]: star option in ``outputSelection`` (`#4094 
`_) + +Performance +----------- +* perf: lazy eval of f-strings in IRnode ctor (`#3602 `_) +* perf: levenshtein optimization (`#3780 `_) +* feat: frontend optimizations (`#3781 `_) +* feat: optimize ``VyperNode.deepcopy`` (`#3784 `_) +* feat: more frontend optimizations (`#3785 `_) +* perf: reimplement ``IRnode.__deepcopy__`` (`#3761 `_) + +Testing suite improvements +-------------------------- +* refactor[test]: bypass ``eth-tester`` and interface with evm backend directly (`#3846 `_) +* feat: Refactor assert_tx_failed into a context (`#3706 `_) +* feat[test]: implement ``abi_decode`` spec test (`#4095 `_) +* feat[test]: add more coverage to ``abi_decode`` fuzzer tests (`#4153 `_) +* feat[ci]: enable cancun testing (`#3861 `_) +* fix: add missing test for memory allocation overflow (`#3650 `_) +* chore: fix test for ``slice`` (`#3633 `_) +* add abi_types unit tests (`#3662 `_) +* refactor: test directory structure (`#3664 `_) +* chore: test all output formats (`#3683 `_) +* chore: deduplicate test files (`#3773 `_) +* feat[test]: add more transient storage tests (`#3883 `_) +* chore[ci]: fix apt-get failure in era pipeline (`#3821 `_) +* chore[ci]: enable python3.12 tests (`#3860 `_) +* chore[ci]: refactor jobs to use gh actions (`#3863 `_) +* chore[ci]: use ``--dist worksteal`` from latest ``xdist`` (`#3869 `_) +* chore: run mypy as part of lint rule in Makefile (`#3771 `_) +* chore[test]: always specify the evm backend (`#4006 `_) +* chore: update lint dependencies (`#3704 `_) +* chore: add color to mypy output (`#3793 `_) +* chore: remove tox rules for lint commands (`#3826 `_) +* chore[ci]: roll back GH actions/artifacts version (`#3838 `_) +* chore: Upgrade GitHub action dependencies (`#3807 `_) +* chore[ci]: pin eth-abi for decode regression (`#3834 `_) +* fix[ci]: release artifacts (`#3839 `_) +* chore[ci]: merge mypy job into lint (`#3840 `_) +* test: parametrize CI over EVM versions (`#3842 `_) +* feat[ci]: add PR title validation (`#3887 `_) +* 
fix[test]: fix failure in grammar fuzzing (`#3892 `_) +* feat[test]: add ``xfail_strict``, clean up ``setup.cfg`` (`#3889 `_) +* fix[ci]: pin hexbytes to pre-1.0.0 (`#3903 `_) +* chore[test]: update hexbytes version and tests (`#3904 `_) +* fix[test]: fix a bad bound in decimal fuzzing (`#3909 `_) +* fix[test]: fix a boundary case in decimal fuzzing (`#3918 `_) +* feat[ci]: update pypi release pipeline to use OIDC (`#3912 `_) +* chore[ci]: reconfigure single commit validation (`#3937 `_) +* chore[ci]: downgrade codecov action to v3 (`#3940 `_) +* feat[ci]: add codecov configuration (`#4057 `_) +* feat[test]: remove memory mocker (`#4005 `_) +* refactor[test]: change fixture scope in examples (`#3995 `_) +* fix[test]: fix call graph stability fuzzer (`#4064 `_) +* chore[test]: add macos to test matrix (`#4025 `_) +* refactor[test]: change default expected exception type (`#4004 `_) + +Misc / refactor +--------------- +* feat[ir]: add ``eval_once`` sanity fences to more builtins (`#3835 `_) +* fix: reorder compilation of branches in stmt.py (`#3603 `_) +* refactor[codegen]: make settings into a global object (`#3929 `_) +* chore: improve exception handling in IR generation (`#3705 `_) +* refactor: merge ``annotation.py`` and ``local.py`` (`#3456 `_) +* chore[ux]: remove deprecated python AST classes (`#3998 `_) +* refactor[ux]: remove deprecated ``VyperNode`` properties (`#3999 `_) +* feat: remove Index AST node (`#3757 `_) +* refactor: for loop target parsing (`#3724 `_) +* chore: improve diagnostics for invalid for loop annotation (`#3721 `_) +* refactor: builtin functions inherit from ``VyperType`` (`#3559 `_) +* fix: remove .keyword from Call AST node (`#3689 `_) +* improvement: assert descriptions in Crowdfund finalize() and participate() (`#3064 `_) +* feat: improve panics in IR generation (`#3708 `_) +* feat: improve warnings, refactor ``vyper_warn()`` (`#3800 `_) +* fix[ir]: unique symbol name (`#3848 `_) +* refactor: remove duplicate terminus checking code 
(`#3541 `_) +* refactor: ``ExprVisitor`` type validation (`#3739 `_) +* chore: improve exception for type validation (`#3759 `_) +* fix: fuzz test not updated to use TypeMismatch (`#3768 `_) +* chore: fix StringEnum._generate_next_value_ signature (`#3770 `_) +* chore: improve some error messages (`#3775 `_) +* refactor: ``get_search_paths()`` for vyper cli (`#3778 `_) +* chore: replace occurrences of 'enum' by 'flag' (`#3794 `_) +* chore: add another borrowship test (`#3802 `_) +* chore[ux]: improve an exports error message (`#3822 `_) +* chore: improve codegen test coverage report (`#3824 `_) +* chore: improve syntax error messages (`#3885 `_) +* chore[tool]: remove ``vyper-serve`` from ``setup.py`` (`#3936 `_) +* fix[ux]: replace standard strings with f-strings (`#3953 `_) +* chore[ir]: sanity check types in for range codegen (`#3968 `_) + v0.3.10 ("Black Adder") *********************** From f92ef414a551de34b64ae09ba2985ee240244c4c Mon Sep 17 00:00:00 2001 From: Benny Date: Wed, 26 Jun 2024 15:47:04 +1000 Subject: [PATCH 37/53] chore[docs]: add `FUNDING.json` for drips funding (#4167) Add json file to verify Vyper on https://www.drips.network/app/projects --- FUNDING.json | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 FUNDING.json diff --git a/FUNDING.json b/FUNDING.json new file mode 100644 index 0000000000..301aa05572 --- /dev/null +++ b/FUNDING.json @@ -0,0 +1,7 @@ +{ + "drips": { + "ethereum": { + "ownedBy": "0x70CCBE10F980d80b7eBaab7D2E3A73e87D67B775" + } + } +} From 5067b86906f4a3815c4d7a2d3b64f2694ae3a520 Mon Sep 17 00:00:00 2001 From: Rim Rakhimov Date: Wed, 3 Jul 2024 20:30:07 +0400 Subject: [PATCH 38/53] chore[docs]: update `sourceMap` field descriptions (#4170) * Removed `evm.deployedBytecode.sourceMapFull`, as it does not work in v0.4.0 * Updated `evm.deployedBytecode.sourceMap` to be an object in compiler output * Added `evm.bytecode.sourceMap` into compiler input and compiler output --- docs/compiling-a-contract.rst | 32 
+++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/docs/compiling-a-contract.rst b/docs/compiling-a-contract.rst index 751af980b2..c2cd3ed22c 100644 --- a/docs/compiling-a-contract.rst +++ b/docs/compiling-a-contract.rst @@ -308,10 +308,10 @@ The following example describes the expected input format of ``vyper-json``. (Co // devdoc - Natspec developer documentation // evm.bytecode.object - Bytecode object // evm.bytecode.opcodes - Opcodes list + // evm.bytecode.sourceMap - Source mapping (useful for debugging) // evm.deployedBytecode.object - Deployed bytecode object // evm.deployedBytecode.opcodes - Deployed opcodes list - // evm.deployedBytecode.sourceMap - Solidity-style source mapping - // evm.deployedBytecode.sourceMapFull - Deployed source mapping (useful for debugging) + // evm.deployedBytecode.sourceMap - Deployed source mapping (useful for debugging) // evm.methodIdentifiers - The list of function hashes // // Using `evm`, `evm.bytecode`, etc. will select every target part of that output. @@ -388,15 +388,37 @@ The following example describes the output format of ``vyper-json``. Comments ar // The bytecode as a hex string. "object": "00fe", // Opcodes list (string) - "opcodes": "" + "opcodes": "", + // The source mapping. + "sourceMap": { + "breakpoints": [], + "error_map": {}, + "pc_ast_map": {}, + "pc_ast_map_item_keys": [], + "pc_breakpoints": [], + "pc_jump_map": {}, + "pc_pos_map": {}, + // The source mapping as a string. + "pc_pos_map_compressed": "" + } }, "deployedBytecode": { // The deployed bytecode as a hex string. "object": "00fe", // Deployed opcodes list (string) "opcodes": "", - // The deployed source mapping as a string. - "sourceMap": "" + // The deployed source mapping. 
+ "sourceMap": { + "breakpoints": [], + "error_map": {}, + "pc_ast_map": {}, + "pc_ast_map_item_keys": [], + "pc_breakpoints": [], + "pc_jump_map": {}, + "pc_pos_map": {}, + // The deployed source mapping as a string. + "pc_pos_map_compressed": "" + } }, // The list of function hashes "methodIdentifiers": { From 8931e54f8c577f6d60563ff47588c18e58a04c04 Mon Sep 17 00:00:00 2001 From: Zhipeng Xue <543984341@qq.com> Date: Fri, 26 Jul 2024 09:40:23 +0800 Subject: [PATCH 39/53] chore[test]: fix a type hint (#4173) Description Fix a type check warning reported by Pyre@Google, which was outdated after code modifications. Detail update the return type of function fix_terminal from bool to str, since it could be str after commit 176e7f7 --- tests/functional/grammar/test_grammar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/grammar/test_grammar.py b/tests/functional/grammar/test_grammar.py index de399e84b7..c1d2e1d6e6 100644 --- a/tests/functional/grammar/test_grammar.py +++ b/tests/functional/grammar/test_grammar.py @@ -37,7 +37,7 @@ def test_basic_grammar_empty(): assert len(tree.children) == 0 -def fix_terminal(terminal: str) -> bool: +def fix_terminal(terminal: str) -> str: # these throw exceptions in the grammar for bad in ("\x00", "\\ ", "\x0c"): terminal = terminal.replace(bad, " ") From fc192847932dcac83f0d1a0f8f8679867f525a1e Mon Sep 17 00:00:00 2001 From: HodanPlodky <36966616+HodanPlodky@users.noreply.github.com> Date: Fri, 26 Jul 2024 11:03:53 +0000 Subject: [PATCH 40/53] feat[venom]: offset instruction (#4180) this commit introduces an `offset` instruction that is emitted in the algebraic pass when the add instruction calculates an offset from a code label, which is used for immutables. this allows compilation directly to the magic `OFST` assembly instruction, which does additional constant folding after symbol resolution. 
--------- Co-authored-by: Charles Cooper --- .../venom/test_algebraic_optimizer.py | 51 +++++++++++++++++++ vyper/venom/passes/algebraic_optimization.py | 16 +++++- vyper/venom/passes/extract_literals.py | 2 +- vyper/venom/venom_to_assembly.py | 6 +++ 4 files changed, 73 insertions(+), 2 deletions(-) diff --git a/tests/unit/compiler/venom/test_algebraic_optimizer.py b/tests/unit/compiler/venom/test_algebraic_optimizer.py index e0368d4197..b5d55efbdc 100644 --- a/tests/unit/compiler/venom/test_algebraic_optimizer.py +++ b/tests/unit/compiler/venom/test_algebraic_optimizer.py @@ -127,3 +127,54 @@ def test_interleaved_case(interleave_point): assert bb.instructions[-1].operands[0] == op3_inv else: assert bb.instructions[-1].operands[0] == op3 + + +def test_offsets(): + ctx = IRContext() + fn = ctx.create_function("_global") + + bb = fn.get_basic_block() + + br1 = IRBasicBlock(IRLabel("then"), fn) + fn.append_basic_block(br1) + br2 = IRBasicBlock(IRLabel("else"), fn) + fn.append_basic_block(br2) + + p1 = bb.append_instruction("param") + op1 = bb.append_instruction("store", 32) + op2 = bb.append_instruction("add", 0, IRLabel("mem")) + op3 = bb.append_instruction("store", 64) + bb.append_instruction("dloadbytes", op1, op2, op3) + op5 = bb.append_instruction("mload", op3) + op6 = bb.append_instruction("iszero", op5) + bb.append_instruction("jnz", op6, br1.label, br2.label) + + op01 = br1.append_instruction("store", 32) + op02 = br1.append_instruction("add", 0, IRLabel("mem")) + op03 = br1.append_instruction("store", 64) + br1.append_instruction("dloadbytes", op01, op02, op03) + op05 = br1.append_instruction("mload", op03) + op06 = br1.append_instruction("iszero", op05) + br1.append_instruction("return", p1, op06) + + op11 = br2.append_instruction("store", 32) + op12 = br2.append_instruction("add", 0, IRLabel("mem")) + op13 = br2.append_instruction("store", 64) + br2.append_instruction("dloadbytes", op11, op12, op13) + op15 = br2.append_instruction("mload", op13) + op16 = 
br2.append_instruction("iszero", op15) + br2.append_instruction("return", p1, op16) + + ac = IRAnalysesCache(fn) + MakeSSA(ac, fn).run_pass() + AlgebraicOptimizationPass(ac, fn).run_pass() + RemoveUnusedVariablesPass(ac, fn).run_pass() + + offset_count = 0 + for bb in fn.get_basic_blocks(): + for instruction in bb.instructions: + assert instruction.opcode != "add" + if instruction.opcode == "offset": + offset_count += 1 + + assert offset_count == 3 diff --git a/vyper/venom/passes/algebraic_optimization.py b/vyper/venom/passes/algebraic_optimization.py index 4094219a6d..1d375ea988 100644 --- a/vyper/venom/passes/algebraic_optimization.py +++ b/vyper/venom/passes/algebraic_optimization.py @@ -1,6 +1,6 @@ from vyper.venom.analysis.dfg import DFGAnalysis from vyper.venom.analysis.liveness import LivenessAnalysis -from vyper.venom.basicblock import IRInstruction, IROperand +from vyper.venom.basicblock import IRInstruction, IRLabel, IRLiteral, IROperand from vyper.venom.passes.base_pass import IRPass @@ -58,10 +58,24 @@ def _get_iszero_chain(self, op: IROperand) -> list[IRInstruction]: chain.reverse() return chain + def _handle_offsets(self): + for bb in self.function.get_basic_blocks(): + for inst in bb.instructions: + # check if the instruction is of the form + # `add