From aec47b0226b7e97306ea4ce81aba777f7dfc0507 Mon Sep 17 00:00:00 2001
From: Josselin Feist
Date: Wed, 14 Dec 2022 15:14:32 +0100
Subject: [PATCH 1/2] Add generic dataflow framework + example + stack value analysis

- Add AbstractDataflow, which is a generic dataflow analysis following an
  abstract interpretation approach.
- Add CollectInstructions, which is just an example: the analysis collects
  all the executed instructions.
- Add StackValueAnalysis, which tracks the values pushed/popped on the
  stack, with a focus on int/global fields/transaction fields.
---
 tealer/analyses/dataflow/abstract.py          |  60 +++++
 .../analyses/dataflow/collect_instructions.py | 102 ++++++++
 tealer/analyses/dataflow/stack_value.py       | 229 ++++++++++++++++++
 tealer/teal/global_field.py                   |  10 +
 tealer/teal/instructions/transaction_field.py |   8 +
 .../test_collect_instructions.py              | 139 +++++++++++
 tests/test_dataflow/test_stack_value.py       | 207 ++++++++++++++++
 7 files changed, 755 insertions(+)
 create mode 100644 tealer/analyses/dataflow/abstract.py
 create mode 100644 tealer/analyses/dataflow/collect_instructions.py
 create mode 100644 tealer/analyses/dataflow/stack_value.py
 create mode 100644 tests/test_dataflow/test_collect_instructions.py
 create mode 100644 tests/test_dataflow/test_stack_value.py

diff --git a/tealer/analyses/dataflow/abstract.py b/tealer/analyses/dataflow/abstract.py
new file mode 100644
index 0000000..a3db91c
--- /dev/null
+++ b/tealer/analyses/dataflow/abstract.py
@@ -0,0 +1,60 @@
+from abc import ABC, abstractmethod
+from typing import List, Any, TypeVar, Generic, TYPE_CHECKING
+
+from tealer.teal.basic_blocks import BasicBlock
+
+AbstractValues = TypeVar("AbstractValues")
+
+if TYPE_CHECKING:
+    from tealer.teal.teal import Teal
+
+
+class AbstractDataflow(ABC, Generic[AbstractValues]):
+    def __init__(self, teal: "Teal"):
+        self.teal = teal
+
+    @abstractmethod
+    def _merge_predecessor(self, bb: BasicBlock) -> AbstractValues:
+        pass
+
+    @abstractmethod
+    def _is_fix_point(self, bb: BasicBlock, values: AbstractValues) -> bool:
+        pass
+
+    @abstractmethod
+    def _transfer_function(self, bb: BasicBlock) -> AbstractValues:
+        pass
+
+    @abstractmethod
+    def _store_values_in(self, bb: BasicBlock, values: AbstractValues) -> None:
+        pass
+
+    @abstractmethod
+    def _store_values_out(self, bb: BasicBlock, values: AbstractValues) -> None:
+        pass
+
+    @abstractmethod
+    def _filter_successors(self, bb: BasicBlock) -> List[BasicBlock]:
+        pass
+
+    @abstractmethod
+    def result(self) -> Any:
+        pass
+
+    def explore(self, bb: BasicBlock, is_entry_node: bool = False) -> None:
+
+        values = self._merge_predecessor(bb)
+
+        if not is_entry_node and self._is_fix_point(bb, values):
+            return
+
+        self._store_values_in(bb, values)
+        values = self._transfer_function(bb)
+        self._store_values_out(bb, values)
+
+        successors = self._filter_successors(bb)
+        for successor in successors:
+            self.explore(successor)
+
+    def run_analysis(self) -> None:
+        self.explore(self.teal.bbs[0], is_entry_node=True)
diff --git a/tealer/analyses/dataflow/collect_instructions.py b/tealer/analyses/dataflow/collect_instructions.py
new file mode 100644
index 0000000..e3257af
--- /dev/null
+++ b/tealer/analyses/dataflow/collect_instructions.py
@@ -0,0 +1,102 @@
+from collections import defaultdict
+from typing import Union, Set, Any, List, Dict, TYPE_CHECKING
+
+from tealer.analyses.dataflow.abstract import AbstractDataflow
+from tealer.teal.basic_blocks import BasicBlock
+from tealer.teal.instructions.instructions import Instruction
+
+if TYPE_CHECKING:
+    from tealer.teal.teal
import Teal + +MAX_ELEMS = 35 + + +class InstructionSet: + def __init__(self, values: Union[str, Instruction, Set[Instruction]]) -> None: + + if isinstance(values, str): + assert values in ["TOP", "BOTTOM"] + + if isinstance(values, set) and len(values) > MAX_ELEMS: + values = "TOP" + if isinstance(values, Instruction): + values = {values} + + self.values: Union[str, Set[Instruction]] = values + + @property + def is_top(self) -> bool: + return isinstance(self.values, str) and self.values == "TOP" + + @property + def is_bottom(self) -> bool: + return isinstance(self.values, str) and self.values == "BOTTOM" + + def union(self, instructions: "InstructionSet") -> "InstructionSet": + v0 = self.values + v1 = instructions.values + if v0 == "TOP" or v1 == "TOP": + return InstructionSet("TOP") + + if v0 == "BOTTOM": + return InstructionSet(v1) + + if v1 == "BOTTOM": + return InstructionSet(v0) + + assert isinstance(v0, set) + assert isinstance(v1, set) + + return InstructionSet(v0.union(v1)) + + def __eq__(self, other: Any) -> bool: + if isinstance(other, InstructionSet): + if isinstance(self.values, str) and isinstance(other.values, str): + return self.values == other.values + if isinstance(self.values, set) and isinstance(other.values, set): + return self.values == other.values + return False + + def __str__(self) -> str: + if isinstance(self.values, str): + return self.values + return "[" + ",".join([str(x) for x in self.values]) + "]" + + +class CollectInstructions(AbstractDataflow[InstructionSet]): + def __init__(self, teal: "Teal") -> None: + super().__init__(teal) + self.bb_in: Dict[BasicBlock, InstructionSet] = defaultdict(lambda: InstructionSet("BOTTOM")) + self.bb_out: Dict[BasicBlock, InstructionSet] = defaultdict( + lambda: InstructionSet("BOTTOM") + ) + + def _merge_predecessor(self, bb: BasicBlock) -> InstructionSet: + s = InstructionSet("BOTTOM") + for bb_prev in bb.prev: + s = s.union(self.bb_out[bb_prev]) + + return s + + def _is_fix_point(self, bb: BasicBlock, values: InstructionSet) -> bool: + return self.bb_in[bb] == values + + def _transfer_function(self, bb: BasicBlock) -> InstructionSet: + bb_out = self.bb_in[bb] + + for ins in bb.instructions: + bb_out = bb_out.union(InstructionSet(ins)) + + return bb_out + + def _store_values_in(self, bb: BasicBlock, values: InstructionSet) -> None: + self.bb_in[bb] = values + + def _store_values_out(self, bb: BasicBlock, values: InstructionSet) -> None: + self.bb_out[bb] = values + + def _filter_successors(self, bb: BasicBlock) -> List[BasicBlock]: + return bb.next + + def result(self) -> Dict[BasicBlock, InstructionSet]: + return self.bb_out diff --git a/tealer/analyses/dataflow/stack_value.py b/tealer/analyses/dataflow/stack_value.py new file mode 100644 index 0000000..398da1b --- /dev/null +++ b/tealer/analyses/dataflow/stack_value.py @@ -0,0 +1,229 @@ +from collections import defaultdict +from typing import Union, Set, Any, List, Dict, TYPE_CHECKING, Type, Callable, Tuple + +from tealer.analyses.dataflow.abstract import AbstractDataflow +from tealer.teal.basic_blocks import BasicBlock +from tealer.teal.global_field import GlobalField +from tealer.teal.instructions import instructions +from tealer.teal.instructions.instructions import Instruction +from tealer.teal.instructions.transaction_field import TransactionField + +if TYPE_CHECKING: + from tealer.teal.teal import Teal + +MAX_ELEMS_PER_STACK_VALUE = 35 +MAX_STACK_DEPTH = 100 + + +# pylint: disable=too-few-public-methods +class TOP: + def __eq__(self, other: Any) -> bool: + return 
isinstance(other, TOP)
+
+    def __str__(self) -> str:
+        return "TOP"
+
+
+# pylint: disable=too-few-public-methods
+class BOTTOM:
+    def __eq__(self, other: Any) -> bool:
+        return isinstance(other, BOTTOM)
+
+    def __str__(self) -> str:
+        return "BOTTOM"
+
+
+VALUES_TRACKED = Union[Set[Union[GlobalField, TransactionField, int, str]], TOP, BOTTOM]
+
+
+class StackValue:
+    """
+    StackValue represents an abstract value on the stack
+    It is either a set of int/str/fields, or TOP/BOTTOM
+    The set's size is limited to MAX_ELEMS_PER_STACK_VALUE; if it grows beyond that, it becomes TOP
+
+    """
+
+    def __init__(self, values: VALUES_TRACKED):
+        if isinstance(values, set) and len(values) > MAX_ELEMS_PER_STACK_VALUE:
+            values = TOP()
+        self.values = values
+
+    def union(self, other_stack_value: "StackValue") -> "StackValue":
+        self_values = self.values
+        other_values = other_stack_value.values
+        if isinstance(self_values, TOP) or isinstance(other_values, TOP):
+            return StackValue(TOP())
+        if isinstance(self_values, BOTTOM):
+            return StackValue(other_values)
+        if isinstance(other_values, BOTTOM):
+            return StackValue(self_values)
+        assert isinstance(self_values, set)
+        assert isinstance(other_values, set)
+        return StackValue(self_values | other_values)
+
+    def __eq__(self, other: Any) -> bool:
+        if isinstance(other, StackValue):
+            return self.values == other.values
+        return False
+
+    def __str__(self) -> str:
+        values = self.values
+        if isinstance(values, (TOP, BOTTOM)):
+            return str(values)
+        assert isinstance(values, set)
+        return str({str(x) for x in values})
+
+
+class Stack:
+    """
+    Represents an abstract stack
+    The length is limited by MAX_STACK_DEPTH
+    self.values contains the abstract elements (the last element is the top of the stack)
+
+    If two paths are merged, where one path pushes 1 (concrete stack [1])
+    and the other pushes 3 (concrete stack [3]),
+    then the abstract stack is
+    [ [1;3] ]
+    I.e.: the top can be either 1 or 3
+
+    If we pop beyond the known values, we return TOP().
+    As a result, if the leftmost elements of the stack are TOP, we can stop tracking them
+
+    If two paths are merged and the stacks have different sizes, then
+    we use TOP for the missing elements. Ex:
+    - one path pushes 1; pushes 2 (concrete stack [1;2], 2 on top)
+    - and the other pushes 3 (concrete stack [3])
+    - then the abstract stack is
+    - [ [TOP] ; [2;3] ]
+    I.e.: the top can be either 2 or 3, and the element below it is TOP
+    Because the leftmost TOP element can be removed, this can be simplified as
+    [ [2;3] ]
+
+
+
+    """
+
+    def __init__(self, values: List[StackValue]) -> None:
+        if len(values) > MAX_STACK_DEPTH:
+            values = values[-MAX_STACK_DEPTH:]
+
+        while values and values[0] == StackValue(TOP()):
+            values.pop(0)
+
+        self.values = values
+
+    def union(self, stack: "Stack") -> "Stack":
+
+        v1 = self.values
+        v2 = stack.values
+
+        min_length = min(len(v1), len(v2))
+        v1 = v1[len(v1) - min_length :]
+        v2 = v2[len(v2) - min_length :]
+
+        v3 = []
+        for i in range(min_length):
+            v3.append(v1[i].union(v2[i]))
+
+        return Stack(v3)
+
+    def pop(self, number: int) -> Tuple["Stack", List[StackValue]]:
+        if number == 0:
+            return Stack(self.values), []
+        if len(self.values) < number:
+            diff = number - len(self.values)
+            popped_values = list(self.values)
+            return Stack([]), [StackValue(TOP()) for _ in range(diff)] + popped_values
+
+        popped_values = self.values[-number:]
+        return Stack(self.values[: len(self.values) - number]), popped_values
+
+    def push(self, pushed_values: List[StackValue]) -> "Stack":
+        return Stack(self.values + pushed_values)
+
+    def __eq__(self, other: Any) -> bool:
+        if isinstance(other, Stack):
+            return self.values == other.values
+        return False
+
+    def __str__(self) -> str:
+        return str([str(x) for x in self.values])
+
+
+# pylint: disable=unused-argument
+def handle_int(ins: Instruction, stack: Stack) -> List[StackValue]:
+    assert isinstance(ins, instructions.Int)
+    return [StackValue({ins.value})]
+
+
+# pylint: disable=unused-argument
+def handle_pushint(ins: Instruction, stack: Stack) -> List[StackValue]:
+    assert isinstance(ins, instructions.PushInt)
+    return [StackValue({ins.value})]
+
+
+# pylint: disable=unused-argument
+def handle_txn(ins: Instruction, stack: Stack) -> List[StackValue]:
+    assert isinstance(ins, instructions.Txn)
+    return [StackValue({ins.field})]
+
+
+# pylint: disable=unused-argument
+def handle_global(ins: Instruction, stack: Stack) -> List[StackValue]:
+    assert isinstance(ins, instructions.Global)
+    return [StackValue({ins.field})]
+
+
+special_handling: Dict[Type[Instruction], Callable[[Instruction, Stack], List[StackValue]]] = {
+    instructions.Int: handle_int,
+    instructions.PushInt: handle_pushint,
+    instructions.Txn: handle_txn,
+    instructions.Global: handle_global,
+}
+
+
+class StackValueAnalysis(AbstractDataflow[Stack]):
+    def __init__(self, teal: "Teal") -> None:
+        super().__init__(teal)
+        self.bb_in: Dict[BasicBlock, Stack] = defaultdict(lambda: Stack([StackValue(BOTTOM())]))
+        self.bb_out: Dict[BasicBlock, Stack] = defaultdict(lambda: Stack([]))
+
+        self.ins_in: Dict[Instruction, Stack] = defaultdict(lambda: Stack([]))
+        self.ins_out: Dict[Instruction, Stack] = defaultdict(lambda: Stack([]))
+
+    def _merge_predecessor(self, bb: BasicBlock) -> Stack:
+        s = Stack([])
+        for bb_prev in bb.prev:
+            s = s.union(self.bb_out[bb_prev])
+        return s
+
+    def _is_fix_point(self, bb: BasicBlock, values: Stack) -> bool:
+        return self.bb_in[bb] == values
+
+    def _transfer_function(self, bb: BasicBlock) -> Stack:
+        bb_out = self.bb_in[bb]
+
+        for ins in bb.instructions:
+            self.ins_in[ins] = Stack(bb_out.values)
+            if type(ins) in special_handling:
+                pushed_elems = special_handling[type(ins)](ins, bb_out)
+            else:
+                pushed_elems = [StackValue(TOP()) for _ in
range(ins.stack_push_size)] + bb_out, _ = bb_out.pop(ins.stack_pop_size) + bb_out = bb_out.push(pushed_elems) + self.ins_out[ins] = Stack(bb_out.values) + + return bb_out + + def _store_values_in(self, bb: BasicBlock, values: Stack) -> None: + self.bb_in[bb] = values + + def _store_values_out(self, bb: BasicBlock, values: Stack) -> None: + self.bb_out[bb] = values + + def _filter_successors(self, bb: BasicBlock) -> List[BasicBlock]: + return bb.next + + def result(self) -> Dict[BasicBlock, Stack]: + return self.bb_out diff --git a/tealer/teal/global_field.py b/tealer/teal/global_field.py index 63a5326..6c9b9e0 100644 --- a/tealer/teal/global_field.py +++ b/tealer/teal/global_field.py @@ -10,6 +10,9 @@ """ # pylint: disable=too-few-public-methods +from typing import Any + + class GlobalField: """Base class representing a global field""" @@ -21,9 +24,16 @@ def version(self) -> int: """Teal version this field is introduced in and supported from.""" return self._version + def __eq__(self, other: Any) -> bool: + # pylint: disable=unidiomatic-typecheck + return type(other) == type(self) + def __str__(self) -> str: return self.__class__.__qualname__ + def __hash__(self) -> int: + return hash(str(self)) + class GroupSize(GlobalField): """Number of transactions in this atomic transaction group.""" diff --git a/tealer/teal/instructions/transaction_field.py b/tealer/teal/instructions/transaction_field.py index 650b446..95b8857 100644 --- a/tealer/teal/instructions/transaction_field.py +++ b/tealer/teal/instructions/transaction_field.py @@ -11,6 +11,7 @@ # pylint: disable=too-few-public-methods # https://developer.algorand.org/docs/reference/teal/opcodes/#txn +from typing import Any class TransactionField: @@ -24,6 +25,13 @@ def version(self) -> int: """Teal version this field is introduced in and supported from.""" return self._version + def __eq__(self, other: Any) -> bool: + # pylint: disable=unidiomatic-typecheck + return type(other) == type(self) + + def __hash__(self) -> int: + return hash(str(self)) + def __str__(self) -> str: return self.__class__.__qualname__ diff --git a/tests/test_dataflow/test_collect_instructions.py b/tests/test_dataflow/test_collect_instructions.py new file mode 100644 index 0000000..2cbf834 --- /dev/null +++ b/tests/test_dataflow/test_collect_instructions.py @@ -0,0 +1,139 @@ +from typing import Tuple + +import pytest + +from tealer.analyses.dataflow.collect_instructions import CollectInstructions +from tealer.teal.instructions.instructions import Return +from tealer.teal.parse_teal import parse_teal + +MULTIPLE_RETSUB = """ +#pragma version 5 +b main +is_even: + int 2 + % + bz return_1 + int 0 + global GroupSize + int 2 + == + assert + retsub +return_1: + global GroupSize + int 3 + < + assert + int 1 + retsub +main: + global GroupSize + int 1 + != + assert + int 4 + callsub is_even + return +""" + +SUBROUTINE_BACK_JUMP = """ +#pragma version 5 +b main +getmod: + % + retsub +is_odd: + global GroupSize + int 4 + < + assert + global GroupSize + int 2 + != + assert + int 2 + b getmod +main: + int 5 + callsub is_odd + return +""" + +BRANCHING = """ +#pragma version 4 +global GroupSize +int 2 +>= +assert +global GroupSize +int 4 +> +bz fin +global GroupSize +int 1 +== +bnz check_second_arg +int 0 +return +check_second_arg: +txn ApplicationArgs 1 +btoi +int 100 +> +bnz fin +int 0 +return +fin: +int 1 +return +""" + +LOOPS = """ +#pragma version 5 +global GroupSize +int 4 +!= +assert +int 0 +loop: + dup + global GroupSize + int 3 + >= + bz end + int 1 + + + global GroupSize + 
int 3
+    <
+    assert
+    b loop
+end:
+    int 2
+    global GroupSize
+    ==
+    assert
+    int 1
+    return
+"""
+
+ALL_TESTS = [(MULTIPLE_RETSUB, 27), (SUBROUTINE_BACK_JUMP, 20), (BRANCHING, 22), (LOOPS, 26)]
+
+
+@pytest.mark.parametrize("test", ALL_TESTS)
+def test_group_indices(test: Tuple[str, int]) -> None:
+    teal = parse_teal(test[0])
+
+    ci = CollectInstructions(teal)
+    ci.run_analysis()
+    bb_out = ci.result()
+
+    # We assume that the return node is always the last in instructions
+    # This might not hold if we change the parsing
+    return_node = teal.instructions[-1]
+    assert isinstance(return_node, Return)
+    assert return_node.bb
+
+    values = bb_out[return_node.bb].values
+    assert isinstance(values, set)
+    assert len(values) == test[1]
diff --git a/tests/test_dataflow/test_stack_value.py b/tests/test_dataflow/test_stack_value.py
new file mode 100644
index 0000000..937545d
--- /dev/null
+++ b/tests/test_dataflow/test_stack_value.py
@@ -0,0 +1,207 @@
+from typing import Tuple, Dict, List, Any
+
+import pytest
+
+from tealer.analyses.dataflow.stack_value import StackValueAnalysis, Stack, StackValue
+from tealer.teal import global_field
+from tealer.teal.instructions import instructions
+from tealer.teal.parse_teal import parse_teal
+
+MULTIPLE_RETSUB = """
+#pragma version 5
+b main
+is_even:
+    int 2
+    %
+    bz return_1
+    int 0
+    global GroupSize
+    int 2
+    ==
+    assert
+    retsub
+return_1:
+    global GroupSize
+    int 3
+    <
+    assert
+    int 1
+    retsub
+main:
+    global GroupSize
+    int 1
+    !=
+    assert
+    int 4
+    callsub is_even
+    return
+"""
+
+MULTIPLE_RETSUB_EXPECTED = {
+    11: Stack([StackValue({0}), StackValue({global_field.GroupSize()}), StackValue({2})]),
+    17: Stack([StackValue({global_field.GroupSize()}), StackValue({3})]),
+    24: Stack([StackValue({global_field.GroupSize()}), StackValue({1})]),
+}
+
+SUBROUTINE_BACK_JUMP = """
+#pragma version 5
+b main
+getmod:
+    %
+    retsub
+is_odd:
+    global GroupSize
+    int 4
+    <
+    assert
+    global GroupSize
+    int 2
+    !=
+    assert
+    int 2
+    b getmod
+main:
+    int 5
+    callsub is_odd
+    return
+"""
+
+SUBROUTINE_BACK_JUMP_EXPECTED = {
+    10: Stack([StackValue({global_field.GroupSize()}), StackValue({4})]),
+    14: Stack([StackValue({global_field.GroupSize()}), StackValue({2})]),
+}
+
+BRANCHING = """
+#pragma version 4
+global GroupSize
+int 2
+>=
+assert
+global GroupSize
+int 4
+>
+bz fin
+global GroupSize
+int 1
+==
+bnz check_second_arg
+int 0
+return
+check_second_arg:
+txn ApplicationArgs 1
+btoi
+int 100
+>
+bnz fin
+int 0
+return
+fin:
+int 1
+return
+"""

+BRANCHING_EXPECTED = {
+    5: Stack([StackValue({global_field.GroupSize()}), StackValue({2})]),
+    9: Stack([StackValue({global_field.GroupSize()}), StackValue({4})]),
+    13: Stack([StackValue({global_field.GroupSize()}), StackValue({1})]),
+    21: Stack([StackValue({100})]),
+}
+
+LOOPS = """
+#pragma version 5
+global GroupSize
+int 4
+!=
+assert
+int 0
+loop:
+    dup
+    global GroupSize
+    int 3
+    >=
+    bz end
+    int 1
+    +
+    global GroupSize
+    int 3
+    <
+    assert
+    b loop
+end:
+    int 2
+    global GroupSize
+    ==
+    assert
+    int 1
+    return
+"""
+
+LOOPS_EXPECTED = {
+    5: Stack([StackValue({global_field.GroupSize()}), StackValue({4})]),
+    12: Stack([StackValue({global_field.GroupSize()}), StackValue({3})]),
+    18: Stack([StackValue({global_field.GroupSize()}), StackValue({3})]),
+    24: Stack([StackValue({2}), StackValue({global_field.GroupSize()})]),
+}
+
+ALL_TESTS = [
+    (MULTIPLE_RETSUB, MULTIPLE_RETSUB_EXPECTED),
+    (SUBROUTINE_BACK_JUMP, SUBROUTINE_BACK_JUMP_EXPECTED),
+    (BRANCHING, BRANCHING_EXPECTED),
+    (LOOPS, LOOPS_EXPECTED),
+]
+
+
+@pytest.mark.parametrize("test", ALL_TESTS)
+def test_group_indices(test: Tuple[str, Dict[int, List[Any]]]) -> None:
+    teal = parse_teal(test[0])
+
+    ci = StackValueAnalysis(teal)
+    ci.run_analysis()
+
+    for ins in teal.instructions:
+        if isinstance(
+            ins,
+            (
+                instructions.Greater,
+                instructions.GreaterE,
+                instructions.Less,
+                instructions.LessE,
+                instructions.Eq,
+                instructions.Neq,
+            ),
+        ):
+            expected = test[1][ins.line]
+            assert expected == ci.ins_in[ins]
+
+
+def print_stack(code: str) -> None:
+    teal = parse_teal(code)
+
+    sv = StackValueAnalysis(teal)
+    sv.run_analysis()
+    print(code)
+
+    for ins in teal.instructions:
+        if isinstance(
+            ins,
+            (
+                instructions.Greater,
+                instructions.GreaterE,
+                instructions.Less,
+                instructions.LessE,
+                instructions.Eq,
+                instructions.Neq,
+            ),
+        ):
+            print("###")
+            print(ins)
+            print(f"Before: {sv.ins_in[ins]}")
+            print(f"After: {sv.ins_out[ins]}")
+            print()
+
+
+if __name__ == "__main__":
+    print_stack(MULTIPLE_RETSUB)
+    print_stack(SUBROUTINE_BACK_JUMP)
+    print_stack(BRANCHING)
+    print_stack(LOOPS)

From 971adaae15cb79d0a790e550a33b68f7831b94bb Mon Sep 17 00:00:00 2001
From: Feist Josselin
Date: Mon, 18 Sep 2023 14:28:11 +0200
Subject: [PATCH 2/2] minor

---
 tests/test_dataflow/test_collect_instructions.py | 2 +-
 tests/test_dataflow/test_stack_value.py          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_dataflow/test_collect_instructions.py b/tests/test_dataflow/test_collect_instructions.py
index 2cbf834..a89f78b 100644
--- a/tests/test_dataflow/test_collect_instructions.py
+++ b/tests/test_dataflow/test_collect_instructions.py
@@ -120,7 +120,7 @@ ALL_TESTS = [(MULTIPLE_RETSUB, 27), (SUBROUTINE_BACK_JUMP, 20), (BRANCHING, 22), (LOOPS, 26)]
 
 
-@pytest.mark.parametrize("test", ALL_TESTS)
+@pytest.mark.parametrize("test", ALL_TESTS)  # type: ignore
 def test_group_indices(test: Tuple[str, int]) -> None:
     teal = parse_teal(test[0])
 
diff --git a/tests/test_dataflow/test_stack_value.py b/tests/test_dataflow/test_stack_value.py
index 937545d..9b25822 100644
--- a/tests/test_dataflow/test_stack_value.py
+++ b/tests/test_dataflow/test_stack_value.py
@@ -151,7 +151,7 @@ ]
 
 
-@pytest.mark.parametrize("test", ALL_TESTS)
+@pytest.mark.parametrize("test", ALL_TESTS)  # type: ignore
 def test_group_indices(test: Tuple[str, Dict[int, List[Any]]]) -> None:
     teal = parse_teal(test[0])
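
Usage sketch (not part of the patches above): a minimal way to drive the two
analyses added by this series, modelled on the print_stack helper in
tests/test_dataflow/test_stack_value.py. The TEAL program below is a made-up
example; the import paths are the ones used by the tests in this series.

from tealer.teal.parse_teal import parse_teal
from tealer.analyses.dataflow.collect_instructions import CollectInstructions
from tealer.analyses.dataflow.stack_value import StackValueAnalysis

CODE = """
#pragma version 5
global GroupSize
int 2
>=
return
"""

teal = parse_teal(CODE)

# CollectInstructions: for each basic block, the set of instructions seen on
# the paths reaching it (or TOP once the set grows past MAX_ELEMS).
ci = CollectInstructions(teal)
ci.run_analysis()
for bb, seen in ci.result().items():
    print(bb, "->", seen)

# StackValueAnalysis: abstract stack before/after each instruction, tracking
# int constants, global fields and transaction fields.
sva = StackValueAnalysis(teal)
sva.run_analysis()
for ins in teal.instructions:
    print(ins, "| in:", sva.ins_in[ins], "| out:", sva.ins_out[ins])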