From 18acd36410c2df9befb818e0b5c3791b77883021 Mon Sep 17 00:00:00 2001 From: Vara Prasad Bandaru Date: Mon, 19 Sep 2022 15:22:34 +0530 Subject: [PATCH 1/4] Add generic class for dataflow analysis of transaction fields and a class for group size field --- tealer/analyses/__init__.py | 0 tealer/analyses/dataflow/__init__.py | 0 tealer/analyses/dataflow/all_constraints.py | 2 + tealer/analyses/dataflow/generic.py | 253 +++++++++++++++ tealer/analyses/dataflow/int_fields.py | 130 ++++++++ tealer/teal/basic_blocks.py | 7 + tealer/teal/context/__init__.py | 0 .../teal/context/block_transaction_context.py | 23 ++ tealer/teal/parse_teal.py | 18 ++ tealer/utils/analyses.py | 10 +- tests/transaction_context/__init__.py | 0 tests/transaction_context/test_group_sizes.py | 296 ++++++++++++++++++ 12 files changed, 734 insertions(+), 5 deletions(-) create mode 100644 tealer/analyses/__init__.py create mode 100644 tealer/analyses/dataflow/__init__.py create mode 100644 tealer/analyses/dataflow/all_constraints.py create mode 100644 tealer/analyses/dataflow/generic.py create mode 100644 tealer/analyses/dataflow/int_fields.py create mode 100644 tealer/teal/context/__init__.py create mode 100644 tealer/teal/context/block_transaction_context.py create mode 100644 tests/transaction_context/__init__.py create mode 100644 tests/transaction_context/test_group_sizes.py diff --git a/tealer/analyses/__init__.py b/tealer/analyses/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tealer/analyses/dataflow/__init__.py b/tealer/analyses/dataflow/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tealer/analyses/dataflow/all_constraints.py b/tealer/analyses/dataflow/all_constraints.py new file mode 100644 index 0000000..8c15cca --- /dev/null +++ b/tealer/analyses/dataflow/all_constraints.py @@ -0,0 +1,2 @@ +# pylint: disable=unused-import +from tealer.analyses.dataflow.int_fields import IntFields diff --git a/tealer/analyses/dataflow/generic.py b/tealer/analyses/dataflow/generic.py new file mode 100644 index 0000000..117dcaa --- /dev/null +++ b/tealer/analyses/dataflow/generic.py @@ -0,0 +1,253 @@ +"""Defines generic class for dataflow analysis to find constraints on transaction fields. + +Possible values for a field are considered to be a set, referred to as universal set(U) for that field. +if U is finite and small, values are enumerated and are stored in the context. However, in case U is large +for example, address type fields have very large set, Enum type values are used to represent U and NullSet. + +Algorithm works as: + - Collect constraints asserted within the block and constraints specific for each path, happens if bz/bnz are + directly used on the constraint. 
+ - Use fix point algorithm and repeatedly merge information until no new information is found + +Equation for merging: + # path_transaction_context[b][bi] gives the transaction constraints for path bi -> b + block_transaction_context[b] = Union((block_transaction_context[bi] & path_transaction_context[b][bi]) for bi in predecessors[b]) \ + & block_transaction_context[b] \ + & Union(block_transaction_context[bi] for bi in successors[b]) +""" + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any, Dict, List, Tuple + +from tealer.teal.instructions.instructions import ( + Assert, + Return, + BZ, + BNZ, + Err, +) + +from tealer.utils.analyses import is_int_push_ins + +if TYPE_CHECKING: + from tealer.teal.teal import Teal + from tealer.teal.basic_blocks import BasicBlock + from tealer.teal.instructions.instructions import Instruction + + +class IncorrectDataflowTransactionContextInitialization(Exception): + pass + + +class DataflowTransactionContext(ABC): # pylint: disable=too-few-public-methods + + # List of keys, unique and separate context is stored for each key. + KEYS: List[str] = [] + + def __init__(self, teal: "Teal"): + self._teal: "Teal" = teal + # self._block_contexts[KEY][B] -> block_context of KEY for block B + self._block_contexts: Dict[str, Dict["BasicBlock", Any]] = {} + # self._path_contexts[KEY][Bi][Bj] -> path_context of KEY for path Bj -> Bi + self._path_contexts: Dict[str, Dict["BasicBlock", Dict["BasicBlock", Any]]] = {} + if not self.KEYS: + raise IncorrectDataflowTransactionContextInitialization( + f"KEYS are not initialized {self.__class__.__name__}" + ) + + def _gtx_key(self, idx: int, key: str) -> str: # pylint: disable=no-self-use + """return key used for tracking context of gtxn {idx} {field represented by key}""" + return f"GTXN_{idx:02d}_{key}" + + @abstractmethod + def _universal_set(self, key: str) -> Any: + """Return universal set of the field corresponding to given key""" + + @abstractmethod + def _null_set(self, key: str) -> Any: + """Return null set of the field corresponding to given key""" + + @abstractmethod + def _union(self, key: str, a: Any, b: Any) -> Any: + """return union of a and b, where a, b represent values for the given key""" + + @abstractmethod + def _intersection(self, key: str, a: Any, b: Any) -> Any: + """return intersection of a and b, where a, b represent values for the given key""" + + @abstractmethod + def _get_asserted(self, key: str, ins_stack: List["Instruction"]) -> Tuple[Any, Any]: + """For the given key and ins_stack, return true_values and false_values + + true_values for a key are considered to be values which result in non-zero value on + top of the stack. + false_values for a key are considered to be values which result in zero value on top + of the stack. + """ + + @abstractmethod + def _store_results(self) -> None: + """Store the collected information in the context object of each block""" + + def _block_level_constraints(self, block: "BasicBlock") -> None: + """Calculate and store constraints on keys applied within the block. + + By default, no constraints are considered i.e values are assumed to be universal_set + if block contains `Err` or `Return 0`, values are set to null set. + + if block contains assert instruction, values are further constrained using the comparison being + asserted. 
Values are stored in self._block_contexts + self._block_contexts[KEY][B] -> block_context of KEY for block B + """ + for key in self.KEYS: + if key not in self._block_contexts: + self._block_contexts[key] = {} + self._block_contexts[key][block] = self._universal_set(key) + + stack: List["Instruction"] = [] + for ins in block.instructions: + if isinstance(ins, Assert): + for key in self.KEYS: + asserted_values, _ = self._get_asserted(key, stack) + present_values = self._block_contexts[key][block] + self._block_contexts[key][block] = self._intersection( + key, present_values, asserted_values + ) + + # if return 0, set possible values to NullSet() + if isinstance(ins, Return): + if len(ins.prev) == 1: + is_int, value = is_int_push_ins(ins.prev[0]) + if is_int and value == 0: + for key in self.KEYS: + self._block_contexts[key][block] = self._null_set(key) + + if isinstance(ins, Err): + for key in self.KEYS: + self._block_contexts[key][block] = self._null_set(key) + + stack.append(ins) + + def _path_level_constraints(self, block: "BasicBlock") -> None: + """Calculate and store constraints on keys applied along each path. + + By default, no constraints are considered i.e values are assumed to be universal_set + + if block contains bz/bnz instruction, possible values are calculated for each branch and + are stored in self._path_contexts + self._path_contexts[KEY][Bi][Bj] -> path_context of KEY for path Bj -> Bi + """ + + for key in self.KEYS: + if key not in self._path_contexts: + self._path_contexts[key] = {} + path_context = self._path_contexts[key] + for b in block.next: + # path_context[bi][bj]: path context of path bj -> bi, bi is the successor + if b not in path_context: + path_context[b] = {} + # if there are no constraints, set the possible values to universal set + path_context[b][block] = self._universal_set(key) + + if isinstance(block.exit_instr, (BZ, BNZ)): + for key in self.KEYS: + # true_values: possible values for {key} which result in non-zero value on top of the stack + # false_values: possible values for {key} which result in zero value on top of the stack + # if the check is not related to the field, true_values and false_values will be universal sets + true_values, false_values = self._get_asserted(key, block.instructions[:-1]) + + if len(block.next) == 1: + # happens when bz/bnz is the last instruction in the contract and there is no default branch + default_branch = None + jump_branch = block.next[0] + else: + default_branch = block.next[0] + jump_branch = block.next[1] + + if isinstance(block.exit_instr, BZ): + # jump branch is taken if the comparison is false i.e not in asserted values + self._path_contexts[key][jump_branch][block] = false_values + # default branch is taken if the comparison is true i.e in asserted values + if default_branch is not None: + self._path_contexts[key][default_branch][block] = true_values + elif isinstance(block.exit_instr, BNZ): + # jump branch is taken if the comparison is true i.e in asserted values + self._path_contexts[key][jump_branch][block] = true_values + # default branch is taken if the comparison is false i.e not in asserted values + if default_branch is not None: + self._path_contexts[key][default_branch][block] = false_values + + def _merge_information(self, block: "BasicBlock") -> bool: + """Merge information for predecessors, successors for the :block: and return whether information is updated or not + + # path_transaction_context[b][bi] gives the transaction constraints for path bi -> b + block_transaction_context[b] = 
Union((block_transaction_context[bi] & path_transaction_context[b][bi]) for bi in predecessors[b]) \ + & block_transaction_context[b] \ + & Union(block_transaction_context[bi] for bi in successors[b]) + """ + + updated = False + for key in self.KEYS: + block_context = self._block_contexts[key] + path_context = self._path_contexts[key] + + new_block_context = self._union(key, block_context[block], block_context[block]) + + if len(block.prev) != 0: + prev_b = block.prev[0] + # TODO: While considering predecessor information, use dominator block information instead of + # all predecessors. Current approach doesn't consider constraints applied within the loop body + # blocks for the blocks outside the loop. Or use reverse postorder while constructing the block contexts(?) + predecessor_information = self._intersection( + key, block_context[prev_b], path_context[block][prev_b] + ) + for prev_b in block.prev[1:]: + predecessor_information = self._union( + key, + predecessor_information, + self._intersection(key, block_context[prev_b], path_context[block][prev_b]), + ) + + new_block_context = self._intersection( + key, predecessor_information, new_block_context + ) + + if len(block.next) != 0: + next_b = block.next[0] + successor_information = block_context[next_b] + for next_b in block.next[1:]: + successor_information = self._union( + key, successor_information, block_context[next_b] + ) + + new_block_context = self._intersection( + key, successor_information, new_block_context + ) + + if new_block_context != block_context[block]: + block_context[block] = new_block_context + updated = True + return updated + + def run_analysis(self) -> None: + """Run analysis""" + # phase 1 + for block in self._teal.bbs: + self._block_level_constraints(block) + self._path_level_constraints(block) + + # phase 2 + worklist = list(self._teal.bbs) + + while worklist: + b = worklist[0] + worklist = worklist[1:] + updated = self._merge_information(b) + + if updated: + for bi in b.prev + b.next: + if bi not in worklist: + worklist.append(bi) + + print([b.idx for b in worklist]) + self._store_results() diff --git a/tealer/analyses/dataflow/int_fields.py b/tealer/analyses/dataflow/int_fields.py new file mode 100644 index 0000000..d33067a --- /dev/null +++ b/tealer/analyses/dataflow/int_fields.py @@ -0,0 +1,130 @@ +from typing import TYPE_CHECKING, List, Set, Tuple, Dict + +from tealer.analyses.dataflow.generic import DataflowTransactionContext +from tealer.teal.instructions.instructions import ( + Global, + Eq, + Neq, + Greater, + GreaterE, + Less, + LessE, +) +from tealer.teal.global_field import GroupSize +from tealer.utils.analyses import is_int_push_ins + +if TYPE_CHECKING: + from tealer.teal.instructions.instructions import Instruction + +group_size_key = "GroupSize" +analysis_keys = [group_size_key] +universal_sets = {} +universal_sets[group_size_key] = list(range(1, 17)) + + +class IntFields(DataflowTransactionContext): # pylint: disable=too-few-public-methods + + GROUP_SIZE_KEY = group_size_key + KEYS = analysis_keys + UNIVERSAL_SETS: Dict[str, List] = universal_sets + + def _universal_set(self, key: str) -> Set: # pylint: disable=no-self-use + return set(self.UNIVERSAL_SETS[key]) + + def _null_set(self, key: str) -> Set: # pylint: disable=no-self-use + return set() + + def _union(self, key: str, a: Set, b: Set) -> Set: # pylint: disable=no-self-use + return a | b + + def _intersection(self, key: str, a: Set, b: Set) -> Set: # pylint: disable=no-self-use + return a & b + + def _get_asserted_int_values( # pylint: 
disable=no-self-use
+        self, comparison_ins: "Instruction", compared_int: int, universal_set: List[int]
+    ) -> List[int]:
+        """Return the list of ints from the universal set (U) that satisfy the comparison.
+
+        if the condition uses ==, return [compared_int].
+        if the condition uses !=, return U - {compared_int}.
+        if the condition uses <, return all values in U that are less than compared_int.
+        if the condition uses <=, return all values in U that are less than or equal to compared_int.
+        if the condition uses >, return all values in U that are greater than compared_int.
+        if the condition uses >=, return all values in U that are greater than or equal to compared_int.
+
+        Args:
+            comparison_ins: comparison instruction used; one of [==, !=, <, <=, >, >=].
+            compared_int: integer value compared.
+            universal_set: list of all possible integer values for the field.
+
+        Returns:
+            list of ints that satisfy the comparison
+        """
+        U = universal_set
+
+        if isinstance(comparison_ins, Eq):  # pylint: disable=no-else-return
+            return [compared_int]
+        elif isinstance(comparison_ins, Neq):
+            # return a new list instead of mutating the caller's universal_set
+            return [i for i in U if i != compared_int]
+        elif isinstance(comparison_ins, Less):
+            return [i for i in U if i < compared_int]
+        elif isinstance(comparison_ins, LessE):
+            return [i for i in U if i <= compared_int]
+        elif isinstance(comparison_ins, Greater):
+            return [i for i in U if i > compared_int]
+        elif isinstance(comparison_ins, GreaterE):
+            return [i for i in U if i >= compared_int]
+        else:
+            return U
+
+    def _get_asserted_groupsizes(self, ins_stack: List["Instruction"]) -> Tuple[Set[int], Set[int]]:
+        """Return the sets of GroupSize values that make the comparison true and false.
+
+        Checks for the instruction sequence
+
+        [ Global GroupSize | (int | pushint)]
+        [ (int | pushint)  | Global GroupSize]
+        [ == | != | < | <= | > | >=]
+
+        and computes the GroupSize values that make the comparison true and the values that make it false.
+
+        Args:
+            ins_stack: list of instructions that are executed up until the comparison instruction (including the comparison instruction).
+
+        Returns:
+            set of GroupSize values that will make the comparison true, set of GroupSize values that will make the comparison false.
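+
+        e.g. (illustrative) for the instruction sequence `global GroupSize; int 3; <`,
+        the returned sets would be ({1, 2}, {3, 4, ..., 16}).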
+ """ + U = list(self.UNIVERSAL_SETS[self.GROUP_SIZE_KEY]) + if len(ins_stack) < 3: + return set(U), set(U) + + if isinstance(ins_stack[-1], (Eq, Neq, Less, LessE, Greater, GreaterE)): + ins1 = ins_stack[-2] + ins2 = ins_stack[-3] + compared_value = None + + if isinstance(ins1, Global) and isinstance(ins1.field, GroupSize): + is_int, value = is_int_push_ins(ins2) + if is_int: + compared_value = value + elif isinstance(ins2, Global) and isinstance(ins2.field, GroupSize): + is_int, value = is_int_push_ins(ins1) + if is_int: + compared_value = value + + if compared_value is None or not isinstance(compared_value, int): + # if the comparison does not check groupsize, return U as values that make the comparison false + return set(U), set(U) + + ins = ins_stack[-1] + asserted_values = self._get_asserted_int_values(ins, compared_value, U) + return set(asserted_values), set(U) - set(asserted_values) + return set(U), set(U) + + def _get_asserted(self, key: str, ins_stack: List["Instruction"]) -> Tuple[Set, Set]: + return self._get_asserted_groupsizes(ins_stack) + + def _store_results(self) -> None: + group_size_block_context = self._block_contexts[self.GROUP_SIZE_KEY] + for block in self._teal.bbs: + block.transaction_context.group_sizes = list(group_size_block_context[block]) diff --git a/tealer/teal/basic_blocks.py b/tealer/teal/basic_blocks.py index d174d91..c6b7116 100644 --- a/tealer/teal/basic_blocks.py +++ b/tealer/teal/basic_blocks.py @@ -16,6 +16,8 @@ from typing import List, Optional, TYPE_CHECKING from tealer.teal.instructions.instructions import Instruction +from tealer.teal.context.block_transaction_context import BlockTransactionContext + if TYPE_CHECKING: from tealer.teal.teal import Teal @@ -37,6 +39,7 @@ def __init__(self) -> None: self._next: List[BasicBlock] = [] self._idx: int = 0 self._teal: Optional["Teal"] = None + self._transaction_context = BlockTransactionContext() def add_instruction(self, instruction: Instruction) -> None: """Append instruction to this basic block. 
@@ -127,6 +130,10 @@ def teal(self) -> Optional["Teal"]: def teal(self, teal_instance: "Teal") -> None: self._teal = teal_instance + @property + def transaction_context(self) -> "BlockTransactionContext": + return self._transaction_context + def __str__(self) -> str: ret = "" for ins in self._instructions: diff --git a/tealer/teal/context/__init__.py b/tealer/teal/context/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tealer/teal/context/block_transaction_context.py b/tealer/teal/context/block_transaction_context.py new file mode 100644 index 0000000..8b1dcaa --- /dev/null +++ b/tealer/teal/context/block_transaction_context.py @@ -0,0 +1,23 @@ +from typing import List, Optional + +from tealer.exceptions import TealerException + + +class BlockTransactionContext: # pylint: disable=too-few-public-methods + + _group_transactions_context: Optional[List["BlockTransactionContext"]] = None + + def __init__(self, tail: bool = False) -> None: + if not tail: + self._group_transactions_context = [BlockTransactionContext(True) for _ in range(16)] + + # set default values + self.group_sizes = list(range(1, 17)) + + def gtxn_context(self, txn_index: int) -> "BlockTransactionContext": + """context information collected from gtxn {txn_index} field instructions""" + if self._group_transactions_context is None: + raise TealerException() + if txn_index >= 16: + raise TealerException() + return self._group_transactions_context[txn_index] diff --git a/tealer/teal/parse_teal.py b/tealer/teal/parse_teal.py index edf99ba..ce6dcdb 100644 --- a/tealer/teal/parse_teal.py +++ b/tealer/teal/parse_teal.py @@ -25,6 +25,7 @@ """ +import inspect import sys from typing import Optional, Dict, List @@ -48,6 +49,8 @@ from tealer.teal.instructions.asset_params_field import AssetParamsField from tealer.teal.instructions.app_params_field import AppParamsField from tealer.teal.teal import Teal +from tealer.analyses.dataflow import all_constraints +from tealer.analyses.dataflow.generic import DataflowTransactionContext def _detect_contract_type(instructions: List[Instruction]) -> ContractType: @@ -455,6 +458,19 @@ def _verify_version(ins_list: List[Instruction], program_version: int) -> bool: return error +def _apply_transaction_context_analysis(teal: "Teal") -> None: + analyses_classes = [getattr(all_constraints, name) for name in dir(all_constraints)] + analyses_classes = [ + c + for c in analyses_classes + if inspect.isclass(c) and issubclass(c, DataflowTransactionContext) + ] + + for cl in analyses_classes: + obj = cl(teal) + obj.run_analysis() + + def parse_teal(source_code: str) -> Teal: """Parse algorand smart contracts written in teal. 
@@ -515,4 +531,6 @@ def parse_teal(source_code: str) -> Teal: for bb in teal.bbs: bb.teal = teal + _apply_transaction_context_analysis(teal) + return teal diff --git a/tealer/utils/analyses.py b/tealer/utils/analyses.py index 3f17933..62043aa 100644 --- a/tealer/utils/analyses.py +++ b/tealer/utils/analyses.py @@ -19,7 +19,7 @@ from tealer.teal.instructions.transaction_field import TransactionField, OnCompletion, ApplicationID -def _is_int_push_ins(ins: Instruction) -> Tuple[bool, Optional[Union[int, str]]]: +def is_int_push_ins(ins: Instruction) -> Tuple[bool, Optional[Union[int, str]]]: if isinstance(ins, Int) or isinstance( # pylint: disable=consider-merging-isinstance ins, PushInt ): @@ -123,7 +123,7 @@ def detect_missing_txn_check( if isinstance(ins, Return): if len(ins.prev) == 1: prev = ins.prev[0] - is_int_push, value = _is_int_push_ins(prev) + is_int_push, value = is_int_push_ins(prev) if is_int_push and value == 0: return @@ -172,7 +172,7 @@ def is_oncompletion_check(ins1: Instruction, ins2: Instruction, checked_values: integer_checked_values.append(ENUM_NAMES_TO_INT[named_constant]) if isinstance(ins1, Txn) and isinstance(ins1.field, OnCompletion): - is_int_push, value = _is_int_push_ins(ins2) + is_int_push, value = is_int_push_ins(ins2) return is_int_push and (value in checked_values or value in integer_checked_values) return False @@ -200,7 +200,7 @@ def is_application_creation_check(ins1: Instruction, ins2: Instruction) -> bool: """ if isinstance(ins1, Txn) and isinstance(ins1.field, ApplicationID): - is_int_push, value = _is_int_push_ins(ins2) + is_int_push, value = is_int_push_ins(ins2) return is_int_push and value == 0 return False @@ -256,7 +256,7 @@ def detect_missing_on_completion( # pylint: disable=too-many-branches, too-many if isinstance(ins, Return): if len(ins.prev) == 1: prev = ins.prev[0] - is_int_push, value = _is_int_push_ins(prev) + is_int_push, value = is_int_push_ins(prev) if is_int_push and value == 0: return diff --git a/tests/transaction_context/__init__.py b/tests/transaction_context/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/transaction_context/test_group_sizes.py b/tests/transaction_context/test_group_sizes.py new file mode 100644 index 0000000..c28627d --- /dev/null +++ b/tests/transaction_context/test_group_sizes.py @@ -0,0 +1,296 @@ +from typing import List, Tuple +import pytest + + +from tealer.teal.basic_blocks import BasicBlock +from tealer.teal.instructions import instructions +from tealer.teal.instructions import transaction_field +from tealer.teal import global_field +from tealer.teal.parse_teal import parse_teal + +from tests.utils import cmp_cfg, construct_cfg, order_basic_blocks + + +MULTIPLE_RETSUB = """ +#pragma version 5 +b main +is_even: + int 2 + % + bz return_1 + int 0 + global GroupSize + int 2 + == + assert + retsub +return_1: + global GroupSize + int 3 + < + assert + int 1 + retsub +main: + global GroupSize + int 1 + != + assert + int 4 + callsub is_even + return +""" + +ins_list = [ + instructions.Pragma(5), + instructions.B("main"), + instructions.Label("is_even"), + instructions.Int(2), + instructions.Modulo(), + instructions.BZ("return_1"), + instructions.Int(0), + instructions.Global(global_field.GroupSize()), + instructions.Int(2), + instructions.Eq(), + instructions.Assert(), + instructions.Retsub(), + instructions.Label("return_1"), + instructions.Global(global_field.GroupSize()), + instructions.Int(3), + instructions.Less(), + instructions.Assert(), + instructions.Int(1), + 
instructions.Retsub(), + instructions.Label("main"), + instructions.Global(global_field.GroupSize()), + instructions.Int(1), + instructions.Neq(), + instructions.Assert(), + instructions.Int(4), + instructions.Callsub("is_even"), + instructions.Return(), +] + +ins_partitions = [(0, 2), (2, 6), (6, 12), (12, 19), (19, 26), (26, 27)] +bbs_links = [(0, 4), (4, 1), (1, 2), (1, 3), (2, 5), (3, 5)] + +MULTIPLE_RETSUB_CFG_GROUP_SIZES = [[2], [2], [2], [2], [2], [2]] + +MULTIPLE_RETSUB_CFG = construct_cfg(ins_list, ins_partitions, bbs_links) + + +SUBROUTINE_BACK_JUMP = """ +#pragma version 5 +b main +getmod: + % + retsub +is_odd: + global GroupSize + int 4 + < + assert + global GroupSize + int 2 + != + assert + int 2 + b getmod +main: + int 5 + callsub is_odd + return +""" + +ins_list = [ + instructions.Pragma(5), + instructions.B("main"), + instructions.Label("getmod"), + instructions.Modulo(), + instructions.Retsub(), + instructions.Label("is_odd"), + instructions.Global(global_field.GroupSize()), + instructions.Int(4), + instructions.Less(), + instructions.Assert(), + instructions.Global(global_field.GroupSize()), + instructions.Int(2), + instructions.Neq(), + instructions.Assert(), + instructions.Int(2), + instructions.B("getmod"), + instructions.Label("main"), + instructions.Int(5), + instructions.Callsub("is_odd"), + instructions.Return(), +] + +ins_partitions = [(0, 2), (2, 5), (5, 16), (16, 19), (19, 20)] +bbs_links = [(0, 3), (3, 2), (2, 1), (1, 4)] + +SUBROUTINE_BACK_JUMP_CFG_GROUP_SIZES = [[1, 3], [1, 3], [1, 3], [1, 3], [1, 3], [1, 3]] +SUBROUTINE_BACK_JUMP_CFG = construct_cfg(ins_list, ins_partitions, bbs_links) + +BRANCHING = """ +#pragma version 4 +global GroupSize +int 2 +>= +assert +global GroupSize +int 4 +> +bz fin +global GroupSize +int 1 +== +bnz check_second_arg +int 0 +return +check_second_arg: +txn ApplicationArgs 1 +btoi +int 100 +> +bnz fin +int 0 +return +fin: +int 1 +return +""" + +ins_list = [ + instructions.Pragma(4), + instructions.Global(global_field.GroupSize()), + instructions.Int(2), + instructions.GreaterE(), + instructions.Assert(), + instructions.Global(global_field.GroupSize()), + instructions.Int(4), + instructions.Greater(), + instructions.BZ("fin"), + instructions.Global(global_field.GroupSize()), + instructions.Int(1), + instructions.Eq(), + instructions.BNZ("check_second_arg"), + instructions.Int(0), + instructions.Return(), + instructions.Label("check_second_arg"), + instructions.Txn(transaction_field.ApplicationArgs(1)), + instructions.Btoi(), + instructions.Int(100), + instructions.Greater(), + instructions.BNZ("fin"), + instructions.Int(0), + instructions.Return(), + instructions.Label("fin"), + instructions.Int(1), + instructions.Return(), +] + +ins_partitions = [(0, 9), (9, 13), (13, 15), (15, 21), (21, 23), (23, 26)] +bbs_links = [(0, 1), (0, 5), (1, 2), (1, 3), (3, 4), (3, 5)] + +BRANCHING_CFG_GROUP_SIZES = [[2, 3, 4], [], [], [], [], [2, 3, 4]] +BRANCHING_CFG = construct_cfg(ins_list, ins_partitions, bbs_links) + +LOOPS = """ +#pragma version 5 +global GroupSize +int 4 +!= +assert +int 0 +loop: + dup + global GroupSize + int 1 + >= + bz end + int 1 + + + global GroupSize + int 3 + < + assert + b loop +end: + int 2 + global GroupSize + == + assert + int 1 + return +""" + +ins_list = [ + instructions.Pragma(5), + instructions.Global(global_field.GroupSize()), + instructions.Int(4), + instructions.Neq(), + instructions.Assert(), + instructions.Int(0), + instructions.Label("loop"), + instructions.Dup(), + 
instructions.Global(global_field.GroupSize()), + instructions.Int(1), + instructions.GreaterE(), + instructions.BZ("end"), + instructions.Int(1), + instructions.Add(), + instructions.Global(global_field.GroupSize()), + instructions.Int(3), + instructions.Less(), + instructions.Assert(), + instructions.B("loop"), + instructions.Label("end"), + instructions.Int(2), + instructions.Global(global_field.GroupSize()), + instructions.Eq(), + instructions.Assert(), + instructions.Int(1), + instructions.Return(), +] + +ins_partitions = [(0, 6), (6, 12), (12, 19), (19, 26)] +bbs_links = [(0, 1), (1, 2), (1, 3), (2, 1)] + +LOOPS_CFG_GROUP_SIZES = [[1, 2], [1, 2], [1, 2], []] +LOOPS_CFG = construct_cfg(ins_list, ins_partitions, bbs_links) + +cfg_group_sizes = [ + (MULTIPLE_RETSUB_CFG, MULTIPLE_RETSUB_CFG_GROUP_SIZES), + (SUBROUTINE_BACK_JUMP_CFG, SUBROUTINE_BACK_JUMP_CFG_GROUP_SIZES), + (BRANCHING_CFG, BRANCHING_CFG_GROUP_SIZES), + (LOOPS_CFG, LOOPS_CFG_GROUP_SIZES), +] + +for cfg, sizes in cfg_group_sizes: + bb = order_basic_blocks(cfg) + for b, group_sizes in zip(bb, sizes): + b.transaction_context.group_sizes = group_sizes + + +ALL_TESTS = [ + (MULTIPLE_RETSUB, MULTIPLE_RETSUB_CFG), + (SUBROUTINE_BACK_JUMP, SUBROUTINE_BACK_JUMP_CFG), + (BRANCHING, BRANCHING_CFG), + (LOOPS, LOOPS_CFG), +] + + +@pytest.mark.parametrize("test", ALL_TESTS) # type: ignore +def test_cfg_construction(test: Tuple[str, List[BasicBlock]]) -> None: + code, cfg = test + teal = parse_teal(code.strip()) + for bb in cfg: + print(bb) + print("*" * 20) + assert cmp_cfg(teal.bbs, cfg) + + bbs = order_basic_blocks(teal.bbs) + cfg = order_basic_blocks(cfg) + for b1, b2 in zip(bbs, cfg): + print(b1.transaction_context.group_sizes, b2.transaction_context.group_sizes) + assert b1.transaction_context.group_sizes == b2.transaction_context.group_sizes From d5f3108c2a923d9e512a28553b72ddba34ef8844 Mon Sep 17 00:00:00 2001 From: Vara Prasad Bandaru Date: Mon, 19 Sep 2022 15:44:00 +0530 Subject: [PATCH 2/4] Add tx field analyses for group index field --- tealer/analyses/dataflow/int_fields.py | 58 ++++++- .../teal/context/block_transaction_context.py | 1 + .../transaction_context/test_group_indices.py | 146 ++++++++++++++++++ 3 files changed, 203 insertions(+), 2 deletions(-) create mode 100644 tests/transaction_context/test_group_indices.py diff --git a/tealer/analyses/dataflow/int_fields.py b/tealer/analyses/dataflow/int_fields.py index d33067a..ba52b74 100644 --- a/tealer/analyses/dataflow/int_fields.py +++ b/tealer/analyses/dataflow/int_fields.py @@ -9,22 +9,27 @@ GreaterE, Less, LessE, + Txn, ) from tealer.teal.global_field import GroupSize +from tealer.teal.instructions.transaction_field import GroupIndex from tealer.utils.analyses import is_int_push_ins if TYPE_CHECKING: from tealer.teal.instructions.instructions import Instruction group_size_key = "GroupSize" -analysis_keys = [group_size_key] +group_index_key = "GroupIndex" +analysis_keys = [group_size_key, group_index_key] universal_sets = {} universal_sets[group_size_key] = list(range(1, 17)) +universal_sets[group_index_key] = list(range(0, 16)) class IntFields(DataflowTransactionContext): # pylint: disable=too-few-public-methods GROUP_SIZE_KEY = group_size_key + GROUP_INDEX_KEY = group_index_key KEYS = analysis_keys UNIVERSAL_SETS: Dict[str, List] = universal_sets @@ -121,10 +126,59 @@ def _get_asserted_groupsizes(self, ins_stack: List["Instruction"]) -> Tuple[Set[ return set(asserted_values), set(U) - set(asserted_values) return set(U), set(U) + def _get_asserted_groupindices( + 
self, ins_stack: List["Instruction"] + ) -> Tuple[Set[int], Set[int]]: + """return list of values for group index that will make the comparison true and false + + checks for instruction sequence and returns group index values that will make the comparison true. + + [ txn GroupIndex | (int | pushint)] + [ (int | pushint) | txn GroupIndex] + [ == | != | < | <= | > | >=] + + Args: + ins_stack: list of instructions that are executed up until the comparison instruction (including the comparison instruction). + + Returns: + List of groupindex values that will make the comparison true. + """ + U = list(self.UNIVERSAL_SETS[self.GROUP_INDEX_KEY]) + if len(ins_stack) < 3: + return set(U), set(U) + + if isinstance(ins_stack[-1], (Eq, Neq, Less, LessE, Greater, GreaterE)): + ins1 = ins_stack[-2] + ins2 = ins_stack[-3] + compared_value = None + + if isinstance(ins1, Txn) and isinstance(ins1.field, GroupIndex): + is_int, value = is_int_push_ins(ins2) + if is_int: + compared_value = value + elif isinstance(ins2, Txn) and isinstance(ins2.field, GroupIndex): + is_int, value = is_int_push_ins(ins1) + if is_int: + compared_value = value + + if compared_value is None or not isinstance(compared_value, int): + return set(U), set(U) + + ins = ins_stack[-1] + asserted_values = self._get_asserted_int_values(ins, compared_value, U) + return set(asserted_values), set(U) - set(asserted_values) + return set(U), set(U) + def _get_asserted(self, key: str, ins_stack: List["Instruction"]) -> Tuple[Set, Set]: - return self._get_asserted_groupsizes(ins_stack) + if key == self.GROUP_SIZE_KEY: + return self._get_asserted_groupsizes(ins_stack) + return self._get_asserted_groupindices(ins_stack) def _store_results(self) -> None: group_size_block_context = self._block_contexts[self.GROUP_SIZE_KEY] for block in self._teal.bbs: block.transaction_context.group_sizes = list(group_size_block_context[block]) + + group_index_block_context = self._block_contexts[self.GROUP_INDEX_KEY] + for block in self._teal.bbs: + block.transaction_context.group_indices = list(group_index_block_context[block]) diff --git a/tealer/teal/context/block_transaction_context.py b/tealer/teal/context/block_transaction_context.py index 8b1dcaa..7fc3ad7 100644 --- a/tealer/teal/context/block_transaction_context.py +++ b/tealer/teal/context/block_transaction_context.py @@ -13,6 +13,7 @@ def __init__(self, tail: bool = False) -> None: # set default values self.group_sizes = list(range(1, 17)) + self.group_indices = list(range(0, 16)) def gtxn_context(self, txn_index: int) -> "BlockTransactionContext": """context information collected from gtxn {txn_index} field instructions""" diff --git a/tests/transaction_context/test_group_indices.py b/tests/transaction_context/test_group_indices.py new file mode 100644 index 0000000..5d74a7c --- /dev/null +++ b/tests/transaction_context/test_group_indices.py @@ -0,0 +1,146 @@ +from typing import List, Tuple +import pytest + +from tealer.teal.parse_teal import parse_teal +from tests.utils import order_basic_blocks + + +MULTIPLE_RETSUB = """ +#pragma version 5 +b main +is_even: + int 2 + % + bz return_1 + int 0 + txn GroupIndex + int 2 + == + assert + retsub +return_1: + txn GroupIndex + int 3 + < + assert + int 1 + retsub +main: + txn GroupIndex + int 1 + != + assert + int 4 + callsub is_even + return +""" + +MULTIPLE_RETSUB_GROUP_INDICES = [[0, 2], [0, 2], [2], [0, 2], [0, 2], [0, 2]] + +SUBROUTINE_BACK_JUMP = """ +#pragma version 5 +b main +getmod: + % + retsub +is_odd: + txn GroupIndex + int 4 + < + assert + txn GroupIndex 
+ int 2 + != + assert + int 2 + b getmod +main: + int 5 + callsub is_odd + return +""" + +SUBROUTINE_BACK_JUMP_GROUP_INDICES = [[0, 1, 3], [0, 1, 3], [0, 1, 3], [0, 1, 3], [0, 1, 3], [0, 1, 3]] + +BRANCHING = """ +#pragma version 4 +txn GroupIndex +int 2 +>= +assert +txn GroupIndex +int 4 +> +bz fin +txn GroupIndex +int 1 +== +bnz check_second_arg +int 0 +return +check_second_arg: +txn ApplicationArgs 1 +btoi +int 100 +> +bnz fin +int 0 +return +fin: +int 1 +return +""" + +BRANCHING_GROUP_INDICES = [[2, 3, 4], [], [], [], [], [2, 3, 4]] + +LOOPS = """ +#pragma version 5 +txn GroupIndex +int 4 +!= +assert +int 0 +loop: + dup + txn GroupIndex + int 1 + >= + bz end + int 1 + + + txn GroupIndex + int 3 + < + assert + b loop +end: + int 2 + txn GroupIndex + == + assert + int 1 + return +""" + + + +LOOPS_GROUP_INDICES = [[1, 2], [1, 2], [1, 2], []] + + +ALL_TESTS = [ + (MULTIPLE_RETSUB, MULTIPLE_RETSUB_GROUP_INDICES), + (SUBROUTINE_BACK_JUMP, SUBROUTINE_BACK_JUMP_GROUP_INDICES), + (BRANCHING, BRANCHING_GROUP_INDICES), + (LOOPS, LOOPS_GROUP_INDICES), +] + + +@pytest.mark.parametrize("test", ALL_TESTS) # type: ignore +def test_cfg_construction(test: Tuple[str, List[List[int]]]) -> None: + code, group_indices = test + teal = parse_teal(code.strip()) + + bbs = order_basic_blocks(teal.bbs) + for b, indices in zip(bbs, group_indices): + assert b.transaction_context.group_indices == indices + From 32a978dabbd2687188df553f7d1000a631c8a684 Mon Sep 17 00:00:00 2001 From: Vara Prasad Bandaru Date: Sat, 12 Nov 2022 00:44:27 +0530 Subject: [PATCH 3/4] Fix dataflow analyses for loops and subroutines --- .github/workflows/pytest.yml | 5 + tealer/analyses/dataflow/generic.py | 321 ++++++++++++++---- tealer/teal/basic_blocks.py | 26 +- tealer/teal/instructions/instructions.py | 22 +- tealer/teal/parse_teal.py | 9 + .../transaction_context/test_group_indices.py | 6 +- tests/transaction_context/test_group_sizes.py | 8 +- 7 files changed, 322 insertions(+), 75 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 8da51e1..e17c77e 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -54,3 +54,8 @@ jobs: - name: Run detectors tests run: | pytest tests/test_detectors.py + + - name: Run dataflow analysis tests + run: | + pytest tests/transaction_context/test_group_sizes.py + pytest tests/transaction_context/test_group_indices.py diff --git a/tealer/analyses/dataflow/generic.py b/tealer/analyses/dataflow/generic.py index 117dcaa..4ec5a35 100644 --- a/tealer/analyses/dataflow/generic.py +++ b/tealer/analyses/dataflow/generic.py @@ -1,23 +1,148 @@ """Defines generic class for dataflow analysis to find constraints on transaction fields. -Possible values for a field are considered to be a set, referred to as universal set(U) for that field. -if U is finite and small, values are enumerated and are stored in the context. However, in case U is large -for example, address type fields have very large set, Enum type values are used to represent U and NullSet. - -Algorithm works as: - - Collect constraints asserted within the block and constraints specific for each path, happens if bz/bnz are - directly used on the constraint. 
-    - Use fix point algorithm and repeatedly merge information until no new information is found
-
-Equation for merging:
-    # path_transaction_context[b][bi] gives the transaction constraints for path bi -> b
-    block_transaction_context[b] = Union((block_transaction_context[bi] & path_transaction_context[b][bi]) for bi in predecessors[b]) \
-                                    & block_transaction_context[b] \
-                                    & Union(block_transaction_context[bi] for bi in successors[b])
+Possible values for a field are considered to be a set, referred to as the universal set `U` for that field.
+If U is finite and small, the values are enumerated and stored in the context. However, when U is large,
+such as for address type fields, enum values are used to represent UniversalSet and NullSet.
+
+For a given `key` and a `basic_block`, block_contexts[key][basic_block] are values (V) such that, if the transaction field represented by the `key`
+is set to one of the values present in V, then:
+    - The execution might reach the `basic_block`
+    - The execution might successfully reach the end of the `basic_block`
+    - The execution might reach a leaf basic block which results in successful completion of execution.
+
+block_contexts is computed in three steps, each step making the information more precise.
+1: In the first step, only local information is considered. For each block, information inferred from the instructions present in the
+    block is computed.
+    - if the basic block contains the instructions `assert(txn OnCompletion == int UpdateApplication)`, then the block context of this block
+        for transaction types will be equal to `{ApplUpdateApplication}`.
+    - if the basic block errors, i.e. contains an `err` instruction or `return 0`, then the block context will be NullSet.
+    - if the instructions in the block do not provide any information related to the field, then the block context will be equal to
+        UniversalSet (all possible values) for that key.
+2: In the second step, information from the predecessors is considered. For this, forward dataflow analysis is used.
+    This problem is analogous to the reaching definitions problem:
+        I Each possible value is a definition that is defined at the start of execution, i.e. defined at the start of the entry block.
+        II The definition(value) will reach the start of the basic block if it reaches the end of one of its predecessors.
+        III The definition(value) will reach the end of the block if it is preserved by the basic block or defined in the basic block.
+        IV No definition(value) is defined in a basic block.
+        V The definition(value) will reach the start of the basic block only if the condition specific to reaching this block is satisfied.
+            The condition used to determine the branch taken can contain constraints related to the analysis: developers can branch to
+            an error block or a success block based on the transaction field value. Path-based context is used to combine this
+            information in the forward analysis.
+
+    Equations:
+        initialization:
+            RCHin(entry) = UniversalSet() - from (I)
+            RCHout(b) = NullSet() for all basic blocks b.
+        fixed point iteration:
+            RCHin(b) = union(intersection(RCHout(prev_b), path_context[b][prev_b]) for prev_b in predecessors) - from (II), (V)
+            RCHout(b) = union(GEN(b), intersection(RCHin(b), PRSV(b))) - from (III)
+        GEN(b) = NullSet() for all basic blocks b - from (IV), so the second equation simplifies to
+            RCHout(b) = intersection(RCHin(b), PRSV(b))
+
+    `PRSV(b)` is the `block_contexts` computed in the first step.
+    Reverse postorder ordering is used for the iteration algorithm.
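+
+    A small worked example (illustrative values): for a block b with predecessors p1 and p2,
+    where RCHout(p1) = {1, 2}, path_context[b][p1] = {2}, RCHout(p2) = {3} and
+    path_context[b][p2] = {3, 4}, the first equation gives
+    RCHin(b) = union({1, 2} & {2}, {3} & {3, 4}) = union({2}, {3}) = {2, 3}.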
+
+3: Finally, information from the successors is combined using backward dataflow analysis similar to live-variable analysis.
+    - `block_contexts` is initialized to the reach-out (RCHout) information computed in the second step.
+    - For leaf blocks, a value is live if the value is preserved by the block.
+    - For other blocks, a value is live if the value is used (preserved) by one of the successors.
+    Equations:
+        initialization:
+            LIVEout(b) = NullSet() for all non-leaf blocks.
+            LIVEout(b) = PRSV(b) for all leaf blocks.
+        fixed point iteration:
+            LIVEin(b) = union(LIVEout(succ_b) for succ_b in successors)
+            LIVEout(b) = intersection(LIVEin(b), PRSV(b))
+
+    `PRSV(b)` is the `block_contexts` after the second step.
+    Postorder ordering is used for the iteration algorithm.
+
+Blocks containing a `callsub` instruction and blocks which come right after the `callsub` instruction are
+treated differently.
+e.g.
+    main:   // Basic Block B1
+        int 2
+        retsub
+
+    path_1: // Basic Block B2
+        txn OnCompletion
+        int UpdateApplication
+        ==
+        assert
+        callsub main
+
+        byte "PATH_1" // Basic Block B3
+        int 1
+        return
+
+    path_2: // Basic Block B4
+        txn OnCompletion
+        int DeleteApplication
+        ==
+        assert
+        callsub main
+
+        byte "PATH_2" // Basic Block B5
+        int 1
+        return
+
+CFG:
+                B2
+  +--------------------------+
+  | 4: path_1:               |
+  | 5: txn OnCompletion      |
+  | 6: int UpdateApplication |
+  | 7: ==                    |
+  | 8: assert                |
+  | 9: callsub main          |
+  +--------------------------+
+    |
+    |
+B4  v                              B1                                B5
++---------------------------+     +--------------------------+     +-------------------+
+| 13: path_2:               |     |                          |     |                   |
+| 14: txn OnCompletion      |     | 1: main:                 |     | 19: byte "PATH_2" |
+| 15: int DeleteApplication |     | 2: int 2                 |     | 20: int 1         |
+| 16: ==                    |     | 3: retsub                |     | 21: return        |
+| 17: assert                |     |                          |     |                   |
+| 18: callsub main          | --> |                          | --> |                   |
++---------------------------+     +--------------------------+     +-------------------+
+    |
+    |
+    v  B3
+  +--------------------------+
+  | 10: byte "PATH_1"        |
+  | 11: int 1                |
+  | 12: return               |
+  +--------------------------+
+
+CFG will have edges:
+    - B2 -> B1   # callsub instruction transfers the execution to the called subroutine
+    - B4 -> B1
+    - B1 -> B3   # execution returns to the instruction present right after the callsub instruction
+    - B1 -> B5
+
+B3 and B5 are return points of the subroutine.
+B3 is only executed if execution reaches B2 => block_context for txn type is `{UpdateApplication}`.
+Similarly, B5 is only executed if execution reaches B4 => block_context for txn type is `{DeleteApplication}`.
+
+block_contexts["TransactionType"][B1] = `{UpdateApplication, DeleteApplication}`  # from B2 -> B1 and B4 -> B1.
+
+B3 and B5 are return points and have only one predecessor (B1) in the CFG. As a result, a naive merge would give
+block_contexts["TransactionType"][B3] = `{UpdateApplication, DeleteApplication}`
+block_contexts["TransactionType"][B5] = `{UpdateApplication, DeleteApplication}`.
+
+This is because, when traversing the CFG without differentiating subroutine blocks from the others, the possible execution paths will be:
+1. B2 -> B1 -> B3
+2. B4 -> B1 -> B5
+3. B2 -> B1 -> B5 # won't be possible at runtime.
+4. B4 -> B1 -> B3 # won't be possible at runtime.
+
+However, at runtime, execution will reach B3 if and only if it reaches B2, and it will reach B5 if and only if it reaches B4.
+Using this reasoning while combining information from predecessors and successors gives more accurate results.
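+
+Usage sketch (illustrative; `SomeFieldAnalysis` is a placeholder for a concrete subclass, such as the
+int-field analysis in int_fields.py, and the flow mirrors how parse_teal.py drives the analysis):
+
+    teal = parse_teal(source_code)      # build the CFG
+    analysis = SomeFieldAnalysis(teal)  # a DataflowTransactionContext subclass
+    analysis.run_analysis()             # stores the results in each block's transaction_context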
""" from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any, Dict, List, Tuple +from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Set from tealer.teal.instructions.instructions import ( Assert, @@ -46,10 +171,16 @@ class DataflowTransactionContext(ABC): # pylint: disable=too-few-public-methods def __init__(self, teal: "Teal"): self._teal: "Teal" = teal + # entry block of CFG + self._entry_block: "BasicBlock" = teal.bbs[ + 0 + ] # blocks are ordered by entry instruction in parsing stage. # self._block_contexts[KEY][B] -> block_context of KEY for block B self._block_contexts: Dict[str, Dict["BasicBlock", Any]] = {} # self._path_contexts[KEY][Bi][Bj] -> path_context of KEY for path Bj -> Bi self._path_contexts: Dict[str, Dict["BasicBlock", Dict["BasicBlock", Any]]] = {} + self._reachout: Dict[str, Dict["BasicBlock", Any]] = {} # used for forward analysis + self._liveout: Dict[str, Dict["BasicBlock", Any]] = {} # used for backward analysis if not self.KEYS: raise IncorrectDataflowTransactionContextInitialization( f"KEYS are not initialized {self.__class__.__name__}" @@ -61,11 +192,11 @@ def _gtx_key(self, idx: int, key: str) -> str: # pylint: disable=no-self-use @abstractmethod def _universal_set(self, key: str) -> Any: - """Return universal set of the field corresponding to given key""" + """Return universal set for the field corresponding to given key""" @abstractmethod def _null_set(self, key: str) -> Any: - """Return null set of the field corresponding to given key""" + """Return null set for the field corresponding to given key""" @abstractmethod def _union(self, key: str, a: Any, b: Any) -> Any: @@ -177,77 +308,135 @@ def _path_level_constraints(self, block: "BasicBlock") -> None: if default_branch is not None: self._path_contexts[key][default_branch][block] = false_values - def _merge_information(self, block: "BasicBlock") -> bool: - """Merge information for predecessors, successors for the :block: and return whether information is updated or not + def _calculate_reachin(self, key: str, block: "BasicBlock") -> Any: + if block == self._entry_block: + # We are considering each possible value as a definition defined at the start of entry block. 
+ reachin_information = self._universal_set(key) + else: + reachin_information = self._null_set(key) + + reachout = self._reachout[key] + path_context = self._path_contexts[key] + for prev_b in block.prev: + reachin_information = self._union( + key, + reachin_information, + self._intersection(key, reachout[prev_b], path_context[block][prev_b]), + ) - # path_transaction_context[b][bi] gives the transaction constraints for path bi -> b - block_transaction_context[b] = Union((block_transaction_context[bi] & path_transaction_context[b][bi]) for bi in predecessors[b]) \ - & block_transaction_context[b] \ - & Union(block_transaction_context[bi] for bi in successors[b]) - """ + if block.callsub_block is not None: + # this block is the return point for callsub instruction present in `block.callsub_block` + # execution will only reach this block, if it reaches `block.callsub_block` + reachin_information = self._intersection( + key, reachin_information, reachout[block.callsub_block] + ) - updated = False - for key in self.KEYS: - block_context = self._block_contexts[key] - path_context = self._path_contexts[key] + return reachin_information - new_block_context = self._union(key, block_context[block], block_context[block]) - - if len(block.prev) != 0: - prev_b = block.prev[0] - # TODO: While considering predecessor information, use dominator block information instead of - # all predecessors. Current approach doesn't consider constraints applied within the loop body - # blocks for the blocks outside the loop. Or use reverse postorder while constructing the block contexts(?) - predecessor_information = self._intersection( - key, block_context[prev_b], path_context[block][prev_b] - ) - for prev_b in block.prev[1:]: - predecessor_information = self._union( - key, - predecessor_information, - self._intersection(key, block_context[prev_b], path_context[block][prev_b]), - ) + def _calculate_livein(self, key: str, block: "BasicBlock") -> Any: + liveout = self._liveout[key] + livein_information = self._null_set(key) - new_block_context = self._intersection( - key, predecessor_information, new_block_context - ) + for next_b in block.next: + livein_information = self._union(key, livein_information, liveout[next_b]) - if len(block.next) != 0: - next_b = block.next[0] - successor_information = block_context[next_b] - for next_b in block.next[1:]: - successor_information = self._union( - key, successor_information, block_context[next_b] - ) + if block.sub_return_point is not None: + # this block is the `callsub block` and `block.sub_return_point` is the block that will be executed after subroutine. 
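+            # so a value can be live before the call only if it is also live at the
+            # return point, because execution always continues there after the subroutine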
+ livein_information = self._intersection( + key, livein_information, liveout[block.sub_return_point] + ) + return livein_information - new_block_context = self._intersection( - key, successor_information, new_block_context - ) + def _merge_information_forward(self, block: "BasicBlock") -> bool: + updated = False + for key in self.KEYS: + # RCHout(b) = intersection(RCHin(b), PRSV(b)) + new_reachout = self._intersection( + key, self._calculate_reachin(key, block), self._block_contexts[key][block] + ) + if new_reachout != self._reachout[key][block]: + self._reachout[key][block] = new_reachout + updated = True + return updated - if new_block_context != block_context[block]: - block_context[block] = new_block_context + def _merge_information_backward(self, block: "BasicBlock") -> bool: + if len(block.next) == 0: # leaf block + return False + + updated = False + for key in self.KEYS: + new_liveout = self._intersection( + key, self._calculate_livein(key, block), self._block_contexts[key][block] + ) + if new_liveout != self._liveout[key][block]: + self._liveout[key][block] = new_liveout updated = True return updated - def run_analysis(self) -> None: + @staticmethod + def _postorder(entry: "BasicBlock") -> List["BasicBlock"]: + visited: Set["BasicBlock"] = set() + order: List["BasicBlock"] = [] + + def dfs(block: "BasicBlock") -> None: + visited.add(block) + for successor in block.next: + if not successor in visited: + dfs(successor) + order.append(block) + + dfs(entry) + return order + + def run_analysis(self) -> None: # pylint: disable=too-many-branches """Run analysis""" - # phase 1 + # step 1 for block in self._teal.bbs: self._block_level_constraints(block) self._path_level_constraints(block) - # phase 2 - worklist = list(self._teal.bbs) + # step 2 initialization + for key in self.KEYS: + self._reachout[key] = {} + for b in self._teal.bbs: + self._reachout[key][b] = self._null_set(key) + + postorder = self._postorder(self._entry_block) + worklist = postorder[::-1] # Reverse postorder + + while worklist: + b = worklist[0] + worklist = worklist[1:] + updated = self._merge_information_forward(b) + + if updated: + return_point_block = [b.sub_return_point] if b.sub_return_point is not None else [] + for bi in b.next + return_point_block: + if bi not in worklist: + worklist.append(bi) + + # step 3 + self._block_contexts = self._reachout + for key in self.KEYS: + self._liveout[key] = {} + for b in self._teal.bbs: + if len(b.next) == 0: # leaf block + self._liveout[key][b] = self._block_contexts[key][b] + else: + self._liveout[key][b] = self._null_set(key) + + worklist = [b for b in postorder if len(b.next) != 0] while worklist: b = worklist[0] worklist = worklist[1:] - updated = self._merge_information(b) + updated = self._merge_information_backward(b) if updated: - for bi in b.prev + b.next: + callsub_block = [b.callsub_block] if b.callsub_block is not None else [] + for bi in b.prev + callsub_block: if bi not in worklist: worklist.append(bi) - print([b.idx for b in worklist]) + self._block_contexts = self._liveout self._store_results() diff --git a/tealer/teal/basic_blocks.py b/tealer/teal/basic_blocks.py index c6b7116..991c760 100644 --- a/tealer/teal/basic_blocks.py +++ b/tealer/teal/basic_blocks.py @@ -23,7 +23,7 @@ from tealer.teal.teal import Teal -class BasicBlock: +class BasicBlock: # pylint: disable=too-many-instance-attributes """Class to represent basic blocks of the teal contract. 
A basic block is a sequence of instructions with a single entry @@ -40,6 +40,8 @@ def __init__(self) -> None: self._idx: int = 0 self._teal: Optional["Teal"] = None self._transaction_context = BlockTransactionContext() + self._callsub_block: Optional[BasicBlock] = None + self._sub_return_point: Optional[BasicBlock] = None def add_instruction(self, instruction: Instruction) -> None: """Append instruction to this basic block. @@ -116,6 +118,28 @@ def idx(self) -> int: def idx(self, i: int) -> None: self._idx = i + @property + def callsub_block(self) -> Optional["BasicBlock"]: + """If this block is the return point of a subroutine, `callsub_block` is the block + that called the subroutine. + """ + return self._callsub_block + + @callsub_block.setter + def callsub_block(self, b: "BasicBlock") -> None: + self._callsub_block = b + + @property + def sub_return_point(self) -> Optional["BasicBlock"]: + """If a subroutine is executed after this block i.e exit instruction is callsub. + then, sub_return_point will be basic block that will be executed after the subroutine. + """ + return self._sub_return_point + + @sub_return_point.setter + def sub_return_point(self, b: "BasicBlock") -> None: + self._sub_return_point = b + @property def cost(self) -> int: """cost of executing all instructions in this basic block""" diff --git a/tealer/teal/instructions/instructions.py b/tealer/teal/instructions/instructions.py index 4b40625..313f651 100644 --- a/tealer/teal/instructions/instructions.py +++ b/tealer/teal/instructions/instructions.py @@ -49,7 +49,7 @@ class ContractType(ComparableEnum): } -class Instruction: +class Instruction: # pylint: disable=too-many-instance-attributes """Base class for Teal instructions. Any class that represents a teal instruction must inherit from @@ -66,6 +66,7 @@ def __init__(self) -> None: self._bb: Optional["BasicBlock"] = None self._version: int = 1 self._mode: ContractType = ContractType.ANY + self._callsub_ins: Optional["Instruction"] = None def add_prev(self, prev_ins: "Instruction") -> None: """Add instruction that may execute just before this instruction. @@ -137,6 +138,25 @@ def bb(self) -> Optional["BasicBlock"]: def bb(self, b: "BasicBlock") -> None: self._bb = b + @property + def callsub_ins(self) -> Optional["Instruction"]: + """if this instruction is a return point to a callsub instruction i.e callsub instruction is + present right before this instruction, then callsub_ins returns a reference to that callsub + instruction object. + + e.g + callsub main + int 1 + return + + callsub_ins of `int 1` will be instruction obj of `callsub main`. + """ + return self._callsub_ins + + @callsub_ins.setter + def callsub_ins(self, ins: "Instruction") -> None: + self._callsub_ins = ins + @property def version(self) -> int: """Teal version this instruction is introduced in and supported from.""" diff --git a/tealer/teal/parse_teal.py b/tealer/teal/parse_teal.py index ce6dcdb..b81711d 100644 --- a/tealer/teal/parse_teal.py +++ b/tealer/teal/parse_teal.py @@ -127,6 +127,14 @@ def create_bb(instructions: List[Instruction], all_bbs: List[BasicBlock]) -> Non bb.add_instruction(ins) ins.bb = bb + if ins.callsub_ins is not None and ins.bb is not None: + # callsub is before this instruction in the code. 
so, bb should have been assigned + # already + callsub_basic_block = ins.callsub_ins.bb + if callsub_basic_block is not None: + ins.bb.callsub_block = callsub_basic_block + callsub_basic_block.sub_return_point = ins.bb + if len(ins.next) > 1 and not isinstance(ins, Retsub): if not isinstance(ins.next[0], Label): next_bb = BasicBlock() @@ -213,6 +221,7 @@ def _first_pass( rets[call.label].append(ins) else: rets[call.label] = [ins] + ins.callsub_ins = call # ins is the return point when call is executed. # Now prepare for the next-line instruction # A flag that says that this was an unconditional jump diff --git a/tests/transaction_context/test_group_indices.py b/tests/transaction_context/test_group_indices.py index 5d74a7c..f630552 100644 --- a/tests/transaction_context/test_group_indices.py +++ b/tests/transaction_context/test_group_indices.py @@ -103,7 +103,7 @@ loop: dup txn GroupIndex - int 1 + int 3 >= bz end int 1 @@ -124,7 +124,7 @@ -LOOPS_GROUP_INDICES = [[1, 2], [1, 2], [1, 2], []] +LOOPS_GROUP_INDICES = [[2], [2], [], [2]] ALL_TESTS = [ @@ -136,7 +136,7 @@ @pytest.mark.parametrize("test", ALL_TESTS) # type: ignore -def test_cfg_construction(test: Tuple[str, List[List[int]]]) -> None: +def test_group_indices(test: Tuple[str, List[List[int]]]) -> None: code, group_indices = test teal = parse_teal(code.strip()) diff --git a/tests/transaction_context/test_group_sizes.py b/tests/transaction_context/test_group_sizes.py index c28627d..e49d259 100644 --- a/tests/transaction_context/test_group_sizes.py +++ b/tests/transaction_context/test_group_sizes.py @@ -205,7 +205,7 @@ loop: dup global GroupSize - int 1 + int 3 >= bz end int 1 @@ -234,7 +234,7 @@ instructions.Label("loop"), instructions.Dup(), instructions.Global(global_field.GroupSize()), - instructions.Int(1), + instructions.Int(3), instructions.GreaterE(), instructions.BZ("end"), instructions.Int(1), @@ -256,7 +256,7 @@ ins_partitions = [(0, 6), (6, 12), (12, 19), (19, 26)] bbs_links = [(0, 1), (1, 2), (1, 3), (2, 1)] -LOOPS_CFG_GROUP_SIZES = [[1, 2], [1, 2], [1, 2], []] +LOOPS_CFG_GROUP_SIZES = [[2], [2], [], [2]] LOOPS_CFG = construct_cfg(ins_list, ins_partitions, bbs_links) cfg_group_sizes = [ @@ -281,7 +281,7 @@ @pytest.mark.parametrize("test", ALL_TESTS) # type: ignore -def test_cfg_construction(test: Tuple[str, List[BasicBlock]]) -> None: +def test_group_sizes(test: Tuple[str, List[BasicBlock]]) -> None: code, cfg = test teal = parse_teal(code.strip()) for bb in cfg: From 9fba884a0a66bfcc75a895b386b12e9cef3418e9 Mon Sep 17 00:00:00 2001 From: Vara Prasad Bandaru Date: Wed, 7 Dec 2022 14:20:08 +0530 Subject: [PATCH 4/4] Use txn info and group indices to constraint gtxn info --- tealer/analyses/dataflow/all_constraints.py | 2 +- tealer/analyses/dataflow/generic.py | 244 ++++++++++++------ tealer/analyses/dataflow/int_fields.py | 33 ++- .../teal/context/block_transaction_context.py | 12 +- tealer/teal/parse_teal.py | 11 +- tealer/utils/algorand_constants.py | 2 + .../transaction_context/test_group_indices.py | 40 ++- tests/transaction_context/test_group_sizes.py | 23 ++ 8 files changed, 271 insertions(+), 96 deletions(-) create mode 100644 tealer/utils/algorand_constants.py diff --git a/tealer/analyses/dataflow/all_constraints.py b/tealer/analyses/dataflow/all_constraints.py index 8c15cca..f8b0fd7 100644 --- a/tealer/analyses/dataflow/all_constraints.py +++ b/tealer/analyses/dataflow/all_constraints.py @@ -1,2 +1,2 @@ # pylint: disable=unused-import -from tealer.analyses.dataflow.int_fields import IntFields +from 
diff --git a/tealer/analyses/dataflow/generic.py b/tealer/analyses/dataflow/generic.py
index 4ec5a35..98b2471 100644
--- a/tealer/analyses/dataflow/generic.py
+++ b/tealer/analyses/dataflow/generic.py
@@ -153,6 +153,7 @@
 )
 
 from tealer.utils.analyses import is_int_push_ins
+from tealer.utils.algorand_constants import MAX_GROUP_SIZE
 
 if TYPE_CHECKING:
     from tealer.teal.teal import Teal
@@ -167,7 +168,10 @@ class IncorrectDataflowTransactionContextInitialization(Exception):
 class DataflowTransactionContext(ABC):  # pylint: disable=too-few-public-methods
 
     # List of keys, unique and separate context is stored for each key.
-    KEYS: List[str] = []
+    # each key represents a transaction field.
+    BASE_KEYS: List[str] = []
+    # BASE_KEYS for which transaction context information from `gtxn {i} {field}` is also stored.
+    KEYS_WITH_GTXN: List[str] = []  # every key in this list must also be present in BASE_KEYS.
 
     def __init__(self, teal: "Teal"):
         self._teal: "Teal" = teal
@@ -179,14 +183,13 @@ def __init__(self, teal: "Teal"):
         self._block_contexts: Dict[str, Dict["BasicBlock", Any]] = {}
         # self._path_contexts[KEY][Bi][Bj] -> path_context of KEY for path Bj -> Bi
         self._path_contexts: Dict[str, Dict["BasicBlock", Dict["BasicBlock", Any]]] = {}
-        self._reachout: Dict[str, Dict["BasicBlock", Any]] = {}  # used for forward analysis
-        self._liveout: Dict[str, Dict["BasicBlock", Any]] = {}  # used for backward analysis
-        if not self.KEYS:
+        if not self.BASE_KEYS:
             raise IncorrectDataflowTransactionContextInitialization(
-                f"KEYS are not initialized {self.__class__.__name__}"
+                f"BASE_KEYS are not initialized {self.__class__.__name__}"
             )
 
-    def _gtx_key(self, idx: int, key: str) -> str:  # pylint: disable=no-self-use
+    @staticmethod
+    def gtx_key(idx: int, key: str) -> str:
         """return key used for tracking context of gtxn {idx} {field represented by key}"""
         return f"GTXN_{idx:02d}_{key}"
 
@@ -220,7 +223,7 @@ def _get_asserted(self, key: str, ins_stack: List["Instruction"]) -> Tuple[Any,
     def _store_results(self) -> None:
         """Store the collected information in the context object of each block"""
 
-    def _block_level_constraints(self, block: "BasicBlock") -> None:
+    def _block_level_constraints(self, analysis_keys: List[str], block: "BasicBlock") -> None:
         """Calculate and store constraints on keys applied within the block.
 
         By default, no constraints are considered i.e values are assumed to be universal_set
@@ -230,7 +233,7 @@ def _block_level_constraints(self, block: "BasicBlock") -> None:
         asserted. Values are stored in self._block_contexts
         self._block_contexts[KEY][B] -> block_context of KEY for block B
         """
-        for key in self.KEYS:
+        for key in analysis_keys:
             if key not in self._block_contexts:
                 self._block_contexts[key] = {}
             self._block_contexts[key][block] = self._universal_set(key)
@@ -238,7 +241,7 @@ def _block_level_constraints(self, block: "BasicBlock") -> None:
         stack: List["Instruction"] = []
         for ins in block.instructions:
             if isinstance(ins, Assert):
-                for key in self.KEYS:
+                for key in analysis_keys:
                     asserted_values, _ = self._get_asserted(key, stack)
                     present_values = self._block_contexts[key][block]
                     self._block_contexts[key][block] = self._intersection(
@@ -250,16 +253,16 @@ def _block_level_constraints(self, block: "BasicBlock") -> None:
                 if len(ins.prev) == 1:
                     is_int, value = is_int_push_ins(ins.prev[0])
                     if is_int and value == 0:
-                        for key in self.KEYS:
+                        for key in analysis_keys:
                             self._block_contexts[key][block] = self._null_set(key)
 
             if isinstance(ins, Err):
-                for key in self.KEYS:
+                for key in analysis_keys:
                     self._block_contexts[key][block] = self._null_set(key)
 
             stack.append(ins)
 
-    def _path_level_constraints(self, block: "BasicBlock") -> None:
+    def _path_level_constraints(self, analysis_keys: List[str], block: "BasicBlock") -> None:
         """Calculate and store constraints on keys applied along each path.
 
         By default, no constraints are considered i.e values are assumed to be universal_set
@@ -269,7 +272,7 @@ def _path_level_constraints(self, block: "BasicBlock") -> None:
 
         self._path_contexts[KEY][Bi][Bj] -> path_context of KEY for path Bj -> Bi
         """
-        for key in self.KEYS:
+        for key in analysis_keys:
             if key not in self._path_contexts:
                 self._path_contexts[key] = {}
             path_context = self._path_contexts[key]
@@ -281,7 +284,7 @@ def _path_level_constraints(self, block: "BasicBlock") -> None:
                 path_context[b][block] = self._universal_set(key)
 
         if isinstance(block.exit_instr, (BZ, BNZ)):
-            for key in self.KEYS:
+            for key in analysis_keys:
                 # true_values: possible values for {key} which result in non-zero value on top of the stack
                 # false_values: possible values for {key} which result in zero value on top of the stack
                 # if the check is not related to the field, true_values and false_values will be universal sets
@@ -308,14 +311,52 @@ def _path_level_constraints(self, block: "BasicBlock") -> None:
                 if default_branch is not None:
                     self._path_contexts[key][default_branch][block] = false_values
 
-    def _calculate_reachin(self, key: str, block: "BasicBlock") -> Any:
+    def _update_gtxn_constraints(self, keys_with_gtxn: List[str], block: "BasicBlock") -> None:
+        """Use txn constraints and group indices to update `gtxn {i} {field}` constraints.
+
+        `block.transaction_context.group_indices` contains the indices the `txn` can have.
+
+        The values of each key represent possible values for that field. Values of `GTXN_0_RekeyTo` are
+        possible values for `gtxn 0 RekeyTo`, i.e. possible `RekeyTo` field values of the transaction at index 0.
+
+        self._block_contexts[f"GTXN_{idx}_{key}"] stores the information collected from
+        instructions `gtxn {idx} {field}`. This information represents validations performed
+        on the `txn {field}` by accessing it through `gtxn {idx} {field}`.
+
+        e.g. if the index of the `txn` must be 0, then `txn RekeyTo` is the same as `gtxn 0 RekeyTo`.
+        Similarly, if the index of `txn` can be `0` or `1`, then checking `txn RekeyTo` is equivalent to
+        checking both `gtxn 0 RekeyTo` and `gtxn 1 RekeyTo`.
+
+        If `i` is not a possible index of `txn` for basic block `B`, then the possible values for `txn {field}`
+        accessed through `gtxn {i} {field}` are the null set, because `txn` can never have index `i`, and
+        `gtxn {i} {field}` is a field of `txn` only when the index of `txn` is `i`.
+
+        This requires that the group_indices analysis is done before any other analysis.
+        """
+        for key in keys_with_gtxn:
+            for ind in range(MAX_GROUP_SIZE):
+                gtx_key = self.gtx_key(ind, key)
+                if ind in block.transaction_context.group_indices:
+                    # txn can have index {ind}
+                    # gtxn {ind} {field} can have a value if and only if {txn} {field} can also have that value
+                    self._block_contexts[gtx_key][block] = self._intersection(
+                        gtx_key,
+                        self._block_contexts[gtx_key][block],
+                        self._block_contexts[key][block],
+                    )
+                else:
+                    # txn cannot have index {ind}
+                    self._block_contexts[gtx_key][block] = self._null_set(gtx_key)
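    # A worked illustration of the rule above, with plain sets standing in for the
    # block contexts (names and values are hypothetical, not tealer's API):
    group_indices = {0, 1}              # possible indices of `txn` in this block
    txn_rekey_to = {"ZERO_ADDRESS"}     # constraint collected from `txn RekeyTo` checks
    gtxn_rekey_to = {i: {"ZERO_ADDRESS", "ATTACKER"} for i in range(4)}  # from `gtxn {i} RekeyTo`

    for i in range(4):
        if i in group_indices:
            gtxn_rekey_to[i] &= txn_rekey_to   # txn may sit at index i: intersect
        else:
            gtxn_rekey_to[i] = set()           # txn can never sit at index i: null set

    assert gtxn_rekey_to[0] == {"ZERO_ADDRESS"} and gtxn_rekey_to[2] == set()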
+
+    def _calculate_reachin(
+        self, key: str, block: "BasicBlock", reachout: Dict["BasicBlock", Any]
+    ) -> Any:
         if block == self._entry_block:
             # We are considering each possible value as a definition defined at the start of entry block.
             reachin_information = self._universal_set(key)
         else:
             reachin_information = self._null_set(key)
 
-        reachout = self._reachout[key]
         path_context = self._path_contexts[key]
         for prev_b in block.prev:
             reachin_information = self._union(
@@ -333,8 +374,52 @@
 
         return reachin_information
 
-    def _calculate_livein(self, key: str, block: "BasicBlock") -> Any:
-        liveout = self._liveout[key]
+    def _merge_information_forward(
+        self,
+        analysis_keys: List[str],
+        block: "BasicBlock",
+        global_reachout: Dict[str, Dict["BasicBlock", Any]],
+    ) -> bool:
+        updated = False
+        for key in analysis_keys:
+            # RCHout(b) = intersection(RCHin(b), PRSV(b))
+            new_reachout = self._intersection(
+                key,
+                self._calculate_reachin(key, block, global_reachout[key]),
+                self._block_contexts[key][block],
+            )
+            if new_reachout != global_reachout[key][block]:
+                global_reachout[key][block] = new_reachout
+                updated = True
+        return updated
+
+    def forward_analysis(self, analysis_keys: List[str], worklist: List["BasicBlock"]) -> None:
+        """Perform forward analysis for analysis_keys and update self._block_contexts"""
+        # reachout for all analysis keys. global_reachout[key] -> reachout of key.
+        # global_reachout[key][block] -> reachout of block for key.
+ global_reachout: Dict[str, Dict["BasicBlock", Any]] = {} + for key in analysis_keys: + global_reachout[key] = {} + for b in self._teal.bbs: + global_reachout[key][b] = self._null_set(key) + + while worklist: + b = worklist[0] + worklist = worklist[1:] + updated = self._merge_information_forward(analysis_keys, b, global_reachout) + + if updated: + return_point_block = [b.sub_return_point] if b.sub_return_point is not None else [] + for bi in b.next + return_point_block: + if bi not in worklist: + worklist.append(bi) + + for key in analysis_keys: + self._block_contexts[key] = global_reachout[key] + + def _calculate_livein( + self, key: str, block: "BasicBlock", liveout: Dict["BasicBlock", Any] + ) -> Any: livein_information = self._null_set(key) for next_b in block.next: @@ -347,32 +432,52 @@ def _calculate_livein(self, key: str, block: "BasicBlock") -> Any: ) return livein_information - def _merge_information_forward(self, block: "BasicBlock") -> bool: - updated = False - for key in self.KEYS: - # RCHout(b) = intersection(RCHin(b), PRSV(b)) - new_reachout = self._intersection( - key, self._calculate_reachin(key, block), self._block_contexts[key][block] - ) - if new_reachout != self._reachout[key][block]: - self._reachout[key][block] = new_reachout - updated = True - return updated - - def _merge_information_backward(self, block: "BasicBlock") -> bool: + def _merge_information_backward( + self, + analysis_keys: List[str], + block: "BasicBlock", + global_liveout: Dict[str, Dict["BasicBlock", Any]], + ) -> bool: if len(block.next) == 0: # leaf block return False updated = False - for key in self.KEYS: + for key in analysis_keys: new_liveout = self._intersection( - key, self._calculate_livein(key, block), self._block_contexts[key][block] + key, + self._calculate_livein(key, block, global_liveout[key]), + self._block_contexts[key][block], ) - if new_liveout != self._liveout[key][block]: - self._liveout[key][block] = new_liveout + if new_liveout != global_liveout[key][block]: + global_liveout[key][block] = new_liveout updated = True return updated + def backward_analysis(self, analysis_keys: List[str], worklist: List["BasicBlock"]) -> None: + """Perform backward analysis for analysis_keys and update self._block_contexts""" + global_liveout: Dict[str, Dict["BasicBlock", Any]] = {} + for key in analysis_keys: + global_liveout[key] = {} + for b in self._teal.bbs: + if len(b.next) == 0: # leaf block + global_liveout[key][b] = self._block_contexts[key][b] + else: + global_liveout[key][b] = self._null_set(key) + + while worklist: + b = worklist[0] + worklist = worklist[1:] + updated = self._merge_information_backward(analysis_keys, b, global_liveout) + + if updated: + callsub_block = [b.callsub_block] if b.callsub_block is not None else [] + for bi in b.prev + callsub_block: + if bi not in worklist: + worklist.append(bi) + + for key in analysis_keys: + self._block_contexts[key] = global_liveout[key] + @staticmethod def _postorder(entry: "BasicBlock") -> List["BasicBlock"]: visited: Set["BasicBlock"] = set() @@ -388,55 +493,44 @@ def dfs(block: "BasicBlock") -> None: dfs(entry) return order - def run_analysis(self) -> None: # pylint: disable=too-many-branches + def run_analysis(self) -> None: """Run analysis""" - # step 1 - for block in self._teal.bbs: - self._block_level_constraints(block) - self._path_level_constraints(block) - # step 2 initialization - for key in self.KEYS: - self._reachout[key] = {} - for b in self._teal.bbs: - self._reachout[key][b] = self._null_set(key) + gtx_keys = [] + for key 
in self.KEYS_WITH_GTXN:
+            for ind in range(MAX_GROUP_SIZE):
+                gtx_keys.append(self.gtx_key(ind, key))
+
+        all_keys = self.BASE_KEYS + gtx_keys
+
+        # step 1: initialise information
+        for block in self._teal.bbs:
+            self._block_level_constraints(all_keys, block)  # initialise information for all keys
+            self._path_level_constraints(all_keys, block)
 
         postorder = self._postorder(self._entry_block)
-        worklist = postorder[::-1]  # Reverse postorder
-        while worklist:
-            b = worklist[0]
-            worklist = worklist[1:]
-            updated = self._merge_information_forward(b)
+        # perform analysis of base keys first. Information of these base keys will be used for
+        # gtxn keys. See `self._update_gtxn_constraints`.
+        analysis_keys = list(self.BASE_KEYS)
 
-            if updated:
-                return_point_block = [b.sub_return_point] if b.sub_return_point is not None else []
-                for bi in b.next + return_point_block:
-                    if bi not in worklist:
-                        worklist.append(bi)
+        worklist = postorder[::-1]  # Reverse postorder
+        self.forward_analysis(analysis_keys, worklist)
 
-        # step 3
-        self._block_contexts = self._reachout
-        for key in self.KEYS:
-            self._liveout[key] = {}
-            for b in self._teal.bbs:
-                if len(b.next) == 0:  # leaf block
-                    self._liveout[key][b] = self._block_contexts[key][b]
-                else:
-                    self._liveout[key][b] = self._null_set(key)
+        worklist = [b for b in postorder if len(b.next) != 0]  # postorder, exclude leaf blocks
+        self.backward_analysis(analysis_keys, worklist)
 
-        worklist = [b for b in postorder if len(b.next) != 0]
+        # update gtxn constraints using possible group indices and txn constraints.
+        for block in self._teal.bbs:
+            self._update_gtxn_constraints(self.KEYS_WITH_GTXN, block)
 
-        while worklist:
-            b = worklist[0]
-            worklist = worklist[1:]
-            updated = self._merge_information_backward(b)
+        # Now perform analysis for gtx_keys
+        analysis_keys = gtx_keys
 
-            if updated:
-                callsub_block = [b.callsub_block] if b.callsub_block is not None else []
-                for bi in b.prev + callsub_block:
-                    if bi not in worklist:
-                        worklist.append(bi)
+        worklist = postorder[::-1]  # Reverse postorder
+        self.forward_analysis(analysis_keys, worklist)
+
+        worklist = [b for b in postorder if len(b.next) != 0]  # postorder, exclude leaf blocks
+        self.backward_analysis(analysis_keys, worklist)
 
-        self._block_contexts = self._liveout
         self._store_results()
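The two fixed-point passes above follow the usual worklist pattern. A stripped-down version of the forward pass on a toy CFG might look like this (a self-contained sketch, not the class above; block names and constraints are invented):

    # Toy CFG: block -> successor list. Contexts are sets; the merge is union over
    # predecessors followed by intersection with the block's own constraints
    # (RCHout(b) = RCHin(b) & PRSV(b)).
    cfg = {"entry": ["a"], "a": ["b", "exit"], "b": ["a"], "exit": []}
    preds = {n: [m for m in cfg if n in cfg[m]] for n in cfg}
    universe = set(range(1, 17))
    prsv = {n: universe for n in cfg}
    prsv["a"] = {2, 3}                      # e.g. block `a` asserts GroupSize in {2, 3}

    reachout = {n: set() for n in cfg}
    worklist = ["entry", "a", "b", "exit"]  # reverse postorder
    while worklist:
        b = worklist.pop(0)
        reachin = universe if b == "entry" else set().union(*(reachout[p] for p in preds[b]))
        new = reachin & prsv[b]
        if new != reachout[b]:
            reachout[b] = new
            worklist.extend(s for s in cfg[b] if s not in worklist)

    assert reachout["exit"] == {2, 3}   # the loop through `b` cannot widen the constraint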
diff --git a/tealer/analyses/dataflow/int_fields.py b/tealer/analyses/dataflow/int_fields.py
index ba52b74..13d175b 100644
--- a/tealer/analyses/dataflow/int_fields.py
+++ b/tealer/analyses/dataflow/int_fields.py
@@ -14,6 +14,7 @@
 from tealer.teal.global_field import GroupSize
 from tealer.teal.instructions.transaction_field import GroupIndex
 from tealer.utils.analyses import is_int_push_ins
+from tealer.utils.algorand_constants import MAX_GROUP_SIZE
 
 if TYPE_CHECKING:
     from tealer.teal.instructions.instructions import Instruction
@@ -22,31 +23,33 @@
 group_index_key = "GroupIndex"
 analysis_keys = [group_size_key, group_index_key]
 universal_sets = {}
-universal_sets[group_size_key] = list(range(1, 17))
-universal_sets[group_index_key] = list(range(0, 16))
+universal_sets[group_size_key] = list(range(1, MAX_GROUP_SIZE + 1))
+universal_sets[group_index_key] = list(range(0, MAX_GROUP_SIZE))
 
 
-class IntFields(DataflowTransactionContext):  # pylint: disable=too-few-public-methods
+class GroupIndices(DataflowTransactionContext):  # pylint: disable=too-few-public-methods
 
     GROUP_SIZE_KEY = group_size_key
     GROUP_INDEX_KEY = group_index_key
-    KEYS = analysis_keys
+    BASE_KEYS: List[str] = analysis_keys
+    KEYS_WITH_GTXN: List[str] = []  # gtxn information is not collected for any of the keys
     UNIVERSAL_SETS: Dict[str, List] = universal_sets
 
-    def _universal_set(self, key: str) -> Set:  # pylint: disable=no-self-use
+    def _universal_set(self, key: str) -> Set:
         return set(self.UNIVERSAL_SETS[key])
 
-    def _null_set(self, key: str) -> Set:  # pylint: disable=no-self-use
+    def _null_set(self, key: str) -> Set:
         return set()
 
-    def _union(self, key: str, a: Set, b: Set) -> Set:  # pylint: disable=no-self-use
+    def _union(self, key: str, a: Set, b: Set) -> Set:
         return a | b
 
-    def _intersection(self, key: str, a: Set, b: Set) -> Set:  # pylint: disable=no-self-use
+    def _intersection(self, key: str, a: Set, b: Set) -> Set:
         return a & b
 
-    def _get_asserted_int_values(  # pylint: disable=no-self-use
-        self, comparison_ins: "Instruction", compared_int: int, universal_set: List[int]
+    @staticmethod
+    def _get_asserted_int_values(
+        comparison_ins: "Instruction", compared_int: int, universal_set: List[int]
     ) -> List[int]:
         """return list of ints from universal set (U) that will satisfy the comparison.
 
@@ -65,7 +68,7 @@
         Returns:
             list of ints that will satisfy the comparison
         """
-        U = universal_set
+        U = list(universal_set)
 
         if isinstance(comparison_ins, Eq):  # pylint: disable=no-else-return
             return [compared_int]
@@ -175,6 +178,14 @@
             return self._get_asserted_groupindices(ins_stack)
 
     def _store_results(self) -> None:
+        # use group_sizes to update group_indices
+        group_sizes_context = self._block_contexts[self.GROUP_SIZE_KEY]
+        group_indices_context = self._block_contexts[self.GROUP_INDEX_KEY]
+        for bi in self._teal.bbs:
+            group_indices_context[bi] = group_indices_context[bi] & set(
+                range(0, max(group_sizes_context[bi], default=0))
+            )
+
         group_size_block_context = self._block_contexts[self.GROUP_SIZE_KEY]
         for block in self._teal.bbs:
             block.transaction_context.group_sizes = list(group_size_block_context[block])
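The comparison handling in `_get_asserted_int_values` maps each comparison instruction to the subset of U that satisfies it. For instance (illustrative values, plain list comprehensions in place of the method's isinstance dispatch):

    # How a comparison against `int 3` partitions the GroupIndex universe.
    U = list(range(0, 16))   # universal set for GroupIndex
    compared_int = 3

    eq_values = [compared_int]                        # `==` keeps only the compared value
    neq_values = [i for i in U if i != compared_int]  # `!=` keeps everything else
    lt_values = [i for i in U if i < compared_int]    # `<`  -> [0, 1, 2]
    ge_values = [i for i in U if i >= compared_int]   # `>=` -> [3, 4, ..., 15]

    assert lt_values == [0, 1, 2]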
diff --git a/tealer/teal/context/block_transaction_context.py b/tealer/teal/context/block_transaction_context.py
index 7fc3ad7..313f1a3 100644
--- a/tealer/teal/context/block_transaction_context.py
+++ b/tealer/teal/context/block_transaction_context.py
@@ -1,6 +1,7 @@
 from typing import List, Optional
 
 from tealer.exceptions import TealerException
+from tealer.utils.algorand_constants import MAX_GROUP_SIZE
 
 
 class BlockTransactionContext:  # pylint: disable=too-few-public-methods
@@ -12,13 +13,18 @@ def __init__(self, tail: bool = False) -> None:
             self._group_transactions_context = [BlockTransactionContext(True) for _ in range(16)]
 
         # set default values
-        self.group_sizes = list(range(1, 17))
-        self.group_indices = list(range(0, 16))
+        if tail:
+            # information collected from `gtxn {i} {field}` instructions.
+            self.group_indices = []
+            self.group_sizes = []
+        else:
+            self.group_sizes = list(range(1, MAX_GROUP_SIZE + 1))
+            self.group_indices = list(range(0, MAX_GROUP_SIZE))
 
     def gtxn_context(self, txn_index: int) -> "BlockTransactionContext":
         """context information collected from gtxn {txn_index} field instructions"""
         if self._group_transactions_context is None:
             raise TealerException()
-        if txn_index >= 16:
+        if txn_index >= MAX_GROUP_SIZE:
             raise TealerException()
         return self._group_transactions_context[txn_index]
diff --git a/tealer/teal/parse_teal.py b/tealer/teal/parse_teal.py
index b81711d..8f1b9e0 100644
--- a/tealer/teal/parse_teal.py
+++ b/tealer/teal/parse_teal.py
@@ -468,16 +468,19 @@ def _verify_version(ins_list: List[Instruction], program_version: int) -> bool:
 
 def _apply_transaction_context_analysis(teal: "Teal") -> None:
+    group_indices_cls = all_constraints.GroupIndices
     analyses_classes = [getattr(all_constraints, name) for name in dir(all_constraints)]
     analyses_classes = [
         c
         for c in analyses_classes
-        if inspect.isclass(c) and issubclass(c, DataflowTransactionContext)
+        if inspect.isclass(c)
+        and issubclass(c, DataflowTransactionContext)
+        and c != group_indices_cls
     ]
-
+    # Run the group indices analysis first as the other analyses use its results.
+    group_indices_cls(teal).run_analysis()
     for cl in analyses_classes:
-        obj = cl(teal)
-        obj.run_analysis()
+        cl(teal).run_analysis()
 
 
 def parse_teal(source_code: str) -> Teal:
diff --git a/tealer/utils/algorand_constants.py b/tealer/utils/algorand_constants.py
new file mode 100644
index 0000000..78843b3
--- /dev/null
+++ b/tealer/utils/algorand_constants.py
@@ -0,0 +1,2 @@
+MAX_GROUP_SIZE = 16
+MIN_ALGORAND_FEE = 1000  # in micro algos
diff --git a/tests/transaction_context/test_group_indices.py b/tests/transaction_context/test_group_indices.py
index f630552..dbc5ac1 100644
--- a/tests/transaction_context/test_group_indices.py
+++ b/tests/transaction_context/test_group_indices.py
@@ -122,16 +122,52 @@
     return
 """
 
-
 LOOPS_GROUP_INDICES = [[2], [2], [], [2]]
 
+LOOPS_GROUP_SIZES = """
+#pragma version 5
+txn GroupIndex
+int 4
+!=
+assert
+global GroupSize
+int 6
+<=
+int 0
+loop:
+    dup
+    txn GroupIndex
+    int 3
+    >
+    bz end
+    int 1
+    +
+    txn GroupIndex
+    int 6
+    <
+    assert
+    b loop
+end:
+    int 2
+    txn GroupIndex
+    >
+    assert
+    int 5
+    global GroupSize
+    <=
+    assert
+    int 1
+    return
+"""
+
+LOOPS_GROUP_SIZES_GROUP_INDICES = [[3], [3], [], [3]]
 
 ALL_TESTS = [
     (MULTIPLE_RETSUB, MULTIPLE_RETSUB_GROUP_INDICES),
     (SUBROUTINE_BACK_JUMP, SUBROUTINE_BACK_JUMP_GROUP_INDICES),
     (BRANCHING, BRANCHING_GROUP_INDICES),
     (LOOPS, LOOPS_GROUP_INDICES),
+    (LOOPS_GROUP_SIZES, LOOPS_GROUP_SIZES_GROUP_INDICES),
 ]
diff --git a/tests/transaction_context/test_group_sizes.py b/tests/transaction_context/test_group_sizes.py
index e49d259..cdc3eaa 100644
--- a/tests/transaction_context/test_group_sizes.py
+++ b/tests/transaction_context/test_group_sizes.py
@@ -294,3 +294,26 @@ def test_group_sizes(test: Tuple[str, List[BasicBlock]]) -> None:
     for b1, b2 in zip(bbs, cfg):
         print(b1.transaction_context.group_sizes, b2.transaction_context.group_sizes)
         assert b1.transaction_context.group_sizes == b2.transaction_context.group_sizes
+
+
+MULTIPLE_RETSUB_CFG_GROUP_INDICES = [[0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1]]
+SUBROUTINE_BACK_JUMP_GROUP_INDICES = [[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2]]
+BRANCHING_GROUP_INDICES = [[0, 1, 2, 3], [], [], [], [], [0, 1, 2, 3]]
+LOOPS_GROUP_INDICES = [[0, 1], [0, 1], [], [0, 1]]
+
+GROUP_INDICES_TESTS = [
+    (MULTIPLE_RETSUB, 
MULTIPLE_RETSUB_CFG_GROUP_INDICES), + (SUBROUTINE_BACK_JUMP, SUBROUTINE_BACK_JUMP_GROUP_INDICES), + (BRANCHING, BRANCHING_GROUP_INDICES), + (LOOPS, LOOPS_GROUP_INDICES), +] + +@pytest.mark.parametrize("test", GROUP_INDICES_TESTS) # type: ignore +def test_group_indices(test: Tuple[str, List[List[int]]]) -> None: + code, group_indices_list = test + teal = parse_teal(code.strip()) + + bbs = order_basic_blocks(teal.bbs) + for b, group_indices in zip(bbs, group_indices_list): + print(b.transaction_context.group_indices, group_indices) + assert b.transaction_context.group_indices == group_indices
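End to end, the analysis is exercised the same way these tests exercise it: parse a program, then read the per-block constraints. A minimal sketch (the TEAL snippet and the printed fields are illustrative; the import path assumes the layout shown in this patch):

    from tealer.teal.parse_teal import parse_teal

    code = """
    #pragma version 4
    global GroupSize
    int 2
    ==
    assert
    int 1
    return
    """

    teal = parse_teal(code.strip())
    for bb in teal.bbs:
        # _apply_transaction_context_analysis runs inside parse_teal, so every
        # basic block already carries its inferred constraints here.
        print(bb.idx, bb.transaction_context.group_sizes, bb.transaction_context.group_indices)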