Skip to content

Commit

Permalink
Some instrumentation, package structure
Browse files Browse the repository at this point in the history
  • Loading branch information
AryazE committed Oct 26, 2021
1 parent 6c9ccd6 commit 74956dd
Show file tree
Hide file tree
Showing 8 changed files with 375 additions and 0 deletions.
18 changes: 18 additions & 0 deletions sample_code/a.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# DYNAPYT: DO NOT INSTRUMENT

from dynapyt.runtime import _assign_
from dynapyt.runtime import _expr_
from dynapyt.runtime import _binop_

# Instrumented form of the sample program. The right-hand side of every
# assignment is passed through _assign_(iid, [targets], value), the bare
# print(...) statement through _expr_(iid, value), and the binary operations
# in the return statement and in the range() argument through
# _binop_(iid, left, "Operator", right, value). Binary operations that sit
# inside an assignment's right-hand side (a + b, 2*i) are left as written.
# NOTE(review): the target lists such as [x] are ordinary expressions, so they
# are evaluated before the assignment takes place -- confirm this is intended.
x = _assign_(3, [x], a + b)

def foo(bar):
return _binop_(5, _binop_(4, a, "Add", b, a + b), "Add", c, a + b + c)

for i in range(_binop_(6, n, "Multiply", 2, n*2)):
_expr_(7, print(m[i]))
y = _assign_(9, [y], m[i:2*i])
# comment
if g.name == 'Aryaz':
g.test.x = _assign_(10, [g.test.x], 10)
u = _assign_(11, [u], g.test.y)
12 changes: 12 additions & 0 deletions sample_code/a.py.orig
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Uninstrumented sample program: a module-level assignment, a function that
# chains two additions, and a loop that indexes and slices `m` and reads and
# writes attributes of `g`. The names a, b, c, n, m and g are not defined in
# this file, and foo() does not use its `bar` parameter.
x = a + b

def foo(bar):
return a + b + c

for i in range(n*2):
print(m[i])
y = m[i:2*i]
# comment
if g.name == 'Aryaz':
g.test.x = 10
u = g.test.y
26 changes: 26 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import setuptools

# The README text is handed to setup() as the package's long description.
with open("README.md", "r", encoding="utf-8") as readme:
    long_description = readme.read()

# Gather all metadata in one mapping, then pass it to setuptools in one call.
package_metadata = dict(
    name="dynapyt",
    version="0.0.1",
    author="Aryaz Eghbali",
    author_email="[email protected]",
    description="Dynamic analysis framework for Python",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/sola-st/python-dynamic-analysis",
    project_urls={
        "Bug Tracker": "https://github.com/sola-st/python-dynamic-analysis/issues",
    },
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    # Packages live under src/ rather than next to setup.py.
    package_dir={"": "src"},
    packages=setuptools.find_packages(where="src"),
    python_requires=">=3.6",
)

setuptools.setup(**package_metadata)
152 changes: 152 additions & 0 deletions src/analyses/trace_all.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
from typing import Any, List, Optional, Tuple
import libcst as cst

class TraceAll:
    """Stub methods, one per kind of Python construct.

    ``literal`` returns the value it is given; every other method has an empty
    body and therefore returns ``None``. Each method takes an ``iid`` first
    (presumably the instruction id assigned by the instrumenter -- confirm
    against the runtime that calls these methods).
    """

    # Literals

    def literal(self, iid: int, val: Any) -> Any:
        """Return ``val`` unchanged."""
        return val

    # Variables

    def read_var(self, iid: int, name: str, val: Any, is_global: bool) -> Any:
        """Placeholder for a variable read; does nothing."""

    def write_var(
        self, iid: int, name: str, old_val: Any, new_val: Any, is_global: bool
    ) -> Any:
        """Placeholder for a variable write; does nothing."""

    def delete_var(self, iid: int, name: str, val: Any, is_global: bool) -> None:
        """Placeholder for a variable deletion; does nothing."""

    # Attributes

    def read_attr(self, iid: int, name: str, val: Any, base_obj: Any) -> Any:
        """Placeholder for an attribute read; does nothing."""

    def write_attr(
        self, iid: int, name: str, old_val: Any, new_val: Any, base_obj: Any
    ) -> Any:
        """Placeholder for an attribute write; does nothing."""

    def delete_attr(self, iid: int, name: str, val: Any, base_obj: Any) -> None:
        """Placeholder for an attribute deletion; does nothing."""

    # Expressions

    def binary_op(
        self, iid: int, op: str, left: Any, right: Any, result: Any
    ) -> Any:
        """Placeholder for a binary operation; does nothing."""

    def unary_op(self, iid: int, op: str, arg: Any, result: Any) -> Any:
        """Placeholder for a unary operation; does nothing."""

    def compare(
        self, iid: int, op: str, left: Any, right_list: List[Any], result: Any
    ) -> Any:
        """Placeholder for a comparison; does nothing."""

    def invoke_func_pre(
        self,
        iid: int,
        f: str,
        base: Any,
        args: List[Any],
        is_constructor: bool,
        function_iid: int,
        function_sid: str,
    ) -> None:
        """Placeholder for the point before a function call; does nothing."""

    def invoke_func(
        self,
        iid: int,
        f: str,
        base: Any,
        args: List[Any],
        result: Any,
        is_constructor: bool,
        function_iid: int,
        function_sid: str,
    ) -> Any:
        """Placeholder for a function call with its result; does nothing."""

    def conditional_jump(
        self, iid: int, result: bool, goto_iid: int
    ) -> Optional[bool]:
        """Placeholder for a conditional jump; does nothing."""

    # Subscripts

    def read_sub(
        self, iid: int, name: str, val: Any, slices: List[Tuple[int, int]]
    ) -> Any:
        """Placeholder for a subscript read; does nothing."""

    def write_sub(
        self,
        iid: int,
        name: str,
        old_val: Any,
        new_val: Any,
        slices: List[Tuple[int, int]],
    ) -> Any:
        """Placeholder for a subscript write; does nothing."""

    def delete_sub(
        self, iid: int, name: str, val: Any, slices: List[Tuple[int, int]]
    ) -> None:
        """Placeholder for a subscript deletion; does nothing."""

    # Statements

    def assignment(self, iid: int, op: str, left: Any, right: Any) -> Any:
        """Placeholder for an assignment statement; does nothing."""

    def raise_stmt(self, iid: int, type: Exception) -> Optional[Exception]:
        """Placeholder for a ``raise`` statement; does nothing."""

    def assert_stmt(
        self, iid: int, condition: bool, message: str
    ) -> Optional[bool]:
        """Placeholder for an ``assert`` statement; does nothing."""

    def pass_stmt(self, iid: int) -> None:
        """Placeholder for a ``pass`` statement; does nothing."""

    # Imports

    def import_stmt(self, iid: int, name: str, module: str, alias: str) -> None:
        """Placeholder for an import statement; does nothing."""

    # Control flow

    def if_stmt(self, iid: int, cond_value: bool) -> Optional[bool]:
        """Placeholder for an ``if`` statement; does nothing."""

    def for_stmt(
        self, iid: int, cond_value: bool, is_async: bool
    ) -> Optional[bool]:
        """Placeholder for a ``for`` statement; does nothing."""

    def while_stmt(self, iid: int, cond_value: bool) -> Optional[bool]:
        """Placeholder for a ``while`` statement; does nothing."""

    def break_stmt(self, iid: int, goto_iid: int) -> Optional[bool]:
        """Placeholder for a ``break`` statement; does nothing."""

    def continue_stmt(self, iid: int, goto_iid: int) -> Optional[bool]:
        """Placeholder for a ``continue`` statement; does nothing."""

    def try_stmt(self, iid: int) -> None:
        """Placeholder for a ``try`` statement; does nothing."""

    def exception_stmt(
        self, iid: int, exceptions: List[Exception], caught: Exception
    ) -> Optional[Exception]:
        """Placeholder for an exception handler; does nothing."""

    # With

    def with_stmt(
        self, iid: int, items: List[Tuple[Any, str]], is_async: bool
    ) -> Optional[List[Tuple[Any, str]]]:
        """Placeholder for a ``with`` statement; does nothing."""

    # Function definitions

    def function_def(
        self,
        iid: int,
        name: str,
        args: List[cst.Arg],
        decorators: List[cst.Decorator],
        returns: List[Any],
        is_async: bool,
    ) -> None:
        """Placeholder for a function definition; does nothing.

        ``name`` is None for lambda functions.
        """

    def function_arg(
        self, iid: int, name: str, default: Any, annotation: cst.Annotation
    ) -> None:
        """Placeholder for a function argument; does nothing."""

    def return_stmt(self, iid: int, function_iid: int, value: Any) -> Any:
        """Placeholder for a ``return`` statement; does nothing."""

    def yield_stmt(self, iid: int, function_iid: int, value: Any) -> Any:
        """Placeholder for a ``yield``; does nothing."""

    # Global

    def global_declaration(self, iid: int, names: List[str]) -> None:
        """Placeholder for a ``global`` declaration; does nothing."""

    def nonlocal_declaration(self, iid: int, names: List[str]) -> None:
        """Placeholder for a ``nonlocal`` declaration; does nothing."""

    # Class definitions

    def class_def(
        self,
        iid: int,
        name: str,
        bases: List[Any],
        decorators: List[cst.Decorator],
        meta_classes: List[Any],
    ) -> None:
        """Placeholder for a class definition; does nothing."""

    # Await

    def await_stmt(self, iid: int, waiting_for: Any) -> Any:
        """Placeholder for an ``await``; does nothing."""

    # Top level

    def module(self, iid: int) -> None:
        """Placeholder for a module; does nothing."""

    def expression(self, iid: int, value: Any) -> Any:
        """Placeholder for an expression; does nothing."""

    def statement(self, iid: int) -> None:
        """Placeholder for a statement; does nothing."""
73 changes: 73 additions & 0 deletions src/instrument/CodeInstrumenter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import libcst as cst
from libcst.metadata import ParentNodeProvider, PositionProvider


class CodeInstrumenter(cst.CSTTransformer):
    """Rewrite a module so that selected constructs call the DynaPyt runtime.

    Assignments, expression statements and binary operations are wrapped in
    calls to ``_assign_``, ``_expr_`` and ``_binop_`` respectively, and imports
    of those three names from ``dynapyt.runtime`` are prepended to the module.
    Every wrapped node receives an id from ``iids`` derived from its position
    in the source file.
    """

    METADATA_DEPENDENCIES = (ParentNodeProvider, PositionProvider,)

    def __init__(self, file_path, iids):
        """Remember where the code comes from and who hands out ids.

        file_path: path stored alongside every id created by this transformer.
        iids: object whose ``new(file, line, column)`` returns a fresh id.
        """
        # Let libcst set up its own transformer/metadata state first.
        super().__init__()
        self.file_path = file_path
        self.iids = iids

    def __create_iid(self, node):
        """Return a new id for ``node`` keyed by its start line and column."""
        start = self.get_metadata(PositionProvider, node).start
        return self.iids.new(self.file_path, start.line, start.column)

    def __create_import(self, name):
        """Build the statement ``from dynapyt.runtime import <name>``."""
        runtime_module = cst.Attribute(
            value=cst.Name(value="dynapyt"), attr=cst.Name(value="runtime"))
        alias = cst.ImportAlias(name=cst.Name(value=name))
        import_from = cst.ImportFrom(module=runtime_module, names=[alias])
        return cst.SimpleStatementLine(body=[import_from])

    # add import of our runtime library to the file
    def leave_Module(self, original_node, updated_node):
        """Prepend the runtime imports, followed by one blank line."""
        runtime_imports = [
            self.__create_import(name)
            for name in ("_assign_", "_expr_", "_binop_")
        ]
        body = list(updated_node.body)
        if body:
            # Module.body should only hold statements, so the separating blank
            # line is attached to the first original statement as a leading
            # EmptyLine instead of inserting a bare Newline node into the body.
            first_stmt = body[0]
            body[0] = first_stmt.with_changes(
                leading_lines=[cst.EmptyLine(), *first_stmt.leading_lines])
        return updated_node.with_changes(body=runtime_imports + body)

    def leave_BinaryOperation(self, original_node, updated_node):
        """Replace ``l <op> r`` by ``_binop_(iid, l, "<Op>", r, l <op> r)``.

        The left/right arguments are the (possibly already instrumented)
        operands; the last argument is the original, uninstrumented expression.
        NOTE(review): the operands therefore appear twice in the generated
        call and are evaluated twice at run time -- confirm this is acceptable
        for operands with side effects.
        """
        iid = self.__create_iid(original_node)
        # The operator is passed as the libcst node class name, e.g. "Add".
        operator_name = type(original_node.operator).__name__
        call_args = [
            cst.Arg(value=cst.Integer(value=str(iid))),
            cst.Arg(updated_node.left),
            cst.Arg(cst.SimpleString(value=f'"{operator_name}"')),
            cst.Arg(updated_node.right),
            cst.Arg(original_node),
        ]
        return cst.Call(func=cst.Name(value="_binop_"), args=call_args)

    def leave_Assign(self, original_node, updated_node):
        """Replace ``t = v`` by ``t = _assign_(iid, [t], v)``.

        The value argument is the original right-hand side, so instrumentation
        applied to nested nodes of the right-hand side is not carried over.
        NOTE(review): the targets are emitted as an ordinary list expression
        and are thus evaluated before the assignment happens -- confirm that
        the runtime expects this (an unbound target would raise NameError).
        """
        iid = self.__create_iid(original_node)
        target_elements = [
            cst.Element(value=assign_target.target)
            for assign_target in original_node.targets
        ]
        call_args = [
            cst.Arg(value=cst.Integer(value=str(iid))),
            cst.Arg(value=cst.List(target_elements)),
            cst.Arg(original_node.value),
        ]
        call = cst.Call(func=cst.Name(value="_assign_"), args=call_args)
        return updated_node.with_changes(value=call)

    def leave_Expr(self, original_node, updated_node):
        """Replace the expression statement ``e`` by ``_expr_(iid, e)``."""
        iid = self.__create_iid(original_node)
        # ``original_node`` is the Expr *statement*; an Arg must wrap an
        # expression, so pass the statement's value. Wrapping the statement
        # itself would also emit its trailing semicolon (if it has one)
        # inside the argument list.
        call_args = [
            cst.Arg(value=cst.Integer(value=str(iid))),
            cst.Arg(original_node.value),
        ]
        call = cst.Call(func=cst.Name(value="_expr_"), args=call_args)
        return updated_node.with_changes(value=call)

    # TODO: function-entry instrumentation is not enabled yet; draft kept below.
    # def leave_FunctionDef(self, original_node, updated_node):
    #     callee_name = cst.Name(value="_func_entry_")
    #     iid = self.__create_iid(original_node)
    #     iid_arg = cst.Arg(value=cst.Integer(value=str(iid)))
    #     entry_stmt = cst.Call(func=callee_name, args=[iid_arg])
    #     print('!!!', updated_node.body.body, '!!!')
    #     return updated_node.with_changes(body=updated_node.body.with_changes(body=[entry_stmt, cst.Newline(value='\n')]+list(updated_node.body.body)))
34 changes: 34 additions & 0 deletions src/instrument/IIDs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from collections import namedtuple
from os import path
import json


# Source position that an instruction id refers to.
Location = namedtuple("Location", ["file", "line", "column"])


class IIDs:
    """Registry that hands out instruction ids and remembers their locations.

    The registry is persisted as JSON with two entries: ``next_iid`` (the id
    the next call to ``new`` will return) and ``iid_to_location`` (a mapping
    from id to ``[file, line, column]``).
    """

    def __init__(self, file_path):
        """Start a fresh registry or load an existing one.

        file_path: path of a JSON file previously written by ``store``, or
            None to start empty; in that case ``store`` writes to
            ``iids.json``.

        Raises whatever ``open``/``json.load`` raise if the given file cannot
        be read, and KeyError if it lacks one of the two expected entries.
        """
        if file_path is None:
            file_path = "iids.json"
            self.next_iid = 1
            self.iid_to_location = {}
        else:
            with open(file_path, "r") as file:
                json_object = json.load(file)
            self.next_iid = json_object["next_iid"]
            # JSON turns the integer keys into strings and the Location tuples
            # into lists; convert both back so loaded entries look exactly
            # like the ones ``new`` adds afterwards.
            self.iid_to_location = {
                int(iid): Location(*location)
                for iid, location in json_object["iid_to_location"].items()
            }
        self.file_path = file_path

    def new(self, file, line, column):
        """Register a location and return the id assigned to it."""
        iid = self.next_iid
        self.iid_to_location[iid] = Location(file, line, column)
        self.next_iid = iid + 1
        # Return the id that was just stored, not the (already advanced)
        # counter, so the id written into the code matches the registry key.
        return iid

    def store(self):
        """Write the registry as indented JSON to ``self.file_path``."""
        all_data = {
            "next_iid": self.next_iid,
            "iid_to_location": self.iid_to_location,
        }
        with open(self.file_path, "w") as file:
            file.write(json.dumps(all_data, indent=2))
1 change: 1 addition & 0 deletions src/instrument/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#
59 changes: 59 additions & 0 deletions src/instrument/instrument.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import argparse
from os import path
import libcst as cst
from CodeInstrumenter import CodeInstrumenter
from IIDs import IIDs
import re
from shutil import copyfile


# Command-line interface of the instrumenter.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--files",
    help="Python files to instrument or .txt file with all file paths",
    nargs="+",
    # Without this, omitting --files leaves args.files as None and the script
    # dies with a TypeError instead of a usage message.
    required=True)
parser.add_argument(
    "--iids",
    help="JSON file with instruction IDs (will create iids.json if nothing given)")


def gather_files(files_arg):
    """Resolve the ``--files`` argument into a list of file paths.

    files_arg: list of strings. If it holds exactly one entry ending in
        ``.txt``, that file is read and each non-blank line (with trailing
        whitespace removed) is taken as a path. Otherwise every entry must
        end in ``.py`` and the list itself is returned.

    Raises Exception if a directly given entry does not end in ``.py``.
    """
    if len(files_arg) == 1 and files_arg[0].endswith('.txt'):
        files = []
        with open(files_arg[0]) as fp:
            for line in fp:
                entry = line.rstrip()
                # Skip blank lines (e.g. a trailing newline at the end of the
                # list) so they do not turn into empty paths.
                if entry:
                    files.append(entry)
        return files

    for f in files_arg:
        if not f.endswith('.py'):
            raise Exception(f'Incorrect argument, expected .py file: {f}')
    return files_arg


def instrument_file(file_path, iids):
    """Instrument one Python file in place, keeping a ``.orig`` backup.

    file_path: path of the file to rewrite.
    iids: id registry passed on to ``CodeInstrumenter``.

    A file whose text already contains the marker
    ``DYNAPYT: DO NOT INSTRUMENT`` is reported and left untouched. Otherwise
    the file is parsed, transformed, copied to ``<file_path>.orig`` and then
    overwritten with the marker comment followed by the instrumented code.
    Returns None in both cases.
    """
    # Python source files are UTF-8 unless they declare otherwise, so do not
    # depend on the platform's locale encoding when reading or writing them.
    with open(file_path, 'r', encoding='utf-8') as src_file:
        src = src_file.read()

    if 'DYNAPYT: DO NOT INSTRUMENT' in src:
        print(f'{file_path} is already instrumented -- skipping it')
        return

    module = cst.parse_module(src)
    wrapper = cst.metadata.MetadataWrapper(module)

    instrumenter = CodeInstrumenter(file_path, iids)
    instrumented_module = wrapper.visit(instrumenter)

    # Back up the original only after instrumentation succeeded. Appending
    # the suffix gives the same name as before for ``*.py`` paths and, unlike
    # a ``\.py$`` substitution, never yields the source path itself.
    copied_file_path = file_path + '.orig'
    copyfile(file_path, copied_file_path)

    rewritten_code = '# DYNAPYT: DO NOT INSTRUMENT\n\n' + instrumented_module.code
    with open(file_path, 'w', encoding='utf-8') as out_file:
        out_file.write(rewritten_code)


if __name__ == '__main__':
    # Parse the command line, resolve the list of targets, instrument each
    # target against one shared id registry, then persist the registry.
    cli_args = parser.parse_args()
    targets = gather_files(cli_args.files)
    iid_registry = IIDs(cli_args.iids)
    for target in targets:
        instrument_file(target, iid_registry)
    iid_registry.store()

0 comments on commit 74956dd

Please sign in to comment.