Skip to content

Commit

Permalink
merge: development -> main
Browse files Browse the repository at this point in the history
Demo developments
  • Loading branch information
VyacheslavIurevich authored Jul 19, 2024
2 parents 0893b81 + 970fa2c commit 29ecfa4
Show file tree
Hide file tree
Showing 13 changed files with 237 additions and 7 deletions.
1 change: 0 additions & 1 deletion .githooks/pre-commit
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,3 @@ if [[ $pylint_check_status -ne 0 ]]; then
else
exit 0
fi

9 changes: 9 additions & 0 deletions .github/mergeable.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
version: 2
mergeable:
- when: pull_request.*
name: 'Check PR description'
validate:
- do: description
no_empty:
enabled: true
message: Description matters and should not be empty. Provide details on **what** was changed, **why** it was changed, and **how** it was changed.
1 change: 0 additions & 1 deletion .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,3 @@ jobs:
- name: Analysing the code with pylint
run: |
pylint $(git ls-files '*.py')
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,10 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Our app resources
*.rep
*.gpr
resources/out

.vscode/
.idea/
1 change: 0 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,3 @@ git config --local core.hooksPath .githooks/
8. Your naming of commits and pull requests must be logical and reasonable.
9. Please use "git commit -s" option.
10. If you are in doubt about what to do, it is better to contact the developers.

1 change: 0 additions & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,3 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
# recompilation_postprocessor
![Pylint](https://github.com/VyacheslavIurevich/recompilation_postprocessor/actions/workflows/pylint.yml/badge.svg)

This console app postprocesses C code decompiled by Ghidra so that it can be built again.
# Team
* Vyacheslav Kochergin. [GitHub](https://github.com/VyacheslavIurevich), [Contact](https://t.me/se4life).
* Vasilii Sarapulov. [GitHub](https://github.com/Sarapulov-Vas), [Contact](https://t.me/sarpaulov).

# Technologies used
* [Python 3.12](https://www.python.org/)
* [pyhidra](https://github.com/dod-cyber-crime-center/pyhidra)
* [Pylint](https://www.pylint.org/)
# Project status
In development.
# Contributing
See [CONTRIBUTING.md](./CONTRIBUTING.md)
# License
See [LICENSE](./LICENSE)
Binary file added resources/in/bmp-header.out
Binary file not shown.
Binary file added resources/in/calculator
Binary file not shown.
Binary file added resources/in/integrate_sin
Binary file not shown.
Binary file added resources/in/test.out
Binary file not shown.
66 changes: 64 additions & 2 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,65 @@
"""Simple program"""
"""Postprocessor main"""

print("Hello world")
# pylint: disable=wrong-import-position, import-error
from shutil import rmtree
import pyhidra

pyhidra.start()
from java.io import File, PrintWriter
from ghidra.app.decompiler import DecompileOptions, DecompInterface
import tools

# Header names written as "#include <...>" lines at the top of the exported C file.
LIBRARY_LIST = ["stdio.h", "stdlib.h", "inttypes.h"]


def init_decompiler(program):
    """Build a decompiler interface that is opened on the given program.

    The decompile options are grabbed from the program and installed on the
    interface before the program is opened. The opened interface is returned.
    """
    decompile_options = DecompileOptions()
    decompile_options.grabFromProgram(program)
    interface = DecompInterface()
    interface.setOptions(decompile_options)
    interface.openProgram(program)
    return interface


def put_functions(program, file_writer, monitor):
    """Puts all functions and their signatures into C code file.

    Functions rejected by ``tools.exclude_function`` are skipped. For the
    remaining ones the processed signatures are printed first; afterwards each
    processed body is printed, preceded by any CONCATxy helper that body
    needs and that has not been emitted yet.

    The decompiler is closed and disposed even if processing fails midway.
    """
    decompiler = init_decompiler(program)
    try:
        functions_code = []
        for function in program.getFunctionManager().getFunctions(True):
            if tools.exclude_function(function):
                continue
            results = decompiler.decompileFunction(function, 0, monitor)
            decompiled_function = results.getDecompiledFunction()
            if decompiled_function is None:
                # No decompiled output for this function (decompilation did
                # not succeed): skip it instead of crashing the whole export.
                continue
            function_signature = decompiled_function.getSignature()
            function_signature_processed = tools.replace_types(function_signature)
            function_code_processed = tools.handle_function(decompiled_function.getC())
            functions_code.append(function_code_processed)
            file_writer.println(function_signature_processed + '\n')
        used_concats = set()
        for function_code in functions_code:
            if "CONCAT" in function_code:
                used_concats = \
                    tools.put_concat(file_writer, function_code, used_concats)
            file_writer.println(function_code)
    finally:
        # Always release the decompiler, including on errors.
        decompiler.closeProgram()
        decompiler.dispose()


def export_c_code(binary_file_path, output_file_path):
    """Exporting c code to a file.

    Opens the binary with pyhidra, writes the ``#include`` lines, the program
    data types and the decompiled functions to ``output_file_path``, then
    deletes the project directory under ``resources/in``.

    The output writer is closed even when the export fails.
    """
    with pyhidra.open_program(binary_file_path) as flat_api:
        program = flat_api.getCurrentProgram()
        c_file_writer = PrintWriter(File(output_file_path))
        try:
            for lib in LIBRARY_LIST:
                c_file_writer.println(f"#include <{lib}>")
            tools.write_program_data_types(program, c_file_writer, flat_api.monitor)
            put_functions(program, c_file_writer, flat_api.monitor)
        finally:
            # Do not leak the file handle if anything above raises.
            c_file_writer.close()
        project_folder = str(flat_api.getProjectRootFolder())[:-2]  # last two symbols are :/
    # Delete the project directory only after the context manager has exited,
    # so nothing is removed while the project is still open.
    # NOTE(review): the "resources/in" prefix is hard-coded; this presumably
    # only matches when the binary itself lives in resources/in - confirm.
    rmtree(f"resources/in/{project_folder}")


# Demo run: executed whenever this module is loaded (there is no __main__ guard).
# Exports the sample binary resources/in/bmp-header.out to resources/out/test.c.
export_c_code("resources/in/bmp-header.out", "resources/out/test.c")
145 changes: 145 additions & 0 deletions src/tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
"""Tools for checking functions and exporting decompiled program to a .c file"""

# pylint: disable=wrong-import-order, wrong-import-position, import-error

from collections import OrderedDict
from math import floor, log2
import re
import pyhidra

pyhidra.start()
from ghidra.program.model.data import DataTypeWriter

# Type names rewritten by replace_types, mapped to the C spelling that replaces them.
TYPES_TO_REPLACE = OrderedDict(uint="unsigned int",
                               ushort="unsigned short",
                               ulong="unsigned long")
CONCAT_LEN = 6 # = len("CONCAT")
# Multiplier that turns a CONCAT operand size digit into a bit count (put_concat).
BYTE_SIZE = 8
# Radix used by address_to_int when parsing an address string.
HEX_BASE = 16
# Functions whose name starts with this prefix are treated as C runtime functions.
RUNTIME_PREFIX = '_'
# Code-unit text prefix that function_is_plt looks for.
PLT_INSTRUCTION = "JMP qword ptr"
# Lines of decompiled code containing this name are handled as stack protection.
STACK_PROTECTOR_VARIABLE = "in_FS_OFFSET"


def address_to_int(address):
    """Convert an address to int by parsing its string form in base HEX_BASE."""
    address_text = str(address)
    return int(address_text, HEX_BASE)


def function_in_runtime(function):
    """Tell whether the function's name starts with RUNTIME_PREFIX (C runtime)."""
    return function.getName().startswith(RUNTIME_PREFIX)


def function_is_plt(function):
    """Check if input function is PLT jump.

    Walks the addresses of the function body and looks at every code unit
    whose text starts with ``PLT_INSTRUCTION``. The function counts as a PLT
    jump when such an instruction targets an address outside the function's
    own [min, max] address range.

    Jumps whose operand is not a plain bracketed hex address (for instance a
    register-based operand) cannot be resolved here and are ignored rather
    than aborting with ValueError.
    """
    listing = function.getProgram().getListing()
    body = function.getBody()
    min_address = address_to_int(body.getMinAddress())
    max_address = address_to_int(body.getMaxAddress())
    for address in body.getAddresses(True):
        code_unit = str(listing.getCodeUnitAt(address))
        if not code_unit.startswith(PLT_INSTRUCTION):
            continue
        operand = code_unit.split()[-1]
        try:
            jmp_address = address_to_int(operand[1:-1])  # [1:-1] is to remove [] from address
        except ValueError:
            # Operand is not "[<hex address>]", so the target is unknown.
            continue
        if not min_address <= jmp_address <= max_address:
            return True
    return False


def exclude_function(function):
    """Decide whether a function must be left out of the exported code.

    A function is excluded when it is a C runtime function, a PLT jump, or
    when the code unit at its entry point has the mnemonic "??".
    """
    listing = function.getProgram().getListing()
    first_unit = listing.getCodeUnitAt(function.getEntryPoint())
    if function_in_runtime(function) or function_is_plt(function):
        return True
    return first_unit.getMnemonicString() == "??"


def write_program_data_types(program, file_writer, monitor):
    """Write the program's data types to the output file.

    Data types whose first path component contains ".h" are left out. The
    program's data type manager is closed after writing.
    """
    manager = program.getDataTypeManager()
    writer = DataTypeWriter(manager, file_writer)
    selected_types = [
        data_type
        for data_type in manager.getAllDataTypes()
        if ".h" not in data_type.getPathName().split('/')[1]
    ]
    writer.write(selected_types, monitor)
    manager.close()


def replace_types(code):
    """Replace the type names listed in TYPES_TO_REPLACE with standard C types.

    Only whole words are replaced, so identifiers that merely contain one of
    the names (``uint32_t``, ``ulonglong``, ``print_uint``...) stay intact.

    Returns the code with all replacements applied.
    """
    for old_type, new_type in TYPES_TO_REPLACE.items():
        pattern = rf"\b{re.escape(old_type)}\b"
        # A callable replacement inserts new_type literally, without
        # interpreting backslashes or group references in it.
        code = re.sub(pattern, lambda _match, text=new_type: text, code)
    return code


def remove_stack_protection(code):
    """Removal of stack protection from code.

    Every line that mentions ``STACK_PROTECTOR_VARIABLE`` is dropped. When
    such a line opens the stack check ``if``, the rest of the check is
    dropped with it. Two layouts are recognised:

    ``==`` in the condition::

        if ... == ... STACK_PROTECTOR_VARIABLE {
          return 0;
        }
        <Ghidra's comment>
        __stack_chk_fail();

    Only the ``return`` line is kept, with two leading characters removed
    because it no longer sits inside the ``if`` body.

    ``!=`` in the condition::

        if ... != ... STACK_PROTECTOR_VARIABLE {
        <Ghidra's comment>
        __stack_chk_fail();
        }
        return 0;

    The four lines of the ``if`` block are removed; the ``return`` is kept.

    The result is built in a new list, so removing a line never causes the
    following line to be skipped, and truncated input does not raise.
    """
    lines = code.split('\n')
    kept = []
    index = 0
    while index < len(lines):
        line = lines[index]
        if STACK_PROTECTOR_VARIABLE not in line:
            kept.append(line)
            index += 1
            continue
        # Only a real "if" statement opens a check block, not any line that
        # happens to contain the letters "if".
        opens_check = line.lstrip().startswith(("if ", "if("))
        if opens_check and "==" in line:
            if index + 1 < len(lines):
                kept.append(lines[index + 1][2:])
            index += 5  # if, return, "}", comment, __stack_chk_fail();
        elif opens_check and "!=" in line:
            index += 4  # if, comment, __stack_chk_fail();, "}"
        else:
            index += 1  # declaration or assignment using the variable
    return '\n'.join(kept)


def handle_function(code):
    """Post-process one function: replace types, then strip stack protection.

    Stack protection removal only runs when STACK_PROTECTOR_VARIABLE occurs
    in the code after type replacement.
    """
    processed = replace_types(code)
    if STACK_PROTECTOR_VARIABLE in processed:
        processed = remove_stack_protection(processed)
    return processed


def get_nearest_lower_power_2(num):
    """Rounds a number down to the nearest power of 2.

    Integers are handled exactly through their bit length; going through
    ``log2`` converts to float first and can round large values up
    (``2**60 - 1`` would give ``2**60``). Other numeric types keep the
    ``floor(log2(num))`` computation.

    Raises ValueError for non-positive input.
    """
    if isinstance(num, int):
        if num <= 0:
            raise ValueError("num must be positive")
        return 1 << (num.bit_length() - 1)
    return 2 ** floor(log2(num))


def put_concat(file_writer, code, used_concats):
    """Puts CONCATXY functions into C code.

    Scans ``code`` for "CONCAT" followed by two size digits X and Y and, for
    every (X, Y) pair not yet in ``used_concats``, prints a C helper
    ``CONCATXY(a, b)`` to ``file_writer`` and records the pair.

    Occurrences of "CONCAT" that are not followed by two non-zero decimal
    digits are ignored instead of raising.

    Returns the updated ``used_concats`` set (the same object, mutated).
    """
    concat_idx = code.find("CONCAT")
    while concat_idx != -1:
        sizes_idx = concat_idx + CONCAT_LEN
        size_digits = code[sizes_idx:sizes_idx + 2]
        # Advance now so that every "continue" below moves to the next hit.
        concat_idx = code.find("CONCAT", sizes_idx)
        if len(size_digits) != 2 or not (size_digits.isascii() and size_digits.isdigit()):
            continue
        first_size = int(size_digits[0])
        second_size = int(size_digits[1])
        if first_size == 0 or second_size == 0:
            continue
        if (first_size, second_size) in used_concats:
            continue
        # NOTE(review): sizes are rounded DOWN to a power of two, so a 3-byte
        # operand is declared as uint16_t - confirm this narrowing is intended.
        first_inttype_size = get_nearest_lower_power_2(first_size * BYTE_SIZE)
        second_inttype_size = get_nearest_lower_power_2(second_size * BYTE_SIZE)
        concat_function = (
            f"unsigned long CONCAT{first_size}{second_size}"
            f"(uint{first_inttype_size}_t a, uint{second_inttype_size}_t b)\n"
            "{\n"
            f"\treturn ((unsigned long)b) | (unsigned long)a << ({second_size} * {BYTE_SIZE});\n"
            "}\n"
        )
        file_writer.println(concat_function)
        used_concats.add((first_size, second_size))
    return used_concats

0 comments on commit 29ecfa4

Please sign in to comment.