add wasm-slice tool

sola-st · Sep 3, 2024 · 260e876 · 260e876
1 parent c779add
commit 260e876
Showing 5 changed files with 168 additions and 119 deletions.
diff --git a/.gitignore b/.gitignore
@@ -15,3 +15,5 @@ data/
 tests/*output.txt
 out/
 .vscode
+*.shrunken.wasm
+*.shrunken.wat
diff --git a/evaluation/interesting.py b/evaluation/interesting.py
@@ -48,13 +48,22 @@ def run_command(engine, test_input):
 
 result = run_command(ENGINE, WASM)
 
-# Check if the output contains the target bug's panic message
-def is_runnable(result):
-    return result.stdout.find('no main export from module') == -1
+def crash_on_wizard():
+    result = run_command(ENGINE, WASM)
+    return result.returncode != 0 and result.stdout.find('no main export from module') == -1
 
-if result.returncode != 0 and is_runnable(result):
-    print("Return code is not 0, interesting!")
+def ok_on_wasmtime():
+    command = ['timeout', '1', 'wasmtime', '--invoke', 'main', WASM]
+    result = subprocess.run(
+        command,
+        capture_output=True,
+        text=True,
+    )
+    return result.returncode == 0
+
+if crash_on_wizard() and ok_on_wasmtime():
+    print("Interesting!")
     sys.exit(0)
 else:
-    print("Return code is 0, not interesting")
+    print("Not interesting")
     sys.exit(1)
diff --git a/evaluation/run_reduction_tool.py b/evaluation/run_reduction_tool.py
@@ -32,9 +32,9 @@
 }
 
 tool_to_command = {
-    "wasm-reduce": "wasm-reduce -b $BINARYEN_ROOT/bin '--command=./evaluation/interesting.py test.wasm' -t test.wasm -w work.wasm",
+    "wasm-reduce": "wasm-reduce -b $BINARYEN_ROOT/bin '--command=./evaluation/interesting.py test.shrunken.wasm' -t test.shrunken.wasm -w work.shrunken.wasm",
     "wasm-shrink": "wasm-tools shrink ./evaluation/interesting.py",
-    "slicedice": "",
+    "wasm-slice": "wasm-slice ./evaluation/interesting.py",
 }
 
 def run_command(tool, test_input):

diff --git a/evaluation/run_slicedice.py b/evaluation/run_slicedice.py
diff --git a/wasm-slice b/wasm-slice
@@ -0,0 +1,149 @@
+#!/usr/bin/env python3
+
+import sys, subprocess, concurrent.futures, re, os, re, subprocess, csv
+
+# this takes upto 150GB of memory.
+# TODO: why?
+WASMR3_PATH = os.getenv("WASMR3_PATH", "/home/wasm-r3")
+PARALLEL = os.getenv
+TIMEOUT = 120
+
+# The Wasm file is given as the first and only argument to the script.
+if len(sys.argv) != 3:
+    print("Usage: wasm-slice <WASM_FILE>")
+    sys.exit(1)
+
+interesting_script = sys.argv[1]
+test_input = sys.argv[2]
+test_name = os.path.splitext(os.path.basename(test_input))[0]
+
+# We prioritize single function replay of heuristic > dynamic > all
+
+def get_heuristic_fidx() -> list:
+    # Hardcode for now
+    # TODO: implement heuristics
+    testname_to_heuristic = {
+        "commanderkeen": [490],
+        "funky-kart": [1529],
+        "guiicons": [213],
+        "hydro": [1290],
+        "jsc": [4771],
+        "mandelbrot": [7],
+        "rfxgen": [271],
+        "rguilayout": [216],
+        "rguistyler": [260],
+        "riconpacker": [244],
+        "rtexviewer": [303],
+        "sqlgui": [917],
+    }
+    return testname_to_heuristic[test_name]
+
+def get_dynamic_fidx():
+    command = ["wizeng.x86-64-linux", "-no-names", "-csv", "--monitors=icount", test_input]
+    try:
+        return extract_dynamic_fidx(subprocess.check_output(command, text=True))
+    except subprocess.CalledProcessError as e:
+        print(f"Command failed: {' '.join(command)}\nError: {e.output}")
+        raise
+
+def extract_dynamic_fidx(csv_output: str):
+    dynamic_fidx = []
+    reader = csv.reader(csv_output.splitlines())
+    next(reader)  # Skip header
+    for row in reader:
+        if len(row) >= 3:
+            function_index = int(row[0].lstrip('#'))
+            dynamic_count = int(row[2])
+            if dynamic_count > 0 and function_index != 0:
+                dynamic_fidx.append(function_index)
+    return sorted(dynamic_fidx)
+
+def get_all_fidx():
+    command = f"wasm-tools objdump {test_input}"
+    output = subprocess.check_output(command, shell=True, text=True)
+    count = extract_function_count(output)
+    return list(range(count))
+
+def extract_function_count(objdump_output):
+    for line in objdump_output.split('\n'):
+        if 'functions' in line:
+            match = re.search(r'(\d+) count', line)
+            if match:
+                return int(match.group(1))
+    return None
+
+
+test_get_function_count = """
+types                                  |        0xb -      0x198 |       397 bytes | 55 count
+functions                              |      0x19b -      0x396 |       507 bytes | 505 count
+tables                                 |      0x398 -      0x39f |         7 bytes | 1 count
+memories                               |      0x3a1 -      0x3a7 |         6 bytes | 1 count
+globals                                |      0x3aa -      0x930 |      1414 bytes | 282 count
+exports                                |      0x933 -     0x100c |      1753 bytes | 301 count
+elements                               |     0x100f -     0x1193 |       388 bytes | 1 count
+data count                             |     0x1195 -     0x1197 |         2 bytes | 1 count
+code                                   |     0x119b -    0x46e2b |    285840 bytes | 505 count
+data                                   |    0x46e2f -    0x5c006 |     86487 bytes | 1422 count
+"""
+
+assert extract_function_count(test_get_function_count) == 505
+
+def get_fidx():
+    heuristic_fidx = get_heuristic_fidx()
+    dynamic_fidx = get_dynamic_fidx()
+    all_fidx = get_all_fidx()
+
+    print("Heuristic function indices:", heuristic_fidx)
+    dynamic_filtered = [idx for idx in dynamic_fidx if idx not in heuristic_fidx]
+    print("Dynamic function indices (except heuristic):", dynamic_filtered)
+
+    # Filter out both heuristic and dynamic indices from all
+    all_filtered = [idx for idx in all_fidx if idx not in heuristic_fidx and idx not in dynamic_fidx]
+    print("All function indices (except heuristic and dynamic):", all_filtered)
+
+    # Create a set to keep track of added indices
+    added = set()
+    combined_fidx = []
+
+    # Add heuristic_fidx first
+    for idx in heuristic_fidx:
+        if idx not in added:
+            combined_fidx.append(idx)
+            added.add(idx)
+
+    # Add dynamic_fidx second
+    for idx in dynamic_fidx:
+        if idx not in added:
+            combined_fidx.append(idx)
+            added.add(idx)
+
+    # Add all_fidx last
+    for idx in all_fidx:
+        if idx not in added:
+            combined_fidx.append(idx)
+            added.add(idx)
+
+    return combined_fidx
+
+
+def run_slicedice(testname, fidx):
+    try:
+        # fidxargs = " ".join([f"-i {f}" for f in fidx.split("-")])
+        command = f"timeout {TIMEOUT}s npm test slicedice -- -t {testname} -i {fidx}"
+        output = subprocess.check_output(command, shell=True, text=True)
+        # TODO: make this configurable
+        replay_wasm_path = f'{WASMR3_PATH}/benchmarks/{test_name}/out/{fidx}/benchmarks/bin_1/replay.wasm'
+        interestingness_command = [interesting_script, replay_wasm_path]
+        result = subprocess.run(interestingness_command, check=False)
+        test_input_size = os.path.getsize(test_input)
+        replay_wasm_size = os.path.getsize(replay_wasm_path)
+        print("Test input file size:", test_input_size)
+        print("Sliced wasm file size:", replay_wasm_size)
+        sys.exit(result.returncode)
+    except Exception as e:
+        print(f"Failed to run {testname} - {fidx}")
+        print(e)
+        return [testname, fidx, "fail"]
+
+for fidx in get_fidx():
+    run_slicedice(test_name, fidx)