Refactor analysis module and backends (#27)

* Fix plugin settings
* Remove unused code in analysis module
* Refactor metrics with a schema dataclass
* Fix minor bugs in frontend CLI and Binary Ninja
ex0dus-0x · Sep 21, 2022 · ef77482 · ef77482
1 parent a6e2856
commit ef77482
Show file tree

Hide file tree

Showing 9 changed files with 99 additions and 67 deletions.
diff --git a/__init__.py b/__init__.py
@@ -27,25 +27,26 @@
     """
     {
         "title"         : "Skip Stripped Symbols",
-        "description"   : "Ignore stripped symbols",
+        "description"   : "Ignore stripped symbols.",
         "type"          : "boolean",
         "default"       : false
     }
 """,
 )
 
-# TODO: DEFAULT_SCORE_WEIGHTS
 Settings().register_setting(
     "fuzzable.score_weights",
     """
     {
         "title"         : "Override Score Weights",
-        "description"   : "Reset",
+        "description"   : "Change default score weights for each metric.",
         "type"          : "array",
         "elementType"   : "string",
-        "default"       : [0.3, 0.3, 0.05, 0.05, 0.3]
+        "default"       : {}
     }
-""",
+""".format(
+        DEFAULT_SCORE_WEIGHTS
+    ),
 )
 
 PluginCommand.register(

diff --git a/fuzzable/__main__.py b/fuzzable/__main__.py
@@ -63,7 +63,7 @@ def analyze(
     if debug:
         log.setLevel(logging.DEBUG)
 
-    if not target.is_file() or target.is_dir():
+    if not target.is_file() and not target.is_dir():
         error(f"Target path `{target}` does not exist.")
 
     try:

diff --git a/fuzzable/analysis/__init__.py b/fuzzable/analysis/__init__.py
@@ -8,14 +8,10 @@
 import enum
 import typing as t
 
-SCIKIT = True
-try:
-    import skcriteria as skc
-    from skcriteria.madm import simple
-except Exception:
-    SCIKIT = False
-
-from ..metrics import CallScore
+import skcriteria as skc
+from skcriteria.madm import simple
+
+from ..metrics import CallScore, METRICS
 from ..config import INTERESTING_PATTERNS, RISKY_GLIBC_CALL_PATTERNS
 
 # Type sig for a finalized list
@@ -81,11 +77,6 @@ def _rank_fuzzability(self, unranked: t.List[CallScore]) -> Fuzzability:
         This should be the tail call for run, as it produces the finalized results
         """
 
-        # TODO: deprecate this.
-        if not SCIKIT:
-            return self._rank_simple_fuzzability(unranked)
-
-        # normalize
         nl_normalized = AnalysisBackend._normalize(
             [score.natural_loops for score in unranked]
         )
@@ -108,13 +99,7 @@ def _rank_fuzzability(self, unranked: t.List[CallScore]) -> Fuzzability:
             objectives,
             weights=self.score_weights,
             alternatives=names,
-            criteria=[
-                "fuzz_friendly",
-                "sinks",
-                "loop",
-                "coverage",
-                "cyclomatic_complexity",
-            ],
+            criteria=[metric.identifier for metric in METRICS[3:8]],
         )
 
         dec = simple.WeightedSumModel()
@@ -135,19 +120,9 @@ def _rank_fuzzability(self, unranked: t.List[CallScore]) -> Fuzzability:
         sorted_results = [y for _, y in sorted(zip(ranks, new_unranked))]
         return sorted_results
 
-    def _rank_simple_fuzzability(self, unranked: t.List[CallScore]) -> Fuzzability:
-        nl_normalized = AnalysisBackend._normalize(
-            [score.natural_loops for score in unranked]
-        )
-        for score, new_nl in zip(unranked, nl_normalized):
-            score.natural_loops = new_nl
-
-        cc_normalized = AnalysisBackend._normalize(
-            [score.cyclomatic_complexity for score in unranked]
-        )
-        for score, new_cc in zip(unranked, cc_normalized):
-            score.cyclomatic_complexity = new_cc
-
+    @staticmethod
+    def _rank_simple_fuzzability(unranked: t.List[CallScore]) -> Fuzzability:
+        """Not used anymore."""
         return sorted(unranked, key=lambda obj: obj.simple_fuzzability, reverse=True)
 
     @staticmethod
@@ -201,7 +176,8 @@ def is_toplevel_call(self, target: t.Any) -> bool:
     @abc.abstractmethod
     def risky_sinks(self, func: t.Any) -> int:
         """
-        HEURISTIC
+        FUZZABILITY HEURISTIC
+
         Checks to see if one or more of the function's arguments is
         potentially user-controlled, and flows into an abusable call.
         """
@@ -215,7 +191,8 @@ def _is_risky_call(name: str) -> bool:
     @abc.abstractmethod
     def get_coverage_depth(self, func: t.Any) -> int:
         """
-        HEURISTIC
+        FUZZABILITY HEURISTIC
+
         Calculates and returns a `CoverageReport` that highlights how much
         a fuzzer would ideally explore at different granularities.
         """
@@ -224,7 +201,8 @@ def get_coverage_depth(self, func: t.Any) -> int:
     @abc.abstractmethod
     def natural_loops(self, func: t.Any) -> int:
         """
-        HEURISTIC
+        FUZZABILITY HEURISTIC
+
         Detection of loops is at a basic block level by checking the dominance frontier,
         which denotes the next successor the current block node will definitely reach. If the
         same basic block exists in the dominance frontier set, then that means the block will
@@ -235,7 +213,8 @@ def natural_loops(self, func: t.Any) -> int:
     @abc.abstractmethod
     def get_cyclomatic_complexity(self) -> int:
         """
-        HEURISTIC
+        FUZZABILITY HEURISTIC
+
         Calculates the complexity of a given function using McCabe's metric. We do not
         account for connected components since we assume that the target is a singular
         connected component.

diff --git a/fuzzable/analysis/binja.py b/fuzzable/analysis/binja.py
@@ -24,8 +24,7 @@
 
 from .. import generate
 from . import AnalysisBackend, AnalysisMode, Fuzzability, DEFAULT_SCORE_WEIGHTS
-from ..metrics import CallScore
-from ..cli import COLUMNS, CSV_HEADER
+from ..metrics import CallScore, METRICS
 
 
 class _BinjaAnalysisMeta(type(AnalysisBackend), type(BackgroundTaskThread)):
@@ -87,8 +86,10 @@ def run(self) -> t.Optional[Fuzzability]:
 
         # if not headless, handle displaying results back
         if not self.headless:
-            csv_result = CSV_HEADER
-            csv_result = ", ".join([f'"{column}"' for column in COLUMNS])
+            csv_result = ",".join([metric.identifier for metric in METRICS])
+
+            columns = [metric.friendly_name for metric in METRICS]
+            csv_result = ", ".join([f'"{column}"' for column in columns])
 
             # TODO: reuse rich for markdown
             markdown_result = f"""# Fuzzable Targets

diff --git a/fuzzable/cli.py b/fuzzable/cli.py
@@ -12,7 +12,8 @@
 from rich.console import Console
 from rich.table import Table
 
-from .analysis import Fuzzability
+from .analysis import Fuzzability, CallScore
+from .metrics import METRICS
 from .log import log
 
 from pathlib import Path
@@ -23,20 +24,6 @@
     bg=typer.colors.RED,
 )
 
-COLUMNS = [
-    "Function Signature",
-    "Location",
-    "Fuzzability Score",
-    "Fuzz-Friendly Name",
-    "Risky Data Sinks",
-    "Natural Loops",
-    "Cyclomatic Complexity",
-    "Coverage Depth",
-]
-
-# TODO: merge with the one above
-CSV_HEADER = '"name", "loc, "fuzz_friendly", "risky_sinks", "natural_loops", "cyc_complex", "cov_depth", "fuzzability"\n'
-
 
 def error(string: str) -> None:
     """Pretty-prints an error message and exits"""
@@ -56,7 +43,7 @@ def print_table(
 ) -> None:
     """Pretty-prints fuzzability results for the CLI"""
     table = Table(title=f"\nFuzzable Report for Target `{target}`")
-    for column in COLUMNS:
+    for column in [metric.friendly_name for metric in METRICS]:
         table.add_column(column, style="magenta")
 
     for row in fuzzability:
@@ -86,13 +73,15 @@ def print_table(
         rprint("\n")
 
 
-def export_results(export, results) -> None:
+def export_results(export: Path, results: t.List[CallScore]) -> None:
+    """Given a file format and generated results, write to path."""
     writer = open(export, "w")
     ext = export.suffix
     if ext == ".json":
         writer.write(json.dumps([res.asdict() for res in results]))
     elif ext == ".csv":
-        writer.write(CSV_HEADER.replace('"', ""))
+        csv_header = ",".join([metric.identifier for metric in METRICS])
+        writer.write(csv_header + "\n")
         for res in results:
             writer.write(res.csv_row)
     elif ext == ".md":

diff --git a/fuzzable/config.py b/fuzzable/config.py
@@ -6,7 +6,6 @@
 """
 import typing as t
 
-from os.path import dirname, abspath
 from pathlib import Path
 
 

diff --git a/fuzzable/metrics.py b/fuzzable/metrics.py
@@ -3,13 +3,36 @@
 
     Dataclass definitions for various metrics collected during the risk analysis.
 """
-import json
 import functools
 import typing as t
 
 from dataclasses import dataclass, field, asdict
 
 
+@dataclass
+class MetricSchema:
+    # shorthand name
+    identifier: str
+
+    # how it is displayed in the CLI/disassembly frontend
+    friendly_name: str
+
+
+# Stores all the static analysis metrics that fuzzable currently supports.
+# This list should be expanded if additional metrics are to be introduced,
+# alongside a new base method in the AnalysisBackend
+METRICS: t.List[MetricSchema] = [
+    MetricSchema(identifier="name", friendly_name="Function Signature"),
+    MetricSchema(identifier="loc", friendly_name="Location"),
+    MetricSchema(identifier="fuzzability", friendly_name="Fuzzability Score"),
+    MetricSchema(identifier="fuzz_friendly", friendly_name="Fuzz-Friendly Name"),
+    MetricSchema(identifier="risky_sinks", friendly_name="Risky Data Sinks"),
+    MetricSchema(identifier="natural_loops", friendly_name="Natural Loops"),
+    MetricSchema(identifier="cyc_complex", friendly_name="Cyclomatic Complexity"),
+    MetricSchema(identifier="cov_depth", friendly_name="Coverage Depth"),
+]
+
+
 @dataclass
 class CoverageReport:
     """TODO"""

diff --git a/requirements.txt b/requirements.txt
@@ -218,6 +218,7 @@ pygments==2.12.0; python_full_version >= "3.6.3" and python_full_version < "4.0.
 pyparsing==3.0.9; python_full_version >= "3.6.8" and python_version >= "3.7" \
     --hash=sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc \
     --hash=sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb
+pypcode==1.0.7; python_version >= "3.6"
 pyquery==1.4.3
 pysmt==0.9.6.dev21; python_version >= "3.8"
 python-dateutil==2.8.2; python_version >= "3.8" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.8"

diff --git a/tests/test_main.py b/tests/test_main.py
@@ -0,0 +1,39 @@
+"""
+test_main.py
+
+    Tests main functionality, including the binary and source-code analysis backends.
+"""
+
+import unittest
+
+from pathlib import Path
+
+from fuzzable.analysis import AnalysisMode
+from fuzzable.analysis.angr import AngrAnalysis
+from fuzzable.analysis.ast import AstAnalysis
+
+
+class TestMain(unittest.TestCase):
+    def test_basic(self):
+        data = [1, 2, 3]
+        result = sum(data)
+        self.assertEqual(result, 6)
+
+    def test_analysis_binary(self):
+        target = Path("examples/binaries/libbasic.so.1")
+        analyzer = AngrAnalysis(target, mode=AnalysisMode.RANK)
+        analyzer.run()
+
+    def test_analysis_source_file(self):
+        target = Path("examples/source/libbasic.c")
+        analyzer = AstAnalysis([target], mode=AnalysisMode.RANK)
+        analyzer.run()
+
+    def test_analysis_source_folder(self):
+        target = Path("examples/source/libyaml")
+        analyzer = AstAnalysis(target, mode=AnalysisMode.RANK)
+        analyzer.run()
+
+
+if __name__ == "__main__":
+    unittest.main()
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,7 +6,6 @@ @@
     """
     import typing as t
-    from os.path import dirname, abspath
     from pathlib import Path
@@ Expand Down @@