feat: include the purity analysis in the `run_api` command (#241)

Closes #239 ### Summary of Changes The newly added purity analysis entry function, `get_purity_results`, is now called when the `run_api` command is executed. It is the starting point of the purity analysis and runs it on the provided `src_path` string, which represents the package to analyze. This change also includes further improvements and bug fixes to the purity analysis itself. --------- Co-authored-by: megalinter-bot <[email protected]>
Safe-DS · May 1, 2024 · 7b09e3b · 7b09e3b
1 parent 11efcdb
commit 7b09e3b
Show file tree

Hide file tree

Showing 16 changed files with 4,305 additions and 1,185 deletions.
diff --git a/src/library_analyzer/cli/_run_api.py b/src/library_analyzer/cli/_run_api.py
@@ -2,6 +2,7 @@
 
 from library_analyzer.processing.api import get_api
 from library_analyzer.processing.api.docstring_parsing import DocstringStyle
+from library_analyzer.processing.api.purity_analysis import get_purity_results
 from library_analyzer.processing.dependencies import get_dependencies
 
 
@@ -32,3 +33,7 @@ def _run_api_command(
     api_dependencies = get_dependencies(api)
     out_file_api_dependencies = out_dir_path.joinpath(f"{package}__api_dependencies.json")
     api_dependencies.to_json_file(out_file_api_dependencies)
+
+    api_purity = get_purity_results(src_dir_path)
+    out_file_api_purity = out_dir_path.joinpath(f"{package}__api_purity.json")
+    api_purity.to_json_file(out_file_api_purity)
diff --git a/src/library_analyzer/processing/api/purity_analysis/__init__.py b/src/library_analyzer/processing/api/purity_analysis/__init__.py
@@ -7,15 +7,11 @@
     get_module_data,
 )
 from ._infer_purity import (
+    get_purity_results,
     infer_purity,
 )
 from ._resolve_references import (
     resolve_references,
 )
 
-__all__ = [
-    "get_module_data",
-    "resolve_references",
-    "infer_purity",
-    "build_call_graph",
-]
+__all__ = ["get_module_data", "resolve_references", "infer_purity", "build_call_graph", "get_purity_results"]
diff --git a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py
@@ -25,30 +25,31 @@ class CallGraphBuilder:
         Classnames in the module as key and their corresponding ClassScope instance as value.
     raw_reasons : dict[NodeID, Reasons]
         The raw reasons for impurity for all functions.
+        Keys are the ids of the functions.
     call_graph_forest : CallGraphForest
-        The call graph forest of the module.
+        The call graph forest for the given functions.
+    visited : set[NodeID]
+        A set of all visited nodes.
+
+    Parameters
+    ----------
+    classes : dict[str, ClassScope]
+        Classnames in the module as key and their corresponding ClassScope instance as value.
+    raw_reasons : dict[NodeID, Reasons]
+        The raw reasons for impurity for all functions.
+        Keys are the ids of the functions.
     """
 
-    # TODO: is this the right way to document instance attributes? LARS
     def __init__(
         self,
         classes: dict[str, ClassScope],
         raw_reasons: dict[NodeID, Reasons],
     ) -> None:
-        """Initialize the CallGraphBuilder.
-
-        Parameters
-        ----------
-        classes : dict[str, ClassScope]
-            Classnames in the module as key and their corresponding ClassScope instance as value.
-        raw_reasons : dict[str, Reasons]
-            The raw reasons for impurity for all functions.
-            Keys are the ids of the functions.
-        """
         self.classes = classes
         self.raw_reasons = raw_reasons
         self.call_graph_forest = CallGraphForest()
-        # TODO: does this belong into post init? LARS
+        self.visited: set[NodeID] = set()
+
         self._build_call_graph_forest()
 
     def _build_call_graph_forest(self) -> CallGraphForest:
@@ -94,15 +95,29 @@ def _prepare_classes(self) -> None:
         for klass in self.classes.values():
             # Create a new CallGraphNode for each class and add it to the forest.
             class_cgn = CallGraphNode(symbol=klass.symbol, reasons=Reasons(klass.symbol.id))
-            # If the class has an init function, add it to the class node as a child.
+            # If the class has a __new__, __init__ or __post_init__ function, add it to the class node as a child.
             # Also add the init function to the forest if it is not already there.
+            if klass.new_function:
+                new_cgn = CallGraphNode(
+                    symbol=klass.new_function.symbol,
+                    reasons=self.raw_reasons[klass.new_function.symbol.id],
+                )
+                self.call_graph_forest.add_graph(klass.new_function.symbol.id, new_cgn)
+                class_cgn.add_child(new_cgn)
             if klass.init_function:
                 init_cgn = CallGraphNode(
                     symbol=klass.init_function.symbol,
                     reasons=self.raw_reasons[klass.init_function.symbol.id],
                 )
                 self.call_graph_forest.add_graph(klass.init_function.symbol.id, init_cgn)
                 class_cgn.add_child(init_cgn)
+            if klass.post_init_function:
+                post_init_cgn = CallGraphNode(
+                    symbol=klass.post_init_function.symbol,
+                    reasons=self.raw_reasons[klass.post_init_function.symbol.id],
+                )
+                self.call_graph_forest.add_graph(klass.post_init_function.symbol.id, post_init_cgn)
+                class_cgn.add_child(post_init_cgn)
 
             # Add the class to the forest.
             self.call_graph_forest.add_graph(klass.symbol.id, class_cgn)
@@ -112,13 +127,20 @@ def _built_call_graph(self, reason: Reasons) -> None:
 
         Recursively builds the call graph for a function and adds it to the forest.
         The order in which the functions are handled does not matter,
-         since the functions will set the pointers to the children if needed.
+        since the functions will set the pointers to the children if needed.
 
         Parameters
         ----------
         reason : Reasons
             The raw reasons of the function.
         """
+        # If the node has already been visited, return
+        if reason.id in self.visited:
+            return
+
+        # Mark the current node as visited
+        self.visited.add(reason.id)
+
         # If the node is already inside the forest and does not have any calls left, it is considered to be finished.
         if self.call_graph_forest.has_graph(reason.id) and not reason.calls:
             return
@@ -131,7 +153,9 @@ def _built_call_graph(self, reason: Reasons) -> None:
         self.call_graph_forest.add_graph(reason.id, cgn)
 
         # The node has calls, which need to be added to the forest and to the children of the current node.
-        for call in cgn.reasons.calls.copy():
+        # They are sorted to ensure a deterministic order of the children (especially but not only for testing).
+        sorted_calls = sorted(cgn.reasons.calls, key=lambda x: x.id)
+        for call in sorted_calls:
             if call in self.call_graph_forest.get_graph(reason.id).reasons.calls:
                 self.call_graph_forest.get_graph(reason.id).reasons.calls.remove(call)
             if isinstance(call, Builtin):
@@ -174,7 +198,6 @@ def _handle_unknown_call(self, call: Symbol, reason_id: NodeID) -> None:
             imported_cgn = ImportedCallGraphNode(
                 symbol=call,
                 reasons=Reasons(id=call.id),
-                # is_imported=bool(isinstance(call.node, astroid.Import | astroid.ImportFrom))
             )
             self.call_graph_forest.add_graph(call.id, imported_cgn)
             self.call_graph_forest.get_graph(reason_id).add_child(self.call_graph_forest.get_graph(call.id))
@@ -192,7 +215,7 @@ def _handle_unknown_call(self, call: Symbol, reason_id: NodeID) -> None:
                     )
 
         # Deal with the case that the call calls a function parameter.
-        if isinstance(call, Parameter):
+        elif isinstance(call, Parameter):
             self.call_graph_forest.get_graph(reason_id).reasons.unknown_calls.add(call)
 
         else:
@@ -262,7 +285,6 @@ def _test_cgn_for_cycles(
 
         # If the current node is already in the path, a cycle is found.
         if cgn.symbol.id in path:
-            # TODO: how to handle nested cycles? LARS
             cut_path = path[path.index(cgn.symbol.id) :]
             return {node_id: self.call_graph_forest.get_graph(node_id) for node_id in cut_path}
 
@@ -287,10 +309,25 @@ def _test_cgn_for_cycles(
         return cycle
 
     def _contract_cycle(self, cycle: dict[NodeID, CallGraphNode]) -> None:
+        """Contract a cycle in the call graph.
+
+        Contracts a cycle in the call graph into a single node.
+        Therefore, creates a new CombinedCallGraphNode out of all nodes in the cycle and adds it to the forest.
+
+        Parameters
+        ----------
+        cycle : dict[NodeID, CallGraphNode]
+            A dict of all nodes in the cycle.
+            Keys are the NodeIDs of the CallGraphNodes.
+        """
         # Create the new combined node.
         combined_name = "+".join(sorted(c.__str__() for c in cycle))
-        # module = cycle[next(iter(cycle))].symbol.node.root()
-        combined_id = NodeID(None, combined_name)
+        module = (
+            next(iter(cycle.values())).symbol.node.root().name
+            if (next(iter(cycle.values())).symbol.node and next(iter(cycle.values())).symbol.node.root().name != "")
+            else None
+        )
+        combined_id = NodeID(module, combined_name)
         combined_reasons = Reasons(id=combined_id).join_reasons_list([node.reasons for node in cycle.values()])
         combined_cgn = CombinedCallGraphNode(
             symbol=CombinedSymbol(
@@ -299,19 +336,25 @@ def _contract_cycle(self, cycle: dict[NodeID, CallGraphNode]) -> None:
                 name=combined_name,
             ),
             reasons=combined_reasons,
-            combines=cycle,
         )
+        combines: dict[NodeID, CallGraphNode] = {}
         # Check if the combined node is already in the forest.
         if self.call_graph_forest.has_graph(combined_cgn.symbol.id):
             return
 
         # Find all other calls (calls that are not part of the cycle) and remove all nodes in the cycle from the forest.
-        for node in cycle.values():  # TODO: call _test_cgn_for_cycles recursively
+        for node in cycle.values():
             for child in node.children.values():
                 if child.symbol.id not in cycle and not combined_cgn.has_child(child.symbol.id):
                     combined_cgn.add_child(child)
             self.call_graph_forest.delete_graph(node.symbol.id)
 
+            if isinstance(node, CombinedCallGraphNode):
+                combines.update(node.combines)
+            else:
+                combines[node.symbol.id] = node
+        combined_cgn.combines = combines
+
         # Add the combined node to the forest.
         self.call_graph_forest.add_graph(combined_id, combined_cgn)