Skip to content
This repository has been archived by the owner on Nov 23, 2024. It is now read-only.

Commit

Permalink
feat: include the purity analysis into the `run_api` command (#241)
Browse files Browse the repository at this point in the history
Closes #239 

### Summary of Changes

The newly added entry function of the purity analysis, `get_purity_results`, is
now called when the `run_api` command is executed.
It is the starting point of the purity analysis and runs the analysis on the
provided `src_path` string, which represents the package to analyze.

This includes further improvements and bug fixes to the purity analysis
itself.

---------

Co-authored-by: megalinter-bot <[email protected]>
  • Loading branch information
lukarade and megalinter-bot authored May 1, 2024
1 parent 11efcdb commit 7b09e3b
Show file tree
Hide file tree
Showing 16 changed files with 4,305 additions and 1,185 deletions.
5 changes: 5 additions & 0 deletions src/library_analyzer/cli/_run_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from library_analyzer.processing.api import get_api
from library_analyzer.processing.api.docstring_parsing import DocstringStyle
from library_analyzer.processing.api.purity_analysis import get_purity_results
from library_analyzer.processing.dependencies import get_dependencies


Expand Down Expand Up @@ -32,3 +33,7 @@ def _run_api_command(
api_dependencies = get_dependencies(api)
out_file_api_dependencies = out_dir_path.joinpath(f"{package}__api_dependencies.json")
api_dependencies.to_json_file(out_file_api_dependencies)

api_purity = get_purity_results(src_dir_path)
out_file_api_purity = out_dir_path.joinpath(f"{package}__api_purity.json")
api_purity.to_json_file(out_file_api_purity)
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,11 @@
get_module_data,
)
from ._infer_purity import (
get_purity_results,
infer_purity,
)
from ._resolve_references import (
resolve_references,
)

__all__ = [
"get_module_data",
"resolve_references",
"infer_purity",
"build_call_graph",
]
__all__ = ["get_module_data", "resolve_references", "infer_purity", "build_call_graph", "get_purity_results"]
Original file line number Diff line number Diff line change
Expand Up @@ -25,30 +25,31 @@ class CallGraphBuilder:
Classnames in the module as key and their corresponding ClassScope instance as value.
raw_reasons : dict[NodeID, Reasons]
The raw reasons for impurity for all functions.
Keys are the ids of the functions.
call_graph_forest : CallGraphForest
The call graph forest of the module.
The call graph forest for the given functions.
visited : set[NodeID]
A set of all visited nodes.
Parameters
----------
classes : dict[str, ClassScope]
Classnames in the module as key and their corresponding ClassScope instance as value.
raw_reasons : dict[NodeID, Reasons]
The raw reasons for impurity for all functions.
Keys are the ids of the functions.
"""

# TODO: is this the right way to document instance attributes? LARS
def __init__(
self,
classes: dict[str, ClassScope],
raw_reasons: dict[NodeID, Reasons],
) -> None:
"""Initialize the CallGraphBuilder.
Parameters
----------
classes : dict[str, ClassScope]
Classnames in the module as key and their corresponding ClassScope instance as value.
raw_reasons : dict[NodeID, Reasons]
The raw reasons for impurity for all functions.
Keys are the ids of the functions.
"""
self.classes = classes
self.raw_reasons = raw_reasons
self.call_graph_forest = CallGraphForest()
# TODO: does this belong into post init? LARS
self.visited: set[NodeID] = set()

self._build_call_graph_forest()

def _build_call_graph_forest(self) -> CallGraphForest:
Expand Down Expand Up @@ -94,15 +95,29 @@ def _prepare_classes(self) -> None:
for klass in self.classes.values():
# Create a new CallGraphNode for each class and add it to the forest.
class_cgn = CallGraphNode(symbol=klass.symbol, reasons=Reasons(klass.symbol.id))
# If the class has an init function, add it to the class node as a child.
# If the class has a __new__, __init__ or __post_init__ function, add it to the class node as a child.
# Also add the init function to the forest if it is not already there.
if klass.new_function:
new_cgn = CallGraphNode(
symbol=klass.new_function.symbol,
reasons=self.raw_reasons[klass.new_function.symbol.id],
)
self.call_graph_forest.add_graph(klass.new_function.symbol.id, new_cgn)
class_cgn.add_child(new_cgn)
if klass.init_function:
init_cgn = CallGraphNode(
symbol=klass.init_function.symbol,
reasons=self.raw_reasons[klass.init_function.symbol.id],
)
self.call_graph_forest.add_graph(klass.init_function.symbol.id, init_cgn)
class_cgn.add_child(init_cgn)
if klass.post_init_function:
post_init_cgn = CallGraphNode(
symbol=klass.post_init_function.symbol,
reasons=self.raw_reasons[klass.post_init_function.symbol.id],
)
self.call_graph_forest.add_graph(klass.post_init_function.symbol.id, post_init_cgn)
class_cgn.add_child(post_init_cgn)

# Add the class to the forest.
self.call_graph_forest.add_graph(klass.symbol.id, class_cgn)
Expand All @@ -112,13 +127,20 @@ def _built_call_graph(self, reason: Reasons) -> None:
Recursively builds the call graph for a function and adds it to the forest.
The order in which the functions are handled does not matter,
since the functions will set the pointers to the children if needed.
since the functions will set the pointers to the children if needed.
Parameters
----------
reason : Reasons
The raw reasons of the function.
"""
# If the node has already been visited, return
if reason.id in self.visited:
return

# Mark the current node as visited
self.visited.add(reason.id)

# If the node is already inside the forest and does not have any calls left, it is considered to be finished.
if self.call_graph_forest.has_graph(reason.id) and not reason.calls:
return
Expand All @@ -131,7 +153,9 @@ def _built_call_graph(self, reason: Reasons) -> None:
self.call_graph_forest.add_graph(reason.id, cgn)

# The node has calls, which need to be added to the forest and to the children of the current node.
for call in cgn.reasons.calls.copy():
# They are sorted to ensure a deterministic order of the children (especially but not only for testing).
sorted_calls = sorted(cgn.reasons.calls, key=lambda x: x.id)
for call in sorted_calls:
if call in self.call_graph_forest.get_graph(reason.id).reasons.calls:
self.call_graph_forest.get_graph(reason.id).reasons.calls.remove(call)
if isinstance(call, Builtin):
Expand Down Expand Up @@ -174,7 +198,6 @@ def _handle_unknown_call(self, call: Symbol, reason_id: NodeID) -> None:
imported_cgn = ImportedCallGraphNode(
symbol=call,
reasons=Reasons(id=call.id),
# is_imported=bool(isinstance(call.node, astroid.Import | astroid.ImportFrom))
)
self.call_graph_forest.add_graph(call.id, imported_cgn)
self.call_graph_forest.get_graph(reason_id).add_child(self.call_graph_forest.get_graph(call.id))
Expand All @@ -192,7 +215,7 @@ def _handle_unknown_call(self, call: Symbol, reason_id: NodeID) -> None:
)

# Deal with the case that the call calls a function parameter.
if isinstance(call, Parameter):
elif isinstance(call, Parameter):
self.call_graph_forest.get_graph(reason_id).reasons.unknown_calls.add(call)

else:
Expand Down Expand Up @@ -262,7 +285,6 @@ def _test_cgn_for_cycles(

# If the current node is already in the path, a cycle is found.
if cgn.symbol.id in path:
# TODO: how to handle nested cycles? LARS
cut_path = path[path.index(cgn.symbol.id) :]
return {node_id: self.call_graph_forest.get_graph(node_id) for node_id in cut_path}

Expand All @@ -287,10 +309,25 @@ def _test_cgn_for_cycles(
return cycle

def _contract_cycle(self, cycle: dict[NodeID, CallGraphNode]) -> None:
"""Contract a cycle in the call graph.
Contracts a cycle in the call graph into a single node.
To do so, creates a new CombinedCallGraphNode out of all nodes in the cycle and adds it to the forest.
Parameters
----------
cycle : dict[NodeID, CallGraphNode]
A dict of all nodes in the cycle.
Keys are the NodeIDs of the CallGraphNodes.
"""
# Create the new combined node.
combined_name = "+".join(sorted(c.__str__() for c in cycle))
# module = cycle[next(iter(cycle))].symbol.node.root()
combined_id = NodeID(None, combined_name)
module = (
next(iter(cycle.values())).symbol.node.root().name
if (next(iter(cycle.values())).symbol.node and next(iter(cycle.values())).symbol.node.root().name != "")
else None
)
combined_id = NodeID(module, combined_name)
combined_reasons = Reasons(id=combined_id).join_reasons_list([node.reasons for node in cycle.values()])
combined_cgn = CombinedCallGraphNode(
symbol=CombinedSymbol(
Expand All @@ -299,19 +336,25 @@ def _contract_cycle(self, cycle: dict[NodeID, CallGraphNode]) -> None:
name=combined_name,
),
reasons=combined_reasons,
combines=cycle,
)
combines: dict[NodeID, CallGraphNode] = {}
# Check if the combined node is already in the forest.
if self.call_graph_forest.has_graph(combined_cgn.symbol.id):
return

# Find all other calls (calls that are not part of the cycle) and remove all nodes in the cycle from the forest.
for node in cycle.values(): # TODO: call _test_cgn_for_cycles recursively
for node in cycle.values():
for child in node.children.values():
if child.symbol.id not in cycle and not combined_cgn.has_child(child.symbol.id):
combined_cgn.add_child(child)
self.call_graph_forest.delete_graph(node.symbol.id)

if isinstance(node, CombinedCallGraphNode):
combines.update(node.combines)
else:
combines[node.symbol.id] = node
combined_cgn.combines = combines

# Add the combined node to the forest.
self.call_graph_forest.add_graph(combined_id, combined_cgn)

Expand Down
Loading

0 comments on commit 7b09e3b

Please sign in to comment.