Skip to content

Commit

Permalink
[x2cpg] Performance improvements in Overlay Passes (#4227)
Browse files Browse the repository at this point in the history
* Changes to parallelize linkToSingle method processing

Changes to parallelize linkToSingle method processing

* Converted StaticCallLinker to parallel pass

* Some testing changes

* Improvments in StaticCallLinker and LinkToMultiple methods

1. Made changes in `StaticCallLinker to divide the list of chunks
further into smaller batches so that the in memory data generated in one
batch becomes smaller and in turn when it gets absorbed the given memory
will be garbage collected to free up the memory.
2. Introduced parallism inside `linkToMultiple` method handling/

* Few debug logs along with failing unit tests

1. Added few debug logs while linking `CALL` nodes to `METHOD` nodes if
the number of METHOD nodes found is greater than 5.
2. There were wrong `FieldAccess` nodes being created, which is
resulting into large memory being consumed in `StaticCallLinker` pass.
Added respective failing unit tests in ignored state. These unit tests
needs to be fixed.

* Updated unit tests with working one and the failing one

* Review comment fixes

* Review comment fixes

* Fix for fieldAccess operator call node

In case of chained field access like `a.b.c` for `.c`, it was creating a
`CALL` node with methodFullName set to `<operator>.fieldAccess` and name
set to "c". Hence there were many `METHOD` sutbs being created for each
of such `CALL` nodes with fullName set to `<operator>.fieldAccess`.
Which is resulting into many edges being created mapping each
fieldAccess `CALL` nodes to each of these `METHOD` stubs inside
`StaticCallLinker` Pass. This change fixes the issue.

* Changed TypeUsagePass and StaticCallLinker

1. Converted TypeUsagePass into two passes with ForkJoinParallelPass
2. Converted StaticCallLinker pass to extend from ForkJoinParallelPass

* removed unwanted code

* changes as per review comment

* Review comment fixes

* review comment fixes

* review comment fixes
  • Loading branch information
pandurangpatil authored Mar 1, 2024
1 parent 310f209 commit a02559d
Show file tree
Hide file tree
Showing 9 changed files with 131 additions and 122 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ object Base {
new MethodDecoratorPass(cpg),
new AstLinkerPass(cpg),
new ContainsEdgePass(cpg),
new TypeUsagePass(cpg)
new TypeRefPass(cpg),
new TypeEvalPass(cpg)
)

}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
package io.joern.x2cpg.passes.base

import io.joern.x2cpg.utils.LinkingUtil
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.{NewFile, StoredNode}
import io.shiftleft.codepropertygraph.generated.{EdgeTypes, NodeTypes, PropertyNames}
import io.shiftleft.passes.CpgPass
import io.shiftleft.semanticcpg.language._
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.semanticcpg.language.types.structure.FileTraversal
import io.joern.x2cpg.utils.LinkingUtil

import scala.collection.mutable

/** For all nodes with FILENAME fields, create corresponding FILE nodes and connect node with FILE node via outgoing
* SOURCE_FILE edges.
*/
class FileCreationPass(cpg: Cpg) extends CpgPass(cpg) with LinkingUtil {
private val srcLabels = List(NodeTypes.NAMESPACE_BLOCK, NodeTypes.TYPE_DECL, NodeTypes.METHOD, NodeTypes.COMMENT)

override def run(dstGraph: DiffGraphBuilder): Unit = {
val originalFileNameToNode = mutable.Map.empty[String, StoredNode]
Expand Down Expand Up @@ -41,7 +42,8 @@ class FileCreationPass(cpg: Cpg) extends CpgPass(cpg) with LinkingUtil {
// Create SOURCE_FILE edges from nodes of various types to FILE
linkToSingle(
cpg,
srcLabels = List(NodeTypes.NAMESPACE_BLOCK, NodeTypes.TYPE_DECL, NodeTypes.METHOD, NodeTypes.COMMENT),
srcNodes = cpg.graph.nodes(srcLabels: _*).toList,
srcLabels = srcLabels,
dstNodeLabel = NodeTypes.FILE,
edgeType = EdgeTypes.SOURCE_FILE,
dstNodeMap = { x =>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package io.joern.x2cpg.passes.base

import io.joern.x2cpg.utils.LinkingUtil
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.{EdgeTypes, NodeTypes, PropertyNames}
import io.shiftleft.passes.ForkJoinParallelCpgPass
import overflowdb.Node
import overflowdb.traversal.*

class TypeEvalPass(cpg: Cpg) extends ForkJoinParallelCpgPass[List[Node]](cpg) with LinkingUtil {
val srcLabels = List(
NodeTypes.METHOD_PARAMETER_IN,
NodeTypes.METHOD_PARAMETER_OUT,
NodeTypes.METHOD_RETURN,
NodeTypes.MEMBER,
NodeTypes.LITERAL,
NodeTypes.CALL,
NodeTypes.LOCAL,
NodeTypes.IDENTIFIER,
NodeTypes.BLOCK,
NodeTypes.METHOD_REF,
NodeTypes.TYPE_REF,
NodeTypes.UNKNOWN
)

def generateParts(): Array[List[Node]] = {
val nodes = cpg.graph.nodes(srcLabels: _*).toList
nodes.grouped(getBatchSize(nodes.size)).toArray
}
def runOnPart(builder: DiffGraphBuilder, part: List[overflowdb.Node]): Unit = {
linkToSingle(
cpg = cpg,
srcNodes = part,
srcLabels = srcLabels,
dstNodeLabel = NodeTypes.TYPE,
edgeType = EdgeTypes.EVAL_TYPE,
dstNodeMap = typeFullNameToNode(cpg, _),
dstFullNameKey = PropertyNames.TYPE_FULL_NAME,
dstGraph = builder,
None
)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package io.joern.x2cpg.passes.base

import io.joern.x2cpg.utils.LinkingUtil
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.{EdgeTypes, NodeTypes, PropertyNames}
import io.shiftleft.passes.ForkJoinParallelCpgPass
import overflowdb.Node
import overflowdb.traversal.*

class TypeRefPass(cpg: Cpg) extends ForkJoinParallelCpgPass[List[Node]](cpg) with LinkingUtil {
val srcLabels = List(NodeTypes.TYPE)

def generateParts(): Array[List[Node]] = {
val nodes = cpg.graph.nodes(srcLabels: _*).toList
nodes.grouped(getBatchSize(nodes.size)).toArray
}
def runOnPart(builder: DiffGraphBuilder, part: List[overflowdb.Node]): Unit = {
linkToSingle(
cpg = cpg,
srcNodes = part,
srcLabels = srcLabels,
dstNodeLabel = NodeTypes.TYPE_DECL,
edgeType = EdgeTypes.REF,
dstNodeMap = typeDeclFullNameToNode(cpg, _),
dstFullNameKey = PropertyNames.TYPE_DECL_FULL_NAME,
dstGraph = builder,
None
)
}
}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
package io.joern.x2cpg.passes.callgraph

import io.joern.x2cpg.utils.LinkingUtil
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated._
import io.shiftleft.codepropertygraph.generated.*
import io.shiftleft.passes.CpgPass
import io.joern.x2cpg.utils.LinkingUtil
import overflowdb.traversal.*

/** This pass has MethodStubCreator and TypeDeclStubCreator as prerequisite for language frontends which do not provide
* method stubs and type decl stubs.
*/
class MethodRefLinker(cpg: Cpg) extends CpgPass(cpg) with LinkingUtil {

private val srcLabels = List(NodeTypes.METHOD_REF)

override def run(dstGraph: DiffGraphBuilder): Unit = {
// Create REF edges from METHOD_REFs to METHOD
linkToSingle(
cpg,
srcNodes = cpg.graph.nodes(srcLabels: _*).toList,
srcLabels = List(NodeTypes.METHOD_REF),
dstNodeLabel = NodeTypes.METHOD,
edgeType = EdgeTypes.REF,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,64 +1,40 @@
package io.joern.x2cpg.passes.callgraph

import io.joern.x2cpg.utils.LinkingUtil
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.nodes._
import io.shiftleft.codepropertygraph.generated.nodes.*
import io.shiftleft.codepropertygraph.generated.{DispatchTypes, EdgeTypes}
import io.shiftleft.passes.CpgPass
import io.shiftleft.semanticcpg.language._
import io.shiftleft.passes.ForkJoinParallelCpgPass
import io.shiftleft.semanticcpg.language.*
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.mutable
class StaticCallLinker(cpg: Cpg) extends ForkJoinParallelCpgPass[Seq[Call]](cpg) with LinkingUtil {

class StaticCallLinker(cpg: Cpg) extends CpgPass(cpg) {

import StaticCallLinker._
private val methodFullNameToNode = mutable.Map.empty[String, List[Method]]

override def run(dstGraph: DiffGraphBuilder): Unit = {
private val logger: Logger = LoggerFactory.getLogger(classOf[StaticCallLinker])

cpg.method.foreach { method =>
methodFullNameToNode.updateWith(method.fullName) {
case Some(l) => Some(method :: l)
case None => Some(List(method))
}
}
override def generateParts(): Array[Seq[Call]] = {
val calls = cpg.call.l
calls.grouped(getBatchSize(calls.size)).toArray
}

cpg.call.foreach { call =>
override def runOnPart(builder: DiffGraphBuilder, calls: Seq[Call]): Unit = {
calls.foreach { call =>
try {
linkCall(call, dstGraph)
call.dispatchType match {
case DispatchTypes.STATIC_DISPATCH | DispatchTypes.INLINED =>
val resolvedMethods = cpg.method.fullNameExact(call.methodFullName).l
resolvedMethods.foreach(dst => builder.addEdge(call, dst, EdgeTypes.CALL))
val size = resolvedMethods.size
// Add the debug logs with number of METHOD nodes found for given method full name
if size > 1 then logger.debug(s"Total $size METHOD nodes found for -> ${call.methodFullName}")
case DispatchTypes.DYNAMIC_DISPATCH =>
// Do nothing
case _ => logger.warn(s"Unknown dispatch type on dynamic CALL ${call.code}")
}
} catch {
case exception: Exception =>
throw new RuntimeException(exception)
logger.error(s"Exception in StaticCallLinker: ", exception)
}
}
}

private def linkCall(call: Call, dstGraph: DiffGraphBuilder): Unit = {
call.dispatchType match {
case DispatchTypes.STATIC_DISPATCH | DispatchTypes.INLINED =>
linkStaticCall(call, dstGraph)
case DispatchTypes.DYNAMIC_DISPATCH =>
// Do nothing
case _ => logger.warn(s"Unknown dispatch type on dynamic CALL ${call.code}")
}
}

private def linkStaticCall(call: Call, dstGraph: DiffGraphBuilder): Unit = {
val resolvedMethodOption = methodFullNameToNode.get(call.methodFullName)
if (resolvedMethodOption.isDefined) {
resolvedMethodOption.get.foreach { dst =>
dstGraph.addEdge(call, dst, EdgeTypes.CALL)
}
} else {
logger.info(
s"Unable to link static CALL with METHOD_FULL_NAME ${call.methodFullName}, NAME ${call.name}, " +
s"SIGNATURE ${call.signature}, CODE ${call.code}"
)
}
}

}

object StaticCallLinker {
private val logger: Logger = LoggerFactory.getLogger(classOf[StaticCallLinker])
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import scala.util.Try
*/
object ConcurrentTaskUtil {

private val MAX_POOL_SIZE = Runtime.getRuntime.availableProcessors()
val MAX_POOL_SIZE = Runtime.getRuntime.availableProcessors()

/** Uses a thread pool with a limited number of active threads executing a task at any given point. This is effective
* when tasks may require large amounts of memory, or single tasks are too short lived.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,28 +1,21 @@
package io.joern.x2cpg.utils

import io.joern.x2cpg.passes.frontend.Dereference
import io.shiftleft.codepropertygraph.generated.PropertyNames
import io.shiftleft.codepropertygraph.generated.nodes.StoredNode
import io.shiftleft.codepropertygraph.generated.nodes.TypeDecl
import io.shiftleft.codepropertygraph.generated.Properties
import io.shiftleft.codepropertygraph.generated.nodes.Method
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.NamespaceBlock
import io.shiftleft.codepropertygraph.generated.nodes.Type
import org.slf4j.Logger
import org.slf4j.LoggerFactory
import overflowdb.NodeDb
import overflowdb.PropertyKey
import overflowdb.NodeRef
import overflowdb.traversal._
import overflowdb.traversal.ChainedImplicitsTemp._
import io.shiftleft.codepropertygraph.generated.nodes.*
import io.shiftleft.codepropertygraph.generated.{Properties, PropertyNames}
import org.slf4j.{Logger, LoggerFactory}
import overflowdb.traversal.*
import overflowdb.traversal.ChainedImplicitsTemp.*
import overflowdb.{Node, NodeDb, NodeRef, PropertyKey}

import scala.collection.mutable
import scala.jdk.CollectionConverters._
import scala.jdk.CollectionConverters.*

trait LinkingUtil {

import overflowdb.BatchedUpdate.DiffGraphBuilder
private val MAX_BATCH_SIZE = 100

val logger: Logger = LoggerFactory.getLogger(classOf[LinkingUtil])

Expand All @@ -41,11 +34,20 @@ trait LinkingUtil {
def nodesWithFullName(cpg: Cpg, x: String): mutable.Seq[NodeRef[_ <: NodeDb]] =
cpg.graph.indexManager.lookup(PropertyNames.FULL_NAME, x).asScala

protected def getBatchSize(totalItems: Int): Int = {
val batchSize =
if totalItems > MAX_BATCH_SIZE then totalItems / ConcurrentTaskUtil.MAX_POOL_SIZE
else MAX_BATCH_SIZE
Math.min(batchSize, MAX_BATCH_SIZE)
}

/** For all nodes `n` with a label in `srcLabels`, determine the value of `n.\$dstFullNameKey`, use that to lookup the
* destination node in `dstNodeMap`, and create an edge of type `edgeType` between `n` and the destination node.
*/
def linkToSingle(

protected def linkToSingle(
cpg: Cpg,
srcNodes: List[Node],
srcLabels: List[String],
dstNodeLabel: String,
edgeType: String,
Expand All @@ -54,9 +56,9 @@ trait LinkingUtil {
dstGraph: DiffGraphBuilder,
dstNotExistsHandler: Option[(StoredNode, String) => Unit]
): Unit = {
var loggedDeprecationWarning = false
val dereference = Dereference(cpg)
cpg.graph.nodes(srcLabels: _*).foreach { srcNode =>
var loggedDeprecationWarning = false
srcNodes.foreach { srcNode =>
// If the source node does not have any outgoing edges of this type
// This check is just required for backward compatibility
if (srcNode.outE(edgeType).isEmpty) {
Expand Down Expand Up @@ -171,5 +173,4 @@ trait LinkingUtil {
s"dstNodeType=$dstNodeType, dstNodeId=$dstNodeId"
)
}

}

0 comments on commit a02559d

Please sign in to comment.