diff --git a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/layers/Base.scala b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/layers/Base.scala index 6a7deb418413..57e017fb0c77 100644 --- a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/layers/Base.scala +++ b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/layers/Base.scala @@ -20,7 +20,8 @@ object Base { new MethodDecoratorPass(cpg), new AstLinkerPass(cpg), new ContainsEdgePass(cpg), - new TypeUsagePass(cpg) + new TypeRefPass(cpg), + new TypeEvalPass(cpg) ) } diff --git a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/base/FileCreationPass.scala b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/base/FileCreationPass.scala index bc340b45fd45..32e815c20bab 100644 --- a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/base/FileCreationPass.scala +++ b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/base/FileCreationPass.scala @@ -1,12 +1,12 @@ package io.joern.x2cpg.passes.base +import io.joern.x2cpg.utils.LinkingUtil import io.shiftleft.codepropertygraph.Cpg import io.shiftleft.codepropertygraph.generated.nodes.{NewFile, StoredNode} import io.shiftleft.codepropertygraph.generated.{EdgeTypes, NodeTypes, PropertyNames} import io.shiftleft.passes.CpgPass -import io.shiftleft.semanticcpg.language._ +import io.shiftleft.semanticcpg.language.* import io.shiftleft.semanticcpg.language.types.structure.FileTraversal -import io.joern.x2cpg.utils.LinkingUtil import scala.collection.mutable @@ -14,6 +14,7 @@ import scala.collection.mutable * SOURCE_FILE edges. */ class FileCreationPass(cpg: Cpg) extends CpgPass(cpg) with LinkingUtil { + private val srcLabels = List(NodeTypes.NAMESPACE_BLOCK, NodeTypes.TYPE_DECL, NodeTypes.METHOD, NodeTypes.COMMENT) override def run(dstGraph: DiffGraphBuilder): Unit = { val originalFileNameToNode = mutable.Map.empty[String, StoredNode] @@ -41,7 +42,8 @@ class FileCreationPass(cpg: Cpg) extends CpgPass(cpg) with LinkingUtil { // Create SOURCE_FILE edges from nodes of various types to FILE linkToSingle( cpg, - srcLabels = List(NodeTypes.NAMESPACE_BLOCK, NodeTypes.TYPE_DECL, NodeTypes.METHOD, NodeTypes.COMMENT), + srcNodes = cpg.graph.nodes(srcLabels: _*).toList, + srcLabels = srcLabels, dstNodeLabel = NodeTypes.FILE, edgeType = EdgeTypes.SOURCE_FILE, dstNodeMap = { x => diff --git a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/base/TypeEvalPass.scala b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/base/TypeEvalPass.scala new file mode 100644 index 000000000000..5f63163ec71b --- /dev/null +++ b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/base/TypeEvalPass.scala @@ -0,0 +1,43 @@ +package io.joern.x2cpg.passes.base + +import io.joern.x2cpg.utils.LinkingUtil +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.{EdgeTypes, NodeTypes, PropertyNames} +import io.shiftleft.passes.ForkJoinParallelCpgPass +import overflowdb.Node +import overflowdb.traversal.* + +class TypeEvalPass(cpg: Cpg) extends ForkJoinParallelCpgPass[List[Node]](cpg) with LinkingUtil { + val srcLabels = List( + NodeTypes.METHOD_PARAMETER_IN, + NodeTypes.METHOD_PARAMETER_OUT, + NodeTypes.METHOD_RETURN, + NodeTypes.MEMBER, + NodeTypes.LITERAL, + NodeTypes.CALL, + NodeTypes.LOCAL, + NodeTypes.IDENTIFIER, + NodeTypes.BLOCK, + NodeTypes.METHOD_REF, + NodeTypes.TYPE_REF, + NodeTypes.UNKNOWN + ) + + def generateParts(): Array[List[Node]] = { + val nodes = cpg.graph.nodes(srcLabels: _*).toList + nodes.grouped(getBatchSize(nodes.size)).toArray + } + def runOnPart(builder: DiffGraphBuilder, part: List[overflowdb.Node]): Unit = { + linkToSingle( + cpg = cpg, + srcNodes = part, + srcLabels = srcLabels, + dstNodeLabel = NodeTypes.TYPE, + edgeType = EdgeTypes.EVAL_TYPE, + dstNodeMap = typeFullNameToNode(cpg, _), + dstFullNameKey = PropertyNames.TYPE_FULL_NAME, + dstGraph = builder, + None + ) + } +} diff --git a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/base/TypeRefPass.scala b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/base/TypeRefPass.scala new file mode 100644 index 000000000000..3d371aa1b429 --- /dev/null +++ b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/base/TypeRefPass.scala @@ -0,0 +1,30 @@ +package io.joern.x2cpg.passes.base + +import io.joern.x2cpg.utils.LinkingUtil +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.{EdgeTypes, NodeTypes, PropertyNames} +import io.shiftleft.passes.ForkJoinParallelCpgPass +import overflowdb.Node +import overflowdb.traversal.* + +class TypeRefPass(cpg: Cpg) extends ForkJoinParallelCpgPass[List[Node]](cpg) with LinkingUtil { + val srcLabels = List(NodeTypes.TYPE) + + def generateParts(): Array[List[Node]] = { + val nodes = cpg.graph.nodes(srcLabels: _*).toList + nodes.grouped(getBatchSize(nodes.size)).toArray + } + def runOnPart(builder: DiffGraphBuilder, part: List[overflowdb.Node]): Unit = { + linkToSingle( + cpg = cpg, + srcNodes = part, + srcLabels = srcLabels, + dstNodeLabel = NodeTypes.TYPE_DECL, + edgeType = EdgeTypes.REF, + dstNodeMap = typeDeclFullNameToNode(cpg, _), + dstFullNameKey = PropertyNames.TYPE_DECL_FULL_NAME, + dstGraph = builder, + None + ) + } +} diff --git a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/base/TypeUsagePass.scala b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/base/TypeUsagePass.scala deleted file mode 100644 index 9ace35c71092..000000000000 --- a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/base/TypeUsagePass.scala +++ /dev/null @@ -1,48 +0,0 @@ -package io.joern.x2cpg.passes.base - -import io.joern.x2cpg.utils.LinkingUtil -import io.shiftleft.codepropertygraph.Cpg -import io.shiftleft.codepropertygraph.generated.{EdgeTypes, NodeTypes, PropertyNames} -import io.shiftleft.passes.CpgPass - -class TypeUsagePass(cpg: Cpg) extends CpgPass(cpg) with LinkingUtil { - - override def run(dstGraph: DiffGraphBuilder): Unit = { - // Create REF edges from TYPE nodes to TYPE_DECL - linkToSingle( - cpg, - srcLabels = List(NodeTypes.TYPE), - dstNodeLabel = NodeTypes.TYPE_DECL, - edgeType = EdgeTypes.REF, - dstNodeMap = typeDeclFullNameToNode(cpg, _), - dstFullNameKey = PropertyNames.TYPE_DECL_FULL_NAME, - dstGraph, - None - ) - - // Create EVAL_TYPE edges from nodes of various types to TYPE - linkToSingle( - cpg, - srcLabels = List( - NodeTypes.METHOD_PARAMETER_IN, - NodeTypes.METHOD_PARAMETER_OUT, - NodeTypes.METHOD_RETURN, - NodeTypes.MEMBER, - NodeTypes.LITERAL, - NodeTypes.CALL, - NodeTypes.LOCAL, - NodeTypes.IDENTIFIER, - NodeTypes.BLOCK, - NodeTypes.METHOD_REF, - NodeTypes.TYPE_REF, - NodeTypes.UNKNOWN - ), - dstNodeLabel = NodeTypes.TYPE, - edgeType = EdgeTypes.EVAL_TYPE, - dstNodeMap = typeFullNameToNode(cpg, _), - dstFullNameKey = "TYPE_FULL_NAME", - dstGraph, - None - ) - } -} diff --git a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/callgraph/MethodRefLinker.scala b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/callgraph/MethodRefLinker.scala index 6d8432d7269e..686fe6bc85fa 100644 --- a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/callgraph/MethodRefLinker.scala +++ b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/callgraph/MethodRefLinker.scala @@ -1,19 +1,23 @@ package io.joern.x2cpg.passes.callgraph +import io.joern.x2cpg.utils.LinkingUtil import io.shiftleft.codepropertygraph.Cpg -import io.shiftleft.codepropertygraph.generated._ +import io.shiftleft.codepropertygraph.generated.* import io.shiftleft.passes.CpgPass -import io.joern.x2cpg.utils.LinkingUtil +import overflowdb.traversal.* /** This pass has MethodStubCreator and TypeDeclStubCreator as prerequisite for language frontends which do not provide * method stubs and type decl stubs. */ class MethodRefLinker(cpg: Cpg) extends CpgPass(cpg) with LinkingUtil { + private val srcLabels = List(NodeTypes.METHOD_REF) + override def run(dstGraph: DiffGraphBuilder): Unit = { // Create REF edges from METHOD_REFs to METHOD linkToSingle( cpg, + srcNodes = cpg.graph.nodes(srcLabels: _*).toList, srcLabels = List(NodeTypes.METHOD_REF), dstNodeLabel = NodeTypes.METHOD, edgeType = EdgeTypes.REF, diff --git a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/callgraph/StaticCallLinker.scala b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/callgraph/StaticCallLinker.scala index 0d06049764c1..7f5ab2387383 100644 --- a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/callgraph/StaticCallLinker.scala +++ b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/callgraph/StaticCallLinker.scala @@ -1,64 +1,40 @@ package io.joern.x2cpg.passes.callgraph +import io.joern.x2cpg.utils.LinkingUtil import io.shiftleft.codepropertygraph.Cpg -import io.shiftleft.codepropertygraph.generated.nodes._ +import io.shiftleft.codepropertygraph.generated.nodes.* import io.shiftleft.codepropertygraph.generated.{DispatchTypes, EdgeTypes} -import io.shiftleft.passes.CpgPass -import io.shiftleft.semanticcpg.language._ +import io.shiftleft.passes.ForkJoinParallelCpgPass +import io.shiftleft.semanticcpg.language.* import org.slf4j.{Logger, LoggerFactory} -import scala.collection.mutable +class StaticCallLinker(cpg: Cpg) extends ForkJoinParallelCpgPass[Seq[Call]](cpg) with LinkingUtil { -class StaticCallLinker(cpg: Cpg) extends CpgPass(cpg) { - - import StaticCallLinker._ - private val methodFullNameToNode = mutable.Map.empty[String, List[Method]] - - override def run(dstGraph: DiffGraphBuilder): Unit = { + private val logger: Logger = LoggerFactory.getLogger(classOf[StaticCallLinker]) - cpg.method.foreach { method => - methodFullNameToNode.updateWith(method.fullName) { - case Some(l) => Some(method :: l) - case None => Some(List(method)) - } - } + override def generateParts(): Array[Seq[Call]] = { + val calls = cpg.call.l + calls.grouped(getBatchSize(calls.size)).toArray + } - cpg.call.foreach { call => + override def runOnPart(builder: DiffGraphBuilder, calls: Seq[Call]): Unit = { + calls.foreach { call => try { - linkCall(call, dstGraph) + call.dispatchType match { + case DispatchTypes.STATIC_DISPATCH | DispatchTypes.INLINED => + val resolvedMethods = cpg.method.fullNameExact(call.methodFullName).l + resolvedMethods.foreach(dst => builder.addEdge(call, dst, EdgeTypes.CALL)) + val size = resolvedMethods.size + // Add the debug logs with number of METHOD nodes found for given method full name + if size > 1 then logger.debug(s"Total $size METHOD nodes found for -> ${call.methodFullName}") + case DispatchTypes.DYNAMIC_DISPATCH => + // Do nothing + case _ => logger.warn(s"Unknown dispatch type on dynamic CALL ${call.code}") + } } catch { case exception: Exception => - throw new RuntimeException(exception) + logger.error(s"Exception in StaticCallLinker: ", exception) } } } - - private def linkCall(call: Call, dstGraph: DiffGraphBuilder): Unit = { - call.dispatchType match { - case DispatchTypes.STATIC_DISPATCH | DispatchTypes.INLINED => - linkStaticCall(call, dstGraph) - case DispatchTypes.DYNAMIC_DISPATCH => - // Do nothing - case _ => logger.warn(s"Unknown dispatch type on dynamic CALL ${call.code}") - } - } - - private def linkStaticCall(call: Call, dstGraph: DiffGraphBuilder): Unit = { - val resolvedMethodOption = methodFullNameToNode.get(call.methodFullName) - if (resolvedMethodOption.isDefined) { - resolvedMethodOption.get.foreach { dst => - dstGraph.addEdge(call, dst, EdgeTypes.CALL) - } - } else { - logger.info( - s"Unable to link static CALL with METHOD_FULL_NAME ${call.methodFullName}, NAME ${call.name}, " + - s"SIGNATURE ${call.signature}, CODE ${call.code}" - ) - } - } - -} - -object StaticCallLinker { - private val logger: Logger = LoggerFactory.getLogger(classOf[StaticCallLinker]) } diff --git a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/utils/ConcurrentTaskUtil.scala b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/utils/ConcurrentTaskUtil.scala index 97303cd67d96..3116eed4b0e4 100644 --- a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/utils/ConcurrentTaskUtil.scala +++ b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/utils/ConcurrentTaskUtil.scala @@ -11,7 +11,7 @@ import scala.util.Try */ object ConcurrentTaskUtil { - private val MAX_POOL_SIZE = Runtime.getRuntime.availableProcessors() + val MAX_POOL_SIZE = Runtime.getRuntime.availableProcessors() /** Uses a thread pool with a limited number of active threads executing a task at any given point. This is effective * when tasks may require large amounts of memory, or single tasks are too short lived. diff --git a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/utils/LinkingUtil.scala b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/utils/LinkingUtil.scala index 004efa4d8dde..dd3d44985d72 100644 --- a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/utils/LinkingUtil.scala +++ b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/utils/LinkingUtil.scala @@ -1,28 +1,21 @@ package io.joern.x2cpg.utils import io.joern.x2cpg.passes.frontend.Dereference -import io.shiftleft.codepropertygraph.generated.PropertyNames -import io.shiftleft.codepropertygraph.generated.nodes.StoredNode -import io.shiftleft.codepropertygraph.generated.nodes.TypeDecl -import io.shiftleft.codepropertygraph.generated.Properties -import io.shiftleft.codepropertygraph.generated.nodes.Method import io.shiftleft.codepropertygraph.Cpg -import io.shiftleft.codepropertygraph.generated.nodes.NamespaceBlock -import io.shiftleft.codepropertygraph.generated.nodes.Type -import org.slf4j.Logger -import org.slf4j.LoggerFactory -import overflowdb.NodeDb -import overflowdb.PropertyKey -import overflowdb.NodeRef -import overflowdb.traversal._ -import overflowdb.traversal.ChainedImplicitsTemp._ +import io.shiftleft.codepropertygraph.generated.nodes.* +import io.shiftleft.codepropertygraph.generated.{Properties, PropertyNames} +import org.slf4j.{Logger, LoggerFactory} +import overflowdb.traversal.* +import overflowdb.traversal.ChainedImplicitsTemp.* +import overflowdb.{Node, NodeDb, NodeRef, PropertyKey} import scala.collection.mutable -import scala.jdk.CollectionConverters._ +import scala.jdk.CollectionConverters.* trait LinkingUtil { import overflowdb.BatchedUpdate.DiffGraphBuilder + private val MAX_BATCH_SIZE = 100 val logger: Logger = LoggerFactory.getLogger(classOf[LinkingUtil]) @@ -41,11 +34,20 @@ trait LinkingUtil { def nodesWithFullName(cpg: Cpg, x: String): mutable.Seq[NodeRef[_ <: NodeDb]] = cpg.graph.indexManager.lookup(PropertyNames.FULL_NAME, x).asScala + protected def getBatchSize(totalItems: Int): Int = { + val batchSize = + if totalItems > MAX_BATCH_SIZE then totalItems / ConcurrentTaskUtil.MAX_POOL_SIZE + else MAX_BATCH_SIZE + Math.min(batchSize, MAX_BATCH_SIZE) + } + /** For all nodes `n` with a label in `srcLabels`, determine the value of `n.\$dstFullNameKey`, use that to lookup the * destination node in `dstNodeMap`, and create an edge of type `edgeType` between `n` and the destination node. */ - def linkToSingle( + + protected def linkToSingle( cpg: Cpg, + srcNodes: List[Node], srcLabels: List[String], dstNodeLabel: String, edgeType: String, @@ -54,9 +56,9 @@ trait LinkingUtil { dstGraph: DiffGraphBuilder, dstNotExistsHandler: Option[(StoredNode, String) => Unit] ): Unit = { - var loggedDeprecationWarning = false val dereference = Dereference(cpg) - cpg.graph.nodes(srcLabels: _*).foreach { srcNode => + var loggedDeprecationWarning = false + srcNodes.foreach { srcNode => // If the source node does not have any outgoing edges of this type // This check is just required for backward compatibility if (srcNode.outE(edgeType).isEmpty) { @@ -171,5 +173,4 @@ trait LinkingUtil { s"dstNodeType=$dstNodeType, dstNodeId=$dstNodeId" ) } - }