From 47ceb721b16dfeddf8ae707def65bb8bdd90feec Mon Sep 17 00:00:00 2001 From: Pandurang Patil <5101898+pandurangpatil@users.noreply.github.com> Date: Tue, 26 Mar 2024 22:00:22 +0530 Subject: [PATCH 1/9] Memory cleanup at early stage to free up memory for GC --- .../scala/io/joern/gosrc2cpg/GoSrc2Cpg.scala | 1 + .../gosrc2cpg/astcreation/AstCreator.scala | 10 ++++++- .../astcreation/AstCreatorHelper.scala | 28 +++++++++---------- .../gosrc2cpg/datastructures/GoGlobal.scala | 9 +++++- .../gosrc2cpg/passes/AstCreationPass.scala | 1 + .../MethodAndTypeCacheBuilderPass.scala | 3 ++ .../passes/ast/DownloadDependencyTest.scala | 6 ++-- 7 files changed, 39 insertions(+), 19 deletions(-) diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/GoSrc2Cpg.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/GoSrc2Cpg.scala index ad8b6d9da1ae..944dc35d7d08 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/GoSrc2Cpg.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/GoSrc2Cpg.scala @@ -51,6 +51,7 @@ class GoSrc2Cpg(goGlobalOption: Option[GoGlobal] = Option(GoGlobal())) extends X goGlobal.processingDependencies = false } new AstCreationPass(cpg, astCreators, report).createAndApply() + goGlobal.firstCleanup() if goGlobal.pkgLevelVarAndConstantAstMap.size() > 0 then new PackageCtorCreationPass(cpg, config, goGlobal).createAndApply() report.print() diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreator.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreator.scala index 79cad094de8b..22da2701681a 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreator.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreator.scala @@ -14,7 +14,7 @@ import io.shiftleft.codepropertygraph.generated.{ModifierTypes, NodeTypes} import 
org.slf4j.{Logger, LoggerFactory} import overflowdb.BatchedUpdate.DiffGraphBuilder import ujson.Value - +import scala.collection.mutable.Map import scala.collection.mutable class AstCreator( @@ -44,6 +44,7 @@ class AstCreator( protected val declaredPackageName = parserResult.json(ParserKeys.Name)(ParserKeys.Name).str protected val fullyQualifiedPackage = goMod.getNameSpace(parserResult.fullPath, declaredPackageName) + protected val parserNodeCache = mutable.TreeMap[Long, ParserNodeInfo]() override def createAst(): DiffGraphBuilder = { val rootNode = createParserNodeInfo(parserResult.json) @@ -103,4 +104,11 @@ class AstCreator( protected def astForNode(json: Value): Seq[Ast] = { astForNode(createParserNodeInfo(json)) } + + def cleanup(): Unit = { + methodAstParentStack.clear() + aliasToNameSpaceMapping.clear() + lineNumberMapping.clear() + parserNodeCache.clear() + } } diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreatorHelper.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreatorHelper.scala index 84fcb66b50ec..cf2139f33cf4 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreatorHelper.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreatorHelper.scala @@ -1,23 +1,19 @@ package io.joern.gosrc2cpg.astcreation -import io.joern.gosrc2cpg.datastructures.GoGlobal import io.joern.gosrc2cpg.parser.ParserAst.* import io.joern.gosrc2cpg.parser.{ParserAst, ParserKeys, ParserNodeInfo} import io.joern.x2cpg.utils.NodeBuilders.newModifierNode import io.joern.x2cpg.{Ast, Defines as XDefines} import io.shiftleft.codepropertygraph.generated.nodes.{NewModifier, NewNode} import io.shiftleft.codepropertygraph.generated.{EvaluationStrategies, ModifierTypes, PropertyNames} -import org.apache.commons.lang3.StringUtils import ujson.Value import scala.collection.mutable -import scala.collection.mutable.ListBuffer 
+import scala.collection.mutable.{ListBuffer} import scala.util.{Failure, Success, Try} trait AstCreatorHelper { this: AstCreator => - private val parserNodeCache = mutable.TreeMap[Long, ParserNodeInfo]() - protected def createParserNodeInfo(json: Value): ParserNodeInfo = { Try(json(ParserKeys.NodeReferenceId).num.toLong) match case Failure(_) => @@ -98,7 +94,6 @@ trait AstCreatorHelper { this: AstCreator => val colNumber = column(node).get - 1 val lineEndNumber = lineEndNo(node).get val colEndNumber = columnEndNo(node).get - 1 - if (lineNumber == lineEndNumber) { lineNumberMapping(lineNumber).substring(colNumber, colEndNumber) } else { @@ -121,14 +116,19 @@ trait AstCreatorHelper { this: AstCreator => protected def columnEndNo(node: Value): Option[Integer] = Try(node(ParserKeys.NodeColEndNo).num).toOption.map(_.toInt) - protected def positionLookupTables(source: String): Map[Int, String] = { - source - .split("\n") - .zipWithIndex - .map { case (sourceLine, lineNumber) => - (lineNumber + 1, sourceLine) - } - .toMap + protected def positionLookupTables(source: String): mutable.Map[Int, String] = { + val mutableMap = mutable.Map[Int, String]() + if (!goGlobal.processingDependencies) { + val immutableMap = source + .split("\n") + .zipWithIndex + .map { case (sourceLine, lineNumber) => + (lineNumber + 1, sourceLine) + } + .toMap + mutableMap ++= immutableMap + } + mutableMap } protected def resolveAliasToFullName(alias: String): String = { diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/datastructures/GoGlobal.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/datastructures/GoGlobal.scala index 9a42a1bbcb65..e6b3f53906c0 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/datastructures/GoGlobal.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/datastructures/GoGlobal.scala @@ -4,7 +4,7 @@ import io.joern.x2cpg.Ast import org.slf4j.LoggerFactory import 
java.util.concurrent.{ConcurrentHashMap, ConcurrentSkipListSet} -class GoGlobal { +class GoGlobal(val testflag: Boolean = false) { private val logger = LoggerFactory.getLogger(getClass) var mainModule: Option[String] = None var processingDependencies = false @@ -139,6 +139,13 @@ class GoGlobal { def checkForDependencyFlags(name: String): Boolean = { !processingDependencies || processingDependencies && name.headOption.exists(_.isUpper) } + def firstCleanup(): Unit = { + if (!testflag) { + aliasToNameSpaceMapping.clear() + lambdaSignatureToLambdaTypeMap.clear() + nameSpaceMetaDataMap.clear() + } + } } case class NameSpaceMetaData( diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/AstCreationPass.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/AstCreationPass.scala index e5b89294f9b7..9f9bb09c818d 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/AstCreationPass.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/AstCreationPass.scala @@ -35,6 +35,7 @@ class AstCreationPass(cpg: Cpg, astCreators: Seq[AstCreator], report: Report) (true, astCreator.relPathFileName) } } + astCreator.cleanup() report.updateReport(filename, cpg = gotCpg, duration) } } diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/MethodAndTypeCacheBuilderPass.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/MethodAndTypeCacheBuilderPass.scala index b2bdb95e10bf..d53263dcc6bc 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/MethodAndTypeCacheBuilderPass.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/MethodAndTypeCacheBuilderPass.scala @@ -29,6 +29,9 @@ class MethodAndTypeCacheBuilderPass( val relPathFileName = SourceFiles.toRelativePath(parserResult.fullPath, config.inputPath) val astCreator = new AstCreator(relPathFileName, parserResult, 
goMod, goGlobal)(config.schemaValidation) val diffGraph = astCreator.buildCache(cpgOpt) + if (goGlobal.processingDependencies) { + astCreator.cleanup() + } (astCreator, diffGraph) } ) diff --git a/joern-cli/frontends/gosrc2cpg/src/test/scala/io/joern/go2cpg/passes/ast/DownloadDependencyTest.scala b/joern-cli/frontends/gosrc2cpg/src/test/scala/io/joern/go2cpg/passes/ast/DownloadDependencyTest.scala index a889fcd9a641..43eee49f1a20 100644 --- a/joern-cli/frontends/gosrc2cpg/src/test/scala/io/joern/go2cpg/passes/ast/DownloadDependencyTest.scala +++ b/joern-cli/frontends/gosrc2cpg/src/test/scala/io/joern/go2cpg/passes/ast/DownloadDependencyTest.scala @@ -174,7 +174,7 @@ class DownloadDependencyTest extends GoCodeToCpgSuite { } "If the dependency is not getting used then it " should { - val goGlobal = GoGlobal() + val goGlobal = GoGlobal(testflag = true) val config = Config().withFetchDependencies(true).withIgnoredFilesRegex(IGNORE_TEST_FILE_REGEX) val cpg = code( """ @@ -242,7 +242,7 @@ class DownloadDependencyTest extends GoCodeToCpgSuite { } "The dependency is getting imported somewhere but not getting used then it" should { - val goGlobal = GoGlobal() + val goGlobal = GoGlobal(testflag = true) val config = Config().withFetchDependencies(true).withIgnoredFilesRegex(IGNORE_TEST_FILE_REGEX) val cpg = code( """ @@ -317,7 +317,7 @@ class DownloadDependencyTest extends GoCodeToCpgSuite { } "The dependency is getting imported and used in the code then it" should { - val goGlobal = GoGlobal() + val goGlobal = GoGlobal(testflag = true) val config = Config().withFetchDependencies(true).withIgnoredFilesRegex(IGNORE_TEST_FILE_REGEX) val cpg = code( """ From e0004fea39825b4f9a707dbac945f1929ea5fbc9 Mon Sep 17 00:00:00 2001 From: Pandurang Patil <5101898+pandurangpatil@users.noreply.github.com> Date: Tue, 26 Mar 2024 22:10:24 +0530 Subject: [PATCH 2/9] minor change --- .../main/scala/io/joern/gosrc2cpg/passes/AstCreationPass.scala | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/AstCreationPass.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/AstCreationPass.scala index 9f9bb09c818d..d2ac8d4a7c6e 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/AstCreationPass.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/AstCreationPass.scala @@ -25,6 +25,7 @@ class AstCreationPass(cpg: Cpg, astCreators: Seq[AstCreator], report: Report) report.addReportInfo(astCreator.relPathFileName, fileLOC, parsed = true) Try { val localDiff = astCreator.createAst() + astCreator.cleanup() diffGraph.absorb(localDiff) } match { case Failure(exception) => @@ -35,7 +36,6 @@ class AstCreationPass(cpg: Cpg, astCreators: Seq[AstCreator], report: Report) (true, astCreator.relPathFileName) } } - astCreator.cleanup() report.updateReport(filename, cpg = gotCpg, duration) } } From dd520f78c9c0d7da2b36df2c78fd8b25f792357a Mon Sep 17 00:00:00 2001 From: Pandurang Patil <5101898+pandurangpatil@users.noreply.github.com> Date: Tue, 26 Mar 2024 23:55:58 +0530 Subject: [PATCH 3/9] minor changes --- .../io/joern/gosrc2cpg/astcreation/AstCreator.scala | 8 ++++---- .../gosrc2cpg/astcreation/AstCreatorHelper.scala | 11 +++++------ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreator.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreator.scala index 22da2701681a..e5e3d6c0288e 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreator.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreator.scala @@ -8,13 +8,13 @@ import io.joern.x2cpg.astgen.{AstGenNodeBuilder, ParserResult} import io.joern.x2cpg.datastructures.Scope import io.joern.x2cpg.datastructures.Stack.* import 
io.joern.x2cpg.utils.NodeBuilders.newModifierNode -import io.joern.x2cpg.{Ast, AstCreatorBase, ValidationMode, AstNodeBuilder as X2CpgAstNodeBuilder} +import io.joern.x2cpg.{Ast, AstCreatorBase, ValidationMode} import io.shiftleft.codepropertygraph.generated.nodes.NewNode import io.shiftleft.codepropertygraph.generated.{ModifierTypes, NodeTypes} import org.slf4j.{Logger, LoggerFactory} import overflowdb.BatchedUpdate.DiffGraphBuilder import ujson.Value -import scala.collection.mutable.Map + import scala.collection.mutable class AstCreator( @@ -40,7 +40,7 @@ class AstCreator( protected val methodAstParentStack: Stack[NewNode] = new Stack() protected val scope: Scope[String, (NewNode, String), NewNode] = new Scope() protected val aliasToNameSpaceMapping: mutable.Map[String, String] = mutable.Map.empty - protected val lineNumberMapping: Map[Int, String] = positionLookupTables(parserResult.fileContent) + protected var lineNumberMapping: Map[Int, String] = positionLookupTables(parserResult.fileContent) protected val declaredPackageName = parserResult.json(ParserKeys.Name)(ParserKeys.Name).str protected val fullyQualifiedPackage = goMod.getNameSpace(parserResult.fullPath, declaredPackageName) @@ -108,7 +108,7 @@ class AstCreator( def cleanup(): Unit = { methodAstParentStack.clear() aliasToNameSpaceMapping.clear() - lineNumberMapping.clear() + lineNumberMapping = Map[Int, String]() parserNodeCache.clear() } } diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreatorHelper.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreatorHelper.scala index cf2139f33cf4..2f5e82b80a87 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreatorHelper.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreatorHelper.scala @@ -9,7 +9,7 @@ import io.shiftleft.codepropertygraph.generated.{EvaluationStrategies, ModifierT import 
ujson.Value import scala.collection.mutable -import scala.collection.mutable.{ListBuffer} +import scala.collection.mutable.ListBuffer import scala.util.{Failure, Success, Try} trait AstCreatorHelper { this: AstCreator => @@ -116,19 +116,18 @@ trait AstCreatorHelper { this: AstCreator => protected def columnEndNo(node: Value): Option[Integer] = Try(node(ParserKeys.NodeColEndNo).num).toOption.map(_.toInt) - protected def positionLookupTables(source: String): mutable.Map[Int, String] = { - val mutableMap = mutable.Map[Int, String]() + protected def positionLookupTables(source: String): Map[Int, String] = { if (!goGlobal.processingDependencies) { - val immutableMap = source + source .split("\n") .zipWithIndex .map { case (sourceLine, lineNumber) => (lineNumber + 1, sourceLine) } .toMap - mutableMap ++= immutableMap + } else { + Map[Int, String]() } - mutableMap } protected def resolveAliasToFullName(alias: String): String = { From 8c913a4b176256a1726668fead6aaafb51be14bf Mon Sep 17 00:00:00 2001 From: Pandurang Patil <5101898+pandurangpatil@users.noreply.github.com> Date: Wed, 27 Mar 2024 00:20:58 +0530 Subject: [PATCH 4/9] error logging --- .../MethodAndTypeCacheBuilderPass.scala | 46 +++++++++++-------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/MethodAndTypeCacheBuilderPass.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/MethodAndTypeCacheBuilderPass.scala index d53263dcc6bc..766486872b30 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/MethodAndTypeCacheBuilderPass.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/MethodAndTypeCacheBuilderPass.scala @@ -8,6 +8,7 @@ import io.joern.gosrc2cpg.parser.GoAstJsonParser import io.joern.x2cpg.SourceFiles import io.shiftleft.codepropertygraph.Cpg import io.shiftleft.codepropertygraph.generated.DiffGraphBuilder +import 
org.slf4j.LoggerFactory import java.nio.file.Paths import scala.concurrent.ExecutionContext.Implicits.global @@ -21,30 +22,37 @@ class MethodAndTypeCacheBuilderPass( goMod: GoModHelper, goGlobal: GoGlobal ) { + private val logger = LoggerFactory.getLogger(getClass) def process(): Seq[AstCreator] = { val futures = astFiles .map(file => Future { - val parserResult = GoAstJsonParser.readFile(Paths.get(file)) - val relPathFileName = SourceFiles.toRelativePath(parserResult.fullPath, config.inputPath) - val astCreator = new AstCreator(relPathFileName, parserResult, goMod, goGlobal)(config.schemaValidation) - val diffGraph = astCreator.buildCache(cpgOpt) - if (goGlobal.processingDependencies) { - astCreator.cleanup() - } - (astCreator, diffGraph) + try { + val parserResult = GoAstJsonParser.readFile(Paths.get(file)) + val relPathFileName = SourceFiles.toRelativePath(parserResult.fullPath, config.inputPath) + val astCreator = new AstCreator(relPathFileName, parserResult, goMod, goGlobal)(config.schemaValidation) + val diffGraph = astCreator.buildCache(cpgOpt) + if (goGlobal.processingDependencies) { + astCreator.cleanup() + } + Some(astCreator, diffGraph) + } catch + case exception: Exception => + logger.error(s"error while processing file $file", exception) + None } ) - val allResults: Future[List[(AstCreator, DiffGraphBuilder)]] = Future.sequence(futures) - val results = Await.result(allResults, Duration.Inf) - val (astCreators, diffGraphs) = results.unzip - cpgOpt.map { cpg => - diffGraphs.foreach { diffGraph => - overflowdb.BatchedUpdate - .applyDiff(cpg.graph, diffGraph, null, null) - .transitiveModifications() - } - } - astCreators + val allResults: Future[List[Option[(AstCreator, DiffGraphBuilder)]]] = Future.sequence(futures) + val results = Await.result(allResults, Duration.Inf) + results.flatMap(result => + result.flatMap(r => { + cpgOpt.map { cpg => + overflowdb.BatchedUpdate + .applyDiff(cpg.graph, r._2, null, null) + .transitiveModifications() + } + Some(r._1) 
+ }) + ) } } From 987325fe36221c40ae3d1bf0eaaacd3af84e1c66 Mon Sep 17 00:00:00 2001 From: Pandurang Patil <5101898+pandurangpatil@users.noreply.github.com> Date: Wed, 27 Mar 2024 15:10:57 +0530 Subject: [PATCH 5/9] Memory optimisations --- .../scala/io/joern/gosrc2cpg/GoSrc2Cpg.scala | 14 ++- .../gosrc2cpg/astcreation/AstCreator.scala | 106 +++++++++++++++--- .../astcreation/AstCreatorHelper.scala | 7 +- .../astcreation/AstForFunctionsCreator.scala | 2 +- .../astcreation/AstForLambdaCreator.scala | 2 +- .../AstForMethodCallExpressionCreator.scala | 2 +- .../gosrc2cpg/astcreation/CacheBuilder.scala | 3 +- .../gosrc2cpg/passes/AstCreationPass.scala | 11 +- .../MethodAndTypeCacheBuilderPass.scala | 25 +++-- .../scala/io/joern/x2cpg/astgen/package.scala | 3 +- 10 files changed, 136 insertions(+), 39 deletions(-) diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/GoSrc2Cpg.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/GoSrc2Cpg.scala index 944dc35d7d08..96cfedf47cc4 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/GoSrc2Cpg.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/GoSrc2Cpg.scala @@ -43,7 +43,14 @@ class GoSrc2Cpg(goGlobalOption: Option[GoGlobal] = Option(GoGlobal())) extends X ) goGlobal.mainModule = goMod.flatMap(modHelper => modHelper.getModMetaData().map(mod => mod.module.name)) val astCreators = - new MethodAndTypeCacheBuilderPass(Some(cpg), astGenResult.parsedFiles, config, goMod.get, goGlobal) + new MethodAndTypeCacheBuilderPass( + Some(cpg), + astGenResult.parsedFiles, + config, + goMod.get, + goGlobal, + Some(tmpDir) + ) .process() if (config.fetchDependencies) { goGlobal.processingDependencies = true @@ -51,9 +58,10 @@ class GoSrc2Cpg(goGlobalOption: Option[GoGlobal] = Option(GoGlobal())) extends X goGlobal.processingDependencies = false } new AstCreationPass(cpg, astCreators, report).createAndApply() - goGlobal.firstCleanup() - if 
goGlobal.pkgLevelVarAndConstantAstMap.size() > 0 then + if (goGlobal.pkgLevelVarAndConstantAstMap.size() > 0) { new PackageCtorCreationPass(cpg, config, goGlobal).createAndApply() + goGlobal.pkgLevelVarAndConstantAstMap.clear() + } report.print() }) } diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreator.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreator.scala index e5e3d6c0288e..9d85ccd77435 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreator.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreator.scala @@ -1,9 +1,10 @@ package io.joern.gosrc2cpg.astcreation +import better.files.File import io.joern.gosrc2cpg.datastructures.GoGlobal import io.joern.gosrc2cpg.model.GoModHelper import io.joern.gosrc2cpg.parser.ParserAst.* -import io.joern.gosrc2cpg.parser.{ParserKeys, ParserNodeInfo} +import io.joern.gosrc2cpg.parser.{GoAstJsonParser, ParserKeys, ParserNodeInfo} import io.joern.x2cpg.astgen.{AstGenNodeBuilder, ParserResult} import io.joern.x2cpg.datastructures.Scope import io.joern.x2cpg.datastructures.Stack.* @@ -15,13 +16,17 @@ import org.slf4j.{Logger, LoggerFactory} import overflowdb.BatchedUpdate.DiffGraphBuilder import ujson.Value +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} +import java.nio.file.{Files, Path, Paths} +import java.util.UUID import scala.collection.mutable class AstCreator( + val jsonAstFilePath: String, val relPathFileName: String, - val parserResult: ParserResult, val goMod: GoModHelper, - val goGlobal: GoGlobal + val goGlobal: GoGlobal, + tmpDir: Option[File] = None )(implicit withSchemaValidation: ValidationMode) extends AstCreatorBase(relPathFileName) with AstCreatorHelper @@ -36,24 +41,32 @@ class AstCreator( with AstForLambdaCreator with AstGenNodeBuilder[AstCreator] { - protected val logger: Logger = 
LoggerFactory.getLogger(classOf[AstCreator]) + protected val logger: Logger = LoggerFactory.getLogger(classOf[AstCreator]) + protected val tempParserNodeCacheFilePath: Option[Path] = + tmpDir.map(dir => Paths.get(dir.pathAsString, s"node-cache${UUID.randomUUID().toString}")) + protected val tempAliasToNameSpaceMappingFilePath: Option[Path] = + tmpDir.map(dir => Paths.get(dir.pathAsString, s"alias-cache${UUID.randomUUID().toString}")) protected val methodAstParentStack: Stack[NewNode] = new Stack() protected val scope: Scope[String, (NewNode, String), NewNode] = new Scope() - protected val aliasToNameSpaceMapping: mutable.Map[String, String] = mutable.Map.empty - protected var lineNumberMapping: Map[Int, String] = positionLookupTables(parserResult.fileContent) - protected val declaredPackageName = parserResult.json(ParserKeys.Name)(ParserKeys.Name).str - protected val fullyQualifiedPackage = - goMod.getNameSpace(parserResult.fullPath, declaredPackageName) - protected val parserNodeCache = mutable.TreeMap[Long, ParserNodeInfo]() + protected var aliasToNameSpaceMapping: mutable.Map[String, String] = mutable.Map.empty + protected var parserNodeCache = mutable.TreeMap[Long, ParserNodeInfo]() + protected var lineNumberMapping: Map[Int, String] = Map.empty + protected var declaredPackageName = "" + protected var fullyQualifiedPackage = "" + protected var fileName = "" + + var originalFilePath = "" override def createAst(): DiffGraphBuilder = { + val parserResult = init() + loadCacheToProcess() val rootNode = createParserNodeInfo(parserResult.json) - val ast = astForTranslationUnit(rootNode) + val ast = astForTranslationUnit(rootNode, parserResult) Ast.storeInDiffGraph(ast, diffGraph) diffGraph } - private def astForTranslationUnit(rootNode: ParserNodeInfo): Ast = { + private def astForTranslationUnit(rootNode: ParserNodeInfo, parserResult: ParserResult): Ast = { val name = s"$fullyQualifiedPackage.${parserResult.filename}" val fullName = s"$relPathFileName:$name" val 
fakeGlobalMethodForFile = @@ -105,10 +118,77 @@ class AstCreator( astForNode(createParserNodeInfo(json)) } + def init(): ParserResult = { + val parserResult = GoAstJsonParser.readFile(Paths.get(jsonAstFilePath)) + lineNumberMapping = positionLookupTables(parserResult) + declaredPackageName = parserResult.json(ParserKeys.Name)(ParserKeys.Name).str + fullyQualifiedPackage = goMod.getNameSpace(parserResult.fullPath, declaredPackageName) + fileName = parserResult.filename + originalFilePath = parserResult.fullPath + parserResult + } + + def cacheSerializeAndStore(): Unit = { +// tempParserNodeCacheFilePath.map(file => { +// Files.write(file, serialise(parserNodeCache)) +// parserNodeCache.clear() +// }) + tempAliasToNameSpaceMappingFilePath.map(file => { + Files.write(file, serialise(aliasToNameSpaceMapping)) + aliasToNameSpaceMapping.clear() + }) + lineNumberMapping = Map.empty + } + + def loadCacheToProcess(): Unit = { +// tempParserNodeCacheFilePath.map(file => { +// val deserialised = deserialise(Files.readAllBytes(file)) +// parserNodeCache = deserialised.asInstanceOf[mutable.TreeMap[Long, ParserNodeInfo]] +// }) + tempAliasToNameSpaceMappingFilePath.map(file => { + val deserialised = deserialise(Files.readAllBytes(file)) + aliasToNameSpaceMapping = deserialised.asInstanceOf[mutable.Map[String, String]] + }) + } + def cleanup(): Unit = { methodAstParentStack.clear() aliasToNameSpaceMapping.clear() - lineNumberMapping = Map[Int, String]() + lineNumberMapping = Map.empty parserNodeCache.clear() + tempAliasToNameSpaceMappingFilePath.map(file => { + if (Files.exists(file)) { + Files.delete(file) + } + }) + } + + /** Serialise any object to byte array to be passed through queue + * + * @param value + * \- Any object to passed through queue as a result item. 
+ * @return + * \- Object serialised into ByteArray + */ + private def serialise(value: Any): Array[Byte] = { + val stream: ByteArrayOutputStream = new ByteArrayOutputStream() + val oos = new ObjectOutputStream(stream) + oos.writeObject(value) + oos.close() + stream.toByteArray + } + + /** Deserialize the ByteArray back to Object. + * + * @param bytes + * \- Array[Byte] to be deserialized + * @return + * \- Deserialized object + */ + private def deserialise(bytes: Array[Byte]): Any = { + val ois = new ObjectInputStream(new ByteArrayInputStream(bytes)) + val value = ois.readObject + ois.close() + value } } diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreatorHelper.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreatorHelper.scala index 2f5e82b80a87..cf1d064f2cc2 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreatorHelper.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreatorHelper.scala @@ -2,6 +2,7 @@ package io.joern.gosrc2cpg.astcreation import io.joern.gosrc2cpg.parser.ParserAst.* import io.joern.gosrc2cpg.parser.{ParserAst, ParserKeys, ParserNodeInfo} +import io.joern.x2cpg.astgen.ParserResult import io.joern.x2cpg.utils.NodeBuilders.newModifierNode import io.joern.x2cpg.{Ast, Defines as XDefines} import io.shiftleft.codepropertygraph.generated.nodes.{NewModifier, NewNode} @@ -116,15 +117,17 @@ trait AstCreatorHelper { this: AstCreator => protected def columnEndNo(node: Value): Option[Integer] = Try(node(ParserKeys.NodeColEndNo).num).toOption.map(_.toInt) - protected def positionLookupTables(source: String): Map[Int, String] = { + protected def positionLookupTables(parserResult: ParserResult): Map[Int, String] = { if (!goGlobal.processingDependencies) { - source + val map = parserResult.fileContent .split("\n") .zipWithIndex .map { case (sourceLine, lineNumber) => (lineNumber + 1, 
sourceLine) } .toMap + parserResult.fileContent = "" + map } else { Map[Int, String]() } diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstForFunctionsCreator.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstForFunctionsCreator.scala index 4cce137d4030..9c273e6e2904 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstForFunctionsCreator.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstForFunctionsCreator.scala @@ -86,7 +86,7 @@ trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { th EvaluationStrategies.BY_SHARING ) case x => - logger.warn(s"Unhandled class ${x.getClass} under getReceiverInfo! file -> ${parserResult.fullPath}") + logger.warn(s"Unhandled class ${x.getClass} under getReceiverInfo! file -> $originalFilePath") ("", "") (recName, typeFullName, evaluationStrategy, recnode) }) diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstForLambdaCreator.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstForLambdaCreator.scala index 41d80224c68c..bdcad342d90a 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstForLambdaCreator.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstForLambdaCreator.scala @@ -17,7 +17,7 @@ trait AstForLambdaCreator(implicit withSchemaValidation: ValidationMode) { this: val lambdaName = nextClosureName() // if the top of the stack function is fake file level method node (which is checked with filename) then use the fully qualified package name as base fullname val baseFullName = methodAstParentStack - .collectFirst({ case m: NewMethod if !m.fullName.endsWith(parserResult.filename) => m.fullName }) + .collectFirst({ case m: NewMethod if !m.fullName.endsWith(fileName) => m.fullName }) 
.getOrElse(fullyQualifiedPackage) val fullName = s"$baseFullName.$lambdaName" val LambdaFunctionMetaData(signature, returnTypeStr, methodReturn, params, genericTypeMethodMap) = diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstForMethodCallExpressionCreator.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstForMethodCallExpressionCreator.scala index 1882d37cc897..c0be1b2a7e7c 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstForMethodCallExpressionCreator.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstForMethodCallExpressionCreator.scala @@ -51,7 +51,7 @@ trait AstForMethodCallExpressionCreator(implicit withSchemaValidation: Validatio (Some(xNode), funcDetails.json(ParserKeys.Sel)(ParserKeys.Name).str) case x => logger.warn( - s"Unhandled class ${x.getClass} under astForCallExpression! file -> ${parserResult.fullPath} -> Line no -> ${funcDetails.lineNumber.get}" + s"Unhandled class ${x.getClass} under astForCallExpression! 
file -> $originalFilePath -> Line no -> ${funcDetails.lineNumber.get}" ) (None, "") callMethodFullNameTypeFullNameAndSignature(methodName, aliasOpt) diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/CacheBuilder.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/CacheBuilder.scala index b1cf71451e02..a16b15f2aa57 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/CacheBuilder.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/CacheBuilder.scala @@ -16,7 +16,8 @@ import scala.util.Try trait CacheBuilder(implicit withSchemaValidation: ValidationMode) { this: AstCreator => def buildCache(cpgOpt: Option[Cpg]): DiffGraphBuilder = { - val diffGraph = new DiffGraphBuilder + val diffGraph = new DiffGraphBuilder + val parserResult = init() try { if (checkIfGivenDependencyPackageCanBeProcessed()) { cpgOpt.map { _ => diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/AstCreationPass.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/AstCreationPass.scala index d2ac8d4a7c6e..bb0a61dac044 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/AstCreationPass.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/AstCreationPass.scala @@ -1,11 +1,6 @@ package io.joern.gosrc2cpg.passes -import io.joern.gosrc2cpg.Config import io.joern.gosrc2cpg.astcreation.AstCreator -import io.joern.gosrc2cpg.datastructures.GoGlobal -import io.joern.gosrc2cpg.parser.GoAstJsonParser -import io.joern.x2cpg.astgen.ParserResult -import io.joern.x2cpg.SourceFiles import io.joern.x2cpg.utils.{Report, TimeUtils} import io.shiftleft.codepropertygraph.Cpg import io.shiftleft.passes.ConcurrentWriterCpgPass @@ -21,7 +16,7 @@ class AstCreationPass(cpg: Cpg, astCreators: Seq[AstCreator], report: Report) override def generateParts(): 
Array[AstCreator] = astCreators.toArray override def runOnPart(diffGraph: DiffGraphBuilder, astCreator: AstCreator): Unit = { val ((gotCpg, filename), duration) = TimeUtils.time { - val fileLOC = IOUtils.readLinesInFile(Paths.get(astCreator.parserResult.fullPath)).size + val fileLOC = IOUtils.readLinesInFile(Paths.get(astCreator.originalFilePath)).size report.addReportInfo(astCreator.relPathFileName, fileLOC, parsed = true) Try { val localDiff = astCreator.createAst() @@ -29,10 +24,10 @@ class AstCreationPass(cpg: Cpg, astCreators: Seq[AstCreator], report: Report) diffGraph.absorb(localDiff) } match { case Failure(exception) => - logger.warn(s"Failed to generate a CPG for: '${astCreator.parserResult.fullPath}'", exception) + logger.warn(s"Failed to generate a CPG for: '${astCreator.originalFilePath}'", exception) (false, astCreator.relPathFileName) case Success(_) => - logger.info(s"Generated a CPG for: '${astCreator.parserResult.fullPath}'") + logger.info(s"Generated a CPG for: '${astCreator.originalFilePath}'") (true, astCreator.relPathFileName) } } diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/MethodAndTypeCacheBuilderPass.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/MethodAndTypeCacheBuilderPass.scala index 766486872b30..0d8df9c4f556 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/MethodAndTypeCacheBuilderPass.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/MethodAndTypeCacheBuilderPass.scala @@ -1,5 +1,6 @@ package io.joern.gosrc2cpg.passes +import better.files.File import io.joern.gosrc2cpg.Config import io.joern.gosrc2cpg.astcreation.AstCreator import io.joern.gosrc2cpg.datastructures.GoGlobal @@ -20,7 +21,8 @@ class MethodAndTypeCacheBuilderPass( astFiles: List[String], config: Config, goMod: GoModHelper, - goGlobal: GoGlobal + goGlobal: GoGlobal, + tmpDir: Option[File] = None ) { private val logger = 
LoggerFactory.getLogger(getClass) def process(): Seq[AstCreator] = { @@ -28,12 +30,19 @@ class MethodAndTypeCacheBuilderPass( .map(file => Future { try { - val parserResult = GoAstJsonParser.readFile(Paths.get(file)) - val relPathFileName = SourceFiles.toRelativePath(parserResult.fullPath, config.inputPath) - val astCreator = new AstCreator(relPathFileName, parserResult, goMod, goGlobal)(config.schemaValidation) - val diffGraph = astCreator.buildCache(cpgOpt) + val relFilePath = tmpDir.map(dir => { + SourceFiles.toRelativePath(file, dir.pathAsString).replace(".json", "") + }) + val parserResult = GoAstJsonParser.readFile(Paths.get(file)) + val astCreator = + new AstCreator(file, relFilePath.getOrElse("dummyfile.go"), goMod, goGlobal, tmpDir)( + config.schemaValidation + ) + val diffGraph = astCreator.buildCache(cpgOpt) if (goGlobal.processingDependencies) { astCreator.cleanup() + } else { + astCreator.cacheSerializeAndStore() } Some(astCreator, diffGraph) } catch @@ -45,13 +54,13 @@ class MethodAndTypeCacheBuilderPass( val allResults: Future[List[Option[(AstCreator, DiffGraphBuilder)]]] = Future.sequence(futures) val results = Await.result(allResults, Duration.Inf) results.flatMap(result => - result.flatMap(r => { + result.flatMap((astCreator, diffGraph) => { cpgOpt.map { cpg => overflowdb.BatchedUpdate - .applyDiff(cpg.graph, r._2, null, null) + .applyDiff(cpg.graph, diffGraph, null, null) .transitiveModifications() } - Some(r._1) + Some(astCreator) }) ) } diff --git a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/astgen/package.scala b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/astgen/package.scala index 5b3c31d8a47c..da3ec7d28dce 100644 --- a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/astgen/package.scala +++ b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/astgen/package.scala @@ -38,6 +38,7 @@ package object astgen { * @param fileContent * the raw file contents. 
*/ - case class ParserResult(filename: String, fullPath: String, json: Value, fileContent: String) extends BaseParserResult + case class ParserResult(filename: String, fullPath: String, json: Value, var fileContent: String) + extends BaseParserResult } From 204ee2fa5a5b8f62005d214222ebe97d43a32bc0 Mon Sep 17 00:00:00 2001 From: Pandurang Patil <5101898+pandurangpatil@users.noreply.github.com> Date: Wed, 27 Mar 2024 15:38:43 +0530 Subject: [PATCH 6/9] minor updates --- .../joern/gosrc2cpg/passes/MethodAndTypeCacheBuilderPass.scala | 3 --- 1 file changed, 3 deletions(-) diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/MethodAndTypeCacheBuilderPass.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/MethodAndTypeCacheBuilderPass.scala index 0d8df9c4f556..2cc9d727ca01 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/MethodAndTypeCacheBuilderPass.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/MethodAndTypeCacheBuilderPass.scala @@ -5,13 +5,11 @@ import io.joern.gosrc2cpg.Config import io.joern.gosrc2cpg.astcreation.AstCreator import io.joern.gosrc2cpg.datastructures.GoGlobal import io.joern.gosrc2cpg.model.GoModHelper -import io.joern.gosrc2cpg.parser.GoAstJsonParser import io.joern.x2cpg.SourceFiles import io.shiftleft.codepropertygraph.Cpg import io.shiftleft.codepropertygraph.generated.DiffGraphBuilder import org.slf4j.LoggerFactory -import java.nio.file.Paths import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.Duration import scala.concurrent.{Await, Future} @@ -33,7 +31,6 @@ class MethodAndTypeCacheBuilderPass( val relFilePath = tmpDir.map(dir => { SourceFiles.toRelativePath(file, dir.pathAsString).replace(".json", "") }) - val parserResult = GoAstJsonParser.readFile(Paths.get(file)) val astCreator = new AstCreator(file, relFilePath.getOrElse("dummyfile.go"), goMod, goGlobal, tmpDir)( 
config.schemaValidation From f488fc22fc267e3b5d794fb45e77a785b917093d Mon Sep 17 00:00:00 2001 From: Pandurang Patil <5101898+pandurangpatil@users.noreply.github.com> Date: Wed, 27 Mar 2024 16:41:15 +0530 Subject: [PATCH 7/9] Queued processing of one dependency at one time Queued processing of one dependency at one time post parallel processing of download. 1. Delinked downloading and processing of the dependency. 2. In one thread we are downloading the dependencies and queuing them to be processed in a separate writer thread. --- .../passes/DownloadDependenciesPass.scala | 102 +++++++++++------- 1 file changed, 62 insertions(+), 40 deletions(-) diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/DownloadDependenciesPass.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/DownloadDependenciesPass.scala index 7629e04eb6cb..c6ed23fc8f09 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/DownloadDependenciesPass.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/DownloadDependenciesPass.scala @@ -12,6 +12,7 @@ import org.slf4j.LoggerFactory import java.io.File as JFile import java.nio.file.Paths +import java.util.concurrent.LinkedBlockingQueue import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.Duration import scala.concurrent.{Await, Future} @@ -20,57 +21,78 @@ import scala.util.{Failure, Success, Try} class DownloadDependenciesPass(parentGoMod: GoModHelper, goGlobal: GoGlobal, config: Config) { private val logger = LoggerFactory.getLogger(getClass) def process(): Unit = { + val writer = new Writer() + val writerThread = new Thread(writer) + writerThread.start() File.usingTemporaryDirectory("go-temp-download") { tmpDir => - setupDummyProjectAndDownload(tmpDir.toString) - } - } - - private def setupDummyProjectAndDownload(prjDir: String): Unit = { - parentGoMod - .getModMetaData() - .foreach(mod => { - 
ExternalCommand.run("go mod init joern.io/temp", prjDir) match { - case Success(_) => - val futures = mod.dependencies - .filter(dep => dep.beingUsed) - .map(dependency => { - Future { + val projDir = tmpDir.pathAsString + parentGoMod + .getModMetaData() + .foreach(mod => { + ExternalCommand.run("go mod init joern.io/temp", projDir) match { + case Success(_) => + mod.dependencies + .filter(dep => dep.beingUsed) + .map(dependency => { val dependencyStr = s"${dependency.module}@${dependency.version}" val cmd = s"go get $dependencyStr" - val results = synchronized(ExternalCommand.run(cmd, prjDir)) + val results = ExternalCommand.run(cmd, projDir) results match { case Success(_) => print(". ") - processDependency(dependencyStr) + writer.queue.put(Some(dependencyStr)) case Failure(f) => logger.error(s"\t- command '$cmd' failed", f) } - } - }) - val allResults: Future[List[Unit]] = Future.sequence(futures) - Await.result(allResults, Duration.Inf) - case Failure(f) => - logger.error("\t- command 'go mod init joern.io/temp' failed", f) - } - }) + }) + case Failure(f) => + logger.error("\t- command 'go mod init joern.io/temp' failed", f) + } + }) + } + writer.queue.put(None) + writerThread.join() } - private def processDependency(dependencyStr: String): Unit = { - val gopath = Try(sys.env("GOPATH")).getOrElse(Seq(os.home, "go").mkString(JFile.separator)) - val dependencyLocation = (Seq(gopath, "pkg", "mod") ++ dependencyStr.split("/")).mkString(JFile.separator) - File.usingTemporaryDirectory("godep") { astLocation => - val depConfig = Config() - .withInputPath(dependencyLocation) - .withIgnoredFilesRegex(config.ignoredFilesRegex.toString()) - .withIgnoredFiles(config.ignoredFiles.toList) - // TODO: Need to implement mechanism to filter and process only used namespaces(folders) of the dependency. - // In order to achieve this filtering, we need to add support for inclusive rule with goastgen utility first. 
- val astGenResult = new AstGenRunner(depConfig).execute(astLocation).asInstanceOf[GoAstGenRunnerResult] - val goMod = new GoModHelper( - Some(depConfig), - astGenResult.parsedModFile.flatMap(modFile => GoAstJsonParser.readModFile(Paths.get(modFile)).map(x => x)) - ) - new MethodAndTypeCacheBuilderPass(None, astGenResult.parsedFiles, depConfig, goMod, goGlobal).process() + private class Writer() extends Runnable { + val queue = + new LinkedBlockingQueue[Option[String]]() + override def run(): Unit = { + try { + var terminate = false + while (!terminate) { + queue.take() match { + case None => + logger.debug("Shutting down WriterThread") + terminate = true + case Some(dependencyStr) => + processDependency(dependencyStr) + } + } + } catch { + case exception: InterruptedException => logger.warn("Interrupted WriterThread", exception) + case exc: Exception => + logger.error("error in writer thread, ", exc) + } + } + + private def processDependency(dependencyStr: String): Unit = { + val gopath = Try(sys.env("GOPATH")).getOrElse(Seq(os.home, "go").mkString(JFile.separator)) + val dependencyLocation = (Seq(gopath, "pkg", "mod") ++ dependencyStr.split("/")).mkString(JFile.separator) + File.usingTemporaryDirectory("godep") { astLocation => + val depConfig = Config() + .withInputPath(dependencyLocation) + .withIgnoredFilesRegex(config.ignoredFilesRegex.toString()) + .withIgnoredFiles(config.ignoredFiles.toList) + // TODO: Need to implement mechanism to filter and process only used namespaces(folders) of the dependency. + // In order to achieve this filtering, we need to add support for inclusive rule with goastgen utility first. 
+ val astGenResult = new AstGenRunner(depConfig).execute(astLocation).asInstanceOf[GoAstGenRunnerResult] + val goMod = new GoModHelper( + Some(depConfig), + astGenResult.parsedModFile.flatMap(modFile => GoAstJsonParser.readModFile(Paths.get(modFile)).map(x => x)) + ) + new MethodAndTypeCacheBuilderPass(None, astGenResult.parsedFiles, depConfig, goMod, goGlobal).process() + } } } } From 359788b21b95cbefe88788f80d9897732270663a Mon Sep 17 00:00:00 2001 From: Pandurang Patil <5101898+pandurangpatil@users.noreply.github.com> Date: Wed, 27 Mar 2024 16:51:38 +0530 Subject: [PATCH 8/9] removed unwanted imports --- .../io/joern/gosrc2cpg/passes/DownloadDependenciesPass.scala | 3 --- 1 file changed, 3 deletions(-) diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/DownloadDependenciesPass.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/DownloadDependenciesPass.scala index c6ed23fc8f09..5ea3dc58b0ab 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/DownloadDependenciesPass.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/passes/DownloadDependenciesPass.scala @@ -13,9 +13,6 @@ import org.slf4j.LoggerFactory import java.io.File as JFile import java.nio.file.Paths import java.util.concurrent.LinkedBlockingQueue -import scala.concurrent.ExecutionContext.Implicits.global -import scala.concurrent.duration.Duration -import scala.concurrent.{Await, Future} import scala.util.{Failure, Success, Try} class DownloadDependenciesPass(parentGoMod: GoModHelper, goGlobal: GoGlobal, config: Config) { From 0b5e6c56d15f831425464e1f01356169574b32e6 Mon Sep 17 00:00:00 2001 From: Pandurang Patil <5101898+pandurangpatil@users.noreply.github.com> Date: Wed, 27 Mar 2024 17:47:21 +0530 Subject: [PATCH 9/9] code cleanup --- .../io/joern/gosrc2cpg/astcreation/AstCreator.scala | 10 ---------- 1 file changed, 10 deletions(-) diff --git 
a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreator.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreator.scala index 9d85ccd77435..4a28d338dc71 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreator.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/astcreation/AstCreator.scala @@ -42,8 +42,6 @@ class AstCreator( with AstGenNodeBuilder[AstCreator] { protected val logger: Logger = LoggerFactory.getLogger(classOf[AstCreator]) - protected val tempParserNodeCacheFilePath: Option[Path] = - tmpDir.map(dir => Paths.get(dir.pathAsString, s"node-cache${UUID.randomUUID().toString}")) protected val tempAliasToNameSpaceMappingFilePath: Option[Path] = tmpDir.map(dir => Paths.get(dir.pathAsString, s"alias-cache${UUID.randomUUID().toString}")) protected val methodAstParentStack: Stack[NewNode] = new Stack() @@ -129,10 +127,6 @@ class AstCreator( } def cacheSerializeAndStore(): Unit = { -// tempParserNodeCacheFilePath.map(file => { -// Files.write(file, serialise(parserNodeCache)) -// parserNodeCache.clear() -// }) tempAliasToNameSpaceMappingFilePath.map(file => { Files.write(file, serialise(aliasToNameSpaceMapping)) aliasToNameSpaceMapping.clear() @@ -141,10 +135,6 @@ class AstCreator( } def loadCacheToProcess(): Unit = { -// tempParserNodeCacheFilePath.map(file => { -// val deserialised = deserialise(Files.readAllBytes(file)) -// parserNodeCache = deserialised.asInstanceOf[mutable.TreeMap[Long, ParserNodeInfo]] -// }) tempAliasToNameSpaceMappingFilePath.map(file => { val deserialised = deserialise(Files.readAllBytes(file)) aliasToNameSpaceMapping = deserialised.asInstanceOf[mutable.Map[String, String]]