Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[gosrc2cpg] Memory cleanup #4407

Closed
wants to merge 11 commits into from
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,25 @@ class GoSrc2Cpg(goGlobalOption: Option[GoGlobal] = Option(GoGlobal())) extends X
)
goGlobal.mainModule = goMod.flatMap(modHelper => modHelper.getModMetaData().map(mod => mod.module.name))
val astCreators =
new MethodAndTypeCacheBuilderPass(Some(cpg), astGenResult.parsedFiles, config, goMod.get, goGlobal)
new MethodAndTypeCacheBuilderPass(
Some(cpg),
astGenResult.parsedFiles,
config,
goMod.get,
goGlobal,
Some(tmpDir)
)
.process()
if (config.fetchDependencies) {
goGlobal.processingDependencies = true
new DownloadDependenciesPass(goMod.get, goGlobal, config).process()
goGlobal.processingDependencies = false
}
new AstCreationPass(cpg, astCreators, report).createAndApply()
if goGlobal.pkgLevelVarAndConstantAstMap.size() > 0 then
if (goGlobal.pkgLevelVarAndConstantAstMap.size() > 0) {
new PackageCtorCreationPass(cpg, config, goGlobal).createAndApply()
goGlobal.pkgLevelVarAndConstantAstMap.clear()
}
report.print()
})
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,27 +1,32 @@
package io.joern.gosrc2cpg.astcreation

import better.files.File
import io.joern.gosrc2cpg.datastructures.GoGlobal
import io.joern.gosrc2cpg.model.GoModHelper
import io.joern.gosrc2cpg.parser.ParserAst.*
import io.joern.gosrc2cpg.parser.{ParserKeys, ParserNodeInfo}
import io.joern.gosrc2cpg.parser.{GoAstJsonParser, ParserKeys, ParserNodeInfo}
import io.joern.x2cpg.astgen.{AstGenNodeBuilder, ParserResult}
import io.joern.x2cpg.datastructures.Scope
import io.joern.x2cpg.datastructures.Stack.*
import io.joern.x2cpg.utils.NodeBuilders.newModifierNode
import io.joern.x2cpg.{Ast, AstCreatorBase, ValidationMode, AstNodeBuilder as X2CpgAstNodeBuilder}
import io.joern.x2cpg.{Ast, AstCreatorBase, ValidationMode}
import io.shiftleft.codepropertygraph.generated.nodes.NewNode
import io.shiftleft.codepropertygraph.generated.{ModifierTypes, NodeTypes}
import org.slf4j.{Logger, LoggerFactory}
import overflowdb.BatchedUpdate.DiffGraphBuilder
import ujson.Value

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
import java.nio.file.{Files, Path, Paths}
import java.util.UUID
import scala.collection.mutable

class AstCreator(
val jsonAstFilePath: String,
val relPathFileName: String,
val parserResult: ParserResult,
val goMod: GoModHelper,
val goGlobal: GoGlobal
val goGlobal: GoGlobal,
tmpDir: Option[File] = None
)(implicit withSchemaValidation: ValidationMode)
extends AstCreatorBase(relPathFileName)
with AstCreatorHelper
Expand All @@ -36,23 +41,30 @@ class AstCreator(
with AstForLambdaCreator
with AstGenNodeBuilder[AstCreator] {

protected val logger: Logger = LoggerFactory.getLogger(classOf[AstCreator])
protected val logger: Logger = LoggerFactory.getLogger(classOf[AstCreator])
protected val tempAliasToNameSpaceMappingFilePath: Option[Path] =
tmpDir.map(dir => Paths.get(dir.pathAsString, s"alias-cache${UUID.randomUUID().toString}"))
protected val methodAstParentStack: Stack[NewNode] = new Stack()
protected val scope: Scope[String, (NewNode, String), NewNode] = new Scope()
protected val aliasToNameSpaceMapping: mutable.Map[String, String] = mutable.Map.empty
protected val lineNumberMapping: Map[Int, String] = positionLookupTables(parserResult.fileContent)
protected val declaredPackageName = parserResult.json(ParserKeys.Name)(ParserKeys.Name).str
protected val fullyQualifiedPackage =
goMod.getNameSpace(parserResult.fullPath, declaredPackageName)
protected var aliasToNameSpaceMapping: mutable.Map[String, String] = mutable.Map.empty
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line 49-56 is going to become technical debt and hard to maintain. Can we try to find a solution that avoids so much global mutable state in this class?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reading the code below, perhaps you want to look at what happens in AstSummaryVisitor in RubySrc2Cpg and CSharpSrc2Cpg for how to do a pre-parse without all this mutable state?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At the very least, we need to move these variables to another trait, as it's becoming very difficult to follow this file.

protected var parserNodeCache = mutable.TreeMap[Long, ParserNodeInfo]()
protected var lineNumberMapping: Map[Int, String] = Map.empty
protected var declaredPackageName = ""
protected var fullyQualifiedPackage = ""
protected var fileName = ""

var originalFilePath = ""

override def createAst(): DiffGraphBuilder = {
val parserResult = init()
loadCacheToProcess()
val rootNode = createParserNodeInfo(parserResult.json)
val ast = astForTranslationUnit(rootNode)
val ast = astForTranslationUnit(rootNode, parserResult)
Ast.storeInDiffGraph(ast, diffGraph)
diffGraph
}

private def astForTranslationUnit(rootNode: ParserNodeInfo): Ast = {
private def astForTranslationUnit(rootNode: ParserNodeInfo, parserResult: ParserResult): Ast = {
val name = s"$fullyQualifiedPackage.${parserResult.filename}"
val fullName = s"$relPathFileName:$name"
val fakeGlobalMethodForFile =
Expand Down Expand Up @@ -103,4 +115,70 @@ class AstCreator(
protected def astForNode(json: Value): Seq[Ast] = {
astForNode(createParserNodeInfo(json))
}

def init(): ParserResult = {
val parserResult = GoAstJsonParser.readFile(Paths.get(jsonAstFilePath))
lineNumberMapping = positionLookupTables(parserResult)
declaredPackageName = parserResult.json(ParserKeys.Name)(ParserKeys.Name).str
fullyQualifiedPackage = goMod.getNameSpace(parserResult.fullPath, declaredPackageName)
fileName = parserResult.filename
originalFilePath = parserResult.fullPath
parserResult
}

def cacheSerializeAndStore(): Unit = {
tempAliasToNameSpaceMappingFilePath.map(file => {
Files.write(file, serialise(aliasToNameSpaceMapping))
aliasToNameSpaceMapping.clear()
})
lineNumberMapping = Map.empty
}

def loadCacheToProcess(): Unit = {
tempAliasToNameSpaceMappingFilePath.map(file => {
val deserialised = deserialise(Files.readAllBytes(file))
aliasToNameSpaceMapping = deserialised.asInstanceOf[mutable.Map[String, String]]
})
}

def cleanup(): Unit = {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rather suggest using close and having AstCreator extend AutoCloseable, this can enforce Using.resource(astCreator) => which guarantees this global mutable state is at least closed otherwise we may sit with an implicit protocol which may not be followed and result in a memory leak.

methodAstParentStack.clear()
aliasToNameSpaceMapping.clear()
lineNumberMapping = Map.empty
parserNodeCache.clear()
tempAliasToNameSpaceMappingFilePath.map(file => {
if (Files.exists(file)) {
Files.delete(file)
}
})
}

/** Serialise any object to byte array to be passed through queue
*
* @param value
* \- Any object to passed through queue as a result item.
* @return
* \- Object serialised into ByteArray
*/
private def serialise(value: Any): Array[Byte] = {
val stream: ByteArrayOutputStream = new ByteArrayOutputStream()
val oos = new ObjectOutputStream(stream)
oos.writeObject(value)
oos.close()
stream.toByteArray
}

/** Deserialize the ByteArray back to Object.
*
* @param bytes
* \- Array[Byte] to be deserialized
* @return
* \- Deserialized object
*/
private def deserialise(bytes: Array[Byte]): Any = {
val ois = new ObjectInputStream(new ByteArrayInputStream(bytes))
val value = ois.readObject
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

readObject is an RCE waiting to happen, it is an extremely dangerous function.

We have a whole talk at BSides about exploiting Java deserialization https://www.youtube.com/watch?v=8qo0ZGK0tt0

https://msgpack.org is on the classpath from ODB, rather serialize data using that format.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess serialising the alias to namespace mapping is anyway shouldn't make much of the difference. I need to try removing this.

ois.close()
value
}
}
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
package io.joern.gosrc2cpg.astcreation

import io.joern.gosrc2cpg.datastructures.GoGlobal
import io.joern.gosrc2cpg.parser.ParserAst.*
import io.joern.gosrc2cpg.parser.{ParserAst, ParserKeys, ParserNodeInfo}
import io.joern.x2cpg.astgen.ParserResult
import io.joern.x2cpg.utils.NodeBuilders.newModifierNode
import io.joern.x2cpg.{Ast, Defines as XDefines}
import io.shiftleft.codepropertygraph.generated.nodes.{NewModifier, NewNode}
import io.shiftleft.codepropertygraph.generated.{EvaluationStrategies, ModifierTypes, PropertyNames}
import org.apache.commons.lang3.StringUtils
import ujson.Value

import scala.collection.mutable
Expand All @@ -16,8 +15,6 @@ import scala.util.{Failure, Success, Try}

trait AstCreatorHelper { this: AstCreator =>

private val parserNodeCache = mutable.TreeMap[Long, ParserNodeInfo]()

protected def createParserNodeInfo(json: Value): ParserNodeInfo = {
Try(json(ParserKeys.NodeReferenceId).num.toLong) match
case Failure(_) =>
Expand Down Expand Up @@ -98,7 +95,6 @@ trait AstCreatorHelper { this: AstCreator =>
val colNumber = column(node).get - 1
val lineEndNumber = lineEndNo(node).get
val colEndNumber = columnEndNo(node).get - 1

if (lineNumber == lineEndNumber) {
lineNumberMapping(lineNumber).substring(colNumber, colEndNumber)
} else {
Expand All @@ -121,14 +117,20 @@ trait AstCreatorHelper { this: AstCreator =>

protected def columnEndNo(node: Value): Option[Integer] = Try(node(ParserKeys.NodeColEndNo).num).toOption.map(_.toInt)

protected def positionLookupTables(source: String): Map[Int, String] = {
source
.split("\n")
.zipWithIndex
.map { case (sourceLine, lineNumber) =>
(lineNumber + 1, sourceLine)
}
.toMap
protected def positionLookupTables(parserResult: ParserResult): Map[Int, String] = {
if (!goGlobal.processingDependencies) {
val map = parserResult.fileContent
.split("\n")
.zipWithIndex
.map { case (sourceLine, lineNumber) =>
(lineNumber + 1, sourceLine)
}
.toMap
parserResult.fileContent = ""
map
} else {
Map[Int, String]()
}
}

protected def resolveAliasToFullName(alias: String): String = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { th
EvaluationStrategies.BY_SHARING
)
case x =>
logger.warn(s"Unhandled class ${x.getClass} under getReceiverInfo! file -> ${parserResult.fullPath}")
logger.warn(s"Unhandled class ${x.getClass} under getReceiverInfo! file -> $originalFilePath")
("", "")
(recName, typeFullName, evaluationStrategy, recnode)
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ trait AstForLambdaCreator(implicit withSchemaValidation: ValidationMode) { this:
val lambdaName = nextClosureName()
// if the top of the stack function is fake file level method node (which is checked with filename) then use the fully qualified package name as base fullname
val baseFullName = methodAstParentStack
.collectFirst({ case m: NewMethod if !m.fullName.endsWith(parserResult.filename) => m.fullName })
.collectFirst({ case m: NewMethod if !m.fullName.endsWith(fileName) => m.fullName })
.getOrElse(fullyQualifiedPackage)
val fullName = s"$baseFullName.$lambdaName"
val LambdaFunctionMetaData(signature, returnTypeStr, methodReturn, params, genericTypeMethodMap) =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ trait AstForMethodCallExpressionCreator(implicit withSchemaValidation: Validatio
(Some(xNode), funcDetails.json(ParserKeys.Sel)(ParserKeys.Name).str)
case x =>
logger.warn(
s"Unhandled class ${x.getClass} under astForCallExpression! file -> ${parserResult.fullPath} -> Line no -> ${funcDetails.lineNumber.get}"
s"Unhandled class ${x.getClass} under astForCallExpression! file -> $originalFilePath -> Line no -> ${funcDetails.lineNumber.get}"
)
(None, "")
callMethodFullNameTypeFullNameAndSignature(methodName, aliasOpt)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ import scala.util.Try
trait CacheBuilder(implicit withSchemaValidation: ValidationMode) { this: AstCreator =>

def buildCache(cpgOpt: Option[Cpg]): DiffGraphBuilder = {
val diffGraph = new DiffGraphBuilder
val diffGraph = new DiffGraphBuilder
val parserResult = init()
try {
if (checkIfGivenDependencyPackageCanBeProcessed()) {
cpgOpt.map { _ =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import io.joern.x2cpg.Ast
import org.slf4j.LoggerFactory

import java.util.concurrent.{ConcurrentHashMap, ConcurrentSkipListSet}
class GoGlobal {
class GoGlobal(val testflag: Boolean = false) {
private val logger = LoggerFactory.getLogger(getClass)
var mainModule: Option[String] = None
var processingDependencies = false
Expand Down Expand Up @@ -139,6 +139,13 @@ class GoGlobal {
def checkForDependencyFlags(name: String): Boolean = {
!processingDependencies || processingDependencies && name.headOption.exists(_.isUpper)
}
def firstCleanup(): Unit = {
if (!testflag) {
aliasToNameSpaceMapping.clear()
lambdaSignatureToLambdaTypeMap.clear()
nameSpaceMetaDataMap.clear()
}
}
}

case class NameSpaceMetaData(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
package io.joern.gosrc2cpg.passes

import io.joern.gosrc2cpg.Config
import io.joern.gosrc2cpg.astcreation.AstCreator
import io.joern.gosrc2cpg.datastructures.GoGlobal
import io.joern.gosrc2cpg.parser.GoAstJsonParser
import io.joern.x2cpg.astgen.ParserResult
import io.joern.x2cpg.SourceFiles
import io.joern.x2cpg.utils.{Report, TimeUtils}
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.passes.ConcurrentWriterCpgPass
Expand All @@ -21,17 +16,18 @@ class AstCreationPass(cpg: Cpg, astCreators: Seq[AstCreator], report: Report)
override def generateParts(): Array[AstCreator] = astCreators.toArray
override def runOnPart(diffGraph: DiffGraphBuilder, astCreator: AstCreator): Unit = {
val ((gotCpg, filename), duration) = TimeUtils.time {
val fileLOC = IOUtils.readLinesInFile(Paths.get(astCreator.parserResult.fullPath)).size
val fileLOC = IOUtils.readLinesInFile(Paths.get(astCreator.originalFilePath)).size
report.addReportInfo(astCreator.relPathFileName, fileLOC, parsed = true)
Try {
val localDiff = astCreator.createAst()
astCreator.cleanup()
diffGraph.absorb(localDiff)
} match {
case Failure(exception) =>
logger.warn(s"Failed to generate a CPG for: '${astCreator.parserResult.fullPath}'", exception)
logger.warn(s"Failed to generate a CPG for: '${astCreator.originalFilePath}'", exception)
(false, astCreator.relPathFileName)
case Success(_) =>
logger.info(s"Generated a CPG for: '${astCreator.parserResult.fullPath}'")
logger.info(s"Generated a CPG for: '${astCreator.originalFilePath}'")
(true, astCreator.relPathFileName)
}
}
Expand Down
Loading