-
Notifications
You must be signed in to change notification settings - Fork 296
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[gosrc2cpg] Memory cleanup #4407
Changes from 10 commits
47ceb72
e0004fe
dd520f7
8c913a4
987325f
204ee2f
f488fc2
359788b
0b5e6c5
013a13d
c18ab75
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,32 @@ | ||
package io.joern.gosrc2cpg.astcreation | ||
|
||
import better.files.File | ||
import io.joern.gosrc2cpg.datastructures.GoGlobal | ||
import io.joern.gosrc2cpg.model.GoModHelper | ||
import io.joern.gosrc2cpg.parser.ParserAst.* | ||
import io.joern.gosrc2cpg.parser.{ParserKeys, ParserNodeInfo} | ||
import io.joern.gosrc2cpg.parser.{GoAstJsonParser, ParserKeys, ParserNodeInfo} | ||
import io.joern.x2cpg.astgen.{AstGenNodeBuilder, ParserResult} | ||
import io.joern.x2cpg.datastructures.Scope | ||
import io.joern.x2cpg.datastructures.Stack.* | ||
import io.joern.x2cpg.utils.NodeBuilders.newModifierNode | ||
import io.joern.x2cpg.{Ast, AstCreatorBase, ValidationMode, AstNodeBuilder as X2CpgAstNodeBuilder} | ||
import io.joern.x2cpg.{Ast, AstCreatorBase, ValidationMode} | ||
import io.shiftleft.codepropertygraph.generated.nodes.NewNode | ||
import io.shiftleft.codepropertygraph.generated.{ModifierTypes, NodeTypes} | ||
import org.slf4j.{Logger, LoggerFactory} | ||
import overflowdb.BatchedUpdate.DiffGraphBuilder | ||
import ujson.Value | ||
|
||
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} | ||
import java.nio.file.{Files, Path, Paths} | ||
import java.util.UUID | ||
import scala.collection.mutable | ||
|
||
class AstCreator( | ||
val jsonAstFilePath: String, | ||
val relPathFileName: String, | ||
val parserResult: ParserResult, | ||
val goMod: GoModHelper, | ||
val goGlobal: GoGlobal | ||
val goGlobal: GoGlobal, | ||
tmpDir: Option[File] = None | ||
)(implicit withSchemaValidation: ValidationMode) | ||
extends AstCreatorBase(relPathFileName) | ||
with AstCreatorHelper | ||
|
@@ -36,23 +41,30 @@ class AstCreator( | |
with AstForLambdaCreator | ||
with AstGenNodeBuilder[AstCreator] { | ||
|
||
protected val logger: Logger = LoggerFactory.getLogger(classOf[AstCreator]) | ||
protected val logger: Logger = LoggerFactory.getLogger(classOf[AstCreator]) | ||
protected val tempAliasToNameSpaceMappingFilePath: Option[Path] = | ||
tmpDir.map(dir => Paths.get(dir.pathAsString, s"alias-cache${UUID.randomUUID().toString}")) | ||
protected val methodAstParentStack: Stack[NewNode] = new Stack() | ||
protected val scope: Scope[String, (NewNode, String), NewNode] = new Scope() | ||
protected val aliasToNameSpaceMapping: mutable.Map[String, String] = mutable.Map.empty | ||
protected val lineNumberMapping: Map[Int, String] = positionLookupTables(parserResult.fileContent) | ||
protected val declaredPackageName = parserResult.json(ParserKeys.Name)(ParserKeys.Name).str | ||
protected val fullyQualifiedPackage = | ||
goMod.getNameSpace(parserResult.fullPath, declaredPackageName) | ||
protected var aliasToNameSpaceMapping: mutable.Map[String, String] = mutable.Map.empty | ||
protected var parserNodeCache = mutable.TreeMap[Long, ParserNodeInfo]() | ||
protected var lineNumberMapping: Map[Int, String] = Map.empty | ||
protected var declaredPackageName = "" | ||
protected var fullyQualifiedPackage = "" | ||
protected var fileName = "" | ||
|
||
var originalFilePath = "" | ||
|
||
override def createAst(): DiffGraphBuilder = { | ||
val parserResult = init() | ||
loadCacheToProcess() | ||
val rootNode = createParserNodeInfo(parserResult.json) | ||
val ast = astForTranslationUnit(rootNode) | ||
val ast = astForTranslationUnit(rootNode, parserResult) | ||
Ast.storeInDiffGraph(ast, diffGraph) | ||
diffGraph | ||
} | ||
|
||
private def astForTranslationUnit(rootNode: ParserNodeInfo): Ast = { | ||
private def astForTranslationUnit(rootNode: ParserNodeInfo, parserResult: ParserResult): Ast = { | ||
val name = s"$fullyQualifiedPackage.${parserResult.filename}" | ||
val fullName = s"$relPathFileName:$name" | ||
val fakeGlobalMethodForFile = | ||
|
@@ -103,4 +115,70 @@ class AstCreator( | |
protected def astForNode(json: Value): Seq[Ast] = { | ||
astForNode(createParserNodeInfo(json)) | ||
} | ||
|
||
def init(): ParserResult = { | ||
val parserResult = GoAstJsonParser.readFile(Paths.get(jsonAstFilePath)) | ||
lineNumberMapping = positionLookupTables(parserResult) | ||
declaredPackageName = parserResult.json(ParserKeys.Name)(ParserKeys.Name).str | ||
fullyQualifiedPackage = goMod.getNameSpace(parserResult.fullPath, declaredPackageName) | ||
fileName = parserResult.filename | ||
originalFilePath = parserResult.fullPath | ||
parserResult | ||
} | ||
|
||
def cacheSerializeAndStore(): Unit = { | ||
tempAliasToNameSpaceMappingFilePath.map(file => { | ||
Files.write(file, serialise(aliasToNameSpaceMapping)) | ||
aliasToNameSpaceMapping.clear() | ||
}) | ||
lineNumberMapping = Map.empty | ||
} | ||
|
||
def loadCacheToProcess(): Unit = { | ||
tempAliasToNameSpaceMappingFilePath.map(file => { | ||
val deserialised = deserialise(Files.readAllBytes(file)) | ||
aliasToNameSpaceMapping = deserialised.asInstanceOf[mutable.Map[String, String]] | ||
}) | ||
} | ||
|
||
def cleanup(): Unit = { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would rather suggest using |
||
methodAstParentStack.clear() | ||
aliasToNameSpaceMapping.clear() | ||
lineNumberMapping = Map.empty | ||
parserNodeCache.clear() | ||
tempAliasToNameSpaceMappingFilePath.map(file => { | ||
if (Files.exists(file)) { | ||
Files.delete(file) | ||
} | ||
}) | ||
} | ||
|
||
/** Serialise any object to byte array to be passed through queue | ||
* | ||
* @param value | ||
* \- Any object to passed through queue as a result item. | ||
* @return | ||
* \- Object serialised into ByteArray | ||
*/ | ||
private def serialise(value: Any): Array[Byte] = { | ||
val stream: ByteArrayOutputStream = new ByteArrayOutputStream() | ||
val oos = new ObjectOutputStream(stream) | ||
oos.writeObject(value) | ||
oos.close() | ||
stream.toByteArray | ||
} | ||
|
||
/** Deserialize the ByteArray back to Object. | ||
* | ||
* @param bytes | ||
* \- Array[Byte] to be deserialized | ||
* @return | ||
* \- Deserialized object | ||
*/ | ||
private def deserialise(bytes: Array[Byte]): Any = { | ||
val ois = new ObjectInputStream(new ByteArrayInputStream(bytes)) | ||
val value = ois.readObject | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
We have a whole talk at BSides about exploiting Java deserialization https://www.youtube.com/watch?v=8qo0ZGK0tt0 https://msgpack.org is on the classpath from ODB, rather serialize data using that format. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess serialising the alias to namespace mapping is anyway shouldn't make much of the difference. I need to try removing this. |
||
ois.close() | ||
value | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Line 49-56 is going to become technical debt and hard to maintain. Can we try to find a solution that avoids so much global mutable state in this class?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Reading the code below, perhaps you want to look at what happens in
AstSummaryVisitor
inRubySrc2Cpg
andCSharpSrc2Cpg
for how to do a pre-parse without all this mutable state?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At the very least, we need to move these variables to another trait, as it's becoming very difficult to follow this file.