Skip to content

Commit

Permalink
[Gosrc2cpg] download dependencies and caching improvements (#4352)
Browse files Browse the repository at this point in the history
* Some code refactor and optimisations

1. Removed some unwanted brackets
2. Parallelised downloading of the dependencies and processing them.

* Partial changes to add failing unit tests.

* Unit tests to cover expected situations to lower the memory footprint

* minor changes

* Initial download dependency optimisation

1. Record which dependencies are getting used as well as which
subpackages are getting used.
2. Only download those dependencies which are directly getting imported
or used.

* handling for used dependencies

1. Handling for the downloading of dependencies only if those are
getting used in the main code.
2. While doing the optimisation, came across a bug: if more than one
package with the same name was created in the code, the package level
`TypeDecl` and `NamespaceBlock` were created only once. Introduced a few
unit tests which cover these use cases, and added the respective
handling for the same.

* Fixing one more unit test from first

* changes to not cache unwanted imports

1. Made changes to not cache unwanted imports from dependency source
code.
2. Made changes to cache only used imports in source code with all the
non aliased imports to global cache and aliased ones in the context of
file `AstCreator`.
3. Caching only those packages whose package name is different from
enclosing folder name inside global cache.

* few test corrections as per updated changes

* minor updates

* Optimisation to cache lambda type info

Optimisation to cache lambda type info

* optimisations to store method metadata along with struct type metadata

1. Changed the storage structure to minimize the amount of data being
stored for method meta data cache and struct type members type
information.
2. Made respective changes to fix all the breaking unit tests.

* not caching namespaces having starting letter in small case

* Fix for issue related to package TypeDecl

1. While making the optimisations, found that if the main source code
package was imported and processed while processing imports, in some cases
the TypeDecl for package level global variables wasn't getting created.
2. Identified the issue and made a fix for the same.

* Ignoring a few unit tests which need to be updated with improvements.

* review comment fixes
  • Loading branch information
pandurangpatil authored Mar 26, 2024
1 parent 9b86e80 commit 5106dd3
Show file tree
Hide file tree
Showing 16 changed files with 760 additions and 216 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,31 +22,42 @@ import io.shiftleft.codepropertygraph.generated.Languages
import java.nio.file.Paths
import scala.util.Try

class GoSrc2Cpg extends X2CpgFrontend[Config] {
class GoSrc2Cpg(goGlobalOption: Option[GoGlobal] = Option(GoGlobal())) extends X2CpgFrontend[Config] {
private val report: Report = new Report()

private var goMod: Option[GoModHelper] = None
def createCpg(config: Config): Try[Cpg] = {
withNewEmptyCpg(config.outputPath, config) { (cpg, config) =>
File.usingTemporaryDirectory("gosrc2cpgOut") { tmpDir =>
val goGlobal = GoGlobal()
new MetaDataPass(cpg, Languages.GOLANG, config.inputPath).createAndApply()
val astGenResult = new AstGenRunner(config).execute(tmpDir).asInstanceOf[GoAstGenRunnerResult]
val goMod = new GoModHelper(
Some(config),
astGenResult.parsedModFile.flatMap(modFile => GoAstJsonParser.readModFile(Paths.get(modFile)).map(x => x))
)
if (config.fetchDependencies) {
goGlobal.processingDependencies = true
new DownloadDependenciesPass(goMod, goGlobal, config).process()
goGlobal.processingDependencies = false
}
val astCreators =
new MethodAndTypeCacheBuilderPass(Some(cpg), astGenResult.parsedFiles, config, goMod, goGlobal).process()
new AstCreationPass(cpg, astCreators, report).createAndApply()
if goGlobal.pkgLevelVarAndConstantAstMap.size() > 0 then
new PackageCtorCreationPass(cpg, config, goGlobal).createAndApply()
report.print()
goGlobalOption
.orElse(Option(GoGlobal()))
.foreach(goGlobal => {
new MetaDataPass(cpg, Languages.GOLANG, config.inputPath).createAndApply()
val astGenResult = new AstGenRunner(config).execute(tmpDir).asInstanceOf[GoAstGenRunnerResult]
goMod = Some(
new GoModHelper(
Some(config),
astGenResult.parsedModFile
.flatMap(modFile => GoAstJsonParser.readModFile(Paths.get(modFile)).map(x => x))
)
)
goGlobal.mainModule = goMod.flatMap(modHelper => modHelper.getModMetaData().map(mod => mod.module.name))
val astCreators =
new MethodAndTypeCacheBuilderPass(Some(cpg), astGenResult.parsedFiles, config, goMod.get, goGlobal)
.process()
if (config.fetchDependencies) {
goGlobal.processingDependencies = true
new DownloadDependenciesPass(goMod.get, goGlobal, config).process()
goGlobal.processingDependencies = false
}
new AstCreationPass(cpg, astCreators, report).createAndApply()
if goGlobal.pkgLevelVarAndConstantAstMap.size() > 0 then
new PackageCtorCreationPass(cpg, config, goGlobal).createAndApply()
report.print()
})
}
}
}

/** Returns the [[GoModHelper]] stored in `goMod`.
  *
  * `goMod` starts out as `None` and is only assigned inside `createCpg`, so this accessor fails when it is called
  * before `createCpg` has reached that assignment.
  *
  * @return
  *   the module helper created by `createCpg`
  * @throws NoSuchElementException
  *   if `goMod` has not been populated yet (same exception type as the previous `Option.get`, with a clearer message)
  */
def getGoModHelper: GoModHelper =
  goMod.getOrElse(
    throw new NoSuchElementException("GoModHelper is not initialised; it is only set while createCpg runs")
  )
}
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ trait AstCreatorHelper { this: AstCreator =>
.toMap
}

protected def resolveAliasToFullName(alias: String, typeOrMethodName: String): String = {
s"${aliasToNameSpaceMapping.getOrElse(alias, goGlobal.aliasToNameSpaceMapping.getOrDefault(alias, s"${XDefines.Unknown}.<$alias>"))}.$typeOrMethodName"
/** Resolves an import alias to the namespace it stands for.
  *
  * The alias is looked up first in this creator's own `aliasToNameSpaceMapping`; only when it is missing there is
  * `goGlobal.aliasToNameSpaceMapping` consulted. If neither mapping knows the alias, a placeholder of the form
  * `<XDefines.Unknown>.<alias>` (alias wrapped in angle brackets) is returned.
  *
  * @param alias
  *   the alias to resolve
  * @return
  *   the resolved namespace, without any type or method name appended
  */
protected def resolveAliasToFullName(alias: String): String = {
s"${aliasToNameSpaceMapping.getOrElse(alias, goGlobal.aliasToNameSpaceMapping.getOrDefault(alias, s"${XDefines.Unknown}.<$alias>"))}"
}
protected def generateTypeFullName(
typeName: Option[String] = None,
Expand All @@ -156,7 +156,7 @@ trait AstCreatorHelper { this: AstCreator =>
Defines.primitiveTypeMap.getOrElse(typname, s"$fullyQualifiedPackage.$typname")
}
case Some(alias) =>
resolveAliasToFullName(alias, typname)
s"${resolveAliasToFullName(alias)}.$typname"

}
private def internalTypeFullName(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,12 @@ trait AstForGenDeclarationCreator(implicit withSchemaValidation: ValidationMode)
val localParserNode = createParserNodeInfo(parserNode)
if globalStatements then {
val variableName = localParserNode.json(ParserKeys.Name).str
if (checkForDependencyFlags(variableName)) {
if (goGlobal.checkForDependencyFlags(variableName)) {
// While processing the dependencies code ignoring package level global variables starting with lower case letter
// as these variables are only accessible within package. So those will not be referred from main source code.
goGlobal.recordStructTypeMemberType(
s"$fullyQualifiedPackage${Defines.dot}$variableName",
goGlobal.recordStructTypeMemberTypeInfo(
fullyQualifiedPackage,
variableName,
typeFullName.getOrElse(Defines.anyTypeName)
)
astForGlobalVarAndConstants(typeFullName.getOrElse(Defines.anyTypeName), localParserNode)
Expand All @@ -94,8 +95,8 @@ trait AstForGenDeclarationCreator(implicit withSchemaValidation: ValidationMode)
val rhsTypeFullName = typeFullName.getOrElse(getTypeFullNameFromAstNode(rhsAst))
if (globalStatements) {
val variableName = lhsParserNode.json(ParserKeys.Name).str
if (checkForDependencyFlags(variableName)) {
goGlobal.recordStructTypeMemberType(s"$fullyQualifiedPackage${Defines.dot}$variableName", rhsTypeFullName)
if (goGlobal.checkForDependencyFlags(variableName)) {
goGlobal.recordStructTypeMemberTypeInfo(fullyQualifiedPackage, variableName, rhsTypeFullName)
astForGlobalVarAndConstants(rhsTypeFullName, lhsParserNode, Some(rhsAst))
}
(Ast(), Ast())
Expand Down Expand Up @@ -160,14 +161,4 @@ trait AstForGenDeclarationCreator(implicit withSchemaValidation: ValidationMode)
Ast()
}
}

/** Tells whether a package level declaration with the given name should be processed.
  *
  * When dependency code is not being processed, every name is accepted. While dependency code is being processed, only
  * names starting with an upper case letter are accepted: package level global variables, constants, types, and
  * functions starting with a lower case letter are only accessible within their package, so they will not be referred
  * from main source code.
  *
  * @param name
  *   name of the package level declaration
  * @return
  *   `true` when the declaration should be processed, `false` when it should be ignored
  */
protected def checkForDependencyFlags(name: String): Boolean = {
  if (goGlobal.processingDependencies) name.headOption.exists(_.isUpper)
  else true
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package io.joern.gosrc2cpg.astcreation

import io.joern.gosrc2cpg.datastructures.{LambdaTypeInfo, MethodCacheMetaData}
import io.joern.gosrc2cpg.parser.{ParserKeys, ParserNodeInfo}
import io.joern.x2cpg.datastructures.Stack.StackWrapper
import io.joern.x2cpg.utils.NodeBuilders.newModifierNode
Expand All @@ -8,6 +9,8 @@ import io.shiftleft.codepropertygraph.generated.nodes.{NewMethod, NewMethodRetur
import io.shiftleft.codepropertygraph.generated.{ModifierTypes, NodeTypes}
import ujson.Value

import scala.jdk.CollectionConverters.*

trait AstForLambdaCreator(implicit withSchemaValidation: ValidationMode) { this: AstCreator =>

def astForFuncLiteral(funcLiteral: ParserNodeInfo): Seq[Ast] = {
Expand All @@ -17,9 +20,8 @@ trait AstForLambdaCreator(implicit withSchemaValidation: ValidationMode) { this:
.collectFirst({ case m: NewMethod if !m.fullName.endsWith(parserResult.filename) => m.fullName })
.getOrElse(fullyQualifiedPackage)
val fullName = s"$baseFullName.$lambdaName"
val (signature, returnTypeStr, methodReturn, params, genericTypeMethodMap) = generateLambdaSignature(
createParserNodeInfo(funcLiteral.json(ParserKeys.Type))
)
val LambdaFunctionMetaData(signature, returnTypeStr, methodReturn, params, genericTypeMethodMap) =
generateLambdaSignature(createParserNodeInfo(funcLiteral.json(ParserKeys.Type)))
val methodNode_ = methodNode(funcLiteral, lambdaName, funcLiteral.code, fullName, Some(signature), relPathFileName)
methodAstParentStack.push(methodNode_)
scope.pushNewScope(methodNode_)
Expand All @@ -40,7 +42,7 @@ trait AstForLambdaCreator(implicit withSchemaValidation: ValidationMode) { this:
typeDeclNode_.astParentType(NodeTypes.TYPE_DECL).astParentFullName(fullyQualifiedPackage)
else typeDeclNode_.astParentType(NodeTypes.METHOD).astParentFullName(baseFullName)
val structTypes = Option(goGlobal.lambdaSignatureToLambdaTypeMap.get(signature)) match {
case Some(types) => types.map(_._1)
case Some(types) => types.asScala.map(_.lambdaStructTypeFullName)
case None => Seq.empty
}
typeDeclNode_.inheritsFromTypeFullName(structTypes)
Expand All @@ -50,13 +52,11 @@ trait AstForLambdaCreator(implicit withSchemaValidation: ValidationMode) { this:
methodNode_.astParentFullName(fullName)
Ast.storeInDiffGraph(astForMethod, diffGraph)
}
goGlobal.recordFullNameToReturnType(fullName, returnTypeStr, signature)
goGlobal.recordMethodMetadata(baseFullName, lambdaName, MethodCacheMetaData(returnTypeStr, signature))
Seq(Ast(methodRefNode(funcLiteral, funcLiteral.code, fullName, fullName)))
}

protected def generateLambdaSignature(
funcType: ParserNodeInfo
): (String, String, NewMethodReturn, Value, Map[String, List[String]]) = {
protected def generateLambdaSignature(funcType: ParserNodeInfo): LambdaFunctionMetaData = {
val genericTypeMethodMap: Map[String, List[String]] = Map()
// TODO: While handling the tuple return type we need to handle it here as well.
val (returnTypeStr, returnTypeInfo) =
Expand All @@ -68,6 +68,14 @@ trait AstForLambdaCreator(implicit withSchemaValidation: ValidationMode) { this:
val paramSignature = parameterSignature(params, genericTypeMethodMap)
val signature =
s"${XDefines.ClosurePrefix}($paramSignature)$returnTypeStr"
(signature, returnTypeStr, methodReturn, params, genericTypeMethodMap)
LambdaFunctionMetaData(signature, returnTypeStr, methodReturn, params, genericTypeMethodMap)
}
}

/** Named result of `generateLambdaSignature`, bundling the values it computes for a function literal's type.
  *
  * @param signature
  *   closure signature, built as `XDefines.ClosurePrefix` followed by the parenthesised parameter signature and the
  *   return type string
  * @param returnTypeStr
  *   return type of the lambda as a string
  * @param methodReturn
  *   the method return node for the lambda
  * @param params
  *   raw JSON value that was passed to `parameterSignature` to build the parameter part of the signature
  *   (presumably the parameter list of the function type -- confirm against the parser output)
  * @param genericTypeMethodMap
  *   map handed to `parameterSignature` alongside `params`; `generateLambdaSignature` initialises it as an empty map
  */
case class LambdaFunctionMetaData(
signature: String,
returnTypeStr: String,
methodReturn: NewMethodReturn,
params: Value,
genericTypeMethodMap: Map[String, List[String]]
)
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package io.joern.gosrc2cpg.astcreation

import io.joern.gosrc2cpg.datastructures.MethodCacheMetaData
import io.joern.gosrc2cpg.parser.ParserAst.*
import io.joern.gosrc2cpg.parser.{ParserKeys, ParserNodeInfo}
import io.joern.x2cpg.{Ast, ValidationMode, Defines as XDefines}
Expand Down Expand Up @@ -94,26 +95,22 @@ trait AstForMethodCallExpressionCreator(implicit withSchemaValidation: Validatio
// Then we are assuming that the given function is defined inside same package as that of current file's package.
// This assumption will be invalid when another package is imported with alias "."
val methodFullName = s"$fullyQualifiedPackage.$methodName"
val (returnTypeFullNameCache, signatureCache) =
goGlobal.methodFullNameReturnTypeMap
.getOrDefault(methodFullName, (Defines.anyTypeName, s"$methodFullName()"))
val methodInfo = goGlobal
.getMethodMetadata(fullyQualifiedPackage, methodName)
.getOrElse(MethodCacheMetaData(Defines.anyTypeName, s"$methodFullName()"))
val (signature, fullName, returnTypeFullName) =
Defines.builtinFunctions.getOrElse(methodName, (signatureCache, methodFullName, returnTypeFullNameCache))
Defines.builtinFunctions.getOrElse(methodName, (methodInfo.signature, methodFullName, methodInfo.returnType))
val probableLambdaTypeFullName = scope.lookupVariable(methodName) match
case Some((_, lambdaTypeFullName)) => Some(lambdaTypeFullName)
case _ =>
Option(goGlobal.structTypeMemberTypeMapping.get(methodFullName)) match
case Some(globalLambdaTypeFullName) => Some(globalLambdaTypeFullName)
case _ => None
goGlobal.getStructTypeMemberType(fullyQualifiedPackage, methodName)
val (postLambdaFullname, postLambdaSignature, postLambdaReturnTypeFullName) = probableLambdaTypeFullName match
case Some(lambdaTypeFullName) =>
Option(
goGlobal.methodFullNameReturnTypeMap
.get(lambdaTypeFullName)
) match
case Some((lambdaReturnTypeFullNameCache, lambdaSignatureCache)) =>
(lambdaTypeFullName, lambdaSignatureCache, lambdaReturnTypeFullNameCache)
case _ => (fullName, signature, returnTypeFullName)
val (nameSpaceName, lambdaName) = goGlobal.splitNamespaceFromMember(lambdaTypeFullName)
goGlobal.getMethodMetadata(nameSpaceName, lambdaName) match {
case Some(metaData) => (lambdaTypeFullName, metaData.signature, metaData.returnType)
case _ => (fullName, signature, returnTypeFullName)
}
case _ =>
(fullName, signature, returnTypeFullName)
(methodName, postLambdaSignature, postLambdaFullname, postLambdaReturnTypeFullName, Seq.empty)
Expand All @@ -126,18 +123,21 @@ trait AstForMethodCallExpressionCreator(implicit withSchemaValidation: Validatio
processReceiverAst(methodName, xnode)
case _ =>
// Otherwise its an alias to imported namespace on which method call is made
val alias = xnode.json(ParserKeys.Name).str
val callMethodFullName =
resolveAliasToFullName(alias, methodName)
val alias = xnode.json(ParserKeys.Name).str
val fullNamespace = resolveAliasToFullName(alias)
val callMethodFullName = s"$fullNamespace.$methodName"
val lambdaFullName =
goGlobal.structTypeMemberTypeMapping.getOrDefault(callMethodFullName, callMethodFullName)
val (returnTypeFullNameCache, signatureCache) = Option(
goGlobal.methodFullNameReturnTypeMap
.get(lambdaFullName)
) match
case Some((returnTypeFullName, signature)) => (returnTypeFullName, signature)
case _ => (s"$callMethodFullName.${Defines.ReturnType}.${XDefines.Unknown}", s"$callMethodFullName()")

goGlobal.getStructTypeMemberType(fullNamespace, methodName).getOrElse(callMethodFullName)
val (nameSpace, memberName) = goGlobal.splitNamespaceFromMember(lambdaFullName)
val MethodCacheMetaData(returnTypeFullNameCache, signatureCache) =
goGlobal
.getMethodMetadata(nameSpace, memberName)
.getOrElse(
MethodCacheMetaData(
s"$callMethodFullName.${Defines.ReturnType}.${XDefines.Unknown}",
s"$callMethodFullName()"
)
)
(methodName, signatureCache, lambdaFullName, returnTypeFullNameCache, Seq.empty)
case _ =>
// This will take care of chained method calls. It will call `astForCallExpression` in recursive way,
Expand All @@ -157,12 +157,14 @@ trait AstForMethodCallExpressionCreator(implicit withSchemaValidation: Validatio
.getOrElse(Defines.anyTypeName)
.stripPrefix("*")
val callMethodFullName = s"$receiverTypeFullName.$methodName"
val (returnTypeFullNameCache, signatureCache) =
goGlobal.methodFullNameReturnTypeMap
.getOrDefault(
callMethodFullName,
(s"$receiverTypeFullName.$methodName.${Defines.ReturnType}.${XDefines.Unknown}", s"$callMethodFullName()")
val MethodCacheMetaData(returnTypeFullNameCache, signatureCache) = goGlobal
.getMethodMetadata(receiverTypeFullName, methodName)
.getOrElse(
MethodCacheMetaData(
s"$receiverTypeFullName.$methodName.${Defines.ReturnType}.${XDefines.Unknown}",
s"$callMethodFullName()"
)
)
(methodName, signatureCache, callMethodFullName, returnTypeFullNameCache, receiverAst)
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package io.joern.gosrc2cpg.astcreation

import io.joern.gosrc2cpg.datastructures.GoGlobal
import io.joern.gosrc2cpg.parser.ParserAst.*
import io.joern.gosrc2cpg.parser.{ParserKeys, ParserNodeInfo}
import io.joern.x2cpg.utils.NodeBuilders.newOperatorCallNode
Expand Down Expand Up @@ -72,11 +71,12 @@ trait AstForPrimitivesCreator(implicit withSchemaValidation: ValidationMode) { t
Ast(node).withRefEdge(node, variable)
case _ =>
// If its not local node then check if its global member variable of package TypeDecl
Option(goGlobal.structTypeMemberTypeMapping.get(s"$fullyQualifiedPackage${Defines.dot}$identifierName")) match
goGlobal.getStructTypeMemberType(fullyQualifiedPackage, identifierName) match {
case Some(fieldTypeFullName) => astForPackageGlobalFieldAccess(fieldTypeFullName, identifierName, ident)
case _ =>
// TODO: something is wrong here. Refer to SwitchTests -> "be correct for switch case 4"
Ast(identifierNode(ident, identifierName, ident.json(ParserKeys.Name).str, Defines.anyTypeName))
}
}
} else {
Ast()
Expand Down
Loading

0 comments on commit 5106dd3

Please sign in to comment.