Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Gosrc2cpg] download dependencies and caching improvements #4352

Merged
merged 28 commits into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
46ef88b
Some code refactor and optimisations
pandurangpatil Mar 12, 2024
e611fcb
Partial changes to add failing unit tests.
pandurangpatil Mar 13, 2024
7f277c5
Merge branch 'master' into go-download-impr
pandurangpatil Mar 13, 2024
5401c17
Unit tests to cover expected situations to lower the memory footprint
pandurangpatil Mar 14, 2024
fee3296
Merge branch 'master' into go-download-impr
pandurangpatil Mar 14, 2024
bb104b4
minor changes
pandurangpatil Mar 15, 2024
f0d66c5
Merge branch 'master' into go-download-impr
pandurangpatil Mar 15, 2024
19e2c25
Merge branch 'master' into go-download-impr
pandurangpatil Mar 18, 2024
aef54ca
Initial download dependency optimisation
pandurangpatil Mar 18, 2024
bcc6a4a
Merge branch 'master' into go-download-impr
pandurangpatil Mar 18, 2024
d595227
handling for used dependencies
pandurangpatil Mar 18, 2024
3222da6
Merge branch 'master' into go-download-impr
pandurangpatil Mar 18, 2024
5cfe3fa
Fixing one more unit test from first
pandurangpatil Mar 18, 2024
ae4fa1d
changes to not cache unwanted imports
pandurangpatil Mar 18, 2024
cf455e3
Merge branch 'master' into go-download-impr
pandurangpatil Mar 18, 2024
cc7f1d6
few test corrections as per updated changes
pandurangpatil Mar 18, 2024
1ea95ea
minor updates
pandurangpatil Mar 18, 2024
206c06e
Merge branch 'master' into go-download-impr
pandurangpatil Mar 21, 2024
e0cfa7f
Optimisation to cache lambda type info
pandurangpatil Mar 21, 2024
4b47630
Merge branch 'master' into go-download-impr
pandurangpatil Mar 21, 2024
fdefb9d
optimisations to store method metadata along with struct type metadata
pandurangpatil Mar 23, 2024
89d843f
Merge branch 'master' into go-download-impr
pandurangpatil Mar 23, 2024
d942fd7
Merge branch 'master' into go-download-impr
pandurangpatil Mar 25, 2024
e2d11e3
not caching namespaces having starting letter in small case
pandurangpatil Mar 25, 2024
967b878
Fix for issue related to package TypeDecl
pandurangpatil Mar 25, 2024
1dfe680
Ignoring few unit tests which needs to be updated with improvements.
pandurangpatil Mar 26, 2024
0506a10
Merge branch 'master' into go-download-impr
pandurangpatil Mar 26, 2024
ebce63b
review comment fixes
pandurangpatil Mar 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,31 +22,42 @@ import io.shiftleft.codepropertygraph.generated.Languages
import java.nio.file.Paths
import scala.util.Try

class GoSrc2Cpg extends X2CpgFrontend[Config] {
class GoSrc2Cpg(goGlobalOption: Option[GoGlobal] = Option(GoGlobal())) extends X2CpgFrontend[Config] {
private val report: Report = new Report()

private var goMod: Option[GoModHelper] = None
def createCpg(config: Config): Try[Cpg] = {
withNewEmptyCpg(config.outputPath, config) { (cpg, config) =>
File.usingTemporaryDirectory("gosrc2cpgOut") { tmpDir =>
val goGlobal = GoGlobal()
new MetaDataPass(cpg, Languages.GOLANG, config.inputPath).createAndApply()
val astGenResult = new AstGenRunner(config).execute(tmpDir).asInstanceOf[GoAstGenRunnerResult]
val goMod = new GoModHelper(
Some(config),
astGenResult.parsedModFile.flatMap(modFile => GoAstJsonParser.readModFile(Paths.get(modFile)).map(x => x))
)
if (config.fetchDependencies) {
goGlobal.processingDependencies = true
new DownloadDependenciesPass(goMod, goGlobal, config).process()
goGlobal.processingDependencies = false
}
val astCreators =
new MethodAndTypeCacheBuilderPass(Some(cpg), astGenResult.parsedFiles, config, goMod, goGlobal).process()
new AstCreationPass(cpg, astCreators, report).createAndApply()
if goGlobal.pkgLevelVarAndConstantAstMap.size() > 0 then
new PackageCtorCreationPass(cpg, config, goGlobal).createAndApply()
report.print()
goGlobalOption
.orElse(Option(GoGlobal()))
.foreach(goGlobal => {
new MetaDataPass(cpg, Languages.GOLANG, config.inputPath).createAndApply()
val astGenResult = new AstGenRunner(config).execute(tmpDir).asInstanceOf[GoAstGenRunnerResult]
goMod = Some(
new GoModHelper(
Some(config),
astGenResult.parsedModFile
.flatMap(modFile => GoAstJsonParser.readModFile(Paths.get(modFile)).map(x => x))
)
)
goGlobal.mainModule = goMod.flatMap(modHelper => modHelper.getModMetaData().map(mod => mod.module.name))
val astCreators =
new MethodAndTypeCacheBuilderPass(Some(cpg), astGenResult.parsedFiles, config, goMod.get, goGlobal)
.process()
if (config.fetchDependencies) {
goGlobal.processingDependencies = true
new DownloadDependenciesPass(goMod.get, goGlobal, config).process()
goGlobal.processingDependencies = false
}
new AstCreationPass(cpg, astCreators, report).createAndApply()
if goGlobal.pkgLevelVarAndConstantAstMap.size() > 0 then
new PackageCtorCreationPass(cpg, config, goGlobal).createAndApply()
report.print()
})
}
}
}

def getGoModHelper: GoModHelper = goMod.get
}
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ trait AstCreatorHelper { this: AstCreator =>
.toMap
}

protected def resolveAliasToFullName(alias: String, typeOrMethodName: String): String = {
s"${aliasToNameSpaceMapping.getOrElse(alias, goGlobal.aliasToNameSpaceMapping.getOrDefault(alias, s"${XDefines.Unknown}.<$alias>"))}.$typeOrMethodName"
protected def resolveAliasToFullName(alias: String): String = {
s"${aliasToNameSpaceMapping.getOrElse(alias, goGlobal.aliasToNameSpaceMapping.getOrDefault(alias, s"${XDefines.Unknown}.<$alias>"))}"
}
protected def generateTypeFullName(
typeName: Option[String] = None,
Expand All @@ -156,7 +156,7 @@ trait AstCreatorHelper { this: AstCreator =>
Defines.primitiveTypeMap.getOrElse(typname, s"$fullyQualifiedPackage.$typname")
}
case Some(alias) =>
resolveAliasToFullName(alias, typname)
s"${resolveAliasToFullName(alias)}.$typname"

}
private def internalTypeFullName(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,12 @@ trait AstForGenDeclarationCreator(implicit withSchemaValidation: ValidationMode)
val localParserNode = createParserNodeInfo(parserNode)
if globalStatements then {
val variableName = localParserNode.json(ParserKeys.Name).str
if (checkForDependencyFlags(variableName)) {
if (goGlobal.checkForDependencyFlags(variableName)) {
// While processing dependency code, ignore package-level global variables whose names start with a lower-case
// letter: such variables are only accessible within their own package, so they are never referenced from the main source code.
goGlobal.recordStructTypeMemberType(
s"$fullyQualifiedPackage${Defines.dot}$variableName",
goGlobal.recordStructTypeMemberTypeInfo(
fullyQualifiedPackage,
variableName,
typeFullName.getOrElse(Defines.anyTypeName)
)
astForGlobalVarAndConstants(typeFullName.getOrElse(Defines.anyTypeName), localParserNode)
Expand All @@ -94,8 +95,8 @@ trait AstForGenDeclarationCreator(implicit withSchemaValidation: ValidationMode)
val rhsTypeFullName = typeFullName.getOrElse(getTypeFullNameFromAstNode(rhsAst))
if (globalStatements) {
val variableName = lhsParserNode.json(ParserKeys.Name).str
if (checkForDependencyFlags(variableName)) {
goGlobal.recordStructTypeMemberType(s"$fullyQualifiedPackage${Defines.dot}$variableName", rhsTypeFullName)
if (goGlobal.checkForDependencyFlags(variableName)) {
goGlobal.recordStructTypeMemberTypeInfo(fullyQualifiedPackage, variableName, rhsTypeFullName)
astForGlobalVarAndConstants(rhsTypeFullName, lhsParserNode, Some(rhsAst))
}
(Ast(), Ast())
Expand Down Expand Up @@ -160,14 +161,4 @@ trait AstForGenDeclarationCreator(implicit withSchemaValidation: ValidationMode)
Ast()
}
}

/** Decides whether a package-level declaration with the given name should be handled, based on whether dependency
 * code is currently being processed.
 *
 * When dependency code is not being processed, every name is accepted. While dependency code is being processed,
 * only names that begin with an upper-case letter are accepted, because package-level global variables, constants,
 * types, and functions starting with a lower-case letter are only accessible within their own package and so will
 * not be referred to from the main source code.
 *
 * @param name
 *   name of the package-level variable, constant, type, or function under consideration
 * @return
 *   `true` when the declaration should be handled, `false` when it should be ignored
 */
protected def checkForDependencyFlags(name: String): Boolean = {
  // An empty name has no first character and therefore never counts as upper-case.
  val startsWithUpperCase = name.headOption.exists(_.isUpper)
  if (goGlobal.processingDependencies) startsWithUpperCase else true
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package io.joern.gosrc2cpg.astcreation

import io.joern.gosrc2cpg.datastructures.{LambdaTypeInfo, MethodCacheMetaData}
import io.joern.gosrc2cpg.parser.{ParserKeys, ParserNodeInfo}
import io.joern.x2cpg.datastructures.Stack.StackWrapper
import io.joern.x2cpg.utils.NodeBuilders.newModifierNode
Expand All @@ -8,6 +9,8 @@ import io.shiftleft.codepropertygraph.generated.nodes.{NewMethod, NewMethodRetur
import io.shiftleft.codepropertygraph.generated.{ModifierTypes, NodeTypes}
import ujson.Value

import scala.jdk.CollectionConverters.*

trait AstForLambdaCreator(implicit withSchemaValidation: ValidationMode) { this: AstCreator =>

def astForFuncLiteral(funcLiteral: ParserNodeInfo): Seq[Ast] = {
Expand All @@ -17,9 +20,8 @@ trait AstForLambdaCreator(implicit withSchemaValidation: ValidationMode) { this:
.collectFirst({ case m: NewMethod if !m.fullName.endsWith(parserResult.filename) => m.fullName })
.getOrElse(fullyQualifiedPackage)
val fullName = s"$baseFullName.$lambdaName"
val (signature, returnTypeStr, methodReturn, params, genericTypeMethodMap) = generateLambdaSignature(
createParserNodeInfo(funcLiteral.json(ParserKeys.Type))
)
val LambdaFunctionMetaData(signature, returnTypeStr, methodReturn, params, genericTypeMethodMap) =
generateLambdaSignature(createParserNodeInfo(funcLiteral.json(ParserKeys.Type)))
val methodNode_ = methodNode(funcLiteral, lambdaName, funcLiteral.code, fullName, Some(signature), relPathFileName)
methodAstParentStack.push(methodNode_)
scope.pushNewScope(methodNode_)
Expand All @@ -40,7 +42,7 @@ trait AstForLambdaCreator(implicit withSchemaValidation: ValidationMode) { this:
typeDeclNode_.astParentType(NodeTypes.TYPE_DECL).astParentFullName(fullyQualifiedPackage)
else typeDeclNode_.astParentType(NodeTypes.METHOD).astParentFullName(baseFullName)
val structTypes = Option(goGlobal.lambdaSignatureToLambdaTypeMap.get(signature)) match {
case Some(types) => types.map(_._1)
case Some(types) => types.asScala.map(_.lambdaStructTypeFullName)
case None => Seq.empty
}
typeDeclNode_.inheritsFromTypeFullName(structTypes)
Expand All @@ -50,13 +52,11 @@ trait AstForLambdaCreator(implicit withSchemaValidation: ValidationMode) { this:
methodNode_.astParentFullName(fullName)
Ast.storeInDiffGraph(astForMethod, diffGraph)
}
goGlobal.recordFullNameToReturnType(fullName, returnTypeStr, signature)
goGlobal.recordMethodMetadata(baseFullName, lambdaName, MethodCacheMetaData(returnTypeStr, signature))
Seq(Ast(methodRefNode(funcLiteral, funcLiteral.code, fullName, fullName)))
}

protected def generateLambdaSignature(
funcType: ParserNodeInfo
): (String, String, NewMethodReturn, Value, Map[String, List[String]]) = {
protected def generateLambdaSignature(funcType: ParserNodeInfo): LambdaFunctionMetaData = {
val genericTypeMethodMap: Map[String, List[String]] = Map()
// TODO: While handling the tuple return type we need to handle it here as well.
val (returnTypeStr, returnTypeInfo) =
Expand All @@ -68,6 +68,14 @@ trait AstForLambdaCreator(implicit withSchemaValidation: ValidationMode) { this:
val paramSignature = parameterSignature(params, genericTypeMethodMap)
val signature =
s"${XDefines.ClosurePrefix}($paramSignature)$returnTypeStr"
(signature, returnTypeStr, methodReturn, params, genericTypeMethodMap)
LambdaFunctionMetaData(signature, returnTypeStr, methodReturn, params, genericTypeMethodMap)
}
}

/** Values computed by `generateLambdaSignature` for the type node of a function literal, bundled into a single
 * named type so that callers can destructure them with a pattern.
 *
 * @param signature
 *   lambda signature string, built as the closure prefix followed by `(<parameter signature>)<return type>`
 * @param returnTypeStr
 *   return type string of the lambda, the same value that is appended to `signature`
 * @param methodReturn
 *   method-return node created for the lambda
 * @param params
 *   parser JSON value from which the parameter signature was derived (presumably the lambda's parameter list;
 *   confirm against `generateLambdaSignature`)
 * @param genericTypeMethodMap
 *   generic type mapping handed to `parameterSignature`; `generateLambdaSignature` initialises it as an empty map
 */
case class LambdaFunctionMetaData(
signature: String,
returnTypeStr: String,
methodReturn: NewMethodReturn,
params: Value,
genericTypeMethodMap: Map[String, List[String]]
)
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package io.joern.gosrc2cpg.astcreation

import io.joern.gosrc2cpg.datastructures.MethodCacheMetaData
import io.joern.gosrc2cpg.parser.ParserAst.*
import io.joern.gosrc2cpg.parser.{ParserKeys, ParserNodeInfo}
import io.joern.x2cpg.{Ast, ValidationMode, Defines as XDefines}
Expand Down Expand Up @@ -94,26 +95,22 @@ trait AstForMethodCallExpressionCreator(implicit withSchemaValidation: Validatio
// Then we are assuming that the given function is defined inside same package as that of current file's package.
// This assumption will be invalid when another package is imported with alias "."
val methodFullName = s"$fullyQualifiedPackage.$methodName"
val (returnTypeFullNameCache, signatureCache) =
goGlobal.methodFullNameReturnTypeMap
.getOrDefault(methodFullName, (Defines.anyTypeName, s"$methodFullName()"))
val methodInfo = goGlobal
.getMethodMetadata(fullyQualifiedPackage, methodName)
.getOrElse(MethodCacheMetaData(Defines.anyTypeName, s"$methodFullName()"))
val (signature, fullName, returnTypeFullName) =
Defines.builtinFunctions.getOrElse(methodName, (signatureCache, methodFullName, returnTypeFullNameCache))
Defines.builtinFunctions.getOrElse(methodName, (methodInfo.signature, methodFullName, methodInfo.returnType))
val probableLambdaTypeFullName = scope.lookupVariable(methodName) match
case Some((_, lambdaTypeFullName)) => Some(lambdaTypeFullName)
case _ =>
Option(goGlobal.structTypeMemberTypeMapping.get(methodFullName)) match
case Some(globalLambdaTypeFullName) => Some(globalLambdaTypeFullName)
case _ => None
goGlobal.getStructTypeMemberType(fullyQualifiedPackage, methodName)
val (postLambdaFullname, postLambdaSignature, postLambdaReturnTypeFullName) = probableLambdaTypeFullName match
case Some(lambdaTypeFullName) =>
Option(
goGlobal.methodFullNameReturnTypeMap
.get(lambdaTypeFullName)
) match
case Some((lambdaReturnTypeFullNameCache, lambdaSignatureCache)) =>
(lambdaTypeFullName, lambdaSignatureCache, lambdaReturnTypeFullNameCache)
case _ => (fullName, signature, returnTypeFullName)
val (nameSpaceName, lambdaName) = goGlobal.splitNamespaceFromMember(lambdaTypeFullName)
goGlobal.getMethodMetadata(nameSpaceName, lambdaName) match {
case Some(metaData) => (lambdaTypeFullName, metaData.signature, metaData.returnType)
case _ => (fullName, signature, returnTypeFullName)
}
case _ =>
(fullName, signature, returnTypeFullName)
(methodName, postLambdaSignature, postLambdaFullname, postLambdaReturnTypeFullName, Seq.empty)
Expand All @@ -126,18 +123,21 @@ trait AstForMethodCallExpressionCreator(implicit withSchemaValidation: Validatio
processReceiverAst(methodName, xnode)
case _ =>
// Otherwise it's an alias to an imported namespace on which the method call is made
val alias = xnode.json(ParserKeys.Name).str
val callMethodFullName =
resolveAliasToFullName(alias, methodName)
val alias = xnode.json(ParserKeys.Name).str
val fullNamespace = resolveAliasToFullName(alias)
val callMethodFullName = s"$fullNamespace.$methodName"
val lambdaFullName =
goGlobal.structTypeMemberTypeMapping.getOrDefault(callMethodFullName, callMethodFullName)
val (returnTypeFullNameCache, signatureCache) = Option(
goGlobal.methodFullNameReturnTypeMap
.get(lambdaFullName)
) match
case Some((returnTypeFullName, signature)) => (returnTypeFullName, signature)
case _ => (s"$callMethodFullName.${Defines.ReturnType}.${XDefines.Unknown}", s"$callMethodFullName()")

goGlobal.getStructTypeMemberType(fullNamespace, methodName).getOrElse(callMethodFullName)
val (nameSpace, memberName) = goGlobal.splitNamespaceFromMember(lambdaFullName)
val MethodCacheMetaData(returnTypeFullNameCache, signatureCache) =
goGlobal
.getMethodMetadata(nameSpace, memberName)
.getOrElse(
MethodCacheMetaData(
s"$callMethodFullName.${Defines.ReturnType}.${XDefines.Unknown}",
s"$callMethodFullName()"
)
)
(methodName, signatureCache, lambdaFullName, returnTypeFullNameCache, Seq.empty)
case _ =>
// This will take care of chained method calls. It will call `astForCallExpression` in recursive way,
Expand All @@ -157,12 +157,14 @@ trait AstForMethodCallExpressionCreator(implicit withSchemaValidation: Validatio
.getOrElse(Defines.anyTypeName)
.stripPrefix("*")
val callMethodFullName = s"$receiverTypeFullName.$methodName"
val (returnTypeFullNameCache, signatureCache) =
goGlobal.methodFullNameReturnTypeMap
.getOrDefault(
callMethodFullName,
(s"$receiverTypeFullName.$methodName.${Defines.ReturnType}.${XDefines.Unknown}", s"$callMethodFullName()")
val MethodCacheMetaData(returnTypeFullNameCache, signatureCache) = goGlobal
.getMethodMetadata(receiverTypeFullName, methodName)
.getOrElse(
MethodCacheMetaData(
s"$receiverTypeFullName.$methodName.${Defines.ReturnType}.${XDefines.Unknown}",
s"$callMethodFullName()"
)
)
(methodName, signatureCache, callMethodFullName, returnTypeFullNameCache, receiverAst)
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package io.joern.gosrc2cpg.astcreation

import io.joern.gosrc2cpg.datastructures.GoGlobal
import io.joern.gosrc2cpg.parser.ParserAst.*
import io.joern.gosrc2cpg.parser.{ParserKeys, ParserNodeInfo}
import io.joern.x2cpg.utils.NodeBuilders.newOperatorCallNode
Expand Down Expand Up @@ -72,11 +71,12 @@ trait AstForPrimitivesCreator(implicit withSchemaValidation: ValidationMode) { t
Ast(node).withRefEdge(node, variable)
case _ =>
// If it's not a local node, then check whether it's a global member variable of the package TypeDecl
Option(goGlobal.structTypeMemberTypeMapping.get(s"$fullyQualifiedPackage${Defines.dot}$identifierName")) match
goGlobal.getStructTypeMemberType(fullyQualifiedPackage, identifierName) match {
case Some(fieldTypeFullName) => astForPackageGlobalFieldAccess(fieldTypeFullName, identifierName, ident)
case _ =>
// TODO: something is wrong here. Refer to SwitchTests -> "be correct for switch case 4"
Ast(identifierNode(ident, identifierName, ident.json(ParserKeys.Name).str, Defines.anyTypeName))
}
}
} else {
Ast()
Expand Down
Loading
Loading