From 951b0f3ba1474bf1622ce2ece0362479c50195dc Mon Sep 17 00:00:00 2001 From: David Baker Effendi Date: Sat, 11 Nov 2023 09:09:58 +0200 Subject: [PATCH] [pysrc2cpg] Rework Import Resolution (#3812) * [pysrc2cpg] Rework Import Resolution The issue with the import handling is that paths that resolve to internal entities and paths that resolve to external dependencies underwent the same heuristic-driven path resolution. The problem here is that internal entities have simple guarantees that allow easier and faster traversals with little to no heuristics required other than handling Python 2/3 differences. The rest can then be bundled accurately as unresolved. ### Main changes * Moved changes for Python's import resolver `codeRoot` to the base class * Separated handling of internal entities from external entities: - Internal entities that are importable are given associated Pythonic import paths in the `moduleCache` map. - Any import paths that don't get a hit in this map then undergo some heuristic-based path building to make sensible-looking types * Split the `ResolvedImport` classes into `UnresolvedImport` and `ResolvedImport` with `EvaluatedImport` as the high-level trait. This separates imports that were found from those that were not found but have undergone some heuristic processing. ### Misc Renamed each frontend's `ImportResolverPass` to be prefixed with the language name for easier navigation. ### Follow-up Python models entities imported as `import x.y` as a field access, `FieldAccess(x).fieldIdentifier(y)`. However, `x` may not have an associated type declaration for the module since it is simply a directory holding various modules. This means that `y` may not always be resolved, as it is interpreted as a member instead of a standalone module. 
* Removed redundant code * Escape backslash on windows * Fixed compilation issue in Ruby * Ignore function type refs --- .../cpgcreation/PythonSrcCpgGenerator.scala | 2 +- .../scala/io/joern/jssrc2cpg/JsSrc2Cpg.scala | 2 +- ...ala => JavaScriptImportResolverPass.scala} | 8 +- .../passes/TypeRecoveryPassTests.scala | 12 +- .../joern/pysrc2cpg/ImportResolverPass.scala | 154 -------------- .../pysrc2cpg/PythonImportResolverPass.scala | 189 ++++++++++++++++++ .../joern/pysrc2cpg/PythonTypeRecovery.scala | 6 +- .../io/joern/pysrc2cpg/PySrc2CpgFixture.scala | 2 +- .../passes/TypeRecoveryPassTests.scala | 96 +++++++-- .../io/joern/rubysrc2cpg/RubySrc2Cpg.scala | 2 +- ...ass.scala => RubyImportResolverPass.scala} | 10 +- .../passes/RubyTypeRecoveryPass.scala | 2 +- .../passes/RubyTypeRecoveryTests.scala | 7 +- .../passes/frontend/XImportResolverPass.scala | 44 ++-- .../x2cpg/passes/frontend/XTypeRecovery.scala | 29 ++- 15 files changed, 340 insertions(+), 225 deletions(-) rename joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/passes/{ImportResolverPass.scala => JavaScriptImportResolverPass.scala} (95%) delete mode 100644 joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/ImportResolverPass.scala create mode 100644 joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/PythonImportResolverPass.scala rename joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/{ImportResolverPass.scala => RubyImportResolverPass.scala} (93%) diff --git a/console/src/main/scala/io/joern/console/cpgcreation/PythonSrcCpgGenerator.scala b/console/src/main/scala/io/joern/console/cpgcreation/PythonSrcCpgGenerator.scala index 919ce9e51eb2..c7bf6804ffdf 100644 --- a/console/src/main/scala/io/joern/console/cpgcreation/PythonSrcCpgGenerator.scala +++ b/console/src/main/scala/io/joern/console/cpgcreation/PythonSrcCpgGenerator.scala @@ -28,7 +28,7 @@ case class PythonSrcCpgGenerator(config: FrontendConfig, rootPath: Path) extends 
override def applyPostProcessingPasses(cpg: Cpg): Cpg = { new ImportsPass(cpg).createAndApply() - new ImportResolverPass(cpg).createAndApply() + new PythonImportResolverPass(cpg).createAndApply() new DynamicTypeHintFullNamePass(cpg).createAndApply() new PythonInheritanceNamePass(cpg).createAndApply() val typeRecoveryConfig = pyConfig match diff --git a/joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/JsSrc2Cpg.scala b/joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/JsSrc2Cpg.scala index e05d55eaa7b6..4f92037c50fc 100644 --- a/joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/JsSrc2Cpg.scala +++ b/joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/JsSrc2Cpg.scala @@ -63,7 +63,7 @@ object JsSrc2Cpg { List( new JavaScriptInheritanceNamePass(cpg), new ConstClosurePass(cpg), - new ImportResolverPass(cpg), + new JavaScriptImportResolverPass(cpg), new JavaScriptTypeRecoveryPass(cpg, typeRecoveryConfig), new JavaScriptTypeHintCallLinker(cpg), new NaiveCallLinker(cpg) diff --git a/joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/passes/ImportResolverPass.scala b/joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/passes/JavaScriptImportResolverPass.scala similarity index 95% rename from joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/passes/ImportResolverPass.scala rename to joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/passes/JavaScriptImportResolverPass.scala index b31d53f23306..82bdd027eb7f 100644 --- a/joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/passes/ImportResolverPass.scala +++ b/joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/passes/JavaScriptImportResolverPass.scala @@ -11,7 +11,7 @@ import java.io.File as JFile import java.util.regex.{Matcher, Pattern} import scala.util.{Failure, Success, Try} -class ImportResolverPass(cpg: Cpg) extends XImportResolverPass(cpg) { +class JavaScriptImportResolverPass(cpg: Cpg) 
extends XImportResolverPass(cpg) { private val pathPattern = Pattern.compile("[\"']([\\w/.]+)[\"']") @@ -27,7 +27,7 @@ class ImportResolverPass(cpg: Cpg) extends XImportResolverPass(cpg) { val alias = importedAs val matcher = pathPattern.matcher(rawEntity) val sep = Matcher.quoteReplacement(JFile.separator) - val root = s"$codeRoot${JFile.separator}" + val root = s"$codeRootDir${JFile.separator}" val currentFile = s"$root$fileName" // We want to know if the import is local since if an external name is used to match internal methods we may have // false paths. @@ -114,12 +114,12 @@ class ImportResolverPass(cpg: Cpg) extends XImportResolverPass(cpg) { // Exported closure with a method ref within the AST of the RHS y.ast.isMethodRef.map(mRef => ResolvedMethod(mRef.methodFullName, alias, Option("this"))).toSet case _ => - Set.empty[ResolvedImport] + Set.empty[EvaluatedImport] } }.toSet } else { Set(UnknownMethod(entity, alias, Option("this")), UnknownTypeDecl(entity)) - }).foreach(x => resolvedImportToTag(x, importCall, diffGraph)) + }).foreach(x => evaluatedImportToTag(x, importCall, diffGraph)) } } diff --git a/joern-cli/frontends/jssrc2cpg/src/test/scala/io/joern/jssrc2cpg/passes/TypeRecoveryPassTests.scala b/joern-cli/frontends/jssrc2cpg/src/test/scala/io/joern/jssrc2cpg/passes/TypeRecoveryPassTests.scala index 0d2d98f2a90c..5339d785aaa5 100644 --- a/joern-cli/frontends/jssrc2cpg/src/test/scala/io/joern/jssrc2cpg/passes/TypeRecoveryPassTests.scala +++ b/joern-cli/frontends/jssrc2cpg/src/test/scala/io/joern/jssrc2cpg/passes/TypeRecoveryPassTests.scala @@ -57,7 +57,7 @@ class TypeRecoveryPassTests extends DataFlowCodeToCpgSuite { "resolve correct imports via tag nodes" in { val List(a: UnknownMethod, b: UnknownTypeDecl, x: UnknownMethod, y: UnknownTypeDecl) = - cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked a.fullName shouldBe "slack_sdk:WebClient" b.fullName 
shouldBe "slack_sdk:WebClient" x.fullName shouldBe "sendgrid:SendGridAPIClient" @@ -141,7 +141,7 @@ class TypeRecoveryPassTests extends DataFlowCodeToCpgSuite { "resolve correct imports via tag nodes" in { val List(a: ResolvedMember, b: ResolvedMember, c: ResolvedMember, d: UnknownMethod, e: UnknownTypeDecl) = - cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked a.basePath shouldBe "Foo.ts::program" a.memberName shouldBe "x" b.basePath shouldBe "Foo.ts::program" @@ -229,7 +229,7 @@ class TypeRecoveryPassTests extends DataFlowCodeToCpgSuite { ) "resolve correct imports via tag nodes" in { - val List(x: ResolvedMethod) = cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + val List(x: ResolvedMethod) = cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked x.fullName shouldBe "util.js::program:getIncrementalInteger" } @@ -258,7 +258,7 @@ class TypeRecoveryPassTests extends DataFlowCodeToCpgSuite { "resolve correct imports via tag nodes" in { val List(x: UnknownMethod, y: UnknownTypeDecl) = - cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked x.fullName shouldBe "googleapis" y.fullName shouldBe "googleapis" } @@ -280,7 +280,7 @@ class TypeRecoveryPassTests extends DataFlowCodeToCpgSuite { "resolve correct imports via tag nodes" in { val List(x: UnknownMethod, y: UnknownTypeDecl, z: UnknownMethod) = - cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked x.fullName shouldBe "googleapis" y.fullName shouldBe "googleapis" z.fullName shouldBe "googleapis" @@ -381,7 +381,7 @@ class TypeRecoveryPassTests extends DataFlowCodeToCpgSuite { "resolve correct imports via tag nodes" in { val List(a: ResolvedTypeDecl, b: 
ResolvedMethod, c: ResolvedMethod, d: UnknownMethod, e: UnknownTypeDecl) = - cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked a.fullName shouldBe "foo.js::program" b.fullName shouldBe "foo.js::program:literalFunction" c.fullName shouldBe "foo.js::program:get" diff --git a/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/ImportResolverPass.scala b/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/ImportResolverPass.scala deleted file mode 100644 index 355b7f151e4f..000000000000 --- a/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/ImportResolverPass.scala +++ /dev/null @@ -1,154 +0,0 @@ -package io.joern.pysrc2cpg - -import better.files.File as BFile -import io.joern.x2cpg.passes.frontend.ImportsPass.* -import io.joern.x2cpg.passes.frontend.XImportResolverPass -import io.shiftleft.codepropertygraph.Cpg -import io.shiftleft.codepropertygraph.generated.nodes.* -import io.shiftleft.semanticcpg.language.* - -import java.io.File as JFile -import java.util.regex.{Matcher, Pattern} - -class ImportResolverPass(cpg: Cpg) extends XImportResolverPass(cpg) { - - private lazy val root = BFile(cpg.metaData.root.headOption.getOrElse("").stripSuffix(JFile.separator)) match - case f if f.isDirectory => f.pathAsString - case f => f.parent.pathAsString - - override protected def optionalResolveImport( - fileName: String, - importCall: Call, - importedEntity: String, - importedAs: String, - diffGraph: DiffGraphBuilder - ): Unit = { - val (namespace, entityName) = if (importedEntity.contains(".")) { - val splitName = importedEntity.split('.').toSeq - val namespace = importedEntity.stripSuffix(s".${splitName.last}") - (relativizeNamespace(namespace, fileName), splitName.last) - } else { - val currDir = BFile(root) / fileName match - case x if x.isDirectory => x - case x => x.parent - - val relCurrDir = 
currDir.pathAsString.stripPrefix(root).stripPrefix(JFile.separator) - - (relCurrDir, importedEntity) - } - - resolveEntities(namespace, entityName, importedAs).foreach(x => resolvedImportToTag(x, importCall, diffGraph)) - } - - private def relativizeNamespace(path: String, fileName: String): String = if (path.startsWith(".")) { - // TODO: pysrc2cpg does not link files to the correct namespace nodes - val sep = Matcher.quoteReplacement(JFile.separator) - // The below gives us the full path of the relative "." - val relativeNamespace = - if (fileName.contains(JFile.separator)) - fileName.substring(0, fileName.lastIndexOf(JFile.separator)).replaceAll(sep, ".") - else "" - (if (path.length > 1) relativeNamespace + path.replaceAll(sep, ".") - else relativeNamespace).stripPrefix(".") - } else path - - /** For an import - given by its module path and the name of the imported function or module - determine the possible - * callee names. - * - * @param path - * the module path. - * @param expEntity - * the name of the imported entity. This could be a function, module, or variable/field. - * @param alias - * how the imported entity is named. 
- * @return - * the possible callee names - */ - private def resolveEntities(path: String, expEntity: String, alias: String): Set[ResolvedImport] = { - - implicit class ResolvedNodeExt(val traversal: Seq[String]) { - def toResolvedImport(cpg: Cpg): Seq[ResolvedImport] = { - val resolvedEntities = - traversal.flatMap(x => cpg.typeDecl.fullNameExact(x) ++ cpg.method.fullNameExact(x)).collect { - case x: Method => ResolvedMethod(x.fullName, alias) - case x: TypeDecl => ResolvedTypeDecl(x.fullName) - } - if (resolvedEntities.isEmpty) { - traversal.filterNot(_.contains("__init__.py")).map(x => UnknownImport(x)) - } else { - resolvedEntities - } - } - } - - implicit class CalleeAsInitExt(val name: String) { - def asInit: String = if (name.contains("__init__.py")) name - else name.replace(".py", s"${JFile.separator}__init__.py") - - def withInit: Seq[String] = Seq(name, name.asInit) - } - - val pathSep = "." - val sep = Matcher.quoteReplacement(JFile.separator) - val isMaybeConstructor = expEntity.split("\\.").lastOption.exists(s => s.nonEmpty && s.charAt(0).isUpper) - - lazy val membersMatchingImports: List[(TypeDecl, Member)] = cpg.typeDecl - .fullName(s".*${Pattern.quote(path)}.*") - .flatMap(t => - t.member.nameExact(expEntity).headOption match { - case Some(member) => Option((t, member)) - case None => None - } - ) - .toList - - (path match { - case "" if expEntity.contains(".") => - // Case 1: Qualified path: import foo.bar => (bar.py or bar/__init__.py) - val splitFunc = expEntity.split("\\.") - val name = splitFunc.tail.mkString(".") - s"${splitFunc(0)}.py:$pathSep$name".withInit.toResolvedImport(cpg) - case "" => - // Case 2: import of a module: import foo => (foo.py or foo/__init__.py) - s"$expEntity.py:".withInit.toResolvedImport(cpg) - case _ if membersMatchingImports.nonEmpty => - // Case 3: import of a variable: from api import db => (api.py or foo.__init__.py) @ identifier(db) - membersMatchingImports.map { - case (t, m) if 
t.method.nameExact(m.name).nonEmpty => - ResolvedMethod(t.method.nameExact(m.name).fullName.head, alias) - case (t, m) if t.astSiblings.isMethod.fullNameExact(t.fullName).ast.isTypeDecl.nameExact(m.name).nonEmpty => - ResolvedTypeDecl( - t.astSiblings.isMethod.fullNameExact(t.fullName).ast.isTypeDecl.nameExact(m.name).fullName.head - ) - case (t, m) => ResolvedMember(t.fullName, m.name) - } - case _ => - // Case 4: Import from module using alias, e.g. import bar from foo as faz - val fileOrDir = BFile(codeRoot) / path - val pyFile = BFile(codeRoot) / s"$path.py" - fileOrDir match { - case f if f.isDirectory && !pyFile.exists => - val namespace = path.replaceAll("\\.", sep) - val module = s"$expEntity.py:" - val initSubmodule = s"__init__.py:.$expEntity" - Seq(s"$namespace${JFile.separator}$module", s"$namespace${JFile.separator}$initSubmodule") - .toResolvedImport(cpg) - case f if f.isDirectory && (f / s"$expEntity.py").exists => - Seq(s"${(f / s"$expEntity.py").pathAsString.stripPrefix(codeRoot)}:").toResolvedImport(cpg) - case _ => - s"${path.replaceAll("\\.", sep)}.py:$pathSep$expEntity".withInit.toResolvedImport(cpg) - } - }).flatMap { - // If we import the constructor, we also import the type - case x: ResolvedMethod if isMaybeConstructor => - Seq(ResolvedMethod(Seq(x.fullName, "__init__").mkString(pathSep), alias), ResolvedTypeDecl(x.fullName)) - // If we import the type, we also import the constructor - case x: ResolvedTypeDecl if isMaybeConstructor => - Seq(x, ResolvedMethod(Seq(x.fullName, "__init__").mkString(pathSep), alias)) - // If we can determine the import is a constructor, then it is likely not a member - case x: UnknownImport if isMaybeConstructor => - Seq(UnknownMethod(Seq(x.path, "__init__").mkString(pathSep), alias), UnknownTypeDecl(x.path)) - case x => Seq(x) - }.toSet - } -} diff --git a/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/PythonImportResolverPass.scala 
b/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/PythonImportResolverPass.scala new file mode 100644 index 000000000000..2a7cc01d71e4 --- /dev/null +++ b/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/PythonImportResolverPass.scala @@ -0,0 +1,189 @@ +package io.joern.pysrc2cpg + +import better.files.File +import io.joern.x2cpg.passes.frontend.ImportsPass.* +import io.joern.x2cpg.passes.frontend.XImportResolverPass +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.nodes.* +import io.shiftleft.semanticcpg.language.* + +import java.io.File as JFile +import java.util.regex.Matcher +import scala.collection.mutable + +/** Attempts to solve imports similar Python, and will use heuristics to build out unresolved imports. + * + * @see + * Python 3 Import Searching Reference + */ +class PythonImportResolverPass(cpg: Cpg) extends XImportResolverPass(cpg) { + + /** Stores all CPG entities with their associated Pythonic import paths as keys. 
+ */ + private val moduleCache: mutable.HashMap[String, ImportableEntity] = mutable.HashMap.empty + + override def init(): Unit = { + cpg.typeDecl.isExternal(false).nameExact("").foreach { moduleType => + val modulePath = fileToPythonImportNotation(moduleType.filename) + cpg.method.fullNameExact(moduleType.fullName).headOption.foreach { moduleMethod => + moduleCache.put(modulePath, Module(moduleType, moduleMethod)) + moduleMethod.astChildren.foreach { + case moduleFunction: Method => + moduleCache.put(s"$modulePath.${moduleFunction.name}", ImportableFunction(moduleFunction)) + // Ignore types for functions that are used for method pointers + case moduleType: TypeDecl if moduleMethod.astChildren.isMethod.fullNameExact(moduleType.fullName).isEmpty => + moduleCache.put(s"$modulePath.${moduleType.name}", ImportableType(moduleType)) + case _ => // do nothing + } + } + moduleType.member.foreach { moduleMember => + moduleCache.put(s"$modulePath.${moduleMember.name}", ModuleVariable(moduleType.fullName, moduleMember)) + } + } + } + + private def fileToPythonImportNotation(filename: String): String = + filename + .stripPrefix(codeRootDir) + .replaceAll(Matcher.quoteReplacement(JFile.separator), ".") + .stripSuffix(".py") + .stripSuffix(".__init__") + + override protected def optionalResolveImport( + fileName: String, + importCall: Call, + importedEntity: String, + importedAs: String, + diffGraph: DiffGraphBuilder + ): Unit = { + val currDir = File(codeRootDir) / fileName match + case x if x.isDirectory => x + case x => x.parent + + val importedEntityAsFullyQualifiedImport = + // If the path/entity uses Python's `from .import x` syntax, we will need to remove these + fileToPythonImportNotation(importedEntity.replaceFirst("^\\.+", "")) + val importedEntityAsRelativeImport = Seq( + fileToPythonImportNotation(currDir.pathAsString.stripPrefix(codeRootDir).stripPrefix(JFile.separator)), + importedEntityAsFullyQualifiedImport + ).filterNot(_.isBlank).mkString(".") + + // We 
evaluated both variations, based on what we could expect from different versions of Python and how the package + // layout is interpreted by the presence of lack of `__init__.py` files. Additionally, external packages are always + // fully qualified. + val resolvedImports = + Seq( + moduleCache.get(importedEntityAsRelativeImport), + moduleCache.get(importedEntityAsFullyQualifiedImport) + ).flatten.flatMap(_.toResolvedImport(importedAs)) + + if (resolvedImports.nonEmpty) { + // The import was resolved to an entity successfully + resolvedImports.foreach(x => evaluatedImportToTag(x, importCall, diffGraph)) + } else { + // Here we use heuristics to guess the correct paths, and make the types look friendly for querying + unresolvableImportToUnknownImport(currDir, fileName, importedEntity, importedAs) + .foreach(x => evaluatedImportToTag(x, importCall, diffGraph)) + } + } + + private def unresolvableImportToUnknownImport( + currDir: File, + currFileName: String, + importedEntity: String, + importedAs: String + ): Set[EvaluatedImport] = { + val (namespace, entityName) = if (importedEntity.contains(".")) { + val splitName = importedEntity.split('.').toSeq + val namespace = importedEntity.stripSuffix(s".${splitName.last}") + (relativizeNamespace(namespace, currFileName), splitName.last) + } else { + val relCurrDir = currDir.pathAsString.stripPrefix(codeRootDir).stripPrefix(JFile.separator) + + (relCurrDir, importedEntity) + } + + createPseudoImports(namespace, entityName, importedAs) + } + + private def relativizeNamespace(path: String, fileName: String): String = if (path.startsWith(".")) { + // TODO: pysrc2cpg does not link files to the correct namespace nodes + val sep = Matcher.quoteReplacement(JFile.separator) + // The below gives us the full path of the relative "." 
+ val relativeNamespace = + if (fileName.contains(JFile.separator)) + fileName.substring(0, fileName.lastIndexOf(JFile.separator)).replaceAll(sep, ".") + else "" + (if (path.length > 1) relativeNamespace + path.replaceAll(sep, ".") + else relativeNamespace).stripPrefix(".") + } else path + + /** For an unresolveable import, create a best-effort path of what could be imported, as well as what kind of entity + * may be imported. + * + * @param path + * the module path. + * @param expEntity + * the name of the imported entity. This could be a function, module, or variable/field. + * @param alias + * how the imported entity is named. + * @return + * the possible callee names + */ + private def createPseudoImports(path: String, expEntity: String, alias: String): Set[EvaluatedImport] = { + val pathSep = "." + val sep = Matcher.quoteReplacement(JFile.separator) + val isMaybeConstructor = expEntity.split("\\.").lastOption.exists(s => s.nonEmpty && s.charAt(0).isUpper) + + def toUnresolvedImport(pseudoPath: String): Set[EvaluatedImport] = { + if (isMaybeConstructor) { + Set(UnknownMethod(Seq(pseudoPath, "__init__").mkString(pathSep), alias), UnknownTypeDecl(pseudoPath)) + } else { + Set(UnknownImport(pseudoPath)) + } + } + + if (path.isBlank) { + if (expEntity.contains(".")) { + // Case 1: Qualified path: import foo.bar + val splitFunc = expEntity.split("\\.") + val name = splitFunc.tail.mkString(".") + toUnresolvedImport(s"${splitFunc(0)}.py:$pathSep$name") + } else { + // Case 2: import of a module: import foo => foo.py + toUnresolvedImport(s"$expEntity.py:") + } + } else { + // Case 3: Import from module using alias, e.g. 
import bar from foo as faz + toUnresolvedImport(s"${path.replaceAll("\\.", sep)}.py:$pathSep$expEntity") + } + } + + private sealed trait ImportableEntity { + + def toResolvedImport(alias: String): List[EvaluatedImport] + + } + + private case class Module(moduleType: TypeDecl, moduleMethod: Method) extends ImportableEntity { + override def toResolvedImport(alias: String): List[EvaluatedImport] = + List(ResolvedTypeDecl(moduleType.fullName), ResolvedMethod(moduleMethod.fullName, moduleMethod.name)) + + } + + private case class ModuleVariable(baseTypeFullName: String, member: Member) extends ImportableEntity { + + override def toResolvedImport(alias: String): List[EvaluatedImport] = List( + ResolvedMember(baseTypeFullName, member.name) + ) + } + + private case class ImportableFunction(function: Method) extends ImportableEntity { + override def toResolvedImport(alias: String): List[EvaluatedImport] = List(ResolvedMethod(function.fullName, alias)) + } + + private case class ImportableType(typ: TypeDecl) extends ImportableEntity { + override def toResolvedImport(alias: String): List[EvaluatedImport] = + List(ResolvedTypeDecl(typ.fullName), ResolvedMethod(s"${typ.fullName}.__init__", typ.name)) + } +} diff --git a/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/PythonTypeRecovery.scala b/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/PythonTypeRecovery.scala index 0ae0e0d3133d..951cd47d44b7 100644 --- a/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/PythonTypeRecovery.scala +++ b/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/PythonTypeRecovery.scala @@ -9,8 +9,6 @@ import io.shiftleft.semanticcpg.language.operatorextension.OpNodes import io.shiftleft.semanticcpg.language.operatorextension.OpNodes.FieldAccess import overflowdb.BatchedUpdate.DiffGraphBuilder -import scala.collection.immutable.{AbstractSet, SortedSet} - class PythonTypeRecoveryPass(cpg: Cpg, config: XTypeRecoveryConfig = 
XTypeRecoveryConfig()) extends XTypeRecoveryPass[File](cpg, config) { @@ -50,10 +48,10 @@ private class RecoverForPythonFile(cpg: Cpg, cu: File, builder: DiffGraphBuilder override def visitImport(i: Import): Unit = { if (i.importedAs.isDefined && i.importedEntity.isDefined) { - import io.joern.x2cpg.passes.frontend.ImportsPass._ + import io.joern.x2cpg.passes.frontend.ImportsPass.* val entityName = i.importedAs.get - i.call.tag.flatMap(ResolvedImport.tagToResolvedImport).foreach { + i.call.tag.flatMap(EvaluatedImport.tagToEvaluatedImport).foreach { case ResolvedMethod(fullName, alias, receiver, _) => symbolTable.put(CallAlias(alias, receiver), fullName) case ResolvedTypeDecl(fullName, _) => symbolTable.put(LocalVar(entityName), fullName) case ResolvedMember(basePath, memberName, _) => diff --git a/joern-cli/frontends/pysrc2cpg/src/test/scala/io/joern/pysrc2cpg/PySrc2CpgFixture.scala b/joern-cli/frontends/pysrc2cpg/src/test/scala/io/joern/pysrc2cpg/PySrc2CpgFixture.scala index 16ac4b3a4d86..9ef4e963d38c 100644 --- a/joern-cli/frontends/pysrc2cpg/src/test/scala/io/joern/pysrc2cpg/PySrc2CpgFixture.scala +++ b/joern-cli/frontends/pysrc2cpg/src/test/scala/io/joern/pysrc2cpg/PySrc2CpgFixture.scala @@ -36,7 +36,7 @@ class PySrcTestCpg extends TestCpg with PythonFrontend { override def applyPasses(): Unit = { X2Cpg.applyDefaultOverlays(this) new ImportsPass(this).createAndApply() - new ImportResolverPass(this).createAndApply() + new PythonImportResolverPass(this).createAndApply() new PythonInheritanceNamePass(this).createAndApply() new DynamicTypeHintFullNamePass(this).createAndApply() new PythonTypeRecoveryPass(this).createAndApply() diff --git a/joern-cli/frontends/pysrc2cpg/src/test/scala/io/joern/pysrc2cpg/passes/TypeRecoveryPassTests.scala b/joern-cli/frontends/pysrc2cpg/src/test/scala/io/joern/pysrc2cpg/passes/TypeRecoveryPassTests.scala index eee98ab4da1d..1f495764f219 100644 --- 
a/joern-cli/frontends/pysrc2cpg/src/test/scala/io/joern/pysrc2cpg/passes/TypeRecoveryPassTests.scala +++ b/joern-cli/frontends/pysrc2cpg/src/test/scala/io/joern/pysrc2cpg/passes/TypeRecoveryPassTests.scala @@ -3,11 +3,11 @@ package io.joern.pysrc2cpg.passes import io.joern.pysrc2cpg.PySrc2CpgFixture import io.joern.x2cpg.passes.frontend.ImportsPass.* import io.joern.x2cpg.passes.frontend.{ImportsPass, XTypeHintCallLinker} -import io.shiftleft.codepropertygraph.generated.nodes.Local import io.shiftleft.semanticcpg.language.* import java.io.File import scala.collection.immutable.Seq + class TypeRecoveryPassTests extends PySrc2CpgFixture(withOssDataflow = false) { "literals declared from built-in types" should { @@ -71,7 +71,7 @@ class TypeRecoveryPassTests extends PySrc2CpgFixture(withOssDataflow = false) { webClientT: UnknownTypeDecl, sendGridM: UnknownMethod, sendGridT: UnknownTypeDecl - ) = cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + ) = cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked webClientM.fullName shouldBe "slack_sdk.py:.WebClient.__init__" webClientT.fullName shouldBe "slack_sdk.py:.WebClient" sendGridM.fullName shouldBe "sendgrid.py:.SendGridAPIClient.__init__" @@ -234,11 +234,11 @@ class TypeRecoveryPassTests extends PySrc2CpgFixture(withOssDataflow = false) { "resolve correct imports via tag nodes" in { val List(foo1: UnknownMethod, foo2: UnknownTypeDecl) = - cpg.file(".*foo.py").ast.isCall.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + cpg.file(".*foo.py").ast.isCall.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked foo1.fullName shouldBe "flask_sqlalchemy.py:.SQLAlchemy.__init__" foo2.fullName shouldBe "flask_sqlalchemy.py:.SQLAlchemy" val List(bar1: ResolvedTypeDecl, bar2: ResolvedMethod) = - cpg.file(".*bar.py").ast.isCall.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + 
cpg.file(".*bar.py").ast.isCall.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked bar1.fullName shouldBe "foo.py:" bar2.fullName shouldBe "foo.py:" } @@ -327,7 +327,7 @@ class TypeRecoveryPassTests extends PySrc2CpgFixture(withOssDataflow = false) { "resolve correct imports via tag nodes" in { val List(a: ResolvedTypeDecl, b: ResolvedMethod, c: UnknownImport, d: ResolvedMember) = - cpg.file(".*UserController.py").ast.isCall.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + cpg.file(".*UserController.py").ast.isCall.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked a.fullName shouldBe "app.py:" b.fullName shouldBe "app.py:" c.path shouldBe "flask.py:.jsonify" @@ -335,7 +335,7 @@ class TypeRecoveryPassTests extends PySrc2CpgFixture(withOssDataflow = false) { d.memberName shouldBe "db" val List(sqlAlchemyM: UnknownMethod, sqlAlchemyT: UnknownTypeDecl) = - cpg.file(".*app.py").ast.isCall.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + cpg.file(".*app.py").ast.isCall.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked sqlAlchemyM.fullName shouldBe "flask_sqlalchemy.py:.SQLAlchemy.__init__" sqlAlchemyT.fullName shouldBe "flask_sqlalchemy.py:.SQLAlchemy" } @@ -373,7 +373,7 @@ class TypeRecoveryPassTests extends PySrc2CpgFixture(withOssDataflow = false) { |""".stripMargin).cpg "resolve correct imports via tag nodes" in { - val List(logging: UnknownImport) = cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + val List(logging: UnknownImport) = cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked logging.path shouldBe "logging.py:" } @@ -397,7 +397,7 @@ class TypeRecoveryPassTests extends PySrc2CpgFixture(withOssDataflow = false) { "resolve correct imports via tag nodes" in { val List(error: UnknownImport, request: UnknownImport) = - cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + 
cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked error.path shouldBe "urllib.py:.error" request.path shouldBe "urllib.py:.request" } @@ -448,16 +448,11 @@ class TypeRecoveryPassTests extends PySrc2CpgFixture(withOssDataflow = false) { | return dict(res).get("customerId", None) |""".stripMargin, "InstallationDao.py" - ).moreCode( - """ - |# dummy file to trigger isExternal = false on methods that are imported from here - |""".stripMargin, - "pymongo.py" - ).cpg + ) "resolve correct imports via tag nodes" in { val List(a: ResolvedTypeDecl, b: ResolvedMethod, c: UnknownMethod, d: UnknownTypeDecl, e: UnknownImport) = - cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked a.fullName shouldBe "MongoConnection.py:.MongoConnection" b.fullName shouldBe "MongoConnection.py:.MongoConnection.__init__" @@ -586,7 +581,7 @@ class TypeRecoveryPassTests extends PySrc2CpgFixture(withOssDataflow = false) { sessionM: ResolvedMethod, sqlSessionM: UnknownMethod, sqlSessionT: UnknownTypeDecl - ) = cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + ) = cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked sessionT.fullName shouldBe Seq("data", "db_session.py:").mkString(File.separator) sessionM.fullName shouldBe Seq("data", "db_session.py:").mkString(File.separator) sqlSessionM.fullName shouldBe Seq("sqlalchemy", "orm.py:.Session.__init__").mkString(File.separator) @@ -673,7 +668,7 @@ class TypeRecoveryPassTests extends PySrc2CpgFixture(withOssDataflow = false) { "resolve correct imports via tag nodes" in { val List(sqlSessionM: UnknownMethod, sqlSessionT: UnknownTypeDecl, db: ResolvedMember) = - cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked sqlSessionM.fullName shouldBe 
Seq("flask_sqlalchemy.py:.SQLAlchemy.__init__").mkString(File.separator) sqlSessionT.fullName shouldBe Seq("flask_sqlalchemy.py:.SQLAlchemy").mkString(File.separator) db.basePath shouldBe Seq("api", "__init__.py:").mkString(File.separator) @@ -954,7 +949,7 @@ class TypeRecoveryPassTests extends PySrc2CpgFixture(withOssDataflow = false) { "resolve correct imports via tag nodes" in { val List(djangoModels: UnknownImport, profileT: ResolvedTypeDecl, profileM: ResolvedMethod) = - cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked djangoModels.path shouldBe Seq("django", "db.py:.models").mkString(File.separator) profileT.fullName shouldBe "models.py:.Profile" profileM.fullName shouldBe "models.py:.Profile.__init__" @@ -996,7 +991,7 @@ class TypeRecoveryPassTests extends PySrc2CpgFixture(withOssDataflow = false) { "resolve correct imports via tag nodes" in { val List(connectorT: ResolvedTypeDecl, connectorM: ResolvedMethod) = - cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked connectorT.fullName shouldBe Seq("lib", "connector.py:.Connector").mkString(File.separator) connectorM.fullName shouldBe Seq("lib", "connector.py:.Connector.__init__").mkString(File.separator) } @@ -1180,6 +1175,69 @@ class TypeRecoveryPassTests extends PySrc2CpgFixture(withOssDataflow = false) { } + "Modules imported from non-root modules" should { + val cpg = code( + """ + |from fastapi import FastAPI + |import v1.appv1 + |from v2.appv2 import appV2 + | + |app = FastAPI(root_path="/api") + | + |app.mount("/v1", v1.appv1.appV1) + |app.mount("/v2", appV2) + |""".stripMargin, + Seq("code", "itemsrouter.py").mkString(File.separator) + ).moreCode( + """ + |appV1 = "v1" + |""".stripMargin, + Seq("code", "v1", "appv1.py").mkString(File.separator) + ).moreCode( + """ + |appV2 = "v2" + 
|""".stripMargin, + Seq("code", "v2", "appv2.py").mkString(File.separator) + ) + + "correctly resolve the type of the `appV1`, via member access" in { + val appV1 = cpg.member("appV1").ref.inAssignment.target.isIdentifier.head + appV1.typeFullName shouldBe "__builtin.str" + } + + "correctly resolve the type of the `appV2`, via member access" in { + val appV2 = cpg.member("appV2").ref.inAssignment.target.isIdentifier.head + appV2.typeFullName shouldBe "__builtin.str" + } + + // TODO: code.v1 is a directory with multiple modules, but is not a module itself and thus has no member nodes + // pointing to the child modules. This means that field accesses of code.v1 have no base type to rely on failing + // this test case + "correctly resolve the type of the `appV1` as a field access argument" ignore { + val appV1 = cpg.call + .methodFullNameExact("fastapi.py:.FastAPI.mount") + .argument + .argumentIndex(2) + .fieldAccess + .where(_.fieldIdentifier.canonicalName("appV1")) + .referencedMember + .head + appV1.typeFullName shouldBe "__builtin.str" + } + + "correctly resolve the type of the `appV2` as an identifier argument" in { + val appV2 = cpg.call + .methodFullNameExact("fastapi.py:.FastAPI.mount") + .argument + .argumentIndex(2) + .isIdentifier + .name("appV2") + .head + appV2.typeFullName shouldBe "__builtin.str" + } + + } + "Literals as the returns of calls" should { val cpg = code(""" |def foo(): diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/RubySrc2Cpg.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/RubySrc2Cpg.scala index 61cbab6d5edb..6ec36406dd8f 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/RubySrc2Cpg.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/RubySrc2Cpg.scala @@ -96,7 +96,7 @@ object RubySrc2Cpg { List( // TODO commented below two passes, as waiting on Dependency download PR to get merged new 
deprecated.passes.IdentifierToCallPass(cpg), - new deprecated.passes.ImportResolverPass(cpg, packageTableInfo), + new deprecated.passes.RubyImportResolverPass(cpg, packageTableInfo), new deprecated.passes.RubyTypeRecoveryPass(cpg), new deprecated.passes.RubyTypeHintCallLinker(cpg), new NaiveCallLinker(cpg), diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/ImportResolverPass.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/RubyImportResolverPass.scala similarity index 93% rename from joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/ImportResolverPass.scala rename to joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/RubyImportResolverPass.scala index 54fd56d16bbb..2aa40763d5d0 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/ImportResolverPass.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/RubyImportResolverPass.scala @@ -11,7 +11,7 @@ import io.shiftleft.semanticcpg.language.* import java.io.File as JFile import java.util.regex.{Matcher, Pattern} -class ImportResolverPass(cpg: Cpg, packageTableInfo: PackageTable) extends XImportResolverPass(cpg) { +class RubyImportResolverPass(cpg: Cpg, packageTableInfo: PackageTable) extends XImportResolverPass(cpg) { private val pathPattern = Pattern.compile("[\"']([\\w/.]+)[\"']") @@ -23,10 +23,10 @@ class ImportResolverPass(cpg: Cpg, packageTableInfo: PackageTable) extends XImpo diffGraph: DiffGraphBuilder ): Unit = { - resolveEntities(importedEntity, importCall, fileName).foreach(x => resolvedImportToTag(x, importCall, diffGraph)) + resolveEntities(importedEntity, importCall, fileName).foreach(x => evaluatedImportToTag(x, importCall, diffGraph)) } - private def resolveEntities(expEntity: String, importCall: Call, fileName: String): Set[ResolvedImport] = { + 
private def resolveEntities(expEntity: String, importCall: Call, fileName: String): Set[EvaluatedImport] = { // TODO /* Currently we are considering only case where exposed module are Classes, @@ -90,7 +90,7 @@ class ImportResolverPass(cpg: Cpg, packageTableInfo: PackageTable) extends XImpo .toSet resolvedTypeDecls ++ resolvedModules ++ resolvedMethods } - } + }.collectAll[EvaluatedImport].toSet finalResolved } @@ -99,7 +99,7 @@ class ImportResolverPass(cpg: Cpg, packageTableInfo: PackageTable) extends XImpo val rawEntity = expEntity.stripPrefix("./") val matcher = pathPattern.matcher(rawEntity) val sep = Matcher.quoteReplacement(JFile.separator) - val root = s"$codeRoot${JFile.separator}" + val root = s"$codeRootDir${JFile.separator}" val currentFile = s"$root$fileName" val entity = if (matcher.find()) matcher.group(1) else rawEntity val resolvedPath = better.files diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/RubyTypeRecoveryPass.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/RubyTypeRecoveryPass.scala index 709761861e56..3c2f1d2a8661 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/RubyTypeRecoveryPass.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/RubyTypeRecoveryPass.scala @@ -45,7 +45,7 @@ private class RecoverForRubyFile(cpg: Cpg, cu: File, builder: DiffGraphBuilder, alias <- i.importedAs } { import io.joern.x2cpg.passes.frontend.ImportsPass.* - ResolvedImport.tagToResolvedImport(resolvedImport).foreach { + EvaluatedImport.tagToEvaluatedImport(resolvedImport).foreach { case ResolvedTypeDecl(fullName, _) => symbolTable.append(LocalVar(fullName.split("\\.").lastOption.getOrElse(alias)), fullName) case _ => super.visitImport(i) diff --git a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/deprecated/passes/RubyTypeRecoveryTests.scala 
b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/deprecated/passes/RubyTypeRecoveryTests.scala index 6cc50d657fa1..40c8f0b32564 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/deprecated/passes/RubyTypeRecoveryTests.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/deprecated/passes/RubyTypeRecoveryTests.scala @@ -138,10 +138,10 @@ class RubyTypeRecoveryTests // TODO Waiting for Module modelling to be done "resolve correct imports via tag nodes" ignore { val List(foo: ResolvedTypeDecl) = - cpg.file(".*foo.rb").ast.isCall.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + cpg.file(".*foo.rb").ast.isCall.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked foo.fullName shouldBe "dbi::program.DBI" val List(bar: ResolvedTypeDecl) = - cpg.file(".*bar.rb").ast.isCall.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + cpg.file(".*bar.rb").ast.isCall.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked bar.fullName shouldBe "foo.rb::program.FooModule" } @@ -200,7 +200,8 @@ class RubyTypeRecoveryTests |""".stripMargin).cpg "resolve correct imports via tag nodes" in { - val List(logging: ResolvedMethod, _) = cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked + val List(logging: ResolvedMethod, _) = + cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked logging.fullName shouldBe s"logger::program.Logger.${XDefines.ConstructorMethodName}" } diff --git a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/frontend/XImportResolverPass.scala b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/frontend/XImportResolverPass.scala index d2f78e7a32e3..57f436d4aa21 100644 --- a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/frontend/XImportResolverPass.scala +++ 
b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/frontend/XImportResolverPass.scala @@ -1,7 +1,8 @@ package io.joern.x2cpg.passes.frontend -import io.joern.x2cpg.passes.frontend.ImportsPass.ResolvedImport -import io.joern.x2cpg.passes.frontend.ImportsPass.ResolvedImport.* +import better.files.File +import io.joern.x2cpg.passes.frontend.ImportsPass.EvaluatedImport +import io.joern.x2cpg.passes.frontend.ImportsPass.EvaluatedImport.* import io.shiftleft.codepropertygraph.Cpg import io.shiftleft.codepropertygraph.generated.nodes.{Call, Import, Tag} import io.shiftleft.passes.ConcurrentWriterCpgPass @@ -14,14 +15,18 @@ import java.util.Base64 abstract class XImportResolverPass(cpg: Cpg) extends ConcurrentWriterCpgPass[Import](cpg) { - protected val logger: Logger = LoggerFactory.getLogger(this.getClass) - protected val codeRoot: String = cpg.metaData.root.headOption.getOrElse(JFile.separator) + protected val logger: Logger = LoggerFactory.getLogger(this.getClass) + protected val codeRootDir: String = File( + cpg.metaData.root.headOption.getOrElse(JFile.separator).stripSuffix(JFile.separator) + ) match + case f if f.isDirectory => f.pathAsString + case f => f.parent.pathAsString override def generateParts(): Array[Import] = cpg.imports.toArray override def runOnPart(builder: DiffGraphBuilder, part: Import): Unit = for { call <- part.call - fileName = call.file.name.headOption.getOrElse("").stripPrefix(codeRoot) + fileName = call.file.name.headOption.getOrElse("").stripPrefix(codeRootDir) importedAs <- part.importedAs importedEntity <- part.importedEntity } { @@ -36,7 +41,7 @@ abstract class XImportResolverPass(cpg: Cpg) extends ConcurrentWriterCpgPass[Imp diffGraph: DiffGraphBuilder ): Unit - protected def resolvedImportToTag(x: ResolvedImport, importCall: Call, diffGraph: DiffGraphBuilder): Unit = + protected def evaluatedImportToTag(x: EvaluatedImport, importCall: Call, diffGraph: DiffGraphBuilder): Unit = importCall.start.newTagNodePair(x.label, 
x.serialize).store()(diffGraph) } @@ -45,18 +50,28 @@ object ImportsPass { private val sep = "," - sealed trait ResolvedImport { + /** An import that has been evaluated as either resolved or not. + */ + sealed trait EvaluatedImport { def label: String def serialize: String } + /** An import that has been resolved to a node in the CPG. + */ + sealed trait ResolvedImport extends EvaluatedImport + + /** An import that has not been successfully resolved to a node in the CPG. This is likely an external dependency. + */ + sealed trait UnresolvedImport extends EvaluatedImport + implicit class TagToResolvedImportExt(traversal: Iterator[Tag]) { - def toResolvedImport: Iterator[ResolvedImport] = - traversal.flatMap(ResolvedImport.tagToResolvedImport) + def toEvaluatedImport: Iterator[EvaluatedImport] = + traversal.flatMap(EvaluatedImport.tagToEvaluatedImport) } - object ResolvedImport { + object EvaluatedImport { val RESOLVED_METHOD = "RESOLVED_METHOD" val RESOLVED_TYPE_DECL = "RESOLVED_TYPE_DECL" @@ -71,7 +86,7 @@ object ImportsPass { val OPT_BASE_PATH = "BASE_PATH" val OPT_NAME = "NAME" - def tagToResolvedImport(tag: Tag): Option[ResolvedImport] = Option(tag.name match { + def tagToEvaluatedImport(tag: Tag): Option[EvaluatedImport] = Option(tag.name match { case RESOLVED_METHOD => val opts = valueToOptions(tag.value) ResolvedMethod(opts(OPT_FULL_NAME), opts(OPT_ALIAS), opts.get(OPT_RECEIVER)) @@ -128,7 +143,7 @@ object ImportsPass { alias: String, receiver: Option[String] = None, override val label: String = UNKNOWN_METHOD - ) extends ResolvedImport { + ) extends UnresolvedImport { override def serialize: String = (Seq(OPT_FULL_NAME, fullName.encode, OPT_ALIAS, alias.encode) ++ receiver .map(r => Seq(OPT_RECEIVER, r.encode)) @@ -136,11 +151,12 @@ object ImportsPass { .mkString(sep) } - case class UnknownTypeDecl(fullName: String, override val label: String = UNKNOWN_TYPE_DECL) extends ResolvedImport { + case class UnknownTypeDecl(fullName: String, override val label: 
String = UNKNOWN_TYPE_DECL) + extends UnresolvedImport { override def serialize: String = fullName } - case class UnknownImport(path: String, override val label: String = UNKNOWN_IMPORT) extends ResolvedImport { + case class UnknownImport(path: String, override val label: String = UNKNOWN_IMPORT) extends UnresolvedImport { override def serialize: String = path } } diff --git a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/frontend/XTypeRecovery.scala b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/frontend/XTypeRecovery.scala index 59eaf1a7168b..57dc04728113 100644 --- a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/frontend/XTypeRecovery.scala +++ b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/frontend/XTypeRecovery.scala @@ -98,19 +98,26 @@ abstract class XTypeRecoveryPass[CompilationUnitType <: AstNode]( } private def linkMembersToTheirRefs(builder: DiffGraphBuilder): Unit = { - import XTypeRecovery.unknownTypePattern + import io.joern.x2cpg.passes.frontend.XTypeRecovery.AllNodeTypesFromIteratorExt + + def getFieldBaseTypes(fieldAccess: FieldAccess): Iterator[TypeDecl] = { + fieldAccess.argument(1) match + case x: Call if x.name == Operators.fieldAccess => + cpg.typeDecl.fullNameExact(FieldAccess(x).referencedMember.getKnownTypes.toSeq*) + case x: Call if !x.name.startsWith("") => + if (!x.typeFullName.matches(XTypeRecovery.unknownTypePattern.pattern.pattern())) + cpg.typeDecl.fullNameExact(x.typeFullName) + else + Iterator.empty + case x: Expression => + cpg.typeDecl.fullNameExact(x.getKnownTypes.toSeq*) + } + // Set all now-typed fieldAccess calls to their referencing members (if they exist) cpg.fieldAccess - .where( - _.and( - _.not(_.referencedMember), - _.argument(1).isIdentifier.typeFullNameNot(unknownTypePattern.pattern.pattern()) - ) - ) + .whereNot(_.referencedMember) .foreach { fieldAccess => - cpg.typeDecl - .fullNameExact(fieldAccess.argument(1).getKnownTypes.toSeq: _*) - .member + 
getFieldBaseTypes(fieldAccess).member .nameExact(fieldAccess.fieldIdentifier.canonicalName.toSeq: _*) .foreach(builder.addEdge(fieldAccess, _, EdgeTypes.REF)) } @@ -376,7 +383,7 @@ abstract class RecoverForXCompilationUnit[CompilationUnitType <: AstNode]( import io.joern.x2cpg.passes.frontend.ImportsPass.* import io.joern.x2cpg.passes.frontend.XTypeRecovery.AllNodeTypesFromIteratorExt - ResolvedImport.tagToResolvedImport(resolvedImport).foreach { + EvaluatedImport.tagToEvaluatedImport(resolvedImport).foreach { case ResolvedMethod(fullName, alias, receiver, _) => symbolTable.append(CallAlias(alias, receiver), fullName) case ResolvedTypeDecl(fullName, _) =>