diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/RubySrc2Cpg.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/RubySrc2Cpg.scala index 42f1e32d3bc9..6a2fb791ac44 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/RubySrc2Cpg.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/RubySrc2Cpg.scala @@ -2,6 +2,7 @@ package io.joern.rubysrc2cpg import better.files.File import io.joern.rubysrc2cpg.astcreation.AstCreator +import io.joern.rubysrc2cpg.astcreation.GlobalTypes import io.joern.rubysrc2cpg.datastructures.RubyProgramSummary import io.joern.rubysrc2cpg.deprecated.parser.DeprecatedRubyParser import io.joern.rubysrc2cpg.deprecated.parser.DeprecatedRubyParser.* diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreator.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreator.scala index fd02ce66ed31..c60189a6b4c3 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreator.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreator.scala @@ -35,6 +35,8 @@ class AstCreator( protected val logger: Logger = LoggerFactory.getLogger(getClass) + protected var parseLevel: AstParseLevel = AstParseLevel.FULL_AST + protected val relativeFileName: String = projectRoot.map(fileName.stripPrefix).map(_.stripPrefix(java.io.File.separator)).getOrElse(fileName) @@ -49,7 +51,7 @@ class AstCreator( * The (parsed) contents of the file are put under that fictitious METHOD node, thus * allowing for a straightforward representation of out-of-method statements. 
*/ - private def astForRubyFile(rootStatements: StatementList): Ast = { + protected def astForRubyFile(rootStatements: StatementList): Ast = { val fileNode = NewFile().name(relativeFileName) val fullName = s"$relativeFileName:${NamespaceTraversal.globalNamespaceName}" val namespaceBlock = NewNamespaceBlock() @@ -92,3 +94,16 @@ class AstCreator( .getOrElse(Ast()) } } + +/** Determines the depth to which the AST creator will parse. + */ +enum AstParseLevel { + + /** This level will parse all type and method signatures, but exclude method bodies. + */ + case SIGNATURES + + /** This level will parse the full AST. + */ + case FULL_AST +} diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForExpressionsCreator.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForExpressionsCreator.scala index 2d658c33737b..f90c76b247a7 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForExpressionsCreator.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForExpressionsCreator.scala @@ -342,10 +342,14 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) { } private def astForMethodCallWithoutBlock(node: SimpleCall, methodIdentifier: SimpleIdentifier): Ast = { - val methodName = methodIdentifier.text - val methodFullName = methodName // TODO - val argumentAst = node.arguments.map(astForMethodCallArgument) - val call = callNode(node, code(node), methodName, methodFullName, DispatchTypes.STATIC_DISPATCH) + val methodName = methodIdentifier.text + lazy val defaultFullName = s"${XDefines.UnresolvedNamespace}:$methodName" + val methodFullName = scope.tryResolveMethodInvocation(methodName, List.empty) match { + case Some(m) => scope.typeForMethod(m).map(t => s"${t.name}:${m.name}").getOrElse(defaultFullName) + case None => defaultFullName + } + val argumentAst = 
node.arguments.map(astForMethodCallArgument) + val call = callNode(node, code(node), methodName, methodFullName, DispatchTypes.STATIC_DISPATCH) callAst(call, argumentAst, None, None) } diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForFunctionsCreator.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForFunctionsCreator.scala index a34f9e6df25e..3b545ab0bfbe 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForFunctionsCreator.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForFunctionsCreator.scala @@ -43,22 +43,25 @@ trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { th .map(statementForOptionalParam) )(TextSpan(None, None, None, None, "")) - val stmtBlockAst = node.body match - case stmtList: StatementList => - astForStatementListReturningLastExpression( - StatementList(optionalStatementList.statements ++ stmtList.statements)(stmtList.span) - ) - case _: (StaticLiteral | BinaryExpression | SingleAssignment | SimpleIdentifier | ArrayLiteral | HashLiteral | - SimpleCall | MemberAccess | MemberCall) => - astForStatementListReturningLastExpression( - StatementList(optionalStatementList.statements ++ List(node.body))(node.body.span) - ) - case body => - logger.warn( - s"Non-linear method bodies are not supported yet: ${body.text} (${body.getClass.getSimpleName}) ($relativeFileName), skipping" - ) - astForUnknown(body) - + val stmtBlockAst = if (this.parseLevel == AstParseLevel.SIGNATURES) { + Ast() + } else { + node.body match + case stmtList: StatementList => + astForStatementListReturningLastExpression( + StatementList(optionalStatementList.statements ++ stmtList.statements)(stmtList.span) + ) + case _: (StaticLiteral | BinaryExpression | SingleAssignment | SimpleIdentifier | ArrayLiteral | HashLiteral | + SimpleCall | MemberAccess | MemberCall) => + 
astForStatementListReturningLastExpression( + StatementList(optionalStatementList.statements ++ List(node.body))(node.body.span) + ) + case body => + logger.warn( + s"Non-linear method bodies are not supported yet: ${body.text} (${body.getClass.getSimpleName}) ($relativeFileName), skipping" + ) + astForUnknown(body) + } scope.popScope() methodAst(method, parameterAsts, stmtBlockAst, methodReturnNode(node, Defines.Any)) } diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForTypesCreator.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForTypesCreator.scala index aa7485b1b33c..0f1f380f0033 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForTypesCreator.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForTypesCreator.scala @@ -59,7 +59,7 @@ trait AstForTypesCreator(implicit withSchemaValidation: ValidationMode) { this: val classBody = node.body.asInstanceOf[StatementList] // for now (bodyStatement is a superset of stmtList) val classBodyAsts = classBody.statements.flatMap(astsForStatement) match { - case bodyAsts if scope.shouldGenerateDefaultConstructor => + case bodyAsts if scope.shouldGenerateDefaultConstructor && parseLevel == AstParseLevel.FULL_AST => val bodyStart = classBody.span.spanStart() val initBody = StatementList(List())(bodyStart) val methodDecl = astForMethodDeclaration( diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstSummaryVisitor.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstSummaryVisitor.scala index 1081bf2a0ce2..86ee6c11301c 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstSummaryVisitor.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstSummaryVisitor.scala @@ -1,100 +1,74 @@ package 
io.joern.rubysrc2cpg.astcreation -import io.joern.rubysrc2cpg.astcreation.RubyIntermediateAst.{ - ClassDeclaration, - FieldsDeclaration, - MandatoryParameter, - MethodDeclaration, - ModuleDeclaration, - OptionalParameter, - RubyNode, - SimpleIdentifier, - StatementList, - TypeDeclaration -} -import io.joern.rubysrc2cpg.datastructures.{ - NamespaceScope, - RubyField, - RubyMethod, - RubyProgramSummary, - RubyType, - TypeScope -} -import io.joern.rubysrc2cpg.passes.Defines +import io.joern.rubysrc2cpg.astcreation.RubyIntermediateAst.StatementList +import io.joern.rubysrc2cpg.datastructures.{RubyField, RubyMethod, RubyProgramSummary, RubyType} import io.joern.rubysrc2cpg.parser.RubyNodeCreator -import io.joern.x2cpg.datastructures.ProgramSummary -import io.joern.x2cpg.{ValidationMode, Defines as XDefines} -import io.shiftleft.codepropertygraph.generated.nodes.{NewMember, NewMethod, NewMethodParameterIn} -import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal +import io.joern.rubysrc2cpg.passes.Defines +import io.joern.x2cpg.{Ast, ValidationMode} +import io.shiftleft.codepropertygraph.generated.Cpg +import io.shiftleft.codepropertygraph.generated.nodes.{Local, Member, Method, TypeDecl} +import io.shiftleft.semanticcpg.language.* +import overflowdb.{BatchedUpdate, Config} -trait AstSummaryVisitor(implicit withSchemaValidation: ValidationMode) { this: AstCreator => +import scala.util.Using - private def baseNamespace: String = s"$relativeFileName:${NamespaceTraversal.globalNamespaceName}" +trait AstSummaryVisitor(implicit withSchemaValidation: ValidationMode) { this: AstCreator => def summarize(): RubyProgramSummary = { - val rootNode = new RubyNodeCreator().visit(this.programCtx).asInstanceOf[StatementList] - val fullName = baseNamespace - scope.pushNewScope(NamespaceScope(fullName)) + this.parseLevel = AstParseLevel.SIGNATURES + Using.resource(Cpg.withConfig(Config.withoutOverflow())) { cpg => + // Build and store compilation unit AST + val 
rootNode = new RubyNodeCreator().visit(programCtx).asInstanceOf[StatementList] + val ast = astForRubyFile(rootNode) + Ast.storeInDiffGraph(ast, diffGraph) + BatchedUpdate.applyDiff(cpg.graph, diffGraph) - val newMap = scope.newProgramScope - .map { moduleScope => - scope.pushNewScope(moduleScope) - val m = rootNode.statements - .map(visitStatement) - .reduceOption((a, b) => ProgramSummary.combine(a, b)) - .getOrElse(Map.empty) - scope.popScope() - m - } - .getOrElse(Map.empty) - - scope.popScope() - RubyProgramSummary(newMap) + // Summarize findings + summarize(cpg) + } } def withSummary(newSummary: RubyProgramSummary): AstCreator = { AstCreator(fileName, programCtx, projectRoot, newSummary) } - private def visitStatement(stmt: RubyNode): Map[String, Set[RubyType]] = stmt match { - case node: TypeDeclaration => visitTypeDeclaration(node) - case _ => Map.empty - } + private def summarize(cpg: Cpg): RubyProgramSummary = { + def toMethod(m: Method): RubyMethod = { + RubyMethod(m.name, m.parameter.map(x => x.name -> x.typeFullName).l, m.methodReturn.typeFullName) + } - private def visitTypeDeclaration(classDecl: TypeDeclaration): Map[String, Set[RubyType]] = { - classDecl.name match { - case name: SimpleIdentifier => - val fullName = computeClassFullName(name.text) - Map( - scope.surroundingScopeFullName - .getOrElse(baseNamespace) -> Set(visitTypeLikeDeclaration(fullName, classDecl.body)) - ) - case _ => Map.empty + def toField(f: Member): RubyField = { + RubyField(f.name, f.typeFullName) } - } - private def visitTypeLikeDeclaration(fullName: String, body: RubyNode): RubyType = { - scope.pushNewScope(TypeScope(fullName)) - val classBody = body.asInstanceOf[StatementList] - val bodyMap = - classBody.statements.flatMap { - case MethodDeclaration(methodName, parameters, _) => - RubyMethod(methodName, visitParameters(parameters), XDefines.Any) :: Nil - case node: FieldsDeclaration => - astsForFieldDeclarations(node).flatMap(_.nodes).collect { - case x: NewMember => 
RubyField(x.name, x.typeFullName) - case x: NewMethod => RubyMethod(x.name, List.empty, XDefines.Any) // These are getters/setters - } - case _ => Seq.empty - } - scope.popScope() - RubyType(fullName, bodyMap.collect { case x: RubyMethod => x }, bodyMap.collect { case x: RubyField => x }) - } + def toModuleVariable(v: Local): RubyField = { + RubyField(v.name, v.typeFullName) + } - private def visitParameters(parameters: List[RubyNode]): List[(String, String)] = { - parameters.map(astForParameter(_, -1)).flatMap(_.root).collect { case x: NewMethodParameterIn => - (x.name, x.typeFullName) + def toType(m: TypeDecl): RubyType = { + RubyType(m.fullName, m.method.map(toMethod).l, m.member.map(toField).l) } + + val mapping = cpg.namespaceBlock.flatMap { namespace => + // Map module functions/variables + val moduleEntry = namespace.fullName -> namespace.method.map { module => + val moduleTypeMap = + RubyType( + module.fullName, + module.block.astChildren.collectAll[Method].map(toMethod).l, + module.local.map(toModuleVariable).l + ) + moduleTypeMap + }.toSet + // Map module types + val typeEntries = namespace.method.collectFirst { + case m: Method if m.name == Defines.Program => + s"${namespace.fullName}:${m.name}" -> m.block.astChildren.collectAll[TypeDecl].map(toType).toSet + }.toSeq + + moduleEntry +: typeEntries + }.toMap + RubyProgramSummary(mapping) } } diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/datastructures/RubyProgramSummary.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/datastructures/RubyProgramSummary.scala index 02b4af3c4e70..71cba2294db7 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/datastructures/RubyProgramSummary.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/datastructures/RubyProgramSummary.scala @@ -1,7 +1,7 @@ package io.joern.rubysrc2cpg.datastructures import io.joern.x2cpg.datastructures.{FieldLike, MethodLike, 
ProgramSummary, TypeLike} - +import io.joern.x2cpg.Defines as XDefines import scala.annotation.targetName class RubyProgramSummary(initialMap: Map[String, Set[RubyType]] = Map.empty) extends ProgramSummary[RubyType] { @@ -20,4 +20,8 @@ case class RubyMethod(name: String, parameterTypes: List[(String, String)], retu case class RubyField(name: String, typeName: String) extends FieldLike case class RubyType(name: String, methods: List[RubyMethod], fields: List[RubyField]) - extends TypeLike[RubyMethod, RubyField] + extends TypeLike[RubyMethod, RubyField] { + def hasConstructor: Boolean = { + methods.exists(_.name == XDefines.ConstructorMethodName) + } +} diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/datastructures/RubyScope.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/datastructures/RubyScope.scala index f919637259ad..159564be387b 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/datastructures/RubyScope.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/datastructures/RubyScope.scala @@ -1,19 +1,24 @@ package io.joern.rubysrc2cpg.datastructures +import io.joern.rubysrc2cpg.astcreation.GlobalTypes +import io.joern.x2cpg.Defines import io.joern.x2cpg.datastructures.* import io.shiftleft.codepropertygraph.generated.NodeTypes -import io.shiftleft.codepropertygraph.generated.nodes.{ - DeclarationNew, - MethodParameterIn, - NewLocal, - NewMethodParameterIn, - NewNode -} +import io.shiftleft.codepropertygraph.generated.nodes.{DeclarationNew, NewLocal, NewMethodParameterIn} + +import scala.collection.mutable class RubyScope(summary: RubyProgramSummary) extends Scope[String, DeclarationNew, TypedScopeElement] with TypedScope[RubyMethod, RubyField, RubyType](summary) { + private val builtinMethods = GlobalTypes.builtinFunctions.map(m => RubyMethod(m, List.empty, Defines.Any)).toList + + override val typesInScope: mutable.Set[RubyType] = + 
mutable.Set(RubyType(GlobalTypes.builtinPrefix, builtinMethods, List.empty)) + + override val membersInScope: mutable.Set[MemberLike] = mutable.Set(builtinMethods*) + // Ruby does not have overloading, so this can be set to true override protected def isOverloadedBy(method: RubyMethod, argTypes: List[String]): Boolean = true @@ -25,12 +30,6 @@ class RubyScope(summary: RubyProgramSummary) override def pushNewScope(scopeNode: TypedScopeElement): Unit = { // Use the summary to determine if there is a constructor present val mappedScopeNode = scopeNode match { - case TypeScope(fullName, _) - if !surroundingScopeFullName - .flatMap(summary.typesUnderNamespace) - .flatMap(_.methods) - .exists(_.name == "initialize") => - TypeScope(fullName, true) case n: NamespaceLikeScope => typesInScope.addAll(summary.typesUnderNamespace(n.fullName)) n @@ -67,7 +66,8 @@ class RubyScope(summary: RubyProgramSummary) */ def shouldGenerateDefaultConstructor: Boolean = stack .collectFirst { - case ScopeElement(x: TypeLikeScope, _) => x.needsDefaultConstructor + case ScopeElement(_: ModuleScope, _) => false + case ScopeElement(x: TypeLikeScope, _) => !typesInScope.find(_.name == x.fullName).exists(_.hasConstructor) case _ => false } .getOrElse(false) diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/datastructures/ScopeElement.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/datastructures/ScopeElement.scala index 61c31b207262..d1229e1882a3 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/datastructures/ScopeElement.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/datastructures/ScopeElement.scala @@ -18,11 +18,6 @@ trait TypeLikeScope extends TypedScopeElement { * the full name of the type-like. */ def fullName: String - - /** @return - * true if a default constructor is required. - */ - def needsDefaultConstructor: Boolean } /** A file-level module. 
@@ -32,24 +27,20 @@ trait TypeLikeScope extends TypedScopeElement { */ case class ProgramScope(fileName: String) extends TypeLikeScope { override def fullName: String = s"$fileName:${Defines.Program}" - - override def needsDefaultConstructor: Boolean = false } /** A Ruby module/abstract class. * @param fullName * the type full name. */ -case class ModuleScope(fullName: String) extends TypeLikeScope { - override def needsDefaultConstructor: Boolean = false -} +case class ModuleScope(fullName: String) extends TypeLikeScope /** A class or interface. * * @param fullName * the type full name. */ -case class TypeScope(fullName: String, needsDefaultConstructor: Boolean = false) extends TypeLikeScope +case class TypeScope(fullName: String) extends TypeLikeScope /** Represents scope objects that map to a method node. */ diff --git a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/CallTests.scala b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/CallTests.scala index 4fce7f26333c..360c388b6b13 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/CallTests.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/CallTests.scala @@ -140,4 +140,23 @@ class CallTests extends RubyCode2CpgFixture { } } + "a parenthesis-less call" should { + val cpg = code(""" + |def src = 1 + |def f(p) + | p += src + | p + |end + |""".stripMargin) + + "correctly create a `src` call instead of identifier" in { + inside(cpg.call("src").l) { + case src :: Nil => + src.name shouldBe "src" + src.methodFullName shouldBe "Test0.rb:::program:src" + case xs => fail(s"Expected exactly one `src` call, instead got [${xs.code.mkString(",")}]") + } + } + } + } diff --git a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/ControlStructureTests.scala b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/ControlStructureTests.scala index 
473902f2c58b..ea0007fef3e9 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/ControlStructureTests.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/ControlStructureTests.scala @@ -249,7 +249,7 @@ class ControlStructureTests extends RubyCode2CpgFixture { whileCond.code shouldBe "true" whileCond.lineNumber shouldBe Some(2) - putsHi.methodFullName shouldBe "puts" + putsHi.methodFullName shouldBe "__builtin:puts" putsHi.code shouldBe "puts 'hi'" putsHi.lineNumber shouldBe Some(2) } diff --git a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/MethodReturnTests.scala b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/MethodReturnTests.scala index b69d65d7f79f..561eceea594b 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/MethodReturnTests.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/MethodReturnTests.scala @@ -71,7 +71,7 @@ class MethodReturnTests extends RubyCode2CpgFixture { r.lineNumber shouldBe Some(3) val List(c: Call) = r.astChildren.isCall.l - c.methodFullName shouldBe "puts" + c.methodFullName shouldBe "__builtin:puts" c.lineNumber shouldBe Some(3) c.code shouldBe "puts x" }