Skip to content

Commit

Permalink
[ruby] Implement RubySummary with CPG Lookahead (#4226)
Browse files Browse the repository at this point in the history
* Following [#4196](#4196), similarly implemented a look-ahead and summary using an in-memory CPG
* Testing that a parenthesis-less call is correctly created as a call instead of identifier
* Resolving direct function calls if possible using the scope object
* Fixed scope around new pre-parse & loaded builtins
  • Loading branch information
DavidBakerEffendi authored Feb 23, 2024
1 parent a69111d commit 68a09fb
Show file tree
Hide file tree
Showing 12 changed files with 140 additions and 129 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package io.joern.rubysrc2cpg

import better.files.File
import io.joern.rubysrc2cpg.astcreation.AstCreator
import io.joern.rubysrc2cpg.astcreation.GlobalTypes
import io.joern.rubysrc2cpg.datastructures.RubyProgramSummary
import io.joern.rubysrc2cpg.deprecated.parser.DeprecatedRubyParser
import io.joern.rubysrc2cpg.deprecated.parser.DeprecatedRubyParser.*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ class AstCreator(

protected val logger: Logger = LoggerFactory.getLogger(getClass)

protected var parseLevel: AstParseLevel = AstParseLevel.FULL_AST

protected val relativeFileName: String =
projectRoot.map(fileName.stripPrefix).map(_.stripPrefix(java.io.File.separator)).getOrElse(fileName)

Expand All @@ -49,7 +51,7 @@ class AstCreator(
* The (parsed) contents of the file are put under that fictitious METHOD node, thus
* allowing for a straightforward representation of out-of-method statements.
*/
private def astForRubyFile(rootStatements: StatementList): Ast = {
protected def astForRubyFile(rootStatements: StatementList): Ast = {
val fileNode = NewFile().name(relativeFileName)
val fullName = s"$relativeFileName:${NamespaceTraversal.globalNamespaceName}"
val namespaceBlock = NewNamespaceBlock()
Expand Down Expand Up @@ -92,3 +94,16 @@ class AstCreator(
.getOrElse(Ast())
}
}

/** Controls how deep the AST creator descends when building the AST for a file.
  */
enum AstParseLevel {

  /** Only type and method signatures are parsed; method bodies are left out.
    */
  case SIGNATURES

  /** Everything is parsed, method bodies included.
    */
  case FULL_AST
}
Original file line number Diff line number Diff line change
Expand Up @@ -342,10 +342,14 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) {
}

private def astForMethodCallWithoutBlock(node: SimpleCall, methodIdentifier: SimpleIdentifier): Ast = {
val methodName = methodIdentifier.text
val methodFullName = methodName // TODO
val argumentAst = node.arguments.map(astForMethodCallArgument)
val call = callNode(node, code(node), methodName, methodFullName, DispatchTypes.STATIC_DISPATCH)
val methodName = methodIdentifier.text
lazy val defaultFullName = s"${XDefines.UnresolvedNamespace}:$methodName"
val methodFullName = scope.tryResolveMethodInvocation(methodName, List.empty) match {
case Some(m) => scope.typeForMethod(m).map(t => s"${t.name}:${m.name}").getOrElse(defaultFullName)
case None => defaultFullName
}
val argumentAst = node.arguments.map(astForMethodCallArgument)
val call = callNode(node, code(node), methodName, methodFullName, DispatchTypes.STATIC_DISPATCH)
callAst(call, argumentAst, None, None)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,22 +43,25 @@ trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { th
.map(statementForOptionalParam)
)(TextSpan(None, None, None, None, ""))

val stmtBlockAst = node.body match
case stmtList: StatementList =>
astForStatementListReturningLastExpression(
StatementList(optionalStatementList.statements ++ stmtList.statements)(stmtList.span)
)
case _: (StaticLiteral | BinaryExpression | SingleAssignment | SimpleIdentifier | ArrayLiteral | HashLiteral |
SimpleCall | MemberAccess | MemberCall) =>
astForStatementListReturningLastExpression(
StatementList(optionalStatementList.statements ++ List(node.body))(node.body.span)
)
case body =>
logger.warn(
s"Non-linear method bodies are not supported yet: ${body.text} (${body.getClass.getSimpleName}) ($relativeFileName), skipping"
)
astForUnknown(body)

val stmtBlockAst = if (this.parseLevel == AstParseLevel.SIGNATURES) {
Ast()
} else {
node.body match
case stmtList: StatementList =>
astForStatementListReturningLastExpression(
StatementList(optionalStatementList.statements ++ stmtList.statements)(stmtList.span)
)
case _: (StaticLiteral | BinaryExpression | SingleAssignment | SimpleIdentifier | ArrayLiteral | HashLiteral |
SimpleCall | MemberAccess | MemberCall) =>
astForStatementListReturningLastExpression(
StatementList(optionalStatementList.statements ++ List(node.body))(node.body.span)
)
case body =>
logger.warn(
s"Non-linear method bodies are not supported yet: ${body.text} (${body.getClass.getSimpleName}) ($relativeFileName), skipping"
)
astForUnknown(body)
}
scope.popScope()
methodAst(method, parameterAsts, stmtBlockAst, methodReturnNode(node, Defines.Any))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ trait AstForTypesCreator(implicit withSchemaValidation: ValidationMode) { this:
val classBody =
node.body.asInstanceOf[StatementList] // for now (bodyStatement is a superset of stmtList)
val classBodyAsts = classBody.statements.flatMap(astsForStatement) match {
case bodyAsts if scope.shouldGenerateDefaultConstructor =>
case bodyAsts if scope.shouldGenerateDefaultConstructor && parseLevel == AstParseLevel.FULL_AST =>
val bodyStart = classBody.span.spanStart()
val initBody = StatementList(List())(bodyStart)
val methodDecl = astForMethodDeclaration(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,100 +1,74 @@
package io.joern.rubysrc2cpg.astcreation

import io.joern.rubysrc2cpg.astcreation.RubyIntermediateAst.{
ClassDeclaration,
FieldsDeclaration,
MandatoryParameter,
MethodDeclaration,
ModuleDeclaration,
OptionalParameter,
RubyNode,
SimpleIdentifier,
StatementList,
TypeDeclaration
}
import io.joern.rubysrc2cpg.datastructures.{
NamespaceScope,
RubyField,
RubyMethod,
RubyProgramSummary,
RubyType,
TypeScope
}
import io.joern.rubysrc2cpg.passes.Defines
import io.joern.rubysrc2cpg.astcreation.RubyIntermediateAst.StatementList
import io.joern.rubysrc2cpg.datastructures.{RubyField, RubyMethod, RubyProgramSummary, RubyType}
import io.joern.rubysrc2cpg.parser.RubyNodeCreator
import io.joern.x2cpg.datastructures.ProgramSummary
import io.joern.x2cpg.{ValidationMode, Defines as XDefines}
import io.shiftleft.codepropertygraph.generated.nodes.{NewMember, NewMethod, NewMethodParameterIn}
import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal
import io.joern.rubysrc2cpg.passes.Defines
import io.joern.x2cpg.{Ast, ValidationMode}
import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.{Local, Member, Method, TypeDecl}
import io.shiftleft.semanticcpg.language.*
import overflowdb.{BatchedUpdate, Config}

trait AstSummaryVisitor(implicit withSchemaValidation: ValidationMode) { this: AstCreator =>
import scala.util.Using

private def baseNamespace: String = s"$relativeFileName:${NamespaceTraversal.globalNamespaceName}"
trait AstSummaryVisitor(implicit withSchemaValidation: ValidationMode) { this: AstCreator =>

def summarize(): RubyProgramSummary = {
val rootNode = new RubyNodeCreator().visit(this.programCtx).asInstanceOf[StatementList]
val fullName = baseNamespace
scope.pushNewScope(NamespaceScope(fullName))
this.parseLevel = AstParseLevel.SIGNATURES
Using.resource(Cpg.withConfig(Config.withoutOverflow())) { cpg =>
// Build and store compilation unit AST
val rootNode = new RubyNodeCreator().visit(programCtx).asInstanceOf[StatementList]
val ast = astForRubyFile(rootNode)
Ast.storeInDiffGraph(ast, diffGraph)
BatchedUpdate.applyDiff(cpg.graph, diffGraph)

val newMap = scope.newProgramScope
.map { moduleScope =>
scope.pushNewScope(moduleScope)
val m = rootNode.statements
.map(visitStatement)
.reduceOption((a, b) => ProgramSummary.combine(a, b))
.getOrElse(Map.empty)
scope.popScope()
m
}
.getOrElse(Map.empty)

scope.popScope()
RubyProgramSummary(newMap)
// Summarize findings
summarize(cpg)
}
}

/** Builds a new [[AstCreator]] for the same file, program context and project root as this one, but backed by the
  * given program summary.
  *
  * @param newSummary
  *   the summary the new creator is constructed with.
  * @return
  *   a new creator instance; this instance is not modified.
  */
def withSummary(newSummary: RubyProgramSummary): AstCreator =
  AstCreator(fileName, programCtx, projectRoot, newSummary)

private def visitStatement(stmt: RubyNode): Map[String, Set[RubyType]] = stmt match {
case node: TypeDeclaration => visitTypeDeclaration(node)
case _ => Map.empty
}
private def summarize(cpg: Cpg): RubyProgramSummary = {
/** Converts a CPG method node into its summary form: the method name, one (name, type full name) pair per parameter,
  * and the type full name of the method return node.
  */
def toMethod(m: Method): RubyMethod = {
  val parameterTypes = m.parameter.map(param => (param.name, param.typeFullName)).l
  RubyMethod(m.name, parameterTypes, m.methodReturn.typeFullName)
}

private def visitTypeDeclaration(classDecl: TypeDeclaration): Map[String, Set[RubyType]] = {
classDecl.name match {
case name: SimpleIdentifier =>
val fullName = computeClassFullName(name.text)
Map(
scope.surroundingScopeFullName
.getOrElse(baseNamespace) -> Set(visitTypeLikeDeclaration(fullName, classDecl.body))
)
case _ => Map.empty
/** Converts a CPG member node into a summary field made of its name and type full name.
  */
def toField(f: Member): RubyField =
  RubyField(f.name, f.typeFullName)
}

private def visitTypeLikeDeclaration(fullName: String, body: RubyNode): RubyType = {
scope.pushNewScope(TypeScope(fullName))
val classBody = body.asInstanceOf[StatementList]
val bodyMap =
classBody.statements.flatMap {
case MethodDeclaration(methodName, parameters, _) =>
RubyMethod(methodName, visitParameters(parameters), XDefines.Any) :: Nil
case node: FieldsDeclaration =>
astsForFieldDeclarations(node).flatMap(_.nodes).collect {
case x: NewMember => RubyField(x.name, x.typeFullName)
case x: NewMethod => RubyMethod(x.name, List.empty, XDefines.Any) // These are getters/setters
}
case _ => Seq.empty
}
scope.popScope()
RubyType(fullName, bodyMap.collect { case x: RubyMethod => x }, bodyMap.collect { case x: RubyField => x })
}
/** Converts a CPG local node into a summary field made of its name and type full name.
  */
def toModuleVariable(v: Local): RubyField =
  RubyField(v.name, v.typeFullName)

private def visitParameters(parameters: List[RubyNode]): List[(String, String)] = {
parameters.map(astForParameter(_, -1)).flatMap(_.root).collect { case x: NewMethodParameterIn =>
(x.name, x.typeFullName)
/** Converts a CPG type declaration into its summary form: the full name, the summarized methods, and the summarized
  * members of the declaration.
  */
def toType(m: TypeDecl): RubyType = {
  val methods = m.method.map(toMethod).l
  val fields  = m.member.map(toField).l
  RubyType(m.fullName, methods, fields)
}

val mapping = cpg.namespaceBlock.flatMap { namespace =>
// Map module functions/variables
val moduleEntry = namespace.fullName -> namespace.method.map { module =>
val moduleTypeMap =
RubyType(
module.fullName,
module.block.astChildren.collectAll[Method].map(toMethod).l,
module.local.map(toModuleVariable).l
)
moduleTypeMap
}.toSet
// Map module types
val typeEntries = namespace.method.collectFirst {
case m: Method if m.name == Defines.Program =>
s"${namespace.fullName}:${m.name}" -> m.block.astChildren.collectAll[TypeDecl].map(toType).toSet
}.toSeq

moduleEntry +: typeEntries
}.toMap
RubyProgramSummary(mapping)
}

}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package io.joern.rubysrc2cpg.datastructures

import io.joern.x2cpg.datastructures.{FieldLike, MethodLike, ProgramSummary, TypeLike}

import io.joern.x2cpg.Defines as XDefines
import scala.annotation.targetName

class RubyProgramSummary(initialMap: Map[String, Set[RubyType]] = Map.empty) extends ProgramSummary[RubyType] {
Expand All @@ -20,4 +20,8 @@ case class RubyMethod(name: String, parameterTypes: List[(String, String)], retu
case class RubyField(name: String, typeName: String) extends FieldLike

case class RubyType(name: String, methods: List[RubyMethod], fields: List[RubyField])
extends TypeLike[RubyMethod, RubyField]
extends TypeLike[RubyMethod, RubyField] {
/** @return
  *   true if at least one of this type's methods is named `XDefines.ConstructorMethodName`.
  */
def hasConstructor: Boolean =
  methods.exists(method => method.name == XDefines.ConstructorMethodName)
}
Original file line number Diff line number Diff line change
@@ -1,19 +1,24 @@
package io.joern.rubysrc2cpg.datastructures

import io.joern.rubysrc2cpg.astcreation.GlobalTypes
import io.joern.x2cpg.Defines
import io.joern.x2cpg.datastructures.*
import io.shiftleft.codepropertygraph.generated.NodeTypes
import io.shiftleft.codepropertygraph.generated.nodes.{
DeclarationNew,
MethodParameterIn,
NewLocal,
NewMethodParameterIn,
NewNode
}
import io.shiftleft.codepropertygraph.generated.nodes.{DeclarationNew, NewLocal, NewMethodParameterIn}

import scala.collection.mutable

class RubyScope(summary: RubyProgramSummary)
extends Scope[String, DeclarationNew, TypedScopeElement]
with TypedScope[RubyMethod, RubyField, RubyType](summary) {

private val builtinMethods = GlobalTypes.builtinFunctions.map(m => RubyMethod(m, List.empty, Defines.Any)).toList

override val typesInScope: mutable.Set[RubyType] =
mutable.Set(RubyType(GlobalTypes.builtinPrefix, builtinMethods, List.empty))

override val membersInScope: mutable.Set[MemberLike] = mutable.Set(builtinMethods*)

// Ruby does not have overloading, so this can be set to true
override protected def isOverloadedBy(method: RubyMethod, argTypes: List[String]): Boolean = true

Expand All @@ -25,12 +30,6 @@ class RubyScope(summary: RubyProgramSummary)
override def pushNewScope(scopeNode: TypedScopeElement): Unit = {
// Use the summary to determine if there is a constructor present
val mappedScopeNode = scopeNode match {
case TypeScope(fullName, _)
if !surroundingScopeFullName
.flatMap(summary.typesUnderNamespace)
.flatMap(_.methods)
.exists(_.name == "initialize") =>
TypeScope(fullName, true)
case n: NamespaceLikeScope =>
typesInScope.addAll(summary.typesUnderNamespace(n.fullName))
n
Expand Down Expand Up @@ -67,7 +66,8 @@ class RubyScope(summary: RubyProgramSummary)
*/
def shouldGenerateDefaultConstructor: Boolean = stack
.collectFirst {
case ScopeElement(x: TypeLikeScope, _) => x.needsDefaultConstructor
case ScopeElement(_: ModuleScope, _) => false
case ScopeElement(x: TypeLikeScope, _) => !typesInScope.find(_.name == x.fullName).exists(_.hasConstructor)
case _ => false
}
.getOrElse(false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,6 @@ trait TypeLikeScope extends TypedScopeElement {
* the full name of the type-like.
*/
def fullName: String

/** @return
* true if a default constructor is required.
*/
def needsDefaultConstructor: Boolean
}

/** A file-level module.
Expand All @@ -32,24 +27,20 @@ trait TypeLikeScope extends TypedScopeElement {
*/
case class ProgramScope(fileName: String) extends TypeLikeScope {
override def fullName: String = s"$fileName:${Defines.Program}"

override def needsDefaultConstructor: Boolean = false
}

/** A Ruby module/abstract class.
* @param fullName
* the type full name.
*/
case class ModuleScope(fullName: String) extends TypeLikeScope {
override def needsDefaultConstructor: Boolean = false
}
case class ModuleScope(fullName: String) extends TypeLikeScope

/** A class or interface.
*
* @param fullName
* the type full name.
*/
case class TypeScope(fullName: String, needsDefaultConstructor: Boolean = false) extends TypeLikeScope
case class TypeScope(fullName: String) extends TypeLikeScope

/** Represents scope objects that map to a method node.
*/
Expand Down
Loading

0 comments on commit 68a09fb

Please sign in to comment.