Skip to content

Commit

Permalink
[ruby] Program Summary & Typed Scope (#4151)
Browse files Browse the repository at this point in the history
This is a big one, but here we go

* Created basic implementation classes for `ProgramSummary` and `TypedScopeElement` in Ruby
* Added program summary hooks with implementation in `AstSummaryVisitor` that re-uses some `AstCreator` processes.
* Implemented pre-parse of high-level structures 
* Used this to fix implicit constructor bugs (and other bugs that were lurking around due to having 3 stacks)
* Differentiated modules/types to make sure implicit constructor wasn't created for modules
* Removed other stacks, only using `scope` now

Now we should be able to figure out if an `identifier` is a call or not with the lookahead that `RubyScope` and `RubyProgramSummary` provide us with.

Resolves #4128
  • Loading branch information
DavidBakerEffendi authored Feb 12, 2024
1 parent a137174 commit 89f9fe6
Show file tree
Hide file tree
Showing 14 changed files with 467 additions and 157 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
package io.joern.rubysrc2cpg

import better.files.File
import io.joern.rubysrc2cpg.astcreation.AstCreator
import io.joern.rubysrc2cpg.datastructures.RubyProgramSummary
import io.joern.rubysrc2cpg.deprecated.parser.DeprecatedRubyParser
import io.joern.rubysrc2cpg.deprecated.parser.DeprecatedRubyParser.*
import io.joern.rubysrc2cpg.parser.RubyParser
import io.joern.rubysrc2cpg.passes.{AstCreationPass, ConfigFileCreationPass}
import io.joern.x2cpg.X2Cpg.withNewEmptyCpg
import io.joern.x2cpg.passes.base.AstLinkerPass
Expand All @@ -17,7 +20,6 @@ import io.shiftleft.semanticcpg.language.*
import org.slf4j.LoggerFactory

import java.nio.file.{Files, Paths}
import scala.jdk.CollectionConverters.*
import scala.util.{Failure, Success, Try, Using}

class RubySrc2Cpg extends X2CpgFrontend[Config] {
Expand All @@ -40,12 +42,60 @@ class RubySrc2Cpg extends X2CpgFrontend[Config] {
/** Builds the CPG with the new Ruby frontend in three steps:
  *
  *   1. parse every source file into an `AstCreator` (see `generateParserTasks`),
  *   1. call `summarize()` on each creator and merge the results into one `RubyProgramSummary`,
  *   1. run `AstCreationPass` over the creators, each given the merged summary via `withSummary`.
  *
  * `TypeNodePass` is applied once AST creation has finished. Files whose parse or summary task
  * fails are logged and left out; they do not abort the run.
  *
  * @param cpg
  *   the graph the passes are applied to.
  * @param config
  *   frontend configuration; supplies the ANTLR cache limit and is forwarded to the parser tasks.
  */
private def newCreateCpgAction(cpg: Cpg, config: Config): Unit = {
  Using.resource(new parser.ResourceManagedParser(config.antlrCacheMemLimit)) { parser =>
    // TODO: enableDependencyDownload
    // `astCreationPass` is declared exactly once, below, after the summary is available.
    val astCreators = ConcurrentTaskUtil
      .runUsingThreadPool(generateParserTasks(parser, config, cpg.metaData.root.headOption))
      .flatMap {
        case Failure(exception) => logger.warn(s"Could not parse file, skipping - ", exception); None
        case Success(astCreator) => Option(astCreator)
      }
    // Pre-parse the AST creators for high level structures
    val programSummary = ConcurrentTaskUtil
      .runUsingThreadPool(astCreators.map(x => () => x.summarize()).iterator)
      .flatMap {
        case Failure(exception) => logger.warn(s"Unable to pre-parse Ruby file, skipping - ", exception); None
        case Success(summary) => Option(summary)
      }
      .reduceOption((a, b) => a ++ b)
      // No summaries at all (e.g. no parsable files): fall back to an empty summary.
      .getOrElse(RubyProgramSummary())
    val astCreationPass = new AstCreationPass(cpg, astCreators.map(_.withSummary(programSummary)))
    astCreationPass.createAndApply()
    TypeNodePass.withTypesFromCpg(cpg).createAndApply()
  }
}

/** Creates one deferred parse task per Ruby source file selected by `SourceFiles.determine`.
  *
  * Nothing is parsed until a task is invoked. An invoked task returns an `AstCreator` wrapping
  * the parsed program context, or rethrows the parser's exception so the caller can handle it.
  *
  * @param resourceManagedParser
  *   the parser used by every task.
  * @param config
  *   supplies the input path, the ignore rules and the schema validation mode.
  * @param projectRoot
  *   passed through unchanged to each `AstCreator`.
  * @return
  *   an iterator over the parse tasks.
  */
private def generateParserTasks(
  resourceManagedParser: parser.ResourceManagedParser,
  config: Config,
  projectRoot: Option[String]
): Iterator[() => AstCreator] = {
  val sourceFileNames = SourceFiles.determine(
    config.inputPath,
    RubySourceFileExtensions,
    ignoredFilesRegex = Option(config.ignoredFilesRegex),
    ignoredFilesPath = Option(config.ignoredFiles)
  )

  // Wraps the parse of a single file in a thunk; a parse failure surfaces as a thrown exception.
  def parseTask(fileName: String): () => AstCreator = () =>
    resourceManagedParser.parse(fileName) match {
      case Success(programCtx) => new AstCreator(fileName, programCtx, projectRoot)(config.schemaValidation)
      case Failure(parseError) => throw parseError
    }

  sourceFileNames.map(parseTask).iterator
}

/** Parses a single Ruby file, logging a warning instead of failing when the parse goes wrong.
  *
  * @param fileName
  *   path of the file to parse.
  * @param resourceManagedParser
  *   the parser to use.
  * @return
  *   the program context on success; `None` if parsing failed or the parser yielded `null`.
  */
private def parseFile(
  fileName: String,
  resourceManagedParser: parser.ResourceManagedParser
): Option[RubyParser.ProgramContext] = {
  val parseResult = resourceManagedParser.parse(fileName)
  // Only a failed parse is reported; a successful one produces no log output.
  parseResult.failed.foreach { exception =>
    logger.warn(s"Could not parse file: $fileName, skipping - ", exception)
  }
  // `Option(_)` keeps the null-to-None mapping for a successful result.
  parseResult.toOption.flatMap(Option(_))
}

private def deprecatedCreateCpgAction(cpg: Cpg, config: Config): Unit = try {
Using.resource(new deprecated.astcreation.ResourceManagedParser(config.antlrCacheMemLimit)) { parser =>
if (config.enableDependencyDownload && !scala.util.Properties.isWin) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,45 +1,47 @@
package io.joern.rubysrc2cpg.astcreation

import io.joern.rubysrc2cpg.astcreation.RubyIntermediateAst.*
import io.joern.rubysrc2cpg.parser.{ResourceManagedParser, RubyNodeCreator}
import io.joern.rubysrc2cpg.datastructures.{NamespaceScope, RubyProgramSummary, RubyScope}
import io.joern.rubysrc2cpg.parser.{RubyNodeCreator, RubyParser}
import io.joern.rubysrc2cpg.passes.Defines
import io.joern.x2cpg.datastructures.Stack.*
import io.joern.x2cpg.{Ast, AstCreatorBase, AstNodeBuilder, ValidationMode}
import io.joern.x2cpg.utils.NodeBuilders.newModifierNode
import io.joern.x2cpg.{Ast, AstCreatorBase, AstNodeBuilder, ValidationMode}
import io.shiftleft.codepropertygraph.generated.ModifierTypes
import io.shiftleft.codepropertygraph.generated.nodes.*
import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal
import org.slf4j.{Logger, LoggerFactory}
import overflowdb.BatchedUpdate

import scala.util.{Failure, Success}

class AstCreator(protected val filename: String, parser: ResourceManagedParser, projectRoot: Option[String] = None)(
implicit withSchemaValidation: ValidationMode
) extends AstCreatorBase(filename)
class AstCreator(
val fileName: String,
protected val programCtx: RubyParser.ProgramContext,
protected val projectRoot: Option[String] = None,
protected val programSummary: RubyProgramSummary = RubyProgramSummary()
)(implicit withSchemaValidation: ValidationMode)
extends AstCreatorBase(fileName)
with AstCreatorHelper
with AstForStatementsCreator
with AstForExpressionsCreator
with AstForFunctionsCreator
with AstForTypesCreator
with FreshVariableCreator
with AstSummaryVisitor
with AstNodeBuilder[RubyNode, AstCreator] {

/* Used to track variable names and their LOCAL nodes.
*/
protected val scope: RubyScope = new RubyScope(programSummary)

protected val logger: Logger = LoggerFactory.getLogger(getClass)

protected val relativeFileName: String =
projectRoot.map(filename.stripPrefix).map(_.stripPrefix(java.io.File.separator)).getOrElse(filename)
projectRoot.map(fileName.stripPrefix).map(_.stripPrefix(java.io.File.separator)).getOrElse(fileName)

/** Converts the program context held by this creator into an AST and stores it in the diff graph.
  *
  * The file is parsed before this creator is constructed, so no parsing or parse-failure handling
  * happens here; the conversion runs exactly once on `programCtx`.
  *
  * @return
  *   the diff graph containing the AST for this file.
  */
override def createAst(): BatchedUpdate.DiffGraphBuilder = {
  // NOTE(review): assumes visiting a program context always yields a StatementList - confirm.
  val rootNode = new RubyNodeCreator().visit(programCtx).asInstanceOf[StatementList]
  val ast = astForRubyFile(rootNode)
  Ast.storeInDiffGraph(ast, diffGraph)
  diffGraph
}

/* A Ruby file has the following AST hierarchy: FILE -> NAMESPACE_BLOCK -> METHOD.
Expand All @@ -48,21 +50,21 @@ class AstCreator(protected val filename: String, parser: ResourceManagedParser,
*/
/** Builds the top-level AST of a Ruby file: a FILE node whose child is the global
  * NAMESPACE_BLOCK, which in turn contains the method produced by `astInFakeMethod`.
  *
  * The namespace scope is pushed once before the statements are processed and popped once
  * afterwards, so the scope stack is balanced when this method returns.
  *
  * @param rootStatements
  *   the top-level statements of the file.
  * @return
  *   the AST rooted at the FILE node.
  */
private def astForRubyFile(rootStatements: StatementList): Ast = {
  val fileNode = NewFile().name(relativeFileName)
  // Computed once; used both for the namespace block and for the namespace scope.
  val fullName = s"$relativeFileName:${NamespaceTraversal.globalNamespaceName}"
  val namespaceBlock = NewNamespaceBlock()
    .filename(relativeFileName)
    .name(NamespaceTraversal.globalNamespaceName)
    .fullName(fullName)

  scope.pushNewScope(NamespaceScope(fullName))
  val rubyFileMethod = astInFakeMethod(rootStatements)
  scope.popScope()

  Ast(fileNode).withChild(Ast(namespaceBlock).withChild(rubyFileMethod))
}

private def astInFakeMethod(rootNode: StatementList): Ast = {
val name = ":program" // TODO: avoid this hardcoding. Move it into Defines?
val name = Defines.Program
val fullName = computeMethodFullName(name)
val code = rootNode.text
val methodNode_ = methodNode(
Expand All @@ -75,13 +77,14 @@ class AstCreator(protected val filename: String, parser: ResourceManagedParser,
)
val methodReturn = methodReturnNode(rootNode, Defines.Any)

methodAstParentStack.push(methodNode_)
scope.pushNewScope(methodNode_)
val statementAsts = rootNode.statements.flatMap(astsForStatement)
val bodyAst = blockAst(blockNode(rootNode), statementAsts)
scope.popScope()
methodAstParentStack.pop()

methodAst(methodNode_, Seq.empty, bodyAst, methodReturn, newModifierNode(ModifierTypes.MODULE) :: Nil)
scope.newProgramScope
.map { moduleScope =>
scope.pushNewScope(moduleScope)
val statementAsts = rootNode.statements.flatMap(astsForStatement)
val bodyAst = blockAst(blockNode(rootNode), statementAsts)
scope.popScope()
methodAst(methodNode_, Seq.empty, bodyAst, methodReturn, newModifierNode(ModifierTypes.MODULE) :: Nil)
}
.getOrElse(Ast())
}
}
Original file line number Diff line number Diff line change
@@ -1,37 +1,16 @@
package io.joern.rubysrc2cpg.astcreation
import io.joern.rubysrc2cpg.astcreation.GlobalTypes.{builtinFunctions, builtinPrefix}
import io.joern.rubysrc2cpg.astcreation.RubyIntermediateAst.RubyNode
import io.joern.x2cpg.{Ast, Defines, ValidationMode}
import io.joern.x2cpg.datastructures.Scope
import io.joern.rubysrc2cpg.datastructures.{RubyProgramSummary, RubyScope}
import io.joern.x2cpg.datastructures.Stack.*
import io.joern.x2cpg.{Ast, Defines, ValidationMode}
import io.shiftleft.codepropertygraph.generated.Operators
import io.shiftleft.codepropertygraph.generated.nodes.*
import io.shiftleft.codepropertygraph.generated.{Operators, PropertyNames}

trait AstCreatorHelper(implicit withSchemaValidation: ValidationMode) { this: AstCreator =>

/* Used to track variable names and their LOCAL nodes.
* TODO: Perhaps move this feature into a new Pass?
*/
protected val scope: Scope[String, NewNode, NewNode] = new Scope()

/* Used to compute a method's full name and parent.
* TODO: port RubyScope from the deprecated frontend here?
* */
protected val methodAstParentStack: Stack[NewNode] = new Stack()

/* Used if any constructors of classes are present to know if a default constructor should be generated
* TODO: this seems too specific to add another stack, perhaps there is a better way in checking the class body. There are some possible
* nesting edge cases which this handles better unless you recursively traverse the result of astsFor* on the class body. How common it would be in actual Ruby code is uncertain */
protected val shouldGenerateDefaultConstructorStack: Stack[Boolean] = new Stack()
protected def setNoDefaultConstructorForEnclosingTypeDecl: Unit = {
shouldGenerateDefaultConstructorStack.pop()
shouldGenerateDefaultConstructorStack.push(false)
}

protected def getEnclosingAstType: String = methodAstParentStack.head.label()
protected def getEnclosingAstFullName: String = methodAstParentStack.head.properties(PropertyNames.FULL_NAME).toString
protected def computeClassFullName(name: String): String = s"$getEnclosingAstFullName.$name"
protected def computeMethodFullName(name: String): String = s"$getEnclosingAstFullName:$name"
protected def computeClassFullName(name: String): String = s"${scope.surroundingScopeFullName.head}.$name"
protected def computeMethodFullName(name: String): String = s"${scope.surroundingScopeFullName.head}:$name"

override def column(node: RubyNode): Option[Integer] = node.column
override def columnEnd(node: RubyNode): Option[Integer] = node.columnEnd
Expand Down
Original file line number Diff line number Diff line change
@@ -1,33 +1,34 @@
package io.joern.rubysrc2cpg.astcreation

import io.joern.rubysrc2cpg.astcreation.RubyIntermediateAst.*
import io.joern.rubysrc2cpg.datastructures.{ConstructorScope, MethodScope}
import io.joern.rubysrc2cpg.passes.Defines
import io.joern.x2cpg.datastructures.Stack.*
import io.joern.x2cpg.{Ast, ValidationMode}
import io.shiftleft.codepropertygraph.generated.EvaluationStrategies
import io.joern.x2cpg.{Ast, ValidationMode, Defines as XDefines}
import io.shiftleft.codepropertygraph.generated.{EvaluationStrategies, NodeTypes}

trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { this: AstCreator =>

protected def astForMethodDeclaration(node: MethodDeclaration): Ast = {

// Special case constructor methods
val isInTypeDecl = getEnclosingAstType == "TYPE_DECL"
val isInTypeDecl = scope.surroundingAstLabel.contains(NodeTypes.TYPE_DECL)
val methodName = node.methodName match {
case "initialize" if isInTypeDecl =>
setNoDefaultConstructorForEnclosingTypeDecl
"<init>"
case name => name
case "initialize" if isInTypeDecl => XDefines.ConstructorMethodName
case name => name
}
// TODO: body could be a try
val fullName = computeMethodFullName(methodName)
val method = methodNode(
node = node,
name = methodName,
fullName = computeMethodFullName(methodName),
fullName = fullName,
code = code(node),
signature = None,
fileName = relativeFileName
)
methodAstParentStack.push(method)

if (methodName == XDefines.ConstructorMethodName) scope.pushNewScope(ConstructorScope(fullName))
else scope.pushNewScope(MethodScope(fullName))

val parameterAsts = node.parameters.zipWithIndex.map { case (parameterNode, index) =>
astForParameter(parameterNode, index)
Expand All @@ -44,7 +45,7 @@ trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { th
)
astForUnknown(body)

methodAstParentStack.pop()
scope.popScope()
methodAst(method, parameterAsts, stmtBlockAst, methodReturnNode(node, Defines.Any))
}

Expand Down Expand Up @@ -75,18 +76,19 @@ trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { th
protected def astForSingletonMethodDeclaration(node: SingletonMethodDeclaration): Ast = {
node.target match
case _: SelfIdentifier =>
val fullName = computeMethodFullName(node.methodName)
val method = methodNode(
node = node,
name = node.methodName,
fullName = computeMethodFullName(node.methodName),
fullName = fullName,
code = code(node),
signature = None,
fileName = relativeFileName,
astParentType = Some(getEnclosingAstType),
astParentFullName = Some(getEnclosingAstFullName)
astParentType = scope.surroundingAstLabel,
astParentFullName = scope.surroundingScopeFullName
)
methodAstParentStack.push(method)
scope.pushNewScope(method)

scope.pushNewScope(MethodScope(fullName))

val parameterAsts = node.parameters.zipWithIndex.map { case (parameterNode, index) =>
astForParameter(parameterNode, index)
Expand All @@ -99,7 +101,6 @@ trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { th
astForUnknown(body)

scope.popScope()
methodAstParentStack.pop()
methodAst(method, parameterAsts, stmtBlockAst, methodReturnNode(node, Defines.Any))

case targetNode =>
Expand Down
Loading

0 comments on commit 89f9fe6

Please sign in to comment.