Skip to content
This repository has been archived by the owner on Oct 15, 2020. It is now read-only.

Commit

Permalink
Stable node ids (#201)
Browse files Browse the repository at this point in the history
* Light refactoring

* Use keypools

* Make file/namespaceblock node creation accessible

* More light refactoring

* Create file and namespaceblock nodes per comp unit

* Cleanup

* Sort list of filenames prior to processing

* Fix FuzzyC2CpgCache (declaration handling)

* More work on stable ids + test

* Do not hand in a global CPG
  • Loading branch information
fabsx00 authored Jul 9, 2020
1 parent 9effb6d commit d6a4367
Show file tree
Hide file tree
Showing 11 changed files with 214 additions and 96 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
import io.shiftleft.fuzzyc2cpg.ast.AstNode;
import io.shiftleft.fuzzyc2cpg.ast.AstNodeBuilder;
import io.shiftleft.fuzzyc2cpg.ast.logical.statements.CompoundStatement;
import io.shiftleft.fuzzyc2cpg.output.CpgOutputModule;
import io.shiftleft.fuzzyc2cpg.output.CpgOutputModuleFactory;
import io.shiftleft.passes.KeyPool;
import io.shiftleft.proto.cpg.Cpg.CpgStruct;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -39,16 +42,17 @@ abstract public class AntlrParserDriver {
private CommonParserContext context = null;

private List<AntlrParserDriverObserver> observers = new ArrayList<>();
private Cpg.CpgStruct.Builder cpg;
private Cpg.CpgStruct.Builder cpg = CpgStruct.newBuilder();
private Cpg.CpgStruct.Node fileNode;
private KeyPool keyPool;
private CpgOutputModuleFactory outputModuleFactory;

public AntlrParserDriver() {
super();
}

public void setCpg(Cpg.CpgStruct.Builder cpg) {
this.cpg = cpg;
public void setOutputModuleFactory(CpgOutputModuleFactory factory) {
this.outputModuleFactory = factory;
}

public void setKeyPool(KeyPool keyPool) {
Expand All @@ -63,13 +67,19 @@ public void setFileNode(Cpg.CpgStruct.Node fileNode) {

public abstract Lexer createLexer(CharStream input);

public void parseAndWalkFile(String filename) throws ParserException {
public void parseAndWalkFile(String filename) throws ParserException, IOException {
handleHiddenTokens(filename);
TokenSubStream stream = createTokenStreamFromFile(filename);
initializeContextWithFile(filename, stream);

ParseTree tree = parseTokenStream(stream);
walkTree(tree);

CpgOutputModule outputModule = outputModuleFactory.create();
outputModule.setOutputIdentifier(
filename + " driver"
);
outputModule.persistCpg(cpg);
}

private void handleHiddenTokens(String filename) {
Expand Down
23 changes: 15 additions & 8 deletions src/main/scala/io/shiftleft/fuzzyc2cpg/AstVisitor.scala
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ import io.shiftleft.proto.cpg.Cpg.CpgStruct.Node
import org.antlr.v4.runtime.ParserRuleContext

class AstVisitor(outputModuleFactory: CpgOutputModuleFactory,
structureCpg: CpgStruct.Builder,
astParentNode: Node,
keyPool: KeyPool)
keyPool: KeyPool,
cache: FuzzyC2CpgCache,
global: Global)
extends ASTNodeVisitor
with AntlrParserDriverObserver {
private var fileNameOption = Option.empty[String]
private val structureCpg = CpgStruct.newBuilder()

/**
* Callback triggered for each function definition
Expand All @@ -37,7 +39,7 @@ class AstVisitor(outputModuleFactory: CpgOutputModuleFactory,
val bodyCpg = CpgStruct.newBuilder()
val cpgAdapter = new ProtoCpgAdapter(bodyCpg, keyPool)
val astToCpgConverter =
new AstToCpgConverter(astParentNode, cpgAdapter)
new AstToCpgConverter(astParentNode, cpgAdapter, global)
astToCpgConverter.convert(functionDef)

val astToCfgConverter =
Expand All @@ -49,9 +51,9 @@ class AstVisitor(outputModuleFactory: CpgOutputModuleFactory,
// corresponding definition, in which case the declaration will be
// removed again and is never persisted. Persisting of declarations
// happens after concurrent processing of compilation units.
FuzzyC2CpgCache.add(functionDef.getFunctionSignature(false), outputIdentifier, bodyCpg)
cache.add(functionDef.getFunctionSignature(false), outputIdentifier, bodyCpg)
} else {
FuzzyC2CpgCache.remove(functionDef.getFunctionSignature(false))
cache.remove(functionDef.getFunctionSignature(false))
outputModule.persistCpg(bodyCpg)
}
}
Expand All @@ -62,7 +64,7 @@ class AstVisitor(outputModuleFactory: CpgOutputModuleFactory,
override def visit(classDefStatement: ClassDefStatement): Unit = {
val cpgAdapter = new ProtoCpgAdapter(structureCpg, keyPool)
val astToCpgConverter =
new AstToCpgConverter(astParentNode, cpgAdapter)
new AstToCpgConverter(astParentNode, cpgAdapter, global)
astToCpgConverter.convert(classDefStatement)
}

Expand All @@ -72,7 +74,7 @@ class AstVisitor(outputModuleFactory: CpgOutputModuleFactory,
override def visit(identifierDeclStmt: IdentifierDeclStatement): Unit = {
val cpgAdapter = new ProtoCpgAdapter(structureCpg, keyPool)
val astToCpgConverter =
new AstToCpgConverter(astParentNode, cpgAdapter)
new AstToCpgConverter(astParentNode, cpgAdapter, global)
astToCpgConverter.convert(identifierDeclStmt)
}

Expand All @@ -84,7 +86,12 @@ class AstVisitor(outputModuleFactory: CpgOutputModuleFactory,
fileNameOption = Some(filename)
}

override def endOfUnit(ctx: ParserRuleContext, filename: String): Unit = {}
/**
  * Persists the structure CPG accumulated by this visitor (the builder that
  * the class-definition and identifier-declaration visits write into) via a
  * fresh output module, using the output identifier `<filename> types`.
  *
  * @param ctx      parser rule context handed in by the driver; not used here
  * @param filename name of the compilation unit, used to build the identifier
  */
override def endOfUnit(ctx: ParserRuleContext, filename: String): Unit = {
  val typesOutput = outputModuleFactory.create()
  typesOutput.setOutputIdentifier(s"$filename types")
  typesOutput.persistCpg(structureCpg)
}

override def processItem[T <: AstNode](node: T, builderStack: util.Stack[AstNodeBuilder[_ <: AstNode]]): Unit = {
node.accept(this)
Expand Down
150 changes: 91 additions & 59 deletions src/main/scala/io/shiftleft/fuzzyc2cpg/FuzzyC2Cpg.scala
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,22 @@ import io.shiftleft.proto.cpg.Cpg.CpgStruct.Node.NodeType
import io.shiftleft.proto.cpg.Cpg.{CpgStruct, NodePropertyName}
import java.nio.file.{Files, Path}
import java.util.concurrent.LinkedBlockingQueue

import io.shiftleft.passes.KeyPool

import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.collection.parallel.CollectionConverters._
import scala.util.control.NonFatal

/**
  * State shared by all compilation units of a single run.
  *
  * One instance is created in `addCompilationUnits` and handed to every
  * `createCpgForCompilationUnit` call; afterwards `usedTypes` is turned into
  * TYPE nodes by `addTypeNodes`.
  *
  * NOTE(review): the compilation units are processed via `.par.foreach`, so
  * this instance is visible to several worker threads at once, while the
  * default `mutable.HashSet` is not synchronized. Whether the converters
  * guard their writes to `usedTypes` is not visible from here - confirm.
  *
  * @param usedTypes names of the types for which TYPE nodes are to be created
  */
case class Global(usedTypes: mutable.Set[String] = new mutable.HashSet[String])

class FuzzyC2Cpg(outputModuleFactory: CpgOutputModuleFactory) {

def this(outputPath: String) = {
this(new OutputModuleFactory(outputPath, true).asInstanceOf[CpgOutputModuleFactory])
}

private val cache = new FuzzyC2CpgCache
private val logger = LoggerFactory.getLogger(getClass)

def runWithPreprocessorAndOutput(sourcePaths: Set[String],
Expand Down Expand Up @@ -73,19 +76,41 @@ class FuzzyC2Cpg(outputModuleFactory: CpgOutputModuleFactory) {

def runAndOutput(sourcePaths: Set[String], sourceFileExtensions: Set[String]): Unit = {
val sourceFileNames = SourceFiles.determine(sourcePaths, sourceFileExtensions)
val keyPools = KeyPools.obtain(sourceFileNames.size.toLong + 2)

val filenameToNodes = createStructuralCpg(sourceFileNames, IdPool)
val fileAndNamespaceKeyPool = keyPools.head
val typesKeyPool = keyPools(1)
val compilationUnitKeyPools = keyPools.slice(2, keyPools.size)

// TODO improve fuzzyc2cpg namespace support. Currently, everything
// is in the same global namespace so the code below is correctly.
filenameToNodes.par.foreach(createCpgForCompilationUnit)
addFunctionDeclarations()
addFilesAndNamespaces(fileAndNamespaceKeyPool)
val global = addCompilationUnits(sourceFileNames, compilationUnitKeyPools)
addFunctionDeclarations(cache)
addTypeNodes(global.usedTypes, typesKeyPool)
outputModuleFactory.persist()
}

private def addFunctionDeclarations(): Unit = {
FuzzyC2CpgCache.sortedSignatures.par.foreach { signature =>
FuzzyC2CpgCache.getDeclarations(signature).foreach {
/**
  * Builds the structural part of the CPG with `createStructuralCpg` and
  * persists it under the output identifier `__structural__`.
  *
  * @param keyPool key pool passed on to `createStructuralCpg`
  */
private def addFilesAndNamespaces(keyPool: KeyPool): Unit = {
  val structuralCpg = CpgStruct.newBuilder()
  createStructuralCpg(keyPool, structuralCpg)

  val structuralOutput = outputModuleFactory.create()
  structuralOutput.setOutputIdentifier("__structural__")
  structuralOutput.persistCpg(structuralCpg)
}

// TODO improve fuzzyc2cpg namespace support. Currently, everything
// is in the same global namespace so the code below is correct.
/**
  * Creates the CPG for every source file, processing the files in parallel.
  *
  * Files and key pools are paired by position: the i-th file receives the
  * i-th key pool, so the pool a file gets depends only on the order of
  * `sourceFileNames`. Surplus key pools are ignored.
  *
  * @param sourceFileNames names of the compilation units to process
  * @param keyPools        key pools to draw node keys from; must contain at
  *                        least one pool per source file
  * @return the `Global` instance that was passed to every compilation unit
  * @throws IllegalArgumentException if there are fewer key pools than files
  */
private def addCompilationUnits(sourceFileNames: List[String], keyPools: List[KeyPool]): Global = {
  // `zip` would silently drop files without a matching pool, so fail fast
  // here, as the indexed lookup it replaces did.
  require(
    keyPools.size >= sourceFileNames.size,
    s"Need one key pool per source file: got ${keyPools.size} key pools for ${sourceFileNames.size} files"
  )

  val global = Global()
  // Pair positionally with `zip`: indexing into a `List` is linear per
  // lookup, which made the previous pairing quadratic in the number of files.
  // NOTE(review): `global` is shared by all parallel workers below.
  sourceFileNames
    .zip(keyPools)
    .par
    .foreach { case (filename, keyPool) => createCpgForCompilationUnit(filename, keyPool, global) }
  global
}

private def addFunctionDeclarations(cache: FuzzyC2CpgCache): Unit = {
cache.sortedSignatures.par.foreach { signature =>
cache.getDeclarations(signature).foreach {
case (outputIdentifier, bodyCpg) =>
val outputModule = outputModuleFactory.create()
outputModule.setOutputIdentifier(outputIdentifier)
Expand All @@ -94,75 +119,90 @@ class FuzzyC2Cpg(outputModuleFactory: CpgOutputModuleFactory) {
}
}

private def createStructuralCpg(filenames: Set[String], keyPool: KeyPool): Set[(String, NodesForFile)] = {

def addMetaDataNode(cpg: CpgStruct.Builder): Unit = {
val metaNode = newNode(NodeType.META_DATA)
.setKey(keyPool.next)
.addStringProperty(NodePropertyName.LANGUAGE, Languages.C)
.build
cpg.addNode(metaNode)
}
/**
  * Creates the TYPE nodes for `usedTypes` via `createTypeNodes` and persists
  * them as one CPG under the output identifier `__types__`.
  *
  * @param usedTypes names of the types to create nodes for
  * @param keyPool   key pool passed on to `createTypeNodes`
  */
private def addTypeNodes(usedTypes: mutable.Set[String], keyPool: KeyPool): Unit = {
  val typesCpg = CpgStruct.newBuilder()
  val typesOutput = outputModuleFactory.create()
  typesOutput.setOutputIdentifier("__types__")

  createTypeNodes(usedTypes, keyPool, typesCpg)
  typesOutput.persistCpg(typesCpg)
}

def addAnyTypeAndNamespaceBlock(cpg: CpgStruct.Builder): Unit = {
val globalNamespaceBlockNotInFileNode = createNamespaceBlockNode(None)
cpg.addNode(globalNamespaceBlockNotInFileNode)
}
private def fileAndNamespaceGraph(filename: String, keyPool: KeyPool): (Node, Node) = {

def createFileNode(pathToFile: Path): Node = {
def createFileNode(pathToFile: Path, keyPool: KeyPool): Node = {
newNode(NodeType.FILE)
.setKey(keyPool.next)
.addStringProperty(NodePropertyName.NAME, pathToFile.toAbsolutePath.normalize.toString)
.build()
}

def createNodesForFiles(cpg: CpgStruct.Builder): Set[(String, NodesForFile)] =
filenames.map { filename =>
val pathToFile = new java.io.File(filename).toPath
val fileNode = createFileNode(pathToFile)
val namespaceBlock = createNamespaceBlockNode(Some(pathToFile))
cpg.addNode(fileNode)
cpg.addNode(namespaceBlock)
cpg.addEdge(newEdge(EdgeType.AST, namespaceBlock, fileNode))
filename -> NodesForFile(fileNode, namespaceBlock)
val cpg = CpgStruct.newBuilder()
val outputModule = outputModuleFactory.create()
outputModule.setOutputIdentifier(filename + " fileAndNamespace")

val pathToFile = new java.io.File(filename).toPath
val fileNode = createFileNode(pathToFile, keyPool)
val namespaceBlock = createNamespaceBlockNode(Some(pathToFile), keyPool)
cpg.addNode(fileNode)
cpg.addNode(namespaceBlock)
cpg.addEdge(newEdge(EdgeType.AST, namespaceBlock, fileNode))
outputModule.persistCpg(cpg)
(fileNode, namespaceBlock)
}

/**
  * Builds a NAMESPACE_BLOCK node for the global namespace.
  *
  * NAME is set to `Defines.globalNamespaceName`; FULL_NAME is computed by
  * `getGlobalNamespaceBlockFullName` from the string form of `filePath`.
  *
  * @param filePath path of the file the block belongs to, or `None` for the
  *                 namespace block that is not tied to a file
  * @param keyPool  key pool the node key is taken from
  * @return the built node
  */
private def createNamespaceBlockNode(filePath: Option[Path], keyPool: KeyPool): Node = {
  val keyedBuilder = newNode(NodeType.NAMESPACE_BLOCK).setKey(keyPool.next)
  keyedBuilder
    .addStringProperty(NodePropertyName.NAME, Defines.globalNamespaceName)
    .addStringProperty(
      NodePropertyName.FULL_NAME,
      getGlobalNamespaceBlockFullName(filePath.map(path => path.toString))
    )
    .build
}

/**
  * Adds one TYPE node per used type name to `cpg`.
  *
  * NAME, FULL_NAME and TYPE_DECL_FULL_NAME are all set to the type name.
  *
  * @param usedTypes names of the types to create nodes for
  * @param keyPool   key pool the node keys are taken from
  * @param cpg       builder that receives the created nodes
  */
private def createTypeNodes(usedTypes: mutable.Set[String], keyPool: KeyPool, cpg: CpgStruct.Builder): Unit = {
  // Keys are drawn from `keyPool` in sorted-name order rather than in the
  // iteration order of the set.
  val sortedTypeNames = usedTypes.toList.sorted
  for (name <- sortedTypeNames) {
    val typeNode = newNode(NodeType.TYPE)
      .setKey(keyPool.next)
      .addStringProperty(NodePropertyName.NAME, name)
      .addStringProperty(NodePropertyName.FULL_NAME, name)
      .addStringProperty(NodePropertyName.TYPE_DECL_FULL_NAME, name)
      .build
    cpg.addNode(typeNode)
  }
}

private def createStructuralCpg(keyPool: KeyPool, cpg: CpgStruct.Builder): Unit = {

def createNamespaceBlockNode(filePath: Option[Path]): Node = {
newNode(NodeType.NAMESPACE_BLOCK)
def addMetaDataNode(cpg: CpgStruct.Builder): Unit = {
val metaNode = newNode(NodeType.META_DATA)
.setKey(keyPool.next)
.addStringProperty(NodePropertyName.NAME, Defines.globalNamespaceName)
.addStringProperty(NodePropertyName.FULL_NAME, getGlobalNamespaceBlockFullName(filePath.map(_.toString)))
.addStringProperty(NodePropertyName.LANGUAGE, Languages.C)
.build
cpg.addNode(metaNode)
}

def addAnyTypeAndNamespaceBlock(cpg: CpgStruct.Builder): Unit = {
val globalNamespaceBlockNotInFileNode = createNamespaceBlockNode(None, keyPool)
cpg.addNode(globalNamespaceBlockNotInFileNode)
}

val cpg = CpgStruct.newBuilder()
addMetaDataNode(cpg)
addAnyTypeAndNamespaceBlock(cpg)
val filenameToNodes = createNodesForFiles(cpg)
val outputModule = outputModuleFactory.create()
outputModule.setOutputIdentifier("__structural__")
outputModule.persistCpg(cpg)
filenameToNodes
}

private case class NodesForFile(fileNode: CpgStruct.Node, namespaceBlockNode: CpgStruct.Node) {}

private def createCpgForCompilationUnit(filenameAndNodes: (String, NodesForFile)): Unit = {
val (filename, nodesForFile) = filenameAndNodes
val (fileNode, namespaceBlock) = (nodesForFile.fileNode, nodesForFile.namespaceBlockNode)
val cpg = CpgStruct.newBuilder
private def createCpgForCompilationUnit(filename: String, keyPool: KeyPool, global: Global): Unit = {
val (fileNode, namespaceBlock) = fileAndNamespaceGraph(filename, keyPool)

// We call the module parser here and register the `astVisitor` to
// receive callbacks as we walk the tree. The method body parser
// will be invoked by `astVisitor` as we walk the tree

val driver = new AntlrCModuleParserDriver()
val keyPool = IdPool
val astVisitor =
new AstVisitor(outputModuleFactory, cpg, namespaceBlock, keyPool)
new AstVisitor(outputModuleFactory, namespaceBlock, keyPool, cache, global)
driver.addObserver(astVisitor)
driver.setCpg(cpg)
driver.setKeyPool(keyPool)
driver.setOutputModuleFactory(outputModuleFactory)
driver.setFileNode(fileNode)

try {
Expand All @@ -171,19 +211,11 @@ class FuzzyC2Cpg(outputModuleFactory: CpgOutputModuleFactory) {
case ex: RuntimeException => {
logger.warn("Cannot parse module: " + filename + ", skipping")
logger.warn("Complete exception: ", ex)
return
}
case _: StackOverflowError => {
logger.warn("Cannot parse module: " + filename + ", skipping, StackOverflow")
return
}
}

val outputModule = outputModuleFactory.create()
outputModule.setOutputIdentifier(
s"$filename types"
)
outputModule.persistCpg(cpg)
}

}
Expand Down
12 changes: 8 additions & 4 deletions src/main/scala/io/shiftleft/fuzzyc2cpg/FuzzyC2CpgCache.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import io.shiftleft.proto.cpg.Cpg.CpgStruct

import scala.collection.mutable

object FuzzyC2CpgCache {
class FuzzyC2CpgCache {
private val functionDeclarations = new mutable.HashMap[String, mutable.ListBuffer[(String, CpgStruct.Builder)]]()

/**
Expand All @@ -15,6 +15,10 @@ object FuzzyC2CpgCache {
functionDeclarations.synchronized {
if (functionDeclarations.contains(signature)) {
val declList = functionDeclarations(signature)
// null is the placeholder that indicates that we've removed
// a function with this signature before, and hence, we do
// not need to add it again
if (declList == null) return
if (declList.nonEmpty) {
declList.append((outputIdentifier, cpg))
}
Expand All @@ -32,19 +36,19 @@ object FuzzyC2CpgCache {
* */
def remove(signature: String): Unit = {
functionDeclarations.synchronized {
functionDeclarations.remove(signature)
functionDeclarations.put(signature, null)
}
}

def sortedSignatures: List[String] = {
functionDeclarations.synchronized {
functionDeclarations.keySet.toList.sorted
functionDeclarations.filter(_._2 != null).keySet.toList.sorted
}
}

def getDeclarations(signature: String): List[(String, CpgStruct.Builder)] = {
functionDeclarations.synchronized {
functionDeclarations(signature).toList
functionDeclarations(signature).toList.filter(_._2 != null)
}
}

Expand Down
Loading

0 comments on commit d6a4367

Please sign in to comment.