From d6a4367109ccf3d4e32b568f2108ec213973c93b Mon Sep 17 00:00:00 2001 From: Fabian Yamaguchi Date: Thu, 9 Jul 2020 15:52:53 +0200 Subject: [PATCH] Stable node ids (#201) * Light refactoring * Use keypools * Make file/namespaceblock node creation accessible * More light refactoring * Create file and namespaceblock nodes per comp unit * Cleanup * Sort list of filenames prior to processing * Fix FuzzyC2CpgCache (declaration handling) * More work on stable ids + test * Do not hand in a global CPG --- .../fuzzyc2cpg/parser/AntlrParserDriver.java | 18 ++- .../io/shiftleft/fuzzyc2cpg/AstVisitor.scala | 23 ++- .../io/shiftleft/fuzzyc2cpg/FuzzyC2Cpg.scala | 150 +++++++++++------- .../fuzzyc2cpg/FuzzyC2CpgCache.scala | 12 +- .../io/shiftleft/fuzzyc2cpg/KeyPools.scala | 21 +++ .../io/shiftleft/fuzzyc2cpg/SourceFiles.scala | 4 +- .../fuzzyc2cpg/astnew/AstToCpgConverter.scala | 20 +-- src/test/resources/testcode/stableid/file1.c | 12 ++ src/test/resources/testcode/stableid/file2.c | 11 ++ .../fuzzyc2cpg/StableOutputTests.scala | 33 ++++ .../fuzzyc2cpg/astnew/AstToCpgTests.scala | 6 +- 11 files changed, 214 insertions(+), 96 deletions(-) create mode 100644 src/main/scala/io/shiftleft/fuzzyc2cpg/KeyPools.scala create mode 100644 src/test/resources/testcode/stableid/file1.c create mode 100644 src/test/resources/testcode/stableid/file2.c create mode 100644 src/test/scala/io/shiftleft/fuzzyc2cpg/StableOutputTests.scala diff --git a/src/main/java/io/shiftleft/fuzzyc2cpg/parser/AntlrParserDriver.java b/src/main/java/io/shiftleft/fuzzyc2cpg/parser/AntlrParserDriver.java index 19c2d65..512b81c 100644 --- a/src/main/java/io/shiftleft/fuzzyc2cpg/parser/AntlrParserDriver.java +++ b/src/main/java/io/shiftleft/fuzzyc2cpg/parser/AntlrParserDriver.java @@ -4,7 +4,10 @@ import io.shiftleft.fuzzyc2cpg.ast.AstNode; import io.shiftleft.fuzzyc2cpg.ast.AstNodeBuilder; import io.shiftleft.fuzzyc2cpg.ast.logical.statements.CompoundStatement; +import io.shiftleft.fuzzyc2cpg.output.CpgOutputModule; +import io.shiftleft.fuzzyc2cpg.output.CpgOutputModuleFactory; import io.shiftleft.passes.KeyPool; +import io.shiftleft.proto.cpg.Cpg.CpgStruct; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -39,16 +42,17 @@ abstract public class AntlrParserDriver { private CommonParserContext context = null; private List observers = new ArrayList<>(); - private Cpg.CpgStruct.Builder cpg; + private Cpg.CpgStruct.Builder cpg = CpgStruct.newBuilder(); private Cpg.CpgStruct.Node fileNode; private KeyPool keyPool; + private CpgOutputModuleFactory outputModuleFactory; public AntlrParserDriver() { super(); } - public void setCpg(Cpg.CpgStruct.Builder cpg) { - this.cpg = cpg; + public void setOutputModuleFactory(CpgOutputModuleFactory factory) { + this.outputModuleFactory = factory; } public void setKeyPool(KeyPool keyPool) { @@ -63,13 +67,19 @@ public void setFileNode(Cpg.CpgStruct.Node fileNode) { public abstract Lexer createLexer(CharStream input); - public void parseAndWalkFile(String filename) throws ParserException { + public void parseAndWalkFile(String filename) throws ParserException, IOException { handleHiddenTokens(filename); TokenSubStream stream = createTokenStreamFromFile(filename); initializeContextWithFile(filename, stream); ParseTree tree = parseTokenStream(stream); walkTree(tree); + + CpgOutputModule outputModule = outputModuleFactory.create(); + outputModule.setOutputIdentifier( + filename + " driver" + ); + outputModule.persistCpg(cpg); } private void handleHiddenTokens(String filename) { diff --git a/src/main/scala/io/shiftleft/fuzzyc2cpg/AstVisitor.scala b/src/main/scala/io/shiftleft/fuzzyc2cpg/AstVisitor.scala index 672ae2f..4c7e32d 100644 --- a/src/main/scala/io/shiftleft/fuzzyc2cpg/AstVisitor.scala +++ b/src/main/scala/io/shiftleft/fuzzyc2cpg/AstVisitor.scala @@ -18,12 +18,14 @@ import io.shiftleft.proto.cpg.Cpg.CpgStruct.Node import org.antlr.v4.runtime.ParserRuleContext class AstVisitor(outputModuleFactory: CpgOutputModuleFactory, - structureCpg: CpgStruct.Builder, astParentNode: Node, - keyPool: KeyPool) + keyPool: KeyPool, + cache: FuzzyC2CpgCache, + global: Global) extends ASTNodeVisitor with AntlrParserDriverObserver { private var fileNameOption = Option.empty[String] + private val structureCpg = CpgStruct.newBuilder() /** * Callback triggered for each function definition @@ -37,7 +39,7 @@ class AstVisitor(outputModuleFactory: CpgOutputModuleFactory, val bodyCpg = CpgStruct.newBuilder() val cpgAdapter = new ProtoCpgAdapter(bodyCpg, keyPool) val astToCpgConverter = - new AstToCpgConverter(astParentNode, cpgAdapter) + new AstToCpgConverter(astParentNode, cpgAdapter, global) astToCpgConverter.convert(functionDef) val astToCfgConverter = @@ -49,9 +51,9 @@ class AstVisitor(outputModuleFactory: CpgOutputModuleFactory, // corresponding definition, in which case the declaration will be // removed again and is never persisted. Persisting of declarations // happens after concurrent processing of compilation units. - FuzzyC2CpgCache.add(functionDef.getFunctionSignature(false), outputIdentifier, bodyCpg) + cache.add(functionDef.getFunctionSignature(false), outputIdentifier, bodyCpg) } else { - FuzzyC2CpgCache.remove(functionDef.getFunctionSignature(false)) + cache.remove(functionDef.getFunctionSignature(false)) outputModule.persistCpg(bodyCpg) } } @@ -62,7 +64,7 @@ class AstVisitor(outputModuleFactory: CpgOutputModuleFactory, override def visit(classDefStatement: ClassDefStatement): Unit = { val cpgAdapter = new ProtoCpgAdapter(structureCpg, keyPool) val astToCpgConverter = - new AstToCpgConverter(astParentNode, cpgAdapter) + new AstToCpgConverter(astParentNode, cpgAdapter, global) astToCpgConverter.convert(classDefStatement) } @@ -72,7 +74,7 @@ class AstVisitor(outputModuleFactory: CpgOutputModuleFactory, override def visit(identifierDeclStmt: IdentifierDeclStatement): Unit = { val cpgAdapter = new ProtoCpgAdapter(structureCpg, keyPool) val astToCpgConverter = - new AstToCpgConverter(astParentNode, cpgAdapter) + new AstToCpgConverter(astParentNode, cpgAdapter, global) astToCpgConverter.convert(identifierDeclStmt) } @@ -84,7 +86,12 @@ class AstVisitor(outputModuleFactory: CpgOutputModuleFactory, fileNameOption = Some(filename) } - override def endOfUnit(ctx: ParserRuleContext, filename: String): Unit = {} + override def endOfUnit(ctx: ParserRuleContext, filename: String): Unit = { + val identifier = s"$filename types" + val outputModule = outputModuleFactory.create() + outputModule.setOutputIdentifier(identifier) + outputModule.persistCpg(structureCpg) + } override def processItem[T <: AstNode](node: T, builderStack: util.Stack[AstNodeBuilder[_ <: AstNode]]): Unit = { node.accept(this) diff --git a/src/main/scala/io/shiftleft/fuzzyc2cpg/FuzzyC2Cpg.scala b/src/main/scala/io/shiftleft/fuzzyc2cpg/FuzzyC2Cpg.scala index e3dc0b5..b8a5c25 100644 --- a/src/main/scala/io/shiftleft/fuzzyc2cpg/FuzzyC2Cpg.scala +++ b/src/main/scala/io/shiftleft/fuzzyc2cpg/FuzzyC2Cpg.scala @@ -12,19 +12,22 @@ import io.shiftleft.proto.cpg.Cpg.CpgStruct.Node.NodeType import io.shiftleft.proto.cpg.Cpg.{CpgStruct, NodePropertyName} import java.nio.file.{Files, Path} import java.util.concurrent.LinkedBlockingQueue - import io.shiftleft.passes.KeyPool +import scala.collection.mutable import scala.collection.mutable.ListBuffer import scala.collection.parallel.CollectionConverters._ import scala.util.control.NonFatal +case class Global(usedTypes: mutable.Set[String] = new mutable.HashSet[String]) + class FuzzyC2Cpg(outputModuleFactory: CpgOutputModuleFactory) { def this(outputPath: String) = { this(new OutputModuleFactory(outputPath, true).asInstanceOf[CpgOutputModuleFactory]) } + private val cache = new FuzzyC2CpgCache private val logger = LoggerFactory.getLogger(getClass) def runWithPreprocessorAndOutput(sourcePaths: Set[String], @@ -73,19 +76,41 @@ class FuzzyC2Cpg(outputModuleFactory: CpgOutputModuleFactory) { def runAndOutput(sourcePaths: Set[String], sourceFileExtensions: Set[String]): Unit = { val sourceFileNames = SourceFiles.determine(sourcePaths, sourceFileExtensions) + val keyPools = KeyPools.obtain(sourceFileNames.size.toLong + 2) - val filenameToNodes = createStructuralCpg(sourceFileNames, IdPool) + val fileAndNamespaceKeyPool = keyPools.head + val typesKeyPool = keyPools(1) + val compilationUnitKeyPools = keyPools.slice(2, keyPools.size) - // TODO improve fuzzyc2cpg namespace support. Currently, everything - // is in the same global namespace so the code below is correctly. - filenameToNodes.par.foreach(createCpgForCompilationUnit) - addFunctionDeclarations() + addFilesAndNamespaces(fileAndNamespaceKeyPool) + val global = addCompilationUnits(sourceFileNames, compilationUnitKeyPools) + addFunctionDeclarations(cache) + addTypeNodes(global.usedTypes, typesKeyPool) outputModuleFactory.persist() } - private def addFunctionDeclarations(): Unit = { - FuzzyC2CpgCache.sortedSignatures.par.foreach { signature => - FuzzyC2CpgCache.getDeclarations(signature).foreach { + private def addFilesAndNamespaces(keyPool: KeyPool): Unit = { + val fileAndNamespaceCpg = CpgStruct.newBuilder() + createStructuralCpg(keyPool, fileAndNamespaceCpg) + val outputModule = outputModuleFactory.create() + outputModule.setOutputIdentifier("__structural__") + outputModule.persistCpg(fileAndNamespaceCpg) + } + + // TODO improve fuzzyc2cpg namespace support. Currently, everything + // is in the same global namespace so the code below is correct. + private def addCompilationUnits(sourceFileNames: List[String], keyPools: List[KeyPool]): Global = { + val global = Global() + sourceFileNames.zipWithIndex + .map { case (filename, i) => (filename, keyPools(i)) } + .par + .foreach { case (filename, keyPool) => createCpgForCompilationUnit(filename, keyPool, global) } + global + } + + private def addFunctionDeclarations(cache: FuzzyC2CpgCache): Unit = { + cache.sortedSignatures.par.foreach { signature => + cache.getDeclarations(signature).foreach { case (outputIdentifier, bodyCpg) => val outputModule = outputModuleFactory.create() outputModule.setOutputIdentifier(outputIdentifier) @@ -94,75 +119,90 @@ class FuzzyC2Cpg(outputModuleFactory: CpgOutputModuleFactory) { } } - private def createStructuralCpg(filenames: Set[String], keyPool: KeyPool): Set[(String, NodesForFile)] = { - - def addMetaDataNode(cpg: CpgStruct.Builder): Unit = { - val metaNode = newNode(NodeType.META_DATA) - .setKey(keyPool.next) - .addStringProperty(NodePropertyName.LANGUAGE, Languages.C) - .build - cpg.addNode(metaNode) - } + private def addTypeNodes(usedTypes: mutable.Set[String], keyPool: KeyPool): Unit = { + val cpg = CpgStruct.newBuilder() + val outputModule = outputModuleFactory.create() + outputModule.setOutputIdentifier("__types__") + createTypeNodes(usedTypes, keyPool, cpg) + outputModule.persistCpg(cpg) + } - def addAnyTypeAndNamespaceBlock(cpg: CpgStruct.Builder): Unit = { - val globalNamespaceBlockNotInFileNode = createNamespaceBlockNode(None) - cpg.addNode(globalNamespaceBlockNotInFileNode) - } + private def fileAndNamespaceGraph(filename: String, keyPool: KeyPool): (Node, Node) = { - def createFileNode(pathToFile: Path): Node = { + def createFileNode(pathToFile: Path, keyPool: KeyPool): Node = { newNode(NodeType.FILE) .setKey(keyPool.next) .addStringProperty(NodePropertyName.NAME, pathToFile.toAbsolutePath.normalize.toString) .build() } - def createNodesForFiles(cpg: CpgStruct.Builder): Set[(String, NodesForFile)] = - filenames.map { filename => - val pathToFile = new java.io.File(filename).toPath - val fileNode = createFileNode(pathToFile) - val namespaceBlock = createNamespaceBlockNode(Some(pathToFile)) - cpg.addNode(fileNode) - cpg.addNode(namespaceBlock) - cpg.addEdge(newEdge(EdgeType.AST, namespaceBlock, fileNode)) - filename -> NodesForFile(fileNode, namespaceBlock) + val cpg = CpgStruct.newBuilder() + val outputModule = outputModuleFactory.create() + outputModule.setOutputIdentifier(filename + " fileAndNamespace") + + val pathToFile = new java.io.File(filename).toPath + val fileNode = createFileNode(pathToFile, keyPool) + val namespaceBlock = createNamespaceBlockNode(Some(pathToFile), keyPool) + cpg.addNode(fileNode) + cpg.addNode(namespaceBlock) + cpg.addEdge(newEdge(EdgeType.AST, namespaceBlock, fileNode)) + outputModule.persistCpg(cpg) + (fileNode, namespaceBlock) + } + + private def createNamespaceBlockNode(filePath: Option[Path], keyPool: KeyPool): Node = { + newNode(NodeType.NAMESPACE_BLOCK) + .setKey(keyPool.next) + .addStringProperty(NodePropertyName.NAME, Defines.globalNamespaceName) + .addStringProperty(NodePropertyName.FULL_NAME, getGlobalNamespaceBlockFullName(filePath.map(_.toString))) + .build + } + + private def createTypeNodes(usedTypes: mutable.Set[String], keyPool: KeyPool, cpg: CpgStruct.Builder): Unit = { + usedTypes.toList.sorted + .foreach { typeName => + val node = newNode(NodeType.TYPE) + .setKey(keyPool.next) + .addStringProperty(NodePropertyName.NAME, typeName) + .addStringProperty(NodePropertyName.FULL_NAME, typeName) + .addStringProperty(NodePropertyName.TYPE_DECL_FULL_NAME, typeName) + .build + cpg.addNode(node) } + } + + private def createStructuralCpg(keyPool: KeyPool, cpg: CpgStruct.Builder): Unit = { - def createNamespaceBlockNode(filePath: Option[Path]): Node = { - newNode(NodeType.NAMESPACE_BLOCK) + def addMetaDataNode(cpg: CpgStruct.Builder): Unit = { + val metaNode = newNode(NodeType.META_DATA) .setKey(keyPool.next) - .addStringProperty(NodePropertyName.NAME, Defines.globalNamespaceName) - .addStringProperty(NodePropertyName.FULL_NAME, getGlobalNamespaceBlockFullName(filePath.map(_.toString))) + .addStringProperty(NodePropertyName.LANGUAGE, Languages.C) .build + cpg.addNode(metaNode) + } + + def addAnyTypeAndNamespaceBlock(cpg: CpgStruct.Builder): Unit = { + val globalNamespaceBlockNotInFileNode = createNamespaceBlockNode(None, keyPool) + cpg.addNode(globalNamespaceBlockNotInFileNode) } - val cpg = CpgStruct.newBuilder() addMetaDataNode(cpg) addAnyTypeAndNamespaceBlock(cpg) - val filenameToNodes = createNodesForFiles(cpg) - val outputModule = outputModuleFactory.create() - outputModule.setOutputIdentifier("__structural__") - outputModule.persistCpg(cpg) - filenameToNodes } - private case class NodesForFile(fileNode: CpgStruct.Node, namespaceBlockNode: CpgStruct.Node) {} - - private def createCpgForCompilationUnit(filenameAndNodes: (String, NodesForFile)): Unit = { - val (filename, nodesForFile) = filenameAndNodes - val (fileNode, namespaceBlock) = (nodesForFile.fileNode, nodesForFile.namespaceBlockNode) - val cpg = CpgStruct.newBuilder + private def createCpgForCompilationUnit(filename: String, keyPool: KeyPool, global: Global): Unit = { + val (fileNode, namespaceBlock) = fileAndNamespaceGraph(filename, keyPool) // We call the module parser here and register the `astVisitor` to // receive callbacks as we walk the tree. The method body parser // will the invoked by `astVisitor` as we walk the tree val driver = new AntlrCModuleParserDriver() - val keyPool = IdPool val astVisitor = - new AstVisitor(outputModuleFactory, cpg, namespaceBlock, keyPool) + new AstVisitor(outputModuleFactory, namespaceBlock, keyPool, cache, global) driver.addObserver(astVisitor) - driver.setCpg(cpg) driver.setKeyPool(keyPool) + driver.setOutputModuleFactory(outputModuleFactory) driver.setFileNode(fileNode) try { @@ -171,19 +211,11 @@ class FuzzyC2Cpg(outputModuleFactory: CpgOutputModuleFactory) { case ex: RuntimeException => { logger.warn("Cannot parse module: " + filename + ", skipping") logger.warn("Complete exception: ", ex) - return } case _: StackOverflowError => { logger.warn("Cannot parse module: " + filename + ", skipping, StackOverflow") - return } } - - val outputModule = outputModuleFactory.create() - outputModule.setOutputIdentifier( - s"$filename types" - ) - outputModule.persistCpg(cpg) } } diff --git a/src/main/scala/io/shiftleft/fuzzyc2cpg/FuzzyC2CpgCache.scala b/src/main/scala/io/shiftleft/fuzzyc2cpg/FuzzyC2CpgCache.scala index 72d50ef..10fcee0 100644 --- a/src/main/scala/io/shiftleft/fuzzyc2cpg/FuzzyC2CpgCache.scala +++ b/src/main/scala/io/shiftleft/fuzzyc2cpg/FuzzyC2CpgCache.scala @@ -4,7 +4,7 @@ import io.shiftleft.proto.cpg.Cpg.CpgStruct import scala.collection.mutable -object FuzzyC2CpgCache { +class FuzzyC2CpgCache { private val functionDeclarations = new mutable.HashMap[String, mutable.ListBuffer[(String, CpgStruct.Builder)]]() /** @@ -15,6 +15,10 @@ object FuzzyC2CpgCache { functionDeclarations.synchronized { if (functionDeclarations.contains(signature)) { val declList = functionDeclarations(signature) + // null is the placeholder that indicates that we've removed + // a function with this signature before, and hence, we do + // not need to add it again + if (declList == null) return if (declList.nonEmpty) { declList.append((outputIdentifier, cpg)) } @@ -32,19 +36,19 @@ object FuzzyC2CpgCache { * */ def remove(signature: String): Unit = { functionDeclarations.synchronized { - functionDeclarations.remove(signature) + functionDeclarations.put(signature, null) } } def sortedSignatures: List[String] = { functionDeclarations.synchronized { - functionDeclarations.keySet.toList.sorted + functionDeclarations.filter(_._2 != null).keySet.toList.sorted } } def getDeclarations(signature: String): List[(String, CpgStruct.Builder)] = { functionDeclarations.synchronized { - functionDeclarations(signature).toList + functionDeclarations(signature).toList.filter(_._2 != null) } } diff --git a/src/main/scala/io/shiftleft/fuzzyc2cpg/KeyPools.scala b/src/main/scala/io/shiftleft/fuzzyc2cpg/KeyPools.scala new file mode 100644 index 0000000..e679075 --- /dev/null +++ b/src/main/scala/io/shiftleft/fuzzyc2cpg/KeyPools.scala @@ -0,0 +1,21 @@ +package io.shiftleft.fuzzyc2cpg + +import io.shiftleft.passes.KeyPool + +object KeyPools { + + /** + * Divide the keyspace into n intervals and return + * a list of corresponding key pools. + * */ + def obtain(n: Long, maxValue: Long = Long.MaxValue): List[KeyPool] = { + val nIntervals = Math.max(n, 1) + val intervalLen: Long = maxValue / nIntervals + List.range(0, nIntervals).map { i => + val first = i * intervalLen + val last = first + intervalLen - 1 + new KeyPool(first, last) + } + } + +} diff --git a/src/main/scala/io/shiftleft/fuzzyc2cpg/SourceFiles.scala b/src/main/scala/io/shiftleft/fuzzyc2cpg/SourceFiles.scala index 7475d5a..0a8b295 100644 --- a/src/main/scala/io/shiftleft/fuzzyc2cpg/SourceFiles.scala +++ b/src/main/scala/io/shiftleft/fuzzyc2cpg/SourceFiles.scala @@ -8,7 +8,7 @@ object SourceFiles { * For a given array of input paths, determine all C/C++ * source files by inspecting filename extensions. * */ - def determine(inputPaths: Set[String], sourceFileExtensions: Set[String]): Set[String] = { + def determine(inputPaths: Set[String], sourceFileExtensions: Set[String]): List[String] = { def hasSourceFileExtension(file: File): Boolean = file.extension.exists(sourceFileExtensions.contains) @@ -21,6 +21,6 @@ object SourceFiles { .flatMap(_.listRecursively.filter(hasSourceFileExtension)) .map(_.toString) - matchingFiles ++ matchingFilesFromDirs + (matchingFiles ++ matchingFilesFromDirs).toList.sorted } } diff --git a/src/main/scala/io/shiftleft/fuzzyc2cpg/astnew/AstToCpgConverter.scala b/src/main/scala/io/shiftleft/fuzzyc2cpg/astnew/AstToCpgConverter.scala index 3d9494a..1114ab8 100644 --- a/src/main/scala/io/shiftleft/fuzzyc2cpg/astnew/AstToCpgConverter.scala +++ b/src/main/scala/io/shiftleft/fuzzyc2cpg/astnew/AstToCpgConverter.scala @@ -2,7 +2,7 @@ package io.shiftleft.fuzzyc2cpg.astnew import scala.jdk.CollectionConverters._ import io.shiftleft.codepropertygraph.generated.{EvaluationStrategies, Operators} -import io.shiftleft.fuzzyc2cpg.Defines +import io.shiftleft.fuzzyc2cpg.{Defines, Global} import io.shiftleft.fuzzyc2cpg.adapter.{CpgAdapter, EdgeKind, NodeKind, NodeProperty} import io.shiftleft.fuzzyc2cpg.adapter.NodeProperty.NodeProperty import io.shiftleft.fuzzyc2cpg.ast.AstNode @@ -27,7 +27,8 @@ object AstToCpgConverter { class AstToCpgConverter[NodeBuilderType, NodeType, EdgeBuilderType, EdgeType]( cpgParent: NodeType, - adapter: CpgAdapter[NodeBuilderType, NodeType, EdgeBuilderType, EdgeType]) + adapter: CpgAdapter[NodeBuilderType, NodeType, EdgeBuilderType, EdgeType], + global: Global) extends ASTNodeVisitor { import AstToCpgConverter._ @@ -35,7 +36,6 @@ class AstToCpgConverter[NodeBuilderType, NodeType, EdgeBuilderType, EdgeType]( private val scope = new Scope[String, (NodeType, String), NodeType]() private var methodNode = Option.empty[NodeType] private var methodReturnNode = Option.empty[NodeType] - private var typeNames = Set.empty[String] pushContext(cpgParent, 1) @@ -116,7 +116,6 @@ class AstToCpgConverter[NodeBuilderType, NodeType, EdgeBuilderType, EdgeType]( def convert(astNode: AstNode): Unit = { astNode.accept(this) - createTypeNodes() } override def visit(astFunction: FunctionDefBase): Unit = { @@ -883,21 +882,10 @@ class AstToCpgConverter[NodeBuilderType, NodeType, EdgeBuilderType, EdgeType]( } private def registerType(typeName: String): String = { - typeNames += typeName + global.usedTypes += typeName typeName } - private def createTypeNodes(): Unit = { - typeNames.foreach { typeName => - adapter - .createNodeBuilder(NodeKind.TYPE) - .addProperty(NodeProperty.NAME, typeName) - .addProperty(NodeProperty.FULL_NAME, typeName) - .addProperty(NodeProperty.TYPE_DECL_FULL_NAME, typeName) - .createNode() - } - } - // TODO Implement this method properly, the current implementation is just a // quick hack to have some implementation at all. private def deriveConstantTypeFromCode(code: String): String = { diff --git a/src/test/resources/testcode/stableid/file1.c b/src/test/resources/testcode/stableid/file1.c new file mode 100644 index 0000000..d148ae9 --- /dev/null +++ b/src/test/resources/testcode/stableid/file1.c @@ -0,0 +1,12 @@ + +int y; + +struct file1_struct { + int member; +}; + +void stub_in_file1(); + +int func_in_file1(int param) { + return 1; +} diff --git a/src/test/resources/testcode/stableid/file2.c b/src/test/resources/testcode/stableid/file2.c new file mode 100644 index 0000000..7db5a1b --- /dev/null +++ b/src/test/resources/testcode/stableid/file2.c @@ -0,0 +1,11 @@ + +int x; + +int func_in_file2(int param) { + return 2; +} + +void stub_in_file2(); + +// Declaration of function available in the other file +int func_in_file1(int param); diff --git a/src/test/scala/io/shiftleft/fuzzyc2cpg/StableOutputTests.scala b/src/test/scala/io/shiftleft/fuzzyc2cpg/StableOutputTests.scala new file mode 100644 index 0000000..51cd216 --- /dev/null +++ b/src/test/scala/io/shiftleft/fuzzyc2cpg/StableOutputTests.scala @@ -0,0 +1,33 @@ +package io.shiftleft.fuzzyc2cpg + +import io.shiftleft.fuzzyc2cpg.output.inmemory.OutputModuleFactory +import org.scalatest.{Matchers, WordSpec} + +import scala.jdk.CollectionConverters._ + +class StableOutputTests extends WordSpec with Matchers { + + def createNodeStrings(): String = { + val projectName = "stableid" + val dirName = String.format("src/test/resources/testcode/%s", projectName) + val inmemoryOutputFactory = new OutputModuleFactory() + val fuzzyc2Cpg = new FuzzyC2Cpg(inmemoryOutputFactory) + fuzzyc2Cpg.runAndOutput(Set(dirName), Set(".c", ".cc", ".cpp", ".h", ".hpp")) + val cpg = inmemoryOutputFactory.getInternalGraph + val nodes = cpg.graph.V().asScala.toList + nodes.sortBy(_.id2()).map(x => x.label + ": " + x.propertyMap().asScala.toString).mkString("\n") + } + + "Nodes in test graph" should { + "should be exactly the same on ten consecutive runs" in { + List + .range(0, 10) + .map { _ => + createNodeStrings() + } + .distinct + .size shouldBe 1 + } + } + +} diff --git a/src/test/scala/io/shiftleft/fuzzyc2cpg/astnew/AstToCpgTests.scala b/src/test/scala/io/shiftleft/fuzzyc2cpg/astnew/AstToCpgTests.scala index b331b69..f496831 100644 --- a/src/test/scala/io/shiftleft/fuzzyc2cpg/astnew/AstToCpgTests.scala +++ b/src/test/scala/io/shiftleft/fuzzyc2cpg/astnew/AstToCpgTests.scala @@ -4,9 +4,8 @@ import gremlin.scala._ import org.antlr.v4.runtime.{CharStreams, ParserRuleContext} import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph import org.scalatest.{Matchers, WordSpec} - import io.shiftleft.codepropertygraph.generated.{EdgeTypes, NodeKeys, NodeTypes, Operators} -import io.shiftleft.fuzzyc2cpg.ModuleLexer +import io.shiftleft.fuzzyc2cpg.{Global, ModuleLexer} import io.shiftleft.fuzzyc2cpg.adapter.CpgAdapter import io.shiftleft.fuzzyc2cpg.adapter.EdgeKind.EdgeKind import io.shiftleft.fuzzyc2cpg.adapter.EdgeProperty.EdgeProperty @@ -128,8 +127,9 @@ class AstToCpgTests extends WordSpec with Matchers { protected val astParent = List(astParentNode) private val cpgAdapter = new GraphAdapter(graph) + val global = Global() nodes.foreach { node => - val astToProtoConverter = new AstToCpgConverter(astParentNode, cpgAdapter) + val astToProtoConverter = new AstToCpgConverter(astParentNode, cpgAdapter, global) astToProtoConverter.convert(node) }