Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[c2cpg] Recognize more source file extensions #5173

Merged
merged 6 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import org.eclipse.cdt.core.parser.DefaultLogService
import org.eclipse.cdt.core.parser.FileContent
import org.eclipse.cdt.core.parser.ScannerInfo
import org.eclipse.cdt.internal.core.dom.parser.cpp.semantics.CPPVisitor
import org.eclipse.cdt.internal.core.parser.scanner.InternalFileContent
import org.slf4j.LoggerFactory

import java.nio.file.NoSuchFileException
Expand All @@ -35,9 +34,10 @@ object CdtParser {
failure: Option[Throwable] = None
)

private def readFileAsFileContent(path: Path): InternalFileContent = {
val lines = IOUtils.readLinesInFile(path).mkString("\n").toArray
FileContent.create(path.toString, true, lines).asInstanceOf[InternalFileContent]
/** Builds the CDT `FileContent` for `file`.
  *
  * @param file
  *   the file to wrap; its `pathAsString` is used as the content's path
  * @param lines
  *   optional, already available file content as a character array. Despite the name this is the complete content,
  *   not a collection of lines. When `None`, the content is read via `IOUtils.readLinesInFile` and the lines are
  *   joined with `"\n"`.
  * @return
  *   the result of `FileContent.create` for the path, the source-file flag, and the content
  */
private def readFileAsFileContent(file: File, lines: Option[Array[Char]] = None): FileContent = {
val codeLines = lines.getOrElse(IOUtils.readLinesInFile(file.path).mkString("\n").toArray)
// Only files with a C/C++ source extension are flagged as source; anything else (e.g. headers) is not.
val isSource = FileDefaults.hasSourceFileExtension(file.pathAsString)
FileContent.create(file.pathAsString, isSource, codeLines)
}

}
Expand All @@ -62,17 +62,17 @@ class CdtParser(config: Config, compilationDatabase: mutable.LinkedHashSet[Comma
if (config.noImageLocations) opts |= ILanguage.OPTION_NO_IMAGE_LOCATIONS

/** Checks whether a preprocessed file appears to originate from a C++ file.
  *
  * The check only runs when `config.withPreprocessedFiles` is set and `file` has the preprocessed-file extension. It
  * then strips that extension, appends every known C++ extension to the remaining path, and looks for any of the
  * resulting file names, wrapped in double quotes, in the lines of `code`.
  *
  * @param file
  *   path of the file that is about to be parsed
  * @param code
  *   text that is searched line by line for the quoted candidate file names
  * @return
  *   `true` if at least one line of `code` contains one of the quoted candidate names, `false` otherwise (including
  *   when preprocessed files are disabled or `file` is not a preprocessed file)
  */
private def preprocessedFileIsFromCPPFile(file: Path, code: String): Boolean = {
if (config.withPreprocessedFiles && file.toString.endsWith(FileDefaults.PREPROCESSED_EXT)) {
val fileWithoutExt = file.toString.stripSuffix(FileDefaults.PREPROCESSED_EXT)
val filesWithCPPExt = FileDefaults.CPP_FILE_EXTENSIONS.map(ext => File(s"$fileWithoutExt$ext").name)
if (config.withPreprocessedFiles && FileDefaults.hasPreprocessedFileExtension(file.toString)) {
// The extension check above guarantees that the path contains a '.', so lastIndexOf cannot return -1 here.
val fileWithoutExt = file.toString.substring(0, file.toString.lastIndexOf("."))
// Candidate names: the bare file name (no directories) of the same base path with each C++ extension.
val filesWithCPPExt = FileDefaults.CppFileExtensions.map(ext => File(s"$fileWithoutExt$ext").name)
// A candidate only counts when it appears enclosed in double quotes on some line of the code.
code.linesIterator.exists(line => filesWithCPPExt.exists(f => line.contains(s"\"$f\"")))
} else {
false
}
}

private def createParseLanguage(file: Path, code: String): ILanguage = {
if (FileDefaults.isCPPFile(file.toString) || preprocessedFileIsFromCPPFile(file, code)) {
if (FileDefaults.hasCppFileExtension(file.toString) || preprocessedFileIsFromCPPFile(file, code)) {
GPPLanguage.getDefault
} else {
GCCLanguage.getDefault
Expand All @@ -81,7 +81,7 @@ class CdtParser(config: Config, compilationDatabase: mutable.LinkedHashSet[Comma

private def createScannerInfo(file: Path): ScannerInfo = {
val additionalIncludes =
if (FileDefaults.isCPPFile(file.toString)) parserConfig.systemIncludePathsCPP
if (FileDefaults.hasCppFileExtension(file.toString)) parserConfig.systemIncludePathsCPP
else parserConfig.systemIncludePathsC
val fileSpecificDefines = parserConfig.definedSymbolsPerFile.getOrElse(file.toString, Map.empty)
val fileSpecificIncludes = parserConfig.includesPerFile.getOrElse(file.toString, mutable.LinkedHashSet.empty)
Expand All @@ -103,14 +103,13 @@ class CdtParser(config: Config, compilationDatabase: mutable.LinkedHashSet[Comma
translationUnit
}

private def parseInternal(file: Path): ParseResult = {
val realPath = File(file)
if (realPath.isRegularFile) { // handling potentially broken symlinks
private def parseInternal(file: File): ParseResult = {
if (file.isRegularFile) { // handling potentially broken symlinks
try {
val fileContent = readFileAsFileContent(realPath.path)
val fileContent = readFileAsFileContent(file.path)
val fileContentProvider = new CustomFileContentProvider(headerFileFinder)
val lang = createParseLanguage(realPath.path, fileContent.toString)
val scannerInfo = createScannerInfo(realPath.path)
val lang = createParseLanguage(file.path, fileContent.toString)
val scannerInfo = createScannerInfo(file.path)
val translationUnit = lang.getASTTranslationUnit(fileContent, scannerInfo, fileContentProvider, null, opts, log)
val problems = CPPVisitor.getProblems(translationUnit)
if (parserConfig.logProblems) logProblems(problems.toList)
Expand All @@ -131,7 +130,8 @@ class CdtParser(config: Config, compilationDatabase: mutable.LinkedHashSet[Comma
} else {
ParseResult(
None,
failure = Option(new NoSuchFileException(s"File '$realPath' does not exist. Check for broken symlinks!"))
failure =
Option(new NoSuchFileException(s"File '${file.pathAsString}' does not exist. Check for broken symlinks!"))
)
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,37 @@
package io.joern.c2cpg.parser

import org.apache.commons.lang3.StringUtils

/** File extensions known to the c2cpg parser, together with case-insensitive predicates for classifying file paths
  * by extension.
  */
object FileDefaults {

val C_EXT: String = ".c"
val CPP_EXT: String = ".cpp"
val CPP_CXX_EXT: String = ".cxx"
val PREPROCESSED_EXT: String = ".i"
/** Extension of C source files. */
val CExt: String = ".c"
/** One of the C++ source file extensions (also contained in the C++ source extension set below). */
val CppExt: String = ".cpp"
/** Extension of preprocessed files. */
val PreprocessedExt: String = ".i"

// C header file extensions.
private val CHeaderFileExtensions: Set[String] =
Set(".h")

// C++ header file extensions.
private val CppHeaderFileExtensions: Set[String] =
Set(".hpp", ".hh", ".hp", ".hxx", ".h++", ".tcc")

/** All header file extensions: the C header extensions plus the C++ header extensions. */
val HeaderFileExtensions: Set[String] =
CHeaderFileExtensions ++ CppHeaderFileExtensions

// C++ source file extensions.
private val CppSourceFileExtensions: Set[String] =
Set(".cc", ".cxx", ".cpp", ".cp", ".ccm", ".cxxm", ".c++m")

private val CC_EXT = ".cc"
private val C_HEADER_EXT = ".h"
private val CPP_HEADER_EXT = ".hpp"
private val OTHER_HEADER_EXT = ".hh"
/** All C++ extensions: C++ source plus C++ header extensions. Note that ".h" and ".c" are not part of this set. */
val CppFileExtensions: Set[String] =
CppSourceFileExtensions ++ CppHeaderFileExtensions

val SOURCE_FILE_EXTENSIONS: Set[String] = Set(C_EXT, CC_EXT, CPP_EXT, CPP_CXX_EXT)
/** All source (non-header) extensions: the C++ source extensions plus [[CExt]]. */
val SourceFileExtensions: Set[String] =
CppSourceFileExtensions ++ Set(CExt)

val HEADER_FILE_EXTENSIONS: Set[String] = Set(C_HEADER_EXT, CPP_HEADER_EXT, OTHER_HEADER_EXT)
/** Returns `true` if `filePath` ends with one of [[CppFileExtensions]], ignoring case.
  *
  * NOTE(review): because ".c" is not in that set, a path ending in an upper-case ".C" is not matched here even
  * though the comparison ignores case -- confirm this is intended.
  */
def hasCppFileExtension(filePath: String): Boolean =
CppFileExtensions.exists(ext => StringUtils.endsWithIgnoreCase(filePath, ext))

val CPP_FILE_EXTENSIONS: Set[String] = Set(CC_EXT, CPP_EXT, CPP_CXX_EXT, CPP_HEADER_EXT)
/** Returns `true` if `filePath` ends with one of [[SourceFileExtensions]], ignoring case. Header extensions do not
  * match.
  */
def hasSourceFileExtension(filePath: String): Boolean =
SourceFileExtensions.exists(ext => StringUtils.endsWithIgnoreCase(filePath, ext))

def isCPPFile(filePath: String): Boolean =
CPP_FILE_EXTENSIONS.exists(filePath.endsWith)
/** Returns `true` if `filePath` ends with [[PreprocessedExt]], ignoring case. */
def hasPreprocessedFileExtension(filePath: String): Boolean =
StringUtils.endsWithIgnoreCase(filePath, PreprocessedExt)
}
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
package io.joern.c2cpg.parser

import better.files.*
import io.joern.c2cpg.C2Cpg.DefaultIgnoredFolders
import io.joern.x2cpg.SourceFiles
import org.jline.utils.Levenshtein

import java.nio.file.Path

class HeaderFileFinder(root: String) {

private val nameToPathMap: Map[String, List[Path]] = SourceFiles
.determine(root, FileDefaults.HEADER_FILE_EXTENSIONS)
private val nameToPathMap: Map[String, List[String]] = SourceFiles
.determine(root, FileDefaults.HeaderFileExtensions, ignoredDefaultRegex = Option(DefaultIgnoredFolders))
.map { p =>
val file = File(p)
(file.name, file.path)
(file.name, file.pathAsString)
}
.groupBy(_._1)
.map(x => (x._1, x._2.map(_._2)))
Expand All @@ -22,7 +21,7 @@ class HeaderFileFinder(root: String) {
*/
def find(path: String): Option[String] = File(path).nameOption.flatMap { name =>
val matches = nameToPathMap.getOrElse(name, List())
matches.map(_.toString).sortBy(x => Levenshtein.distance(x, path)).headOption
matches.sortBy(x => Levenshtein.distance(x, path)).headOption
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,10 @@ class AstCreationPass(cpg: Cpg, config: Config, report: Report = new Report())
}

private def sourceFilesFromDirectory(): Array[String] = {
val sourceFileExtensions = FileDefaults.SOURCE_FILE_EXTENSIONS
++ FileDefaults.HEADER_FILE_EXTENSIONS
++ Option.when(config.withPreprocessedFiles)(FileDefaults.PREPROCESSED_EXT).toList
val sourceFileExtensions =
FileDefaults.SourceFileExtensions ++
FileDefaults.HeaderFileExtensions ++
Option.when(config.withPreprocessedFiles)(FileDefaults.PreprocessedExt).toList
val allSourceFiles = SourceFiles
.determine(
config.inputPath,
Expand All @@ -59,8 +60,8 @@ class AstCreationPass(cpg: Cpg, config: Config, report: Report = new Report())
.toArray
if (config.withPreprocessedFiles) {
allSourceFiles.filter {
case f if !f.endsWith(FileDefaults.PREPROCESSED_EXT) =>
val fAsPreprocessedFile = s"${f.substring(0, f.lastIndexOf("."))}${FileDefaults.PREPROCESSED_EXT}"
case f if !FileDefaults.hasPreprocessedFileExtension(f) =>
val fAsPreprocessedFile = s"${f.substring(0, f.lastIndexOf("."))}${FileDefaults.PreprocessedExt}"
!allSourceFiles.exists { sourceFile => f != sourceFile && sourceFile == fAsPreprocessedFile }
case _ => true
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class PreprocessorPass(config: Config) {
SourceFiles
.determine(
config.inputPath,
FileDefaults.SOURCE_FILE_EXTENSIONS,
FileDefaults.SourceFileExtensions,
ignoredDefaultRegex = Option(DefaultIgnoredFolders),
ignoredFilesRegex = Option(config.ignoredFilesRegex),
ignoredFilesPath = Option(config.ignoredFiles)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ object FileHandlingTests {

class FileHandlingTests
extends Code2CpgFixture(() =>
new CDefaultTestCpg(FileDefaults.C_EXT) {
new CDefaultTestCpg(FileDefaults.CExt) {
override def codeFilePreProcessing(codeFile: Path): Unit = {
if (codeFile.toString.endsWith(FileHandlingTests.brokenLinkedFile)) {
File(codeFile).delete().symbolicLinkTo(File("does/not/exist.c"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import io.joern.c2cpg.testfixtures.C2CpgSuite
import io.shiftleft.codepropertygraph.generated.ControlStructureTypes
import io.shiftleft.semanticcpg.language.*

class ControlStructureTests extends C2CpgSuite(FileDefaults.CPP_EXT) {
class ControlStructureTests extends C2CpgSuite(FileDefaults.CppExt) {

"ControlStructureTest1" should {
val cpg = code("""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,7 @@ class CfgCreationPassTests extends CfgTestFixture(() => new CCfgTestCpg) {
}
}

class CppCfgCreationPassTests extends CfgTestFixture(() => new CCfgTestCpg(FileDefaults.CPP_EXT)) {
class CppCfgCreationPassTests extends CfgTestFixture(() => new CCfgTestCpg(FileDefaults.CppExt)) {
override def code(code: String): CCfgTestCpg = {
super.code(s"RET func() { $code }")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import io.joern.c2cpg.testfixtures.C2CpgSuite
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal

class ClassTypeTests extends C2CpgSuite(FileDefaults.CPP_EXT) {
class ClassTypeTests extends C2CpgSuite(FileDefaults.CppExt) {

"handling C++ classes (code example 1)" should {
val cpg = code("""
Expand Down Expand Up @@ -79,7 +79,7 @@ class ClassTypeTests extends C2CpgSuite(FileDefaults.CPP_EXT) {
x.aliasTypeFullName shouldBe None
x.order shouldBe 1
x.filename shouldBe "Test0.cpp"
x.filename.endsWith(FileDefaults.CPP_EXT) shouldBe true
x.filename.endsWith(FileDefaults.CppExt) shouldBe true
}

"should contain type decl for alias `mytype` of `int`" in {
Expand All @@ -91,7 +91,7 @@ class ClassTypeTests extends C2CpgSuite(FileDefaults.CPP_EXT) {
x.code shouldBe "typedef int mytype;"
x.order shouldBe 2
x.filename shouldBe "Test0.cpp"
x.filename.endsWith(FileDefaults.CPP_EXT) shouldBe true
x.filename.endsWith(FileDefaults.CppExt) shouldBe true
}

"should contain type decl for external type `int`" in {
Expand Down Expand Up @@ -126,7 +126,7 @@ class ClassTypeTests extends C2CpgSuite(FileDefaults.CPP_EXT) {
}

"should allow traversing from type to enclosing file" in {
cpg.typeDecl.file.filter(_.name.endsWith(FileDefaults.CPP_EXT)).l should not be empty
cpg.typeDecl.file.filter(_.name.endsWith(FileDefaults.CppExt)).l should not be empty
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import io.shiftleft.codepropertygraph.generated.nodes.Identifier
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal

class EnumTypeTests extends C2CpgSuite(fileSuffix = FileDefaults.CPP_EXT) {
class EnumTypeTests extends C2CpgSuite(fileSuffix = FileDefaults.CppExt) {

"Enums" should {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import io.shiftleft.codepropertygraph.generated.nodes.Identifier
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal

class NamespaceTypeTests extends C2CpgSuite(fileSuffix = FileDefaults.CPP_EXT) {
class NamespaceTypeTests extends C2CpgSuite(fileSuffix = FileDefaults.CppExt) {

"Namespaces" should {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import io.joern.c2cpg.testfixtures.C2CpgSuite
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal

class TemplateTypeTests extends C2CpgSuite(fileSuffix = FileDefaults.CPP_EXT) {
class TemplateTypeTests extends C2CpgSuite(fileSuffix = FileDefaults.CppExt) {

"Templates" should {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ package io.joern.c2cpg.testfixtures
import io.joern.c2cpg.parser.FileDefaults
import io.joern.x2cpg.testfixtures.Code2CpgFixture

class AstC2CpgSuite(fileSuffix: String = FileDefaults.C_EXT) extends Code2CpgFixture(() => new CAstTestCpg(fileSuffix))
class AstC2CpgSuite(fileSuffix: String = FileDefaults.CExt) extends Code2CpgFixture(() => new CAstTestCpg(fileSuffix))
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import io.joern.dataflowengineoss.semanticsloader.{FlowSemantic, Semantics}
import io.joern.x2cpg.testfixtures.Code2CpgFixture

class C2CpgSuite(
fileSuffix: String = FileDefaults.C_EXT,
fileSuffix: String = FileDefaults.CExt,
withOssDataflow: Boolean = false,
semantics: Semantics = DefaultSemantics(),
withPostProcessing: Boolean = false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ package io.joern.c2cpg.testfixtures
import io.joern.c2cpg.parser.FileDefaults
import io.joern.x2cpg.testfixtures.CfgTestCpg

class CCfgTestCpg(override val fileSuffix: String = FileDefaults.C_EXT) extends CfgTestCpg with C2CpgFrontend {}
class CCfgTestCpg(override val fileSuffix: String = FileDefaults.CExt) extends CfgTestCpg with C2CpgFrontend {}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import io.joern.x2cpg.testfixtures.TestCpg
import io.shiftleft.semanticcpg.layers.LayerCreatorContext

class DataFlowTestCpg extends TestCpg with C2CpgFrontend {
override val fileSuffix: String = FileDefaults.C_EXT
override val fileSuffix: String = FileDefaults.CExt

override def applyPasses(): Unit = {
X2Cpg.applyDefaultOverlays(this)
Expand Down
Loading