Skip to content

Commit

Permalink
X2CPG : Refactor implementation and usage of SourceFiles determine me…
Browse files Browse the repository at this point in the history
…thod (#3813)

* refactor implementation and usage of SourceFiles determine method

* removed a local test case

* remove redundant determine method

* fix - failing test case

* add - default regex for C preprocessor pass

* fix - misspelt
  • Loading branch information
khemrajrathore authored Nov 10, 2023
1 parent 1dd6c55 commit b917a05
Show file tree
Hide file tree
Showing 14 changed files with 177 additions and 69 deletions.
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
package io.joern.c2cpg

import io.joern.c2cpg.datastructures.CGlobal
import io.joern.c2cpg.passes.{AstCreationPass, TypeDeclNodePass, PreprocessorPass}
import io.joern.c2cpg.passes.{AstCreationPass, PreprocessorPass, TypeDeclNodePass}
import io.joern.c2cpg.utils.Report
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.Languages
import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass}
import io.joern.x2cpg.X2Cpg.withNewEmptyCpg
import io.joern.x2cpg.X2CpgFrontend

import java.util.regex.Pattern
import scala.util.Try
import scala.util.matching.Regex

class C2Cpg extends X2CpgFrontend[Config] {

Expand All @@ -31,3 +33,14 @@ class C2Cpg extends X2CpgFrontend[Config] {
}

}

/** Defaults shared by the c2cpg passes. */
object C2Cpg {

  // The platform file separator, quoted so it is treated as a literal when spliced into the patterns below.
  private val EscapedFileSeparator = Pattern.quote(java.io.File.separator)

  /** Patterns for paths that are skipped by default: anything starting with a dot, anything inside a `test`/`tests`
    * folder, and anything inside a `CMakeFiles` folder.
    */
  val DefaultIgnoredFolders: List[Regex] = List(
    "\\..*",
    s"(.*[$EscapedFileSeparator])?tests?[$EscapedFileSeparator].*",
    s"(.*[$EscapedFileSeparator])?CMakeFiles[$EscapedFileSeparator].*"
  ).map(_.r)
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package io.joern.c2cpg.passes

import io.joern.c2cpg.C2Cpg.DefaultIgnoredFolders
import io.joern.c2cpg.Config
import io.joern.c2cpg.astcreation.AstCreator
import io.joern.c2cpg.parser.{CdtParser, FileDefaults}
Expand All @@ -19,21 +20,15 @@ class AstCreationPass(cpg: Cpg, config: Config, report: Report = new Report())
private val file2OffsetTable: ConcurrentHashMap[String, Array[Int]] = new ConcurrentHashMap()
private val parser: CdtParser = new CdtParser(config)

private val EscapedFileSeparator = Pattern.quote(java.io.File.separator)
private val DefaultIgnoredFolders: List[Regex] = List(
"\\..*".r,
s"(.*[$EscapedFileSeparator])?tests?[$EscapedFileSeparator].*".r,
s"(.*[$EscapedFileSeparator])?CMakeFiles[$EscapedFileSeparator].*".r
)

override def generateParts(): Array[String] =
SourceFiles
.determine(
config.inputPath,
FileDefaults.SOURCE_FILE_EXTENSIONS ++ FileDefaults.HEADER_FILE_EXTENSIONS,
config.withDefaultIgnoredFilesRegex(DefaultIgnoredFolders)
)
.toArray
/** Lists the files under `config.inputPath` whose extension is one of the source or header extensions from
  * `FileDefaults`, after applying the default ignored folders and the ignore settings from `config`.
  */
override def generateParts(): Array[String] = {
  val sourceFiles = SourceFiles.determine(
    config.inputPath,
    FileDefaults.SOURCE_FILE_EXTENSIONS ++ FileDefaults.HEADER_FILE_EXTENSIONS,
    ignoredDefaultRegex = Some(DefaultIgnoredFolders),
    ignoredFilesRegex = Some(config.ignoredFilesRegex),
    ignoredFilesPath = Some(config.ignoredFiles)
  )
  sourceFiles.toArray
}

override def runOnPart(diffGraph: DiffGraphBuilder, filename: String): Unit = {
val path = Paths.get(filename).toAbsolutePath
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package io.joern.c2cpg.passes

import io.joern.c2cpg.C2Cpg.DefaultIgnoredFolders
import io.joern.c2cpg.Config
import io.joern.c2cpg.parser.{CdtParser, FileDefaults}
import io.joern.x2cpg.SourceFiles
Expand All @@ -18,7 +19,16 @@ class PreprocessorPass(config: Config) {
private val parser = new CdtParser(config)

def run(): ParIterable[String] =
SourceFiles.determine(config.inputPath, FileDefaults.SOURCE_FILE_EXTENSIONS).par.flatMap(runOnPart)
SourceFiles
.determine(
config.inputPath,
FileDefaults.SOURCE_FILE_EXTENSIONS,
ignoredDefaultRegex = Some(DefaultIgnoredFolders),
ignoredFilesRegex = Some(config.ignoredFilesRegex),
ignoredFilesPath = Some(config.ignoredFiles)
)
.par
.flatMap(runOnPart)

private def preprocessorStatement2String(stmt: IASTPreprocessorStatement): Option[String] = stmt match {
case s: IASTPreprocessorIfStatement =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,12 @@ class AstGenRunner(config: Config) {
logger.info(s"Running goastgen in '$config.inputPath' ...")
runAstGenNative(config.inputPath, out, config.ignoredFilesRegex.toString()) match {
case Success(result) =>
val srcFiles = SourceFiles.determine(out.toString(), Set(".json"))
val srcFiles = SourceFiles.determine(
out.toString(),
Set(".json"),
ignoredFilesRegex = Some(config.ignoredFilesRegex),
ignoredFilesPath = Some(config.ignoredFiles)
)
val parsedModFile = filterModFile(srcFiles, out)
val parsed = filterFiles(srcFiles, out)
val skipped = skippedFiles(in, result.toList)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ package io.joern.javasrc2cpg.jpastprinter
import com.github.javaparser.printer.YamlPrinter
import com.github.javaparser.printer.DotPrinter
import io.shiftleft.semanticcpg.language.dotextension.Shared
import io.joern.javasrc2cpg.Config
import io.joern.javasrc2cpg.{Config, JavaSrc2Cpg}
import io.joern.javasrc2cpg.util.SourceParser
import io.joern.x2cpg.SourceFiles

import java.nio.file.Path

object JavaParserAstPrinter {
Expand All @@ -13,12 +15,20 @@ object JavaParserAstPrinter {
val sourceParser = SourceParser(config, false)
val printer = new YamlPrinter(true)

SourceParser.getSourceFilenames(config).foreach { filename =>
val relativeFilename = Path.of(config.inputPath).relativize(Path.of(filename)).toString
sourceParser.parseAnalysisFile(relativeFilename, saveFileContent = false).foreach { case (compilationUnit, _) =>
println(relativeFilename)
println(printer.output(compilationUnit))
SourceFiles
.determine(
config.inputPath,
JavaSrc2Cpg.sourceFileExtensions,
ignoredDefaultRegex = Some(JavaSrc2Cpg.DefaultIgnoredFilesRegex),
ignoredFilesRegex = Some(config.ignoredFilesRegex),
ignoredFilesPath = Some(config.ignoredFiles)
)
.foreach { filename =>
val relativeFilename = Path.of(config.inputPath).relativize(Path.of(filename)).toString
sourceParser.parseAnalysisFile(relativeFilename, saveFileContent = false).foreach { case (compilationUnit, _) =>
println(relativeFilename)
println(printer.output(compilationUnit))
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,17 @@ class AstCreationPass(config: Config, cpg: Cpg, sourcesOverride: Option[List[Str
val global: Global = new Global()
private val logger = LoggerFactory.getLogger(classOf[AstCreationPass])

private val sourceFilenames = SourceParser.getSourceFilenames(config, sourcesOverride)
private val sourceFilenames = sourcesOverride
.getOrElse(
SourceFiles.determine(
config.inputPath,
JavaSrc2Cpg.sourceFileExtensions,
ignoredDefaultRegex = Some(JavaSrc2Cpg.DefaultIgnoredFilesRegex),
ignoredFilesRegex = Some(config.ignoredFilesRegex),
ignoredFilesPath = Some(config.ignoredFiles)
)
)
.toArray

val (sourceParser, symbolSolver) = initParserAndUtils(config, sourceFilenames)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import better.files.File
import io.joern.javasrc2cpg.{Config, JavaSrc2Cpg}
import io.joern.javasrc2cpg.util.Delombok.DelombokMode
import io.joern.javasrc2cpg.util.Delombok.DelombokMode._
import io.joern.x2cpg.SourceFiles
import com.github.javaparser.{JavaParser, ParserConfiguration}
import com.github.javaparser.ParserConfiguration.LanguageLevel
import com.github.javaparser.ast.CompilationUnit
Expand Down Expand Up @@ -115,11 +114,6 @@ object SourceParser {
new SourceParser(Path.of(canonicalInputPath), Path.of(analysisDir), Path.of(typesDir))
}

def getSourceFilenames(config: Config, sourcesOverride: Option[List[String]] = None): Array[String] = {
val inputPaths = sourcesOverride.getOrElse(config.inputPath :: Nil).toSet
SourceFiles.determine(inputPaths, JavaSrc2Cpg.sourceFileExtensions, config).toArray
}

/** Implements the logic described in the option description for the "delombok-mode" option:
* - no-delombok: do not run delombok.
* - default: run delombok if a lombok dependency is found and analyse delomboked code.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,24 @@ class Kotlin2Cpg extends X2CpgFrontend[Config] with UsesService {
case None => None
}

val filesWithKtExtension = SourceFiles.determine(sourceDir, Set(".kt"))
val filesWithKtExtension = SourceFiles.determine(
sourceDir,
Set(".kt"),
ignoredFilesRegex = Some(config.ignoredFilesRegex),
ignoredFilesPath = Some(config.ignoredFiles)
)
if (filesWithKtExtension.isEmpty) {
println(s"The provided input directory does not contain files ending in '.kt' `$sourceDir`. Exiting.")
System.exit(1)
}
logger.info(s"Starting CPG generation for input directory `$sourceDir`.")

val filesWithJavaExtension = SourceFiles.determine(sourceDir, Set(".java"))
val filesWithJavaExtension = SourceFiles.determine(
sourceDir,
Set(".java"),
ignoredFilesRegex = Some(config.ignoredFilesRegex),
ignoredFilesPath = Some(config.ignoredFiles)
)
if (filesWithJavaExtension.nonEmpty) {
logger.info(s"Found ${filesWithJavaExtension.size} files with the `.java` extension.")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,14 @@ class AstCreationPass(config: Config, cpg: Cpg, parser: PhpParser)(implicit with

val PhpSourceFileExtensions: Set[String] = Set(".php")

override def generateParts(): Array[String] = SourceFiles.determine(config.inputPath, PhpSourceFileExtensions).toArray
/** Lists the files under `config.inputPath` whose extension is in `PhpSourceFileExtensions`, after applying the
  * ignore regex and ignored file list from `config`.
  */
override def generateParts(): Array[String] = {
  val phpFiles = SourceFiles.determine(
    config.inputPath,
    PhpSourceFileExtensions,
    ignoredFilesRegex = Some(config.ignoredFilesRegex),
    ignoredFilesPath = Some(config.ignoredFiles)
  )
  phpFiles.toArray
}

override def runOnPart(diffGraph: DiffGraphBuilder, filename: String): Unit = {
val relativeFilename = if (filename == config.inputPath) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,12 @@ class Py2CpgOnFileSystem extends X2CpgFrontend[Py2CpgOnFileSystemConfig] {
}

val inputFiles = SourceFiles
.determine(config.inputPath, Set(".py"), config)
.determine(
config.inputPath,
Set(".py"),
ignoredFilesRegex = Some(config.ignoredFilesRegex),
ignoredFilesPath = Some(config.ignoredFiles)
)
.map(x => Path.of(x))
.filter { file => filterIgnoreDirNames(file, inputPath, ignoreDirNamesSet) }
.filter { file =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,14 @@ class AstCreationPass(
global.usedTypes.keys().asScala.toList

override def generateParts(): Array[String] =
SourceFiles.determine(config.inputPath, RubySourceFileExtensions, config).toArray
SourceFiles
.determine(
config.inputPath,
RubySourceFileExtensions,
ignoredFilesRegex = Some(config.ignoredFilesRegex),
ignoredFilesPath = Some(config.ignoredFiles)
)
.toArray

override def runOnPart(diffGraph: DiffGraphBuilder, fileName: String): Unit = {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,14 @@ class AstCreationPass(cpg: Cpg, parser: ResourceManagedParser, config: Config)
private val RubySourceFileExtensions = Set(".rb")

override def generateParts(): Array[String] = {
SourceFiles.determine(config.inputPath, RubySourceFileExtensions, config).toArray
SourceFiles
.determine(
config.inputPath,
RubySourceFileExtensions,
ignoredFilesRegex = Some(config.ignoredFilesRegex),
ignoredFilesPath = Some(config.ignoredFiles)
)
.toArray
}

override def runOnPart(diffGraph: DiffGraphBuilder, fileName: String): Unit = {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
package io.joern.x2cpg

import better.files.File.VisitOptions
import better.files._
import better.files.*
import org.slf4j.LoggerFactory

import java.io.FileNotFoundException
import java.nio.file.Paths
import scala.util.matching.Regex

object SourceFiles {

private val logger = LoggerFactory.getLogger(getClass)

private def isIgnoredByFileList(filePath: String, config: X2CpgConfig[_]): Boolean = {
val isInIgnoredFiles = config.ignoredFiles.exists {
private def isIgnoredByFileList(filePath: String, ignoredFiles: Seq[String]): Boolean = {
val isInIgnoredFiles = ignoredFiles.exists {
case ignorePath if File(ignorePath).isDirectory => filePath.startsWith(ignorePath)
case ignorePath => filePath == ignorePath
}
Expand All @@ -24,19 +25,19 @@ object SourceFiles {
}
}

private def isIgnoredByDefault(filePath: String, config: X2CpgConfig[_]): Boolean = {
val relPath = toRelativePath(filePath, config.inputPath)
if (config.defaultIgnoredFilesRegex.exists(_.matches(relPath))) {
private def isIgnoredByDefaultRegex(filePath: String, inputPath: String, ignoredDefaultRegex: Seq[Regex]): Boolean = {
val relPath = toRelativePath(filePath, inputPath)
if (ignoredDefaultRegex.exists(_.matches(relPath))) {
logger.debug(s"'$relPath' ignored by default")
true
} else {
false
}
}

private def isIgnoredByRegex(filePath: String, config: X2CpgConfig[_]): Boolean = {
val relPath = toRelativePath(filePath, config.inputPath)
val isInIgnoredFilesRegex = config.ignoredFilesRegex.matches(relPath)
private def isIgnoredByRegex(filePath: String, inputPath: String, ignoredFilesRegex: Regex): Boolean = {
val relPath = toRelativePath(filePath, inputPath)
val isInIgnoredFilesRegex = ignoredFilesRegex.matches(relPath)
if (isInIgnoredFilesRegex) {
logger.debug(s"'$relPath' ignored (--exclude-regex)")
true
Expand All @@ -45,31 +46,48 @@ object SourceFiles {
}
}

private def filterFiles(files: List[String], config: X2CpgConfig[_]): List[String] = files.filter {
case filePath if isIgnoredByDefault(filePath, config) => false
case filePath if isIgnoredByFileList(filePath, config) => false
case filePath if isIgnoredByRegex(filePath, config) => false
case _ => true
}

/** For a given input path, determine all source files by inspecting filename extensions.
*/
def determine(inputPath: String, sourceFileExtensions: Set[String]): List[String] = {
determine(Set(inputPath), sourceFileExtensions)
}

/** For a given input path, determine all source files by inspecting filename extensions and filter the result
* according to the given config (by its ignoredFilesRegex and ignoredFiles).
*/
def determine(inputPath: String, sourceFileExtensions: Set[String], config: X2CpgConfig[_]): List[String] = {
determine(Set(inputPath), sourceFileExtensions, config)
/** Removes every path from `files` that is excluded by one of the optional ignore settings.
  *
  * The checks run in a fixed order for each path and stop at the first match: default regexes, then the user
  * supplied regex, then the explicit list of ignored paths. A setting that is `None` (or an empty sequence) is
  * skipped entirely.
  *
  * @param files
  *   candidate file paths
  * @param inputPath
  *   root path; the regex based checks receive it to relativize `filePath` before matching
  * @param ignoredDefaultRegex
  *   optional default ignore patterns
  * @param ignoredFilesRegex
  *   optional user supplied ignore pattern
  * @param ignoredFilesPath
  *   optional explicit list of ignored files or directories
  * @return
  *   the paths from `files` that are not ignored, in their original order
  */
private def filterFiles(
  files: List[String],
  inputPath: String,
  ignoredDefaultRegex: Option[Seq[Regex]] = None,
  ignoredFilesRegex: Option[Regex] = None,
  ignoredFilesPath: Option[Seq[String]] = None
): List[String] = {
  // Treat an empty sequence like an absent setting so the helpers are never invoked for it.
  val defaultRegexes = ignoredDefaultRegex.filter(_.nonEmpty)
  val ignoredPaths   = ignoredFilesPath.filter(_.nonEmpty)

  // `||` short-circuits, so later checks (and their debug logging) only run if the earlier ones did not match.
  files.filterNot { filePath =>
    defaultRegexes.exists(isIgnoredByDefaultRegex(filePath, inputPath, _)) ||
    ignoredFilesRegex.exists(isIgnoredByRegex(filePath, inputPath, _)) ||
    ignoredPaths.exists(isIgnoredByFileList(filePath, _))
  }
}

/** For given input paths, determine all source files by inspecting filename extensions and filter the result
* according to the given config (by its ignoredFilesRegex and ignoredFiles).
/** For a given input path, determine all source files by inspecting filename extensions, then drop the files
  * excluded by whichever of the optional ignore settings are supplied.
  *
  * @param inputPath
  *   path to search for source files
  * @param sourceFileExtensions
  *   filename extensions that identify a source file
  * @param ignoredDefaultRegex
  *   optional default ignore patterns
  * @param ignoredFilesRegex
  *   optional user supplied ignore pattern
  * @param ignoredFilesPath
  *   optional explicit list of ignored files or directories
  */
def determine(
  inputPath: String,
  sourceFileExtensions: Set[String],
  ignoredDefaultRegex: Option[Seq[Regex]] = None,
  ignoredFilesRegex: Option[Regex] = None,
  ignoredFilesPath: Option[Seq[String]] = None
): List[String] = {
  val candidates = determine(Set(inputPath), sourceFileExtensions)
  filterFiles(candidates, inputPath, ignoredDefaultRegex, ignoredFilesRegex, ignoredFilesPath)
}

/** For a given array of input paths, determine all source files by inspecting filename extensions.
Expand Down
Loading

0 comments on commit b917a05

Please sign in to comment.