diff --git a/src/lib/parser-json-sarif.cc b/src/lib/parser-json-sarif.cc index 99ff3ca8..33676aea 100644 --- a/src/lib/parser-json-sarif.cc +++ b/src/lib/parser-json-sarif.cc @@ -29,6 +29,8 @@ struct SarifTreeDecoder::Private { void readToolInfo(TScanProps *pScanProps, const pt::ptree *toolNode); std::string singleChecker = "UNKNOWN_SARIF_WARNING"; + std::string pwd; + const RE reFileUrl = RE("^file://"); const RE reCwe = RE("^CWE-([0-9]+)$"); const RE reVersion = RE("^([0-9][0-9.]+).*$"); const RE reRuleId = @@ -156,6 +158,22 @@ void SarifTreeDecoder::readScanProps( const pt::ptree *toolNode; if (findChildOf(&toolNode, run0, "tool")) d->readToolInfo(pDst, toolNode); + + // read PWD so that we can reconstruct absolute paths later on + const pt::ptree *uriBase, *pwdNode, *uriNode; + if (findChildOf(&uriBase, run0, "originalUriBaseIds") + && findChildOf(&pwdNode, *uriBase, "PWD") + && findChildOf(&uriNode, *pwdNode, "uri")) + { + // remove the "file://" prefix + const auto &pwd = uriNode->data(); + d->pwd = boost::regex_replace(pwd, d->reFileUrl, ""); + // FIXME: Should we check whether d->pwd begins with '/'? + + // make sure that d->pwd ends with '/' + if (!d->pwd.empty() && *d->pwd.rbegin() != '/') + d->pwd += '/'; + } } void SarifTreeDecoder::readRoot(const pt::ptree *runs) @@ -321,6 +339,32 @@ static int sarifCweFromDefNode(const pt::ptree &defNode) return 0; } +static void expandRelativePaths(Defect *pDef, const std::string &pwd) +{ + if (pwd.empty()) + // no PWD info provided + return; + + // go through all events + for (DefEvent &evt : pDef->events) { + std::string &fileName = evt.fileName; + if (fileName.empty()) + // no file path to expand + continue; + + const unsigned char beginsWith = *fileName.begin(); + switch (beginsWith) { + case '/': // absolute path + case '<': // and the like + continue; + + default: + // prepend `pwd` to relative path + fileName = pwd + fileName; + } + } +} + bool SarifTreeDecoder::readNode(Defect *def) { // move the iterator after we get the current position @@ -388,6 +432,7 @@ bool SarifTreeDecoder::readNode(Defect *def) if (findChildOf(&relatedLocs, defNode, "relatedLocations")) sarifReadComments(def, *relatedLocs); + expandRelativePaths(def, d->pwd); d->digger.inferLangFromChecker(def); d->digger.inferToolFromChecker(def); diff --git a/tests/csgrep/0129-sarif-gcc-pwd-args.txt b/tests/csgrep/0129-sarif-gcc-pwd-args.txt new file mode 100644 index 00000000..7df3c951 --- /dev/null +++ b/tests/csgrep/0129-sarif-gcc-pwd-args.txt @@ -0,0 +1 @@ +--mode=json diff --git a/tests/csgrep/0129-sarif-gcc-pwd-stdin.txt b/tests/csgrep/0129-sarif-gcc-pwd-stdin.txt new file mode 100644 index 00000000..bcb88f11 --- /dev/null +++ b/tests/csgrep/0129-sarif-gcc-pwd-stdin.txt @@ -0,0 +1,209 @@ +{ + "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json", + "version": "2.1.0", + "runs": [ + { + "tool": { + "driver": { + "name": "GNU C17", + "fullName": "GNU C17 (GCC) version 14.2.1 20240912 (Red Hat 14.2.1-4) (x86_64-redhat-linux)", + "version": "14.2.1 20240912 (Red Hat 14.2.1-4)", + "informationUri": "https://gcc.gnu.org/gcc-14/", + "rules": [ + { + "id": "-Wanalyzer-null-dereference", + "helpUri": "https://gcc.gnu.org/onlinedocs/gcc-14.2.0/gcc/Static-Analyzer-Options.html#index-Wanalyzer-null-dereference" + } + ] + } + }, + "taxonomies": [ + { + "name": "CWE", + "version": "4.7", + "organization": "MITRE", + "shortDescription": { + "text": "The MITRE Common Weakness Enumeration" + }, + "taxa": [ + { + "id": "476", + "helpUri": "https://cwe.mitre.org/data/definitions/476.html" + } + ] + } + ], + "invocations": [ + { + "executionSuccessful": true, + "toolExecutionNotifications": [] + } + ], + "originalUriBaseIds": { + "PWD": { + "uri": "file:///home/kdudka/" + } + }, + "artifacts": [ + { + "location": { + "uri": "xxx.c", + "uriBaseId": "PWD" + }, + "contents": { + "text": "int main()\n{\n int *p = 0;\n return *p;\n}\n" + }, + "sourceLanguage": "c" + } + ], + "results": [ + { + "ruleId": "-Wanalyzer-null-dereference", + "taxa": [ + { + "id": "476", + "toolComponent": { + "name": "cwe" + } + } + ], + "properties": { + "gcc/analyzer/saved_diagnostic/sm": "malloc", + "gcc/analyzer/saved_diagnostic/enode": 4, + "gcc/analyzer/saved_diagnostic/snode": 1, + "gcc/analyzer/saved_diagnostic/sval": "(int *)0B", + "gcc/analyzer/saved_diagnostic/state": "null", + "gcc/analyzer/saved_diagnostic/idx": 0 + }, + "level": "warning", + "message": { + "text": "dereference of NULL \u2018p\u2019" + }, + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "uri": "xxx.c", + "uriBaseId": "PWD" + }, + "region": { + "startLine": 4, + "startColumn": 12, + "endColumn": 14 + }, + "contextRegion": { + "startLine": 4, + "snippet": { + "text": " return *p;\n" + } + } + }, + "logicalLocations": [ + { + "name": "main", + "fullyQualifiedName": "main", + "decoratedName": "main", + "kind": "function" + } + ] + } + ], + "codeFlows": [ + { + "threadFlows": [ + { + "id": "main", + "locations": [ + { + "properties": { + "gcc/analyzer/checker_event/emission_id": "(1)", + "gcc/analyzer/checker_event/kind": "EK_STATE_CHANGE" + }, + "location": { + "physicalLocation": { + "artifactLocation": { + "uri": "xxx.c", + "uriBaseId": "PWD" + }, + "region": { + "startLine": 3, + "startColumn": 10, + "endColumn": 11 + }, + "contextRegion": { + "startLine": 3, + "snippet": { + "text": " int *p = 0;\n" + } + } + }, + "logicalLocations": [ + { + "name": "main", + "fullyQualifiedName": "main", + "decoratedName": "main", + "kind": "function" + } + ], + "message": { + "text": "\u2018p\u2019 is NULL" + } + }, + "kinds": [ + "release", + "memory" + ], + "nestingLevel": 1, + "executionOrder": 1 + }, + { + "properties": { + "gcc/analyzer/checker_event/emission_id": "(2)", + "gcc/analyzer/checker_event/kind": "EK_WARNING" + }, + "location": { + "physicalLocation": { + "artifactLocation": { + "uri": "xxx.c", + "uriBaseId": "PWD" + }, + "region": { + "startLine": 4, + "startColumn": 12, + "endColumn": 14 + }, + "contextRegion": { + "startLine": 4, + "snippet": { + "text": " return *p;\n" + } + } + }, + "logicalLocations": [ + { + "name": "main", + "fullyQualifiedName": "main", + "decoratedName": "main", + "kind": "function" + } + ], + "message": { + "text": "dereference of NULL \u2018p\u2019" + } + }, + "kinds": [ + "danger" + ], + "nestingLevel": 1, + "executionOrder": 2 + } + ] + } + ] + } + ] + } + ] + } + ] +} diff --git a/tests/csgrep/0129-sarif-gcc-pwd-stdout.txt b/tests/csgrep/0129-sarif-gcc-pwd-stdout.txt new file mode 100644 index 00000000..d42c906e --- /dev/null +++ b/tests/csgrep/0129-sarif-gcc-pwd-stdout.txt @@ -0,0 +1,43 @@ +{ + "scan": { + "analyzer-version-gcc": "14.2.1" + }, + "defects": [ + { + "checker": "GCC_ANALYZER_WARNING", + "cwe": 476, + "language": "c/c++", + "tool": "gcc-analyzer", + "key_event_idx": 0, + "events": [ + { + "file_name": "/home/kdudka/xxx.c", + "line": 4, + "column": 12, + "h_size": 2, + "event": "warning[-Wanalyzer-null-dereference]", + "message": "dereference of NULL ‘p’", + "verbosity_level": 0 + }, + { + "file_name": "/home/kdudka/xxx.c", + "line": 3, + "column": 10, + "h_size": 1, + "event": "release_memory", + "message": "‘p’ is NULL", + "verbosity_level": 1 + }, + { + "file_name": "/home/kdudka/xxx.c", + "line": 4, + "column": 12, + "h_size": 2, + "event": "danger", + "message": "dereference of NULL ‘p’", + "verbosity_level": 1 + } + ] + } + ] +} diff --git a/tests/csgrep/CMakeLists.txt b/tests/csgrep/CMakeLists.txt index 233d5749..f0644446 100644 --- a/tests/csgrep/CMakeLists.txt +++ b/tests/csgrep/CMakeLists.txt @@ -172,4 +172,5 @@ test_csgrep("0125-sarif-parser-bom" ) test_csgrep("0126-cov-parser-imp-flag" ) test_csgrep("0127-cov-writer-noloc" ) test_csgrep("0128-cov-parser-noloc" ) +test_csgrep("0129-sarif-gcc-pwd" ) test_csgrep("0131-unicontrol-perl-man-page" )