Skip to content

parser: skip BOM at the beginning of input #182

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 21 additions & 5 deletions src/lib/instream.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,21 +57,37 @@ void InStream::handleError(const std::string &msg, const unsigned long line)
InStreamLookAhead::InStreamLookAhead(
InStream &input,
const unsigned size,
const bool skipBOM,
bool skipWhiteSpaces)
{
std::istream &inStr = input.str();

// read `size` chars from input
while (buf_.size() < size) {
const int c = inStr.get();
if (skipWhiteSpaces && isspace(c) && !!inStr)
int c = inStr.get();
if (skipBOM
// try to read BOM ... [0xEF, 0xBB, 0xBF]
&& (0xEF == c)
&& (0xBB == (c = inStr.get()))
&& (0xBF == (c = inStr.get())))
// BOM successfully read -> read the next char
c = inStr.get();

// read chars from input
for (;;) {
if (skipWhiteSpaces && isspace(c))
// skip a white-space
continue;
goto next;

// only the leading white-spaces are skipped
skipWhiteSpaces = false;

// append one char to the buffer
buf_.push_back(c);
if (size <= buf_.size())
// the requested number of chars have been read
break;
next:
// read the next char
c = inStr.get();
}

// put the chars back to the input stream
Expand Down
3 changes: 2 additions & 1 deletion src/lib/instream.hh
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ class InStreamLookAhead {
InStreamLookAhead(
InStream &input,
unsigned size,
bool skipWhiteSpaces = false);
bool skipBOM,
bool skipWhiteSpaces);

char operator[](const unsigned idx) const {
return buf_.at(idx);
Expand Down
4 changes: 3 additions & 1 deletion src/lib/parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ static inline std::unique_ptr<T> make_unique(InStream &input) {
AbstractParserPtr createParser(InStream &input)
{
// skip the UTF-8 BOM (if any) and all leading white-spaces, then sniff the first two chars from the input
InStreamLookAhead head(input, 2U, /* skipWhiteSpaces */ true);
InStreamLookAhead head(input, 2U,
/* skipBOM */ true,
/* skipWhiteSpaces */ true);

switch (head[0]) {
case '{':
Expand Down
1 change: 1 addition & 0 deletions tests/csgrep/0125-sarif-parser-bom-args.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--mode=json
10,604 changes: 10,604 additions & 0 deletions tests/csgrep/0125-sarif-parser-bom-stdin.txt

Large diffs are not rendered by default.

245 changes: 245 additions & 0 deletions tests/csgrep/0125-sarif-parser-bom-stdout.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
{
"scan": {
"analyzer-version-snyk-code": "1.0.0"
},
"defects": [
{
"checker": "SNYK_CODE_WARNING",
"cwe": 290,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/krb5-src/krb5/src/lib/krb5/krb/t_princ.c",
"line": 381,
"column": 26,
"h_size": 22,
"event": "note[cpp/WeakGuard]",
"message": "An hardcoded domain name is compared in strcmp. This check could lead to a bypass since the domain name can be spoofed or controlled by an attacker.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 290,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/krb5-src/krb5/src/wconfig.c",
"line": 140,
"column": 21,
"h_size": 12,
"event": "note[cpp/WeakGuard]",
"message": "An hardcoded domain name is compared in strcmp. This check could lead to a bypass since the domain name can be spoofed or controlled by an attacker.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 290,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/krb5-src/krb5/src/wconfig.c",
"line": 145,
"column": 21,
"h_size": 13,
"event": "note[cpp/WeakGuard]",
"message": "An hardcoded domain name is compared in strcmp. This check could lead to a bypass since the domain name can be spoofed or controlled by an attacker.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/sasl2-sys/sasl2/utils/smtptest.c",
"line": 360,
"column": 3,
"h_size": 4,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a file flows into an addition operator (+), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/krb5-src/krb5/src/tests/asn.1/trval.c",
"line": 161,
"column": 19,
"h_size": 7,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a file flows into an addition operator (+), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/krb5-src/krb5/src/tests/asn.1/trval.c",
"line": 161,
"column": 30,
"h_size": 2,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a file flows into an addition operator (+), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/rdkafka-sys/librdkafka/tests/0098-consumer-txn.cpp",
"line": 307,
"column": 35,
"h_size": 7,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a file flows into an addition operator (+), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/rdkafka-sys/librdkafka/tests/0098-consumer-txn.cpp",
"line": 444,
"column": 7,
"h_size": 29,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a file flows into an addition operator (+), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/lz4-sys/liblz4/tests/abiTest.c",
"line": 81,
"column": 27,
"h_size": 26,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a file flows into a subtraction operator (-), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/krb5-src/krb5/src/tests/asn.1/trval.c",
"line": 126,
"column": 17,
"h_size": 2,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a file flows into a subtraction operator (-), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/krb5-src/krb5/src/tests/asn.1/trval.c",
"line": 128,
"column": 17,
"h_size": 2,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a file flows into a subtraction operator (-), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/krb5-src/krb5/src/tests/asn.1/trval.c",
"line": 130,
"column": 17,
"h_size": 2,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a file flows into a subtraction operator (-), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/openssl-src/openssl/test/confdump.c",
"line": 37,
"column": 25,
"h_size": 10,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a command line argument flows into a subtraction operator (-), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/lz4-sys/liblz4/tests/fuzzer.c",
"line": 378,
"column": 95,
"h_size": 9,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a command line argument flows into a subtraction operator (-), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
}
]
}
1 change: 1 addition & 0 deletions tests/csgrep/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -168,3 +168,4 @@ test_csgrep("0121-cov-parser-lock-evasion" )
test_csgrep("0122-json-parser-cov-v10-column" )
test_csgrep("0123-csgrep-hash-v1" )
test_csgrep("0124-sarif-writer-imp" )
test_csgrep("0125-sarif-parser-bom" )