Skip to content

Commit 620dc47

Browse files
committed
json-writer: replace invalid UTF-8 sequences in messages with � (U+FFFD REPLACEMENT CHARACTER)
Fixes: #42
1 parent 6c1e77c commit 620dc47

9 files changed

+99
-5
lines changed

src/json-writer.cc

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2011 Red Hat, Inc.
2+
* Copyright (C) 2011-2022 Red Hat, Inc.
33
*
44
* This file is part of csdiff.
55
*
@@ -28,8 +28,18 @@
2828

2929
#include <boost/iostreams/filtering_stream.hpp>
3030
#include <boost/iostreams/filter/regex.hpp>
31+
#include <boost/nowide/utf/convert.hpp>
3132
#include <boost/property_tree/json_parser.hpp>
3233

34+
static inline std::string sanitizeUTF8(const std::string &str)
35+
{
36+
using boost::nowide::utf::convert_string;
37+
38+
// every non-UTF8 sequence will be replaced with 0xEF 0xBF 0xBD which
39+
// corresponds to REPLACEMENT CHARACTER U+FFFD
40+
return convert_string<char>(str.data(), str.data() + str.size());
41+
}
42+
3343
typedef SharedStringPTree PTree;
3444

3545
class SimpleTreeEncoder: public AbstractTreeEncoder {
@@ -77,7 +87,7 @@ void SimpleTreeEncoder::appendDef(const Defect &def)
7787

7888
// describe the event
7989
evtNode.put<string>("event", evt.event);
80-
evtNode.put<string>("message", evt.msg);
90+
evtNode.put<string>("message", sanitizeUTF8(evt.msg));
8191
evtNode.put<int>("verbosity_level", evt.verbosityLevel);
8292

8393
// append the event to the list
@@ -195,10 +205,10 @@ void SarifTreeEncoder::importScanProps(const TScanProps &scanProps)
195205
scanProps_ = scanProps;
196206
}
197207

198-
static void sarifEncodeMsg(PTree *pDst, const std::string text)
208+
static void sarifEncodeMsg(PTree *pDst, const std::string& text)
199209
{
200210
PTree msg;
201-
msg.put<std::string>("text", text);
211+
msg.put<std::string>("text", sanitizeUTF8(text));
202212
pDst->put_child("message", msg);
203213
}
204214

tests/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (C) 2011 - 2020 Red Hat, Inc.
1+
# Copyright (C) 2011 - 2022 Red Hat, Inc.
22
#
33
# This file is part of csdiff.
44
#
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
--mode=json
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
test.c: warning: illegal sequence '�' will be replaced with '�'
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"defects": [
3+
{
4+
"checker": "COMPILER_WARNING",
5+
"language": "c/c++",
6+
"key_event_idx": 0,
7+
"events": [
8+
{
9+
"file_name": "test.c",
10+
"line": 0,
11+
"event": "warning",
12+
"message": "illegal sequence '�' will be replaced with '�'",
13+
"verbosity_level": 0
14+
}
15+
]
16+
}
17+
]
18+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
--mode=sarif
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
test.c: warning: illegal sequence '�' will be replaced with '�'
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
{
2+
"$schema": "https://json.schemastore.org/sarif-2.1.0.json",
3+
"version": "",
4+
"runs": [
5+
{
6+
"tool": {
7+
"driver": {
8+
"name": "csdiff",
9+
"version": "",
10+
"informationUri": "https://github.com/csutils/csdiff"
11+
}
12+
},
13+
"results": [
14+
{
15+
"ruleId": "COMPILER_WARNING: warning",
16+
"locations": [
17+
{
18+
"id": 0,
19+
"physicalLocation": {
20+
"artifactLocation": {
21+
"uri": "test.c"
22+
}
23+
}
24+
}
25+
],
26+
"message": {
27+
"text": "illegal sequence '�' will be replaced with '�'"
28+
},
29+
"codeFlows": [
30+
{
31+
"threadFlows": [
32+
{
33+
"locations": [
34+
{
35+
"location": {
36+
"id": 0,
37+
"physicalLocation": {
38+
"artifactLocation": {
39+
"uri": "test.c"
40+
}
41+
},
42+
"message": {
43+
"text": "illegal sequence '�' will be replaced with '�'"
44+
}
45+
},
46+
"nestingLevel": 0,
47+
"kinds": [
48+
"warning"
49+
]
50+
}
51+
]
52+
}
53+
]
54+
}
55+
]
56+
}
57+
]
58+
}
59+
]
60+
}

tests/csgrep/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,3 +132,5 @@ test_csgrep("85-sarif-writer" )
132132
test_csgrep("86-sarif-parser" )
133133
test_csgrep("87-json-writer-empty" )
134134
test_csgrep("88-csparser-cert-c" )
135+
test_csgrep("89-json-writer-illegal-utf8-sequence" )
136+
test_csgrep("90-sarif-writer-illegal-utf8-sequence" )

0 commit comments

Comments
 (0)