Skip to content

Commit f025430

Browse files
authored
Merge pull request #15319 from Sim4n6/main
[Python] Add Unicode DoS (qhelp, tests and the query)
2 parents d78efdb + 1af8167 commit f025430

File tree

7 files changed

+388
-0
lines changed

7 files changed

+388
-0
lines changed
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>When remote user-controlled data can reach a costly Unicode normalization that uses either the NFKC or the NFKD form, an attack such as the One Million Unicode Characters attack could lead to a denial of service on Windows.</p>
8+
9+
<p>In addition, special Unicode characters such as U+2100 (℀) or U+2105 (℅) expand during compatibility normalization, so the payload size could be tripled.</p>
10+
11+
</overview>
12+
<recommendation>
13+
14+
<p>Limit the size of any incoming data that goes through a costly operation, including a Unicode normalization with NFKC or NFKD on Windows. Following this recommendation avoids a potential denial of service.</p>
15+
16+
</recommendation>
17+
18+
<example>
19+
<p>
20+
In this example, user-controlled data reaches a Unicode normalization with the form "NFKC".
21+
</p>
22+
23+
<sample src="bad.py" />
24+
25+
<p>To fix this vulnerability, we need to restrict the size of the user input.</p>
26+
27+
<p>For example, we can use the <code>len()</code> builtin function to limit the size of the user input.</p>
28+
29+
<sample src="good.py" />
30+
31+
</example>
32+
<references>
33+
34+
<li>
35+
<a href="https://hackerone.com/reports/2258758">CVE-2023-46695: Potential denial of service vulnerability in Django UsernameField on Windows.</a>
36+
</li>
37+
</references>
38+
</qhelp>
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/**
2+
* @name Denial of Service using Unicode Characters
3+
* @description Remote user-controlled data can reach a costly Unicode normalization that uses either the NFKC or the NFKD form. On Windows, an attack such as the One Million Unicode Characters attack could then lead to a denial of service. In addition, special Unicode characters such as U+2100 (℀) or U+2105 (℅) could triple the payload size.
4+
* @kind path-problem
5+
* @id py/unicode-dos
6+
* @precision high
7+
* @problem.severity error
8+
* @tags security
9+
* experimental
10+
* external/cwe/cwe-770
11+
*/
12+
13+
import python
14+
import semmle.python.ApiGraphs
15+
import semmle.python.Concepts
16+
import semmle.python.dataflow.new.TaintTracking
17+
import semmle.python.dataflow.new.internal.DataFlowPublic
18+
import semmle.python.dataflow.new.RemoteFlowSources
19+
20+
// The Unicode compatibility normalization calls from unicodedata, unidecode, pyunormalize
21+
// and textnorm modules. argIdx is the index of the argument that is being normalized.
22+
/**
 * A call matched as a costly Unicode normalization.
 *
 * Matched calls and the index (`argIdx`) of the argument that carries the text:
 * - `unicodedata.normalize` and `pyunormalize.normalize`, when a string constant
 *   "NFKC" or "NFKD" reaches positional parameter 0; the text is argument 1.
 * - `textnorm.normalize_unicode`, when a string constant "NFKC" or "NFKD" reaches
 *   positional parameter 1; the text is argument 0.
 * - any call to `unidecode.unidecode`, `pyunormalize.NFKC` or `pyunormalize.NFKD`;
 *   the text is argument 0.
 */
class UnicodeCompatibilityNormalize extends API::CallNode {
23+
// Index of the positional argument that holds the text being normalized.
int argIdx;
24+
25+
UnicodeCompatibilityNormalize() {
26+
// Form-first signatures: normalize(form, text).
(
27+
this = API::moduleImport("unicodedata").getMember("normalize").getACall() and
28+
this.getParameter(0).getAValueReachingSink().asExpr().(StrConst).getText() in ["NFKC", "NFKD"]
29+
or
30+
this = API::moduleImport("pyunormalize").getMember("normalize").getACall() and
31+
this.getParameter(0).getAValueReachingSink().asExpr().(StrConst).getText() in ["NFKC", "NFKD"]
32+
) and
33+
argIdx = 1
34+
or
35+
// Text-first signatures: the text is the first positional argument.
(
36+
this = API::moduleImport("textnorm").getMember("normalize_unicode").getACall() and
37+
this.getParameter(1).getAValueReachingSink().asExpr().(StrConst).getText() in ["NFKC", "NFKD"]
38+
or
39+
// No form argument is checked for unidecode: every call is matched.
this = API::moduleImport("unidecode").getMember("unidecode").getACall()
40+
or
41+
// pyunormalize also exposes the forms as functions named NFKC and NFKD.
this = API::moduleImport("pyunormalize").getMember(["NFKC", "NFKD"]).getACall()
42+
) and
43+
argIdx = 0
44+
}
45+
46+
/** Gets the argument of this call that holds the text being normalized. */
DataFlow::Node getPathArg() { result = this.getArg(argIdx) }
47+
}
48+
49+
/**
 * Holds if `g` is a comparison that, when it evaluates to `branch`, places an
 * upper bound on the length of `node`.
 *
 * One operand of the comparison (`n`) must have a call to the builtin `len` as a
 * local source, and `node` is the first argument of that `len` call. Only the
 * operator and the side on which `n` appears are inspected; the other operand
 * (the limit) is not constrained.
 */
predicate underAValue(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
50+
exists(CompareNode cn | cn = g |
51+
exists(API::CallNode lenCall, Cmpop op, Node n |
52+
// `n` is either the `len(...)` call itself or a local variable assigned from it.
lenCall = n.getALocalSource() and
53+
(
54+
// arg <= LIMIT OR arg < LIMIT
55+
(op instanceof LtE or op instanceof Lt) and
56+
branch = true and
57+
cn.operands(n.asCfgNode(), op, _)
58+
or
59+
// LIMIT >= arg OR LIMIT > arg
60+
(op instanceof GtE or op instanceof Gt) and
61+
branch = true and
62+
cn.operands(_, op, n.asCfgNode())
63+
or
64+
// not arg >= LIMIT OR not arg > LIMIT
65+
(op instanceof GtE or op instanceof Gt) and
66+
branch = false and
67+
cn.operands(n.asCfgNode(), op, _)
68+
or
69+
// not LIMIT <= arg OR not LIMIT < arg
70+
(op instanceof LtE or op instanceof Lt) and
71+
branch = false and
72+
cn.operands(_, op, n.asCfgNode())
73+
)
74+
|
75+
lenCall = API::builtin("len").getACall() and
76+
// The guarded value is the object whose length was measured.
node = lenCall.getArg(0).asCfgNode()
77+
)
78+
// NOTE(review): an additional condition, `not cn.getLocation().getFile().inStdlib()`, was left disabled here - confirm whether it is still wanted.
79+
)
80+
}
81+
82+
/**
 * Data-flow configuration for this query: flow from remote flow sources to the
 * text argument of a Unicode normalization call or to an argument of werkzeug's
 * `secure_filename`, stopped by `underAValue` length guards.
 */
private module UnicodeDoSConfig implements DataFlow::ConfigSig {
83+
/** Holds if `source` is a remote flow source. */
predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
84+
85+
/** Holds if `sanitizer` is a node guarded by an `underAValue` comparison. */
predicate isBarrier(DataFlow::Node sanitizer) {
86+
// underAValue checks that the length of the user-provided value is compared against an upper limit
87+
sanitizer = DataFlow::BarrierGuard<underAValue/3>::getABarrierNode()
88+
}
89+
90+
/** Holds if `sink` is an argument that is passed to a costly Unicode normalization. */
predicate isSink(DataFlow::Node sink) {
91+
// The normalized argument of any UnicodeCompatibilityNormalize call is a sink
92+
sink = any(UnicodeCompatibilityNormalize ucn).getPathArg()
93+
or
94+
// The call to secure_filename() from pallets/werkzeug uses the Unicode compatibility normalization
95+
// under the hood, https://github.com/pallets/werkzeug/blob/d3dd65a27388fbd39d146caacf2563639ba622f0/src/werkzeug/utils.py#L218
96+
// Matches the name imported from the top-level `werkzeug` module; any argument is a sink.
sink = API::moduleImport("werkzeug").getMember("secure_filename").getACall().getArg(_)
97+
or
98+
// Same function, reached through the `werkzeug.utils` member.
sink =
99+
API::moduleImport("werkzeug")
100+
.getMember("utils")
101+
.getMember("secure_filename")
102+
.getACall()
103+
.getArg(_)
104+
}
105+
}
106+
107+
/** Global taint tracking instantiated with `UnicodeDoSConfig`. */
module UnicodeDoSFlow = TaintTracking::Global<UnicodeDoSConfig>;
108+
109+
// Path graph of the flow module, imported for this path-problem query.
import UnicodeDoSFlow::PathGraph
110+
111+
// Report every path from a source to a sink; the alert is placed on the sink node
// and the two `$@` placeholders are filled by the source and the sink respectively.
from UnicodeDoSFlow::PathNode source, UnicodeDoSFlow::PathNode sink
112+
where UnicodeDoSFlow::flowPath(source, sink)
113+
select sink.getNode(), source, sink, "This $@ can reach a $@.", source.getNode(),
114+
"user-provided value", sink.getNode(), "costly Unicode normalization operation"
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from flask import Flask, jsonify, request
2+
import unicodedata
3+
4+
app = Flask(__name__)
5+
6+
7+
@app.route("/bad_1")
def bad_1():
    """Return the NFKC-normalized ``file_path`` query parameter.

    BAD: the user-controlled value is normalized without any limit on its
    length, which is the pattern the accompanying query reports.
    """
    # Untrusted input: defaults to the empty string when the parameter is absent.
    raw_path = request.args.get("file_path", "")

    # Compatibility normalization runs on input of arbitrary size.
    normalized = unicodedata.normalize("NFKC", raw_path)
    headers = {"Content-Type": "application/octet-stream"}
    return normalized, 200, headers
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from flask import Flask, jsonify, request
2+
import unicodedata
3+
4+
app = Flask(__name__)
5+
6+
7+
@app.route("/good_1")
def good_1():
    """Return the NFKD-normalized ``file_path`` query parameter.

    GOOD: input longer than 1_000 characters is rejected with a JSON error
    and status 404 before the normalization is ever called.
    """
    user_value = request.args.get("file_path", "")

    # Guard clause: refuse over-long input before the costly normalization.
    if len(user_value) > 1_000:
        return jsonify({"error": "File not found"}), 404

    normalized = unicodedata.normalize("NFKD", user_value)
    return normalized, 200, {"Content-Type": "application/octet-stream"}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
edges
2+
| tests.py:1:35:1:41 | ControlFlowNode for ImportMember | tests.py:1:35:1:41 | ControlFlowNode for request | provenance | |
3+
| tests.py:1:35:1:41 | ControlFlowNode for request | tests.py:12:17:12:23 | ControlFlowNode for request | provenance | |
4+
| tests.py:1:35:1:41 | ControlFlowNode for request | tests.py:24:9:24:15 | ControlFlowNode for request | provenance | |
5+
| tests.py:1:35:1:41 | ControlFlowNode for request | tests.py:36:9:36:15 | ControlFlowNode for request | provenance | |
6+
| tests.py:1:35:1:41 | ControlFlowNode for request | tests.py:48:9:48:15 | ControlFlowNode for request | provenance | |
7+
| tests.py:1:35:1:41 | ControlFlowNode for request | tests.py:60:9:60:15 | ControlFlowNode for request | provenance | |
8+
| tests.py:1:35:1:41 | ControlFlowNode for request | tests.py:72:9:72:15 | ControlFlowNode for request | provenance | |
9+
| tests.py:12:5:12:13 | ControlFlowNode for file_path | tests.py:16:39:16:47 | ControlFlowNode for file_path | provenance | |
10+
| tests.py:12:17:12:23 | ControlFlowNode for request | tests.py:12:17:12:28 | ControlFlowNode for Attribute | provenance | |
11+
| tests.py:12:17:12:28 | ControlFlowNode for Attribute | tests.py:12:17:12:49 | ControlFlowNode for Attribute() | provenance | |
12+
| tests.py:12:17:12:49 | ControlFlowNode for Attribute() | tests.py:12:5:12:13 | ControlFlowNode for file_path | provenance | |
13+
| tests.py:24:5:24:5 | ControlFlowNode for r | tests.py:28:43:28:43 | ControlFlowNode for r | provenance | |
14+
| tests.py:24:9:24:15 | ControlFlowNode for request | tests.py:24:9:24:20 | ControlFlowNode for Attribute | provenance | |
15+
| tests.py:24:9:24:20 | ControlFlowNode for Attribute | tests.py:24:9:24:33 | ControlFlowNode for Attribute() | provenance | |
16+
| tests.py:24:9:24:33 | ControlFlowNode for Attribute() | tests.py:24:5:24:5 | ControlFlowNode for r | provenance | |
17+
| tests.py:36:5:36:5 | ControlFlowNode for r | tests.py:40:43:40:43 | ControlFlowNode for r | provenance | |
18+
| tests.py:36:9:36:15 | ControlFlowNode for request | tests.py:36:9:36:20 | ControlFlowNode for Attribute | provenance | |
19+
| tests.py:36:9:36:20 | ControlFlowNode for Attribute | tests.py:36:9:36:33 | ControlFlowNode for Attribute() | provenance | |
20+
| tests.py:36:9:36:33 | ControlFlowNode for Attribute() | tests.py:36:5:36:5 | ControlFlowNode for r | provenance | |
21+
| tests.py:48:5:48:5 | ControlFlowNode for r | tests.py:52:43:52:43 | ControlFlowNode for r | provenance | |
22+
| tests.py:48:9:48:15 | ControlFlowNode for request | tests.py:48:9:48:20 | ControlFlowNode for Attribute | provenance | |
23+
| tests.py:48:9:48:20 | ControlFlowNode for Attribute | tests.py:48:9:48:33 | ControlFlowNode for Attribute() | provenance | |
24+
| tests.py:48:9:48:33 | ControlFlowNode for Attribute() | tests.py:48:5:48:5 | ControlFlowNode for r | provenance | |
25+
| tests.py:60:5:60:5 | ControlFlowNode for r | tests.py:64:43:64:43 | ControlFlowNode for r | provenance | |
26+
| tests.py:60:9:60:15 | ControlFlowNode for request | tests.py:60:9:60:20 | ControlFlowNode for Attribute | provenance | |
27+
| tests.py:60:9:60:20 | ControlFlowNode for Attribute | tests.py:60:9:60:33 | ControlFlowNode for Attribute() | provenance | |
28+
| tests.py:60:9:60:33 | ControlFlowNode for Attribute() | tests.py:60:5:60:5 | ControlFlowNode for r | provenance | |
29+
| tests.py:72:5:72:5 | ControlFlowNode for r | tests.py:76:43:76:43 | ControlFlowNode for r | provenance | |
30+
| tests.py:72:9:72:15 | ControlFlowNode for request | tests.py:72:9:72:20 | ControlFlowNode for Attribute | provenance | |
31+
| tests.py:72:9:72:20 | ControlFlowNode for Attribute | tests.py:72:9:72:33 | ControlFlowNode for Attribute() | provenance | |
32+
| tests.py:72:9:72:33 | ControlFlowNode for Attribute() | tests.py:72:5:72:5 | ControlFlowNode for r | provenance | |
33+
nodes
34+
| tests.py:1:35:1:41 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
35+
| tests.py:1:35:1:41 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
36+
| tests.py:12:5:12:13 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path |
37+
| tests.py:12:17:12:23 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
38+
| tests.py:12:17:12:28 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
39+
| tests.py:12:17:12:49 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
40+
| tests.py:16:39:16:47 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path |
41+
| tests.py:24:5:24:5 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
42+
| tests.py:24:9:24:15 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
43+
| tests.py:24:9:24:20 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
44+
| tests.py:24:9:24:33 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
45+
| tests.py:28:43:28:43 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
46+
| tests.py:36:5:36:5 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
47+
| tests.py:36:9:36:15 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
48+
| tests.py:36:9:36:20 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
49+
| tests.py:36:9:36:33 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
50+
| tests.py:40:43:40:43 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
51+
| tests.py:48:5:48:5 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
52+
| tests.py:48:9:48:15 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
53+
| tests.py:48:9:48:20 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
54+
| tests.py:48:9:48:33 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
55+
| tests.py:52:43:52:43 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
56+
| tests.py:60:5:60:5 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
57+
| tests.py:60:9:60:15 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
58+
| tests.py:60:9:60:20 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
59+
| tests.py:60:9:60:33 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
60+
| tests.py:64:43:64:43 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
61+
| tests.py:72:5:72:5 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
62+
| tests.py:72:9:72:15 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
63+
| tests.py:72:9:72:20 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
64+
| tests.py:72:9:72:33 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
65+
| tests.py:76:43:76:43 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
66+
subpaths
67+
#select
68+
| tests.py:16:39:16:47 | ControlFlowNode for file_path | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | tests.py:16:39:16:47 | ControlFlowNode for file_path | This $@ can reach a $@. | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | user-provided value | tests.py:16:39:16:47 | ControlFlowNode for file_path | costly Unicode normalization operation |
69+
| tests.py:28:43:28:43 | ControlFlowNode for r | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | tests.py:28:43:28:43 | ControlFlowNode for r | This $@ can reach a $@. | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | user-provided value | tests.py:28:43:28:43 | ControlFlowNode for r | costly Unicode normalization operation |
70+
| tests.py:40:43:40:43 | ControlFlowNode for r | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | tests.py:40:43:40:43 | ControlFlowNode for r | This $@ can reach a $@. | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | user-provided value | tests.py:40:43:40:43 | ControlFlowNode for r | costly Unicode normalization operation |
71+
| tests.py:52:43:52:43 | ControlFlowNode for r | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | tests.py:52:43:52:43 | ControlFlowNode for r | This $@ can reach a $@. | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | user-provided value | tests.py:52:43:52:43 | ControlFlowNode for r | costly Unicode normalization operation |
72+
| tests.py:64:43:64:43 | ControlFlowNode for r | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | tests.py:64:43:64:43 | ControlFlowNode for r | This $@ can reach a $@. | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | user-provided value | tests.py:64:43:64:43 | ControlFlowNode for r | costly Unicode normalization operation |
73+
| tests.py:76:43:76:43 | ControlFlowNode for r | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | tests.py:76:43:76:43 | ControlFlowNode for r | This $@ can reach a $@. | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | user-provided value | tests.py:76:43:76:43 | ControlFlowNode for r | costly Unicode normalization operation |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
experimental/Security/CWE-770/UnicodeDoS.ql
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
from flask import Flask, jsonify, request
2+
import unicodedata
3+
4+
app = Flask(__name__)
5+
6+
STATIC_DIR = "/home/unknown/"
7+
8+
9+
@app.route("/bad_1")
10+
def bad_1():
11+
# Source: user-controlled query parameter, read without any length limit
12+
file_path = request.args.get("file_path", "")
13+
14+
# BAD: NFKC normalization is applied directly to the unbounded user input
15+
return (
16+
unicodedata.normalize("NFKC", file_path),
17+
200,
18+
{"Content-Type": "application/octet-stream"},
19+
)
20+
21+
22+
@app.route("/bad_2")
23+
def bad_2():
24+
r = request.args.get("r", "")
25+
26+
if len(r) >= 10:
27+
# BAD: `len(r) >= 10` only sets a minimum length, so arbitrarily long input still reaches NFKD normalization
28+
r = unicodedata.normalize("NFKD", r)
29+
return r, 200, {"Content-Type": "application/octet-stream"}
30+
else:
31+
return jsonify({"error": "File not found"}), 404
32+
33+
34+
@app.route("/bad_3")
35+
def bad_3():
36+
r = request.args.get("r", "")
37+
length = len(r)
38+
if length >= 1_000:
39+
# BAD: `length >= 1_000` is a lower bound (length held in a variable), so long input still reaches NFKD normalization
40+
r = unicodedata.normalize("NFKD", r)
41+
return r, 200, {"Content-Type": "application/octet-stream"}
42+
else:
43+
return jsonify({"error": "File not found"}), 404
44+
45+
46+
@app.route("/bad_4")
47+
def bad_4():
48+
r = request.args.get("r", "")
49+
length = len(r)
50+
if 1_000 <= length:
51+
# BAD: `1_000 <= length` is a lower bound with the limit on the left, so long input still reaches NFKD normalization
52+
r = unicodedata.normalize("NFKD", r)
53+
return r, 200, {"Content-Type": "application/octet-stream"}
54+
else:
55+
return jsonify({"error": "File not found"}), 404
56+
57+
58+
@app.route("/bad_5")
58+
def bad_5():
59+
r = request.args.get("r", "")
60+
length = len(r)
61+
if not length < 1_000:
62+
# BAD: `not length < 1_000` means the length is at least 1_000, so long input still reaches NFKD normalization
63+
r = unicodedata.normalize("NFKD", r)
64+
return r, 200, {"Content-Type": "application/octet-stream"}
65+
else:
66+
return jsonify({"error": "File not found"}), 404
68+
69+
70+
@app.route("/bad_6")
70+
def bad_6():
71+
r = request.args.get("r", "")
72+
length = len(r)
73+
if not 1_000 > length:
74+
# BAD: `not 1_000 > length` means the length is at least 1_000, so long input still reaches NFKD normalization
75+
r = unicodedata.normalize("NFKD", r)
76+
return r, 200, {"Content-Type": "application/octet-stream"}
77+
else:
78+
return jsonify({"error": "File not found"}), 404
80+
81+
82+
@app.route("/good_1")
82+
def good_1():
83+
r = request.args.get("r", "")
84+
85+
if len(r) <= 1_000:
86+
# GOOD: `len(r) <= 1_000` holds here, so NFKD normalization only sees input of at most 1_000 characters
87+
r = unicodedata.normalize("NFKD", r)
88+
return r, 200, {"Content-Type": "application/octet-stream"}
89+
else:
90+
return jsonify({"error": "File not found"}), 404
92+
93+
94+
@app.route("/good_2")
94+
def good_2():
95+
r = request.args.get("r", "")
96+
MAX_LENGTH = 1_000
97+
length = len(r)
98+
if length <= MAX_LENGTH:
99+
# GOOD: `length <= MAX_LENGTH` holds here (length and limit both held in variables), so the normalized input is bounded
100+
r = unicodedata.normalize("NFKD", r)
101+
return r, 200, {"Content-Type": "application/octet-stream"}
102+
else:
103+
return jsonify({"error": "File not found"}), 404
105+
106+
@app.route("/good_3")
106+
def good_3():
107+
r = request.args.get("r", "")
108+
MAX_LENGTH = 1_000
109+
length = len(r)
110+
if not length >= MAX_LENGTH:
111+
# GOOD: `not length >= MAX_LENGTH` means the length is below MAX_LENGTH, so the normalized input is bounded
112+
r = unicodedata.normalize("NFKD", r)
113+
return r, 200, {"Content-Type": "application/octet-stream"}
114+
else:
115+
return jsonify({"error": "File not found"}), 404
117+
118+
119+
@app.route("/good_4")
119+
def good_4():
120+
r = request.args.get("r", "")
121+
MAX_LENGTH = 1_000
122+
length = len(r)
123+
if not MAX_LENGTH <= length:
124+
# GOOD: `not MAX_LENGTH <= length` means the length is below MAX_LENGTH, so the normalized input is bounded
125+
r = unicodedata.normalize("NFKD", r)
126+
return r, 200, {"Content-Type": "application/octet-stream"}
127+
else:
128+
return jsonify({"error": "File not found"}), 404

0 commit comments

Comments
 (0)