Skip to content

Commit

Permalink
Merge pull request #15319 from Sim4n6/main
Browse files Browse the repository at this point in the history
[Python] Add Unicode DoS (qhelp, tests and the query)
  • Loading branch information
yoff authored Mar 19, 2024
2 parents d78efdb + 1af8167 commit f025430
Show file tree
Hide file tree
Showing 7 changed files with 388 additions and 0 deletions.
38 changes: 38 additions & 0 deletions python/ql/src/experimental/Security/CWE-770/UnicodeDoS.qhelp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>

<overview>
<p>When remote user-controlled data can reach a costly Unicode normalization that uses either the NFKC or the NFKD form, an attack such as the One Million Unicode Characters attack could lead to a denial of service on Windows.</p>

<p>In addition, special Unicode characters such as U+2100 (℀) or U+2105 (℅) each expand to three characters under compatibility normalization, so the payload size can triple.</p>

</overview>
<recommendation>

<p>Limit the size of any incoming data before it goes through a costly operation, such as a Unicode normalization with NFKC or NFKD on Windows. Doing so avoids a potential denial of service.</p>

</recommendation>

<example>
<p>
In this example, user-controlled data reaches a Unicode normalization with the form "NFKC".
</p>

<sample src="bad.py" />

<p>To fix this vulnerability, we need to restrict the size of the user input.</p>

<p>For example, we can use the <code>len()</code> builtin function to limit the size of the user input.</p>

<sample src="good.py" />

</example>
<references>

<li>
<a href="https://hackerone.com/reports/2258758">CVE-2023-46695: Potential denial of service vulnerability in Django UsernameField on Windows.</a>
</li>
</references>
</qhelp>
114 changes: 114 additions & 0 deletions python/ql/src/experimental/Security/CWE-770/UnicodeDoS.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/**
* @name Denial of Service using Unicode Characters
* @description Remote user-controlled data can reach a costly Unicode normalization with either form NFKC or NFKD. On Windows, an attack such as the One Million Unicode Characters could then lead to a denial of service. With special Unicode characters such as U+2100 (℀) or U+2105 (℅), the payload size can also triple.
* @kind path-problem
* @id py/unicode-dos
* @precision high
* @problem.severity error
* @tags security
* experimental
* external/cwe/cwe-770
*/

import python
import semmle.python.ApiGraphs
import semmle.python.Concepts
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.internal.DataFlowPublic
import semmle.python.dataflow.new.RemoteFlowSources

/**
 * A call that this query treats as a Unicode compatibility normalization, drawn from
 * the `unicodedata`, `unidecode`, `pyunormalize` and `textnorm` modules.
 *
 * The field `argIdx` constrains which positional argument is the one being normalized.
 */
class UnicodeCompatibilityNormalize extends API::CallNode {
  int argIdx;

  UnicodeCompatibilityNormalize() {
    // `unicodedata.normalize(form, value)` / `pyunormalize.normalize(form, value)`:
    // the form comes first, the normalized value second.
    this = API::moduleImport(["unicodedata", "pyunormalize"]).getMember("normalize").getACall() and
    this.getParameter(0).getAValueReachingSink().asExpr().(StrConst).getText() in ["NFKC", "NFKD"] and
    argIdx = 1
    or
    // `textnorm.normalize_unicode(value, form)`: the value comes first, the form second.
    this = API::moduleImport("textnorm").getMember("normalize_unicode").getACall() and
    this.getParameter(1).getAValueReachingSink().asExpr().(StrConst).getText() in ["NFKC", "NFKD"] and
    argIdx = 0
    or
    // Calls without a form argument: `unidecode.unidecode(value)` and the dedicated
    // `pyunormalize.NFKC(value)` / `pyunormalize.NFKD(value)` helpers.
    (
      this = API::moduleImport("unidecode").getMember("unidecode").getACall()
      or
      this = API::moduleImport("pyunormalize").getMember(["NFKC", "NFKD"]).getACall()
    ) and
    argIdx = 0
  }

  /** Gets the argument of this call that is being normalized. */
  DataFlow::Node getPathArg() { result = this.getArg(argIdx) }
}

/**
 * Holds if `g` is a comparison that, when it evaluates to `branch`, places an upper
 * bound on a value obtained from a call to the builtin `len`, and `node` is the
 * first argument of that `len` call.
 *
 * The operand on the other side of the comparison is left unconstrained, so any
 * expression is accepted as the limit.
 *
 * This predicate has the shape expected by `DataFlow::BarrierGuard`.
 */
predicate underAValue(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
  exists(CompareNode cn | cn = g |
    exists(API::CallNode lenCall, Cmpop op, Node n |
      // `n` is the compared operand; it must originate locally from the `len` call
      lenCall = n.getALocalSource() and
      (
        // len(arg) <= LIMIT or len(arg) < LIMIT
        (op instanceof LtE or op instanceof Lt) and
        branch = true and
        cn.operands(n.asCfgNode(), op, _)
        or
        // LIMIT >= len(arg) or LIMIT > len(arg)
        (op instanceof GtE or op instanceof Gt) and
        branch = true and
        cn.operands(_, op, n.asCfgNode())
        or
        // not len(arg) >= LIMIT or not len(arg) > LIMIT
        (op instanceof GtE or op instanceof Gt) and
        branch = false and
        cn.operands(n.asCfgNode(), op, _)
        or
        // not LIMIT <= len(arg) or not LIMIT < len(arg)
        (op instanceof LtE or op instanceof Lt) and
        branch = false and
        cn.operands(_, op, n.asCfgNode())
      )
    |
      lenCall = API::builtin("len").getACall() and
      // the guarded node is the value whose length is being checked
      node = lenCall.getArg(0).asCfgNode()
    )
  )
}

/**
 * Taint-tracking configuration for the Unicode DoS query: remote flow sources
 * reaching a costly Unicode normalization without a prior length check.
 */
private module UnicodeDoSConfig implements DataFlow::ConfigSig {
  predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }

  predicate isBarrier(DataFlow::Node barrier) {
    // A value is considered safe once `underAValue` shows its length is checked
    // against a limit.
    barrier = DataFlow::BarrierGuard<underAValue/3>::getABarrierNode()
  }

  predicate isSink(DataFlow::Node sink) {
    // Any call to the Unicode compatibility normalization is a costly operation
    exists(UnicodeCompatibilityNormalize ucn | sink = ucn.getPathArg())
    or
    // The call to secure_filename() from pallets/werkzeug uses the Unicode compatibility normalization
    // under the hood, https://github.com/pallets/werkzeug/blob/d3dd65a27388fbd39d146caacf2563639ba622f0/src/werkzeug/utils.py#L218
    // It is matched both as `werkzeug.secure_filename` and `werkzeug.utils.secure_filename`.
    exists(API::Node secureFilename |
      secureFilename = API::moduleImport("werkzeug").getMember("secure_filename")
      or
      secureFilename = API::moduleImport("werkzeug").getMember("utils").getMember("secure_filename")
    |
      sink = secureFilename.getACall().getArg(_)
    )
  }
}

/** Global taint tracking instantiated with `UnicodeDoSConfig`. */
module UnicodeDoSFlow = TaintTracking::Global<UnicodeDoSConfig>;

import UnicodeDoSFlow::PathGraph

// Report every path from a user-provided value to a normalization sink.
from UnicodeDoSFlow::PathNode src, UnicodeDoSFlow::PathNode snk
where UnicodeDoSFlow::flowPath(src, snk)
select snk.getNode(), src, snk, "This $@ can reach a $@.", src.getNode(), "user-provided value",
  snk.getNode(), "costly Unicode normalization operation"
17 changes: 17 additions & 0 deletions python/ql/src/experimental/Security/CWE-770/bad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from flask import Flask, jsonify, request
import unicodedata

app = Flask(__name__)


@app.route("/bad_1")
def bad_1():
    """Return the NFKC-normalized ``file_path`` query parameter.

    BAD: the user-controlled value is normalized without any size limit.
    """
    # User-controlled data
    user_path = request.args.get("file_path", "")

    # Compatibility normalization applied directly to the unbounded input
    normalized = unicodedata.normalize("NFKC", user_path)
    headers = {"Content-Type": "application/octet-stream"}
    return normalized, 200, headers
16 changes: 16 additions & 0 deletions python/ql/src/experimental/Security/CWE-770/good.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from flask import Flask, jsonify, request
import unicodedata

app = Flask(__name__)


@app.route("/good_1")
def good_1():
    """Normalize the ``file_path`` query parameter only when it is short enough.

    Returns the NFKD-normalized value with status 200, or a JSON error with
    status 400 when the input is longer than 1,000 characters.
    """
    r = request.args.get("file_path", "")

    # GOOD: bound the input size before the costly compatibility normalization.
    if len(r) <= 1_000:
        r = unicodedata.normalize("NFKD", r)
        return r, 200, {"Content-Type": "application/octet-stream"}

    # Over-long input is a client error, not a missing resource, so report it
    # as such instead of the misleading 404 "File not found".
    return jsonify({"error": "Input too long"}), 400
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
edges
| tests.py:1:35:1:41 | ControlFlowNode for ImportMember | tests.py:1:35:1:41 | ControlFlowNode for request | provenance | |
| tests.py:1:35:1:41 | ControlFlowNode for request | tests.py:12:17:12:23 | ControlFlowNode for request | provenance | |
| tests.py:1:35:1:41 | ControlFlowNode for request | tests.py:24:9:24:15 | ControlFlowNode for request | provenance | |
| tests.py:1:35:1:41 | ControlFlowNode for request | tests.py:36:9:36:15 | ControlFlowNode for request | provenance | |
| tests.py:1:35:1:41 | ControlFlowNode for request | tests.py:48:9:48:15 | ControlFlowNode for request | provenance | |
| tests.py:1:35:1:41 | ControlFlowNode for request | tests.py:60:9:60:15 | ControlFlowNode for request | provenance | |
| tests.py:1:35:1:41 | ControlFlowNode for request | tests.py:72:9:72:15 | ControlFlowNode for request | provenance | |
| tests.py:12:5:12:13 | ControlFlowNode for file_path | tests.py:16:39:16:47 | ControlFlowNode for file_path | provenance | |
| tests.py:12:17:12:23 | ControlFlowNode for request | tests.py:12:17:12:28 | ControlFlowNode for Attribute | provenance | |
| tests.py:12:17:12:28 | ControlFlowNode for Attribute | tests.py:12:17:12:49 | ControlFlowNode for Attribute() | provenance | |
| tests.py:12:17:12:49 | ControlFlowNode for Attribute() | tests.py:12:5:12:13 | ControlFlowNode for file_path | provenance | |
| tests.py:24:5:24:5 | ControlFlowNode for r | tests.py:28:43:28:43 | ControlFlowNode for r | provenance | |
| tests.py:24:9:24:15 | ControlFlowNode for request | tests.py:24:9:24:20 | ControlFlowNode for Attribute | provenance | |
| tests.py:24:9:24:20 | ControlFlowNode for Attribute | tests.py:24:9:24:33 | ControlFlowNode for Attribute() | provenance | |
| tests.py:24:9:24:33 | ControlFlowNode for Attribute() | tests.py:24:5:24:5 | ControlFlowNode for r | provenance | |
| tests.py:36:5:36:5 | ControlFlowNode for r | tests.py:40:43:40:43 | ControlFlowNode for r | provenance | |
| tests.py:36:9:36:15 | ControlFlowNode for request | tests.py:36:9:36:20 | ControlFlowNode for Attribute | provenance | |
| tests.py:36:9:36:20 | ControlFlowNode for Attribute | tests.py:36:9:36:33 | ControlFlowNode for Attribute() | provenance | |
| tests.py:36:9:36:33 | ControlFlowNode for Attribute() | tests.py:36:5:36:5 | ControlFlowNode for r | provenance | |
| tests.py:48:5:48:5 | ControlFlowNode for r | tests.py:52:43:52:43 | ControlFlowNode for r | provenance | |
| tests.py:48:9:48:15 | ControlFlowNode for request | tests.py:48:9:48:20 | ControlFlowNode for Attribute | provenance | |
| tests.py:48:9:48:20 | ControlFlowNode for Attribute | tests.py:48:9:48:33 | ControlFlowNode for Attribute() | provenance | |
| tests.py:48:9:48:33 | ControlFlowNode for Attribute() | tests.py:48:5:48:5 | ControlFlowNode for r | provenance | |
| tests.py:60:5:60:5 | ControlFlowNode for r | tests.py:64:43:64:43 | ControlFlowNode for r | provenance | |
| tests.py:60:9:60:15 | ControlFlowNode for request | tests.py:60:9:60:20 | ControlFlowNode for Attribute | provenance | |
| tests.py:60:9:60:20 | ControlFlowNode for Attribute | tests.py:60:9:60:33 | ControlFlowNode for Attribute() | provenance | |
| tests.py:60:9:60:33 | ControlFlowNode for Attribute() | tests.py:60:5:60:5 | ControlFlowNode for r | provenance | |
| tests.py:72:5:72:5 | ControlFlowNode for r | tests.py:76:43:76:43 | ControlFlowNode for r | provenance | |
| tests.py:72:9:72:15 | ControlFlowNode for request | tests.py:72:9:72:20 | ControlFlowNode for Attribute | provenance | |
| tests.py:72:9:72:20 | ControlFlowNode for Attribute | tests.py:72:9:72:33 | ControlFlowNode for Attribute() | provenance | |
| tests.py:72:9:72:33 | ControlFlowNode for Attribute() | tests.py:72:5:72:5 | ControlFlowNode for r | provenance | |
nodes
| tests.py:1:35:1:41 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
| tests.py:1:35:1:41 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| tests.py:12:5:12:13 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path |
| tests.py:12:17:12:23 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| tests.py:12:17:12:28 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| tests.py:12:17:12:49 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tests.py:16:39:16:47 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path |
| tests.py:24:5:24:5 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
| tests.py:24:9:24:15 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| tests.py:24:9:24:20 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| tests.py:24:9:24:33 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tests.py:28:43:28:43 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
| tests.py:36:5:36:5 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
| tests.py:36:9:36:15 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| tests.py:36:9:36:20 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| tests.py:36:9:36:33 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tests.py:40:43:40:43 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
| tests.py:48:5:48:5 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
| tests.py:48:9:48:15 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| tests.py:48:9:48:20 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| tests.py:48:9:48:33 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tests.py:52:43:52:43 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
| tests.py:60:5:60:5 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
| tests.py:60:9:60:15 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| tests.py:60:9:60:20 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| tests.py:60:9:60:33 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tests.py:64:43:64:43 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
| tests.py:72:5:72:5 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
| tests.py:72:9:72:15 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| tests.py:72:9:72:20 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| tests.py:72:9:72:33 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tests.py:76:43:76:43 | ControlFlowNode for r | semmle.label | ControlFlowNode for r |
subpaths
#select
| tests.py:16:39:16:47 | ControlFlowNode for file_path | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | tests.py:16:39:16:47 | ControlFlowNode for file_path | This $@ can reach a $@. | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | user-provided value | tests.py:16:39:16:47 | ControlFlowNode for file_path | costly Unicode normalization operation |
| tests.py:28:43:28:43 | ControlFlowNode for r | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | tests.py:28:43:28:43 | ControlFlowNode for r | This $@ can reach a $@. | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | user-provided value | tests.py:28:43:28:43 | ControlFlowNode for r | costly Unicode normalization operation |
| tests.py:40:43:40:43 | ControlFlowNode for r | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | tests.py:40:43:40:43 | ControlFlowNode for r | This $@ can reach a $@. | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | user-provided value | tests.py:40:43:40:43 | ControlFlowNode for r | costly Unicode normalization operation |
| tests.py:52:43:52:43 | ControlFlowNode for r | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | tests.py:52:43:52:43 | ControlFlowNode for r | This $@ can reach a $@. | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | user-provided value | tests.py:52:43:52:43 | ControlFlowNode for r | costly Unicode normalization operation |
| tests.py:64:43:64:43 | ControlFlowNode for r | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | tests.py:64:43:64:43 | ControlFlowNode for r | This $@ can reach a $@. | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | user-provided value | tests.py:64:43:64:43 | ControlFlowNode for r | costly Unicode normalization operation |
| tests.py:76:43:76:43 | ControlFlowNode for r | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | tests.py:76:43:76:43 | ControlFlowNode for r | This $@ can reach a $@. | tests.py:1:35:1:41 | ControlFlowNode for ImportMember | user-provided value | tests.py:76:43:76:43 | ControlFlowNode for r | costly Unicode normalization operation |
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
experimental/Security/CWE-770/UnicodeDoS.ql
129 changes: 129 additions & 0 deletions python/ql/test/experimental/query-tests/Security/CWE-770/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
from flask import Flask, jsonify, request
import unicodedata

app = Flask(__name__)

STATIC_DIR = "/home/unknown/"


@app.route("/bad_1")
def bad_1():  # BAD: no length check before the NFKC normalization
    # User-controlled data read from the "file_path" query parameter
    file_path = request.args.get("file_path", "")

    # Normalize the file path using NFKC Unicode normalization (expected alert)
    return (
        unicodedata.normalize("NFKC", file_path),
        200,
        {"Content-Type": "application/octet-stream"},
    )


@app.route("/bad_2")
def bad_2():  # BAD: the guard is a lower bound, so long inputs are still normalized
    r = request.args.get("r", "")

    if len(r) >= 10:
        # Normalize r using NFKD Unicode normalization (expected alert)
        r = unicodedata.normalize("NFKD", r)
        return r, 200, {"Content-Type": "application/octet-stream"}
    else:
        return jsonify({"error": "File not found"}), 404


@app.route("/bad_3")
def bad_3():  # BAD: lower-bound check on a length stored in a local variable
    r = request.args.get("r", "")
    length = len(r)
    if length >= 1_000:
        # Normalize r using NFKD Unicode normalization (expected alert)
        r = unicodedata.normalize("NFKD", r)
        return r, 200, {"Content-Type": "application/octet-stream"}
    else:
        return jsonify({"error": "File not found"}), 404


@app.route("/bad_4")
def bad_4():  # BAD: lower-bound check written with the limit on the left
    r = request.args.get("r", "")
    length = len(r)
    if 1_000 <= length:
        # Normalize r using NFKD Unicode normalization (expected alert)
        r = unicodedata.normalize("NFKD", r)
        return r, 200, {"Content-Type": "application/octet-stream"}
    else:
        return jsonify({"error": "File not found"}), 404


@app.route("/bad_5")
def bad_5():  # BAD: a negated upper bound is a lower bound
    r = request.args.get("r", "")
    length = len(r)
    if not length < 1_000:
        # Normalize r using NFKD Unicode normalization (expected alert)
        r = unicodedata.normalize("NFKD", r)
        return r, 200, {"Content-Type": "application/octet-stream"}
    else:
        return jsonify({"error": "File not found"}), 404


@app.route("/bad_6")
def bad_6():  # BAD: negated upper bound with the limit on the left
    r = request.args.get("r", "")
    length = len(r)
    if not 1_000 > length:
        # Normalize r using NFKD Unicode normalization (expected alert)
        r = unicodedata.normalize("NFKD", r)
        return r, 200, {"Content-Type": "application/octet-stream"}
    else:
        return jsonify({"error": "File not found"}), 404


@app.route("/good_1")
def good_1():  # GOOD: len(r) is bounded above before the normalization
    r = request.args.get("r", "")

    if len(r) <= 1_000:
        # Normalize r using NFKD Unicode normalization (no alert expected)
        r = unicodedata.normalize("NFKD", r)
        return r, 200, {"Content-Type": "application/octet-stream"}
    else:
        return jsonify({"error": "File not found"}), 404


@app.route("/good_2")
def good_2():  # GOOD: upper bound via a local length and a named limit
    r = request.args.get("r", "")
    MAX_LENGTH = 1_000
    length = len(r)
    if length <= MAX_LENGTH:
        # Normalize r using NFKD Unicode normalization (no alert expected)
        r = unicodedata.normalize("NFKD", r)
        return r, 200, {"Content-Type": "application/octet-stream"}
    else:
        return jsonify({"error": "File not found"}), 404

@app.route("/good_3")
def good_3():  # GOOD: a negated lower bound is an upper bound
    r = request.args.get("r", "")
    MAX_LENGTH = 1_000
    length = len(r)
    if not length >= MAX_LENGTH:
        # Normalize r using NFKD Unicode normalization (no alert expected)
        r = unicodedata.normalize("NFKD", r)
        return r, 200, {"Content-Type": "application/octet-stream"}
    else:
        return jsonify({"error": "File not found"}), 404


@app.route("/good_4")
def good_4():  # GOOD: negated lower bound with the limit on the left
    r = request.args.get("r", "")
    MAX_LENGTH = 1_000
    length = len(r)
    if not MAX_LENGTH <= length:
        # Normalize r using NFKD Unicode normalization (no alert expected)
        r = unicodedata.normalize("NFKD", r)
        return r, 200, {"Content-Type": "application/octet-stream"}
    else:
        return jsonify({"error": "File not found"}), 404

0 comments on commit f025430

Please sign in to comment.