diff --git a/python/ql/lib/semmle/python/ApiGraphs.qll b/python/ql/lib/semmle/python/ApiGraphs.qll index 1ce04852f3ad..4385259ca9b3 100644 --- a/python/ql/lib/semmle/python/ApiGraphs.qll +++ b/python/ql/lib/semmle/python/ApiGraphs.qll @@ -843,6 +843,13 @@ module API { ref = pred.getSubscript(_) and ref.asCfgNode().isLoad() or + // Subscript via comprehension + lbl = Label::subscript() and + exists(PY::Comp comp | + pred.asExpr() = comp.getIterable() and + ref.asExpr() = comp.getNthInnerLoop(0).getTarget() + ) + or // Subclassing a node lbl = Label::subclass() and exists(PY::ClassExpr clsExpr, DataFlow::Node superclass | pred.flowsTo(superclass) | diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index f3e4ff40800b..415028ad8277 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -304,7 +304,7 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { var.hasDefiningNode(def) | nodeTo.(DataFlowPublic::ScopeEntryDefinitionNode).getDefinition() = e and - nodeFrom.asCfgNode() = def.getValue() and + nodeFrom.asCfgNode() = def and var.getScope().getScope*() = nodeFrom.getScope() ) } diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 02d5f488f022..43f546642ef0 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3284,6 +3284,14 @@ module StdlibPrivate { } } + private API::Node re(string name) { + name = "re.Match" and + result = API::moduleImport("re") + or + name = "compiled re.Match" and + result = any(RePatternSummary c).getACall().(API::CallNode).getReturn() + } + /** * A flow summary for methods returning a `re.Match` object * @@ -3293,17 +3301,9 @@ module StdlibPrivate { ReMatchSummary() { this = 
["re.Match", "compiled re.Match"] } override DataFlow::CallCfgNode getACall() { - this = "re.Match" and - result = API::moduleImport("re").getMember(["match", "search", "fullmatch"]).getACall() - or - this = "compiled re.Match" and - result = - any(RePatternSummary c) - .getACall() - .(API::CallNode) - .getReturn() - .getMember(["match", "search", "fullmatch"]) - .getACall() + exists(API::Node re | re = re(this) | + result = re.getMember(["match", "search", "fullmatch"]).getACall() + ) } override DataFlow::ArgumentNode getACallback() { none() } @@ -3340,6 +3340,12 @@ module StdlibPrivate { } } + private API::Node match() { + result = any(ReMatchSummary c).getACall().(API::CallNode).getReturn() + or + result = re(_).getMember("finditer").getReturn().getASubscript() + } + /** * A flow summary for methods on a `re.Match` object * @@ -3353,15 +3359,7 @@ module StdlibPrivate { methodName in ["expand", "group", "groups", "groupdict"] } - override DataFlow::CallCfgNode getACall() { - result = - any(ReMatchSummary c) - .getACall() - .(API::CallNode) - .getReturn() - .getMember(methodName) - .getACall() - } + override DataFlow::CallCfgNode getACall() { result = match().getMember(methodName).getACall() } override DataFlow::ArgumentNode getACallback() { none() } @@ -3447,6 +3445,9 @@ module StdlibPrivate { or methodName = "subn" and output = "ReturnValue.TupleElement[0]" + or + methodName = "finditer" and + output = "ReturnValue.ListElement.Attribute[string]" ) ) or diff --git a/python/ql/test/library-tests/dataflow/tainttracking/DSVW/repro.py b/python/ql/test/library-tests/dataflow/tainttracking/DSVW/repro.py index 082456ecc0ee..ae9761cefbfd 100644 --- a/python/ql/test/library-tests/dataflow/tainttracking/DSVW/repro.py +++ b/python/ql/test/library-tests/dataflow/tainttracking/DSVW/repro.py @@ -1,6 +1,7 @@ import re import urllib.parse import sys +import http.client def generator_dict_re_combo(): query = TAINTED_STRING @@ -28,4 +29,17 @@ def parse_qs(): params = 
urllib.parse.parse_qs(query) - ensure_tainted(params) # $ tainted \ No newline at end of file + ensure_tainted(params) # $ tainted + +HTML_PREFIX = """""" + +def flat(): + self_path = TAINTED_STRING + + path, query = self_path.split('?', 1) if '?' in self_path else (self_path, "") + code, content, params, cursor = http.client.OK, HTML_PREFIX, dict((match.group("parameter"), urllib.parse.unquote(','.join(re.findall(r"(?:\A|[?&])%s=([^&]+)" % match.group("parameter"), query)))) for match in re.finditer(r"((\A|[?&])(?P<parameter>[\w\[\]]+)=)([^&]+)", query)), "Cursor" + + print(code) + print(content) + ensure_tainted(params) # $ tainted + print(cursor)