Skip to content

Commit 6df1f5a

Browse files
committed
Python: missing steps for repro
- API graph subscript operator to understand comprehensions - captureJumpStep to not require defining value to exist - stdlib modelling: finditer returns list of match objects - adjust taint output of finditer - adjust `ReMatchMethodsSummary.getACall`
1 parent dff02cf commit 6df1f5a

File tree

4 files changed

+44
-22
lines changed

4 files changed

+44
-22
lines changed

python/ql/lib/semmle/python/ApiGraphs.qll

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -843,6 +843,13 @@ module API {
843843
ref = pred.getSubscript(_) and
844844
ref.asCfgNode().isLoad()
845845
or
846+
// Subscript via comprehension
847+
lbl = Label::subscript() and
848+
exists(PY::Comp comp |
849+
pred.asExpr() = comp.getIterable() and
850+
ref.asExpr() = comp.getNthInnerLoop(0).getTarget()
851+
)
852+
or
846853
// Subclassing a node
847854
lbl = Label::subclass() and
848855
exists(PY::ClassExpr clsExpr, DataFlow::Node superclass | pred.flowsTo(superclass) |

python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ module TypeTrackingInput implements Shared::TypeTrackingInput {
304304
var.hasDefiningNode(def)
305305
|
306306
nodeTo.(DataFlowPublic::ScopeEntryDefinitionNode).getDefinition() = e and
307-
nodeFrom.asCfgNode() = def.getValue() and
307+
nodeFrom.asCfgNode() = def and
308308
var.getScope().getScope*() = nodeFrom.getScope()
309309
)
310310
}

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3284,6 +3284,14 @@ module StdlibPrivate {
32843284
}
32853285
}
32863286

3287+
private API::Node re(string name) {
3288+
name = "re.Match" and
3289+
result = API::moduleImport("re")
3290+
or
3291+
name = "compiled re.Match" and
3292+
result = any(RePatternSummary c).getACall().(API::CallNode).getReturn()
3293+
}
3294+
32873295
/**
32883296
* A flow summary for methods returning a `re.Match` object
32893297
*
@@ -3293,17 +3301,9 @@ module StdlibPrivate {
32933301
ReMatchSummary() { this = ["re.Match", "compiled re.Match"] }
32943302

32953303
override DataFlow::CallCfgNode getACall() {
3296-
this = "re.Match" and
3297-
result = API::moduleImport("re").getMember(["match", "search", "fullmatch"]).getACall()
3298-
or
3299-
this = "compiled re.Match" and
3300-
result =
3301-
any(RePatternSummary c)
3302-
.getACall()
3303-
.(API::CallNode)
3304-
.getReturn()
3305-
.getMember(["match", "search", "fullmatch"])
3306-
.getACall()
3304+
exists(API::Node re | re = re(this) |
3305+
result = re.getMember(["match", "search", "fullmatch"]).getACall()
3306+
)
33073307
}
33083308

33093309
override DataFlow::ArgumentNode getACallback() { none() }
@@ -3340,6 +3340,12 @@ module StdlibPrivate {
33403340
}
33413341
}
33423342

3343+
private API::Node match() {
3344+
result = any(ReMatchSummary c).getACall().(API::CallNode).getReturn()
3345+
or
3346+
result = re(_).getMember("finditer").getReturn().getASubscript()
3347+
}
3348+
33433349
/**
33443350
* A flow summary for methods on a `re.Match` object
33453351
*
@@ -3353,15 +3359,7 @@ module StdlibPrivate {
33533359
methodName in ["expand", "group", "groups", "groupdict"]
33543360
}
33553361

3356-
override DataFlow::CallCfgNode getACall() {
3357-
result =
3358-
any(ReMatchSummary c)
3359-
.getACall()
3360-
.(API::CallNode)
3361-
.getReturn()
3362-
.getMember(methodName)
3363-
.getACall()
3364-
}
3362+
override DataFlow::CallCfgNode getACall() { result = match().getMember(methodName).getACall() }
33653363

33663364
override DataFlow::ArgumentNode getACallback() { none() }
33673365

@@ -3447,6 +3445,9 @@ module StdlibPrivate {
34473445
or
34483446
methodName = "subn" and
34493447
output = "ReturnValue.TupleElement[0]"
3448+
or
3449+
methodName = "finditer" and
3450+
output = "ReturnValue.ListElement.Attribute[string]"
34503451
)
34513452
)
34523453
or

python/ql/test/library-tests/dataflow/tainttracking/DSVW/repro.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import re
22
import urllib.parse
33
import sys
4+
import http.client
45

56
def generator_dict_re_combo():
67
query = TAINTED_STRING
@@ -28,4 +29,17 @@ def parse_qs():
2829

2930
params = urllib.parse.parse_qs(query)
3031

31-
ensure_tainted(params) # $ tainted
32+
ensure_tainted(params) # $ tainted
33+
34+
HTML_PREFIX = """<!DOCTYPE html>"""
35+
36+
def flat():
37+
self_path = TAINTED_STRING
38+
39+
path, query = self_path.split('?', 1) if '?' in self_path else (self_path, "")
40+
code, content, params, cursor = http.client.OK, HTML_PREFIX, dict((match.group("parameter"), urllib.parse.unquote(','.join(re.findall(r"(?:\A|[?&])%s=([^&]+)" % match.group("parameter"), query)))) for match in re.finditer(r"((\A|[?&])(?P<parameter>[\w\[\]]+)=)([^&]+)", query)), "Cursor"
41+
42+
print(code)
43+
print(content)
44+
ensure_tainted(params) # $ tainted
45+
print(cursor)

0 commit comments

Comments
 (0)