Skip to content

Commit

Permalink
Python: missing steps for repro
Browse files Browse the repository at this point in the history
- API graph subscript operator to understand comprehensions
- captureJumpStep to not require defining value to exist
- stdlib modelling: finditer returns list of match objects
  - adjust taint output of finditer
  - adjust `ReMatchMethodsSummary.getACall`
  • Loading branch information
yoff committed Oct 2, 2024
1 parent dff02cf commit 6df1f5a
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 22 deletions.
7 changes: 7 additions & 0 deletions python/ql/lib/semmle/python/ApiGraphs.qll
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,13 @@ module API {
ref = pred.getSubscript(_) and
ref.asCfgNode().isLoad()
or
// Subscript via comprehension
lbl = Label::subscript() and
exists(PY::Comp comp |
pred.asExpr() = comp.getIterable() and
ref.asExpr() = comp.getNthInnerLoop(0).getTarget()
)
or
// Subclassing a node
lbl = Label::subclass() and
exists(PY::ClassExpr clsExpr, DataFlow::Node superclass | pred.flowsTo(superclass) |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ module TypeTrackingInput implements Shared::TypeTrackingInput {
var.hasDefiningNode(def)
|
nodeTo.(DataFlowPublic::ScopeEntryDefinitionNode).getDefinition() = e and
nodeFrom.asCfgNode() = def.getValue() and
nodeFrom.asCfgNode() = def and
var.getScope().getScope*() = nodeFrom.getScope()
)
}
Expand Down
41 changes: 21 additions & 20 deletions python/ql/lib/semmle/python/frameworks/Stdlib.qll
Original file line number Diff line number Diff line change
Expand Up @@ -3284,6 +3284,14 @@ module StdlibPrivate {
}
}

/**
 * Gets an API node on which the `re` matching functions can be looked up, tagged by `name`:
 *
 * - `"re.Match"`: the `re` module itself.
 * - `"compiled re.Match"`: the return value of a call described by a `RePatternSummary`.
 */
private API::Node re(string name) {
  name = "re.Match" and result = API::moduleImport("re")
  or
  name = "compiled re.Match" and
  exists(RePatternSummary pattern |
    result = pattern.getACall().(API::CallNode).getReturn()
  )
}

/**
* A flow summary for methods returning a `re.Match` object
*
Expand All @@ -3293,17 +3301,9 @@ module StdlibPrivate {
ReMatchSummary() { this = ["re.Match", "compiled re.Match"] }

override DataFlow::CallCfgNode getACall() {
this = "re.Match" and
result = API::moduleImport("re").getMember(["match", "search", "fullmatch"]).getACall()
or
this = "compiled re.Match" and
result =
any(RePatternSummary c)
.getACall()
.(API::CallNode)
.getReturn()
.getMember(["match", "search", "fullmatch"])
.getACall()
exists(API::Node re | re = re(this) |
result = re.getMember(["match", "search", "fullmatch"]).getACall()
)
}

override DataFlow::ArgumentNode getACallback() { none() }
Expand Down Expand Up @@ -3340,6 +3340,12 @@ module StdlibPrivate {
}
}

/**
 * Gets an API node for a value produced by the modelled `re` matching calls:
 *
 * - the return value of a call described by a `ReMatchSummary`, or
 * - a subscript of the return value of `finditer`, looked up on any node from `re(_)`.
 */
private API::Node match() {
  exists(ReMatchSummary summary |
    result = summary.getACall().(API::CallNode).getReturn()
  )
  or
  exists(API::Node reApi | reApi = re(_) |
    result = reApi.getMember("finditer").getReturn().getASubscript()
  )
}

/**
* A flow summary for methods on a `re.Match` object
*
Expand All @@ -3353,15 +3359,7 @@ module StdlibPrivate {
methodName in ["expand", "group", "groups", "groupdict"]
}

override DataFlow::CallCfgNode getACall() {
result =
any(ReMatchSummary c)
.getACall()
.(API::CallNode)
.getReturn()
.getMember(methodName)
.getACall()
}
override DataFlow::CallCfgNode getACall() { result = match().getMember(methodName).getACall() }

override DataFlow::ArgumentNode getACallback() { none() }

Expand Down Expand Up @@ -3447,6 +3445,9 @@ module StdlibPrivate {
or
methodName = "subn" and
output = "ReturnValue.TupleElement[0]"
or
methodName = "finditer" and
output = "ReturnValue.ListElement.Attribute[string]"
)
)
or
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re
import urllib.parse
import sys
import http.client

def generator_dict_re_combo():
query = TAINTED_STRING
Expand Down Expand Up @@ -28,4 +29,17 @@ def parse_qs():

params = urllib.parse.parse_qs(query)

ensure_tainted(params) # $ tainted
ensure_tainted(params) # $ tainted

HTML_PREFIX = """<!DOCTYPE html>"""

# Repro: taint flow from a string into a dict built by a generator expression
# that iterates over `re.finditer` matches.
# NOTE(review): the body's leading indentation appears to have been lost in this
# view -- confirm against the original file before applying.
def flat():
# Presumably the taint source provided by the test harness -- verify.
self_path = TAINTED_STRING

# Split at the first '?'; `query` is "" when `self_path` contains no '?'.
path, query = self_path.split('?', 1) if '?' in self_path else (self_path, "")
# Single tuple assignment. `params` is a dict built from a generator over
# `re.finditer(..., query)`: each key is the match's "parameter" group, each value is
# the URL-unquoted, comma-joined `re.findall` result for that parameter name in `query`.
# NOTE(review): the one-statement shape looks deliberate for the repro -- do not split it.
code, content, params, cursor = http.client.OK, HTML_PREFIX, dict((match.group("parameter"), urllib.parse.unquote(','.join(re.findall(r"(?:\A|[?&])%s=([^&]+)" % match.group("parameter"), query)))) for match in re.finditer(r"((\A|[?&])(?P<parameter>[\w\[\]]+)=)([^&]+)", query)), "Cursor"

print(code)
print(content)
# Keep the trailing `# $ tainted` annotation unchanged (presumably an inline test expectation).
ensure_tainted(params) # $ tainted
print(cursor)

0 comments on commit 6df1f5a

Please sign in to comment.