diff --git a/python/ql/lib/semmle/python/ApiGraphs.qll b/python/ql/lib/semmle/python/ApiGraphs.qll
index 18202ebb5241..98936927a8ca 100644
--- a/python/ql/lib/semmle/python/ApiGraphs.qll
+++ b/python/ql/lib/semmle/python/ApiGraphs.qll
@@ -235,7 +235,11 @@ module API {
      * Gets a node representing a subscript of this node.
      * For example `obj[x]` is a subscript of `obj`.
      */
-    Node getASubscript() { result = this.getASuccessor(Label::subscript()) }
+    Node getASubscript() {
+      result = this.getASuccessor(Label::subscript())
+      or
+      result = this.getASuccessor(Label::intSubscript(_))
+    }
 
     /**
      * Gets a node representing an index of a subscript of this node.
@@ -261,6 +265,25 @@ module API {
       )
     }
 
+    /**
+     * Gets a node representing a subscript of this node at the integer index `i`.
+     * This requires that the index can be statically determined.
+     *
+     * For example, the string `value` can be found as a subscript of
+     * both `a` and `b` below using the index `1`:
+     * ```py
+     * a[1] = 'value'
+     * b = ['list', 'value']
+     * ```
+     */
+    Node getIntSubscript(int i) {
+      exists(API::Node index | result = this.getSubscriptAt(index) |
+        i = index.getAValueReachingSink().asExpr().(PY::IntegerLiteral).getValue()
+      )
+      or
+      result = this.getASuccessor(Label::intSubscript(i))
+    }
+
     /**
      * Gets a node representing a subscript of this node at index `index`.
      */
@@ -758,9 +781,9 @@ module API {
         // TODO: once convenient, this should be done at a higher level than the AST,
         // at least at the CFG layer, to take splitting into account.
         // Also consider `SequenceNode for generality.
-        exists(PY::List list | list = pred.(DataFlow::ExprNode).getNode().getNode() |
-          rhs.(DataFlow::ExprNode).getNode().getNode() = list.getAnElt() and
-          lbl = Label::subscript()
+        exists(PY::List list, int index | list = pred.(DataFlow::ExprNode).getNode().getNode() |
+          rhs.(DataFlow::ExprNode).getNode().getNode() = list.getElt(index) and
+          lbl = Label::intSubscript(index)
         )
         or
         exists(PY::CallableExpr fn | fn = pred.(DataFlow::ExprNode).getNode().getNode() |
@@ -1068,6 +1091,7 @@ module API {
       MkLabelAwait() or
       MkLabelSubscript() or
       MkLabelIndex() or
+      MkLabelIntSubscript(int index) { exists(PY::List l | exists(l.getElt(index))) } or
       MkLabelEntryPoint(EntryPoint ep)
 
     /** A label for a module. */
@@ -1148,6 +1172,17 @@ module API {
       override string toString() { result = "getASubscript()" }
     }
 
+    /** A label that gets the subscript at a statically known integer index. */
+    class LabelIntSubscript extends ApiLabel, MkLabelIntSubscript {
+      int index;
+
+      LabelIntSubscript() { this = MkLabelIntSubscript(index) }
+
+      override string toString() { result = "getIntSubscript(" + index.toString() + ")" }
+
+      int getIndex() { result = index }
+    }
+
     /** A label that gets the index of a subscript. */
     class LabelIndex extends ApiLabel, MkLabelIndex {
       override string toString() { result = "getIndex()" }
@@ -1201,6 +1236,9 @@ module API {
     /** Gets the `subscript` edge label. */
     LabelSubscript subscript() { any() }
 
+    /** Gets the `intSubscript` edge label for the integer index `index`. */
+    LabelIntSubscript intSubscript(int index) { result.getIndex() = index }
+
     /** Gets the `subscript` edge label. */
     LabelIndex index() { any() }
 
diff --git a/python/ql/src/meta/StdLib/AllStdLibCalls.ql b/python/ql/src/meta/StdLib/AllStdLibCalls.ql
new file mode 100644
index 000000000000..e9be29007693
--- /dev/null
+++ b/python/ql/src/meta/StdLib/AllStdLibCalls.ql
@@ -0,0 +1,19 @@
+import python
+private import semmle.python.dataflow.new.internal.DataFlowDispatch
+
+predicate resolvedCall(CallNode call, Function callable) {
+  exists(DataFlowCallable dfCallable, DataFlowCall dfCall |
+    dfCallable.getScope() = callable and
+    dfCall.getNode() = call and
+    dfCallable = viableCallable(dfCall)
+  )
+}
+
+from Function f, CallNode call, string name
+where
+  resolvedCall(call, f) and
+  not call.getLocation().getFile().inStdlib() and
+  f.getLocation().getFile().inStdlib() and
+  f.getName() = name and
+  name != "__init__"
+select name, f.getScope()
diff --git a/python/ql/src/meta/StdLib/FindUses.ql b/python/ql/src/meta/StdLib/FindUses.ql
new file mode 100644
index 000000000000..0def1bccd957
--- /dev/null
+++ b/python/ql/src/meta/StdLib/FindUses.ql
@@ -0,0 +1,136 @@
+import python
+import semmle.python.dataflow.new.DataFlow
+import semmle.python.dataflow.new.TaintTracking
+
+pragma[inline]
+predicate inStdLib(DataFlow::Node node) { node.getLocation().getFile().inStdlib() }
+
+pragma[inline]
+string stepsTo(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+  if DataFlow::localFlow(nodeFrom, nodeTo)
+  then result = "local"
+  else
+    if
+      TaintTracking::localTaint(nodeFrom, nodeTo)
+      or
+      exists(TaintTracking::AdditionalTaintStep s | s.step(nodeFrom, nodeTo))
+      or
+      exists(
+        TaintTracking::AdditionalTaintStep s, DataFlow::Node entryNode, DataFlow::Node exitNode
+      |
+        s.step(entryNode, exitNode)
+      |
+        TaintTracking::localTaint(nodeFrom, entryNode) and
+        TaintTracking::localTaint(exitNode, nodeTo)
+      )
+    then result = "taint"
+    else result = "no"
+}
+
+abstract class EntryPointsByQuery extends string {
+  bindingset[this]
+  EntryPointsByQuery() { any() }
+
+  abstract predicate subpath(
+    DataFlow::Node argument, DataFlow::ParameterNode parameter, DataFlow::Node outNode
+  );
+
+  predicate entryPoint(
+    DataFlow::Node argument, string parameterName, string functionName, DataFlow::Node outNode,
+    string alreadyModelled
+  ) {
+    exists(DataFlow::ParameterNode parameter, Function function |
+      parameterName = parameter.getParameter().getName() and
+      functionName = function.getLocation().getFile().getShortName() + ":" + function.getName()
+    |
+      this.subpath(argument, parameter, outNode) and
+      not inStdLib(argument) and
+      inStdLib(parameter) and
+      function = parameter.getScope() and
+      alreadyModelled = stepsTo(argument, outNode)
+    )
+  }
+}
+
+module EntryPointsForRegexInjectionQuery {
+  private import semmle.python.security.dataflow.RegexInjectionQuery
+
+  module Flow = RegexInjectionFlow;
+
+  private import Flow::PathGraph
+
+  private class EntryPointsForRegexInjectionQuery extends EntryPointsByQuery {
+    EntryPointsForRegexInjectionQuery() { this = "RegexInjectionQuery" }
+
+    override predicate subpath(
+      DataFlow::Node argument, DataFlow::ParameterNode parameter, DataFlow::Node outNode
+    ) {
+      exists(Flow::PathNode arg, Flow::PathNode par, Flow::PathNode out |
+        subpaths(arg, par, _, out)
+      |
+        argument = arg.getNode() and
+        parameter = par.getNode() and
+        outNode = out.getNode()
+      )
+    }
+  }
+}
+
+module EntryPointsForUnsafeShellCommandConstructionQuery {
+  private import semmle.python.security.dataflow.UnsafeShellCommandConstructionQuery
+
+  module Flow = UnsafeShellCommandConstructionFlow;
+
+  private import Flow::PathGraph
+
+  private class EntryPointsForUnsafeShellCommandConstructionQuery extends EntryPointsByQuery {
+    EntryPointsForUnsafeShellCommandConstructionQuery() {
+      this = "UnsafeShellCommandConstructionQuery"
+    }
+
+    override predicate subpath(
+      DataFlow::Node argument, DataFlow::ParameterNode parameter, DataFlow::Node outNode
+    ) {
+      exists(Flow::PathNode arg, Flow::PathNode par, Flow::PathNode out |
+        subpaths(arg, par, _, out)
+      |
+        argument = arg.getNode() and
+        parameter = par.getNode() and
+        outNode = out.getNode()
+      )
+    }
+  }
+}
+
+module EntryPointsForPolynomialReDoSQuery {
+  private import semmle.python.security.dataflow.PolynomialReDoSQuery
+
+  module Flow = PolynomialReDoSFlow;
+
+  private import Flow::PathGraph
+
+  private class EntryPointsForPolynomialReDoSQuery extends EntryPointsByQuery {
+    EntryPointsForPolynomialReDoSQuery() { this = "PolynomialReDoSQuery" }
+
+    override predicate subpath(
+      DataFlow::Node argument, DataFlow::ParameterNode parameter, DataFlow::Node outNode
+    ) {
+      exists(Flow::PathNode arg, Flow::PathNode par, Flow::PathNode out |
+        subpaths(arg, par, _, out)
+      |
+        argument = arg.getNode() and
+        parameter = par.getNode() and
+        outNode = out.getNode()
+      )
+    }
+  }
+}
+
+from
+  EntryPointsByQuery e, DataFlow::Node argument, string parameter, string functionName,
+  DataFlow::Node outNode, string alreadyModelled
+where
+  e.entryPoint(argument, parameter, functionName, outNode, alreadyModelled) and
+  alreadyModelled = "no"
+// select e, argument, parameter, functionName, outNode, alreadyModelled
+select e, parameter, functionName, alreadyModelled
diff --git a/python/ql/test/library-tests/ApiGraphs/py3/test_subscript.py b/python/ql/test/library-tests/ApiGraphs/py3/test_subscript.py
index c4d3c655983b..bea1317f3599 100644
--- a/python/ql/test/library-tests/ApiGraphs/py3/test_subscript.py
+++ b/python/ql/test/library-tests/ApiGraphs/py3/test_subscript.py
@@ -6,3 +6,17 @@ def test_subscript():
     mypkg.foo()["qux"] += 42 #$ use=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()
     mypkg.foo()["qux"] += 42 #$ def=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()
     mypkg.foo()[mypkg.index] = mypkg.value #$ def=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()
+
+import gradio as gr
+
+def greet(name, surname):
+    return "Hello " + name + surname + "!"
+
+with gr.Blocks() as demo:
+    name = gr.Textbox(label="Name")
+    surname = gr.Textbox(label="Surname")
+    output = gr.Textbox(label="Output Box")
+    greet_btn = gr.Button("Greet")
+    greet_btn.click(fn=greet, inputs=[name, surname], outputs=output, api_name="greet") #$ def=moduleImport("gradio").getMember("Button").getReturn().getMember("click").getKeywordParameter("inputs").getIntSubscript(1)
+
+demo.launch()
\ No newline at end of file