Skip to content

Commit 29a5bdb

Browse files
authored
Merge pull request #7339 from erik-krogh/pyPerf
Python: Cache more predicates to improve performance.
2 parents f480ab9 + eae2a6a commit 29a5bdb

File tree

23 files changed

+338
-50
lines changed

23 files changed

+338
-50
lines changed

python/ql/lib/semmle/python/ApiGraphs.qll

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@
77
*/
88

99
// Importing python under the `PY` namespace to avoid importing `CallNode` from `Flow.qll` and thereby having a naming conflict with `API::CallNode`.
10-
private import python as py
10+
private import python as PY
1111
import semmle.python.dataflow.new.DataFlow
12+
private import semmle.python.internal.CachedStages
1213

1314
/**
1415
* Provides classes and predicates for working with APIs used in a database.
@@ -427,13 +428,13 @@ module API {
427428
/** An abstract representative for imports of the module called `name`. */
428429
MkModuleImport(string name) {
429430
// Ignore the following module name for Python 2, as we alias `__builtin__` to `builtins` elsewhere
430-
(name != "__builtin__" or py::major_version() = 3) and
431+
(name != "__builtin__" or PY::major_version() = 3) and
431432
(
432433
imports(_, name)
433434
or
434435
// When we `import foo.bar.baz` we want to create API graph nodes also for the prefixes
435436
// `foo` and `foo.bar`:
436-
name = any(py::ImportExpr e | not e.isRelative()).getAnImportedModuleName()
437+
name = any(PY::ImportExpr e | not e.isRelative()).getAnImportedModuleName()
437438
)
438439
or
439440
// The `builtins` module should always be implicitly available
@@ -469,7 +470,7 @@ module API {
469470
* Ignores relative imports, such as `from ..foo.bar import baz`.
470471
*/
471472
private predicate imports(DataFlow::Node imp, string name) {
472-
exists(py::ImportExprNode iexpr |
473+
exists(PY::ImportExprNode iexpr |
473474
imp.asCfgNode() = iexpr and
474475
not iexpr.getNode().isRelative() and
475476
name = iexpr.getNode().getImportedModuleName()
@@ -492,7 +493,7 @@ module API {
492493
*
493494
* `moduleImport("foo").getMember("bar")`
494495
*/
495-
private TApiNode potential_import_star_base(py::Scope s) {
496+
private TApiNode potential_import_star_base(PY::Scope s) {
496497
exists(DataFlow::Node n |
497498
n.asCfgNode() = ImportStar::potentialImportStarBase(s) and
498499
use(result, n)
@@ -515,17 +516,17 @@ module API {
515516
)
516517
or
517518
// TODO: I had expected `DataFlow::AttrWrite` to contain the attribute writes from a dict; that's how JS works.
518-
exists(py::Dict dict, py::KeyValuePair item |
519+
exists(PY::Dict dict, PY::KeyValuePair item |
519520
dict = pred.asExpr() and
520521
dict.getItem(_) = item and
521-
lbl = Label::member(item.getKey().(py::StrConst).getS()) and
522+
lbl = Label::member(item.getKey().(PY::StrConst).getS()) and
522523
rhs.asExpr() = item.getValue()
523524
)
524525
or
525-
exists(py::CallableExpr fn | fn = pred.asExpr() |
526+
exists(PY::CallableExpr fn | fn = pred.asExpr() |
526527
not fn.getInnerScope().isAsync() and
527528
lbl = Label::return() and
528-
exists(py::Return ret |
529+
exists(PY::Return ret |
529530
rhs.asExpr() = ret.getValue() and
530531
ret.getScope() = fn.getInnerScope()
531532
)
@@ -568,7 +569,7 @@ module API {
568569
// Subclassing a node
569570
lbl = Label::subclass() and
570571
exists(DataFlow::Node superclass | pred.flowsTo(superclass) |
571-
ref.asExpr().(py::ClassExpr).getABase() = superclass.asExpr()
572+
ref.asExpr().(PY::ClassExpr).getABase() = superclass.asExpr()
572573
)
573574
or
574575
// awaiting
@@ -579,7 +580,7 @@ module API {
579580
)
580581
)
581582
or
582-
exists(DataFlow::Node def, py::CallableExpr fn |
583+
exists(DataFlow::Node def, PY::CallableExpr fn |
583584
rhs(base, def) and fn = trackDefNode(def).asExpr()
584585
|
585586
exists(int i |
@@ -598,7 +599,7 @@ module API {
598599
lbl = Label::member(any(string name | ref = Builtins::likelyBuiltin(name)))
599600
or
600601
// Unknown variables that may belong to a module imported with `import *`
601-
exists(py::Scope s |
602+
exists(PY::Scope s |
602603
base = potential_import_star_base(s) and
603604
lbl =
604605
Label::member(any(string name |
@@ -618,7 +619,7 @@ module API {
618619
)
619620
or
620621
// Ensure the Python 2 `__builtin__` module gets the name of the Python 3 `builtins` module.
621-
py::major_version() = 2 and
622+
PY::major_version() = 2 and
622623
nd = MkModuleImport("builtins") and
623624
imports(ref, "__builtin__")
624625
or
@@ -683,6 +684,7 @@ module API {
683684
*/
684685
cached
685686
DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
687+
Stages::TypeTracking::ref() and
686688
result = trackUseNode(src, DataFlow::TypeTracker::end()) and
687689
not result instanceof DataFlow::ModuleVariableNode
688690
}
@@ -759,18 +761,18 @@ module API {
759761
exists(Builtins::likelyBuiltin(member)) or
760762
ImportStar::namePossiblyDefinedInImportStar(_, member, _) or
761763
Impl::prefix_member(_, member, _) or
762-
member = any(py::Dict d).getAnItem().(py::KeyValuePair).getKey().(py::StrConst).getS()
764+
member = any(PY::Dict d).getAnItem().(PY::KeyValuePair).getKey().(PY::StrConst).getS()
763765
} or
764766
MkLabelUnknownMember() or
765767
MkLabelParameter(int i) {
766768
exists(any(DataFlow::CallCfgNode c).getArg(i))
767769
or
768-
exists(any(py::Function f).getArg(i))
770+
exists(any(PY::Function f).getArg(i))
769771
} or
770772
MkLabelKeywordParameter(string name) {
771773
exists(any(DataFlow::CallCfgNode c).getArgByName(name))
772774
or
773-
exists(any(py::Function f).getArgByName(name))
775+
exists(any(PY::Function f).getArgByName(name))
774776
} or
775777
MkLabelReturn() or
776778
MkLabelSubclass() or

python/ql/lib/semmle/python/AstExtended.qll

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import python
2+
private import semmle.python.internal.CachedStages
23

34
/** A syntactic node (Class, Function, Module, Expr, Stmt or Comprehension) corresponding to a flow node */
45
abstract class AstNode extends AstNode_ {
@@ -17,9 +18,14 @@ abstract class AstNode extends AstNode_ {
1718
* NOTE: For some statements and other purely syntactic elements,
1819
* there may not be a `ControlFlowNode`
1920
*/
20-
ControlFlowNode getAFlowNode() { py_flow_bb_node(result, this, _, _) }
21+
cached
22+
ControlFlowNode getAFlowNode() {
23+
Stages::AST::ref() and
24+
py_flow_bb_node(result, this, _, _)
25+
}
2126

2227
/** Gets the location for this AST node */
28+
cached
2329
Location getLocation() { none() }
2430

2531
/**
@@ -35,6 +41,7 @@ abstract class AstNode extends AstNode_ {
3541
* Expr.getASubExpression(), Stmt.getASubStatement(), Stmt.getASubExpression() or
3642
* Scope.getAStmt().
3743
*/
44+
cached
3845
abstract AstNode getAChildNode();
3946

4047
/**
@@ -44,12 +51,16 @@ abstract class AstNode extends AstNode_ {
4451
* Expr.getASubExpression(), Stmt.getASubStatement(), Stmt.getASubExpression() or
4552
* Scope.getAStmt() applied to the parent.
4653
*/
47-
AstNode getParentNode() { result.getAChildNode() = this }
54+
cached
55+
AstNode getParentNode() {
56+
Stages::AST::ref() and
57+
result.getAChildNode() = this
58+
}
4859

4960
/** Whether this contains `inner` syntactically */
5061
predicate contains(AstNode inner) { this.getAChildNode+() = inner }
5162

52-
pragma[noinline]
63+
pragma[nomagic]
5364
private predicate containsInScope(AstNode inner, Scope scope) {
5465
this.contains(inner) and
5566
not inner instanceof Scope and
@@ -106,9 +117,16 @@ class Comprehension extends Comprehension_, AstNode {
106117

107118
override string toString() { result = "Comprehension" }
108119

109-
override Location getLocation() { result = Comprehension_.super.getLocation() }
120+
override Location getLocation() {
121+
Stages::AST::ref() and
122+
result = Comprehension_.super.getLocation()
123+
}
110124

111-
override AstNode getAChildNode() { result = this.getASubExpression() }
125+
pragma[nomagic]
126+
override AstNode getAChildNode() {
127+
Stages::AST::ref() and
128+
result = this.getASubExpression()
129+
}
112130

113131
Expr getASubExpression() {
114132
result = this.getIter() or

python/ql/lib/semmle/python/Exprs.qll

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,19 @@
11
import python
22
private import semmle.python.pointsto.PointsTo
33
private import semmle.python.objects.ObjectInternal
4+
private import semmle.python.internal.CachedStages
45

56
/** An expression */
67
class Expr extends Expr_, AstNode {
78
/** Gets the scope of this expression */
89
override Scope getScope() { py_scopes(this, result) }
910

1011
/** Gets a textual representation of this element. */
11-
override string toString() { result = "Expression" }
12+
cached
13+
override string toString() {
14+
Stages::AST::ref() and
15+
result = "Expression"
16+
}
1217

1318
/** Gets the module in which this expression occurs */
1419
Module getEnclosingModule() { result = this.getScope().getEnclosingModule() }

python/ql/lib/semmle/python/Flow.qll

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import python
22
private import semmle.python.pointsto.PointsTo
3+
private import semmle.python.internal.CachedStages
34

45
/*
56
* Note about matching parent and child nodes and CFG splitting:
@@ -122,7 +123,9 @@ class ControlFlowNode extends @py_flow_node {
122123
AstNode getNode() { py_flow_bb_node(this, result, _, _) }
123124

124125
/** Gets a textual representation of this element. */
126+
cached
125127
string toString() {
128+
Stages::DataFlow::ref() and
126129
exists(Scope s | s.getEntryNode() = this | result = "Entry node for " + s.toString())
127130
or
128131
exists(Scope s | s.getANormalExit() = this | result = "Exit node for " + s.toString())
@@ -191,7 +194,9 @@ class ControlFlowNode extends @py_flow_node {
191194
BasicBlock getBasicBlock() { result.contains(this) }
192195

193196
/** Gets the scope containing this flow node */
197+
cached
194198
Scope getScope() {
199+
Stages::AST::ref() and
195200
if this.getNode() instanceof Scope
196201
then
197202
/* Entry or exit node */
@@ -614,7 +619,9 @@ class UnaryExprNode extends ControlFlowNode {
614619
* and nodes implicitly assigned in class and function definitions and imports.
615620
*/
616621
class DefinitionNode extends ControlFlowNode {
622+
cached
617623
DefinitionNode() {
624+
Stages::AST::ref() and
618625
exists(Assign a | a.getATarget().getAFlowNode() = this)
619626
or
620627
exists(AnnAssign a | a.getTarget().getAFlowNode() = this and exists(a.getValue()))
@@ -673,6 +680,7 @@ abstract class SequenceNode extends ControlFlowNode {
673680
ControlFlowNode getAnElement() { result = this.getElement(_) }
674681

675682
/** Gets the control flow node for the nth element of this sequence */
683+
cached
676684
abstract ControlFlowNode getElement(int n);
677685
}
678686

@@ -681,6 +689,7 @@ class TupleNode extends SequenceNode {
681689
TupleNode() { toAst(this) instanceof Tuple }
682690

683691
override ControlFlowNode getElement(int n) {
692+
Stages::AST::ref() and
684693
exists(Tuple t | this.getNode() = t and result.getNode() = t.getElt(n)) and
685694
(
686695
result.getBasicBlock().dominates(this.getBasicBlock())
@@ -998,11 +1007,13 @@ class BasicBlock extends @py_flow_node {
9981007
string toString() { result = "BasicBlock" }
9991008

10001009
/** Whether this basic block strictly dominates the other */
1001-
pragma[nomagic]
1002-
predicate strictlyDominates(BasicBlock other) { other.getImmediateDominator+() = this }
1010+
cached
1011+
predicate strictlyDominates(BasicBlock other) {
1012+
Stages::AST::ref() and
1013+
other.getImmediateDominator+() = this
1014+
}
10031015

10041016
/** Whether this basic block dominates the other */
1005-
pragma[nomagic]
10061017
predicate dominates(BasicBlock other) {
10071018
this = other
10081019
or
@@ -1011,6 +1022,7 @@ class BasicBlock extends @py_flow_node {
10111022

10121023
cached
10131024
BasicBlock getImmediateDominator() {
1025+
Stages::AST::ref() and
10141026
this.firstNode().getImmediateDominator().getBasicBlock() = result
10151027
}
10161028

@@ -1048,7 +1060,11 @@ class BasicBlock extends @py_flow_node {
10481060
}
10491061

10501062
/** Gets a successor to this basic block */
1051-
BasicBlock getASuccessor() { result = this.getLastNode().getASuccessor().getBasicBlock() }
1063+
cached
1064+
BasicBlock getASuccessor() {
1065+
Stages::AST::ref() and
1066+
result = this.getLastNode().getASuccessor().getBasicBlock()
1067+
}
10521068

10531069
/** Gets a predecessor to this basic block */
10541070
BasicBlock getAPredecessor() { result.getASuccessor() = this }
@@ -1118,7 +1134,11 @@ class BasicBlock extends @py_flow_node {
11181134
}
11191135

11201136
/** Holds if this basic block strictly reaches the other, that is, the start of `other` is reachable from the end of this block. */
1121-
predicate strictlyReaches(BasicBlock other) { this.getASuccessor+() = other }
1137+
cached
1138+
predicate strictlyReaches(BasicBlock other) {
1139+
Stages::AST::ref() and
1140+
this.getASuccessor+() = other
1141+
}
11221142

11231143
/** Holds if this basic block reaches the other, that is, `other` is this block or the start of `other` is reachable from the end of this block. */
11241144
predicate reaches(BasicBlock other) { this = other or this.strictlyReaches(other) }

python/ql/lib/semmle/python/Function.qll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -167,18 +167,18 @@ class Function extends Function_, Scope, AstNode {
167167

168168
/** A def statement. Note that `FunctionDef` extends `Assign` because a function definition binds the newly created function. */
169169
class FunctionDef extends Assign {
170+
FunctionExpr f;
171+
170172
/* syntax: def name(...): ... */
171173
FunctionDef() {
172174
/* This is an artificial assignment the rhs of which is a (possibly decorated) FunctionExpr */
173-
exists(FunctionExpr f | this.getValue() = f or this.getValue() = f.getADecoratorCall())
175+
this.getValue() = f or this.getValue() = f.getADecoratorCall()
174176
}
175177

176178
override string toString() { result = "FunctionDef" }
177179

178180
/** Gets the function for this statement */
179-
Function getDefinedFunction() {
180-
exists(FunctionExpr func | this.containsInScope(func) and result = func.getInnerScope())
181-
}
181+
Function getDefinedFunction() { result = f.getInnerScope() }
182182

183183
override Stmt getLastStatement() { result = this.getDefinedFunction().getLastStatement() }
184184
}

python/ql/lib/semmle/python/Import.qll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import python
22
private import semmle.python.types.Builtins
3+
private import semmle.python.internal.CachedStages
34

45
/**
56
* An alias in an import statement, the `mod as name` part of `import mod as name`. May be artificial;
@@ -203,7 +204,9 @@ class Import extends Import_ {
203204
/** An import * statement */
204205
class ImportStar extends ImportStar_ {
205206
/* syntax: from modname import * */
207+
cached
206208
ImportExpr getModuleExpr() {
209+
Stages::AST::ref() and
207210
result = this.getModule()
208211
or
209212
result = this.getModule().(ImportMember).getModule()

python/ql/lib/semmle/python/Module.qll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import python
22
private import semmle.python.objects.ObjectAPI
33
private import semmle.python.objects.Modules
4+
private import semmle.python.internal.CachedStages
45

56
/**
67
* A module. This is the top level element in an AST, corresponding to a source file.
@@ -221,7 +222,9 @@ private predicate transitively_imported_from_entry_point(File file) {
221222
)
222223
}
223224

225+
cached
224226
string moduleNameFromFile(Container file) {
227+
Stages::AST::ref() and
225228
exists(string basename |
226229
basename = moduleNameFromBase(file) and
227230
legalShortName(basename)

0 commit comments

Comments
 (0)