Skip to content

Commit dba8f03

Browse files
authored
simplify inline cost computation, update its docs (#42997)
Removes a special inline treatment that seems probably antiquated.
1 parent 6fbfc4f commit dba8f03

File tree

3 files changed

+24
-41
lines changed

3 files changed

+24
-41
lines changed

base/compiler/optimize.jl

Lines changed: 17 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -174,29 +174,6 @@ const TOP_TUPLE = GlobalRef(Core, :tuple)
174174

175175
_topmod(sv::OptimizationState) = _topmod(sv.mod)
176176

177-
function isinlineable(m::Method, me::OptimizationState, params::OptimizationParams, union_penalties::Bool, bonus::Int=0)
178-
# compute the cost (size) of inlining this code
179-
inlineable = false
180-
cost_threshold = params.inline_cost_threshold
181-
if m.module === _topmod(m.module)
182-
# a few functions get special treatment
183-
name = m.name
184-
sig = m.sig
185-
if ((name === :+ || name === :* || name === :min || name === :max) &&
186-
isa(sig,DataType) &&
187-
sig == Tuple{sig.parameters[1],Any,Any,Any,Vararg{Any}})
188-
inlineable = true
189-
elseif (name === :iterate || name === :unsafe_convert ||
190-
name === :cconvert)
191-
cost_threshold *= 4
192-
end
193-
end
194-
if !inlineable
195-
inlineable = inline_worthy(me.ir::IRCode, params, union_penalties, cost_threshold + bonus)
196-
end
197-
return inlineable
198-
end
199-
200177
is_stmt_inline(stmt_flag::UInt8) = stmt_flag & IR_FLAG_INLINE ≠ 0
201178
is_stmt_noinline(stmt_flag::UInt8) = stmt_flag & IR_FLAG_NOINLINE ≠ 0
202179
is_stmt_throw_block(stmt_flag::UInt8) = stmt_flag & IR_FLAG_THROW_BLOCK ≠ 0
@@ -296,19 +273,27 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState, params::Opt
296273
if src.inlineable && isdispatchtuple(specTypes)
297274
# obey @inline declaration if a dispatch barrier would not help
298275
else
299-
bonus = 0
276+
# compute the cost (size) of inlining this code
277+
cost_threshold = default = params.inline_cost_threshold
300278
if result ⊑ Tuple && !isconcretetype(widenconst(result))
301-
bonus = params.inline_tupleret_bonus
279+
cost_threshold += params.inline_tupleret_bonus
302280
end
281+
# if the method is declared as `@inline`, increase the cost threshold 20x
303282
if src.inlineable
304-
# For functions declared @inline, increase the cost threshold 20x
305-
bonus += params.inline_cost_threshold*19
283+
cost_threshold += 19*default
284+
end
285+
# a few functions get special treatment
286+
if def.module === _topmod(def.module)
287+
name = def.name
288+
if name === :iterate || name === :unsafe_convert || name === :cconvert
289+
cost_threshold += 4*default
290+
end
306291
end
307-
src.inlineable = isinlineable(def, opt, params, union_penalties, bonus)
292+
src.inlineable = inline_worthy(ir, params, union_penalties, cost_threshold)
308293
end
309294
end
310295

311-
nothing
296+
return nothing
312297
end
313298

314299
# run the optimization work
@@ -393,7 +378,9 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
393378
cfg = compute_basic_blocks(code)
394379
types = Any[]
395380
stmts = InstructionStream(code, types, stmtinfo, codelocs, ssaflags)
396-
ir = IRCode(stmts, cfg, collect(LineInfoNode, ci.linetable::Union{Vector{LineInfoNode},Vector{Any}}), sv.slottypes, meta, sv.sptypes)
381+
linetable = ci.linetable
382+
isa(linetable, Vector{LineInfoNode}) || (linetable = collect(LineInfoNode, linetable::Vector{Any}))
383+
ir = IRCode(stmts, cfg, linetable, sv.slottypes, meta, sv.sptypes)
397384
return ir
398385
end
399386

base/operators.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,7 @@ for op in (:+, :*, :&, :|, :xor, :min, :max, :kron)
636636
# note: these definitions must not cause a dispatch loop when +(a,b) is
637637
# not defined, and must only try to call 2-argument definitions, so
638638
# that defining +(a,b) is sufficient for full functionality.
639-
($op)(a, b, c, xs...) = afoldl($op, ($op)(($op)(a,b),c), xs...)
639+
($op)(a, b, c, xs...) = (@inline; afoldl($op, ($op)(($op)(a,b),c), xs...))
640640
# a further concern is that it's easy for a type like (Int,Int...)
641641
# to match many definitions, so we need to keep the number of
642642
# definitions down to avoid losing type information.

doc/src/devdocs/inference.md

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,16 +49,12 @@ A `CodeInfo` object may be obtained with
4949
ci = (@code_typed convert(Int, UInt(1)))[1]
5050
```
5151

52-
## The inlining algorithm (inline_worthy)
52+
## The inlining algorithm (`inline_worthy`)
5353

54-
Much of the hardest work for inlining runs in
55-
`inlining_pass`. However, if your question is "why didn't my function
56-
inline?" then you will most likely be interested in `isinlineable` and
57-
its primary callee, `inline_worthy`. `isinlineable` handles a number
58-
of special cases (e.g., critical functions like `next` and `done`,
59-
incorporating a bonus for functions that return tuples, etc.). The
60-
main decision-making happens in `inline_worthy`, which returns `true`
61-
if the function should be inlined.
54+
Much of the hardest work for inlining runs in `ssa_inlining_pass!`.
55+
However, if your question is "why didn't my function inline?"
56+
then you will most likely be interested in `inline_worthy`,
57+
which makes a decision to inline the function call or not.
6258

6359
`inline_worthy` implements a cost-model, where "cheap" functions get
6460
inlined; more specifically, we inline functions if their anticipated
@@ -90,7 +86,7 @@ input and output types were inferred in advance) is assigned a fixed
9086
cost (currently 20 cycles). In contrast, a `:call` expression, for
9187
functions other than intrinsics/builtins, indicates that the call will
9288
require dynamic dispatch, in which case we assign a cost set by
93-
`Params.inline_nonleaf_penalty` (currently set at 1000). Note
89+
`Params.inline_nonleaf_penalty` (currently set at `1000`). Note
9490
that this is not a "first-principles" estimate of the raw cost of
9591
dynamic dispatch, but a mere heuristic indicating that dynamic
9692
dispatch is extremely expensive.

0 commit comments

Comments
 (0)