Skip to content

Commit e099867

Browse files
committed
Merge pull request #9349 from JuliaLang/teh/typewarnings
RFC: highlighting type problems
2 parents 904d504 + 7cd255b commit e099867

File tree

5 files changed

+206
-31
lines changed

5 files changed

+206
-31
lines changed

base/exports.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,6 +1069,7 @@ export
10691069
current_module,
10701070
edit,
10711071
code_typed,
1072+
code_warntype,
10721073
code_lowered,
10731074
code_llvm,
10741075
code_native,
@@ -1375,6 +1376,7 @@ export
13751376
@edit,
13761377
@less,
13771378
@code_typed,
1379+
@code_warntype,
13781380
@code_lowered,
13791381
@code_llvm,
13801382
@code_native,

base/interactiveutil.jl

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,29 @@ function which(f, t::(Type...))
203203
ms[1]
204204
end
205205

206+
# displaying type-ambiguity warnings
207+
208+
function code_warntype(f, t::(Type...))
209+
global show_expr_type_colorize
210+
state = show_expr_type_colorize::Bool
211+
ct = code_typed(f, t)
212+
show_expr_type_colorize::Bool = true
213+
for ast in ct
214+
println(STDOUT, "Variables:")
215+
vars = ast.args[2][2]
216+
for v in vars
217+
print(STDOUT, " ", v[1])
218+
show_expr_type(STDOUT, v[2])
219+
print(STDOUT, '\n')
220+
end
221+
print(STDOUT, "\nBody:\n ")
222+
show_unquoted(STDOUT, ast.args[3], 2)
223+
print(STDOUT, '\n')
224+
end
225+
show_expr_type_colorize::Bool = false
226+
nothing
227+
end
228+
206229
typesof(args...) = map(a->(isa(a,Type) ? Type{a} : typeof(a)), args)
207230

208231
function gen_call_with_extracted_types(fcn, ex0)
@@ -248,7 +271,7 @@ function gen_call_with_extracted_types(fcn, ex0)
248271
exret
249272
end
250273

251-
for fname in [:which, :less, :edit, :code_typed, :code_lowered, :code_llvm, :code_native]
274+
for fname in [:which, :less, :edit, :code_typed, :code_warntype, :code_lowered, :code_llvm, :code_native]
252275
@eval begin
253276
macro ($fname)(ex0)
254277
gen_call_with_extracted_types($(Expr(:quote,fname)), ex0)

base/show.jl

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -291,15 +291,20 @@ unquoted(ex::Expr) = ex.args[1]
291291
## AST printing helpers ##
292292

293293
const indent_width = 4
294+
show_expr_type_colorize = false
294295

295296
function show_expr_type(io::IO, ty)
296-
if !is(ty, Any)
297-
if is(ty, Function)
298-
print(io, "::F")
299-
elseif is(ty, IntrinsicFunction)
300-
print(io, "::I")
297+
if is(ty, Function)
298+
print(io, "::F")
299+
elseif is(ty, IntrinsicFunction)
300+
print(io, "::I")
301+
else
302+
if show_expr_type_colorize::Bool && !isleaftype(ty)
303+
print_with_color(:red, io, "::$ty")
301304
else
302-
print(io, "::$ty")
305+
if !is(ty, Any)
306+
print(io, "::$ty")
307+
end
303308
end
304309
end
305310
end
@@ -410,6 +415,9 @@ end
410415
# TODO: implement interpolated strings
411416
function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int)
412417
head, args, nargs = ex.head, ex.args, length(ex.args)
418+
show_type = true
419+
global show_expr_type_colorize
420+
state = show_expr_type_colorize::Bool
413421

414422
# dot (i.e. "x.y")
415423
if is(head, :(.))
@@ -459,6 +467,10 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int)
459467
func_prec = operator_precedence(func)
460468
func_args = args[2:end]
461469

470+
if in(ex.args[1], (:box, TopNode(:box), :throw)) || ismodulecall(ex)
471+
show_expr_type_colorize::Bool = show_type = false
472+
end
473+
462474
# scalar multiplication (i.e. "100x")
463475
if (func == :(*) && length(func_args)==2 &&
464476
isa(func_args[1], Real) && isa(func_args[2], Symbol))
@@ -578,6 +590,7 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int)
578590
show_list(io, args, ' ', indent)
579591

580592
elseif is(head, :line) && 1 <= nargs <= 2
593+
show_type = false
581594
show_linenumber(io, args...)
582595

583596
elseif is(head, :if) && nargs == 3 # if/else
@@ -659,6 +672,7 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int)
659672

660673
# print anything else as "Expr(head, args...)"
661674
else
675+
show_expr_type_colorize::Bool = show_type = false
662676
print(io, "\$(Expr(")
663677
show(io, ex.head)
664678
for arg in args
@@ -668,7 +682,31 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int)
668682
print(io, "))")
669683
end
670684

671-
show_expr_type(io, ex.typ)
685+
if ex.head == :(=)
686+
show_type = show_type_assignment(ex.args[2])
687+
elseif in(ex.head, (:boundscheck, :gotoifnot, :return))
688+
show_type = false
689+
end
690+
691+
if show_type
692+
show_expr_type(io, ex.typ)
693+
end
694+
695+
show_expr_type_colorize::Bool = state
696+
end
697+
698+
show_type_assignment(::Number) = false
699+
show_type_assignment(::(Number...)) = false
700+
show_type_assignment(::Expr) = false
701+
show_type_assignment(::SymbolNode) = false
702+
show_type_assignment(::LambdaStaticData) = false
703+
show_type_assignment(a) = true
704+
705+
function ismodulecall(ex::Expr)
706+
ex.head == :call && ex.args[1] == TopNode(:getfield) &&
707+
isa(ex.args[2], Symbol) &&
708+
isdefined(current_module(), ex.args[2]) &&
709+
isa(getfield(current_module(), ex.args[2]), Module)
672710
end
673711

674712
# dump & xdump - structured tree representation like R's str()

doc/manual/performance-tips.rst

Lines changed: 127 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -113,14 +113,12 @@ diagnose problems and improve the performance of your code:
113113
``*.mem`` files to see information about where those allocations
114114
occur. See :ref:`stdlib-track-allocation`.
115115

116-
- The `TypeCheck <https://github.com/astrieanna/TypeCheck.jl>`_
117-
package can help identify certain kinds of type problems. A more
118-
laborious but comprehensive tool is :func:`code_typed`. Look
119-
particularly for variables that have type :class:`Any` (in the header) or
120-
statements declared as :class:`Union` types. Such problems can usually
121-
be fixed using the tips below.
122-
123-
- The `Lint <https://github.com/tonyhffong/Lint.jl>`_ package can also
116+
- ``@code_warntype`` generates a representation of your code that can
117+
be helpful in finding expressions that result in type uncertainty.
118+
See :ref:`man-code-warntype` below.
119+
120+
- The `Lint <https://github.com/tonyhffong/Lint.jl>`_ and `TypeCheck
121+
<https://github.com/astrieanna/TypeCheck.jl>`_ packages can also
124122
warn you of certain types of programming errors.
125123

126124

@@ -474,7 +472,7 @@ with
474472
end
475473
y
476474
end
477-
475+
478476
Timing results::
479477

480478
julia> @time loopinc()
@@ -549,8 +547,8 @@ properties.
549547
Be certain before doing this. If the subscripts are ever out of bounds,
550548
you may suffer crashes or silent corruption.
551549
- Write :obj:`@simd` in front of ``for`` loops that are amenable to vectorization.
552-
**This feature is experimental** and could change or disappear in future
553-
versions of Julia.
550+
**This feature is experimental** and could change or disappear in future
551+
versions of Julia.
554552

555553
Here is an example with both forms of markup::
556554

@@ -569,7 +567,7 @@ Here is an example with both forms of markup::
569567
end
570568
s
571569
end
572-
570+
573571
function timeit( n, reps )
574572
x = rand(Float32,n)
575573
y = rand(Float32,n)
@@ -602,24 +600,130 @@ properties of the loop:
602600
possibly causing different results than without :obj:`@simd`.
603601
- No iteration ever waits on another iteration to make forward progress.
604602

605-
A loop containing ``break``, ``continue``, or :obj:`@goto` will cause a
603+
A loop containing ``break``, ``continue``, or :obj:`@goto` will cause a
606604
compile-time error.
607605

608-
Using :obj:`@simd` merely gives the compiler license to vectorize. Whether
609-
it actually does so depends on the compiler. To actually benefit from the
610-
current implementation, your loop should have the following additional
606+
Using :obj:`@simd` merely gives the compiler license to vectorize. Whether
607+
it actually does so depends on the compiler. To actually benefit from the
608+
current implementation, your loop should have the following additional
611609
properties:
612610

613611
- The loop must be an innermost loop.
614-
- The loop body must be straight-line code. This is why :obj:`@inbounds` is
612+
- The loop body must be straight-line code. This is why :obj:`@inbounds` is
615613
currently needed for all array accesses. The compiler can sometimes turn
616-
short ``&&``, ``||``, and ``?:`` expressions into straight-line code,
617-
if it is safe to evaluate all operands unconditionally. Consider using
614+
short ``&&``, ``||``, and ``?:`` expressions into straight-line code,
615+
if it is safe to evaluate all operands unconditionally. Consider using
618616
:func:`ifelse` instead of ``?:`` in the loop if it is safe to do so.
619-
- Accesses must have a stride pattern and cannot be "gathers" (random-index reads)
617+
- Accesses must have a stride pattern and cannot be "gathers" (random-index reads)
620618
or "scatters" (random-index writes).
621619
- The stride should be unit stride.
622-
- In some simple cases, for example with 2-3 arrays accessed in a loop, the
623-
LLVM auto-vectorization may kick in automatically, leading to no further
624-
speedup with :obj:`@simd`.
620+
- In some simple cases, for example with 2-3 arrays accessed in a loop, the
621+
LLVM auto-vectorization may kick in automatically, leading to no further
622+
speedup with :obj:`@simd`.
623+
speedup with ``@simd``.
624+
625+
.. raw:: html
626+
<style> .red {color:red} </style>
627+
628+
.. _man-code-warntype:
629+
630+
``@code_warntype``
631+
------------------
632+
633+
The macro ``@code_warntype`` (or its function variant) can sometimes be helpful in diagnosing type-related problems. Here's an example::
634+
635+
pos(x) = x < 0 ? 0 : x
636+
637+
function f(x)
638+
y = pos(x)
639+
sin(y*x+1)
640+
end
625641

642+
julia> @code_warntype f(3.2)
643+
1-element Array{Union(Expr,Array{T,N}),1}:
644+
:($(Expr(:lambda, Any[:x], Any[Any[:y,:_var0,:_var3,:_var4,:_var1,:_var2],Any[Any[:x,Float64,0],Any[:y,UNION(FLOAT64,INT64),18],Any[:_var0,Float64,18],Any[:_var3,(Int64,),0],Any[:_var4,UNION(FLOAT64,INT64),2],Any[:_var1,Float64,18],Any[:_var2,Float64,18]],Any[]], :(begin # none, line 2:
645+
_var0 = (top(box))(Float64,(top(sitofp))(Float64,0))::Float64
646+
unless (top(box))(Bool,(top(or_int))((top(lt_float))(x::Float64,_var0::Float64)::Bool,(top(box))(Bool,(top(and_int))((top(box))(Bool,(top(and_int))((top(eq_float))(x::Float64,_var0::Float64)::Bool,(top(lt_float))(_var0::Float64,9.223372036854776e18)::Bool))::Bool,(top(slt_int))((top(box))(Int64,(top(fptosi))(Int64,_var0::Float64))::Int64,0)::Bool))::Bool))::Bool goto 1
647+
_var4 = 0
648+
goto 2
649+
1:
650+
_var4 = x::Float64
651+
2:
652+
y = _var4::UNION(FLOAT64,INT64) # line 3:
653+
_var1 = y::UNION(FLOAT64,INT64) * x::Float64::Float64
654+
_var2 = (top(box))(Float64,(top(add_float))(_var1::Float64,(top(box))(Float64,(top(sitofp))(Float64,1))::Float64))::Float64
655+
return (GetfieldNode(Base.Math,:nan_dom_err,Any))((top(ccall))($(Expr(:call1, :(top(tuple)), "sin", GetfieldNode(Base.Math,:libm,Any)))::(ASCIIString,ASCIIString),Float64,$(Expr(:call1, :(top(tuple)), :Float64))::(Type{Float64},),_var2::Float64,0)::Float64,_var2::Float64)::Float64
656+
end::Float64))))
657+
658+
Interpreting the output of ``@code_warntype``, like that of its cousins
659+
``@code_lowered``, ``@code_typed``, ``@code_llvm``, and
660+
``@code_native``, takes a little practice. Your
661+
code is being presented in form that has been partially-digested on
662+
its way to generating compiled machine code. Most of the expressions
663+
are annotated by a type, indicated by the ``::T`` (where ``T`` might
664+
be ``Float64``, for example). The particular characteristic of
665+
``@code_warntype`` is that non-concrete types are displayed in red; in
666+
the above example, such output is shown in all-caps.
667+
668+
The first line of the output, beginning with ``1-element Array``,
669+
simply means that there is only one method that matches the function
670+
and input types you provided. The next line of the output summarizes
671+
information about the different variables. You can see that ``y``, one
672+
of the variables you created, is a ``Union(Float64,Int64)``, due to
673+
the type-instability of ``pos``. There is another variable,
674+
``_var1``, which you can see also has the same type.
675+
676+
The next lines represent the body of ``f``. The lines starting with a
677+
number followed by a colon (``1:``, ``2:``) are labels, and represent
678+
targets for jumps (via ``goto``) in your code. Looking at the body,
679+
you can see that ``pos`` has been *inlined* into ``f``---everything
680+
before ``2:`` comes from code defined in ``pos``.
681+
682+
Starting at ``2:``, the variable ``y`` is defined, and again annotated
683+
as a ``Union`` type. Next, we see that the compiler created the
684+
temporary variable ``_var1`` to hold the result of ``y*x``; it too is
685+
type-unstable. However, at the next line, all type-instability ends:
686+
because ``sin`` converts integer inputs into ``Float64``, the final
687+
return value of ``f`` is a ``Float64``. So calls of the form
688+
``f(x::Float64)`` will not be type-unstable in their output, even if
689+
some of the intermediate computations are type-unstable.
690+
691+
How you use this information is up to you. Obviously, it would be far
692+
and away best to fix ``pos`` to be type-stable: if you did so, all of
693+
the variables in ``f`` would be concrete, and its performance would be
694+
optimal. However, there are circumstances where this kind of
695+
*ephemeral* type instability might not matter too much: for example,
696+
if ``pos`` is never used in isolation, the fact that ``f``\'s output
697+
is type-stable (for ``Float64`` inputs) will shield later code from
698+
the propagating effects of type instability. This is particularly
699+
relevant in cases where fixing the type instability is difficult or
700+
impossible: for example, currently it's not possible to infer the
701+
return type of an anonymous function. In such cases, the tips above
702+
(e.g., adding type annotations and/or breaking up functions) are your
703+
best tools to contain the "damage" from type instability.
704+
705+
Here is a table which can help you interpret expressions marked as
706+
containing non-leaf types:
707+
708+
.. |I0| replace:: Interpretation
709+
.. |F0| replace:: Possible fix
710+
.. |I1| replace:: Function with unstable return type
711+
.. |F1| replace:: Make the return value type-stable, even if you have to annotate it
712+
.. |I2| replace:: Call to a type-unstable function
713+
.. |F2| replace:: Fix the function, or annotate the return value
714+
.. |I3| replace:: Accessing elements of poorly-typed arrays
715+
.. |F3| replace:: Use arrays with better-defined types, or annotate the type of individual element accesses
716+
.. |I4| replace:: Getting a field that is of non-leaf type. In this case, ``ArrayContainer`` had a field ``data::Array{T}``. But ``Array`` needs the dimension ``N``, too.
717+
.. |F4| replace:: Use concrete types like ``Array{T,3}`` or ``Array{T,N}``, where ``N`` is now a parameter of ``ArrayContainer``
718+
719+
+-------------------------------------------------------------------------+------+------+
720+
| Marked expression | |I0| | |F0| |
721+
+=========================================================================+======+======+
722+
| Function ending in ``end::Union(T1,T2)))`` | |I1| | |F1| |
723+
+-------------------------------------------------------------------------+------+------+
724+
| ``f(x::T)::Union(T1,T2)`` | |I2| | |F2| |
725+
+-------------------------------------------------------------------------+------+------+
726+
| ``(top(arrayref))(A::Array{Any,1},1)::Any`` | |I3| | |F3| |
727+
+-------------------------------------------------------------------------+------+------+
728+
| ``(top(getfield))(A::ArrayContainer{Float64},:data)::Array{Float64,N}`` | |I4| | |F4| |
729+
+-------------------------------------------------------------------------+------+------+

doc/stdlib/base.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6441,6 +6441,14 @@ Internals
64416441

64426442
Evaluates the arguments to the function call, determines their types, and calls the ``code_typed`` function on the resulting expression
64436443

6444+
.. function:: code_warntype(f, types)
6445+
6446+
Returns an array of lowered and type-inferred ASTs for the methods matching the given generic function and type signature. The ASTs are annotated in such a way as to cause "non-leaf" types to be displayed in red. This serves as a warning of potential type instability. Not all non-leaf types are particularly problematic for performance, so the results need to be used judiciously. See :ref:`man-code-warntype` for more information.
6447+
6448+
.. function:: @code_warntype
6449+
6450+
Evaluates the arguments to the function call, determines their types, and calls the ``code_warntype`` function on the resulting expression
6451+
64446452
.. function:: code_llvm(f, types)
64456453

64466454
Prints the LLVM bitcodes generated for running the method matching the given generic function and type signature to STDOUT.

0 commit comments

Comments
 (0)