From 7a76e32c0e28133c3e229df7009c1eb7a6cc86d5 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 25 Sep 2024 19:35:03 -0400 Subject: [PATCH 01/45] codegen: fix alignment typos (#55880) So easy to type jl_datatype_align to get the natural alignment instead of julia_alignment to get the actual alignment. This should fix the Revise workload. Change is visible with ``` julia> code_llvm(Random.XoshiroSimd.forkRand, (Random.TaskLocalRNG, Base.Val{8})) ``` --- src/cgutils.cpp | 2 ++ src/codegen.cpp | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 9124638ce7446..7f96bb1047abc 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -3637,6 +3637,8 @@ static void union_alloca_type(jl_uniontype_t *ut, }, (jl_value_t*)ut, counter); + if (align > JL_HEAP_ALIGNMENT) + align = JL_HEAP_ALIGNMENT; } static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, bool &allunbox, size_t &min_align, size_t &nbytes) diff --git a/src/codegen.cpp b/src/codegen.cpp index abb21fcbca27e..a452e0fccd0c5 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -5151,7 +5151,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos Value *val = arg.V; SmallVector roots(arg.inline_roots); if (roots.empty()) - std::tie(val, roots) = split_value(ctx, arg, Align(jl_datatype_align(jt))); + std::tie(val, roots) = split_value(ctx, arg, Align(julia_alignment(jt))); AllocaInst *proots = emit_static_roots(ctx, roots.size()); for (size_t i = 0; i < roots.size(); i++) ctx.builder.CreateAlignedStore(roots[i], emit_ptrgep(ctx, proots, i * sizeof(void*)), Align(sizeof(void*))); @@ -7859,7 +7859,7 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value } props.cc = jl_returninfo_t::SRet; props.union_bytes = jl_datatype_size(jlrettype); - props.union_align = props.union_minalign = jl_datatype_align(jlrettype); + props.union_align = props.union_minalign = julia_alignment(jlrettype); 
// sret is always passed from alloca assert(M); fsig.push_back(rt->getPointerTo(M->getDataLayout().getAllocaAddrSpace())); From e4b29f71e7ca0e033ff3510b06d7534e4045e068 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= <765740+giordano@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:34:03 +0100 Subject: [PATCH 02/45] Fix some corner cases of `isapprox` with unsigned integers (#55828) --- base/floatfuncs.jl | 4 +++- test/floatfuncs.jl | 29 +++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl index 67e7899b4107c..2c26f7cff1133 100644 --- a/base/floatfuncs.jl +++ b/base/floatfuncs.jl @@ -232,7 +232,9 @@ function isapprox(x::Integer, y::Integer; if norm === abs && atol < 1 && rtol == 0 return x == y else - return norm(x - y) <= max(atol, rtol*max(norm(x), norm(y))) + # We need to take the difference `max` - `min` when comparing unsigned integers. + _x, _y = x < y ? (x, y) : (y, x) + return norm(_y - _x) <= max(atol, rtol*max(norm(_x), norm(_y))) end end diff --git a/test/floatfuncs.jl b/test/floatfuncs.jl index f33ec75b58322..d5d697634bcfa 100644 --- a/test/floatfuncs.jl +++ b/test/floatfuncs.jl @@ -257,6 +257,35 @@ end end end +@testset "isapprox and unsigned integers" begin + for T in Base.BitUnsigned_types + # Test also combinations of different integer types + W = widen(T) + # The order of the operands for difference between unsigned integers is + # very important, test both combinations. 
+ @test isapprox(T(42), T(42); rtol=T(0), atol=0.5) + @test isapprox(T(42), W(42); rtol=T(0), atol=0.5) + @test !isapprox(T(0), T(1); rtol=T(0), atol=0.5) + @test !isapprox(T(1), T(0); rtol=T(0), atol=0.5) + @test isapprox(T(1), T(3); atol=T(2)) + @test isapprox(T(4), T(2); atol=T(2)) + @test isapprox(T(1), W(3); atol=T(2)) + @test isapprox(T(4), W(2); atol=T(2)) + @test isapprox(T(5), T(7); atol=typemax(T)) + @test isapprox(T(8), T(6); atol=typemax(T)) + @test isapprox(T(1), T(2); rtol=1) + @test isapprox(T(6), T(3); rtol=1) + @test isapprox(T(1), W(2); rtol=1) + @test isapprox(T(6), W(3); rtol=1) + @test !isapprox(typemin(T), typemax(T)) + @test !isapprox(typemax(T), typemin(T)) + @test !isapprox(typemin(T), typemax(T); atol=typemax(T)-T(1)) + @test !isapprox(typemax(T), typemin(T); atol=typemax(T)-T(1)) + @test isapprox(typemin(T), typemax(T); atol=typemax(T)) + @test isapprox(typemax(T), typemin(T); atol=typemax(T)) + end +end + @testset "Conversion from floating point to unsigned integer near extremes (#51063)" begin @test_throws InexactError UInt32(4.2949673f9) @test_throws InexactError UInt64(1.8446744f19) From a5178a7c71d2253dd4b714dd2257f6d721e08534 Mon Sep 17 00:00:00 2001 From: DilumAluthgeBot <43731525+DilumAluthgeBot@users.noreply.github.com> Date: Thu, 26 Sep 2024 20:19:51 -0400 Subject: [PATCH 03/45] =?UTF-8?q?=F0=9F=A4=96=20[master]=20Bump=20the=20Pk?= =?UTF-8?q?g=20stdlib=20from=20ef9f76c17=20to=2051d4910c1=20(#55896)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/md5 | 1 + .../Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/sha512 | 1 + .../Pkg-ef9f76c175872bab6803da4a5fa3fd99bce3d03a.tar.gz/md5 | 1 - .../Pkg-ef9f76c175872bab6803da4a5fa3fd99bce3d03a.tar.gz/sha512 | 1 - stdlib/Pkg.version | 2 +- 5 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/md5 create 
mode 100644 deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/sha512 delete mode 100644 deps/checksums/Pkg-ef9f76c175872bab6803da4a5fa3fd99bce3d03a.tar.gz/md5 delete mode 100644 deps/checksums/Pkg-ef9f76c175872bab6803da4a5fa3fd99bce3d03a.tar.gz/sha512 diff --git a/deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/md5 b/deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/md5 new file mode 100644 index 0000000000000..b5b82565470c0 --- /dev/null +++ b/deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/md5 @@ -0,0 +1 @@ +88b8a25a8d465ac8cc94d13bc5f51707 diff --git a/deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/sha512 b/deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/sha512 new file mode 100644 index 0000000000000..a746b269d91f0 --- /dev/null +++ b/deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/sha512 @@ -0,0 +1 @@ +22262687f3bf75292ab0170e19a9c4a494022a653b2811443b8c52bc099dee0fddd09f6632ae42b3193adf3b0693ddcb6679b5d91e50a500f65261df5b7ced7d diff --git a/deps/checksums/Pkg-ef9f76c175872bab6803da4a5fa3fd99bce3d03a.tar.gz/md5 b/deps/checksums/Pkg-ef9f76c175872bab6803da4a5fa3fd99bce3d03a.tar.gz/md5 deleted file mode 100644 index 39dbb56dbaf53..0000000000000 --- a/deps/checksums/Pkg-ef9f76c175872bab6803da4a5fa3fd99bce3d03a.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -080b5cb82d208245cba014f1dfcb8033 diff --git a/deps/checksums/Pkg-ef9f76c175872bab6803da4a5fa3fd99bce3d03a.tar.gz/sha512 b/deps/checksums/Pkg-ef9f76c175872bab6803da4a5fa3fd99bce3d03a.tar.gz/sha512 deleted file mode 100644 index 2f95d4a0e28da..0000000000000 --- a/deps/checksums/Pkg-ef9f76c175872bab6803da4a5fa3fd99bce3d03a.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -1b91505c78d2608afa89ceea16f645bb41c0737815aec1853ad72c9751e7299b264135c9a40a6319f68b973073a151619b925d7a9655c46526bccf501b116113 diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version index f5ca169a775c6..34233c58702b4 100644 --- 
a/stdlib/Pkg.version +++ b/stdlib/Pkg.version @@ -1,4 +1,4 @@ PKG_BRANCH = master -PKG_SHA1 = ef9f76c175872bab6803da4a5fa3fd99bce3d03a +PKG_SHA1 = 51d4910c114a863d888659cb8962c1e161b2a421 PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1 From 32b9e1ac9fa31019aa3779b3c401a80bc94cb61f Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Thu, 26 Sep 2024 21:06:46 -0400 Subject: [PATCH 04/45] Profile: fix order of fields in heapsnapshot & improve formatting (#55890) --- src/gc-heap-snapshot.cpp | 42 ++++++++----------- stdlib/Profile/src/heapsnapshot_reassemble.jl | 10 ++++- 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index fcda11dad4f8a..72eb17115f4c7 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -618,38 +618,32 @@ void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &sn { // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567 // also https://github.com/microsoft/vscode-v8-heap-tools/blob/c5b34396392397925ecbb4ecb904a27a2754f2c1/v8-heap-parser/src/decoder.rs#L43-L51 - ios_printf(json, "{\"snapshot\":{"); + ios_printf(json, "{\"snapshot\":{\n"); - ios_printf(json, "\"meta\":{"); - ios_printf(json, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); - ios_printf(json, "\"node_types\":["); + ios_printf(json, " \"meta\":{\n"); + ios_printf(json, " \"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],\n"); + ios_printf(json, " \"node_types\":["); snapshot.node_types.print_json_array(json, false); ios_printf(json, ","); - ios_printf(json, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],"); - ios_printf(json, 
"\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],"); - ios_printf(json, "\"edge_types\":["); + ios_printf(json, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],\n"); + ios_printf(json, " \"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],\n"); + ios_printf(json, " \"edge_types\":["); snapshot.edge_types.print_json_array(json, false); ios_printf(json, ","); - ios_printf(json, "\"string_or_number\",\"from_node\"],"); + ios_printf(json, "\"string_or_number\",\"from_node\"],\n"); // not used. Required by microsoft/vscode-v8-heap-tools - ios_printf(json, "\"trace_function_info_fields\":[\"function_id\",\"name\",\"script_name\",\"script_id\",\"line\",\"column\"],"); - ios_printf(json, "\"trace_node_fields\":[\"id\",\"function_info_index\",\"count\",\"size\",\"children\"],"); - ios_printf(json, "\"sample_fields\":[\"timestamp_us\",\"last_assigned_id\"],"); - ios_printf(json, "\"location_fields\":[\"object_index\",\"script_id\",\"line\",\"column\"]"); + ios_printf(json, " \"trace_function_info_fields\":[\"function_id\",\"name\",\"script_name\",\"script_id\",\"line\",\"column\"],\n"); + ios_printf(json, " \"trace_node_fields\":[\"id\",\"function_info_index\",\"count\",\"size\",\"children\"],\n"); + ios_printf(json, " \"sample_fields\":[\"timestamp_us\",\"last_assigned_id\"],\n"); + ios_printf(json, " \"location_fields\":[\"object_index\",\"script_id\",\"line\",\"column\"]\n"); // end not used - ios_printf(json, "},\n"); // end "meta" + ios_printf(json, " },\n"); // end "meta" - ios_printf(json, "\"node_count\":%zu,", snapshot.num_nodes); - ios_printf(json, "\"edge_count\":%zu,", snapshot.num_edges); - ios_printf(json, "\"trace_function_count\":0"); // not used. Required by microsoft/vscode-v8-heap-tools - ios_printf(json, "},\n"); // end "snapshot" - - // not used. 
Required by microsoft/vscode-v8-heap-tools - ios_printf(json, "\"trace_function_infos\":[],"); - ios_printf(json, "\"trace_tree\":[],"); - ios_printf(json, "\"samples\":[],"); - ios_printf(json, "\"locations\":[]"); - // end not used + ios_printf(json, " \"node_count\":%zu,\n", snapshot.num_nodes); + ios_printf(json, " \"edge_count\":%zu,\n", snapshot.num_edges); + ios_printf(json, " \"trace_function_count\":0\n"); // not used. Required by microsoft/vscode-v8-heap-tools + ios_printf(json, "}\n"); // end "snapshot" + // this } is removed by the julia reassembler in Profile ios_printf(json, "}"); } diff --git a/stdlib/Profile/src/heapsnapshot_reassemble.jl b/stdlib/Profile/src/heapsnapshot_reassemble.jl index 2413ae538b8ac..b2d86ee1f27b6 100644 --- a/stdlib/Profile/src/heapsnapshot_reassemble.jl +++ b/stdlib/Profile/src/heapsnapshot_reassemble.jl @@ -155,7 +155,8 @@ function assemble_snapshot(in_prefix, io::IO) _write_decimal_number(io, nodes.edge_count[i], _digits_buf) print(io, ",0,0") end - print(io, "],\"edges\":[") + print(io, "],\n") + print(io, "\"edges\":[") e = 1 for n in 1:length(nodes) count = nodes.edge_count[n] @@ -177,6 +178,13 @@ function assemble_snapshot(in_prefix, io::IO) end println(io, "],") + # not used. 
Required by microsoft/vscode-v8-heap-tools + # This order of these fields is required by chrome dev tools otherwise loading fails + println(io, "\"trace_function_infos\":[],") + println(io, "\"trace_tree\":[],") + println(io, "\"samples\":[],") + println(io, "\"locations\":[],") + println(io, "\"strings\":[") open(string(in_prefix, ".strings"), "r") do strings_io first = true From 60be4094fb4eb4d4e4780b920a96e027522cd692 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Thu, 26 Sep 2024 21:07:47 -0400 Subject: [PATCH 05/45] Profile: Improve generation of clickable terminal links (#55857) --- base/path.jl | 13 ++++----- stdlib/Profile/src/Profile.jl | 53 +++++++++++++++++++++-------------- 2 files changed, 37 insertions(+), 29 deletions(-) diff --git a/base/path.jl b/base/path.jl index f6d3266d9738c..69c8d22c63c54 100644 --- a/base/path.jl +++ b/base/path.jl @@ -614,6 +614,11 @@ for f in (:isdirpath, :splitdir, :splitdrive, :splitext, :normpath, :abspath) @eval $f(path::AbstractString) = $f(String(path)) end +# RFC3986 Section 2.1 +percent_escape(s) = '%' * join(map(b -> uppercase(string(b, base=16)), codeunits(s)), '%') +# RFC3986 Section 2.3 +encode_uri_component(s) = replace(s, r"[^A-Za-z0-9\-_.~/]+" => percent_escape) + """ uripath(path::AbstractString) @@ -636,10 +641,6 @@ function uripath end @static if Sys.iswindows() function uripath(path::String) - percent_escape(s) = # RFC3986 Section 2.1 - '%' * join(map(b -> uppercase(string(b, base=16)), codeunits(s)), '%') - encode_uri_component(s) = # RFC3986 Section 2.3 - replace(s, r"[^A-Za-z0-9\-_.~/]+" => percent_escape) path = abspath(path) if startswith(path, "\\\\") # UNC path, RFC8089 Appendix E.3 unixpath = join(eachsplit(path, path_separator_re, keepempty=false), '/') @@ -653,10 +654,6 @@ function uripath end end else function uripath(path::String) - percent_escape(s) = # RFC3986 Section 2.1 - '%' * join(map(b -> uppercase(string(b, base=16)), codeunits(s)), '%') - encode_uri_component(s) = # RFC3986 
Section 2.3 - replace(s, r"[^A-Za-z0-9\-_.~/]+" => percent_escape) localpath = join(eachsplit(abspath(path), path_separator_re, keepempty=false), '/') host = if ispath("/proc/sys/fs/binfmt_misc/WSLInterop") # WSL sigil distro = get(ENV, "WSL_DISTRO_NAME", "") # See diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index c7ef1efb35945..b753c9ca88f24 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -7,7 +7,7 @@ Profiling support. ## CPU profiling - `@profile foo()` to profile a specific call. -- `Profile.print()` to print the report. +- `Profile.print()` to print the report. Paths are clickable links in supported terminals and specialized for JULIA_EDITOR etc. - `Profile.clear()` to clear the buffer. - Send a $(Sys.isbsd() ? "SIGINFO (ctrl-t)" : "SIGUSR1") signal to the process to automatically trigger a profile and print. @@ -198,7 +198,9 @@ const META_OFFSET_THREADID = 5 Prints profiling results to `io` (by default, `stdout`). If you do not supply a `data` vector, the internal buffer of accumulated backtraces -will be used. +will be used. Paths are clickable links in supported terminals and +specialized for [`JULIA_EDITOR`](@ref) with line numbers, or just file +links if no editor is set. The keyword arguments can be any combination of: @@ -807,26 +809,35 @@ end # make a terminal-clickable link to the file and linenum. 
# Similar to `define_default_editors` in `Base.Filesystem` but for creating URIs not commands function editor_link(path::String, linenum::Int) - editor = get(ENV, "JULIA_EDITOR", "") - - if editor == "code" - return "vscode://file/$path:$linenum" - elseif editor == "subl" || editor == "sublime_text" - return "subl://$path:$linenum" - elseif editor == "idea" || occursin("idea", editor) - return "idea://open?file=$path&line=$linenum" - elseif editor == "pycharm" - return "pycharm://open?file=$path&line=$linenum" - elseif editor == "atom" - return "atom://core/open/file?filename=$path&line=$linenum" - elseif editor == "emacsclient" - return "emacs://open?file=$path&line=$linenum" - elseif editor == "vim" || editor == "nvim" - return "vim://open?file=$path&line=$linenum" - else - # TODO: convert the path to a generic URI (line numbers are not supported by generic URI) - return path + # Note: the editor path can include spaces (if escaped) and flags. + editor = nothing + for var in ["JULIA_EDITOR", "VISUAL", "EDITOR"] + str = get(ENV, var, nothing) + str isa String || continue + editor = str + break + end + path_encoded = Base.Filesystem.encode_uri_component(path) + if editor !== nothing + if editor == "code" + return "vscode://file/$path_encoded:$linenum" + elseif editor == "subl" || editor == "sublime_text" + return "subl://open?url=file://$path_encoded&line=$linenum" + elseif editor == "idea" || occursin("idea", editor) + return "idea://open?file=$path_encoded&line=$linenum" + elseif editor == "pycharm" + return "pycharm://open?file=$path_encoded&line=$linenum" + elseif editor == "atom" + return "atom://core/open/file?filename=$path_encoded&line=$linenum" + elseif editor == "emacsclient" || editor == "emacs" + return "emacs://open?file=$path_encoded&line=$linenum" + elseif editor == "vim" || editor == "nvim" + # Note: Vim/Nvim may not support standard URI schemes without specific plugins + return "vim://open?file=$path_encoded&line=$linenum" + end end + # fallback to 
generic URI, but line numbers are not supported by generic URI + return Base.Filesystem.uripath(path) end function print_flat(io::IO, lilist::Vector{StackFrame}, From 4b27a169bda6ac970fc677962c30af51a6a9ca74 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Fri, 27 Sep 2024 11:34:58 +0900 Subject: [PATCH 06/45] inference: add missing `TypeVar` handling for `instanceof_tfunc` (#55884) I thought these sort of problems had been addressed by d60f92c, but it seems some were missed. Specifically, `t.a` and `t.b` from `t::Union` could be `TypeVar`, and if they are passed to a subroutine or recursed without being unwrapped or rewrapped, errors like JuliaLang/julia#55882 could occur. This commit resolves the issue by calling `unwraptv` in the `Union` handling within `instanceof_tfunc`. I also found a similar issue inside `nfields_tfunc`, so that has also been fixed, and test cases have been added. While I haven't been able to make up a test case specifically for the fix in `instanceof_tfunc`, I have confirmed that this commit certainly fixes the issue reported in JuliaLang/julia#55882. 
- fixes JuliaLang/julia#55882 --- base/compiler/tfuncs.jl | 8 ++++---- test/compiler/inference.jl | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index ab3b50763deec..cc8ba227bd088 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -135,8 +135,8 @@ function instanceof_tfunc(@nospecialize(t), astag::Bool=false, @nospecialize(tro end return tr, isexact, isconcrete, istype elseif isa(t, Union) - ta, isexact_a, isconcrete_a, istype_a = instanceof_tfunc(t.a, astag, troot) - tb, isexact_b, isconcrete_b, istype_b = instanceof_tfunc(t.b, astag, troot) + ta, isexact_a, isconcrete_a, istype_a = instanceof_tfunc(unwraptv(t.a), astag, troot) + tb, isexact_b, isconcrete_b, istype_b = instanceof_tfunc(unwraptv(t.b), astag, troot) isconcrete = isconcrete_a && isconcrete_b istype = istype_a && istype_b # most users already handle the Union case, so here we assume that @@ -563,9 +563,9 @@ add_tfunc(Core.sizeof, 1, 1, sizeof_tfunc, 1) end end if isa(x, Union) - na = nfields_tfunc(𝕃, x.a) + na = nfields_tfunc(𝕃, unwraptv(x.a)) na === Int && return Int - return tmerge(na, nfields_tfunc(𝕃, x.b)) + return tmerge(𝕃, na, nfields_tfunc(𝕃, unwraptv(x.b))) end return Int end diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index d1382d3c84b82..46009e0790942 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -6152,3 +6152,6 @@ end t155751 = Union{AbstractArray{UInt8, 4}, Array{Float32, 4}, Grid55751{Float32, 3, _A} where _A} t255751 = Array{Float32, 3} @test Core.Compiler.tmerge_types_slow(t155751,t255751) == AbstractArray # shouldn't hang + +issue55882_nfields(x::Union{T,Nothing}) where T<:Number = nfields(x) +@test Base.infer_return_type(issue55882_nfields) <: Int From 0dbb6eb679c1c124c212ae9ce399004873041cf1 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Fri, 27 Sep 2024 10:29:32 +0200 Subject: [PATCH 07/45] Install terminfo data under 
/usr/share/julia (#55881) Just like all other libraries, we don't want internal Julia files to mess with system files. Introduced by https://github.com/JuliaLang/julia/pull/55411. --- Makefile | 2 +- base/terminfo.jl | 2 +- deps/terminfo.mk | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 735d342a79eb5..e440f243d876e 100644 --- a/Makefile +++ b/Makefile @@ -410,7 +410,7 @@ endif $(INSTALL_F) $(JULIAHOME)/contrib/julia.appdata.xml $(DESTDIR)$(datarootdir)/metainfo/ # Install terminal info database ifneq ($(WITH_TERMINFO),0) - cp -R -L $(build_datarootdir)/terminfo $(DESTDIR)$(datarootdir) + cp -R -L $(build_datarootdir)/julia/terminfo $(DESTDIR)$(datarootdir)/julia/ endif # Update RPATH entries and JL_SYSTEM_IMAGE_PATH if $(private_libdir_rel) != $(build_private_libdir_rel) diff --git a/base/terminfo.jl b/base/terminfo.jl index 79713f4a86aa3..8ea8387077d36 100644 --- a/base/terminfo.jl +++ b/base/terminfo.jl @@ -262,7 +262,7 @@ function find_terminfo_file(term::String) append!(terminfo_dirs, replace(split(ENV["TERMINFO_DIRS"], ':'), "" => "/usr/share/terminfo")) - push!(terminfo_dirs, normpath(Sys.BINDIR, DATAROOTDIR, "terminfo")) + push!(terminfo_dirs, normpath(Sys.BINDIR, DATAROOTDIR, "julia", "terminfo")) Sys.isunix() && push!(terminfo_dirs, "/etc/terminfo", "/lib/terminfo", "/usr/share/terminfo") for dir in terminfo_dirs diff --git a/deps/terminfo.mk b/deps/terminfo.mk index 63194f786f566..60865838a813e 100644 --- a/deps/terminfo.mk +++ b/deps/terminfo.mk @@ -22,8 +22,8 @@ $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-checked: $(BUILDDIR)/TermInfoDB-v$ echo 1 > $@ define TERMINFO_INSTALL - mkdir -p $2/$$(build_datarootdir) - cp -R $1/terminfo $2/$$(build_datarootdir) + mkdir -p $2/$$(build_datarootdir)/julia + cp -R $1/terminfo $2/$$(build_datarootdir)/julia/ endef $(eval $(call staged-install, \ terminfo,TermInfoDB-v$(TERMINFO_VER), \ From 6e33dfb202e5a0adce02fd29220f6314101edc1c Mon Sep 17 00:00:00 2001 From: 
Diogo Netto <61364108+d-netto@users.noreply.github.com> Date: Fri, 27 Sep 2024 08:37:07 -0300 Subject: [PATCH 08/45] expose metric to report reasons why full GCs were triggered (#55826) Additional GC observability tool. This will help us to diagnose why some of our servers are triggering so many full GCs in certain circumstances. --- base/timing.jl | 27 +++++++++++++++++++++++++++ src/gc-stock.c | 15 +++++++++++++-- src/gc-stock.h | 14 ++++++++++++++ test/gc.jl | 11 +++++++++++ 4 files changed, 65 insertions(+), 2 deletions(-) diff --git a/base/timing.jl b/base/timing.jl index 80ebb74abee26..6d97d70d2f04c 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -104,6 +104,33 @@ function gc_page_utilization_data() return Base.unsafe_wrap(Array, page_utilization_raw, JL_GC_N_MAX_POOLS, own=false) end +# must be kept in sync with `src/gc-stock.h`` +const FULL_SWEEP_REASONS = [:FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL, :FULL_SWEEP_REASON_FORCED_FULL_SWEEP, + :FULL_SWEEP_REASON_USER_MAX_EXCEEDED, :FULL_SWEEP_REASON_LARGE_PROMOTION_RATE] + +""" + Base.full_sweep_reasons() + +Return a dictionary of the number of times each full sweep reason has occurred. + +The reasons are: +- `:FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL`: Full sweep was caused due to `always_full` being set in the GC debug environment +- `:FULL_SWEEP_REASON_FORCED_FULL_SWEEP`: Full sweep was forced by `GC.gc(true)` +- `:FULL_SWEEP_REASON_USER_MAX_EXCEEDED`: Full sweep was forced due to the system reaching the heap soft size limit +- `:FULL_SWEEP_REASON_LARGE_PROMOTION_RATE`: Full sweep was forced by a large promotion rate across GC generations + +Note that the set of reasons is not guaranteed to be stable across minor versions of Julia. 
+""" +function full_sweep_reasons() + reason = cglobal(:jl_full_sweep_reasons, UInt64) + reasons_as_array = Base.unsafe_wrap(Vector{UInt64}, reason, length(FULL_SWEEP_REASONS), own=false) + d = Dict{Symbol, Int64}() + for (i, r) in enumerate(FULL_SWEEP_REASONS) + d[r] = reasons_as_array[i] + end + return d +end + """ Base.jit_total_bytes() diff --git a/src/gc-stock.c b/src/gc-stock.c index d25f8917f302d..6b97881909bbd 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -40,6 +40,8 @@ uv_sem_t gc_sweep_assists_needed; uv_mutex_t gc_queue_observer_lock; // Tag for sentinel nodes in bigval list uintptr_t gc_bigval_sentinel_tag; +// Table recording number of full GCs due to each reason +JL_DLLEXPORT uint64_t jl_full_sweep_reasons[FULL_SWEEP_NUM_REASONS]; // Flag that tells us whether we need to support conservative marking // of objects. @@ -3043,10 +3045,12 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) // we either free some space or get an OOM error. if (gc_sweep_always_full) { sweep_full = 1; + gc_count_full_sweep_reason(FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL); } if (collection == JL_GC_FULL && !prev_sweep_full) { sweep_full = 1; recollect = 1; + gc_count_full_sweep_reason(FULL_SWEEP_REASON_FORCED_FULL_SWEEP); } if (sweep_full) { // these are the difference between the number of gc-perm bytes scanned @@ -3182,10 +3186,17 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) } double old_ratio = (double)promoted_bytes/(double)heap_size; - if (heap_size > user_max || old_ratio > 0.15) + if (heap_size > user_max) { next_sweep_full = 1; - else + gc_count_full_sweep_reason(FULL_SWEEP_REASON_USER_MAX_EXCEEDED); + } + else if (old_ratio > 0.15) { + next_sweep_full = 1; + gc_count_full_sweep_reason(FULL_SWEEP_REASON_LARGE_PROMOTION_RATE); + } + else { next_sweep_full = 0; + } if (heap_size > user_max || thrashing) under_pressure = 1; // sweeping is over diff --git a/src/gc-stock.h b/src/gc-stock.h index 
45c93bf4289ae..46f7d3e11e105 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -505,6 +505,20 @@ FORCE_INLINE void gc_big_object_link(bigval_t *sentinel_node, bigval_t *node) JL sentinel_node->next = node; } +// Must be kept in sync with `base/timing.jl` +#define FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL (0) +#define FULL_SWEEP_REASON_FORCED_FULL_SWEEP (1) +#define FULL_SWEEP_REASON_USER_MAX_EXCEEDED (2) +#define FULL_SWEEP_REASON_LARGE_PROMOTION_RATE (3) +#define FULL_SWEEP_NUM_REASONS (4) + +extern JL_DLLEXPORT uint64_t jl_full_sweep_reasons[FULL_SWEEP_NUM_REASONS]; +STATIC_INLINE void gc_count_full_sweep_reason(int reason) JL_NOTSAFEPOINT +{ + assert(reason >= 0 && reason < FULL_SWEEP_NUM_REASONS); + jl_full_sweep_reasons[reason]++; +} + extern uv_mutex_t gc_perm_lock; extern uv_mutex_t gc_threads_lock; extern uv_cond_t gc_threads_cond; diff --git a/test/gc.jl b/test/gc.jl index e46ff0ed73fd9..c532f17f04eb5 100644 --- a/test/gc.jl +++ b/test/gc.jl @@ -49,6 +49,13 @@ function issue_54275_test() @test !live_bytes_has_grown_too_much end +function full_sweep_reasons_test() + GC.gc() + reasons = Base.full_sweep_reasons() + @test reasons[:FULL_SWEEP_REASON_FORCED_FULL_SWEEP] >= 1 + @test keys(reasons) == Set(Base.FULL_SWEEP_REASONS) +end + # !!! note: # Since we run our tests on 32bit OS as well we confine ourselves # to parameters that allocate about 512MB of objects. 
Max RSS is lower @@ -73,6 +80,10 @@ end @test isempty(Docs.undocumented_names(GC)) end +@testset "Full GC reasons" begin + full_sweep_reasons_test() +end + #testset doesn't work here because this needs to run in top level #Check that we ensure objects in toplevel exprs are rooted global dims54422 = [] # allocate the Binding From 3aad027fc5631f2b5ca81e0133518f134b2b6c03 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Fri, 27 Sep 2024 10:15:24 -0400 Subject: [PATCH 09/45] Revert "Improve printing of several arguments" (#55894) Reverts JuliaLang/julia#55754 as it overrode some performance heuristics which appeared to be giving a significant gain/loss in performance: Closes https://github.com/JuliaLang/julia/issues/55893 --- base/strings/io.jl | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/base/strings/io.jl b/base/strings/io.jl index c78e3e2e043b6..754e058cd2f54 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -42,7 +42,9 @@ end function print(io::IO, xs...) lock(io) try - foreach(Fix1(print, io), xs) + for x in xs + print(io, x) + end finally unlock(io) end @@ -136,9 +138,15 @@ function print_to_string(xs...) if isempty(xs) return "" end - siz = sum(_str_sizehint, xs; init = 0) + siz::Int = 0 + for x in xs + siz += _str_sizehint(x) + end + # specialized for performance reasons s = IOBuffer(sizehint=siz) - print(s, xs...) + for x in xs + print(s, x) + end String(_unsafe_take!(s)) end @@ -146,10 +154,16 @@ function string_with_env(env, xs...) if isempty(xs) return "" end - siz = sum(_str_sizehint, xs; init = 0) + siz::Int = 0 + for x in xs + siz += _str_sizehint(x) + end + # specialized for performance reasons s = IOBuffer(sizehint=siz) env_io = IOContext(s, env) - print(env_io, xs...) 
+ for x in xs + print(env_io, x) + end String(_unsafe_take!(s)) end From 00f0a6c63c1e5ce996fba5ef187522f4990ee9b4 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Sat, 28 Sep 2024 02:21:21 +0200 Subject: [PATCH 10/45] Do not trigger deprecation warnings in `Test.detect_ambiguities` and `Test.detect_unbound_args` (#55869) #55868 --- stdlib/Test/src/Test.jl | 4 ++-- test/ambiguous.jl | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl index b4ada2ce3a9cf..46bc2d8790cec 100644 --- a/stdlib/Test/src/Test.jl +++ b/stdlib/Test/src/Test.jl @@ -2087,7 +2087,7 @@ function detect_ambiguities(mods::Module...; while !isempty(work) mod = pop!(work) for n in names(mod, all = true) - Base.isdeprecated(mod, n) && continue + (!Base.isbindingresolved(mod, n) || Base.isdeprecated(mod, n)) && continue if !isdefined(mod, n) if is_in_mods(mod, recursive, mods) if allowed_undefineds === nothing || GlobalRef(mod, n) ∉ allowed_undefineds @@ -2158,7 +2158,7 @@ function detect_unbound_args(mods...; while !isempty(work) mod = pop!(work) for n in names(mod, all = true) - Base.isdeprecated(mod, n) && continue + (!Base.isbindingresolved(mod, n) || Base.isdeprecated(mod, n)) && continue if !isdefined(mod, n) if is_in_mods(mod, recursive, mods) if allowed_undefineds === nothing || GlobalRef(mod, n) ∉ allowed_undefineds diff --git a/test/ambiguous.jl b/test/ambiguous.jl index acdfdc70ba30c..2f8a4193cf592 100644 --- a/test/ambiguous.jl +++ b/test/ambiguous.jl @@ -162,6 +162,22 @@ end ambs = detect_ambiguities(Ambig48312) @test length(ambs) == 4 +module UnboundAmbig55868 + module B + struct C end + export C + Base.@deprecate_binding D C + end + using .B + export C, D +end +@test !Base.isbindingresolved(UnboundAmbig55868, :C) +@test !Base.isbindingresolved(UnboundAmbig55868, :D) +@test isempty(detect_unbound_args(UnboundAmbig55868)) +@test isempty(detect_ambiguities(UnboundAmbig55868)) +@test 
!Base.isbindingresolved(UnboundAmbig55868, :C) +@test !Base.isbindingresolved(UnboundAmbig55868, :D) + # Test that Core and Base are free of ambiguities # not using isempty so this prints more information when it fails @testset "detect_ambiguities" begin From 4a4ca9c815207a80ea81b884b196dfeafc3cb877 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Fri, 27 Sep 2024 21:49:29 -0400 Subject: [PATCH 11/45] do not intentionally suppress errors in precompile script from being reported or failing the result (#55909) I was slightly annoying that the build was set up to succeed if this step failed, so I removed the error suppression and fixed up the script slightly --- contrib/generate_precompile.jl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl index d3e73a1b1865a..60f7290c7a0ac 100644 --- a/contrib/generate_precompile.jl +++ b/contrib/generate_precompile.jl @@ -347,8 +347,7 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe print_state("step1" => "F$n_step1") return :ok end - Base.errormonitor(step1) - !PARALLEL_PRECOMPILATION && wait(step1) + PARALLEL_PRECOMPILATION ? bind(statements_step1, step1) : wait(step1) # Create a staging area where all the loaded packages are available PrecompileStagingArea = Module() @@ -362,7 +361,7 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe # Make statements unique statements = Set{String}() # Execute the precompile statements - for sts in [statements_step1,], statement in sts + for statement in statements_step1 # Main should be completely clean occursin("Main.", statement) && continue Base.in!(statement, statements) && continue @@ -398,6 +397,7 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe println() # Seems like a reasonable number right now, adjust as needed # comment out if debugging script + have_repl = false n_succeeded > (have_repl ? 
650 : 90) || @warn "Only $n_succeeded precompile statements" fetch(step1) == :ok || throw("Step 1 of collecting precompiles failed.") @@ -408,7 +408,6 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe finally fancyprint && print(ansi_enablecursor) GC.gc(true); GC.gc(false); # reduce memory footprint - return end generate_precompile_statements() From ff0a1befb9cff915abb06c551e1bed4ab7331790 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Sat, 28 Sep 2024 15:45:14 +0530 Subject: [PATCH 12/45] Remove eigvecs method for SymTridiagonal (#55903) The fallback method does the same, so this specialized method isn't necessary --- stdlib/LinearAlgebra/src/tridiag.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/stdlib/LinearAlgebra/src/tridiag.jl b/stdlib/LinearAlgebra/src/tridiag.jl index e755ce63e9b2a..ca61eb8519d42 100644 --- a/stdlib/LinearAlgebra/src/tridiag.jl +++ b/stdlib/LinearAlgebra/src/tridiag.jl @@ -319,8 +319,6 @@ eigmax(A::SymTridiagonal) = eigvals(A, size(A, 1):size(A, 1))[1] eigmin(A::SymTridiagonal) = eigvals(A, 1:1)[1] #Compute selected eigenvectors only corresponding to particular eigenvalues -eigvecs(A::SymTridiagonal) = eigen(A).vectors - """ eigvecs(A::SymTridiagonal[, eigvals]) -> Matrix From 97ecdb8595c4a1fbe68ba6f39b3244e8cdabc2c6 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Sat, 28 Sep 2024 19:02:49 -0400 Subject: [PATCH 13/45] add --trim option for generating smaller binaries (#55047) This adds a command line option `--trim` that builds images where code is only included if it is statically reachable from methods marked using the new function `entrypoint`. Compile-time errors are given for call sites that are too dynamic to allow trimming the call graph (however there is an `unsafe` option if you want to try building anyway to see what happens). The PR has two other components. One is changes to Base that generally allow more code to be compiled in this mode. 
These changes will either be merged in separate PRs or moved to a separate part of the workflow (where we will build a custom system image for this purpose). The branch is set up this way to make it easy to check out and try the functionality. The other component is everything in the `juliac/` directory, which implements a compiler driver script based on this new option, along with some examples and tests. This will eventually become a package "app" that depends on PackageCompiler and provides a CLI for all of this stuff, so it will not be merged here. To try an example: ``` julia contrib/juliac.jl --output-exe hello --trim test/trimming/hello.jl ``` When stripped the resulting executable is currently about 900kb on my machine. Also includes a lot of work by @topolarity --------- Co-authored-by: Gabriel Baraldi Co-authored-by: Tim Holy Co-authored-by: Cody Tapscott --- Makefile | 4 +- NEWS.md | 2 + base/experimental.jl | 14 + base/libuv.jl | 5 +- base/options.jl | 1 + base/reflection.jl | 11 +- base/strings/io.jl | 4 + contrib/julia-config.jl | 2 +- contrib/juliac-buildscript.jl | 277 ++++++++++++++++++ contrib/juliac.jl | 110 +++++++ doc/src/devdocs/sysimg.md | 77 +++++ doc/src/manual/command-line-interface.md | 2 +- src/aotcompile.cpp | 69 ++++- src/cgutils.cpp | 12 + src/codegen-stubs.c | 1 + src/codegen.cpp | 355 +++++++++++++++++++++-- src/gf.c | 26 +- src/init.c | 11 +- src/jitlayers.h | 6 +- src/jl_exported_funcs.inc | 3 + src/jloptions.c | 26 +- src/jloptions.h | 1 + src/julia.expmap.in | 4 +- src/julia.h | 7 + src/julia_internal.h | 7 +- src/module.c | 2 +- src/precompile.c | 10 +- src/precompile_utils.c | 80 +++++ src/staticdata.c | 190 ++++++++++-- src/support/arraylist.h | 17 +- stdlib/LinearAlgebra/src/blas.jl | 2 +- stdlib/LinearAlgebra/src/lbt.jl | 4 +- test/Makefile | 8 +- test/trimming/Makefile | 55 ++++ test/trimming/hello.jl | 6 + test/trimming/init.c | 9 + test/trimming/trimming.jl | 7 + 37 files changed, 1338 insertions(+), 89 deletions(-) 
create mode 100644 contrib/juliac-buildscript.jl create mode 100644 contrib/juliac.jl create mode 100644 test/trimming/Makefile create mode 100644 test/trimming/hello.jl create mode 100644 test/trimming/init.c create mode 100644 test/trimming/trimming.jl diff --git a/Makefile b/Makefile index e440f243d876e..4fd8b878c5d1f 100644 --- a/Makefile +++ b/Makefile @@ -82,7 +82,7 @@ julia-deps: | $(DIRS) $(build_datarootdir)/julia/base $(build_datarootdir)/julia julia-stdlib: | $(DIRS) julia-deps @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/stdlib -julia-base: julia-deps $(build_sysconfdir)/julia/startup.jl $(build_man1dir)/julia.1 $(build_datarootdir)/julia/julia-config.jl +julia-base: julia-deps $(build_sysconfdir)/julia/startup.jl $(build_man1dir)/julia.1 $(build_datarootdir)/julia/julia-config.jl $(build_datarootdir)/julia/juliac.jl $(build_datarootdir)/julia/juliac-buildscript.jl @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/base julia-libccalltest: julia-deps @@ -181,7 +181,7 @@ $(build_sysconfdir)/julia/startup.jl: $(JULIAHOME)/etc/startup.jl | $(build_sysc @echo Creating usr/etc/julia/startup.jl @cp $< $@ -$(build_datarootdir)/julia/julia-config.jl: $(JULIAHOME)/contrib/julia-config.jl | $(build_datarootdir)/julia +$(build_datarootdir)/julia/%: $(JULIAHOME)/contrib/% | $(build_datarootdir)/julia $(INSTALL_M) $< $(dir $@) $(build_depsbindir)/stringreplace: $(JULIAHOME)/contrib/stringreplace.c | $(build_depsbindir) diff --git a/NEWS.md b/NEWS.md index 9ecdd87f0c2bb..ca2bf1f615012 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,8 @@ Julia v1.12 Release Notes New language features --------------------- +- New option `--trim` for building "trimmed" binaries, where code not provably reachable from entry points + is removed. Entry points can be marked using `Base.Experimental.entrypoint` ([#55047]). - A new keyword argument `usings::Bool` has been added to `names`. By using this, we can now find all the names available in module `A` by `names(A; all=true, imported=true, usings=true)`. 
([#54609]) - the `@atomic(...)` macro family supports now the reference assignment syntax, e.g. diff --git a/base/experimental.jl b/base/experimental.jl index 58c7258120f3f..6e757e9fa0e5f 100644 --- a/base/experimental.jl +++ b/base/experimental.jl @@ -457,4 +457,18 @@ without adding them to the global method table. """ :@MethodTable +""" + Base.Experimental.entrypoint(f, argtypes::Tuple) + +Mark a method for inclusion when the `--trim` option is specified. +""" +function entrypoint(@nospecialize(f), @nospecialize(argtypes::Tuple)) + entrypoint(Tuple{Core.Typeof(f), argtypes...}) +end + +function entrypoint(@nospecialize(argt::Type)) + ccall(:jl_add_entrypoint, Int32, (Any,), argt) + nothing +end + end diff --git a/base/libuv.jl b/base/libuv.jl index 143201598fde0..3c9f79dfa7b2c 100644 --- a/base/libuv.jl +++ b/base/libuv.jl @@ -133,7 +133,10 @@ function uv_return_spawn end function uv_asynccb end function uv_timercb end -function reinit_stdio() +reinit_stdio() = _reinit_stdio() +# we need this so it can be called by codegen to print errors, even after +# reinit_stdio has been redefined by the juliac build script. 
+function _reinit_stdio() global stdin = init_stdio(ccall(:jl_stdin_stream, Ptr{Cvoid}, ()))::IO global stdout = init_stdio(ccall(:jl_stdout_stream, Ptr{Cvoid}, ()))::IO global stderr = init_stdio(ccall(:jl_stderr_stream, Ptr{Cvoid}, ()))::IO diff --git a/base/options.jl b/base/options.jl index 41ce3c9e20909..1de7a2acb1e06 100644 --- a/base/options.jl +++ b/base/options.jl @@ -58,6 +58,7 @@ struct JLOptions permalloc_pkgimg::Int8 heap_size_hint::UInt64 trace_compile_timing::Int8 + trim::Int8 end # This runs early in the sysimage != is not defined yet diff --git a/base/reflection.jl b/base/reflection.jl index 5b395efc58190..fe48b6f9aa6b9 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -1504,6 +1504,13 @@ struct CodegenParams """ use_jlplt::Cint + """ + If enabled, only provably reachable code (from functions marked with `entrypoint`) is included + in the output system image. Errors or warnings can be given for call sites too dynamic to handle. + The option is disabled by default. 
(0=>disabled, 1=>safe (static errors), 2=>unsafe, 3=>unsafe plus warnings) + """ + trim::Cint + """ A pointer of type @@ -1519,14 +1526,14 @@ struct CodegenParams prefer_specsig::Bool=false, gnu_pubnames::Bool=true, debug_info_kind::Cint = default_debug_info_kind(), debug_info_level::Cint = Cint(JLOptions().debug_level), safepoint_on_entry::Bool=true, - gcstack_arg::Bool=true, use_jlplt::Bool=true, + gcstack_arg::Bool=true, use_jlplt::Bool=true, trim::Cint=Cint(0), lookup::Ptr{Cvoid}=unsafe_load(cglobal(:jl_rettype_inferred_addr, Ptr{Cvoid}))) return new( Cint(track_allocations), Cint(code_coverage), Cint(prefer_specsig), Cint(gnu_pubnames), debug_info_kind, debug_info_level, Cint(safepoint_on_entry), - Cint(gcstack_arg), Cint(use_jlplt), + Cint(gcstack_arg), Cint(use_jlplt), Cint(trim), lookup) end end diff --git a/base/strings/io.jl b/base/strings/io.jl index 754e058cd2f54..df34712b519d5 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -51,6 +51,8 @@ function print(io::IO, xs...) return nothing end +setfield!(typeof(print).name.mt, :max_args, 10, :monotonic) + """ println([io::IO], xs...) @@ -74,6 +76,7 @@ julia> String(take!(io)) """ println(io::IO, xs...) = print(io, xs..., "\n") +setfield!(typeof(println).name.mt, :max_args, 10, :monotonic) ## conversion of general objects to strings ## """ @@ -149,6 +152,7 @@ function print_to_string(xs...) end String(_unsafe_take!(s)) end +setfield!(typeof(print_to_string).name.mt, :max_args, 10, :monotonic) function string_with_env(env, xs...) 
if isempty(xs) diff --git a/contrib/julia-config.jl b/contrib/julia-config.jl index c692b3f522fb2..8b1eb55cbe4f4 100755 --- a/contrib/julia-config.jl +++ b/contrib/julia-config.jl @@ -67,7 +67,7 @@ function ldlibs(doframework) "julia" end if Sys.isunix() - return "-Wl,-rpath,$(shell_escape(libDir())) -Wl,-rpath,$(shell_escape(private_libDir())) -l$libname" + return "-L$(shell_escape(private_libDir())) -Wl,-rpath,$(shell_escape(libDir())) -Wl,-rpath,$(shell_escape(private_libDir())) -l$libname" else return "-l$libname -lopenlibm" end diff --git a/contrib/juliac-buildscript.jl b/contrib/juliac-buildscript.jl new file mode 100644 index 0000000000000..50f96198c416b --- /dev/null +++ b/contrib/juliac-buildscript.jl @@ -0,0 +1,277 @@ +# Script to run in the process that generates juliac's object file output + +inputfile = ARGS[1] +output_type = ARGS[2] +add_ccallables = ARGS[3] == "true" + +# Initialize some things not usually initialized when output is requested +Sys.__init__() +Base.init_depot_path() +Base.init_load_path() +Base.init_active_project() +task = current_task() +task.rngState0 = 0x5156087469e170ab +task.rngState1 = 0x7431eaead385992c +task.rngState2 = 0x503e1d32781c2608 +task.rngState3 = 0x3a77f7189200c20b +task.rngState4 = 0x5502376d099035ae +uuid_tuple = (UInt64(0), UInt64(0)) +ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), Base.__toplevel__, uuid_tuple) +ccall(:jl_set_newly_inferred, Cvoid, (Any,), Core.Compiler.newly_inferred) + +# Patch methods in Core and Base + +@eval Core begin + DomainError(@nospecialize(val), @nospecialize(msg::AbstractString)) = (@noinline; $(Expr(:new, :DomainError, :val, :msg))) +end + +(f::Base.RedirectStdStream)(io::Core.CoreSTDOUT) = Base._redirect_io_global(io, f.unix_fd) + +@eval Base begin + _assert_tostring(msg) = "" + reinit_stdio() = nothing + JuliaSyntax.enable_in_core!() = nothing + init_active_project() = ACTIVE_PROJECT[] = nothing + set_active_project(projfile::Union{AbstractString,Nothing}) = 
ACTIVE_PROJECT[] = projfile + disable_library_threading() = nothing + start_profile_listener() = nothing + @inline function invokelatest(f::F, args...; kwargs...) where F + return f(args...; kwargs...) + end + function sprint(f::F, args::Vararg{Any,N}; context=nothing, sizehint::Integer=0) where {F<:Function,N} + s = IOBuffer(sizehint=sizehint) + if context isa Tuple + f(IOContext(s, context...), args...) + elseif context !== nothing + f(IOContext(s, context), args...) + else + f(s, args...) + end + String(_unsafe_take!(s)) + end + function show_typeish(io::IO, @nospecialize(T)) + if T isa Type + show(io, T) + elseif T isa TypeVar + print(io, (T::TypeVar).name) + else + print(io, "?") + end + end + function show(io::IO, T::Type) + if T isa DataType + print(io, T.name.name) + if T !== T.name.wrapper && length(T.parameters) > 0 + print(io, "{") + first = true + for p in T.parameters + if !first + print(io, ", ") + end + first = false + if p isa Int + show(io, p) + elseif p isa Type + show(io, p) + elseif p isa Symbol + print(io, ":") + print(io, p) + elseif p isa TypeVar + print(io, p.name) + else + print(io, "?") + end + end + print(io, "}") + end + elseif T isa Union + print(io, "Union{") + show_typeish(io, T.a) + print(io, ", ") + show_typeish(io, T.b) + print(io, "}") + elseif T isa UnionAll + print(io, T.body::Type) + print(io, " where ") + print(io, T.var.name) + end + end + show_type_name(io::IO, tn::Core.TypeName) = print(io, tn.name) + + mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted; dims=:, init=_InitialValue()) where {F, F2} = + _mapreduce_dim(f, op, init, A, dims) + mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted...; kw...) where {F, F2} = + reduce(op, map(f, A...); kw...) 
+ + _mapreduce_dim(f::F, op::F2, nt, A::AbstractArrayOrBroadcasted, ::Colon) where {F, F2} = + mapfoldl_impl(f, op, nt, A) + + _mapreduce_dim(f::F, op::F2, ::_InitialValue, A::AbstractArrayOrBroadcasted, ::Colon) where {F, F2} = + _mapreduce(f, op, IndexStyle(A), A) + + _mapreduce_dim(f::F, op::F2, nt, A::AbstractArrayOrBroadcasted, dims) where {F, F2} = + mapreducedim!(f, op, reducedim_initarray(A, dims, nt), A) + + _mapreduce_dim(f::F, op::F2, ::_InitialValue, A::AbstractArrayOrBroadcasted, dims) where {F,F2} = + mapreducedim!(f, op, reducedim_init(f, op, A, dims), A) + + mapreduce_empty_iter(f::F, op::F2, itr, ItrEltype) where {F, F2} = + reduce_empty_iter(MappingRF(f, op), itr, ItrEltype) + mapreduce_first(f::F, op::F2, x) where {F,F2} = reduce_first(op, f(x)) + + _mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted) where {F,F2} = _mapreduce(f, op, IndexStyle(A), A) + mapreduce_empty(::typeof(identity), op::F, T) where {F} = reduce_empty(op, T) + mapreduce_empty(::typeof(abs), op::F, T) where {F} = abs(reduce_empty(op, T)) + mapreduce_empty(::typeof(abs2), op::F, T) where {F} = abs2(reduce_empty(op, T)) +end +@eval Base.Unicode begin + function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, chartransform::F = identity) where F + nwords = utf8proc_decompose(str, options, C_NULL, 0, chartransform) + buffer = Base.StringVector(nwords*4) + nwords = utf8proc_decompose(str, options, buffer, nwords, chartransform) + nbytes = ccall(:utf8proc_reencode, Int, (Ptr{UInt8}, Int, Cint), buffer, nwords, options) + nbytes < 0 && utf8proc_error(nbytes) + return String(resize!(buffer, nbytes)) + end +end +@eval Base.GMP begin + function __init__() + try + ccall((:__gmp_set_memory_functions, libgmp), Cvoid, + (Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}), + cglobal(:jl_gc_counted_malloc), + cglobal(:jl_gc_counted_realloc_with_old_size), + cglobal(:jl_gc_counted_free_with_size)) + ZERO.alloc, ZERO.size, ZERO.d = 0, 0, C_NULL + ONE.alloc, ONE.size, ONE.d = 1, 1, 
pointer(_ONE) + catch ex + Base.showerror_nostdio(ex, "WARNING: Error during initialization of module GMP") + end + # This only works with a patched version of GMP, ignore otherwise + try + ccall((:__gmp_set_alloc_overflow_function, libgmp), Cvoid, + (Ptr{Cvoid},), + cglobal(:jl_throw_out_of_memory_error)) + ALLOC_OVERFLOW_FUNCTION[] = true + catch ex + # ErrorException("ccall: could not find function...") + if typeof(ex) != ErrorException + rethrow() + end + end + end +end +@eval Base.Sort begin + issorted(itr; + lt::T=isless, by::F=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) where {T,F} = + issorted(itr, ord(lt,by,rev,order)) +end +@eval Base.TOML begin + function try_return_datetime(p, year, month, day, h, m, s, ms) + return DateTime(year, month, day, h, m, s, ms) + end + function try_return_date(p, year, month, day) + return Date(year, month, day) + end + function parse_local_time(l::Parser) + h = @try parse_int(l, false) + h in 0:23 || return ParserError(ErrParsingDateTime) + _, m, s, ms = @try _parse_local_time(l, true) + # TODO: Could potentially parse greater accuracy for the + # fractional seconds here. 
+ return try_return_time(l, h, m, s, ms) + end + function try_return_time(p, h, m, s, ms) + return Time(h, m, s, ms) + end +end + +# Load user code + +import Base.Experimental.entrypoint + +let mod = Base.include(Base.__toplevel__, inputfile) + if !isa(mod, Module) + mod = Main + end + if output_type == "--output-exe" && isdefined(mod, :main) && !add_ccallables + entrypoint(mod.main, ()) + end + #entrypoint(join, (Base.GenericIOBuffer{Memory{UInt8}}, Array{Base.SubString{String}, 1}, String)) + #entrypoint(join, (Base.GenericIOBuffer{Memory{UInt8}}, Array{String, 1}, Char)) + entrypoint(Base.task_done_hook, (Task,)) + entrypoint(Base.wait, ()) + entrypoint(Base.trypoptask, (Base.StickyWorkqueue,)) + entrypoint(Base.checktaskempty, ()) + if add_ccallables + ccall(:jl_add_ccallable_entrypoints, Cvoid, ()) + end +end + +# Additional method patches depending on whether user code loads certain stdlibs + +let loaded = Symbol.(Base.loaded_modules_array()) # TODO better way to do this + if :SparseArrays in loaded + using SparseArrays + @eval SparseArrays.CHOLMOD begin + function __init__() + ccall((:SuiteSparse_config_malloc_func_set, :libsuitesparseconfig), + Cvoid, (Ptr{Cvoid},), cglobal(:jl_malloc, Ptr{Cvoid})) + ccall((:SuiteSparse_config_calloc_func_set, :libsuitesparseconfig), + Cvoid, (Ptr{Cvoid},), cglobal(:jl_calloc, Ptr{Cvoid})) + ccall((:SuiteSparse_config_realloc_func_set, :libsuitesparseconfig), + Cvoid, (Ptr{Cvoid},), cglobal(:jl_realloc, Ptr{Cvoid})) + ccall((:SuiteSparse_config_free_func_set, :libsuitesparseconfig), + Cvoid, (Ptr{Cvoid},), cglobal(:jl_free, Ptr{Cvoid})) + end + end + end + if :Artifacts in loaded + using Artifacts + @eval Artifacts begin + function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, _::Val{lazyartifacts}) where lazyartifacts + moduleroot = Base.moduleroot(__module__) + if haskey(Base.module_keys, moduleroot) + # Process overrides for this UUID, if we know what it is + 
process_overrides(artifact_dict, Base.module_keys[moduleroot].uuid) + end + + # If the artifact exists, we're in the happy path and we can immediately + # return the path to the artifact: + dirs = artifact_paths(hash; honor_overrides=true) + for dir in dirs + if isdir(dir) + return jointail(dir, path_tail) + end + end + end + end + end + if :Pkg in loaded + using Pkg + @eval Pkg begin + __init__() = rand() #TODO, methods that do nothing don't get codegened + end + end + if :StyledStrings in loaded + using StyledStrings + @eval StyledStrings begin + __init__() = rand() + end + end +end + +empty!(Core.ARGS) +empty!(Base.ARGS) +empty!(LOAD_PATH) +empty!(DEPOT_PATH) +empty!(Base.TOML_CACHE.d) +Base.TOML.reinit!(Base.TOML_CACHE.p, "") +Base.ACTIVE_PROJECT[] = nothing +@eval Base begin + PROGRAM_FILE = "" +end +@eval Sys begin + BINDIR = "" + STDLIB = "" +end diff --git a/contrib/juliac.jl b/contrib/juliac.jl new file mode 100644 index 0000000000000..61e0e91958667 --- /dev/null +++ b/contrib/juliac.jl @@ -0,0 +1,110 @@ +# Julia compiler wrapper script +# NOTE: The interface and location of this script are considered unstable/experimental + +cmd = Base.julia_cmd() +cmd = `$cmd --startup-file=no --history-file=no` +output_type = nothing # exe, sharedlib, sysimage +trim = nothing +outname = nothing +file = nothing +add_ccallables = false + +help = findfirst(x->x == "--help", ARGS) +if help !== nothing + println( + """ + Usage: julia juliac.jl [--output-exe | --output-lib | --output-sysimage] [options] + --trim= Only output code statically determined to be reachable + --compile-ccallable Include all methods marked `@ccallable` in output + --verbose Request verbose output + """) + exit(0) +end + +let i = 1 + while i <= length(ARGS) + arg = ARGS[i] + if arg == "--output-exe" || arg == "--output-lib" || arg == "--output-sysimage" + isnothing(output_type) || error("Multiple output types specified") + global output_type = arg + i == length(ARGS) && error("Output specifier 
requires an argument") + global outname = ARGS[i+1] + i += 1 + elseif startswith(arg, "--trim") + arg = split(arg, '=') + if length(arg) == 1 + global trim = "safe" + else + global trim = arg[2] + end + elseif arg == "--compile-ccallable" + global add_ccallables = true + else + if arg[1] == '-' || !isnothing(file) + println("Unexpected argument `$arg`") + exit(1) + end + global file = arg + end + i += 1 + end +end + +isnothing(outname) && error("No output file specified") +isnothing(file) && error("No input file specified") + +absfile = abspath(file) +cflags = readchomp(`$(cmd) $(joinpath(Sys.BINDIR, Base.DATAROOTDIR,"julia", "julia-config.jl")) --cflags `) +cflags = Base.shell_split(cflags) +allflags = readchomp(`$(cmd) $(joinpath(Sys.BINDIR, Base.DATAROOTDIR,"julia", "julia-config.jl")) --allflags`) +allflags = Base.shell_split(allflags) +tmpdir = mktempdir(cleanup=false) +initsrc_path = joinpath(tmpdir, "init.c") +init_path = joinpath(tmpdir, "init.a") +img_path = joinpath(tmpdir, "img.a") +bc_path = joinpath(tmpdir, "img-bc.a") + +open(initsrc_path, "w") do io + print(io, """ + #include + __attribute__((constructor)) void static_init(void) { + if (jl_is_initialized()) + return; + julia_init(JL_IMAGE_IN_MEMORY); + jl_exception_clear(); + } + """) +end + +static_call_graph_arg() = isnothing(trim) ? `` : `--trim=$(trim)` +is_verbose() = verbose ? `--verbose-compilation=yes` : `` +cmd = addenv(`$cmd --project=$(Base.active_project()) --output-o $img_path --output-incremental=no --strip-ir --strip-metadata $(static_call_graph_arg()) $(joinpath(@__DIR__,"juliac-buildscript.jl")) $absfile $output_type $add_ccallables`, "OPENBLAS_NUM_THREADS" => 1, "JULIA_NUM_THREADS" => 1) + +if !success(pipeline(cmd; stdout, stderr)) + println(stderr, "\nFailed to compile $file") + exit(1) +end + +run(`cc $(cflags) -g -c -o $init_path $initsrc_path`) + +if output_type == "--output-lib" || output_type == "--output-sysimage" + of, ext = splitext(outname) + soext = "." 
* Base.BinaryPlatforms.platform_dlext() + if ext == "" + outname = of * soext + end +end + +julia_libs = Base.shell_split(Base.isdebugbuild() ? "-ljulia-debug -ljulia-internal-debug" : "-ljulia -ljulia-internal") +try + if output_type == "--output-lib" + run(`cc $(allflags) -o $outname -shared -Wl,$(Base.Linking.WHOLE_ARCHIVE) $img_path -Wl,$(Base.Linking.NO_WHOLE_ARCHIVE) $init_path $(julia_libs)`) + elseif output_type == "--output-sysimage" + run(`cc $(allflags) -o $outname -shared -Wl,$(Base.Linking.WHOLE_ARCHIVE) $img_path -Wl,$(Base.Linking.NO_WHOLE_ARCHIVE) $(julia_libs)`) + else + run(`cc $(allflags) -o $outname -Wl,$(Base.Linking.WHOLE_ARCHIVE) $img_path -Wl,$(Base.Linking.NO_WHOLE_ARCHIVE) $init_path $(julia_libs)`) + end +catch + println("\nCompilation failed.") + exit(1) +end diff --git a/doc/src/devdocs/sysimg.md b/doc/src/devdocs/sysimg.md index 7d4f7afdbb86a..64c309e1fb02a 100644 --- a/doc/src/devdocs/sysimg.md +++ b/doc/src/devdocs/sysimg.md @@ -117,3 +117,80 @@ See code comments for each components for more implementation details. depending on the ISA. The target selection will prefer exact CPU name match, larger vector register size, and larger number of features. An overview of this process is in `src/processor.cpp`. + +## Trimming + +System images are typically quite large, since Base includes a lot of functionality, and by +default system images also include several packages such as LinearAlgebra for convenience +and backwards compatibility. Most programs will use only a fraction of the functions in +these packages. Therefore it makes sense to build binaries that exclude unused functions +to save space, referred to as "trimming". + +While the basic idea of trimming is sound, Julia has dynamic and reflective features that make it +difficult (or impossible) to know in general which functions are unused. 
As an extreme example, +consider code like + +``` +getglobal(Base, Symbol(readchomp(stdin)))(1) +``` + +This code reads a function name from `stdin` and calls the named function from Base on the value +`1`. In this case it is impossible to predict which function will be called, so no functions +can reliably be considered "unused". With some noteworthy exceptions (Julia's own REPL being +one of them), most real-world programs do not do things like this. + +Less extreme cases occur, for example, when there are type instabilities that make it impossible +for the compiler to predict which method will be called. However, if code is well-typed and does +not use reflection, a complete and (hopefully) relatively small set of needed methods can be +determined, and the rest can be removed. The `--trim` command-line option requests this kind of +compilation. + +When `--trim` is specified in a command used to build a system image, the compiler begins +tracing calls starting at methods marked using `Base.Experimental.entrypoint`. If a call is too +dynamic to reasonably narrow down the possible call targets, an error is given at compile +time showing the location of the call. For testing purposes, it is possible to skip these +errors by specifying `--trim=unsafe` or `--trim=unsafe-warn`. Then you will get a system +image built, but it may crash at run time if needed code is not present. + +It typically makes sense to specify `--strip-ir` along with `--trim`, since trimmed binaries +are fully compiled and therefore don't need Julia IR. At some point we may make `--trim` imply +`--strip-ir`, but for now we have kept them orthogonal. + +To get the smallest possible binary, it will also help to specify `--strip-metadata` and +run the Unix `strip` utility. However, those steps remove Julia-specific and native (DWARF format) +debug info, respectively, and so will make debugging more difficult. 
+ +### Common problems + +- The Base global variables `stdin`, `stdout`, and `stderr` are non-constant and so their + types are not known. All printing should use a specific IO object with a known type. + The easiest substitution is to use `print(Core.stdout, x)` instead of `print(x)` or + `print(stdout, x)`. +- Use tools like `JET`, `Cthulhu`, and/or `SnoopCompile` to identify failures of type-inference, and + follow our [Performance Tips](@ref) to fix them. + +### Compatibility concerns + +We have identified many small changes to Base that significantly increase the set of programs +that can be reliably trimmed. Unfortunately some of those changes would be considered breaking, +and so are only applied when trimming is requested (this is done by an external build script, +currently maintained as `contrib/juliac-buildscript.jl`). +Therefore in many cases trimming will require you to opt in to new variants of Base and some +standard libraries. + +If you want to use trimming, it is important to set up continuous integration testing that +performs a trimmed build and fully tests the resulting program. +Fortunately, if your program successfully compiles with `--trim` then it is very likely to work +the same as it did before. However, CI is needed to ensure that your program continues to build +with trimming as you develop it. + +Package authors may wish to test that their package is "trimming safe", however this is impossible +in general. Trimming is only expected to work given concrete entry points such as `main()` and +library entry points meant to be called from outside Julia. For generic packages, existing tests +for type stability like `@inferred` and `JET` are about as close as you can get to checking +trim compatibility. + +Trimming also introduces new compatibility issues between minor versions of Julia.
At this time, +we are not able to guarantee that a program that can be trimmed in one version of Julia +can also be trimmed in all future versions of Julia. However, breakage of that kind is expected +to be rare. We also plan to try to *increase* the set of programs that can be trimmed over time. diff --git a/doc/src/manual/command-line-interface.md b/doc/src/manual/command-line-interface.md index 41c3eacd61d26..ef20e51ea6e4e 100644 --- a/doc/src/manual/command-line-interface.md +++ b/doc/src/manual/command-line-interface.md @@ -218,7 +218,7 @@ The following is a complete list of command-line switches available when launchi |`--trace-compile-timing` |If --trace-compile is enabled show how long each took to compile in ms| |`--image-codegen` |Force generate code in imaging mode| |`--permalloc-pkgimg={yes\|no*}` |Copy the data section of package images into memory| - +|`--trim={no*\|safe\|unsafe\|unsafe-warn}` |Build a sysimage including only code provably reachable from methods marked by calling `entrypoint`. The three non-default options differ in how they handle dynamic call sites. In safe mode, such sites result in compile-time errors. In unsafe mode, such sites are allowed but the resulting binary might be missing needed code and can throw runtime errors. With unsafe-warn, such sites will trigger warnings at compile-time and might error at runtime.| !!!
compat "Julia 1.1" In Julia 1.0, the default `--project=@.` option did not search up from the root diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index b4c8ef6095a55..c2f112f9c9d5c 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -95,6 +95,17 @@ void jl_get_function_id_impl(void *native_code, jl_code_instance_t *codeinst, } } +extern "C" JL_DLLEXPORT_CODEGEN +void jl_get_llvm_mis_impl(void *native_code, arraylist_t* MIs) +{ + jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; + auto map = data->jl_fvar_map; + for (auto &ci : map) { + jl_method_instance_t *mi = ci.first->def; + arraylist_push(MIs, mi); + } +} + extern "C" JL_DLLEXPORT_CODEGEN void jl_get_llvm_gvs_impl(void *native_code, arraylist_t *gvs) { @@ -284,6 +295,7 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_ jl_value_t *ci = cgparams.lookup(mi, world, world); JL_GC_PROMISE_ROOTED(ci); jl_code_instance_t *codeinst = NULL; + JL_GC_PUSH1(&codeinst); if (ci != jl_nothing && jl_atomic_load_relaxed(&((jl_code_instance_t *)ci)->inferred) != jl_nothing) { codeinst = (jl_code_instance_t*)ci; } @@ -301,9 +313,11 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_ jl_mi_cache_insert(mi, codeinst); } } + JL_GC_POP(); return codeinst; } +arraylist_t new_invokes; // takes the running content that has collected in the shadow module and dump it to disk // this builds the object file portion of the sysimage files for fast startup, and can // also be used be extern consumers like GPUCompiler.jl to obtain a module containing @@ -353,8 +367,12 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm params.imaging_mode = imaging; params.debug_level = cgparams->debug_info_level; params.external_linkage = _external_linkage; + arraylist_new(&new_invokes, 0); size_t compile_for[] = { jl_typeinf_world, _world }; - for (int worlds = 0; worlds < 2; worlds++) { + int worlds = 0; + if (jl_options.trim != 
JL_TRIM_NO) + worlds = 1; + for (; worlds < 2; worlds++) { JL_TIMING(NATIVE_AOT, NATIVE_Codegen); size_t this_world = compile_for[worlds]; if (!this_world) @@ -373,6 +391,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm continue; } mi = (jl_method_instance_t*)item; +compile_mi: src = NULL; // if this method is generally visible to the current compilation world, // and this is either the primary world, or not applicable in the primary world @@ -380,16 +399,47 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm if (jl_atomic_load_relaxed(&mi->def.method->primary_world) <= this_world && this_world <= jl_atomic_load_relaxed(&mi->def.method->deleted_world)) { // find and prepare the source code to compile jl_code_instance_t *codeinst = jl_ci_cache_lookup(*cgparams, mi, this_world); - if (codeinst && !params.compiled_functions.count(codeinst)) { + if (jl_options.trim != JL_TRIM_NO && !codeinst) { + // If we're building a small image, we need to compile everything + // to ensure that we have all the information we need. 
+ jl_safe_printf("Codegen decided not to compile code root"); + jl_(mi); + abort(); + } + if (codeinst && !params.compiled_functions.count(codeinst) && !data->jl_fvar_map.count(codeinst)) { // now add it to our compilation results - JL_GC_PROMISE_ROOTED(codeinst->rettype); - orc::ThreadSafeModule result_m = jl_create_ts_module(name_from_method_instance(codeinst->def), - params.tsctx, clone.getModuleUnlocked()->getDataLayout(), - Triple(clone.getModuleUnlocked()->getTargetTriple())); - jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, NULL, params); - if (result_m) - params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)}; + // Const returns do not do codegen, but juliac inspects codegen results so make a dummy fvar entry to represent it + if (jl_options.trim != JL_TRIM_NO && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) { + data->jl_fvar_map[codeinst] = std::make_tuple((uint32_t)-3, (uint32_t)-3); + } else { + JL_GC_PROMISE_ROOTED(codeinst->rettype); + orc::ThreadSafeModule result_m = jl_create_ts_module(name_from_method_instance(codeinst->def), + params.tsctx, clone.getModuleUnlocked()->getDataLayout(), + Triple(clone.getModuleUnlocked()->getTargetTriple())); + jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, NULL, params); + if (result_m) + params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)}; + else if (jl_options.trim != JL_TRIM_NO) { + // if we're building a small image, we need to compile everything + // to ensure that we have all the information we need. + jl_safe_printf("codegen failed to compile code root"); + jl_(mi); + abort(); + } + } } + } else if (this_world != jl_typeinf_world) { + /* + jl_safe_printf("Codegen could not find requested codeinstance to be compiled\n"); + jl_(mi); + abort(); + */ + } + // TODO: is goto the best way to do this? 
+ jl_compile_workqueue(params, policy); + mi = (jl_method_instance_t*)arraylist_pop(&new_invokes); + if (mi != NULL) { + goto compile_mi; } } @@ -397,6 +447,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm jl_compile_workqueue(params, policy); } JL_GC_POP(); + arraylist_free(&new_invokes); // process the globals array, before jl_merge_module destroys them SmallVector gvars(params.global_targets.size()); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 7f96bb1047abc..4547e693755cd 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -2336,6 +2336,12 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, ret = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type); } else { + if (trim_may_error(ctx.params->trim)) { + // if we know the return type, we can assume the result is of that type + errs() << "ERROR: Dynamic call to setfield/modifyfield\n"; + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); + } Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, julia_call); ret = mark_julia_type(ctx, callval, true, jl_any_type); } @@ -4077,6 +4083,12 @@ static jl_cgval_t union_store(jl_codectx_t &ctx, rhs = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type); } else { + if (trim_may_error(ctx.params->trim)) { + // if we know the return type, we can assume the result is of that type + errs() << "ERROR: Dynamic call to setfield/modifyfield\n"; + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); + } Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, julia_call); rhs = mark_julia_type(ctx, callval, true, jl_any_type); } diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c index 41812d903816c..7ddb68fd6b036 100644 --- 
a/src/codegen-stubs.c +++ b/src/codegen-stubs.c @@ -15,6 +15,7 @@ JL_DLLEXPORT void jl_dump_native_fallback(void *native_code, ios_t *z, ios_t *s) UNAVAILABLE JL_DLLEXPORT void jl_get_llvm_gvs_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE JL_DLLEXPORT void jl_get_llvm_external_fns_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE +JL_DLLEXPORT void jl_get_llvm_mis_fallback(void *native_code, arraylist_t* MIs) UNAVAILABLE JL_DLLEXPORT void jl_extern_c_fallback(jl_function_t *f, jl_value_t *rt, jl_value_t *argt, char *name) UNAVAILABLE JL_DLLEXPORT jl_value_t *jl_dump_method_asm_fallback(jl_method_instance_t *linfo, size_t world, diff --git a/src/codegen.cpp b/src/codegen.cpp index a452e0fccd0c5..a7a985284c87b 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include // target machine computation @@ -1651,31 +1652,23 @@ static const auto &builtin_func_map() { return builtins; } +static const auto &may_dispatch_builtins() { + static std::unordered_set builtins( + {jl_f__apply_iterate_addr, + jl_f__apply_pure_addr, + jl_f__call_in_world_addr, + jl_f__call_in_world_total_addr, + jl_f__call_latest_addr, + }); + return builtins; +} + static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction<>{XSTR(jl_new_opaque_closure_jlcall), get_func_sig, get_func_attrs}; static _Atomic(uint64_t) globalUniqueGeneratedNames{1}; // --- code generation --- -extern "C" { - jl_cgparams_t jl_default_cgparams = { - /* track_allocations */ 1, - /* code_coverage */ 1, - /* prefer_specsig */ 0, -#ifdef _OS_WINDOWS_ - /* gnu_pubnames */ 0, -#else - /* gnu_pubnames */ 1, -#endif - /* debug_info_kind */ (int) DICompileUnit::DebugEmissionKind::FullDebug, - /* debug_line_info */ 1, - /* safepoint_on_entry */ 1, - /* gcstack_arg */ 1, - /* use_jlplt*/ 1, - /* lookup */ jl_rettype_inferred_addr }; -} - - static MDNode *best_tbaa(jl_tbaacache_t &tbaa_cache, jl_value_t *jt) { jt = jl_unwrap_unionall(jt); if (jt == 
(jl_value_t*)jl_datatype_type || @@ -1987,7 +1980,7 @@ class jl_codectx_t { size_t max_world = -1; const char *name = NULL; StringRef file{}; - ssize_t *line = NULL; + int32_t line = -1; Value *spvals_ptr = NULL; Value *argArray = NULL; Value *argCount = NULL; @@ -2146,6 +2139,179 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p); static unsigned julia_alignment(jl_value_t *jt); static void recombine_value(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dst, jl_aliasinfo_t const &dst_ai, Align alignment, bool isVolatile); +static void print_stack_crumbs(jl_codectx_t &ctx) +{ + errs() << "\n"; + errs() << "Stacktrace:\n"; + jl_method_instance_t *caller = ctx.linfo; + jl_((jl_value_t*)caller); + errs() << "In " << ctx.file << ":" << ctx.line << "\n"; + while (true) { + auto it = ctx.emission_context.enqueuers.find(caller); + if (it != ctx.emission_context.enqueuers.end()) { + caller = std::get(it->second); + } else { + break; + } + if (caller) { + if (jl_is_method_instance(caller)) { + for (auto it2 = std::get(it->second).begin(); it2 != (std::prev(std::get(it->second).end())); ++it2) { + auto frame = *it2; + errs() << std::get<0>(frame) << " \n"; + errs() << "In " << std::get<1>(frame) << ":" << std::get(frame) << "\n"; + } + auto &frame = std::get(it->second).front(); + jl_((jl_value_t*)caller); + errs() << "In " << std::get<1>(frame) << ":" << std::get(frame) << "\n"; + } + } + else + break; + } + abort(); +} + +static jl_value_t *StackFrame( + jl_value_t *linfo, + std::string fn_name, + std::string filepath, + int32_t lineno, + jl_value_t *inlined) +{ + jl_value_t *StackFrame = jl_get_global(jl_base_module, jl_symbol("StackFrame")); + assert(StackFrame != nullptr); + + jl_value_t *args[7] = { + /* func */ (jl_value_t *)jl_symbol(fn_name.c_str()), + /* line */ (jl_value_t *)jl_symbol(filepath.c_str()), + /* line */ jl_box_int32(lineno), + /* linfo */ (jl_value_t *)linfo, + /* from_c */ jl_false, + /* inlined */ inlined, + /* pointer */ 
jl_box_uint64(0) + }; + + jl_value_t *frame = nullptr; + JL_TRY { + frame = jl_apply_generic(StackFrame, args, 7); + } JL_CATCH { + jl_safe_printf("Error creating stack frame\n"); + } + return frame; +} + +static void push_frames(jl_codectx_t &ctx, jl_method_instance_t *caller, jl_method_instance_t *callee, int no_debug=false) +{ + CallFrames frames; + auto it = ctx.emission_context.enqueuers.find(callee); + if (it != ctx.emission_context.enqueuers.end()) + return; + if (no_debug) { // Used in tojlinvoke + frames.push_back({ctx.funcName, "", 0}); + ctx.emission_context.enqueuers.insert({callee, {caller, std::move(frames)}}); + return; + } + auto DL = ctx.builder.getCurrentDebugLocation(); + auto filename = std::string(DL->getFilename()); + auto line = DL->getLine(); + auto fname = std::string(DL->getScope()->getSubprogram()->getName()); + frames.push_back({fname, filename, line}); + auto DI = DL.getInlinedAt(); + while (DI) { + auto filename = std::string(DI->getFilename()); + auto line = DI->getLine(); + auto fname = std::string(DI->getScope()->getSubprogram()->getName()); + frames.push_back({fname, filename, line}); + DI = DI->getInlinedAt(); + } + ctx.emission_context.enqueuers.insert({callee, {caller, std::move(frames)}}); +} + +static jl_array_t* build_stack_crumbs(jl_codectx_t &ctx) JL_NOTSAFEPOINT +{ + static intptr_t counter = 5; + jl_method_instance_t *caller = (jl_method_instance_t*)counter; //nothing serves as a sentinel for the bottom for the stack + push_frames(ctx, ctx.linfo, (jl_method_instance_t*)caller); + counter++; + jl_array_t *out = jl_alloc_array_1d(jl_array_any_type, 0); + JL_GC_PUSH1(&out); + while (true) { + auto it = ctx.emission_context.enqueuers.find(caller); + if (it != ctx.emission_context.enqueuers.end()) { + caller = std::get(it->second); + } else { + break; + } + if (caller) { + assert(ctx.emission_context.enqueuers.count(caller) == 1); + if (jl_is_method_instance(caller)) { + //TODO: Use a subrange when C++20 is a thing + for (auto 
it2 = std::get(it->second).begin(); it2 != (std::prev(std::get(it->second).end())); ++it2) { + auto frame = *it2; + jl_value_t *stackframe = StackFrame(jl_nothing, std::get<0>(frame), std::get<1>(frame), std::get(frame), jl_true); + if (stackframe == nullptr) + print_stack_crumbs(ctx); + jl_array_ptr_1d_push(out, stackframe); + } + auto &frame = std::get(it->second).back(); + jl_value_t *stackframe = StackFrame((jl_value_t *)caller, std::get<0>(frame), std::get<1>(frame), std::get(frame), jl_false); + if (stackframe == nullptr) + print_stack_crumbs(ctx); + jl_array_ptr_1d_push(out, stackframe); + } + } + else + break; + } + JL_GC_POP(); + return out; +} + +static void print_stacktrace(jl_codectx_t &ctx, int trim) +{ + jl_task_t *ct = jl_get_current_task(); + assert(ct); + + // Temporarily operate in the current age + size_t last_age = ct->world_age; + ct->world_age = jl_get_world_counter(); + jl_array_t* bt = build_stack_crumbs(ctx); + JL_GC_PUSH1(&bt); + + // Call `reinit_stdio` to get TTY IO objects (w/ color) + jl_value_t *reinit_stdio = jl_get_global(jl_base_module, jl_symbol("_reinit_stdio")); + assert(reinit_stdio); + jl_apply_generic(reinit_stdio, nullptr, 0); + + // Show the backtrace + jl_value_t *show_backtrace = jl_get_global(jl_base_module, jl_symbol("show_backtrace")); + jl_value_t *base_stderr = jl_get_global(jl_base_module, jl_symbol("stderr")); + assert(show_backtrace && base_stderr); + + JL_TRY { + jl_value_t *args[2] = { base_stderr, (jl_value_t *)bt }; + jl_apply_generic(show_backtrace, args, 2); + } JL_CATCH { + jl_printf(JL_STDERR,"Error showing backtrace\n"); + print_stack_crumbs(ctx); + } + + jl_printf(JL_STDERR, "\n\n"); + JL_GC_POP(); + ct->world_age = last_age; + + if (trim == JL_TRIM_SAFE) { + jl_printf(JL_STDERR,"Aborting compilation due to finding a dynamic dispatch"); + exit(1); + } + return; +} + +static int trim_may_error(int trim) +{ + return (trim == JL_TRIM_SAFE) || (trim == JL_TRIM_UNSAFE_WARN); +} + static GlobalVariable 
*prepare_global_in(Module *M, JuliaVariable *G) { return G->realize(M); @@ -4281,6 +4447,12 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, Value *theArgs = emit_ptrgep(ctx, ctx.argArray, ctx.nReqArgs * sizeof(jl_value_t*)); Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva }); *ret = mark_julia_type(ctx, r, true, jl_any_type); + if (trim_may_error(ctx.params->trim)) { + // if we know the return type, we can assume the result is of that type + errs() << "ERROR: Dynamic call to Core._apply_iterate detected\n"; + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); + } return true; } } @@ -5388,12 +5560,25 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayR if (need_to_emit) { Function *trampoline_decl = cast(jl_Module->getNamedValue(protoname)); ctx.call_targets[codeinst] = {cc, return_roots, trampoline_decl, specsig}; + if (trim_may_error(ctx.params->trim)) + push_frames(ctx, ctx.linfo, mi); } } } } } if (!handled) { + if (trim_may_error(ctx.params->trim)) { + if (lival.constant) { + arraylist_push(&new_invokes, lival.constant); + push_frames(ctx, ctx.linfo, (jl_method_instance_t*)lival.constant); + } else { + errs() << "Dynamic call to unknown function"; + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + + print_stacktrace(ctx, ctx.params->trim); + } + } Value *r = emit_jlcall(ctx, jlinvoke_func, boxed(ctx, lival), argv, nargs, julia_call2); result = mark_julia_type(ctx, r, true, rt); } @@ -5453,7 +5638,12 @@ static jl_cgval_t emit_invoke_modify(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_ return mark_julia_type(ctx, oldnew, true, rt); } } - + if (trim_may_error(ctx.params->trim)) { + errs() << "ERROR: dynamic invoke modify call to"; + 
jl_(args[0]); + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); + } // emit function and arguments Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, nargs, julia_call); return mark_julia_type(ctx, callval, true, rt); @@ -5523,10 +5713,15 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo bool handled = emit_builtin_call(ctx, &result, f.constant, argv, nargs - 1, rt, ex, is_promotable); if (handled) return result; - + jl_fptr_args_t builtin_fptr = jl_get_builtin_fptr((jl_datatype_t*)jl_typeof(f.constant)); // special case for some known builtin not handled by emit_builtin_call - auto it = builtin_func_map().find(jl_get_builtin_fptr((jl_datatype_t*)jl_typeof(f.constant))); + auto it = builtin_func_map().find(builtin_fptr); if (it != builtin_func_map().end()) { + if (trim_may_error(ctx.params->trim) && may_dispatch_builtins().count(builtin_fptr)) { + errs() << "ERROR: Dynamic call to builtin" << jl_symbol_name(((jl_datatype_t*)jl_typeof(f.constant))->name->name); + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); + } Value *ret = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), ArrayRef(argv).drop_front(), nargs - 1, julia_call); setName(ctx.emission_context, ret, it->second->name + "_ret"); return mark_julia_type(ctx, ret, true, rt); @@ -5565,7 +5760,79 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo } } } + int failed_dispatch = !argv[0].constant; + if (ctx.params->trim != JL_TRIM_NO) { + size_t min_valid = 1; + size_t max_valid = ~(size_t)0; + size_t latest_world = jl_get_world_counter(); // TODO: marshal the world age of the compilation here. 
+ + // Find all methods matching the call signature + jl_array_t *matches = NULL; + jl_value_t *tup = NULL; + JL_GC_PUSH2(&tup, &matches); + if (!failed_dispatch) { + SmallVector argtypes; + for (auto& arg: argv) + argtypes.push_back(arg.typ); + tup = jl_apply_tuple_type_v(argtypes.data(), argtypes.size()); + matches = (jl_array_t*)jl_matching_methods((jl_tupletype_t*)tup, jl_nothing, 10 /*TODO: make global*/, 1, + latest_world, &min_valid, &max_valid, NULL); + if ((jl_value_t*)matches == jl_nothing) + failed_dispatch = 1; + } + + // Expand each matching method to its unique specialization, if it has exactly one + if (!failed_dispatch) { + size_t k; + size_t len = new_invokes.len; + for (k = 0; k < jl_array_nrows(matches); k++) { + jl_method_match_t *match = (jl_method_match_t *)jl_array_ptr_ref(matches, k); + jl_method_instance_t *mi = jl_method_match_to_mi(match, latest_world, min_valid, max_valid, 0); + if (!mi) { + if (jl_array_nrows(matches) == 1) { + // if the method match is not compileable, but there is only one, fall back to + // unspecialized implementation + mi = jl_get_unspecialized(match->method); + } + else { + new_invokes.len = len; + failed_dispatch = 1; + break; + } + } + arraylist_push(&new_invokes, mi); + } + } + JL_GC_POP(); + } + if (failed_dispatch && trim_may_error(ctx.params->trim)) { + errs() << "Dynamic call to "; + jl_jmp_buf *old_buf = jl_get_safe_restore(); + jl_jmp_buf buf; + jl_set_safe_restore(&buf); + if (!jl_setjmp(buf, 0)) { + jl_static_show((JL_STREAM*)STDERR_FILENO, (jl_value_t*)args[0]); + jl_printf((JL_STREAM*)STDERR_FILENO,"("); + for (size_t i = 1; i < nargs; ++i) { + jl_value_t *typ = argv[i].typ; + if (!jl_is_concrete_type(typ)) // Print type in red + jl_printf((JL_STREAM*)STDERR_FILENO, "\x1b[31m"); + jl_static_show((JL_STREAM*)STDERR_FILENO, (jl_value_t*)argv[i].typ); + if (!jl_is_concrete_type(typ)) + jl_printf((JL_STREAM*)STDERR_FILENO, "\x1b[0m"); + if (i != nargs-1) + jl_printf((JL_STREAM*)STDERR_FILENO,", "); + } + 
jl_printf((JL_STREAM*)STDERR_FILENO,")\n"); + } + else { + jl_printf((JL_STREAM*)STDERR_FILENO, "\n!!! ERROR while printing error -- ABORTING !!!\n"); + } + jl_set_safe_restore(old_buf); + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); + } // emit function and arguments Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, n_generic_args, julia_call); return mark_julia_type(ctx, callval, true, rt); @@ -6710,6 +6977,13 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ ((jl_method_t*)source.constant)->nargs > 0 && jl_is_valid_oc_argtype((jl_tupletype_t*)argt.constant, (jl_method_t*)source.constant); + if (!can_optimize && trim_may_error(ctx.params->trim)) { + // if we know the return type, we can assume the result is of that type + errs() << "ERROR: Dynamic call to OpaqueClosure method\n"; + errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n"; + print_stacktrace(ctx, ctx.params->trim); + } + if (can_optimize) { jl_value_t *closure_t = NULL; jl_value_t *env_t = NULL; @@ -6909,6 +7183,11 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptr GlobalVariable::InternalLinkage, name, M); jl_init_function(f, params.TargetTriple); + if (trim_may_error(params.params->trim)) { + arraylist_push(&new_invokes, codeinst->def); // Try t compile this invoke + // TODO: Debuginfo! 
+ push_frames(ctx, ctx.linfo, codeinst->def, 1); + } jl_name_jlfunc_args(params, f); //f->setAlwaysInline(); ctx.f = f; // for jl_Module @@ -8126,6 +8405,7 @@ static jl_llvm_functions_t if (lam && jl_is_method(lam->def.method)) { toplineno = lam->def.method->line; ctx.file = jl_symbol_name(lam->def.method->file); + ctx.line = lam->def.method->line; } else if ((jl_value_t*)src->debuginfo != jl_nothing) { // look for the file and line info of the original start of this block, as reported by lowering @@ -8134,6 +8414,7 @@ static jl_llvm_functions_t debuginfo = debuginfo->linetable; ctx.file = jl_debuginfo_file(debuginfo); struct jl_codeloc_t lineidx = jl_uncompress1_codeloc(debuginfo->codelocs, 0); + ctx.line = lineidx.line; toplineno = std::max((int32_t)0, lineidx.line); } if (ctx.file.empty()) @@ -9904,7 +10185,7 @@ void jl_compile_workqueue( if (it == params.compiled_functions.end()) { // Reinfer the function. The JIT came along and removed the inferred // method body. See #34993 - if (policy != CompilationPolicy::Default && + if ((policy != CompilationPolicy::Default || params.params->trim) && jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) { // XXX: SOURCE_MODE_FORCE_SOURCE is wrong here (neither sufficient nor necessary) codeinst = jl_type_infer(codeinst->def, jl_atomic_load_relaxed(&codeinst->max_world), SOURCE_MODE_FORCE_SOURCE); @@ -9935,6 +10216,16 @@ void jl_compile_workqueue( if (proto.specsig) { // expected specsig if (!preal_specsig) { + if (params.params->trim) { + auto it = params.compiled_functions.find(codeinst); //TODO: What to do about this + errs() << "Bailed out to invoke when compiling:"; + jl_(codeinst->def); + if (it != params.compiled_functions.end()) { + errs() << it->second.second.functionObject << "\n"; + errs() << it->second.second.specFunctionObject << "\n"; + } else + errs() << "codeinst not in compile_functions\n"; + } // emit specsig-to-(jl)invoke conversion StringRef invokeName; if (invoke != NULL) @@ -10124,6 +10415,22 @@ 
int jl_opaque_ptrs_set = 0; extern "C" void jl_init_llvm(void) { + jl_default_cgparams = { + /* track_allocations */ 1, + /* code_coverage */ 1, + /* prefer_specsig */ 0, +#ifdef _OS_WINDOWS_ + /* gnu_pubnames */ 0, +#else + /* gnu_pubnames */ 1, +#endif + /* debug_info_kind */ (int) DICompileUnit::DebugEmissionKind::FullDebug, + /* debug_info_level */ (int) jl_options.debug_level, + /* safepoint_on_entry */ 1, + /* gcstack_arg */ 1, + /* use_jlplt*/ 1, + /* trim */ 0, + /* lookup */ jl_rettype_inferred_addr }; jl_page_size = jl_getpagesize(); jl_default_debug_info_kind = (int) DICompileUnit::DebugEmissionKind::FullDebug; jl_default_cgparams.debug_info_level = (int) jl_options.debug_level; diff --git a/src/gf.c b/src/gf.c index e6f5b4ee007f7..321711c839aa8 100644 --- a/src/gf.c +++ b/src/gf.c @@ -1360,8 +1360,7 @@ static inline jl_typemap_entry_t *lookup_leafcache(jl_genericmemory_t *leafcache } return NULL; } - -static jl_method_instance_t *cache_method( +jl_method_instance_t *cache_method( jl_methtable_t *mt, _Atomic(jl_typemap_t*) *cache, jl_value_t *parent JL_PROPAGATES_ROOT, jl_tupletype_t *tt, // the original tupletype of the signature jl_method_t *definition, @@ -1707,7 +1706,7 @@ static void method_overwrite(jl_typemap_entry_t *newentry, jl_method_t *oldvalue jl_printf(s, ".\n"); jl_uv_flush(s); } - if (jl_generating_output()) { + if (jl_generating_output() && jl_options.incremental) { jl_printf(JL_STDERR, "ERROR: Method overwriting is not permitted during Module precompilation. Use `__precompile__(false)` to opt-out of precompilation.\n"); jl_throw(jl_precompilable_error); } @@ -2411,7 +2410,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t na // spvals is any matched static parameter values, m is the Method, // full is a boolean indicating if that method fully covers the input // -// lim is the max # of methods to return. if there are more, returns jl_false. +// lim is the max # of methods to return. 
if there are more, returns jl_nothing. // Negative values stand for no limit. // Unless lim == -1, remove matches that are unambiguously covered by earlier ones JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *mt, int lim, int include_ambiguous, @@ -2431,7 +2430,7 @@ JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t * return ml_matches((jl_methtable_t*)mt, types, lim, include_ambiguous, 1, world, 1, min_valid, max_valid, ambig); } -jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT) +JL_DLLEXPORT jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT) { // one unspecialized version of a function can be shared among all cached specializations if (!jl_is_method(def) || def->source == NULL) { @@ -2910,7 +2909,7 @@ jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_ } // return a MethodInstance for a compileable method_match -jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache) +JL_DLLEXPORT jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache) { jl_method_t *m = match->method; jl_svec_t *env = match->sparams; @@ -3112,6 +3111,21 @@ JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types) return 1; } +JL_DLLEXPORT int jl_add_entrypoint(jl_tupletype_t *types) +{ + size_t world = jl_atomic_load_acquire(&jl_world_counter); + size_t min_valid = 0; + size_t max_valid = ~(size_t)0; + jl_method_instance_t *mi = jl_get_compile_hint_specialization(types, world, &min_valid, &max_valid, 1); + if (mi == NULL) + return 0; + JL_GC_PROMISE_ROOTED(mi); + if (jl_generating_output() && jl_options.trim) { + arraylist_push(jl_entrypoint_mis, mi); + } + return 1; +} + // add type of `f` to front of argument tuple type jl_value_t *jl_argtype_with_function(jl_value_t *f, jl_value_t *types0) { 
diff --git a/src/init.c b/src/init.c index 86c0877b14289..413d4e8055e54 100644 --- a/src/init.c +++ b/src/init.c @@ -44,6 +44,7 @@ extern BOOL (WINAPI *hSymRefreshModuleList)(HANDLE); // list of modules being deserialized with __init__ methods jl_array_t *jl_module_init_order; +arraylist_t *jl_entrypoint_mis; JL_DLLEXPORT size_t jl_page_size; @@ -721,6 +722,7 @@ static void restore_fp_env(void) static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_task_t *ct); JL_DLLEXPORT int jl_default_debug_info_kind; +JL_DLLEXPORT jl_cgparams_t jl_default_cgparams; static void init_global_mutexes(void) { JL_MUTEX_INIT(&jl_modules_mutex, "jl_modules_mutex"); @@ -841,8 +843,10 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_ JL_TIMING(JULIA_INIT, JULIA_INIT); jl_resolve_sysimg_location(rel); // loads sysimg if available, and conditionally sets jl_options.cpu_target - if (rel == JL_IMAGE_IN_MEMORY) + if (rel == JL_IMAGE_IN_MEMORY) { jl_set_sysimg_so(jl_exe_handle); + jl_options.image_file = jl_options.julia_bin; + } else if (jl_options.image_file) jl_preload_sysimg_so(jl_options.image_file); if (jl_options.cpu_target == NULL) @@ -899,6 +903,11 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_ JL_GC_POP(); } + if (jl_options.trim) { + jl_entrypoint_mis = (arraylist_t *)malloc_s(sizeof(arraylist_t)); + arraylist_new(jl_entrypoint_mis, 0); + } + if (jl_options.handle_signals == JL_OPTIONS_HANDLE_SIGNALS_ON) jl_install_sigint_handler(); } diff --git a/src/jitlayers.h b/src/jitlayers.h index 107782e354d4a..93669c2351d88 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -29,7 +29,7 @@ #include "llvm-version.h" #include #include - +#include // As of LLVM 13, there are two runtime JIT linker implementations, the older // RuntimeDyld (used via orc::RTDyldObjectLinkingLayer) and the newer JITLink @@ -65,6 +65,7 @@ using namespace llvm; extern "C" jl_cgparams_t jl_default_cgparams; +extern 
arraylist_t new_invokes; DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeContext, LLVMOrcThreadSafeContextRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeModule, LLVMOrcThreadSafeModuleRef) @@ -211,7 +212,7 @@ struct jl_codegen_call_target_t { typedef SmallVector, 0> jl_workqueue_t; // TODO DenseMap? typedef std::map> jl_compiled_functions_t; - +typedef std::list> CallFrames; struct jl_codegen_params_t { orc::ThreadSafeContext tsctx; orc::ThreadSafeContext::Lock tsctx_lock; @@ -230,6 +231,7 @@ struct jl_codegen_params_t { std::map ditypes; std::map llvmtypes; DenseMap mergedConstants; + llvm::MapVector> enqueuers; // Map from symbol name (in a certain library) to its GV in sysimg and the // DL handle address in the current session. StringMap> libMapGV; diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 7abf2b055bb8c..0c712ef37cb5b 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -344,6 +344,8 @@ XX(jl_new_typevar) \ XX(jl_next_from_addrinfo) \ XX(jl_normalize_to_compilable_sig) \ + XX(jl_method_match_to_mi) \ + XX(jl_get_unspecialized) \ XX(jl_no_exc_handler) \ XX(jl_object_id) \ XX(jl_object_id_) \ @@ -522,6 +524,7 @@ YY(jl_dump_native) \ YY(jl_get_llvm_gvs) \ YY(jl_get_llvm_external_fns) \ + YY(jl_get_llvm_mis) \ YY(jl_dump_function_asm) \ YY(jl_LLVMCreateDisasm) \ YY(jl_LLVMDisasmInstruction) \ diff --git a/src/jloptions.c b/src/jloptions.c index f63f4de020e26..530d5e2577a9a 100644 --- a/src/jloptions.c +++ b/src/jloptions.c @@ -101,6 +101,7 @@ JL_DLLEXPORT void jl_init_options(void) 0, // permalloc_pkgimg 0, // heap-size-hint 0, // trace_compile_timing + 0, // trim }; jl_options_initialized = 1; } @@ -251,7 +252,7 @@ static const char opts_hidden[] = " --strip-ir Remove IR (intermediate representation) of compiled\n" " functions\n\n" - // compiler debugging (see the devdocs for tips on using these options) + // compiler debugging and experimental (see the devdocs for tips on using these options) " 
--output-unopt-bc Generate unoptimized LLVM bitcode (.bc)\n" " --output-bc Generate LLVM bitcode (.bc)\n" " --output-asm Generate an assembly file (.s)\n" @@ -265,6 +266,13 @@ static const char opts_hidden[] = " compile in ms\n" " --image-codegen Force generate code in imaging mode\n" " --permalloc-pkgimg={yes|no*} Copy the data section of package images into memory\n" + " --trim={no*|safe|unsafe|unsafe-warn}\n" + " Build a sysimage including only code provably reachable\n" + " from methods marked by calling `entrypoint`. In unsafe\n" + " mode, the resulting binary might be missing needed code\n" + " and can throw errors. With unsafe-warn warnings will be\n" + " printed for dynamic call sites that might lead to such\n" + " errors. In safe mode compile-time errors are given instead.\n" ; JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) @@ -311,7 +319,8 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) opt_strip_ir, opt_heap_size_hint, opt_gc_threads, - opt_permalloc_pkgimg + opt_permalloc_pkgimg, + opt_trim, }; static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:m:"; static const struct option longopts[] = { @@ -375,6 +384,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) { "strip-ir", no_argument, 0, opt_strip_ir }, { "permalloc-pkgimg",required_argument, 0, opt_permalloc_pkgimg }, { "heap-size-hint", required_argument, 0, opt_heap_size_hint }, + { "trim", optional_argument, 0, opt_trim }, { 0, 0, 0, 0 } }; @@ -934,6 +944,18 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) else jl_errorf("julia: invalid argument to --permalloc-pkgimg={yes|no} (%s)", optarg); break; + case opt_trim: + if (optarg == NULL || !strcmp(optarg,"safe")) + jl_options.trim = JL_TRIM_SAFE; + else if (!strcmp(optarg,"no")) + jl_options.trim = JL_TRIM_NO; + else if (!strcmp(optarg,"unsafe")) + jl_options.trim = JL_TRIM_UNSAFE; + else if (!strcmp(optarg,"unsafe-warn")) + jl_options.trim = JL_TRIM_UNSAFE_WARN; + else + jl_errorf("julia: 
invalid argument to --trim={safe|no|unsafe|unsafe-warn} (%s)", optarg); + break; default: jl_errorf("julia: unhandled option -- %c\n" "This is a bug, please report it.", c); diff --git a/src/jloptions.h b/src/jloptions.h index aac2a64a373a8..3d7deedb59e15 100644 --- a/src/jloptions.h +++ b/src/jloptions.h @@ -62,6 +62,7 @@ typedef struct { int8_t permalloc_pkgimg; uint64_t heap_size_hint; int8_t trace_compile_timing; + int8_t trim; } jl_options_t; #endif diff --git a/src/julia.expmap.in b/src/julia.expmap.in index e5f9ee890205f..29366f6296a85 100644 --- a/src/julia.expmap.in +++ b/src/julia.expmap.in @@ -5,8 +5,8 @@ asprintf; bitvector_*; ios_*; - arraylist_grow; - small_arraylist_grow; + arraylist_*; + small_arraylist_*; jl_*; ijl_*; _jl_mutex_*; diff --git a/src/julia.h b/src/julia.h index abb8a57ff13b0..73b96cf0183d1 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2579,6 +2579,11 @@ JL_DLLEXPORT int jl_generating_output(void) JL_NOTSAFEPOINT; #define JL_OPTIONS_USE_PKGIMAGES_YES 1 #define JL_OPTIONS_USE_PKGIMAGES_NO 0 +#define JL_TRIM_NO 0 +#define JL_TRIM_SAFE 1 +#define JL_TRIM_UNSAFE 2 +#define JL_TRIM_UNSAFE_WARN 3 + // Version information #include // Generated file @@ -2626,10 +2631,12 @@ typedef struct { int gcstack_arg; // Pass the ptls value as an argument with swiftself int use_jlplt; // Whether to use the Julia PLT mechanism or emit symbols directly + int trim; // can we emit dynamic dispatches? // Cache access. Default: jl_rettype_inferred_native. 
jl_codeinstance_lookup_t lookup; } jl_cgparams_t; extern JL_DLLEXPORT int jl_default_debug_info_kind; +extern JL_DLLEXPORT jl_cgparams_t jl_default_cgparams; typedef struct { int emit_metadata; diff --git a/src/julia_internal.h b/src/julia_internal.h index f00667d016796..9a61c3d18356f 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -664,8 +664,9 @@ JL_DLLEXPORT jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred( jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_value_t *rettype, size_t min_world, size_t max_world, jl_debuginfo_t *edges); -jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT); +JL_DLLEXPORT jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT); JL_DLLEXPORT void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache); JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_uninit(jl_method_instance_t *mi, jl_value_t *owner); JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst( @@ -683,6 +684,7 @@ JL_DLLEXPORT const char *jl_debuginfo_name(jl_value_t *func) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tupletype_t *types, size_t world); JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types); +JL_DLLEXPORT int jl_add_entrypoint(jl_tupletype_t *types); jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *lam JL_PROPAGATES_ROOT, size_t world); jl_value_t *jl_code_or_ci_for_interpreter(jl_method_instance_t *lam JL_PROPAGATES_ROOT, size_t world); int jl_code_requires_compiler(jl_code_info_t *src, int include_force_compile); @@ -853,6 +855,7 @@ extern htable_t jl_current_modules JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT jl_module_t 
*jl_precompile_toplevel_module JL_GLOBALLY_ROOTED; extern jl_genericmemory_t *jl_global_roots_list JL_GLOBALLY_ROOTED; extern jl_genericmemory_t *jl_global_roots_keyset JL_GLOBALLY_ROOTED; +extern arraylist_t *jl_entrypoint_mis; JL_DLLEXPORT int jl_is_globally_rooted(jl_value_t *val JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val, int insert) JL_GLOBALLY_ROOTED; @@ -1902,7 +1905,7 @@ JL_DLLIMPORT void jl_get_function_id(void *native_code, jl_code_instance_t *ncod int32_t *func_idx, int32_t *specfunc_idx); JL_DLLIMPORT void jl_register_fptrs(uint64_t image_base, const struct _jl_image_fptrs_t *fptrs, jl_method_instance_t **linfos, size_t n); - +JL_DLLIMPORT void jl_get_llvm_mis(void *native_code, arraylist_t* MIs); JL_DLLIMPORT void jl_init_codegen(void); JL_DLLIMPORT void jl_teardown_codegen(void) JL_NOTSAFEPOINT; JL_DLLIMPORT int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT; diff --git a/src/module.c b/src/module.c index 96d94049cff13..a6c05d279f5b0 100644 --- a/src/module.c +++ b/src/module.c @@ -856,7 +856,7 @@ JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var) return kind == BINDING_KIND_DECLARED || !jl_bkind_is_some_guard(kind); } -static uint_t bindingkey_hash(size_t idx, jl_value_t *data) +uint_t bindingkey_hash(size_t idx, jl_value_t *data) { jl_binding_t *b = (jl_binding_t*)jl_svecref(data, idx); // This must always happen inside the lock jl_sym_t *var = b->globalref->name; diff --git a/src/precompile.c b/src/precompile.c index c40e867ea699e..5088d45a5ad74 100644 --- a/src/precompile.c +++ b/src/precompile.c @@ -116,14 +116,16 @@ JL_DLLEXPORT void jl_write_compiler_output(void) if (f) { jl_array_ptr_1d_push(jl_module_init_order, m); int setting = jl_get_module_compile((jl_module_t*)m); - if (setting != JL_OPTIONS_COMPILE_OFF && - setting != JL_OPTIONS_COMPILE_MIN) { + if ((setting != JL_OPTIONS_COMPILE_OFF && (jl_options.trim || + 
(setting != JL_OPTIONS_COMPILE_MIN)))) { // TODO: this would be better handled if moved entirely to jl_precompile // since it's a slightly duplication of effort jl_value_t *tt = jl_is_type(f) ? (jl_value_t*)jl_wrap_Type(f) : jl_typeof(f); JL_GC_PUSH1(&tt); tt = jl_apply_tuple_type_v(&tt, 1); jl_compile_hint((jl_tupletype_t*)tt); + if (jl_options.trim) + jl_add_entrypoint((jl_tupletype_t*)tt); JL_GC_POP(); } } @@ -188,6 +190,10 @@ JL_DLLEXPORT void jl_write_compiler_output(void) jl_printf(JL_STDERR, "\n ** incremental compilation may be broken for this module **\n\n"); } } + if (jl_options.trim) { + exit(0); // Some finalizers need to run and we've blown up the bindings table + // TODO: Is this still needed + } JL_GC_POP(); jl_gc_enable_finalizers(ct, 1); } diff --git a/src/precompile_utils.c b/src/precompile_utils.c index 5a4f599d1f0eb..a78d1e66dbb51 100644 --- a/src/precompile_utils.c +++ b/src/precompile_utils.c @@ -321,3 +321,83 @@ static void *jl_precompile_worklist(jl_array_t *worklist, jl_array_t *extext_met JL_GC_POP(); return native_code; } + +static int enq_ccallable_entrypoints_(jl_typemap_entry_t *def, void *closure) +{ + jl_method_t *m = def->func.method; + if (m->external_mt) + return 1; + if (m->ccallable) + jl_add_entrypoint((jl_tupletype_t*)jl_svecref(m->ccallable, 1)); + return 1; +} + +static int enq_ccallable_entrypoints(jl_methtable_t *mt, void *env) +{ + return jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), enq_ccallable_entrypoints_, env); +} + +JL_DLLEXPORT void jl_add_ccallable_entrypoints(void) +{ + jl_foreach_reachable_mtable(enq_ccallable_entrypoints, NULL); +} + +static void *jl_precompile_trimmed(size_t world) +{ + // array of MethodInstances and ccallable aliases to include in the output + jl_array_t *m = jl_alloc_vec_any(0); + jl_value_t *ccallable = NULL; + JL_GC_PUSH2(&m, &ccallable); + jl_method_instance_t *mi; + while (1) + { + mi = (jl_method_instance_t*)arraylist_pop(jl_entrypoint_mis); + if (mi == NULL) + break; + 
assert(jl_is_method_instance(mi)); + + jl_array_ptr_1d_push(m, (jl_value_t*)mi); + ccallable = (jl_value_t *)mi->def.method->ccallable; + if (ccallable) + jl_array_ptr_1d_push(m, ccallable); + } + + jl_cgparams_t params = jl_default_cgparams; + params.trim = jl_options.trim; + void *native_code = jl_create_native(m, NULL, &params, 0, /* imaging */ 1, 0, + world); + JL_GC_POP(); + return native_code; +} + +static void jl_rebuild_methtables(arraylist_t* MIs, htable_t* mtables) +{ + size_t i; + for (i = 0; i < MIs->len; i++) { + jl_method_instance_t *mi = (jl_method_instance_t*)MIs->items[i]; + jl_method_t *m = mi->def.method; + jl_methtable_t *old_mt = jl_method_get_table(m); + if ((jl_value_t *)old_mt == jl_nothing) + continue; + jl_sym_t *name = old_mt->name; + if (!ptrhash_has(mtables, old_mt)) + ptrhash_put(mtables, old_mt, jl_new_method_table(name, m->module)); + jl_methtable_t *mt = (jl_methtable_t*)ptrhash_get(mtables, old_mt); + size_t world = jl_atomic_load_acquire(&jl_world_counter); + jl_value_t * lookup = jl_methtable_lookup(mt, m->sig, world); + // Check if the method is already in the new table, if not then insert it there + if (lookup == jl_nothing || (jl_method_t*)lookup != m) { + //TODO: should this be a function like unsafe_insert_method?
+ size_t min_world = jl_atomic_load_relaxed(&m->primary_world); + size_t max_world = jl_atomic_load_relaxed(&m->deleted_world); + jl_atomic_store_relaxed(&m->primary_world, ~(size_t)0); + jl_atomic_store_relaxed(&m->deleted_world, 1); + jl_typemap_entry_t *newentry = jl_method_table_add(mt, m, NULL); + jl_atomic_store_relaxed(&m->primary_world, min_world); + jl_atomic_store_relaxed(&m->deleted_world, max_world); + jl_atomic_store_relaxed(&newentry->min_world, min_world); + jl_atomic_store_relaxed(&newentry->max_world, max_world); + } + } + +} diff --git a/src/staticdata.c b/src/staticdata.c index 363aa46b62221..f54cc9692eaea 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -363,6 +363,9 @@ static void *to_seroder_entry(size_t idx) return (void*)((char*)HT_NOTFOUND + 1 + idx); } +static htable_t new_methtables; +static size_t precompilation_world; + static int ptr_cmp(const void *l, const void *r) { uintptr_t left = *(const uintptr_t*)l; @@ -770,22 +773,41 @@ static uintptr_t jl_fptr_id(void *fptr) #define jl_queue_for_serialization(s, v) jl_queue_for_serialization_((s), (jl_value_t*)(v), 1, 0) static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED; - static void jl_queue_module_for_serialization(jl_serializer_state *s, jl_module_t *m) JL_GC_DISABLED { jl_queue_for_serialization(s, m->name); jl_queue_for_serialization(s, m->parent); - jl_queue_for_serialization(s, jl_atomic_load_relaxed(&m->bindings)); + if (jl_options.trim) { + jl_queue_for_serialization_(s, (jl_value_t*)jl_atomic_load_relaxed(&m->bindings), 0, 1); + } else { + jl_queue_for_serialization(s, jl_atomic_load_relaxed(&m->bindings)); + } jl_queue_for_serialization(s, jl_atomic_load_relaxed(&m->bindingkeyset)); - if (jl_options.strip_metadata) { + if (jl_options.strip_metadata || jl_options.trim) { jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings); for (size_t i = 0; i < jl_svec_len(table); i++) { jl_binding_t *b = 
(jl_binding_t*)jl_svecref(table, i); if ((void*)b == jl_nothing) break; - jl_sym_t *name = b->globalref->name; - if (name == jl_docmeta_sym && jl_get_binding_value(b)) - record_field_change((jl_value_t**)&b->value, jl_nothing); + if (jl_options.strip_metadata) { + jl_sym_t *name = b->globalref->name; + if (name == jl_docmeta_sym && jl_get_binding_value(b)) + record_field_change((jl_value_t**)&b->value, jl_nothing); + } + if (jl_options.trim) { + jl_value_t *val = jl_get_binding_value(b); + // keep binding objects that are defined and ... + if (val && + // ... point to modules ... + (jl_is_module(val) || + // ... or point to __init__ methods ... + !strcmp(jl_symbol_name(b->globalref->name), "__init__") || + // ... or point to Base functions accessed by the runtime + (m == jl_base_module && (!strcmp(jl_symbol_name(b->globalref->name), "wait") || + !strcmp(jl_symbol_name(b->globalref->name), "task_done_hook"))))) { + jl_queue_for_serialization(s, b); + } + } } } @@ -944,6 +966,23 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ jl_queue_for_serialization_(s, get_replaceable_field((jl_value_t**)&bpart->next, 0), 1, immediate); } else if (layout->nfields > 0) { + if (jl_options.trim) { + if (jl_is_method(v)) { + jl_method_t *m = (jl_method_t *)v; + if (jl_is_svec(jl_atomic_load_relaxed(&m->specializations))) + jl_queue_for_serialization_(s, (jl_value_t*)jl_atomic_load_relaxed(&m->specializations), 0, 1); + } + else if (jl_typetagis(v, jl_typename_type)) { + jl_typename_t *tn = (jl_typename_t*)v; + if (tn->mt != NULL && !tn->mt->frozen) { + jl_methtable_t * new_methtable = (jl_methtable_t *)ptrhash_get(&new_methtables, tn->mt); + if (new_methtable != HT_NOTFOUND) + record_field_change((jl_value_t **)&tn->mt, (jl_value_t*)new_methtable); + else + record_field_change((jl_value_t **)&tn->mt, NULL); + } + } + } char *data = (char*)jl_data_ptr(v); size_t i, np = layout->npointers; for (i = 0; i < np; i++) { @@ -989,6 +1028,7 @@ done_fields: 
; } } + static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED { if (!jl_needs_serialization(s, v)) @@ -2407,6 +2447,53 @@ static void jl_prune_type_cache_linear(jl_svec_t *cache) jl_svecset(cache, ins++, jl_nothing); } +uint_t bindingkey_hash(size_t idx, jl_value_t *data); + +static void jl_prune_module_bindings(jl_module_t * m) JL_GC_DISABLED +{ + jl_svec_t * bindings = jl_atomic_load_relaxed(&m->bindings); + size_t l = jl_svec_len(bindings), i; + arraylist_t bindings_list; + arraylist_new(&bindings_list, 0); + if (l == 0) + return; + for (i = 0; i < l; i++) { + jl_value_t *ti = jl_svecref(bindings, i); + if (ti == jl_nothing) + continue; + jl_binding_t *ref = ((jl_binding_t*)ti); + if (!((ptrhash_get(&serialization_order, ref) == HT_NOTFOUND) && + (ptrhash_get(&serialization_order, ref->globalref) == HT_NOTFOUND))) { + jl_svecset(bindings, i, jl_nothing); + arraylist_push(&bindings_list, ref); + } + } + jl_genericmemory_t* bindingkeyset = jl_atomic_load_relaxed(&m->bindingkeyset); + _Atomic(jl_genericmemory_t*)bindingkeyset2; + jl_atomic_store_relaxed(&bindingkeyset2,(jl_genericmemory_t*)jl_an_empty_memory_any); + jl_svec_t *bindings2 = jl_alloc_svec_uninit(bindings_list.len); + for (i = 0; i < bindings_list.len; i++) { + jl_binding_t *ref = (jl_binding_t*)bindings_list.items[i]; + jl_svecset(bindings2, i, ref); + jl_smallintset_insert(&bindingkeyset2, (jl_value_t*)m, bindingkey_hash, i, (jl_value_t*)bindings2); + } + void *idx = ptrhash_get(&serialization_order, bindings); + assert(idx != HT_NOTFOUND && idx != (void*)(uintptr_t)-1); + assert(serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] == bindings); + ptrhash_put(&serialization_order, bindings2, idx); + serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] = bindings2; + + idx = ptrhash_get(&serialization_order, bindingkeyset); + assert(idx != HT_NOTFOUND && idx != (void*)(uintptr_t)-1); + 
assert(serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] == bindingkeyset); + ptrhash_put(&serialization_order, jl_atomic_load_relaxed(&bindingkeyset2), idx); + serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] = jl_atomic_load_relaxed(&bindingkeyset2); + jl_atomic_store_relaxed(&m->bindings, bindings2); + jl_atomic_store_relaxed(&m->bindingkeyset, jl_atomic_load_relaxed(&bindingkeyset2)); + jl_gc_wb(m, bindings2); + jl_gc_wb(m, jl_atomic_load_relaxed(&bindingkeyset2)); +} + static void strip_slotnames(jl_array_t *slotnames) { // replace slot names with `?`, except unused_sym since the compiler looks at it @@ -2473,7 +2560,7 @@ static int strip_all_codeinfos__(jl_typemap_entry_t *def, void *_env) if (m->source) { int stripped_ir = 0; if (jl_options.strip_ir) { - int should_strip_ir = 0; + int should_strip_ir = jl_options.trim; if (!should_strip_ir) { if (jl_atomic_load_relaxed(&m->unspecialized)) { jl_code_instance_t *unspec = jl_atomic_load_relaxed(&jl_atomic_load_relaxed(&m->unspecialized)->cache); @@ -2675,8 +2762,46 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, // strip metadata and IR when requested if (jl_options.strip_metadata || jl_options.strip_ir) jl_strip_all_codeinfos(); + // collect needed methods and replace method tables that are in the tags array + htable_new(&new_methtables, 0); + arraylist_t MIs; + arraylist_new(&MIs, 0); + arraylist_t gvars; + arraylist_new(&gvars, 0); + arraylist_t external_fns; + arraylist_new(&external_fns, 0); int en = jl_gc_enable(0); + if (native_functions) { + jl_get_llvm_gvs(native_functions, &gvars); + jl_get_llvm_external_fns(native_functions, &external_fns); + if (jl_options.trim) + jl_get_llvm_mis(native_functions, &MIs); + } + if (jl_options.trim) { + jl_rebuild_methtables(&MIs, &new_methtables); + jl_methtable_t *mt = (jl_methtable_t *)ptrhash_get(&new_methtables, jl_type_type_mt); + JL_GC_PROMISE_ROOTED(mt); + if (mt != HT_NOTFOUND) + jl_type_type_mt = mt; 
+ else + jl_type_type_mt = jl_new_method_table(jl_type_type_mt->name, jl_type_type_mt->module); + + mt = (jl_methtable_t *)ptrhash_get(&new_methtables, jl_kwcall_mt); + JL_GC_PROMISE_ROOTED(mt); + if (mt != HT_NOTFOUND) + jl_kwcall_mt = mt; + else + jl_kwcall_mt = jl_new_method_table(jl_kwcall_mt->name, jl_kwcall_mt->module); + + mt = (jl_methtable_t *)ptrhash_get(&new_methtables, jl_nonfunction_mt); + JL_GC_PROMISE_ROOTED(mt); + if (mt != HT_NOTFOUND) + jl_nonfunction_mt = mt; + else + jl_nonfunction_mt = jl_new_method_table(jl_nonfunction_mt->name, jl_nonfunction_mt->module); + } + nsym_tag = 0; htable_new(&symbol_table, 0); htable_new(&fptr_to_id, sizeof(id_to_fptrs) / sizeof(*id_to_fptrs)); @@ -2722,14 +2847,6 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, htable_new(&s.callers_with_edges, 0); jl_value_t **const*const tags = get_tags(); // worklist == NULL ? get_tags() : NULL; - arraylist_t gvars; - arraylist_t external_fns; - arraylist_new(&gvars, 0); - arraylist_new(&external_fns, 0); - if (native_functions) { - jl_get_llvm_gvs(native_functions, &gvars); - jl_get_llvm_external_fns(native_functions, &external_fns); - } if (worklist == NULL) { // empty!(Core.ARGS) @@ -2788,6 +2905,8 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, // step 1.2: ensure all gvars are part of the sysimage too record_gvars(&s, &gvars); record_external_fns(&s, &external_fns); + if (jl_options.trim) + record_gvars(&s, &MIs); jl_serialize_reachable(&s); // step 1.3: prune (garbage collect) special weak references from the jl_global_roots_list if (worklist == NULL) { @@ -2808,8 +2927,30 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, // step 1.4: prune (garbage collect) some special weak references from // built-in type caches too for (i = 0; i < serialization_queue.len; i++) { - jl_typename_t *tn = (jl_typename_t*)serialization_queue.items[i]; - if (jl_is_typename(tn)) { + jl_value_t *v = 
(jl_value_t*)serialization_queue.items[i]; + if (jl_options.trim) { + if (jl_is_method(v)){ + jl_method_t *m = (jl_method_t*)v; + jl_value_t *specializations_ = jl_atomic_load_relaxed(&m->specializations); + if (!jl_is_svec(specializations_)) + continue; + + jl_svec_t *specializations = (jl_svec_t *)specializations_; + size_t l = jl_svec_len(specializations), i; + for (i = 0; i < l; i++) { + jl_value_t *mi = jl_svecref(specializations, i); + if (mi == jl_nothing) + continue; + if (ptrhash_get(&serialization_order, mi) == HT_NOTFOUND) + jl_svecset(specializations, i, jl_nothing); + } + } else if (jl_is_module(v)) { + jl_prune_module_bindings((jl_module_t*)v); + } + } + // Not else + if (jl_is_typename(v)) { + jl_typename_t *tn = (jl_typename_t*)v; jl_atomic_store_relaxed(&tn->cache, jl_prune_type_cache_hash(jl_atomic_load_relaxed(&tn->cache))); jl_gc_wb(tn, jl_atomic_load_relaxed(&tn->cache)); @@ -2918,7 +3059,9 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, jl_write_value(&s, global_roots_keyset); jl_write_value(&s, s.ptls->root_task->tls); write_uint32(f, jl_get_gs_ctr()); - write_uint(f, jl_atomic_load_acquire(&jl_world_counter)); + size_t world = jl_atomic_load_acquire(&jl_world_counter); + // assert(world == precompilation_world); // This triggers on a normal build of julia + write_uint(f, world); write_uint(f, jl_typeinf_world); } else { @@ -2971,6 +3114,7 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, htable_free(&nullptrs); htable_free(&symbol_table); htable_free(&fptr_to_id); + htable_free(&new_methtables); nsym_tag = 0; jl_gc_enable(en); @@ -3000,6 +3144,10 @@ static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_a JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *worklist, bool_t emit_split, ios_t **s, ios_t **z, jl_array_t **udeps, int64_t *srctextpos) { + if (jl_options.strip_ir || jl_options.trim) { + // make sure this is precompiled for 
jl_foreach_reachable_mtable + jl_get_loaded_modules(); + } jl_gc_collect(JL_GC_FULL); jl_gc_collect(JL_GC_INCREMENTAL); // sweep finalizers JL_TIMING(SYSIMG_DUMP, SYSIMG_DUMP); @@ -3049,7 +3197,11 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli } } else if (_native_data != NULL) { - *_native_data = jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL); + precompilation_world = jl_atomic_load_acquire(&jl_world_counter); + if (jl_options.trim) + *_native_data = jl_precompile_trimmed(precompilation_world); + else + *_native_data = jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL); } // Make sure we don't run any Julia code concurrently after this point diff --git a/src/support/arraylist.h b/src/support/arraylist.h index 6ad2f0e2f28c9..a83bd2808756c 100644 --- a/src/support/arraylist.h +++ b/src/support/arraylist.h @@ -20,11 +20,11 @@ typedef struct { void *_space[AL_N_INLINE]; } arraylist_t; -arraylist_t *arraylist_new(arraylist_t *a, size_t size) JL_NOTSAFEPOINT; -void arraylist_free(arraylist_t *a) JL_NOTSAFEPOINT; +JL_DLLEXPORT arraylist_t *arraylist_new(arraylist_t *a, size_t size) JL_NOTSAFEPOINT; +JL_DLLEXPORT void arraylist_free(arraylist_t *a) JL_NOTSAFEPOINT; -void arraylist_push(arraylist_t *a, void *elt) JL_NOTSAFEPOINT; -void *arraylist_pop(arraylist_t *a) JL_NOTSAFEPOINT; +JL_DLLEXPORT void arraylist_push(arraylist_t *a, void *elt) JL_NOTSAFEPOINT; +JL_DLLEXPORT void *arraylist_pop(arraylist_t *a) JL_NOTSAFEPOINT; JL_DLLEXPORT void arraylist_grow(arraylist_t *a, size_t n) JL_NOTSAFEPOINT; typedef struct { @@ -34,11 +34,12 @@ typedef struct { void *_space[SMALL_AL_N_INLINE]; } small_arraylist_t; -small_arraylist_t *small_arraylist_new(small_arraylist_t *a, uint32_t size) JL_NOTSAFEPOINT; -void small_arraylist_free(small_arraylist_t *a) JL_NOTSAFEPOINT; -void small_arraylist_push(small_arraylist_t *a, void *elt) JL_NOTSAFEPOINT; -void *small_arraylist_pop(small_arraylist_t *a) 
JL_NOTSAFEPOINT; +JL_DLLEXPORT small_arraylist_t *small_arraylist_new(small_arraylist_t *a, uint32_t size) JL_NOTSAFEPOINT; +JL_DLLEXPORT void small_arraylist_free(small_arraylist_t *a) JL_NOTSAFEPOINT; + +JL_DLLEXPORT void small_arraylist_push(small_arraylist_t *a, void *elt) JL_NOTSAFEPOINT; +JL_DLLEXPORT void *small_arraylist_pop(small_arraylist_t *a) JL_NOTSAFEPOINT; JL_DLLEXPORT void small_arraylist_grow(small_arraylist_t *a, uint32_t n) JL_NOTSAFEPOINT; #ifdef __cplusplus diff --git a/stdlib/LinearAlgebra/src/blas.jl b/stdlib/LinearAlgebra/src/blas.jl index 413b7866c5444..3c15630091162 100644 --- a/stdlib/LinearAlgebra/src/blas.jl +++ b/stdlib/LinearAlgebra/src/blas.jl @@ -159,7 +159,7 @@ function check() interface = USE_BLAS64 ? :ilp64 : :lp64 if !any(lib.interface == interface for lib in config.loaded_libs) interfacestr = uppercase(string(interface)) - @error("No loaded BLAS libraries were built with $interfacestr support.") + println(Core.stderr, "No loaded BLAS libraries were built with $interfacestr support.") exit(1) end end diff --git a/stdlib/LinearAlgebra/src/lbt.jl b/stdlib/LinearAlgebra/src/lbt.jl index 606ddedbe1343..81d10f930c8c5 100644 --- a/stdlib/LinearAlgebra/src/lbt.jl +++ b/stdlib/LinearAlgebra/src/lbt.jl @@ -17,7 +17,7 @@ end macro get_warn(map, key) return quote if !haskey($(esc(map)), $(esc(key))) - @warn(string("[LBT] Unknown key into ", $(string(map)), ": ", $(esc(key)), ", defaulting to :unknown")) + println(Core.stderr, string("Warning: [LBT] Unknown key into ", $(string(map)), ": ", $(esc(key)), ", defaulting to :unknown")) # All the unknown values share a common value: `-1` $(esc(map))[$(esc(LBT_INTERFACE_UNKNOWN))] else @@ -132,7 +132,7 @@ struct LBTConfig if str_ptr != C_NULL push!(exported_symbols, unsafe_string(str_ptr)) else - @error("NULL string in lbt_config.exported_symbols[$(sym_idx)]") + println(Core.stderr, "Error: NULL string in lbt_config.exported_symbols[$(sym_idx)]") end end diff --git a/test/Makefile b/test/Makefile 
index 1b9cb377c943d..6ebdd3c764fd5 100644 --- a/test/Makefile +++ b/test/Makefile @@ -24,6 +24,8 @@ EMBEDDING_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(SRCDIR)/embedding" "CC=$(CC GCEXT_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(SRCDIR)/gcext" "CC=$(CC)" +TRIMMING_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(JULIAHOME)/usr/bin" "CC=$(CC)" + default: $(TESTS): @@ -66,6 +68,9 @@ embedding: gcext: @$(MAKE) -C $(SRCDIR)/$@ check $(GCEXT_ARGS) +trimming: + @$(MAKE) -C $(SRCDIR)/$@ check $(TRIMMING_ARGS) + clangsa: @$(MAKE) -C $(SRCDIR)/$@ @@ -73,5 +78,6 @@ clean: @$(MAKE) -C embedding $@ $(EMBEDDING_ARGS) @$(MAKE) -C gcext $@ $(GCEXT_ARGS) @$(MAKE) -C llvmpasses $@ + @$(MAKE) -C trimming $@ $(TRIMMING_ARGS) -.PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) relocatedepot revise-relocatedepot embedding gcext clangsa clean +.PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) relocatedepot revise-relocatedepot embedding gcext trimming clangsa clean diff --git a/test/trimming/Makefile b/test/trimming/Makefile new file mode 100644 index 0000000000000..c6e105d637013 --- /dev/null +++ b/test/trimming/Makefile @@ -0,0 +1,55 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +# This Makefile template requires the following variables to be set +# in the environment or on the command-line: +# JULIA: path to julia[.exe] executable +# BIN: binary build directory + +ifndef JULIA + $(error "Please pass JULIA=[path of target julia binary], or set as environment variable!") +endif +ifndef BIN + $(error "Please pass BIN=[path of build directory], or set as environment variable!") +endif + +#============================================================================= +# location of test source +SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST)))) +JULIAHOME := $(abspath $(SRCDIR)/../..) 
+BUILDSCRIPT := $(BIN)/../share/julia/juliac-buildscript.jl +include $(JULIAHOME)/Make.inc + +# get the executable suffix, if any +EXE := $(suffix $(abspath $(JULIA))) + +# get compiler and linker flags. (see: `contrib/julia-config.jl`) +JULIA_CONFIG := $(JULIA) -e 'include(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "julia-config.jl"))' -- +CPPFLAGS_ADD := +CFLAGS_ADD = $(shell $(JULIA_CONFIG) --cflags) +LDFLAGS_ADD = -lm $(shell $(JULIA_CONFIG) --ldflags --ldlibs) -ljulia-internal + +#============================================================================= + +release: hello$(EXE) + +hello.o: $(SRCDIR)/hello.jl $(BUILDSCRIPT) + $(JULIA) -t 1 -J $(BIN)/../lib/julia/sys.so --startup-file=no --history-file=no --output-o $@ --output-incremental=no --strip-ir --strip-metadata --trim $(BUILDSCRIPT) $(SRCDIR)/hello.jl --output-exe true + +init.o: $(SRCDIR)/init.c + $(CC) -c -o $@ $< $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) + +hello$(EXE): hello.o init.o + $(CC) -o $@ $(WHOLE_ARCHIVE) hello.o $(NO_WHOLE_ARCHIVE) init.o $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS) + +check: hello$(EXE) + $(JULIA) --depwarn=error $(SRCDIR)/../runtests.jl $(SRCDIR)/trimming + +clean: + -rm -f hello$(EXE) init.o hello.o + +.PHONY: release clean check + +# Makefile debugging trick: +# call print-VARIABLE to see the runtime value of any variable +print-%: + @echo '$*=$($*)' diff --git a/test/trimming/hello.jl b/test/trimming/hello.jl new file mode 100644 index 0000000000000..307bf820f325b --- /dev/null +++ b/test/trimming/hello.jl @@ -0,0 +1,6 @@ +module MyApp +Base.@ccallable function main()::Cint + println(Core.stdout, "Hello, world!") + return 0 +end +end diff --git a/test/trimming/init.c b/test/trimming/init.c new file mode 100644 index 0000000000000..ea1b02f8e5c8f --- /dev/null +++ b/test/trimming/init.c @@ -0,0 +1,9 @@ +#include <julia.h> + +__attribute__((constructor)) void static_init(void) +{ + if (jl_is_initialized()) + return; +
julia_init(JL_IMAGE_IN_MEMORY); + jl_exception_clear(); +} diff --git a/test/trimming/trimming.jl b/test/trimming/trimming.jl new file mode 100644 index 0000000000000..dfacae7f8e531 --- /dev/null +++ b/test/trimming/trimming.jl @@ -0,0 +1,7 @@ +using Test + +exe_path = joinpath(@__DIR__, "hello"*splitext(Base.julia_exename())[2]) + +@test readchomp(`$exe_path`) == "Hello, world!" + +@test filesize(exe_path) < filesize(unsafe_string(Base.JLOptions().image_file))/10 From 17445fe752b7b99633ca306af0981baca9f66bda Mon Sep 17 00:00:00 2001 From: Neven Sajko Date: Sun, 29 Sep 2024 11:41:59 +0200 Subject: [PATCH 14/45] fix rawbigints OOB issues (#55917) Fixes issues introduced in #50691 and found in #55906: * use `@inbounds` and `@boundscheck` macros in rawbigints, for catching OOB with `--check-bounds=yes` * fix OOB in `truncate` --- base/rawbigints.jl | 31 ++++++++++++++++++++++--------- test/mpfr.jl | 9 +++++++++ 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/base/rawbigints.jl b/base/rawbigints.jl index 6508bea05be0f..a9bb18e163e2d 100644 --- a/base/rawbigints.jl +++ b/base/rawbigints.jl @@ -21,14 +21,21 @@ reversed_index(n::Int, i::Int) = n - i - 1 reversed_index(x, i::Int, v::Val) = reversed_index(elem_count(x, v), i)::Int split_bit_index(x::RawBigInt, i::Int) = divrem(i, word_length(x), RoundToZero) +function get_elem_words_raw(x::RawBigInt{T}, i::Int) where {T} + @boundscheck if (i < 0) || (elem_count(x, Val(:words)) ≤ i) + throw(BoundsError(x, i)) + end + d = x.d + j = i + 1 + (GC.@preserve d unsafe_load(Ptr{T}(pointer(d)), j))::T +end + """ `i` is the zero-based index of the wanted word in `x`, starting from the less significant words. 
""" -function get_elem(x::RawBigInt{T}, i::Int, ::Val{:words}, ::Val{:ascending}) where {T} - # `i` must be non-negative and less than `x.word_count` - d = x.d - (GC.@preserve d unsafe_load(Ptr{T}(pointer(d)), i + 1))::T +function get_elem(x::RawBigInt, i::Int, ::Val{:words}, ::Val{:ascending}) + @inbounds @inline get_elem_words_raw(x, i) end function get_elem(x, i::Int, v::Val, ::Val{:descending}) @@ -96,7 +103,8 @@ end """ Returns an integer of type `R`, consisting of the `len` most -significant bits of `x`. +significant bits of `x`. If there are less than `len` bits in `x`, +the least significant bits are zeroed. """ function truncated(::Type{R}, x::RawBigInt, len::Int) where {R<:Integer} ret = zero(R) @@ -104,17 +112,22 @@ function truncated(::Type{R}, x::RawBigInt, len::Int) where {R<:Integer} word_count, bit_count_in_word = split_bit_index(x, len) k = word_length(x) vals = (Val(:words), Val(:descending)) + lenx = elem_count(x, first(vals)) for w ∈ 0:(word_count - 1) ret <<= k - word = get_elem(x, w, vals...) - ret |= R(word) + if w < lenx + word = get_elem(x, w, vals...) + ret |= R(word) + end end if !iszero(bit_count_in_word) ret <<= bit_count_in_word - wrd = get_elem(x, word_count, vals...) - ret |= R(wrd >>> (k - bit_count_in_word)) + if word_count < lenx + wrd = get_elem(x, word_count, vals...) 
+ ret |= R(wrd >>> (k - bit_count_in_word)) + end end end ret::R diff --git a/test/mpfr.jl b/test/mpfr.jl index 9a9698ba72c2c..63da732df1c09 100644 --- a/test/mpfr.jl +++ b/test/mpfr.jl @@ -1088,3 +1088,12 @@ end clear_flags() end end + +@testset "RawBigInt truncation OOB read" begin + @testset "T: $T" for T ∈ (UInt8, UInt16, UInt32, UInt64, UInt128) + v = Base.RawBigInt{T}("a"^sizeof(T), 1) + @testset "bit_count: $bit_count" for bit_count ∈ (0:10:80) + @test Base.truncated(UInt128, v, bit_count) isa Any + end + end +end From 4da067167fc414ea4329be3b4fdc516914e102cd Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 30 Sep 2024 12:52:59 +0200 Subject: [PATCH 15/45] prevent loading other extensions when precompiling an extension (#55589) The current way of loading extensions when precompiling an extension very easily leads to cycles. For example, if you have more than one extension and you happen to transitively depend on the triggers of one of your extensions you will immediately hit a cycle where the extensions will try to load each other indefinitely. This is an issue because you cannot directly influence your transitive dependency graph so from this p.o.v the current system of loading extension is "unsound". The test added here checks this scenario and we can now precompile and load it without any warnings or issues. Would have made https://github.com/JuliaLang/julia/issues/55517 a non issue. 
Fixes https://github.com/JuliaLang/julia/issues/55557 --------- Co-authored-by: KristofferC --- base/loading.jl | 16 ++++--- base/precompilation.jl | 47 +------------------ test/loading.jl | 13 +++++ .../Extensions/CyclicExtensions/Manifest.toml | 21 +++++++++ .../Extensions/CyclicExtensions/Project.toml | 13 +++++ .../Extensions/CyclicExtensions/ext/ExtA.jl | 6 +++ .../Extensions/CyclicExtensions/ext/ExtB.jl | 6 +++ .../CyclicExtensions/src/CyclicExtensions.jl | 7 +++ 8 files changed, 76 insertions(+), 53 deletions(-) create mode 100644 test/project/Extensions/CyclicExtensions/Manifest.toml create mode 100644 test/project/Extensions/CyclicExtensions/Project.toml create mode 100644 test/project/Extensions/CyclicExtensions/ext/ExtA.jl create mode 100644 test/project/Extensions/CyclicExtensions/ext/ExtB.jl create mode 100644 test/project/Extensions/CyclicExtensions/src/CyclicExtensions.jl diff --git a/base/loading.jl b/base/loading.jl index cf7e41a0b5b2b..fbf6bb4af50aa 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1387,7 +1387,9 @@ function run_module_init(mod::Module, i::Int=1) end function run_package_callbacks(modkey::PkgId) - run_extension_callbacks(modkey) + if !precompiling_extension + run_extension_callbacks(modkey) + end assert_havelock(require_lock) unlock(require_lock) try @@ -2843,7 +2845,7 @@ end const PRECOMPILE_TRACE_COMPILE = Ref{String}() function create_expr_cache(pkg::PkgId, input::String, output::String, output_o::Union{Nothing, String}, - concrete_deps::typeof(_concrete_dependencies), flags::Cmd=``, internal_stderr::IO = stderr, internal_stdout::IO = stdout) + concrete_deps::typeof(_concrete_dependencies), flags::Cmd=``, internal_stderr::IO = stderr, internal_stdout::IO = stdout, isext::Bool=false) @nospecialize internal_stderr internal_stdout rm(output, force=true) # Remove file if it exists output_o === nothing || rm(output_o, force=true) @@ -2912,7 +2914,7 @@ function create_expr_cache(pkg::PkgId, input::String, output::String, 
output_o:: write(io.in, """ empty!(Base.EXT_DORMITORY) # If we have a custom sysimage with `EXT_DORMITORY` prepopulated Base.track_nested_precomp($precomp_stack) - Base.precompiling_extension = $(loading_extension) + Base.precompiling_extension = $(loading_extension | isext) Base.precompiling_package = true Base.include_package_for_output($(pkg_str(pkg)), $(repr(abspath(input))), $(repr(depot_path)), $(repr(dl_load_path)), $(repr(load_path)), $deps, $(repr(source_path(nothing)))) @@ -2970,18 +2972,18 @@ This can be used to reduce package load times. Cache files are stored in `DEPOT_PATH[1]/compiled`. See [Module initialization and precompilation](@ref) for important notes. """ -function compilecache(pkg::PkgId, internal_stderr::IO = stderr, internal_stdout::IO = stdout; flags::Cmd=``, reasons::Union{Dict{String,Int},Nothing}=Dict{String,Int}()) +function compilecache(pkg::PkgId, internal_stderr::IO = stderr, internal_stdout::IO = stdout; flags::Cmd=``, reasons::Union{Dict{String,Int},Nothing}=Dict{String,Int}(), isext::Bool=false) @nospecialize internal_stderr internal_stdout path = locate_package(pkg) path === nothing && throw(ArgumentError("$(repr("text/plain", pkg)) not found during precompilation")) - return compilecache(pkg, path, internal_stderr, internal_stdout; flags, reasons) + return compilecache(pkg, path, internal_stderr, internal_stdout; flags, reasons, isext) end const MAX_NUM_PRECOMPILE_FILES = Ref(10) function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, internal_stdout::IO = stdout, keep_loaded_modules::Bool = true; flags::Cmd=``, cacheflags::CacheFlags=CacheFlags(), - reasons::Union{Dict{String,Int},Nothing}=Dict{String,Int}()) + reasons::Union{Dict{String,Int},Nothing}=Dict{String,Int}(), isext::Bool=false) @nospecialize internal_stderr internal_stdout # decide where to put the resulting cache file @@ -3021,7 +3023,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in close(tmpio_o) 
close(tmpio_so) end - p = create_expr_cache(pkg, path, tmppath, tmppath_o, concrete_deps, flags, internal_stderr, internal_stdout) + p = create_expr_cache(pkg, path, tmppath, tmppath_o, concrete_deps, flags, internal_stderr, internal_stdout, isext) if success(p) if cache_objects diff --git a/base/precompilation.jl b/base/precompilation.jl index d3f076633f386..b351ce67cfbad 100644 --- a/base/precompilation.jl +++ b/base/precompilation.jl @@ -435,51 +435,6 @@ function precompilepkgs(pkgs::Vector{String}=String[]; # consider exts of direct deps to be direct deps so that errors are reported append!(direct_deps, keys(filter(d->last(d) in keys(env.project_deps), exts))) - # An extension effectively depends on another extension if it has all the the - # dependencies of that other extension - function expand_dependencies(depsmap) - function visit!(visited, node, all_deps) - if node in visited - return - end - push!(visited, node) - for dep in get(Set{Base.PkgId}, depsmap, node) - if !(dep in all_deps) - push!(all_deps, dep) - visit!(visited, dep, all_deps) - end - end - end - - depsmap_transitive = Dict{Base.PkgId, Set{Base.PkgId}}() - for package in keys(depsmap) - # Initialize a set to keep track of all dependencies for 'package' - all_deps = Set{Base.PkgId}() - visited = Set{Base.PkgId}() - visit!(visited, package, all_deps) - # Update depsmap with the complete set of dependencies for 'package' - depsmap_transitive[package] = all_deps - end - return depsmap_transitive - end - - depsmap_transitive = expand_dependencies(depsmap) - - for (_, extensions_1) in pkg_exts_map - for extension_1 in extensions_1 - deps_ext_1 = depsmap_transitive[extension_1] - for (_, extensions_2) in pkg_exts_map - for extension_2 in extensions_2 - extension_1 == extension_2 && continue - deps_ext_2 = depsmap_transitive[extension_2] - if issubset(deps_ext_2, deps_ext_1) - push!(depsmap[extension_1], extension_2) - end - end - end - end - end - @debug "precompile: deps collected" # this loop must 
be run after the full depsmap has been populated for (pkg, pkg_exts) in pkg_exts_map @@ -852,7 +807,7 @@ function precompilepkgs(pkgs::Vector{String}=String[]; t = @elapsed ret = precompile_pkgs_maybe_cachefile_lock(io, print_lock, fancyprint, pkg_config, pkgspidlocked, hascolor) do Base.with_logger(Base.NullLogger()) do # The false here means we ignore loaded modules, so precompile for a fresh session - Base.compilecache(pkg, sourcepath, std_pipe, std_pipe, false; flags, cacheflags) + Base.compilecache(pkg, sourcepath, std_pipe, std_pipe, false; flags, cacheflags, isext = haskey(exts, pkg)) end end if ret isa Base.PrecompilableError diff --git a/test/loading.jl b/test/loading.jl index bdaca7f9dc69e..b66fd632f23fa 100644 --- a/test/loading.jl +++ b/test/loading.jl @@ -1155,6 +1155,19 @@ end finally copy!(LOAD_PATH, old_load_path) end + + # Extension with cycles in dependencies + code = """ + using CyclicExtensions + Base.get_extension(CyclicExtensions, :ExtA) isa Module || error("expected extension to load") + Base.get_extension(CyclicExtensions, :ExtB) isa Module || error("expected extension to load") + CyclicExtensions.greet() + """ + proj = joinpath(@__DIR__, "project", "Extensions", "CyclicExtensions") + cmd = `$(Base.julia_cmd()) --startup-file=no -e $code` + cmd = addenv(cmd, "JULIA_LOAD_PATH" => proj) + @test occursin("Hello Cycles!", String(read(cmd))) + finally try rm(depot_path, force=true, recursive=true) diff --git a/test/project/Extensions/CyclicExtensions/Manifest.toml b/test/project/Extensions/CyclicExtensions/Manifest.toml new file mode 100644 index 0000000000000..a506825cf7995 --- /dev/null +++ b/test/project/Extensions/CyclicExtensions/Manifest.toml @@ -0,0 +1,21 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.10.4" +manifest_format = "2.0" +project_hash = "ec25ff8df3a5e2212a173c3de2c7d716cc47cd36" + +[[deps.ExtDep]] +deps = ["SomePackage"] +path = "../ExtDep.jl" +uuid = 
"fa069be4-f60b-4d4c-8b95-f8008775090c" +version = "0.1.0" + +[[deps.ExtDep2]] +path = "../ExtDep2" +uuid = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" +version = "0.1.0" + +[[deps.SomePackage]] +path = "../SomePackage" +uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8" +version = "0.1.0" diff --git a/test/project/Extensions/CyclicExtensions/Project.toml b/test/project/Extensions/CyclicExtensions/Project.toml new file mode 100644 index 0000000000000..08d539dcc40ae --- /dev/null +++ b/test/project/Extensions/CyclicExtensions/Project.toml @@ -0,0 +1,13 @@ +name = "CyclicExtensions" +uuid = "17d4f0df-b55c-4714-ac4b-55fa23f7355c" +version = "0.1.0" + +[deps] +ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" + +[weakdeps] +SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8" + +[extensions] +ExtA = ["SomePackage"] +ExtB = ["SomePackage"] diff --git a/test/project/Extensions/CyclicExtensions/ext/ExtA.jl b/test/project/Extensions/CyclicExtensions/ext/ExtA.jl new file mode 100644 index 0000000000000..fa0c0961633cb --- /dev/null +++ b/test/project/Extensions/CyclicExtensions/ext/ExtA.jl @@ -0,0 +1,6 @@ +module ExtA + +using CyclicExtensions +using SomePackage + +end diff --git a/test/project/Extensions/CyclicExtensions/ext/ExtB.jl b/test/project/Extensions/CyclicExtensions/ext/ExtB.jl new file mode 100644 index 0000000000000..8f6da556d39b8 --- /dev/null +++ b/test/project/Extensions/CyclicExtensions/ext/ExtB.jl @@ -0,0 +1,6 @@ +module ExtB + +using CyclicExtensions +using SomePackage + +end diff --git a/test/project/Extensions/CyclicExtensions/src/CyclicExtensions.jl b/test/project/Extensions/CyclicExtensions/src/CyclicExtensions.jl new file mode 100644 index 0000000000000..f1c2ec2077562 --- /dev/null +++ b/test/project/Extensions/CyclicExtensions/src/CyclicExtensions.jl @@ -0,0 +1,7 @@ +module CyclicExtensions + +using ExtDep + +greet() = print("Hello Cycles!") + +end # module CyclicExtensions From 2a2878c143b87e5184565c895d090aab6e9017e9 Mon Sep 17 00:00:00 2001 From: Cody Tapscott 
<84105208+topolarity@users.noreply.github.com> Date: Mon, 30 Sep 2024 14:02:59 -0400 Subject: [PATCH 16/45] TOML: Avoid type-pirating `Base.TOML.Parser` (#55892) Since stdlibs can be duplicated but Base never is, `Base.require_stdlib` makes type piracy even more complicated than it normally would be. To adapt, this changes `TOML.Parser` to be a type defined by the TOML stdlib, so that we can define methods on it without committing type-piracy and avoid problems like Pkg.jl#4017 Resolves https://github.com/JuliaLang/Pkg.jl/issues/4017#issuecomment-2377589989 --- stdlib/TOML/src/TOML.jl | 43 ++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/stdlib/TOML/src/TOML.jl b/stdlib/TOML/src/TOML.jl index 94d2808c0bc24..b37a5ca83c251 100644 --- a/stdlib/TOML/src/TOML.jl +++ b/stdlib/TOML/src/TOML.jl @@ -25,7 +25,7 @@ module Internals end # https://github.com/JuliaLang/julia/issues/36605 -readstring(f::AbstractString) = isfile(f) ? read(f, String) : error(repr(f), ": No such file") +_readstring(f::AbstractString) = isfile(f) ? read(f, String) : error(repr(f), ": No such file") """ Parser() @@ -36,12 +36,14 @@ explicitly create a `Parser` but instead one directly use use will however reuse some internal data structures which can be beneficial for performance if a larger number of small files are parsed. """ -const Parser = Internals.Parser +struct Parser + _p::Internals.Parser{Dates} +end # Dates-enabled constructors -Parser() = Parser{Dates}() -Parser(io::IO) = Parser{Dates}(io) -Parser(str::String; filepath=nothing) = Parser{Dates}(str; filepath) +Parser() = Parser(Internals.Parser{Dates}()) +Parser(io::IO) = Parser(Internals.Parser{Dates}(io)) +Parser(str::String; filepath=nothing) = Parser(Internals.Parser{Dates}(str; filepath)) """ parsefile(f::AbstractString) @@ -53,9 +55,9 @@ Parse file `f` and return the resulting table (dictionary). Throw a See also [`TOML.tryparsefile`](@ref). 
""" parsefile(f::AbstractString) = - Internals.parse(Parser(readstring(f); filepath=abspath(f))) + Internals.parse(Internals.Parser{Dates}(_readstring(f); filepath=abspath(f))) parsefile(p::Parser, f::AbstractString) = - Internals.parse(Internals.reinit!(p, readstring(f); filepath=abspath(f))) + Internals.parse(Internals.reinit!(p._p, _readstring(f); filepath=abspath(f))) """ tryparsefile(f::AbstractString) @@ -67,9 +69,9 @@ Parse file `f` and return the resulting table (dictionary). Return a See also [`TOML.parsefile`](@ref). """ tryparsefile(f::AbstractString) = - Internals.tryparse(Parser(readstring(f); filepath=abspath(f))) + Internals.tryparse(Internals.Parser{Dates}(_readstring(f); filepath=abspath(f))) tryparsefile(p::Parser, f::AbstractString) = - Internals.tryparse(Internals.reinit!(p, readstring(f); filepath=abspath(f))) + Internals.tryparse(Internals.reinit!(p._p, _readstring(f); filepath=abspath(f))) """ parse(x::Union{AbstractString, IO}) @@ -80,10 +82,11 @@ Throw a [`ParserError`](@ref) upon failure. See also [`TOML.tryparse`](@ref). """ +parse(p::Parser) = Internals.parse(p._p) parse(str::AbstractString) = - Internals.parse(Parser(String(str))) + Internals.parse(Internals.Parser{Dates}(String(str))) parse(p::Parser, str::AbstractString) = - Internals.parse(Internals.reinit!(p, String(str))) + Internals.parse(Internals.reinit!(p._p, String(str))) parse(io::IO) = parse(read(io, String)) parse(p::Parser, io::IO) = parse(p, read(io, String)) @@ -96,10 +99,11 @@ Return a [`ParserError`](@ref) upon failure. See also [`TOML.parse`](@ref). 
""" +tryparse(p::Parser) = Internals.tryparse(p._p) tryparse(str::AbstractString) = - Internals.tryparse(Parser(String(str))) + Internals.tryparse(Internals.Parser{Dates}(String(str))) tryparse(p::Parser, str::AbstractString) = - Internals.tryparse(Internals.reinit!(p, String(str))) + Internals.tryparse(Internals.reinit!(p._p, String(str))) tryparse(io::IO) = tryparse(read(io, String)) tryparse(p::Parser, io::IO) = tryparse(p, read(io, String)) @@ -131,4 +135,17 @@ supported type. """ const print = Internals.Printer.print +public Parser, parsefile, tryparsefile, parse, tryparse, ParserError, print + +# These methods are private Base interfaces, but we do our best to support them over +# the TOML stdlib types anyway to minimize downstream breakage. +Base.TOMLCache(p::Parser) = Base.TOMLCache(p._p, Dict{String, Base.CachedTOMLDict}()) +Base.TOMLCache(p::Parser, d::Base.CachedTOMLDict) = Base.TOMLCache(p._p, d) +Base.TOMLCache(p::Parser, d::Dict{String, Dict{String, Any}}) = Base.TOMLCache(p._p, d) + +Internals.reinit!(p::Parser, str::String; filepath::Union{Nothing, String}=nothing) = + Internals.reinit!(p._p, str; filepath) +Internals.parse(p::Parser) = Internals.parse(p._p) +Internals.tryparse(p::Parser) = Internals.tryparse(p._p) + end From e500754118c64ecc16836f426c251582fddbffb5 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 25 Sep 2024 11:22:23 -0400 Subject: [PATCH 17/45] [FileWatching] fix PollingFileWatcher design and add workaround for a stat bug What started as an innocent fix for a stat bug on Apple (#48667) turned into a full-blown investigation into the design problems with the libuv backend for PollingFileWatcher, and writing my own implementation of it instead which could avoid those single-threaded concurrency bugs. 
--- base/libuv.jl | 8 +- base/reflection.jl | 3 +- base/stat.jl | 111 +++++------ src/sys.c | 1 - stdlib/FileWatching/src/FileWatching.jl | 240 ++++++++++++++---------- stdlib/FileWatching/test/runtests.jl | 9 +- test/file.jl | 10 + 7 files changed, 215 insertions(+), 167 deletions(-) diff --git a/base/libuv.jl b/base/libuv.jl index 3c9f79dfa7b2c..306854e9f4436 100644 --- a/base/libuv.jl +++ b/base/libuv.jl @@ -26,10 +26,10 @@ for r in uv_req_types @eval const $(Symbol("_sizeof_", lowercase(string(r)))) = uv_sizeof_req($r) end -uv_handle_data(handle) = ccall(:jl_uv_handle_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle) -uv_req_data(handle) = ccall(:jl_uv_req_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle) -uv_req_set_data(req, data) = ccall(:jl_uv_req_set_data, Cvoid, (Ptr{Cvoid}, Any), req, data) -uv_req_set_data(req, data::Ptr{Cvoid}) = ccall(:jl_uv_req_set_data, Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}), req, data) +uv_handle_data(handle) = ccall(:uv_handle_get_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle) +uv_req_data(handle) = ccall(:uv_req_get_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle) +uv_req_set_data(req, data) = ccall(:uv_req_set_data, Cvoid, (Ptr{Cvoid}, Any), req, data) +uv_req_set_data(req, data::Ptr{Cvoid}) = ccall(:uv_req_set_data, Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}), req, data) macro handle_as(hand, typ) return quote diff --git a/base/reflection.jl b/base/reflection.jl index fe48b6f9aa6b9..be0209872db34 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -964,7 +964,7 @@ use it in the following manner to summarize information about a struct: julia> structinfo(T) = [(fieldoffset(T,i), fieldname(T,i), fieldtype(T,i)) for i = 1:fieldcount(T)]; julia> structinfo(Base.Filesystem.StatStruct) -13-element Vector{Tuple{UInt64, Symbol, Type}}: +14-element Vector{Tuple{UInt64, Symbol, Type}}: (0x0000000000000000, :desc, Union{RawFD, String}) (0x0000000000000008, :device, UInt64) (0x0000000000000010, :inode, UInt64) @@ -978,6 +978,7 @@ julia> structinfo(Base.Filesystem.StatStruct) 
(0x0000000000000050, :blocks, Int64) (0x0000000000000058, :mtime, Float64) (0x0000000000000060, :ctime, Float64) + (0x0000000000000068, :ioerrno, Int32) ``` """ fieldoffset(x::DataType, idx::Integer) = (@_foldable_meta; ccall(:jl_get_field_offset, Csize_t, (Any, Cint), x, idx)) diff --git a/base/stat.jl b/base/stat.jl index 506b5644dccbc..c6fb239a96404 100644 --- a/base/stat.jl +++ b/base/stat.jl @@ -63,6 +63,7 @@ struct StatStruct blocks :: Int64 mtime :: Float64 ctime :: Float64 + ioerrno :: Int32 end @eval function Base.:(==)(x::StatStruct, y::StatStruct) # do not include `desc` in equality or hash @@ -80,22 +81,23 @@ end end) end -StatStruct() = StatStruct("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) -StatStruct(buf::Union{Vector{UInt8},Ptr{UInt8}}) = StatStruct("", buf) -StatStruct(desc::Union{AbstractString, OS_HANDLE}, buf::Union{Vector{UInt8},Ptr{UInt8}}) = StatStruct( +StatStruct() = StatStruct("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Base.UV_ENOENT) +StatStruct(buf::Union{Memory{UInt8},Vector{UInt8},Ptr{UInt8}}, ioerrno::Int32) = StatStruct("", buf, ioerrno) +StatStruct(desc::Union{AbstractString, OS_HANDLE}, buf::Union{Memory{UInt8},Vector{UInt8},Ptr{UInt8}}, ioerrno::Int32) = StatStruct( desc isa OS_HANDLE ? desc : String(desc), - ccall(:jl_stat_dev, UInt32, (Ptr{UInt8},), buf), - ccall(:jl_stat_ino, UInt32, (Ptr{UInt8},), buf), - ccall(:jl_stat_mode, UInt32, (Ptr{UInt8},), buf), - ccall(:jl_stat_nlink, UInt32, (Ptr{UInt8},), buf), - ccall(:jl_stat_uid, UInt32, (Ptr{UInt8},), buf), - ccall(:jl_stat_gid, UInt32, (Ptr{UInt8},), buf), - ccall(:jl_stat_rdev, UInt32, (Ptr{UInt8},), buf), - ccall(:jl_stat_size, UInt64, (Ptr{UInt8},), buf), - ccall(:jl_stat_blksize, UInt64, (Ptr{UInt8},), buf), - ccall(:jl_stat_blocks, UInt64, (Ptr{UInt8},), buf), - ccall(:jl_stat_mtime, Float64, (Ptr{UInt8},), buf), - ccall(:jl_stat_ctime, Float64, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_dev, UInt32, (Ptr{UInt8},), buf), + ioerrno != 0 ? 
zero(UInt32) : ccall(:jl_stat_ino, UInt32, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_mode, UInt32, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_nlink, UInt32, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_uid, UInt32, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_gid, UInt32, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_rdev, UInt32, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(UInt64) : ccall(:jl_stat_size, UInt64, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(UInt64) : ccall(:jl_stat_blksize, UInt64, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(UInt64) : ccall(:jl_stat_blocks, UInt64, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(Float64) : ccall(:jl_stat_mtime, Float64, (Ptr{UInt8},), buf), + ioerrno != 0 ? zero(Float64) : ccall(:jl_stat_ctime, Float64, (Ptr{UInt8},), buf), + ioerrno ) function iso_datetime_with_relative(t, tnow) @@ -130,35 +132,41 @@ end function show_statstruct(io::IO, st::StatStruct, oneline::Bool) print(io, oneline ? 
"StatStruct(" : "StatStruct for ") show(io, st.desc) - oneline || print(io, "\n ") - print(io, " size: ", st.size, " bytes") - oneline || print(io, "\n") - print(io, " device: ", st.device) - oneline || print(io, "\n ") - print(io, " inode: ", st.inode) - oneline || print(io, "\n ") - print(io, " mode: 0o", string(filemode(st), base = 8, pad = 6), " (", filemode_string(st), ")") - oneline || print(io, "\n ") - print(io, " nlink: ", st.nlink) - oneline || print(io, "\n ") - print(io, " uid: $(st.uid)") - username = getusername(st.uid) - username === nothing || print(io, " (", username, ")") - oneline || print(io, "\n ") - print(io, " gid: ", st.gid) - groupname = getgroupname(st.gid) - groupname === nothing || print(io, " (", groupname, ")") - oneline || print(io, "\n ") - print(io, " rdev: ", st.rdev) - oneline || print(io, "\n ") - print(io, " blksz: ", st.blksize) - oneline || print(io, "\n") - print(io, " blocks: ", st.blocks) - tnow = round(UInt, time()) - oneline || print(io, "\n ") - print(io, " mtime: ", iso_datetime_with_relative(st.mtime, tnow)) - oneline || print(io, "\n ") - print(io, " ctime: ", iso_datetime_with_relative(st.ctime, tnow)) + code = st.ioerrno + if code != 0 + print(io, oneline ? 
" " : "\n ") + print(io, Base.uverrorname(code), ": ", Base.struverror(code)) + else + oneline || print(io, "\n ") + print(io, " size: ", st.size, " bytes") + oneline || print(io, "\n") + print(io, " device: ", st.device) + oneline || print(io, "\n ") + print(io, " inode: ", st.inode) + oneline || print(io, "\n ") + print(io, " mode: 0o", string(filemode(st), base = 8, pad = 6), " (", filemode_string(st), ")") + oneline || print(io, "\n ") + print(io, " nlink: ", st.nlink) + oneline || print(io, "\n ") + print(io, " uid: $(st.uid)") + username = getusername(st.uid) + username === nothing || print(io, " (", username, ")") + oneline || print(io, "\n ") + print(io, " gid: ", st.gid) + groupname = getgroupname(st.gid) + groupname === nothing || print(io, " (", groupname, ")") + oneline || print(io, "\n ") + print(io, " rdev: ", st.rdev) + oneline || print(io, "\n ") + print(io, " blksz: ", st.blksize) + oneline || print(io, "\n") + print(io, " blocks: ", st.blocks) + tnow = round(UInt, time()) + oneline || print(io, "\n ") + print(io, " mtime: ", iso_datetime_with_relative(st.mtime, tnow)) + oneline || print(io, "\n ") + print(io, " ctime: ", iso_datetime_with_relative(st.ctime, tnow)) + end oneline && print(io, ")") return nothing end @@ -168,18 +176,13 @@ show(io::IO, ::MIME"text/plain", st::StatStruct) = show_statstruct(io, st, false # stat & lstat functions +checkstat(s::StatStruct) = Int(s.ioerrno) in (0, Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL) ? 
s : uv_error(string("stat(", repr(s.desc), ")"), s.ioerrno) + macro stat_call(sym, arg1type, arg) return quote - stat_buf = zeros(UInt8, Int(ccall(:jl_sizeof_stat, Int32, ()))) + stat_buf = fill!(Memory{UInt8}(undef, Int(ccall(:jl_sizeof_stat, Int32, ()))), 0x00) r = ccall($(Expr(:quote, sym)), Int32, ($(esc(arg1type)), Ptr{UInt8}), $(esc(arg)), stat_buf) - if !(r in (0, Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL)) - uv_error(string("stat(", repr($(esc(arg))), ")"), r) - end - st = StatStruct($(esc(arg)), stat_buf) - if ispath(st) != (r == 0) - error("stat returned zero type for a valid path") - end - return st + return checkstat(StatStruct($(esc(arg)), stat_buf, r)) end end @@ -334,7 +337,7 @@ Return `true` if a valid filesystem entity exists at `path`, otherwise returns `false`. This is the generalization of [`isfile`](@ref), [`isdir`](@ref) etc. """ -ispath(st::StatStruct) = filemode(st) & 0xf000 != 0x0000 +ispath(st::StatStruct) = st.ioerrno == 0 function ispath(path::String) # We use `access()` and `F_OK` to determine if a given path exists. `F_OK` comes from `unistd.h`. 
F_OK = 0x00 diff --git a/src/sys.c b/src/sys.c index b54edc32b32b6..fa9054bb93e9a 100644 --- a/src/sys.c +++ b/src/sys.c @@ -102,7 +102,6 @@ JL_DLLEXPORT int32_t jl_nb_available(ios_t *s) // --- dir/file stuff --- -JL_DLLEXPORT int jl_sizeof_uv_fs_t(void) { return sizeof(uv_fs_t); } JL_DLLEXPORT char *jl_uv_fs_t_ptr(uv_fs_t *req) { return (char*)req->ptr; } JL_DLLEXPORT char *jl_uv_fs_t_path(uv_fs_t *req) { return (char*)req->path; } diff --git a/stdlib/FileWatching/src/FileWatching.jl b/stdlib/FileWatching/src/FileWatching.jl index 0c987ad01c828..4ea6fcedd59bb 100644 --- a/stdlib/FileWatching/src/FileWatching.jl +++ b/stdlib/FileWatching/src/FileWatching.jl @@ -22,11 +22,11 @@ export trymkpidlock import Base: @handle_as, wait, close, eventloop, notify_error, IOError, - _sizeof_uv_poll, _sizeof_uv_fs_poll, _sizeof_uv_fs_event, _uv_hook_close, uv_error, _UVError, - iolock_begin, iolock_end, associate_julia_struct, disassociate_julia_struct, - preserve_handle, unpreserve_handle, isreadable, iswritable, isopen, - |, getproperty, propertynames -import Base.Filesystem.StatStruct + uv_req_data, uv_req_set_data, associate_julia_struct, disassociate_julia_struct, + _sizeof_uv_poll, _sizeof_uv_fs, _sizeof_uv_fs_event, _uv_hook_close, uv_error, _UVError, + iolock_begin, iolock_end, preserve_handle, unpreserve_handle, + isreadable, iswritable, isopen, |, getproperty, propertynames +import Base.Filesystem: StatStruct, uv_fs_req_cleanup if Sys.iswindows() import Base.WindowsRawSocket end @@ -126,31 +126,30 @@ mutable struct FolderMonitor end end +# this is similar to uv_fs_poll, but strives to avoid the design mistakes that make it unsuitable for any usable purpose +# https://github.com/libuv/libuv/issues/4543 mutable struct PollingFileWatcher - @atomic handle::Ptr{Cvoid} file::String - interval::UInt32 - notify::Base.ThreadSynchronizer - active::Bool - curr_error::Int32 - curr_stat::StatStruct + interval::Float64 + const notify::Base.ThreadSynchronizer # lock protects all 
fields which can be changed (including interval and file, if you really must) + timer::Union{Nothing,Timer} + const stat_req::Memory{UInt8} + active::Bool # whether there is already an uv_fspollcb in-flight, so to speak + closed::Bool # whether the user has explicitly destroyed this + ioerrno::Int32 # the stat errno as of the last result + prev_stat::StatStruct # the stat as of the last successful result PollingFileWatcher(file::AbstractString, interval::Float64=5.007) = PollingFileWatcher(String(file), interval) function PollingFileWatcher(file::String, interval::Float64=5.007) # same default as nodejs - handle = Libc.malloc(_sizeof_uv_fs_poll) - this = new(handle, file, round(UInt32, interval * 1000), Base.ThreadSynchronizer(), false, 0, StatStruct()) - associate_julia_struct(handle, this) - iolock_begin() - err = ccall(:uv_fs_poll_init, Int32, (Ptr{Cvoid}, Ptr{Cvoid}), eventloop(), handle) - if err != 0 - Libc.free(handle) - throw(_UVError("PollingFileWatcher", err)) - end - finalizer(uvfinalize, this) - iolock_end() + stat_req = Memory{UInt8}(undef, Int(_sizeof_uv_fs)) + this = new(file, interval, Base.ThreadSynchronizer(), nothing, stat_req, false, false, 0, StatStruct()) + uv_req_set_data(stat_req, this) + wait(this) # initialize with the current stat before return return this end end +Base.stat(pfw::PollingFileWatcher) = Base.checkstat(@lock pfw.notify pfw.prev_stat) + mutable struct _FDWatcher @atomic handle::Ptr{Cvoid} fdnum::Int # this is NOT the file descriptor @@ -327,7 +326,7 @@ function close(t::FDWatcher) close(t.watcher, mask) end -function uvfinalize(uv::Union{FileMonitor, FolderMonitor, PollingFileWatcher}) +function uvfinalize(uv::Union{FileMonitor, FolderMonitor}) iolock_begin() if uv.handle != C_NULL disassociate_julia_struct(uv) # close (and free) without notify @@ -336,7 +335,7 @@ function uvfinalize(uv::Union{FileMonitor, FolderMonitor, PollingFileWatcher}) iolock_end() end -function close(t::Union{FileMonitor, FolderMonitor, 
PollingFileWatcher}) +function close(t::Union{FileMonitor, FolderMonitor}) iolock_begin() if t.handle != C_NULL ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t.handle) @@ -344,6 +343,21 @@ function close(t::Union{FileMonitor, FolderMonitor, PollingFileWatcher}) iolock_end() end +function close(pfw::PollingFileWatcher) + timer = nothing + lock(pfw.notify) + try + pfw.closed = true + notify(pfw.notify, false) + timer = pfw.timer + pfw.timer = nothing + finally + unlock(pfw.notify) + end + timer === nothing || close(timer) + nothing +end + function _uv_hook_close(uv::_FDWatcher) # fyi: jl_atexit_hook can cause this to get called too Libc.free(@atomicswap :monotonic uv.handle = C_NULL) @@ -351,18 +365,6 @@ function _uv_hook_close(uv::_FDWatcher) nothing end -function _uv_hook_close(uv::PollingFileWatcher) - lock(uv.notify) - try - uv.active = false - Libc.free(@atomicswap :monotonic uv.handle = C_NULL) - notify(uv.notify, StatStruct()) - finally - unlock(uv.notify) - end - nothing -end - function _uv_hook_close(uv::FileMonitor) lock(uv.notify) try @@ -388,7 +390,7 @@ end isopen(fm::FileMonitor) = fm.handle != C_NULL isopen(fm::FolderMonitor) = fm.handle != C_NULL -isopen(pfw::PollingFileWatcher) = pfw.handle != C_NULL +isopen(pfw::PollingFileWatcher) = !pfw.closed isopen(pfw::_FDWatcher) = pfw.refcount != (0, 0) isopen(pfw::FDWatcher) = !pfw.mask.timedout @@ -449,21 +451,50 @@ function uv_pollcb(handle::Ptr{Cvoid}, status::Int32, events::Int32) nothing end -function uv_fspollcb(handle::Ptr{Cvoid}, status::Int32, prev::Ptr, curr::Ptr) - t = @handle_as handle PollingFileWatcher - old_status = t.curr_error - t.curr_error = status - if status == 0 - t.curr_stat = StatStruct(convert(Ptr{UInt8}, curr)) - end - if status == 0 || status != old_status - prev_stat = StatStruct(convert(Ptr{UInt8}, prev)) - lock(t.notify) - try - notify(t.notify, prev_stat) - finally - unlock(t.notify) +function uv_fspollcb(req::Ptr{Cvoid}) + pfw = 
unsafe_pointer_to_objref(uv_req_data(req))::PollingFileWatcher + pfw.active = false + unpreserve_handle(pfw) + @assert pointer(pfw.stat_req) == req + r = Int32(ccall(:uv_fs_get_result, Cssize_t, (Ptr{Cvoid},), req)) + statbuf = ccall(:uv_fs_get_statbuf, Ptr{UInt8}, (Ptr{Cvoid},), req) + curr_stat = StatStruct(pfw.file, statbuf, r) + uv_fs_req_cleanup(req) + lock(pfw.notify) + try + if !isempty(pfw.notify) # discard the update if nobody watching + if pfw.ioerrno != r || (r == 0 && pfw.prev_stat != curr_stat) + if r == 0 + pfw.prev_stat = curr_stat + end + pfw.ioerrno = r + notify(pfw.notify, true) + end + pfw.timer = Timer(pfw.interval) do t + # async task + iolock_begin() + lock(pfw.notify) + try + if pfw.timer === t # use identity check to test if this callback is stale by the time we got the lock + pfw.timer = nothing + @assert !pfw.active + if isopen(pfw) && !isempty(pfw.notify) + preserve_handle(pfw) + err = ccall(:uv_fs_stat, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}), + eventloop(), pfw.stat_req, pfw.file, uv_jl_fspollcb) + err == 0 || notify(pfw.notify, _UVError("PollingFileWatcher (start)", err), error=true) # likely just ENOMEM + pfw.active = true + end + end + finally + unlock(pfw.notify) + end + iolock_end() + nothing + end end + finally + unlock(pfw.notify) end nothing end @@ -475,7 +506,7 @@ global uv_jl_fseventscb_folder::Ptr{Cvoid} function __init__() global uv_jl_pollcb = @cfunction(uv_pollcb, Cvoid, (Ptr{Cvoid}, Cint, Cint)) - global uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid}, Cint, Ptr{Cvoid}, Ptr{Cvoid})) + global uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid},)) global uv_jl_fseventscb_file = @cfunction(uv_fseventscb_file, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32)) global uv_jl_fseventscb_folder = @cfunction(uv_fseventscb_folder, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32)) @@ -504,35 +535,6 @@ function start_watching(t::_FDWatcher) nothing end -function start_watching(t::PollingFileWatcher) - 
iolock_begin() - t.handle == C_NULL && throw(ArgumentError("PollingFileWatcher is closed")) - if !t.active - uv_error("PollingFileWatcher (start)", - ccall(:uv_fs_poll_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, UInt32), - t.handle, uv_jl_fspollcb::Ptr{Cvoid}, t.file, t.interval)) - t.active = true - end - iolock_end() - nothing -end - -function stop_watching(t::PollingFileWatcher) - iolock_begin() - lock(t.notify) - try - if t.active && isempty(t.notify) - t.active = false - uv_error("PollingFileWatcher (stop)", - ccall(:uv_fs_poll_stop, Int32, (Ptr{Cvoid},), t.handle)) - end - finally - unlock(t.notify) - end - iolock_end() - nothing -end - function start_watching(t::FileMonitor) iolock_begin() t.handle == C_NULL && throw(ArgumentError("FileMonitor is closed")) @@ -640,28 +642,65 @@ end function wait(pfw::PollingFileWatcher) iolock_begin() - preserve_handle(pfw) lock(pfw.notify) - local prevstat + prevstat = pfw.prev_stat + havechange = false + timer = nothing try - start_watching(pfw) + # we aren't too strict about the first interval after `wait`, but rather always + # check right away to see if it had immediately changed again, and then repeatedly + # after interval again until success + pfw.closed && throw(ArgumentError("PollingFileWatcher is closed")) + timer = pfw.timer + pfw.timer = nothing # disable Timer callback + # start_watching + if !pfw.active + preserve_handle(pfw) + err = ccall(:uv_fs_stat, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}), + eventloop(), pfw.stat_req, pfw.file, uv_jl_fspollcb) + err == 0 || uv_error("PollingFileWatcher (start)", err) # likely just ENOMEM + pfw.active = true + end iolock_end() - prevstat = wait(pfw.notify)::StatStruct + havechange = wait(pfw.notify)::Bool unlock(pfw.notify) iolock_begin() - lock(pfw.notify) - finally - unlock(pfw.notify) - unpreserve_handle(pfw) + catch + # stop_watching: cleanup any timers from before or after starting this wait before it failed, if there are no other watchers + latetimer = 
nothing + try + if isempty(pfw.notify) + latetimer = pfw.timer + pfw.timer = nothing + end + finally + unlock(pfw.notify) + end + if timer !== nothing || latetimer !== nothing + iolock_end() + timer === nothing || close(timer) + latetimer === nothing || close(latetimer) + iolock_begin() + end + rethrow() end - stop_watching(pfw) iolock_end() - if pfw.handle == C_NULL + timer === nothing || close(timer) # cleanup resources so we don't hang on exit + if !havechange # user canceled by calling close return prevstat, EOFError() - elseif pfw.curr_error != 0 - return prevstat, _UVError("PollingFileWatcher", pfw.curr_error) + end + # grab the most up-to-date stat result as of this time, even if it was a bit newer than the notify call + lock(pfw.notify) + currstat = pfw.prev_stat + ioerrno = pfw.ioerrno + unlock(pfw.notify) + if ioerrno == 0 + @assert currstat.ioerrno == 0 + return prevstat, currstat + elseif ioerrno in (Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL) + return prevstat, StatStruct(pfw.file, Ptr{UInt8}(0), ioerrno) else - return prevstat, pfw.curr_stat + return prevstat, _UVError("PollingFileWatcher", ioerrno) end end @@ -880,9 +919,9 @@ The `previous` status is always a `StatStruct`, but it may have all of the field The `current` status object may be a `StatStruct`, an `EOFError` (indicating the timeout elapsed), or some other `Exception` subtype (if the `stat` operation failed - for example, if the path does not exist). -To determine when a file was modified, compare `current isa StatStruct && mtime(prev) != mtime(current)` to detect -notification of changes. However, using [`watch_file`](@ref) for this operation is preferred, since -it is more reliable and efficient, although in some situations it may not be available. +To determine when a file was modified, compare `!(current isa StatStruct && prev == current)` to detect +notification of changes to the mtime or inode. 
However, using [`watch_file`](@ref) for this operation +is preferred, since it is more reliable and efficient, although in some situations it may not be available. """ function poll_file(s::AbstractString, interval_seconds::Real=5.007, timeout_s::Real=-1) pfw = PollingFileWatcher(s, Float64(interval_seconds)) @@ -893,12 +932,7 @@ function poll_file(s::AbstractString, interval_seconds::Real=5.007, timeout_s::R close(pfw) end end - statdiff = wait(pfw) - if isa(statdiff[2], IOError) - # file didn't initially exist, continue watching for it to be created (or the error to change) - statdiff = wait(pfw) - end - return statdiff + return wait(pfw) finally close(pfw) @isdefined(timer) && close(timer) diff --git a/stdlib/FileWatching/test/runtests.jl b/stdlib/FileWatching/test/runtests.jl index 2592aea024386..c9d7a4317fd08 100644 --- a/stdlib/FileWatching/test/runtests.jl +++ b/stdlib/FileWatching/test/runtests.jl @@ -2,6 +2,7 @@ using Test, FileWatching using Base: uv_error, Experimental +using Base.Filesystem: StatStruct @testset "FileWatching" begin @@ -218,7 +219,7 @@ function test_timeout(tval) @async test_file_poll(channel, 10, tval) tr = take!(channel) end - @test tr[1] === Base.Filesystem.StatStruct() && tr[2] === EOFError() + @test ispath(tr[1]::StatStruct) && tr[2] === EOFError() @test tval <= t_elapsed end @@ -231,7 +232,7 @@ function test_touch(slval) write(f, "Hello World\n") close(f) tr = take!(channel) - @test ispath(tr[1]) && ispath(tr[2]) + @test ispath(tr[1]::StatStruct) && ispath(tr[2]::StatStruct) fetch(t) end @@ -435,8 +436,8 @@ end @test_throws(Base._UVError("FolderMonitor (start)", Base.UV_ENOENT), watch_folder("____nonexistent_file", 10)) @test(@elapsed( - @test(poll_file("____nonexistent_file", 1, 3.1) === - (Base.Filesystem.StatStruct(), EOFError()))) > 3) + @test(poll_file("____nonexistent_file", 1, 3.1) == + (StatStruct(), EOFError()))) > 3) unwatch_folder(dir) @test isempty(FileWatching.watched_folders) diff --git a/test/file.jl b/test/file.jl 
index de258c92e02bc..a4262c4eaaa21 100644 --- a/test/file.jl +++ b/test/file.jl @@ -2128,6 +2128,16 @@ Base.joinpath(x::URI50890) = URI50890(x.f) @test !isnothing(Base.Filesystem.getusername(s.uid)) @test !isnothing(Base.Filesystem.getgroupname(s.gid)) end + s = Base.Filesystem.StatStruct() + stat_show_str = sprint(show, s) + stat_show_str_multi = sprint(show, MIME("text/plain"), s) + @test startswith(stat_show_str, "StatStruct(\"\" ENOENT: ") && endswith(stat_show_str, ")") + @test startswith(stat_show_str_multi, "StatStruct for \"\"\n ENOENT: ") && !endswith(stat_show_str_multi, r"\s") + s = Base.Filesystem.StatStruct("my/test", Ptr{UInt8}(0), Int32(Base.UV_ENOTDIR)) + stat_show_str = sprint(show, s) + stat_show_str_multi = sprint(show, MIME("text/plain"), s) + @test startswith(stat_show_str, "StatStruct(\"my/test\" ENOTDIR: ") && endswith(stat_show_str, ")") + @test startswith(stat_show_str_multi, "StatStruct for \"my/test\"\n ENOTDIR: ") && !endswith(stat_show_str_multi, r"\s") end @testset "diskstat() works" begin From b6e0136466396bc781406c0ab2f036f64cc818d7 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Thu, 26 Sep 2024 13:57:52 -0400 Subject: [PATCH 18/45] [FileWatching] fix FileMonitor similarly and improve pidfile reliability Previously pidfile used the same poll_interval as sleep to detect if this code made any concurrency mistakes, but we do not really need to do that once FileMonitor is fixed to be reliable in the presence of parallel concurrency (instead of using watch_file). 
--- stdlib/FileWatching/src/FileWatching.jl | 108 ++++++++++-------------- stdlib/FileWatching/src/pidfile.jl | 46 +++++++--- stdlib/FileWatching/test/runtests.jl | 11 +-- 3 files changed, 84 insertions(+), 81 deletions(-) diff --git a/stdlib/FileWatching/src/FileWatching.jl b/stdlib/FileWatching/src/FileWatching.jl index 4ea6fcedd59bb..b24f352943ec5 100644 --- a/stdlib/FileWatching/src/FileWatching.jl +++ b/stdlib/FileWatching/src/FileWatching.jl @@ -38,13 +38,13 @@ const UV_CHANGE = Int32(2) struct FileEvent renamed::Bool changed::Bool - timedout::Bool + timedout::Bool # aka canceled FileEvent(r::Bool, c::Bool, t::Bool) = new(r, c, t) end FileEvent() = FileEvent(false, false, true) FileEvent(flags::Integer) = FileEvent((flags & UV_RENAME) != 0, (flags & UV_CHANGE) != 0, - false) + iszero(flags)) |(a::FileEvent, b::FileEvent) = FileEvent(a.renamed | b.renamed, a.changed | b.changed, @@ -80,23 +80,26 @@ iswritable(f::FDEvent) = f.writable mutable struct FileMonitor @atomic handle::Ptr{Cvoid} - file::String - notify::Base.ThreadSynchronizer - events::Int32 - active::Bool + const file::String + const notify::Base.ThreadSynchronizer + events::Int32 # accumulator for events that occurred since the last wait call, similar to Event with autoreset + ioerrno::Int32 # record the error, if any occurs (unlikely) FileMonitor(file::AbstractString) = FileMonitor(String(file)) function FileMonitor(file::String) handle = Libc.malloc(_sizeof_uv_fs_event) - this = new(handle, file, Base.ThreadSynchronizer(), 0, false) + this = new(handle, file, Base.ThreadSynchronizer(), 0, 0) associate_julia_struct(handle, this) iolock_begin() err = ccall(:uv_fs_event_init, Cint, (Ptr{Cvoid}, Ptr{Cvoid}), eventloop(), handle) if err != 0 Libc.free(handle) - throw(_UVError("FileMonitor", err)) + uv_error("FileMonitor", err) end - iolock_end() finalizer(uvfinalize, this) + uv_error("FileMonitor (start)", + ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32), + this.handle, 
uv_jl_fseventscb_file::Ptr{Cvoid}, file, 0)) + iolock_end() return this end end @@ -104,8 +107,8 @@ end mutable struct FolderMonitor @atomic handle::Ptr{Cvoid} # notify::Channel{Any} # eltype = Union{Pair{String, FileEvent}, IOError} - notify::Base.ThreadSynchronizer - channel::Vector{Any} # eltype = Pair{String, FileEvent} + const notify::Base.ThreadSynchronizer + const channel::Vector{Any} # eltype = Pair{String, FileEvent} FolderMonitor(folder::AbstractString) = FolderMonitor(String(folder)) function FolderMonitor(folder::String) handle = Libc.malloc(_sizeof_uv_fs_event) @@ -152,9 +155,9 @@ Base.stat(pfw::PollingFileWatcher) = Base.checkstat(@lock pfw.notify pfw.prev_st mutable struct _FDWatcher @atomic handle::Ptr{Cvoid} - fdnum::Int # this is NOT the file descriptor + const fdnum::Int # this is NOT the file descriptor refcount::Tuple{Int, Int} - notify::Base.ThreadSynchronizer + const notify::Base.ThreadSynchronizer events::Int32 active::Tuple{Bool, Bool} @@ -275,7 +278,7 @@ end mutable struct FDWatcher # WARNING: make sure `close` has been manually called on this watcher before closing / destroying `fd` - watcher::_FDWatcher + const watcher::_FDWatcher mask::FDEvent function FDWatcher(fd::RawFD, readable::Bool, writable::Bool) return FDWatcher(fd, FDEvent(readable, writable, false, false)) @@ -368,9 +371,8 @@ end function _uv_hook_close(uv::FileMonitor) lock(uv.notify) try - uv.active = false Libc.free(@atomicswap :monotonic uv.handle = C_NULL) - notify(uv.notify, FileEvent()) + notify(uv.notify) finally unlock(uv.notify) end @@ -399,10 +401,12 @@ function uv_fseventscb_file(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, st lock(t.notify) try if status != 0 + t.ioerrno = status notify_error(t.notify, _UVError("FileMonitor", status)) - else - t.events |= events - notify(t.notify, FileEvent(events)) + uvfinalize(t) + elseif events != t.events + events = t.events |= events + notify(t.notify, all=false) end finally unlock(t.notify) @@ -535,35 +539,6 @@ 
function start_watching(t::_FDWatcher) nothing end -function start_watching(t::FileMonitor) - iolock_begin() - t.handle == C_NULL && throw(ArgumentError("FileMonitor is closed")) - if !t.active - uv_error("FileMonitor (start)", - ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32), - t.handle, uv_jl_fseventscb_file::Ptr{Cvoid}, t.file, 0)) - t.active = true - end - iolock_end() - nothing -end - -function stop_watching(t::FileMonitor) - iolock_begin() - lock(t.notify) - try - if t.active && isempty(t.notify) - t.active = false - uv_error("FileMonitor (stop)", - ccall(:uv_fs_event_stop, Int32, (Ptr{Cvoid},), t.handle)) - end - finally - unlock(t.notify) - end - iolock_end() - nothing -end - # n.b. this _wait may return spuriously early with a timedout event function _wait(fdw::_FDWatcher, mask::FDEvent) iolock_begin() @@ -705,26 +680,23 @@ function wait(pfw::PollingFileWatcher) end function wait(m::FileMonitor) - iolock_begin() + m.handle == C_NULL && throw(EOFError()) preserve_handle(m) lock(m.notify) - local events try - start_watching(m) - iolock_end() - events = wait(m.notify)::FileEvent - events |= FileEvent(m.events) - m.events = 0 - unlock(m.notify) - iolock_begin() - lock(m.notify) + while true + m.handle == C_NULL && throw(EOFError()) + events = @atomicswap :not_atomic m.events = 0 + events == 0 || return FileEvent(events) + if m.ioerrno != 0 + uv_error("FileMonitor", m.ioerrno) + end + wait(m.notify) + end finally unlock(m.notify) unpreserve_handle(m) end - stop_watching(m) - iolock_end() - return events end function wait(m::FolderMonitor) @@ -743,6 +715,7 @@ function wait(m::FolderMonitor) end return evt::Pair{String, FileEvent} end +Base.take!(m::FolderMonitor) = wait(m) # Channel-like API """ @@ -823,7 +796,12 @@ function watch_file(s::String, timeout_s::Float64=-1.0) close(fm) end end - return wait(fm) + try + return wait(fm) + catch ex + ex isa EOFError && return FileEvent() + rethrow() + end finally close(fm) @isdefined(timer) && 
close(timer) @@ -851,7 +829,7 @@ This behavior of this function varies slightly across platforms. See """ watch_folder(s::AbstractString, timeout_s::Real=-1) = watch_folder(String(s), timeout_s) function watch_folder(s::String, timeout_s::Real=-1) - fm = get!(watched_folders, s) do + fm = @lock watched_folders get!(watched_folders[], s) do return FolderMonitor(s) end local timer @@ -898,12 +876,12 @@ It is not recommended to do this while another task is waiting for """ unwatch_folder(s::AbstractString) = unwatch_folder(String(s)) function unwatch_folder(s::String) - fm = pop!(watched_folders, s, nothing) + fm = @lock watched_folders pop!(watched_folders[], s, nothing) fm === nothing || close(fm) nothing end -const watched_folders = Dict{String, FolderMonitor}() +const watched_folders = Lockable(Dict{String, FolderMonitor}()) """ poll_file(path::AbstractString, interval_s::Real=5.007, timeout_s::Real=-1) -> (previous::StatStruct, current) diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl index 4c821a3d897e4..95b8f20face29 100644 --- a/stdlib/FileWatching/src/pidfile.jl +++ b/stdlib/FileWatching/src/pidfile.jl @@ -4,14 +4,14 @@ module Pidfile export mkpidlock, trymkpidlock using Base: - IOError, UV_EEXIST, UV_ESRCH, + IOError, UV_EEXIST, UV_ESRCH, UV_ENOENT, Process using Base.Filesystem: File, open, JL_O_CREAT, JL_O_RDWR, JL_O_RDONLY, JL_O_EXCL, rename, samefile, path_separator -using ..FileWatching: watch_file +using ..FileWatching: FileMonitor using Base.Sys: iswindows """ @@ -256,19 +256,43 @@ function open_exclusive(path::String; end end # fall-back: wait for the lock - + watch = Lockable(Core.Box(nothing)) while true - # start the file-watcher prior to checking for the pidfile existence - t = @async try - watch_file(path, poll_interval) + # now try again to create it + # try to start the file-watcher prior to checking for the pidfile existence + watch = try + FileMonitor(path) catch ex isa(ex, IOError) || rethrow(ex) - 
sleep(poll_interval) # if the watch failed, convert to just doing a sleep + ex.code != UV_ENOENT # if the file was deleted in the meantime, don't sleep at all, even if the lock fails + end + timeout = nothing + if watch isa FileMonitor && stale_age > 0 + let watch = watch + timeout = Timer(stale_age) do t + close(watch) + end + end + end + try + file = tryopen_exclusive(path, mode) + file === nothing || return file + if watch isa FileMonitor + try + Base.wait(watch) # will time-out after stale_age passes + catch ex + isa(ex, EOFError) || isa(ex, IOError) || rethrow(ex) + end + end + if watch === true # if the watch failed, convert to just doing a sleep + sleep(poll_interval) + end + finally + # something changed about the path, so watch is now possibly monitoring the wrong file handle + # it will need to be recreated just before the next tryopen_exclusive attempt + timeout isa Timer && close(timeout) + watch isa FileMonitor && close(watch) end - # now try again to create it - file = tryopen_exclusive(path, mode) - file === nothing || return file - Base.wait(t) # sleep for a bit before trying again if stale_age > 0 && stale_pidfile(path, stale_age, refresh) # if the file seems stale, try to remove it before attempting again # set stale_age to zero so we won't attempt again, even if the attempt fails diff --git a/stdlib/FileWatching/test/runtests.jl b/stdlib/FileWatching/test/runtests.jl index c9d7a4317fd08..11df8849048f8 100644 --- a/stdlib/FileWatching/test/runtests.jl +++ b/stdlib/FileWatching/test/runtests.jl @@ -169,12 +169,13 @@ file = joinpath(dir, "afile.txt") # initialize a watch_folder instance and create afile.txt function test_init_afile() - @test isempty(FileWatching.watched_folders) + watched_folders = FileWatching.watched_folders + @test @lock watched_folders isempty(watched_folders[]) @test(watch_folder(dir, 0) == ("" => FileWatching.FileEvent())) @test @elapsed(@test(watch_folder(dir, 0) == ("" => FileWatching.FileEvent()))) <= 0.5 - @test 
length(FileWatching.watched_folders) == 1 + @test @lock(watched_folders, length(FileWatching.watched_folders[])) == 1 @test unwatch_folder(dir) === nothing - @test isempty(FileWatching.watched_folders) + @test @lock watched_folders isempty(watched_folders[]) @test 0.002 <= @elapsed(@test(watch_folder(dir, 0.004) == ("" => FileWatching.FileEvent()))) @test 0.002 <= @elapsed(@test(watch_folder(dir, 0.004) == ("" => FileWatching.FileEvent()))) <= 0.5 @test unwatch_folder(dir) === nothing @@ -204,7 +205,7 @@ function test_init_afile() @test unwatch_folder(dir) === nothing @test(watch_folder(dir, 0) == ("" => FileWatching.FileEvent())) @test 0.9 <= @elapsed(@test(watch_folder(dir, 1) == ("" => FileWatching.FileEvent()))) - @test length(FileWatching.watched_folders) == 1 + @test @lock(watched_folders, length(FileWatching.watched_folders[])) == 1 nothing end @@ -440,7 +441,7 @@ end (StatStruct(), EOFError()))) > 3) unwatch_folder(dir) -@test isempty(FileWatching.watched_folders) +@test @lock FileWatching.watched_folders isempty(FileWatching.watched_folders[]) rm(file) rm(dir) From f8d17e7ad4857ba3164ca1c4df8d118dbf42b429 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Thu, 26 Sep 2024 15:04:26 -0400 Subject: [PATCH 19/45] [FileWatching] reorganize file and add docs --- stdlib/FileWatching/docs/src/index.md | 16 +- stdlib/FileWatching/src/FileWatching.jl | 386 +++++++++++++++--------- stdlib/FileWatching/test/runtests.jl | 6 +- 3 files changed, 248 insertions(+), 160 deletions(-) diff --git a/stdlib/FileWatching/docs/src/index.md b/stdlib/FileWatching/docs/src/index.md index 1b2212fcc5a28..15d4e39a45117 100644 --- a/stdlib/FileWatching/docs/src/index.md +++ b/stdlib/FileWatching/docs/src/index.md @@ -5,11 +5,17 @@ EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/FileWatching/do # [File Events](@id lib-filewatching) ```@docs -FileWatching.poll_fd -FileWatching.poll_file -FileWatching.watch_file -FileWatching.watch_folder -FileWatching.unwatch_folder 
+poll_fd +poll_file +watch_file +watch_folder +unwatch_folder +``` +```@docs +FileMonitor +FolderMonitor +PollingFileWatcher +FDWatcher ``` # Pidfile diff --git a/stdlib/FileWatching/src/FileWatching.jl b/stdlib/FileWatching/src/FileWatching.jl index b24f352943ec5..7c743ce634193 100644 --- a/stdlib/FileWatching/src/FileWatching.jl +++ b/stdlib/FileWatching/src/FileWatching.jl @@ -6,7 +6,7 @@ Utilities for monitoring files and file descriptors for events. module FileWatching export - # one-shot API (returns results): + # one-shot API (returns results, race-y): watch_file, # efficient for small numbers of files watch_folder, # efficient for large numbers of files unwatch_folder, @@ -78,6 +78,134 @@ isreadable(f::FDEvent) = f.readable iswritable(f::FDEvent) = f.writable |(a::FDEvent, b::FDEvent) = FDEvent(getfield(a, :events) | getfield(b, :events)) +# Callback functions + +function uv_fseventscb_file(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32) + t = @handle_as handle FileMonitor + lock(t.notify) + try + if status != 0 + t.ioerrno = status + notify_error(t.notify, _UVError("FileMonitor", status)) + uvfinalize(t) + elseif events != t.events + events = t.events |= events + notify(t.notify, all=false) + end + finally + unlock(t.notify) + end + nothing +end + +function uv_fseventscb_folder(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32) + t = @handle_as handle FolderMonitor + lock(t.notify) + try + if status != 0 + notify_error(t.notify, _UVError("FolderMonitor", status)) + else + fname = (filename == C_NULL) ? 
"" : unsafe_string(convert(Cstring, filename)) + push!(t.channel, fname => FileEvent(events)) + notify(t.notify) + end + finally + unlock(t.notify) + end + nothing +end + +function uv_pollcb(handle::Ptr{Cvoid}, status::Int32, events::Int32) + t = @handle_as handle _FDWatcher + lock(t.notify) + try + if status != 0 + notify_error(t.notify, _UVError("FDWatcher", status)) + else + t.events |= events + if t.active[1] || t.active[2] + if isempty(t.notify) + # if we keep hearing about events when nobody appears to be listening, + # stop the poll to save cycles + t.active = (false, false) + ccall(:uv_poll_stop, Int32, (Ptr{Cvoid},), t.handle) + end + end + notify(t.notify, events) + end + finally + unlock(t.notify) + end + nothing +end + +function uv_fspollcb(req::Ptr{Cvoid}) + pfw = unsafe_pointer_to_objref(uv_req_data(req))::PollingFileWatcher + pfw.active = false + unpreserve_handle(pfw) + @assert pointer(pfw.stat_req) == req + r = Int32(ccall(:uv_fs_get_result, Cssize_t, (Ptr{Cvoid},), req)) + statbuf = ccall(:uv_fs_get_statbuf, Ptr{UInt8}, (Ptr{Cvoid},), req) + curr_stat = StatStruct(pfw.file, statbuf, r) + uv_fs_req_cleanup(req) + lock(pfw.notify) + try + if !isempty(pfw.notify) # must discard the update if nobody watching + if pfw.ioerrno != r || (r == 0 && pfw.prev_stat != curr_stat) + if r == 0 + pfw.prev_stat = curr_stat + end + pfw.ioerrno = r + notify(pfw.notify, true) + end + pfw.timer = Timer(pfw.interval) do t + # async task + iolock_begin() + lock(pfw.notify) + try + if pfw.timer === t # use identity check to test if this callback is stale by the time we got the lock + pfw.timer = nothing + @assert !pfw.active + if isopen(pfw) && !isempty(pfw.notify) + preserve_handle(pfw) + uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid},)) + err = ccall(:uv_fs_stat, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}), + eventloop(), pfw.stat_req, pfw.file, uv_jl_fspollcb::Ptr{Cvoid}) + err == 0 || notify(pfw.notify, _UVError("PollingFileWatcher (start)", 
err), error=true) # likely just ENOMEM + pfw.active = true + end + end + finally + unlock(pfw.notify) + end + iolock_end() + nothing + end + end + finally + unlock(pfw.notify) + end + nothing +end + +# Types + +""" + FileMonitor(path::AbstractString) + +Watch a file or directory `path` (which must exist) for changes until a change occurs. This +function does not poll the file system and instead uses platform-specific functionality to +receive notifications from the operating system (e.g. via inotify on Linux). See the NodeJS +documentation linked below for details. + +`fm = FileMonitor(path)` acts like an auto-reset Event, so `wait(fm)` blocks until there has +been at least one event in the file originally at the given path and then returns an object +with boolean fields `renamed`, `changed`, `timedout` summarizing all changes that have +occurred since the last call to `wait` returned. + +This behavior of this function varies slightly across platforms. See +<https://nodejs.org/api/fs.html#fs_caveats> for more detailed information. +""" mutable struct FileMonitor @atomic handle::Ptr{Cvoid} const file::String @@ -96,6 +224,7 @@ mutable struct FileMonitor uv_error("FileMonitor", err) end finalizer(uvfinalize, this) + uv_jl_fseventscb_file = @cfunction(uv_fseventscb_file, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32)) uv_error("FileMonitor (start)", ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32), this.handle, uv_jl_fseventscb_file::Ptr{Cvoid}, file, 0)) @@ -104,6 +233,23 @@ mutable struct FileMonitor end end + +""" + FolderMonitor(folder::AbstractString) + +Watch a file or directory `folder` for changes until a change has occurred. This function does +not poll the file system and instead uses platform-specific functionality to receive +notifications from the operating system (e.g. via inotify on Linux). See the NodeJS +documentation linked below for details. + +This acts similarly to a Channel, so calling `take!` (or `wait`) blocks until some change has +occurred. 
The `wait` function will return a pair where the first field is the name of the +changed file (if available) and the second field is an object with boolean fields `renamed` +and `changed`, giving the event that occurred on it. + +This behavior of this function varies slightly across platforms. See +<https://nodejs.org/api/fs.html#fs_caveats> for more detailed information. +""" mutable struct FolderMonitor @atomic handle::Ptr{Cvoid} # notify::Channel{Any} # eltype = Union{Pair{String, FileEvent}, IOError} @@ -121,6 +267,7 @@ mutable struct FolderMonitor throw(_UVError("FolderMonitor", err)) end finalizer(uvfinalize, this) + uv_jl_fseventscb_folder = @cfunction(uv_fseventscb_folder, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32)) uv_error("FolderMonitor (start)", ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32), handle, uv_jl_fseventscb_folder::Ptr{Cvoid}, folder, 0)) @@ -131,6 +278,28 @@ end # this is similar to uv_fs_poll, but strives to avoid the design mistakes that make it unsuitable for any usable purpose # https://github.com/libuv/libuv/issues/4543 +""" + PollingFileWatcher(path::AbstractString, interval_s::Real=5.007) + +Monitor a file for changes by polling `stat` every `interval_s` seconds until a change +occurs or the watcher is closed. The `interval_s` should be a long period; the +default is 5.007 seconds. Call `stat` on it to get the most recent (but possibly stale) result. + +This acts like an auto-reset Event, so calling `wait` blocks until the `stat` result has +changed since the previous value captured upon entry to the `wait` call. The `wait` function +will return a pair of status objects `(previous, current)` once any `stat` change is +detected since the previous time that `wait` was called. The `previous` status is always a +`StatStruct`, but it may have all of the fields zeroed (indicating the file didn't +previously exist, or wasn't previously accessible). 
+ +The `current` status object may be a `StatStruct`, an `EOFError` (if the wait is canceled by +closing this object), or some other `Exception` subtype (if the `stat` operation failed: for +example, if the path is removed). Note that the `stat` value may be outdated if the file has +changed again multiple times. + +Using [`FileMonitor`](@ref) for this operation is preferred, since it is more reliable and +efficient, although in some situations it may not be available. +""" mutable struct PollingFileWatcher file::String interval::Float64 @@ -151,8 +320,6 @@ mutable struct PollingFileWatcher end end -Base.stat(pfw::PollingFileWatcher) = Base.checkstat(@lock pfw.notify pfw.prev_stat) - mutable struct _FDWatcher @atomic handle::Ptr{Cvoid} const fdnum::Int # this is NOT the file descriptor @@ -276,6 +443,25 @@ mutable struct _FDWatcher end end +""" + FDWatcher(fd::Union{RawFD,WindowsRawSocket}, readable::Bool, writable::Bool) + +Monitor a file descriptor `fd` for changes in the read or write availability. + +The `readable` and `writable` arguments determine which of read and/or write status should be monitored; at +least one of them must be set to `true`. + +The value returned by `wait` is an object with boolean fields `readable`, `writable`, and `timedout`, +giving the result of the polling. + +This acts like a level-set event, so calling `wait` blocks until one of those conditions is +met, but then continues to return without blocking until the condition is cleared (either +there is no more to read, or no more space in the write buffer, or both). + +!!! warning + You must call `close` manually, when finished with this object, before the fd + argument is closed. Failure to do so risks serious crashes. 
+""" mutable struct FDWatcher # WARNING: make sure `close` has been manually called on this watcher before closing / destroying `fd` const watcher::_FDWatcher @@ -396,148 +582,7 @@ isopen(pfw::PollingFileWatcher) = !pfw.closed isopen(pfw::_FDWatcher) = pfw.refcount != (0, 0) isopen(pfw::FDWatcher) = !pfw.mask.timedout -function uv_fseventscb_file(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32) - t = @handle_as handle FileMonitor - lock(t.notify) - try - if status != 0 - t.ioerrno = status - notify_error(t.notify, _UVError("FileMonitor", status)) - uvfinalize(t) - elseif events != t.events - events = t.events |= events - notify(t.notify, all=false) - end - finally - unlock(t.notify) - end - nothing -end - -function uv_fseventscb_folder(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32) - t = @handle_as handle FolderMonitor - lock(t.notify) - try - if status != 0 - notify_error(t.notify, _UVError("FolderMonitor", status)) - else - fname = (filename == C_NULL) ? 
"" : unsafe_string(convert(Cstring, filename)) - push!(t.channel, fname => FileEvent(events)) - notify(t.notify) - end - finally - unlock(t.notify) - end - nothing -end - -function uv_pollcb(handle::Ptr{Cvoid}, status::Int32, events::Int32) - t = @handle_as handle _FDWatcher - lock(t.notify) - try - if status != 0 - notify_error(t.notify, _UVError("FDWatcher", status)) - else - t.events |= events - if t.active[1] || t.active[2] - if isempty(t.notify) - # if we keep hearing about events when nobody appears to be listening, - # stop the poll to save cycles - t.active = (false, false) - ccall(:uv_poll_stop, Int32, (Ptr{Cvoid},), t.handle) - end - end - notify(t.notify, events) - end - finally - unlock(t.notify) - end - nothing -end - -function uv_fspollcb(req::Ptr{Cvoid}) - pfw = unsafe_pointer_to_objref(uv_req_data(req))::PollingFileWatcher - pfw.active = false - unpreserve_handle(pfw) - @assert pointer(pfw.stat_req) == req - r = Int32(ccall(:uv_fs_get_result, Cssize_t, (Ptr{Cvoid},), req)) - statbuf = ccall(:uv_fs_get_statbuf, Ptr{UInt8}, (Ptr{Cvoid},), req) - curr_stat = StatStruct(pfw.file, statbuf, r) - uv_fs_req_cleanup(req) - lock(pfw.notify) - try - if !isempty(pfw.notify) # discard the update if nobody watching - if pfw.ioerrno != r || (r == 0 && pfw.prev_stat != curr_stat) - if r == 0 - pfw.prev_stat = curr_stat - end - pfw.ioerrno = r - notify(pfw.notify, true) - end - pfw.timer = Timer(pfw.interval) do t - # async task - iolock_begin() - lock(pfw.notify) - try - if pfw.timer === t # use identity check to test if this callback is stale by the time we got the lock - pfw.timer = nothing - @assert !pfw.active - if isopen(pfw) && !isempty(pfw.notify) - preserve_handle(pfw) - err = ccall(:uv_fs_stat, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}), - eventloop(), pfw.stat_req, pfw.file, uv_jl_fspollcb) - err == 0 || notify(pfw.notify, _UVError("PollingFileWatcher (start)", err), error=true) # likely just ENOMEM - pfw.active = true - end - end - finally - 
unlock(pfw.notify) - end - iolock_end() - nothing - end - end - finally - unlock(pfw.notify) - end - nothing -end - -global uv_jl_pollcb::Ptr{Cvoid} -global uv_jl_fspollcb::Ptr{Cvoid} -global uv_jl_fseventscb_file::Ptr{Cvoid} -global uv_jl_fseventscb_folder::Ptr{Cvoid} - -function __init__() - global uv_jl_pollcb = @cfunction(uv_pollcb, Cvoid, (Ptr{Cvoid}, Cint, Cint)) - global uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid},)) - global uv_jl_fseventscb_file = @cfunction(uv_fseventscb_file, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32)) - global uv_jl_fseventscb_folder = @cfunction(uv_fseventscb_folder, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32)) - - Base.mkpidlock_hook = mkpidlock - Base.trymkpidlock_hook = trymkpidlock - Base.parse_pidfile_hook = Pidfile.parse_pidfile - - nothing -end - -function start_watching(t::_FDWatcher) - iolock_begin() - t.handle == C_NULL && throw(ArgumentError("FDWatcher is closed")) - readable = t.refcount[1] > 0 - writable = t.refcount[2] > 0 - if t.active[1] != readable || t.active[2] != writable - # make sure the READABLE / WRITEABLE state is updated - uv_error("FDWatcher (start)", - ccall(:uv_poll_start, Int32, (Ptr{Cvoid}, Int32, Ptr{Cvoid}), - t.handle, - (readable ? UV_READABLE : 0) | (writable ? UV_WRITABLE : 0), - uv_jl_pollcb::Ptr{Cvoid})) - t.active = (readable, writable) - end - iolock_end() - nothing -end +Base.stat(pfw::PollingFileWatcher) = Base.checkstat(@lock pfw.notify pfw.prev_stat) # n.b. 
this _wait may return spuriously early with a timedout event function _wait(fdw::_FDWatcher, mask::FDEvent) @@ -549,7 +594,20 @@ function _wait(fdw::_FDWatcher, mask::FDEvent) if !isopen(fdw) # !open throw(EOFError()) elseif events.timedout - start_watching(fdw) # make sure the poll is active + fdw.handle == C_NULL && throw(ArgumentError("FDWatcher is closed")) + # start_watching to make sure the poll is active + readable = fdw.refcount[1] > 0 + writable = fdw.refcount[2] > 0 + if fdw.active[1] != readable || fdw.active[2] != writable + # make sure the READABLE / WRITEABLE state is updated + uv_jl_pollcb = @cfunction(uv_pollcb, Cvoid, (Ptr{Cvoid}, Cint, Cint)) + uv_error("FDWatcher (start)", + ccall(:uv_poll_start, Int32, (Ptr{Cvoid}, Int32, Ptr{Cvoid}), + fdw.handle, + (readable ? UV_READABLE : 0) | (writable ? UV_WRITABLE : 0), + uv_jl_pollcb::Ptr{Cvoid})) + fdw.active = (readable, writable) + end iolock_end() return FDEvent(wait(fdw.notify)::Int32) else @@ -631,8 +689,9 @@ function wait(pfw::PollingFileWatcher) # start_watching if !pfw.active preserve_handle(pfw) + uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid},)) err = ccall(:uv_fs_stat, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}), - eventloop(), pfw.stat_req, pfw.file, uv_jl_fspollcb) + eventloop(), pfw.stat_req, pfw.file, uv_jl_fspollcb::Ptr{Cvoid}) err == 0 || uv_error("PollingFileWatcher (start)", err) # likely just ENOMEM pfw.active = true end @@ -664,7 +723,8 @@ function wait(pfw::PollingFileWatcher) if !havechange # user canceled by calling close return prevstat, EOFError() end - # grab the most up-to-date stat result as of this time, even if it was a bit newer than the notify call + # grab the most up-to-date stat result as of this time, even if it was a bit newer than + # the notify call (unlikely, as there would need to be a concurrent call to wait) lock(pfw.notify) currstat = pfw.prev_stat ioerrno = pfw.ioerrno @@ -729,6 +789,10 @@ least one of them must be set to `true`. 
The returned value is an object with boolean fields `readable`, `writable`, and `timedout`, giving the result of the polling. + +This is a thin wrapper over calling `wait` on a [`FDWatcher`](@ref), which implements the +functionality but requires the user to call `close` manually when finished with it, or risk +serious crashes. """ function poll_fd(s::Union{RawFD, Sys.iswindows() ? WindowsRawSocket : Union{}}, timeout_s::Real=-1; readable=false, writable=false) mask = FDEvent(readable, writable, false, false) @@ -786,6 +850,15 @@ giving the result of watching the file. This behavior of this function varies slightly across platforms. See <https://nodejs.org/api/fs.html#fs_caveats> for more detailed information. + +This is a thin wrapper over calling `wait` on a [`FileMonitor`](@ref). This function has a +small race window between consecutive calls to `watch_file` where the file might change +without being detected. To avoid this race, use + + fm = FileMonitor(path) + wait(fm) + +directly, re-using the same `fm` each time you `wait`. """ function watch_file(s::String, timeout_s::Float64=-1.0) fm = FileMonitor(s) @@ -812,7 +885,7 @@ watch_file(s::AbstractString, timeout_s::Real=-1) = watch_file(String(s), Float6 """ watch_folder(path::AbstractString, timeout_s::Real=-1) -Watches a file or directory `path` for changes until a change has occurred or `timeout_s` +Watch a file or directory `path` for changes until a change has occurred or `timeout_s` seconds have elapsed. This function does not poll the file system and instead uses platform-specific functionality to receive notifications from the operating system (e.g. via inotify on Linux). See the NodeJS documentation linked below for details. @@ -826,6 +899,8 @@ giving the event. This behavior of this function varies slightly across platforms. See <https://nodejs.org/api/fs.html#fs_caveats> for more detailed information. + +This function is a thin wrapper over calling `wait` on a [`FolderMonitor`](@ref), with added timeout support. 
""" watch_folder(s::AbstractString, timeout_s::Real=-1) = watch_folder(String(s), timeout_s) function watch_folder(s::String, timeout_s::Real=-1) @@ -895,11 +970,15 @@ The `previous` status is always a `StatStruct`, but it may have all of the field (indicating the file didn't previously exist, or wasn't previously accessible). The `current` status object may be a `StatStruct`, an `EOFError` (indicating the timeout elapsed), -or some other `Exception` subtype (if the `stat` operation failed - for example, if the path does not exist). +or some other `Exception` subtype (if the `stat` operation failed: for example, if the path does not exist). To determine when a file was modified, compare `!(current isa StatStruct && prev == current)` to detect notification of changes to the mtime or inode. However, using [`watch_file`](@ref) for this operation is preferred, since it is more reliable and efficient, although in some situations it may not be available. + +This is a thin wrapper over calling `wait` on a [`PollingFileWatcher`](@ref), which implements +the functionality, but this function has a small race window between consecutive calls to +`poll_file` where the file might change without being detected. 
""" function poll_file(s::AbstractString, interval_seconds::Real=5.007, timeout_s::Real=-1) pfw = PollingFileWatcher(s, Float64(interval_seconds)) @@ -920,4 +999,11 @@ end include("pidfile.jl") import .Pidfile: mkpidlock, trymkpidlock +function __init__() + Base.mkpidlock_hook = mkpidlock + Base.trymkpidlock_hook = trymkpidlock + Base.parse_pidfile_hook = Pidfile.parse_pidfile + nothing +end + end diff --git a/stdlib/FileWatching/test/runtests.jl b/stdlib/FileWatching/test/runtests.jl index 11df8849048f8..def555154264d 100644 --- a/stdlib/FileWatching/test/runtests.jl +++ b/stdlib/FileWatching/test/runtests.jl @@ -452,10 +452,6 @@ rm(dir) include("pidfile.jl") end -@testset "Docstrings" begin - undoc = Docs.undocumented_names(FileWatching) - @test_broken isempty(undoc) - @test undoc == [:FDWatcher, :FileMonitor, :FolderMonitor, :PollingFileWatcher] -end +@test isempty(Docs.undocumented_names(FileWatching)) end # testset From bb25910328570835f6a2fdbb3b8ca93b14a65858 Mon Sep 17 00:00:00 2001 From: Kiran Pamnany Date: Mon, 30 Sep 2024 15:41:20 -0400 Subject: [PATCH 20/45] Add `--trace-dispatch` (#55848) --- NEWS.md | 1 + base/options.jl | 1 + doc/man/julia.1 | 4 ++ doc/src/manual/command-line-interface.md | 1 + src/gf.c | 57 +++++++++++++++++++++++- src/jloptions.c | 8 ++++ src/jloptions.h | 1 + src/jltypes.c | 2 +- src/julia.h | 8 +++- src/method.c | 2 +- src/staticdata.c | 2 +- src/staticdata_utils.c | 3 +- test/cmdlineargs.jl | 22 +++++++++ test/core.jl | 2 +- 14 files changed, 106 insertions(+), 8 deletions(-) diff --git a/NEWS.md b/NEWS.md index ca2bf1f615012..cc1bbc7449e5d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -59,6 +59,7 @@ variables. ([#53742]). * New `--trace-compile-timing` option to report how long each method reported by `--trace-compile` took to compile, in ms. 
([#54662]) * `--trace-compile` now prints recompiled methods in yellow or with a trailing comment if color is not supported ([#55763]) +* New `--trace-dispatch` option to report methods that are dynamically dispatched ([#55848]). Multi-threading changes ----------------------- diff --git a/base/options.jl b/base/options.jl index 1de7a2acb1e06..f535c27d99122 100644 --- a/base/options.jl +++ b/base/options.jl @@ -34,6 +34,7 @@ struct JLOptions can_inline::Int8 polly::Int8 trace_compile::Ptr{UInt8} + trace_dispatch::Ptr{UInt8} fast_math::Int8 worker::Int8 cookie::Ptr{UInt8} diff --git a/doc/man/julia.1 b/doc/man/julia.1 index 536a23bd37894..56cb690d66eeb 100644 --- a/doc/man/julia.1 +++ b/doc/man/julia.1 @@ -290,6 +290,10 @@ Methods that were recompiled are printed in yellow or with a trailing comment if --trace-compile-timing= If --trace-compile is enabled show how long each took to compile in ms +.TP +--trace-dispatch={stderr|name} +Print precompile statements for methods dispatched during execution or save to stderr or a path. + .TP -image-codegen Force generate code in imaging mode diff --git a/doc/src/manual/command-line-interface.md b/doc/src/manual/command-line-interface.md index ef20e51ea6e4e..5255720e55cd7 100644 --- a/doc/src/manual/command-line-interface.md +++ b/doc/src/manual/command-line-interface.md @@ -216,6 +216,7 @@ The following is a complete list of command-line switches available when launchi |`--output-incremental={yes\|no*}` |Generate an incremental output file (rather than complete)| |`--trace-compile={stderr\|name}` |Print precompile statements for methods compiled during execution or save to stderr or a path. 
Methods that were recompiled are printed in yellow or with a trailing comment if color is not supported| |`--trace-compile-timing` |If --trace-compile is enabled show how long each took to compile in ms| +|`--trace-dispatch={stderr\|name}` |Print precompile statements for methods dispatched during execution or save to stderr or a path.| |`--image-codegen` |Force generate code in imaging mode| |`--permalloc-pkgimg={yes\|no*}` |Copy the data section of package images into memory| |`--trim={no*|safe|unsafe|unsafe-warn}` |Build a sysimage including only code provably reachable from methods marked by calling `entrypoint`. The three non-default options differ in how they handle dynamic call sites. In safe mode, such sites result in compile-time errors. In unsafe mode, such sites are allowed but the resulting binary might be missing needed code and can throw runtime errors. With unsafe-warn, such sites will trigger warnings at compile-time and might error at runtime.| diff --git a/src/gf.c b/src/gf.c index 321711c839aa8..56ebe6fe2fa84 100644 --- a/src/gf.c +++ b/src/gf.c @@ -2560,6 +2560,38 @@ static void record_precompile_statement(jl_method_instance_t *mi, double compila JL_UNLOCK(&precomp_statement_out_lock); } +jl_mutex_t dispatch_statement_out_lock; + +static void record_dispatch_statement(jl_method_instance_t *mi) +{ + static ios_t f_dispatch; + static JL_STREAM* s_dispatch = NULL; + jl_method_t *def = mi->def.method; + if (!jl_is_method(def)) + return; + + JL_LOCK(&dispatch_statement_out_lock); + if (s_dispatch == NULL) { + const char *t = jl_options.trace_dispatch; + if (!strncmp(t, "stderr", 6)) { + s_dispatch = JL_STDERR; + } + else { + if (ios_file(&f_dispatch, t, 1, 1, 1, 1) == NULL) + jl_errorf("cannot open dispatch statement file \"%s\" for writing", t); + s_dispatch = (JL_STREAM*) &f_dispatch; + } + } + if (!jl_has_free_typevars(mi->specTypes)) { + jl_printf(s_dispatch, "precompile("); + jl_static_show(s_dispatch, mi->specTypes); + jl_printf(s_dispatch, 
")\n"); + if (s_dispatch != JL_STDERR) + ios_flush(&f_dispatch); + } + JL_UNLOCK(&dispatch_statement_out_lock); +} + // If waitcompile is 0, this will return NULL if compiling is on-going in the JIT. This is // useful for the JIT itself, since it just doesn't cause redundant work or missed updates, // but merely causes it to look into the current JIT worklist. @@ -3067,7 +3099,8 @@ static void jl_compile_now(jl_method_instance_t *mi) JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tupletype_t *types, size_t world) { size_t tworld = jl_typeinf_world; - jl_atomic_store_relaxed(&mi->precompiled, 1); + uint8_t miflags = jl_atomic_load_relaxed(&mi->flags) | JL_MI_FLAGS_MASK_PRECOMPILED; + jl_atomic_store_relaxed(&mi->flags, miflags); if (jl_generating_output()) { jl_compile_now(mi); // In addition to full compilation of the compilation-signature, if `types` is more specific (e.g. due to nospecialize), @@ -3082,7 +3115,8 @@ JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tuplet types2 = jl_type_intersection_env((jl_value_t*)types, (jl_value_t*)mi->def.method->sig, &tpenv2); jl_method_instance_t *mi2 = jl_specializations_get_linfo(mi->def.method, (jl_value_t*)types2, tpenv2); JL_GC_POP(); - jl_atomic_store_relaxed(&mi2->precompiled, 1); + miflags = jl_atomic_load_relaxed(&mi2->flags) | JL_MI_FLAGS_MASK_PRECOMPILED; + jl_atomic_store_relaxed(&mi2->flags, miflags); if (jl_rettype_inferred_native(mi2, world, world) == jl_nothing) (void)jl_type_infer(mi2, world, SOURCE_MODE_NOT_REQUIRED); if (jl_typeinf_func && jl_atomic_load_relaxed(&mi->def.method->primary_world) <= tworld) { @@ -3358,6 +3392,16 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t jl_method_error(F, args, nargs, world); // unreachable } + // mfunc is about to be dispatched + if (jl_options.trace_dispatch != NULL) { + uint8_t miflags = jl_atomic_load_relaxed(&mfunc->flags); + uint8_t was_dispatched = miflags & 
JL_MI_FLAGS_MASK_DISPATCHED; + if (!was_dispatched) { + miflags |= JL_MI_FLAGS_MASK_DISPATCHED; + jl_atomic_store_relaxed(&mfunc->flags, miflags); + record_dispatch_statement(mfunc); + } + } } #ifdef JL_TRACE @@ -3480,6 +3524,15 @@ jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, jl_value jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation } JL_GC_PROMISE_ROOTED(mfunc); + if (jl_options.trace_dispatch != NULL) { + uint8_t miflags = jl_atomic_load_relaxed(&mfunc->flags); + uint8_t was_dispatched = miflags & JL_MI_FLAGS_MASK_DISPATCHED; + if (!was_dispatched) { + miflags |= JL_MI_FLAGS_MASK_DISPATCHED; + jl_atomic_store_relaxed(&mfunc->flags, miflags); + record_dispatch_statement(mfunc); + } + } size_t world = jl_current_task->world_age; return _jl_invoke(gf, args, nargs - 1, mfunc, world); } diff --git a/src/jloptions.c b/src/jloptions.c index 530d5e2577a9a..35f0a76e3f6e7 100644 --- a/src/jloptions.c +++ b/src/jloptions.c @@ -77,6 +77,7 @@ JL_DLLEXPORT void jl_init_options(void) 1, // can_inline JL_OPTIONS_POLLY_ON, // polly NULL, // trace_compile + NULL, // trace_dispatch JL_OPTIONS_FAST_MATH_DEFAULT, 0, // worker NULL, // cookie @@ -294,6 +295,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) opt_polly, opt_trace_compile, opt_trace_compile_timing, + opt_trace_dispatch, opt_math_mode, opt_worker, opt_bind_to, @@ -372,6 +374,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) { "polly", required_argument, 0, opt_polly }, { "trace-compile", required_argument, 0, opt_trace_compile }, { "trace-compile-timing", no_argument, 0, opt_trace_compile_timing }, + { "trace-dispatch", required_argument, 0, opt_trace_dispatch }, { "math-mode", required_argument, 0, opt_math_mode }, { "handle-signals", required_argument, 0, opt_handle_signals }, // hidden command line options @@ -828,6 +831,11 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) case opt_trace_compile_timing: 
jl_options.trace_compile_timing = 1; break; + case opt_trace_dispatch: + jl_options.trace_dispatch = strdup(optarg); + if (!jl_options.trace_dispatch) + jl_errorf("fatal error: failed to allocate memory: %s", strerror(errno)); + break; case opt_math_mode: if (!strcmp(optarg,"ieee")) jl_options.fast_math = JL_OPTIONS_FAST_MATH_OFF; diff --git a/src/jloptions.h b/src/jloptions.h index 3d7deedb59e15..e58797caace3c 100644 --- a/src/jloptions.h +++ b/src/jloptions.h @@ -38,6 +38,7 @@ typedef struct { int8_t can_inline; int8_t polly; const char *trace_compile; + const char *trace_dispatch; int8_t fast_math; int8_t worker; const char *cookie; diff --git a/src/jltypes.c b/src/jltypes.c index fbc8e9f7f7f16..11f1d11a14edc 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -3617,7 +3617,7 @@ void jl_init_types(void) JL_GC_DISABLED "backedges", "cache", "cache_with_orig", - "precompiled"), + "flags"), jl_svec(7, jl_new_struct(jl_uniontype_type, jl_method_type, jl_module_type), jl_any_type, diff --git a/src/julia.h b/src/julia.h index 73b96cf0183d1..c6ff729a308eb 100644 --- a/src/julia.h +++ b/src/julia.h @@ -410,8 +410,14 @@ struct _jl_method_instance_t { jl_array_t *backedges; // list of method-instances which call this method-instance; `invoke` records (invokesig, caller) pairs _Atomic(struct _jl_code_instance_t*) cache; uint8_t cache_with_orig; // !cache_with_specTypes - _Atomic(uint8_t) precompiled; // true if this instance was generated by an explicit `precompile(...)` call + + // flags for this method instance + // bit 0: generated by an explicit `precompile(...)` + // bit 1: dispatched + _Atomic(uint8_t) flags; }; +#define JL_MI_FLAGS_MASK_PRECOMPILED 0x01 +#define JL_MI_FLAGS_MASK_DISPATCHED 0x02 // OpaqueClosure typedef struct _jl_opaque_closure_t { diff --git a/src/method.c b/src/method.c index d4457b1549353..6aba60e7fe12c 100644 --- a/src/method.c +++ b/src/method.c @@ -629,7 +629,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void) 
mi->backedges = NULL; jl_atomic_store_relaxed(&mi->cache, NULL); mi->cache_with_orig = 0; - jl_atomic_store_relaxed(&mi->precompiled, 0); + jl_atomic_store_relaxed(&mi->flags, 0); return mi; } diff --git a/src/staticdata.c b/src/staticdata.c index f54cc9692eaea..aa9a16daab7a5 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -1718,7 +1718,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED else if (jl_is_method_instance(v)) { assert(f == s->s); jl_method_instance_t *newmi = (jl_method_instance_t*)&f->buf[reloc_offset]; - jl_atomic_store_relaxed(&newmi->precompiled, 0); + jl_atomic_store_relaxed(&newmi->flags, 0); } else if (jl_is_code_instance(v)) { assert(f == s->s); diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index f39e5357c6782..81aed233af5c0 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -159,7 +159,8 @@ static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, if (jl_is_method(mod)) mod = ((jl_method_t*)mod)->module; assert(jl_is_module(mod)); - if (jl_atomic_load_relaxed(&mi->precompiled) || !jl_object_in_image((jl_value_t*)mod) || type_in_worklist(mi->specTypes)) { + uint8_t is_precompiled = jl_atomic_load_relaxed(&mi->flags) & JL_MI_FLAGS_MASK_PRECOMPILED; + if (is_precompiled || !jl_object_in_image((jl_value_t*)mod) || type_in_worklist(mi->specTypes)) { return 1; } if (!mi->backedges) { diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index c6720e23739d8..cc3f8950f0dc0 100644 --- a/test/cmdlineargs.jl +++ b/test/cmdlineargs.jl @@ -787,6 +787,17 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` # tested in test/parallel.jl) @test errors_not_signals(`$exename --worker=true`) + # --trace-compile + let + io = IOBuffer() + v = writereadpipeline( + "foo(x) = begin Base.Experimental.@force_compile; x; end; foo(1)", + `$exename --trace-compile=stderr -i`, + stderr=io) + _stderr = String(take!(io)) + @test occursin("precompile(Tuple{typeof(Main.foo), Int", 
_stderr) + end + # --trace-compile-timing let io = IOBuffer() @@ -798,6 +809,17 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` @test occursin(" ms =# precompile(Tuple{typeof(Main.foo), Int", _stderr) end + # --trace-dispatch + let + io = IOBuffer() + v = writereadpipeline( + "foo(x) = begin Base.Experimental.@force_compile; x; end; foo(1)", + `$exename --trace-dispatch=stderr -i`, + stderr=io) + _stderr = String(take!(io)) + @test occursin("precompile(Tuple{typeof(Main.foo), Int", _stderr) + end + # test passing arguments mktempdir() do dir testfile, io = mktemp(dir) diff --git a/test/core.jl b/test/core.jl index d41a58a7ccb2e..1395817d8615e 100644 --- a/test/core.jl +++ b/test/core.jl @@ -34,7 +34,7 @@ for (T, c) in ( (Core.CodeInfo, []), (Core.CodeInstance, [:next, :min_world, :max_world, :inferred, :debuginfo, :ipo_purity_bits, :invoke, :specptr, :specsigflags, :precompile]), (Core.Method, [:primary_world, :deleted_world]), - (Core.MethodInstance, [:cache, :precompiled]), + (Core.MethodInstance, [:cache, :flags]), (Core.MethodTable, [:defs, :leafcache, :cache, :max_args]), (Core.TypeMapEntry, [:next, :min_world, :max_world]), (Core.TypeMapLevel, [:arg1, :targ, :name1, :tname, :list, :any]), From a7c5056b722182adfd183fdc7bdfdef39cd8e28e Mon Sep 17 00:00:00 2001 From: Florian Date: Tue, 1 Oct 2024 01:41:23 +0200 Subject: [PATCH 21/45] relocation: account for trailing path separator in depot paths (#55355) Fixes #55340 --- base/loading.jl | 26 ++++++++++++++++---------- src/precompile.c | 17 +++++++++++++---- src/staticdata_utils.c | 20 ++++++++++++++------ test/relocatedepot.jl | 34 +++++++++++++++++++++++++++++----- 4 files changed, 72 insertions(+), 25 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index fbf6bb4af50aa..9080a2271fb27 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -3165,16 +3165,9 @@ mutable struct CacheHeaderIncludes const modpath::Vector{String} # seemingly not needed in Base, but used by Revise end 
-function replace_depot_path(path::AbstractString) - for depot in DEPOT_PATH - !isdir(depot) && continue - - # Strip extraneous pathseps through normalization. - if isdirpath(depot) - depot = dirname(depot) - end - - if startswith(path, depot) +function replace_depot_path(path::AbstractString, depots::Vector{String}=normalize_depots_for_relocation()) + for depot in depots + if startswith(path, string(depot, Filesystem.pathsep())) || path == depot path = replace(path, depot => "@depot"; count=1) break end @@ -3182,6 +3175,19 @@ function replace_depot_path(path::AbstractString) return path end +function normalize_depots_for_relocation() + depots = String[] + sizehint!(depots, length(DEPOT_PATH)) + for d in DEPOT_PATH + isdir(d) || continue + if isdirpath(d) + d = dirname(d) + end + push!(depots, abspath(d)) + end + return depots +end + function restore_depot_path(path::AbstractString, depot::AbstractString) replace(path, r"^@depot" => depot; count=1) end diff --git a/src/precompile.c b/src/precompile.c index 5088d45a5ad74..c21cf5367fba6 100644 --- a/src/precompile.c +++ b/src/precompile.c @@ -39,9 +39,17 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) { static jl_value_t *replace_depot_func = NULL; if (!replace_depot_func) replace_depot_func = jl_get_global(jl_base_module, jl_symbol("replace_depot_path")); + static jl_value_t *normalize_depots_func = NULL; + if (!normalize_depots_func) + normalize_depots_func = jl_get_global(jl_base_module, jl_symbol("normalize_depots_for_relocation")); ios_t srctext; - jl_value_t *deptuple = NULL; - JL_GC_PUSH2(&deptuple, &udeps); + jl_value_t *deptuple = NULL, *depots = NULL; + JL_GC_PUSH3(&deptuple, &udeps, &depots); + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + depots = jl_apply(&normalize_depots_func, 1); + ct->world_age = last_age; for (size_t i = 0; i < len; i++) { deptuple = jl_array_ptr_ref(udeps, i); jl_value_t 
*depmod = jl_fieldref(deptuple, 0); // module @@ -60,13 +68,14 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) { } jl_value_t **replace_depot_args; - JL_GC_PUSHARGS(replace_depot_args, 2); + JL_GC_PUSHARGS(replace_depot_args, 3); replace_depot_args[0] = replace_depot_func; replace_depot_args[1] = abspath; + replace_depot_args[2] = depots; jl_task_t *ct = jl_current_task; size_t last_age = ct->world_age; ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - jl_value_t *depalias = (jl_value_t*)jl_apply(replace_depot_args, 2); + jl_value_t *depalias = (jl_value_t*)jl_apply(replace_depot_args, 3); ct->world_age = last_age; JL_GC_POP(); diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index 81aed233af5c0..8eb223d3cfbde 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -753,6 +753,16 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t static jl_value_t *replace_depot_func = NULL; if (!replace_depot_func) replace_depot_func = jl_get_global(jl_base_module, jl_symbol("replace_depot_path")); + static jl_value_t *normalize_depots_func = NULL; + if (!normalize_depots_func) + normalize_depots_func = jl_get_global(jl_base_module, jl_symbol("normalize_depots_for_relocation")); + + jl_value_t *depots = NULL, *prefs_hash = NULL, *prefs_list = NULL; + JL_GC_PUSH2(&depots, &prefs_list); + last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + depots = jl_apply(&normalize_depots_func, 1); + ct->world_age = last_age; // write a placeholder for total size so that we can quickly seek past all of the // dependencies if we don't need them @@ -765,13 +775,14 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t if (replace_depot_func) { jl_value_t **replace_depot_args; - JL_GC_PUSHARGS(replace_depot_args, 2); + JL_GC_PUSHARGS(replace_depot_args, 3); replace_depot_args[0] = replace_depot_func; replace_depot_args[1] = deppath; + 
replace_depot_args[2] = depots; ct = jl_current_task; size_t last_age = ct->world_age; ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - deppath = (jl_value_t*)jl_apply(replace_depot_args, 2); + deppath = (jl_value_t*)jl_apply(replace_depot_args, 3); ct->world_age = last_age; JL_GC_POP(); } @@ -804,9 +815,6 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t write_int32(s, 0); // terminator, for ease of reading // Calculate Preferences hash for current package. - jl_value_t *prefs_hash = NULL; - jl_value_t *prefs_list = NULL; - JL_GC_PUSH1(&prefs_list); if (jl_base_module) { // Toplevel module is the module we're currently compiling, use it to get our preferences hash jl_value_t * toplevel = (jl_value_t*)jl_get_global(jl_base_module, jl_symbol("__toplevel__")); @@ -853,7 +861,7 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t write_int32(s, 0); write_uint64(s, 0); } - JL_GC_POP(); // for prefs_list + JL_GC_POP(); // for depots, prefs_list // write a dummy file position to indicate the beginning of the source-text pos = ios_pos(s); diff --git a/test/relocatedepot.jl b/test/relocatedepot.jl index 039d422c35e25..2ef6dec90dbc1 100644 --- a/test/relocatedepot.jl +++ b/test/relocatedepot.jl @@ -1,3 +1,5 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + using Test @@ -26,16 +28,38 @@ end if !test_relocated_depot - @testset "insert @depot tag in path" begin + @testset "edge cases when inserting @depot tag in path" begin + # insert @depot only once for first match test_harness() do mktempdir() do dir pushfirst!(DEPOT_PATH, dir) - path = dir*dir - @test Base.replace_depot_path(path) == "@depot"*dir + if Sys.iswindows() + # dirs start with a drive letter instead of a path separator + path = dir*Base.Filesystem.pathsep()*dir + @test Base.replace_depot_path(path) == "@depot"*Base.Filesystem.pathsep()*dir + else + path = dir*dir + @test Base.replace_depot_path(path) == "@depot"*dir + end + end + + # 55340 + empty!(DEPOT_PATH) + mktempdir() do dir + jlrc = joinpath(dir, "julia-rc2") + jl = joinpath(dir, "julia") + mkdir(jl) + push!(DEPOT_PATH, jl) + @test Base.replace_depot_path(jl) == "@depot" + @test Base.replace_depot_path(string(jl,Base.Filesystem.pathsep())) == + string("@depot",Base.Filesystem.pathsep()) + @test Base.replace_depot_path(jlrc) != "@depot-rc2" + @test Base.replace_depot_path(jlrc) == jlrc end end + # deal with and without trailing path separators test_harness() do mktempdir() do dir pushfirst!(DEPOT_PATH, dir) @@ -43,9 +67,9 @@ if !test_relocated_depot if isdirpath(DEPOT_PATH[1]) DEPOT_PATH[1] = dirname(DEPOT_PATH[1]) # strip trailing pathsep end - tag = joinpath("@depot", "") # append a pathsep + tag = string("@depot", Base.Filesystem.pathsep()) @test startswith(Base.replace_depot_path(path), tag) - DEPOT_PATH[1] = joinpath(DEPOT_PATH[1], "") # append a pathsep + DEPOT_PATH[1] = string(DEPOT_PATH[1], Base.Filesystem.pathsep()) @test startswith(Base.replace_depot_path(path), tag) popfirst!(DEPOT_PATH) @test !startswith(Base.replace_depot_path(path), tag) From 32ad9e60347ed83efe3778fd6f7a2702aadb3cfe Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 30 Sep 2024 22:32:58 -0400 Subject: [PATCH 22/45] change compiler to be stackless (#55575) This change 
ensures the compiler uses very little stack, making it compatible with running on any arbitrary system stack size and depths much more reliably. It also could be further modified now to easily add various forms of pause-able/resumable inference, since there is no implicit state on the stack--everything is local and explicit now. Whereas before, less than 900 frames would crash in less than a second: ``` $ time ./julia -e 'f(::Val{N}) where {N} = N <= 0 ? 0 : f(Val(N - 1)); f(Val(1000))' Warning: detected a stack overflow; program state may be corrupted, so further execution might be unreliable. Internal error: during type inference of f(Base.Val{1000}) Encountered stack overflow. This might be caused by recursion over very long tuples or argument lists. [23763] signal 6: Abort trap: 6 in expression starting at none:1 __pthread_kill at /usr/lib/system/libsystem_kernel.dylib (unknown line) Allocations: 1 (Pool: 1; Big: 0); GC: 0 Abort trap: 6 real 0m0.233s user 0m0.165s sys 0m0.049s ``` Now: it is effectively unlimited, as long as you are willing to wait for it: ``` $ time ./julia -e 'f(::Val{N}) where {N} = N <= 0 ? 0 : f(Val(N - 1)); f(Val(50000))' info: inference of f(Base.Val{50000}) from f(Base.Val{N}) where {N} exceeding 2500 frames (may be slow). info: inference of f(Base.Val{50000}) from f(Base.Val{N}) where {N} exceeding 5000 frames (may be slow). info: inference of f(Base.Val{50000}) from f(Base.Val{N}) where {N} exceeding 10000 frames (may be slow). info: inference of f(Base.Val{50000}) from f(Base.Val{N}) where {N} exceeding 20000 frames (may be slow). info: inference of f(Base.Val{50000}) from f(Base.Val{N}) where {N} exceeding 40000 frames (may be slow). real 7m4.988s $ time ./julia -e 'f(::Val{N}) where {N} = N <= 0 ? 0 : f(Val(N - 1)); f(Val(1000))' real 0m0.214s user 0m0.164s sys 0m0.044s $ time ./julia -e '@noinline f(::Val{N}) where {N} = N <= 0 ? 
GC.safepoint() : f(Val(N - 1)); f(Val(5000))' info: inference of f(Base.Val{5000}) from f(Base.Val{N}) where {N} exceeding 2500 frames (may be slow). info: inference of f(Base.Val{5000}) from f(Base.Val{N}) where {N} exceeding 5000 frames (may be slow). real 0m8.609s user 0m8.358s sys 0m0.240s ``` --- base/compiler/abstractinterpretation.jl | 1402 +++++++++++++---------- base/compiler/inferencestate.jl | 102 +- base/compiler/ssair/ir.jl | 1 + base/compiler/ssair/irinterp.jl | 38 +- base/compiler/ssair/verify.jl | 5 +- base/compiler/tfuncs.jl | 102 +- base/compiler/typeinfer.jl | 237 ++-- base/compiler/types.jl | 8 + base/reflection.jl | 2 +- test/compiler/AbstractInterpreter.jl | 9 +- test/compiler/inference.jl | 107 -- 11 files changed, 1048 insertions(+), 965 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 68b8394b72c3d..96355f2a6b5dd 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -47,223 +47,210 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f), matches = find_method_matches(interp, argtypes, atype; max_methods) if isa(matches, FailedMethodMatch) add_remark!(interp, sv, matches.reason) - return CallMeta(Any, Any, Effects(), NoCallInfo()) + return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) end (; valid_worlds, applicable, info) = matches update_valid_age!(sv, valid_worlds) - napplicable = length(applicable) + + # final result + gfresult = Future{CallMeta}() + # intermediate work for computing gfresult rettype = exctype = Bottom edges = MethodInstance[] conditionals = nothing # keeps refinement information of call argument types when the return type is boolean - seen = 0 # number of signatures actually inferred + seenall = true const_results = nothing # or const_results::Vector{Union{Nothing,ConstResult}} if any const results are available - multiple_matches = napplicable > 1 fargs = arginfo.fargs 
all_effects = EFFECTS_TOTAL slotrefinements = nothing # keeps refinement information on slot types obtained from call signature - for i in 1:napplicable - match = applicable[i]::MethodMatch - method = match.method - sig = match.spec_types - if bail_out_toplevel_call(interp, InferenceLoopState(sig, rettype, all_effects), sv) - # only infer concrete call sites in top-level expressions - add_remark!(interp, sv, "Refusing to infer non-concrete call site in top-level expression") - break - end - this_rt = Bottom - this_exct = Bottom - splitunions = false - # TODO: this used to trigger a bug in inference recursion detection, and is unmaintained now - # sigtuple = unwrap_unionall(sig)::DataType - # splitunions = 1 < unionsplitcost(sigtuple.parameters) * napplicable <= InferenceParams(interp).max_union_splitting - if splitunions - splitsigs = switchtupleunion(sig) - for sig_n in splitsigs - result = abstract_call_method(interp, method, sig_n, svec(), multiple_matches, si, sv) - (; rt, exct, edge, effects, volatile_inf_result) = result + # split the for loop off into a function, so that we can pause and restart it at will + i::Int = 1 + f = Core.Box(f) + atype = Core.Box(atype) + function infercalls(interp, sv) + napplicable = length(applicable) + multiple_matches = napplicable > 1 + while i <= napplicable + match = applicable[i]::MethodMatch + method = match.method + sig = match.spec_types + if bail_out_toplevel_call(interp, InferenceLoopState(sig, rettype, all_effects), sv) + # only infer concrete call sites in top-level expressions + add_remark!(interp, sv, "Refusing to infer non-concrete call site in top-level expression") + seenall = false + break + end + # TODO: this is unmaintained now as it didn't seem to improve things, though it does avoid hard-coding the union split at the higher level, + # it also can hurt infer-ability of some constrained parameter types (e.g. 
quacks like a duck) + # sigtuple = unwrap_unionall(sig)::DataType + # splitunions = 1 < unionsplitcost(sigtuple.parameters) * napplicable <= InferenceParams(interp).max_union_splitting + #if splitunions + # splitsigs = switchtupleunion(sig) + # for sig_n in splitsigs + # result = abstract_call_method(interp, method, sig_n, svec(), multiple_matches, si, sv)::Future + # handle1(...) + # end + #end + mresult = abstract_call_method(interp, method, sig, match.sparams, multiple_matches, si, sv)::Future + function handle1(interp, sv) + local (; rt, exct, edge, effects, volatile_inf_result) = mresult[] + this_conditional = ignorelimited(rt) + this_rt = widenwrappedconditional(rt) + this_exct = exct + # try constant propagation with argtypes for this match + # this is in preparation for inlining, or improving the return result this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i] this_arginfo = ArgInfo(fargs, this_argtypes) const_call_result = abstract_call_method_with_const_args(interp, - result, f, this_arginfo, si, match, sv) + mresult[], f.contents, this_arginfo, si, match, sv) const_result = volatile_inf_result if const_call_result !== nothing - if const_call_result.rt ⊑ₚ rt - rt = const_call_result.rt + this_const_conditional = ignorelimited(const_call_result.rt) + this_const_rt = widenwrappedconditional(const_call_result.rt) + if this_const_rt ⊑ₚ this_rt + # As long as the const-prop result we have is not *worse* than + # what we found out on types, we'd like to use it. Even if the + # end result is exactly equivalent, it is likely that the IR + # we produced while constproping is better than that with + # generic types. + # Return type of const-prop' inference can be wider than that of non const-prop' inference + # e.g. 
in cases when there are cycles but cached result is still accurate + this_conditional = this_const_conditional + this_rt = this_const_rt (; effects, const_result, edge) = const_call_result elseif is_better_effects(const_call_result.effects, effects) (; effects, const_result, edge) = const_call_result else add_remark!(interp, sv, "[constprop] Discarded because the result was wider than inference") end - if const_call_result.exct ⋤ exct - (; exct, const_result, edge) = const_call_result + # Treat the exception type separately. Currently, constprop often cannot determine the exception type + # because consistent-cy does not apply to exceptions. + if const_call_result.exct ⋤ this_exct + this_exct = const_call_result.exct + (; const_result, edge) = const_call_result else add_remark!(interp, sv, "[constprop] Discarded exception type because result was wider than inference") end end + all_effects = merge_effects(all_effects, effects) if const_result !== nothing if const_results === nothing - const_results = fill!(Vector{Union{Nothing,ConstResult}}(undef, #=TODO=#napplicable), nothing) + const_results = fill!(Vector{Union{Nothing,ConstResult}}(undef, napplicable), nothing) end const_results[i] = const_result end edge === nothing || push!(edges, edge) - this_rt = this_rt ⊔ₚ rt - this_exct = this_exct ⊔ₚ exct - if bail_out_call(interp, this_rt, sv) - break + @assert !(this_conditional isa Conditional || this_rt isa MustAlias) "invalid lattice element returned from inter-procedural context" + if can_propagate_conditional(this_conditional, argtypes) + # The only case where we need to keep this in rt is where + # we can directly propagate the conditional to a slot argument + # that is not one of our arguments, otherwise we keep all the + # relevant information in `conditionals` below. 
+ this_rt = this_conditional end - end - this_conditional = ignorelimited(this_rt) - this_rt = widenwrappedconditional(this_rt) - else - result = abstract_call_method(interp, method, sig, match.sparams, multiple_matches, si, sv) - (; rt, exct, edge, effects, volatile_inf_result) = result - this_conditional = ignorelimited(rt) - this_rt = widenwrappedconditional(rt) - this_exct = exct - # try constant propagation with argtypes for this match - # this is in preparation for inlining, or improving the return result - this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i] - this_arginfo = ArgInfo(fargs, this_argtypes) - const_call_result = abstract_call_method_with_const_args(interp, - result, f, this_arginfo, si, match, sv) - const_result = volatile_inf_result - if const_call_result !== nothing - this_const_conditional = ignorelimited(const_call_result.rt) - this_const_rt = widenwrappedconditional(const_call_result.rt) - if this_const_rt ⊑ₚ this_rt - # As long as the const-prop result we have is not *worse* than - # what we found out on types, we'd like to use it. Even if the - # end result is exactly equivalent, it is likely that the IR - # we produced while constproping is better than that with - # generic types. - # Return type of const-prop' inference can be wider than that of non const-prop' inference - # e.g. 
in cases when there are cycles but cached result is still accurate - this_conditional = this_const_conditional - this_rt = this_const_rt - (; effects, const_result, edge) = const_call_result - elseif is_better_effects(const_call_result.effects, effects) - (; effects, const_result, edge) = const_call_result - else - add_remark!(interp, sv, "[constprop] Discarded because the result was wider than inference") + + rettype = rettype ⊔ₚ this_rt + exctype = exctype ⊔ₚ this_exct + if has_conditional(𝕃ₚ, sv) && this_conditional !== Bottom && is_lattice_bool(𝕃ₚ, rettype) && fargs !== nothing + if conditionals === nothing + conditionals = Any[Bottom for _ in 1:length(argtypes)], + Any[Bottom for _ in 1:length(argtypes)] + end + for i = 1:length(argtypes) + cnd = conditional_argtype(𝕃ᵢ, this_conditional, sig, argtypes, i) + conditionals[1][i] = conditionals[1][i] ⊔ᵢ cnd.thentype + conditionals[2][i] = conditionals[2][i] ⊔ᵢ cnd.elsetype + end end - # Treat the exception type separately. Currently, constprop often cannot determine the exception type - # because consistent-cy does not apply to exceptions. - if const_call_result.exct ⋤ this_exct - this_exct = const_call_result.exct - (; const_result, edge) = const_call_result - else - add_remark!(interp, sv, "[constprop] Discarded exception type because result was wider than inference") + if i < napplicable && bail_out_call(interp, InferenceLoopState(sig, rettype, all_effects), sv) + add_remark!(interp, sv, "Call inference reached maximally imprecise information. 
Bailing on.") + seenall = false + i = napplicable # break in outer function end + i += 1 + return true end - all_effects = merge_effects(all_effects, effects) - if const_result !== nothing - if const_results === nothing - const_results = fill!(Vector{Union{Nothing,ConstResult}}(undef, napplicable), nothing) - end - const_results[i] = const_result + if isready(mresult) && handle1(interp, sv) + continue + else + push!(sv.tasks, handle1) + return false end - edge === nothing || push!(edges, edge) - end - @assert !(this_conditional isa Conditional || this_rt isa MustAlias) "invalid lattice element returned from inter-procedural context" - seen += 1 + end # while - if can_propagate_conditional(this_conditional, argtypes) - # The only case where we need to keep this in rt is where - # we can directly propagate the conditional to a slot argument - # that is not one of our arguments, otherwise we keep all the - # relevant information in `conditionals` below. - this_rt = this_conditional + if const_results !== nothing + @assert napplicable == nmatches(info) == length(const_results) + info = ConstCallInfo(info, const_results) end - rettype = rettype ⊔ₚ this_rt - exctype = exctype ⊔ₚ this_exct - if has_conditional(𝕃ₚ, sv) && this_conditional !== Bottom && is_lattice_bool(𝕃ₚ, rettype) && fargs !== nothing - if conditionals === nothing - conditionals = Any[Bottom for _ in 1:length(argtypes)], - Any[Bottom for _ in 1:length(argtypes)] + if seenall + if !fully_covering(matches) || any_ambig(matches) + # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature. 
+ all_effects = Effects(all_effects; nothrow=false) + exctype = exctype ⊔ₚ MethodError end - for i = 1:length(argtypes) - cnd = conditional_argtype(𝕃ᵢ, this_conditional, sig, argtypes, i) - conditionals[1][i] = conditionals[1][i] ⊔ᵢ cnd.thentype - conditionals[2][i] = conditionals[2][i] ⊔ᵢ cnd.elsetype + if sv isa InferenceState && fargs !== nothing + slotrefinements = collect_slot_refinements(𝕃ᵢ, applicable, argtypes, fargs, sv) end - end - if bail_out_call(interp, InferenceLoopState(sig, rettype, all_effects), sv) - add_remark!(interp, sv, "Call inference reached maximally imprecise information. Bailing on.") - break - end - end - - if const_results !== nothing - @assert napplicable == nmatches(info) == length(const_results) - info = ConstCallInfo(info, const_results) - end - - if seen ≠ napplicable - # there is unanalyzed candidate, widen type and effects to the top - rettype = exctype = Any - all_effects = Effects() - else - if !fully_covering(matches) || any_ambig(matches) - # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature. - all_effects = Effects(all_effects; nothrow=false) - exctype = exctype ⊔ₚ MethodError - end - if sv isa InferenceState && fargs !== nothing - slotrefinements = collect_slot_refinements(𝕃ᵢ, applicable, argtypes, fargs, sv) - end - end - - rettype = from_interprocedural!(interp, rettype, sv, arginfo, conditionals) - - # Also considering inferring the compilation signature for this method, so - # it is available to the compiler in case it ends up needing it. 
- if (isa(sv, InferenceState) && infer_compilation_signature(interp) && - (1 == seen == napplicable) && rettype !== Any && rettype !== Bottom && - !is_removable_if_unused(all_effects)) - match = applicable[1]::MethodMatch - method = match.method - sig = match.spec_types - mi = specialize_method(match; preexisting=true) - if mi !== nothing && !const_prop_methodinstance_heuristic(interp, mi, arginfo, sv) - csig = get_compileable_sig(method, sig, match.sparams) - if csig !== nothing && csig !== sig - abstract_call_method(interp, method, csig, match.sparams, multiple_matches, StmtInfo(false), sv) + else + # there is unanalyzed candidate, widen type and effects to the top + rettype = exctype = Any + all_effects = Effects() + end + + rettype = from_interprocedural!(interp, rettype, sv, arginfo, conditionals) + + # Also considering inferring the compilation signature for this method, so + # it is available to the compiler in case it ends up needing it. + if (isa(sv, InferenceState) && infer_compilation_signature(interp) && + (seenall && 1 == napplicable) && rettype !== Any && rettype !== Bottom && + !is_removable_if_unused(all_effects)) + match = applicable[1]::MethodMatch + method = match.method + sig = match.spec_types + mi = specialize_method(match; preexisting=true) + if mi !== nothing && !const_prop_methodinstance_heuristic(interp, mi, arginfo, sv) + csig = get_compileable_sig(method, sig, match.sparams) + if csig !== nothing && csig !== sig + abstract_call_method(interp, method, csig, match.sparams, multiple_matches, StmtInfo(false), sv)::Future + end end end - end - if call_result_unused(si) && !(rettype === Bottom) - add_remark!(interp, sv, "Call result type was widened because the return value is unused") - # We're mainly only here because the optimizer might want this code, - # but we ourselves locally don't typically care about it locally - # (beyond checking if it always throws). 
- # So avoid adding an edge, since we don't want to bother attempting - # to improve our result even if it does change (to always throw), - # and avoid keeping track of a more complex result type. - rettype = Any - end - any_slot_refined = slotrefinements !== nothing - add_call_backedges!(interp, rettype, all_effects, any_slot_refined, edges, matches, atype, sv) - if isa(sv, InferenceState) - # TODO (#48913) implement a proper recursion handling for irinterp: - # This works just because currently the `:terminate` condition guarantees that - # irinterp doesn't fail into unresolved cycles, but it's not a good solution. - # We should revisit this once we have a better story for handling cycles in irinterp. - if !isempty(sv.pclimitations) # remove self, if present - delete!(sv.pclimitations, sv) - for caller in callers_in_cycle(sv) - delete!(sv.pclimitations, caller) + if call_result_unused(si) && !(rettype === Bottom) + add_remark!(interp, sv, "Call result type was widened because the return value is unused") + # We're mainly only here because the optimizer might want this code, + # but we ourselves locally don't typically care about it locally + # (beyond checking if it always throws). + # So avoid adding an edge, since we don't want to bother attempting + # to improve our result even if it does change (to always throw), + # and avoid keeping track of a more complex result type. + rettype = Any + end + any_slot_refined = slotrefinements !== nothing + add_call_backedges!(interp, rettype, all_effects, any_slot_refined, edges, matches, atype.contents, sv) + if isa(sv, InferenceState) + # TODO (#48913) implement a proper recursion handling for irinterp: + # This works just because currently the `:terminate` condition guarantees that + # irinterp doesn't fail into unresolved cycles, but it's not a good solution. + # We should revisit this once we have a better story for handling cycles in irinterp. 
+ if !isempty(sv.pclimitations) # remove self, if present + delete!(sv.pclimitations, sv) + for caller in callers_in_cycle(sv) + delete!(sv.pclimitations, caller) + end end end - end - return CallMeta(rettype, exctype, all_effects, info, slotrefinements) + gfresult[] = CallMeta(rettype, exctype, all_effects, info, slotrefinements) + return true + end # infercalls + # start making progress on the first call + infercalls(interp, sv) || push!(sv.tasks, infercalls) + return gfresult end struct FailedMethodMatch @@ -607,9 +594,9 @@ function abstract_call_method(interp::AbstractInterpreter, hardlimit::Bool, si::StmtInfo, sv::AbsIntState) sigtuple = unwrap_unionall(sig) sigtuple isa DataType || - return MethodCallResult(Any, Any, false, false, nothing, Effects()) + return Future(MethodCallResult(Any, Any, false, false, nothing, Effects())) all(@nospecialize(x) -> valid_as_lattice(unwrapva(x), true), sigtuple.parameters) || - return MethodCallResult(Union{}, Any, false, false, nothing, EFFECTS_THROWS) # catch bad type intersections early + return Future(MethodCallResult(Union{}, Any, false, false, nothing, EFFECTS_THROWS)) # catch bad type intersections early if is_nospecializeinfer(method) sig = get_nospecializeinfer_sig(method, sig, sparams) @@ -634,7 +621,7 @@ function abstract_call_method(interp::AbstractInterpreter, # we have a self-cycle in the call-graph, but not in the inference graph (typically): # break this edge now (before we record it) by returning early # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases) - return MethodCallResult(Any, Any, true, true, nothing, Effects()) + return Future(MethodCallResult(Any, Any, true, true, nothing, Effects())) end topmost = nothing edgecycle = true @@ -689,7 +676,7 @@ function abstract_call_method(interp::AbstractInterpreter, # since it's very unlikely that we'll try to inline this, # or want make an invoke edge to its calling convention return type. 
# (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases) - return MethodCallResult(Any, Any, true, true, nothing, Effects()) + return Future(MethodCallResult(Any, Any, true, true, nothing, Effects())) end add_remark!(interp, sv, washardlimit ? RECURSION_MSG_HARDLIMIT : RECURSION_MSG) # TODO (#48913) implement a proper recursion handling for irinterp: @@ -745,31 +732,7 @@ function abstract_call_method(interp::AbstractInterpreter, sparams = recomputed[2]::SimpleVector end - (; rt, exct, edge, effects, volatile_inf_result) = typeinf_edge(interp, method, sig, sparams, sv) - - if edge === nothing - edgecycle = edgelimited = true - end - - # we look for the termination effect override here as well, since the :terminates effect - # may have been tainted due to recursion at this point even if it's overridden - if is_effect_overridden(sv, :terminates_globally) - # this frame is known to terminate - effects = Effects(effects, terminates=true) - elseif is_effect_overridden(method, :terminates_globally) - # this edge is known to terminate - effects = Effects(effects; terminates=true) - elseif edgecycle - # Some sort of recursion was detected. 
- if edge !== nothing && !edgelimited && !is_edge_recursed(edge, sv) - # no `MethodInstance` cycles -- don't taint :terminate - else - # we cannot guarantee that the call will terminate - effects = Effects(effects; terminates=false) - end - end - - return MethodCallResult(rt, exct, edgecycle, edgelimited, edge, effects, volatile_inf_result) + return typeinf_edge(interp, method, sig, sparams, sv, edgecycle, edgelimited) end function edge_matches_sv(interp::AbstractInterpreter, frame::AbsIntState, @@ -1331,7 +1294,7 @@ const_prop_result(inf_result::InferenceResult) = inf_result.ipo_effects, inf_result.linfo) # return cached result of constant analysis -return_cached_result(::AbstractInterpreter, inf_result::InferenceResult, ::AbsIntState) = +return_localcache_result(::AbstractInterpreter, inf_result::InferenceResult, ::AbsIntState) = const_prop_result(inf_result) function compute_forwarded_argtypes(interp::AbstractInterpreter, arginfo::ArgInfo, sv::AbsIntState) @@ -1361,7 +1324,7 @@ function const_prop_call(interp::AbstractInterpreter, return nothing end @assert inf_result.linfo === mi "MethodInstance for cached inference result does not match" - return return_cached_result(interp, inf_result, sv) + return return_localcache_result(interp, inf_result, sv) end overridden_by_const = falses(length(argtypes)) for i = 1:length(argtypes) @@ -1375,7 +1338,7 @@ function const_prop_call(interp::AbstractInterpreter, end # perform fresh constant prop' inf_result = InferenceResult(mi, argtypes, overridden_by_const) - frame = InferenceState(inf_result, #=cache_mode=#:local, interp) + frame = InferenceState(inf_result, #=cache_mode=#:local, interp) # TODO: this should also be converted to a stackless Future if frame === nothing add_remark!(interp, sv, "[constprop] Could not retrieve the source") return nothing # this is probably a bad generated function (unsound), but just ignore it @@ -1517,9 +1480,9 @@ function precise_container_type(interp::AbstractInterpreter, 
@nospecialize(itft) widet = typ.typ if isa(widet, DataType) if widet.name === Tuple.name - return AbstractIterationResult(typ.fields, nothing) + return Future(AbstractIterationResult(typ.fields, nothing)) elseif widet.name === _NAMEDTUPLE_NAME - return AbstractIterationResult(typ.fields, nothing) + return Future(AbstractIterationResult(typ.fields, nothing)) end end end @@ -1527,7 +1490,7 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft) if isa(typ, Const) val = typ.val if isa(val, SimpleVector) || isa(val, Tuple) || isa(val, NamedTuple) - return AbstractIterationResult(Any[ Const(val[i]) for i in 1:length(val) ], nothing) # avoid making a tuple Generator here! + return Future(AbstractIterationResult(Any[ Const(val[i]) for i in 1:length(val) ], nothing)) # avoid making a tuple Generator here! end end @@ -1544,18 +1507,18 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft) # refine the Union to remove elements that are not valid tags for objects filter!(@nospecialize(x) -> valid_as_lattice(x, true), utis) if length(utis) == 0 - return AbstractIterationResult(Any[], nothing) # oops, this statement was actually unreachable + return Future(AbstractIterationResult(Any[], nothing)) # oops, this statement was actually unreachable elseif length(utis) == 1 tti = utis[1] tti0 = rewrap_unionall(tti, tti0) else if any(@nospecialize(t) -> !isa(t, DataType) || !(t <: Tuple) || !isknownlength(t), utis) - return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()) + return Future(AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())) end ltp = length((utis[1]::DataType).parameters) for t in utis if length((t::DataType).parameters) != ltp - return AbstractIterationResult(Any[Vararg{Any}], nothing) + return Future(AbstractIterationResult(Any[Vararg{Any}], nothing)) end end result = Any[ Union{} for _ in 1:ltp ] @@ -1566,14 +1529,14 @@ function precise_container_type(interp::AbstractInterpreter, 
@nospecialize(itft) result[j] = tmerge(result[j], rewrap_unionall(tps[j], tti0)) end end - return AbstractIterationResult(result, nothing) + return Future(AbstractIterationResult(result, nothing)) end end if tti0 <: Tuple if isa(tti0, DataType) - return AbstractIterationResult(Any[ p for p in tti0.parameters ], nothing) + return Future(AbstractIterationResult(Any[ p for p in tti0.parameters ], nothing)) elseif !isa(tti, DataType) - return AbstractIterationResult(Any[Vararg{Any}], nothing) + return Future(AbstractIterationResult(Any[Vararg{Any}], nothing)) else len = length(tti.parameters) last = tti.parameters[len] @@ -1586,17 +1549,17 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft) elts[len] = Vararg{elts[len]} end end - return AbstractIterationResult(elts, nothing) + return Future(AbstractIterationResult(elts, nothing)) end elseif tti0 === SimpleVector - return AbstractIterationResult(Any[Vararg{Any}], nothing) + return Future(AbstractIterationResult(Any[Vararg{Any}], nothing)) elseif tti0 === Any - return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()) + return Future(AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())) elseif tti0 <: Array || tti0 <: GenericMemory if eltype(tti0) === Union{} - return AbstractIterationResult(Any[], nothing) + return Future(AbstractIterationResult(Any[], nothing)) end - return AbstractIterationResult(Any[Vararg{eltype(tti0)}], nothing) + return Future(AbstractIterationResult(Any[Vararg{eltype(tti0)}], nothing)) else return abstract_iteration(interp, itft, typ, sv) end @@ -1607,95 +1570,144 @@ function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @n if isa(itft, Const) iteratef = itft.val else - return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()) + return Future(AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())) end @assert !isvarargtype(itertype) - call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[itft, 
itertype]), StmtInfo(true), sv) - stateordonet = call.rt - info = call.info - # Return Bottom if this is not an iterator. - # WARNING: Changes to the iteration protocol must be reflected here, - # this is not just an optimization. - # TODO: this doesn't realize that Array, GenericMemory, SimpleVector, Tuple, and NamedTuple do not use the iterate protocol - stateordonet === Bottom && return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(CallMeta[CallMeta(Bottom, Any, call.effects, info)], true)) - valtype = statetype = Bottom - ret = Any[] - calls = CallMeta[call] - stateordonet_widened = widenconst(stateordonet) - 𝕃ᵢ = typeinf_lattice(interp) - # Try to unroll the iteration up to max_tuple_splat, which covers any finite - # length iterators, or interesting prefix - while true - if stateordonet_widened === Nothing - return AbstractIterationResult(ret, AbstractIterationInfo(calls, true)) - end - if Nothing <: stateordonet_widened || length(ret) >= InferenceParams(interp).max_tuple_splat - break - end - if !isa(stateordonet_widened, DataType) || !(stateordonet_widened <: Tuple) || isvatuple(stateordonet_widened) || length(stateordonet_widened.parameters) != 2 - break - end - nstatetype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(2)) - # If there's no new information in this statetype, don't bother continuing, - # the iterator won't be finite. 
- if ⊑(𝕃ᵢ, nstatetype, statetype) - return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), EFFECTS_THROWS) - end - valtype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(1)) - push!(ret, valtype) - statetype = nstatetype - call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv) - stateordonet = call.rt + iterateresult = Future{AbstractIterationResult}() + call1future = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[itft, itertype]), StmtInfo(true), sv)::Future + function inferiterate(interp, sv) + call1 = call1future[] + stateordonet = call1.rt + # Return Bottom if this is not an iterator. + # WARNING: Changes to the iteration protocol must be reflected here, + # this is not just an optimization. + # TODO: this doesn't realize that Array, GenericMemory, SimpleVector, Tuple, and NamedTuple do not use the iterate protocol + if stateordonet === Bottom + iterateresult[] = AbstractIterationResult(Any[Bottom], AbstractIterationInfo(CallMeta[CallMeta(Bottom, Any, call1.effects, call1.info)], true)) + return true + end stateordonet_widened = widenconst(stateordonet) - push!(calls, call) - end - # From here on, we start asking for results on the widened types, rather than - # the precise (potentially const) state type - # statetype and valtype are reinitialized in the first iteration below from the - # (widened) stateordonet, which has not yet been fully analyzed in the loop above - valtype = statetype = Bottom - may_have_terminated = Nothing <: stateordonet_widened - while valtype !== Any - nounion = typeintersect(stateordonet_widened, Tuple{Any,Any}) - if nounion !== Union{} && !isa(nounion, DataType) - # nounion is of a type we cannot handle - valtype = Any - break - end - if nounion === Union{} || (nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype) - # reached a fixpoint or iterator failed/gave invalid answer - if !hasintersect(stateordonet_widened, 
Nothing) - # ... but cannot terminate - if !may_have_terminated - # ... and cannot have terminated prior to this loop - return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), Effects()) - else - # iterator may have terminated prior to this loop, but not during it - valtype = Bottom + calls = CallMeta[call1] + valtype = statetype = Bottom + ret = Any[] + 𝕃ᵢ = typeinf_lattice(interp) + may_have_terminated = false + local call2future::Future{CallMeta} + + nextstate::UInt8 = 0x0 + function inferiterate_2arg(interp, sv) + if nextstate === 0x1 + nextstate = 0xff + @goto state1 + elseif nextstate === 0x2 + nextstate = 0xff + @goto state2 + else + @assert nextstate === 0x0 + nextstate = 0xff + end + + # Try to unroll the iteration up to max_tuple_splat, which covers any finite + # length iterators, or interesting prefix + while true + if stateordonet_widened === Nothing + iterateresult[] = AbstractIterationResult(ret, AbstractIterationInfo(calls, true)) + return true + end + if Nothing <: stateordonet_widened || length(ret) >= InferenceParams(interp).max_tuple_splat + break + end + if !isa(stateordonet_widened, DataType) || !(stateordonet_widened <: Tuple) || isvatuple(stateordonet_widened) || length(stateordonet_widened.parameters) != 2 + break + end + nstatetype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(2)) + # If there's no new information in this statetype, don't bother continuing, + # the iterator won't be finite. 
+ if ⊑(𝕃ᵢ, nstatetype, statetype) + iterateresult[] = AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), EFFECTS_THROWS) + return true + end + valtype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(1)) + push!(ret, valtype) + statetype = nstatetype + call2future = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv)::Future + if !isready(call2future) + nextstate = 0x1 + return false + @label state1 + end + let call = call2future[] + push!(calls, call) + stateordonet = call.rt + stateordonet_widened = widenconst(stateordonet) end end - break - end - valtype = tmerge(valtype, nounion.parameters[1]) - statetype = tmerge(statetype, nounion.parameters[2]) - call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv) - push!(calls, call) - stateordonet = call.rt - stateordonet_widened = widenconst(stateordonet) - end - if valtype !== Union{} - push!(ret, Vararg{valtype}) + # From here on, we start asking for results on the widened types, rather than + # the precise (potentially const) state type + # statetype and valtype are reinitialized in the first iteration below from the + # (widened) stateordonet, which has not yet been fully analyzed in the loop above + valtype = statetype = Bottom + may_have_terminated = Nothing <: stateordonet_widened + while valtype !== Any + nounion = typeintersect(stateordonet_widened, Tuple{Any,Any}) + if nounion !== Union{} && !isa(nounion, DataType) + # nounion is of a type we cannot handle + valtype = Any + break + end + if nounion === Union{} || (nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype) + # reached a fixpoint or iterator failed/gave invalid answer + if !hasintersect(stateordonet_widened, Nothing) + # ... but cannot terminate + if may_have_terminated + # ... and iterator may have terminated prior to this loop, but not during it + valtype = Bottom + else + # ... 
or cannot have terminated prior to this loop + iterateresult[] = AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), Effects()) + return true + end + end + break + end + valtype = tmerge(valtype, nounion.parameters[1]) + statetype = tmerge(statetype, nounion.parameters[2]) + call2future = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv)::Future + if !isready(call2future) + nextstate = 0x2 + return false + @label state2 + end + let call = call2future[] + push!(calls, call) + stateordonet = call.rt + stateordonet_widened = widenconst(stateordonet) + end + end + if valtype !== Union{} + push!(ret, Vararg{valtype}) + end + iterateresult[] = AbstractIterationResult(ret, AbstractIterationInfo(calls, false)) + return true + end # inferiterate_2arg + # continue making progress as much as possible, on iterate(arg, state) + inferiterate_2arg(interp, sv) || push!(sv.tasks, inferiterate_2arg) + return true + end # inferiterate + # continue making progress as soon as possible, on iterate(arg) + if !(isready(call1future) && inferiterate(interp, sv)) + push!(sv.tasks, inferiterate) end - return AbstractIterationResult(ret, AbstractIterationInfo(calls, false)) + return iterateresult end # do apply(af, fargs...), where af is a function value function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, sv::AbsIntState, max_methods::Int=get_max_methods(interp, sv)) - itft = argtype_by_index(argtypes, 2) + itft = Core.Box(argtype_by_index(argtypes, 2)) aft = argtype_by_index(argtypes, 3) - (itft === Bottom || aft === Bottom) && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) + (itft.contents === Bottom || aft === Bottom) && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())) aargtypes = argtype_tail(argtypes, 4) aftw = widenconst(aft) if !isa(aft, Const) && !isa(aft, PartialOpaque) && (!isType(aftw) || has_free_typevars(aftw)) @@ -1703,100 
+1715,155 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si:: add_remark!(interp, sv, "Core._apply_iterate called on a function of a non-concrete type") # bail now, since it seems unlikely that abstract_call will be able to do any better after splitting # this also ensures we don't call abstract_call_gf_by_type below on an IntrinsicFunction or Builtin - return CallMeta(Any, Any, Effects(), NoCallInfo()) + return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) end end res = Union{} - nargs = length(aargtypes) splitunions = 1 < unionsplitcost(typeinf_lattice(interp), aargtypes) <= InferenceParams(interp).max_apply_union_enum - ctypes = [Any[aft]] - infos = Vector{MaybeAbstractIterationInfo}[MaybeAbstractIterationInfo[]] - effects = EFFECTS_TOTAL - for i = 1:nargs - ctypes´ = Vector{Any}[] - infos′ = Vector{MaybeAbstractIterationInfo}[] - for ti in (splitunions ? uniontypes(aargtypes[i]) : Any[aargtypes[i]]) - if !isvarargtype(ti) - (;cti, info, ai_effects) = precise_container_type(interp, itft, ti, sv) - else - (;cti, info, ai_effects) = precise_container_type(interp, itft, unwrapva(ti), sv) - # We can't represent a repeating sequence of the same types, - # so tmerge everything together to get one type that represents - # everything. 
- argt = cti[end] - if isvarargtype(argt) - argt = unwrapva(argt) + ctypes::Vector{Vector{Any}} = [Any[aft]] + infos::Vector{Vector{MaybeAbstractIterationInfo}} = Vector{MaybeAbstractIterationInfo}[MaybeAbstractIterationInfo[]] + all_effects::Effects = EFFECTS_TOTAL + retinfos = ApplyCallInfo[] + retinfo = UnionSplitApplyCallInfo(retinfos) + exctype = Union{} + ctypes´ = Vector{Any}[] + infos´ = Vector{MaybeAbstractIterationInfo}[] + local ti, argtypesi + local ctfuture::Future{AbstractIterationResult} + local callfuture::Future{CallMeta} + + applyresult = Future{CallMeta}() + # split the rest into a resumable state machine + i::Int = 1 + j::Int = 1 + nextstate::UInt8 = 0x0 + function infercalls(interp, sv) + # n.b. Remember that variables will lose their values across restarts, + # so be sure to manually hoist any values that must be preserved and do + # not rely on program order. + # This is a little more complex than the closure continuations often used elsewhere, but avoids needing to manage all of that indentation + if nextstate === 0x1 + nextstate = 0xff + @goto state1 + elseif nextstate === 0x2 + nextstate = 0xff + @goto state2 + elseif nextstate === 0x3 + nextstate = 0xff + @goto state3 + else + @assert nextstate === 0x0 + nextstate = 0xff + end + while i <= length(aargtypes) + argtypesi = (splitunions ? 
uniontypes(aargtypes[i]) : Any[aargtypes[i]]) + i += 1 + j = 1 + while j <= length(argtypesi) + ti = argtypesi[j] + j += 1 + if !isvarargtype(ti) + ctfuture = precise_container_type(interp, itft.contents, ti, sv)::Future + if !isready(ctfuture) + nextstate = 0x1 + return false + @label state1 + end + (;cti, info, ai_effects) = ctfuture[] + else + ctfuture = precise_container_type(interp, itft.contents, unwrapva(ti), sv)::Future + if !isready(ctfuture) + nextstate = 0x2 + return false + @label state2 + end + (;cti, info, ai_effects) = ctfuture[] + # We can't represent a repeating sequence of the same types, + # so tmerge everything together to get one type that represents + # everything. + argt = cti[end] + if isvarargtype(argt) + argt = unwrapva(argt) + end + for k in 1:(length(cti)-1) + argt = tmerge(argt, cti[k]) + end + cti = Any[Vararg{argt}] end - for i in 1:(length(cti)-1) - argt = tmerge(argt, cti[i]) + all_effects = merge_effects(all_effects, ai_effects) + if info !== nothing + for call in info.each + all_effects = merge_effects(all_effects, call.effects) + end + end + if any(@nospecialize(t) -> t === Bottom, cti) + continue + end + for k = 1:length(ctypes) + ct = ctypes[k] + if isvarargtype(ct[end]) + # This is vararg, we're not gonna be able to do any inlining, + # drop the info + info = nothing + tail = tuple_tail_elem(typeinf_lattice(interp), unwrapva(ct[end]), cti) + push!(ctypes´, push!(ct[1:(end - 1)], tail)) + else + push!(ctypes´, append!(ct[:], cti)) + end + push!(infos´, push!(copy(infos[k]), info)) end - cti = Any[Vararg{argt}] end - effects = merge_effects(effects, ai_effects) - if info !== nothing - for call in info.each - effects = merge_effects(effects, call.effects) + # swap for the new array and empty the temporary one + ctypes´, ctypes = ctypes, ctypes´ + infos´, infos = infos, infos´ + empty!(ctypes´) + empty!(infos´) + end + all_effects.nothrow || (exctype = Any) + + i = 1 + while i <= length(ctypes) + ct = ctypes[i] + lct = length(ct) 
+ # truncate argument list at the first Vararg + for k = 1:lct-1 + cti = ct[k] + if isvarargtype(cti) + ct[k] = tuple_tail_elem(typeinf_lattice(interp), unwrapva(cti), ct[(k+1):lct]) + resize!(ct, k) + break end end - if any(@nospecialize(t) -> t === Bottom, cti) - continue + callfuture = abstract_call(interp, ArgInfo(nothing, ct), si, sv, max_methods)::Future + if !isready(callfuture) + nextstate = 0x3 + return false + @label state3 end - for j = 1:length(ctypes) - ct = ctypes[j]::Vector{Any} - if isvarargtype(ct[end]) - # This is vararg, we're not gonna be able to do any inlining, - # drop the info - info = nothing - tail = tuple_tail_elem(typeinf_lattice(interp), unwrapva(ct[end]), cti) - push!(ctypes´, push!(ct[1:(end - 1)], tail)) - else - push!(ctypes´, append!(ct[:], cti)) + let (; info, rt, exct, effects) = callfuture[] + push!(retinfos, ApplyCallInfo(info, infos[i])) + res = tmerge(typeinf_lattice(interp), res, rt) + exctype = tmerge(typeinf_lattice(interp), exctype, exct) + all_effects = merge_effects(all_effects, effects) + if i < length(ctypes) && bail_out_apply(interp, InferenceLoopState(ctypes[i], res, all_effects), sv) + add_remark!(interp, sv, "_apply_iterate inference reached maximally imprecise information. Bailing on.") + # there is unanalyzed candidate, widen type and effects to the top + let retinfo = NoCallInfo() # NOTE this is necessary to prevent the inlining processing + applyresult[] = CallMeta(Any, Any, Effects(), retinfo) + return true + end end - push!(infos′, push!(copy(infos[j]), info)) end + i += 1 end - ctypes = ctypes´ - infos = infos′ - end - retinfos = ApplyCallInfo[] - retinfo = UnionSplitApplyCallInfo(retinfos) - napplicable = length(ctypes) - seen = 0 - exct = effects.nothrow ? 
Union{} : Any - for i = 1:napplicable - ct = ctypes[i] - arginfo = infos[i] - lct = length(ct) - # truncate argument list at the first Vararg - for i = 1:lct-1 - cti = ct[i] - if isvarargtype(cti) - ct[i] = tuple_tail_elem(typeinf_lattice(interp), unwrapva(cti), ct[(i+1):lct]) - resize!(ct, i) - break - end - end - call = abstract_call(interp, ArgInfo(nothing, ct), si, sv, max_methods) - seen += 1 - push!(retinfos, ApplyCallInfo(call.info, arginfo)) - res = tmerge(typeinf_lattice(interp), res, call.rt) - exct = tmerge(typeinf_lattice(interp), exct, call.exct) - effects = merge_effects(effects, call.effects) - if bail_out_apply(interp, InferenceLoopState(ct, res, effects), sv) - add_remark!(interp, sv, "_apply_iterate inference reached maximally imprecise information. Bailing on.") - break - end - end - if seen ≠ napplicable - # there is unanalyzed candidate, widen type and effects to the top - res = Any - exct = Any - effects = Effects() - retinfo = NoCallInfo() # NOTE this is necessary to prevent the inlining processing + # TODO: Add a special info type to capture all the iteration info. + # For now, only propagate info if we don't also union-split the iteration + applyresult[] = CallMeta(res, exctype, all_effects, retinfo) + return true end - # TODO: Add a special info type to capture all the iteration info. 
- # For now, only propagate info if we don't also union-split the iteration - return CallMeta(res, exct, effects, retinfo) + # start making progress on the first call + infercalls(interp, sv) || push!(sv.tasks, infercalls) + return applyresult end function argtype_by_index(argtypes::Vector{Any}, i::Int) @@ -2135,66 +2202,69 @@ function abstract_invoke(interp::AbstractInterpreter, arginfo::ArgInfo, si::Stmt argtypes = arginfo.argtypes ft′ = argtype_by_index(argtypes, 2) ft = widenconst(ft′) - ft === Bottom && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) + ft === Bottom && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())) (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, 3), false) - isexact || return CallMeta(Any, Any, Effects(), NoCallInfo()) + isexact || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) unwrapped = unwrap_unionall(types) - types === Bottom && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) + types === Bottom && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())) if !(unwrapped isa DataType && unwrapped.name === Tuple.name) - return CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo()) + return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())) end argtype = argtypes_to_type(argtype_tail(argtypes, 4)) nargtype = typeintersect(types, argtype) - nargtype === Bottom && return CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo()) - nargtype isa DataType || return CallMeta(Any, Any, Effects(), NoCallInfo()) # other cases are not implemented below - isdispatchelem(ft) || return CallMeta(Any, Any, Effects(), NoCallInfo()) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below + nargtype === Bottom && return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())) + nargtype isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # other cases are not implemented 
below + isdispatchelem(ft) || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below ft = ft::DataType lookupsig = rewrap_unionall(Tuple{ft, unwrapped.parameters...}, types)::Type nargtype = Tuple{ft, nargtype.parameters...} argtype = Tuple{ft, argtype.parameters...} match, valid_worlds = findsup(lookupsig, method_table(interp)) - match === nothing && return CallMeta(Any, Any, Effects(), NoCallInfo()) + match === nothing && return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) update_valid_age!(sv, valid_worlds) method = match.method tienv = ccall(:jl_type_intersection_with_env, Any, (Any, Any), nargtype, method.sig)::SimpleVector - ti = tienv[1]; env = tienv[2]::SimpleVector - result = abstract_call_method(interp, method, ti, env, false, si, sv) - (; rt, exct, edge, effects, volatile_inf_result) = result + ti = tienv[1] + env = tienv[2]::SimpleVector + mresult = abstract_call_method(interp, method, ti, env, false, si, sv)::Future match = MethodMatch(ti, env, method, argtype <: method.sig) - res = nothing - sig = match.spec_types - argtypes′ = invoke_rewrite(argtypes) - fargs = arginfo.fargs - fargs′ = fargs === nothing ? nothing : invoke_rewrite(fargs) - arginfo = ArgInfo(fargs′, argtypes′) - # # typeintersect might have narrowed signature, but the accuracy gain doesn't seem worth the cost involved with the lattice comparisons - # for i in 1:length(argtypes′) - # t, a = ti.parameters[i], argtypes′[i] - # argtypes′[i] = t ⊑ a ? 
t : a - # end - 𝕃ₚ = ipo_lattice(interp) - ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ) - f = singleton_type(ft′) - invokecall = InvokeCall(types, lookupsig) - const_call_result = abstract_call_method_with_const_args(interp, - result, f, arginfo, si, match, sv, invokecall) - const_result = volatile_inf_result - if const_call_result !== nothing - if const_call_result.rt ⊑ rt - (; rt, effects, const_result, edge) = const_call_result + return Future{CallMeta}(mresult, interp, sv) do result, interp, sv + (; rt, exct, edge, effects, volatile_inf_result) = result + res = nothing + sig = match.spec_types + argtypes′ = invoke_rewrite(argtypes) + fargs = arginfo.fargs + fargs′ = fargs === nothing ? nothing : invoke_rewrite(fargs) + arginfo = ArgInfo(fargs′, argtypes′) + # # typeintersect might have narrowed signature, but the accuracy gain doesn't seem worth the cost involved with the lattice comparisons + # for i in 1:length(argtypes′) + # t, a = ti.parameters[i], argtypes′[i] + # argtypes′[i] = t ⊑ a ? 
t : a + # end + 𝕃ₚ = ipo_lattice(interp) + ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ) + f = singleton_type(ft′) + invokecall = InvokeCall(types, lookupsig) + const_call_result = abstract_call_method_with_const_args(interp, + result, f, arginfo, si, match, sv, invokecall) + const_result = volatile_inf_result + if const_call_result !== nothing + if const_call_result.rt ⊑ rt + (; rt, effects, const_result, edge) = const_call_result + end + if const_call_result.exct ⋤ exct + (; exct, const_result, edge) = const_call_result + end end - if const_call_result.exct ⋤ exct - (; exct, const_result, edge) = const_call_result + rt = from_interprocedural!(interp, rt, sv, arginfo, sig) + info = InvokeCallInfo(match, const_result) + edge !== nothing && add_invoke_backedge!(sv, lookupsig, edge) + if !match.fully_covers + effects = Effects(effects; nothrow=false) + exct = exct ⊔ TypeError end + return CallMeta(rt, exct, effects, info) end - rt = from_interprocedural!(interp, rt, sv, arginfo, sig) - info = InvokeCallInfo(match, const_result) - edge !== nothing && add_invoke_backedge!(sv, lookupsig, edge) - if !match.fully_covers - effects = Effects(effects; nothrow=false) - exct = exct ⊔ TypeError - end - return CallMeta(rt, exct, effects, info) end function invoke_rewrite(xs::Vector{Any}) @@ -2207,10 +2277,12 @@ end function abstract_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::AbsIntState) if length(argtypes) == 3 finalizer_argvec = Any[argtypes[2], argtypes[3]] - call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), StmtInfo(false), sv, #=max_methods=#1) - return CallMeta(Nothing, Any, Effects(), FinalizerInfo(call.info, call.effects)) + call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), StmtInfo(false), sv, #=max_methods=#1)::Future + return Future{CallMeta}(call, interp, sv) do call, interp, sv + return CallMeta(Nothing, Any, Effects(), FinalizerInfo(call.info, call.effects)) + end end - return CallMeta(Nothing, 
Any, Effects(), NoCallInfo()) + return Future(CallMeta(Nothing, Any, Effects(), NoCallInfo())) end function abstract_throw(interp::AbstractInterpreter, argtypes::Vector{Any}, ::AbsIntState) @@ -2228,7 +2300,7 @@ function abstract_throw(interp::AbstractInterpreter, argtypes::Vector{Any}, ::Ab else exct = ArgumentError end - return CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo()) + return Future(CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo())) end function abstract_throw_methoderror(interp::AbstractInterpreter, argtypes::Vector{Any}, ::AbsIntState) @@ -2240,7 +2312,7 @@ function abstract_throw_methoderror(interp::AbstractInterpreter, argtypes::Vecto ⊔ = join(typeinf_lattice(interp)) MethodError ⊔ ArgumentError end - return CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo()) + return Future(CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo())) end # call where the function is known exactly @@ -2285,60 +2357,70 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), end end end - return CallMeta(rt, exct, effects, NoCallInfo(), refinements) + return Future(CallMeta(rt, exct, effects, NoCallInfo(), refinements)) elseif isa(f, Core.OpaqueClosure) # calling an OpaqueClosure about which we have no information returns no information - return CallMeta(typeof(f).parameters[2], Any, Effects(), NoCallInfo()) + return Future(CallMeta(typeof(f).parameters[2], Any, Effects(), NoCallInfo())) elseif f === TypeVar && !isvarargtype(argtypes[end]) # Manually look through the definition of TypeVar to # make sure to be able to get `PartialTypeVar`s out. 
- 2 ≤ la ≤ 4 || return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) - n = argtypes[2] - ub_var = Const(Any) - lb_var = Const(Union{}) - if la == 4 - ub_var = argtypes[4] - lb_var = argtypes[3] - elseif la == 3 - ub_var = argtypes[3] - end + 2 ≤ la ≤ 4 || return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())) # make sure generic code is prepared for inlining if needed later - call = let T = Any[Type{TypeVar}, Any, Any, Any] + let T = Any[Type{TypeVar}, Any, Any, Any] resize!(T, la) atype = Tuple{T...} T[1] = Const(TypeVar) - abstract_call_gf_by_type(interp, f, ArgInfo(nothing, T), si, atype, sv, max_methods) - end - pT = typevar_tfunc(𝕃ᵢ, n, lb_var, ub_var) - typevar_argtypes = Any[n, lb_var, ub_var] - effects = builtin_effects(𝕃ᵢ, Core._typevar, typevar_argtypes, pT) - if effects.nothrow - exct = Union{} - else - exct = builtin_exct(𝕃ᵢ, Core._typevar, typevar_argtypes, pT) + let call = abstract_call_gf_by_type(interp, f, ArgInfo(nothing, T), si, atype, sv, max_methods)::Future + return Future{CallMeta}(call, interp, sv) do call, interp, sv + n = argtypes[2] + ub_var = Const(Any) + lb_var = Const(Union{}) + if la == 4 + ub_var = argtypes[4] + lb_var = argtypes[3] + elseif la == 3 + ub_var = argtypes[3] + end + pT = typevar_tfunc(𝕃ᵢ, n, lb_var, ub_var) + typevar_argtypes = Any[n, lb_var, ub_var] + effects = builtin_effects(𝕃ᵢ, Core._typevar, typevar_argtypes, pT) + if effects.nothrow + exct = Union{} + else + exct = builtin_exct(𝕃ᵢ, Core._typevar, typevar_argtypes, pT) + end + return CallMeta(pT, exct, effects, call.info) + end + end end - return CallMeta(pT, exct, effects, call.info) elseif f === UnionAll - call = abstract_call_gf_by_type(interp, f, ArgInfo(nothing, Any[Const(UnionAll), Any, Any]), si, Tuple{Type{UnionAll}, Any, Any}, sv, max_methods) - return abstract_call_unionall(interp, argtypes, call) + let call = abstract_call_gf_by_type(interp, f, ArgInfo(nothing, Any[Const(UnionAll), Any, Any]), si, Tuple{Type{UnionAll}, Any, Any}, sv, 
max_methods)::Future + return Future{CallMeta}(call, interp, sv) do call, interp, sv + return abstract_call_unionall(interp, argtypes, call) + end + end elseif f === Tuple && la == 2 aty = argtypes[2] ty = isvarargtype(aty) ? unwrapva(aty) : widenconst(aty) if !isconcretetype(ty) - return CallMeta(Tuple, Any, EFFECTS_UNKNOWN, NoCallInfo()) + return Future(CallMeta(Tuple, Any, EFFECTS_UNKNOWN, NoCallInfo())) end elseif is_return_type(f) return return_type_tfunc(interp, argtypes, si, sv) elseif la == 3 && f === Core.:(!==) # mark !== as exactly a negated call to === - call = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Any, Any]), si, Tuple{typeof(f), Any, Any}, sv, max_methods) - rty = abstract_call_known(interp, (===), arginfo, si, sv, max_methods).rt - if isa(rty, Conditional) - return CallMeta(Conditional(rty.slot, rty.elsetype, rty.thentype), Bottom, EFFECTS_TOTAL, NoCallInfo()) # swap if-else - elseif isa(rty, Const) - return CallMeta(Const(rty.val === false), Bottom, EFFECTS_TOTAL, MethodResultPure()) - end - return call + let callfuture = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Any, Any]), si, Tuple{typeof(f), Any, Any}, sv, max_methods)::Future, + rtfuture = abstract_call_known(interp, (===), arginfo, si, sv, max_methods)::Future + return Future{CallMeta}(isready(callfuture) && isready(rtfuture), interp, sv) do interp, sv + local rty = rtfuture[].rt + if isa(rty, Conditional) + return CallMeta(Conditional(rty.slot, rty.elsetype, rty.thentype), Bottom, EFFECTS_TOTAL, NoCallInfo()) # swap if-else + elseif isa(rty, Const) + return CallMeta(Const(rty.val === false), Bottom, EFFECTS_TOTAL, MethodResultPure()) + end + return callfuture[] + end + end elseif la == 3 && f === Core.:(>:) # mark issupertype as a exact alias for issubtype # swap T1 and T2 arguments and call <: @@ -2350,12 +2432,12 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), argtypes = Any[typeof(<:), argtypes[3], 
argtypes[2]] return abstract_call_known(interp, <:, ArgInfo(fargs, argtypes), si, sv, max_methods) elseif la == 2 && f === Core.typename - return CallMeta(typename_static(argtypes[2]), Bottom, EFFECTS_TOTAL, MethodResultPure()) + return Future(CallMeta(typename_static(argtypes[2]), Bottom, EFFECTS_TOTAL, MethodResultPure())) elseif f === Core._hasmethod - return _hasmethod_tfunc(interp, argtypes, sv) + return Future(_hasmethod_tfunc(interp, argtypes, sv)) end atype = argtypes_to_type(argtypes) - return abstract_call_gf_by_type(interp, f, arginfo, si, atype, sv, max_methods) + return abstract_call_gf_by_type(interp, f, arginfo, si, atype, sv, max_methods)::Future end function abstract_call_opaque_closure(interp::AbstractInterpreter, @@ -2364,40 +2446,44 @@ function abstract_call_opaque_closure(interp::AbstractInterpreter, tt = closure.typ ocargsig = rewrap_unionall((unwrap_unionall(tt)::DataType).parameters[1], tt) ocargsig′ = unwrap_unionall(ocargsig) - ocargsig′ isa DataType || return CallMeta(Any, Any, Effects(), NoCallInfo()) + ocargsig′ isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) ocsig = rewrap_unionall(Tuple{Tuple, ocargsig′.parameters...}, ocargsig) - hasintersect(sig, ocsig) || return CallMeta(Union{}, Union{MethodError,TypeError}, EFFECTS_THROWS, NoCallInfo()) + hasintersect(sig, ocsig) || return Future(CallMeta(Union{}, Union{MethodError,TypeError}, EFFECTS_THROWS, NoCallInfo())) ocmethod = closure.source::Method - result = abstract_call_method(interp, ocmethod, sig, Core.svec(), false, si, sv) - (; rt, exct, edge, effects, volatile_inf_result) = result match = MethodMatch(sig, Core.svec(), ocmethod, sig <: ocsig) - 𝕃ₚ = ipo_lattice(interp) - ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ) - const_result = volatile_inf_result - if !result.edgecycle - const_call_result = abstract_call_method_with_const_args(interp, result, - nothing, arginfo, si, match, sv) - if const_call_result !== nothing - if 
const_call_result.rt ⊑ rt - (; rt, effects, const_result, edge) = const_call_result - end - if const_call_result.exct ⋤ exct - (; exct, const_result, edge) = const_call_result + mresult = abstract_call_method(interp, ocmethod, sig, Core.svec(), false, si, sv) + ocsig_box = Core.Box(ocsig) + return Future{CallMeta}(mresult, interp, sv) do result, interp, sv + (; rt, exct, edge, effects, volatile_inf_result, edgecycle) = result + 𝕃ₚ = ipo_lattice(interp) + ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ) + const_result = volatile_inf_result + if !edgecycle + const_call_result = abstract_call_method_with_const_args(interp, result, + nothing, arginfo, si, match, sv) + if const_call_result !== nothing + if const_call_result.rt ⊑ rt + (; rt, effects, const_result, edge) = const_call_result + end + if const_call_result.exct ⋤ exct + (; exct, const_result, edge) = const_call_result + end end end - end - if check # analyze implicit type asserts on argument and return type - rty = (unwrap_unionall(tt)::DataType).parameters[2] - rty = rewrap_unionall(rty isa TypeVar ? rty.ub : rty, tt) - if !(rt ⊑ rty && sig ⊑ ocsig) - effects = Effects(effects; nothrow=false) - exct = exct ⊔ TypeError + if check # analyze implicit type asserts on argument and return type + ftt = closure.typ + rty = (unwrap_unionall(ftt)::DataType).parameters[2] + rty = rewrap_unionall(rty isa TypeVar ? 
rty.ub : rty, ftt) + if !(rt ⊑ rty && sig ⊑ ocsig_box.contents) + effects = Effects(effects; nothrow=false) + exct = exct ⊔ TypeError + end end + rt = from_interprocedural!(interp, rt, sv, arginfo, match.spec_types) + info = OpaqueClosureCallInfo(match, const_result) + edge !== nothing && add_backedge!(sv, edge) + return CallMeta(rt, exct, effects, info) end - rt = from_interprocedural!(interp, rt, sv, arginfo, match.spec_types) - info = OpaqueClosureCallInfo(match, const_result) - edge !== nothing && add_backedge!(sv, edge) - return CallMeta(rt, exct, effects, info) end function most_general_argtypes(closure::PartialOpaque) @@ -2422,17 +2508,17 @@ function abstract_call_unknown(interp::AbstractInterpreter, @nospecialize(ft), wft = widenconst(ft) if hasintersect(wft, Builtin) add_remark!(interp, sv, "Could not identify method table for call") - return CallMeta(Any, Any, Effects(), NoCallInfo()) + return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) elseif hasintersect(wft, Core.OpaqueClosure) uft = unwrap_unionall(wft) if isa(uft, DataType) - return CallMeta(rewrap_unionall(uft.parameters[2], wft), Any, Effects(), NoCallInfo()) + return Future(CallMeta(rewrap_unionall(uft.parameters[2], wft), Any, Effects(), NoCallInfo())) end - return CallMeta(Any, Any, Effects(), NoCallInfo()) + return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) end # non-constant function, but the number of arguments is known and the `f` is not a builtin or intrinsic atype = argtypes_to_type(arginfo.argtypes) - return abstract_call_gf_by_type(interp, nothing, arginfo, si, atype, sv, max_methods) + return abstract_call_gf_by_type(interp, nothing, arginfo, si, atype, sv, max_methods)::Future end # call where the function is any lattice element @@ -2503,7 +2589,7 @@ function abstract_eval_cfunction(interp::AbstractInterpreter, e::Expr, vtypes::U # this may be the wrong world for the call, # but some of the result is likely to be valid anyways # and that may help generate better 
codegen - abstract_call(interp, ArgInfo(nothing, at), StmtInfo(false), sv) + abstract_call(interp, ArgInfo(nothing, at), StmtInfo(false), sv)::Future rt = e.args[1] isa(rt, Type) || (rt = Any) return RTEffects(rt, Any, EFFECTS_UNKNOWN) @@ -2544,6 +2630,7 @@ function abstract_eval_value_expr(interp::AbstractInterpreter, e::Expr, sv::AbsI # TODO: We still have non-linearized cglobal @assert e.args[1] === Core.tuple || e.args[1] === GlobalRef(Core, :tuple) else + @assert e.head !== :(=) # Some of our tests expect us to handle invalid IR here and error later # - permit that for now. # @assert false "Unexpected EXPR head in value position" @@ -2592,8 +2679,13 @@ function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, sv::Infere add_curr_ssaflag!(sv, IR_FLAG_UNUSED) end si = StmtInfo(!unused) - call = abstract_call(interp, arginfo, si, sv) - sv.stmt_info[sv.currpc] = call.info + call = abstract_call(interp, arginfo, si, sv)::Future + Future{Nothing}(call, interp, sv) do call, interp, sv + # this only is needed for the side-effect, sequenced before any task tries to consume the return value, + # which this will do even without returning this Future + sv.stmt_info[sv.currpc] = call.info + nothing + end return call end @@ -2602,11 +2694,14 @@ function abstract_eval_call(interp::AbstractInterpreter, e::Expr, vtypes::Union{ ea = e.args argtypes = collect_argtypes(interp, ea, vtypes, sv) if argtypes === nothing - return RTEffects(Bottom, Any, Effects()) + return Future(RTEffects(Bottom, Any, Effects())) end arginfo = ArgInfo(ea, argtypes) - (; rt, exct, effects, refinements) = abstract_call(interp, arginfo, sv) - return RTEffects(rt, exct, effects, refinements) + call = abstract_call(interp, arginfo, sv)::Future + return Future{RTEffects}(call, interp, sv) do call, interp, sv + (; rt, exct, effects, refinements) = call + return RTEffects(rt, exct, effects, refinements) + end end function abstract_eval_new(interp::AbstractInterpreter, e::Expr, 
vtypes::Union{VarTable,Nothing}, @@ -2736,12 +2831,15 @@ function abstract_eval_new_opaque_closure(interp::AbstractInterpreter, e::Expr, argtypes = most_general_argtypes(rt) pushfirst!(argtypes, rt.env) callinfo = abstract_call_opaque_closure(interp, rt, - ArgInfo(nothing, argtypes), StmtInfo(true), sv, #=check=#false) - sv.stmt_info[sv.currpc] = OpaqueClosureCreateInfo(callinfo) + ArgInfo(nothing, argtypes), StmtInfo(true), sv, #=check=#false)::Future + Future{Nothing}(callinfo, interp, sv) do callinfo, interp, sv + sv.stmt_info[sv.currpc] = OpaqueClosureCreateInfo(callinfo) + nothing + end end end end - return RTEffects(rt, Any, effects) + return Future(RTEffects(rt, Any, effects)) end function abstract_eval_copyast(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, @@ -2837,7 +2935,7 @@ function abstract_eval_static_parameter(::AbstractInterpreter, e::Expr, sv::AbsI end function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, - sv::AbsIntState) + sv::AbsIntState)::Future{RTEffects} ehead = e.head if ehead === :call return abstract_eval_call(interp, e, vtypes, sv) @@ -2935,43 +3033,7 @@ function stmt_taints_inbounds_consistency(sv::AbsIntState) return has_curr_ssaflag(sv, IR_FLAG_INBOUNDS) end -function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState) - if !isa(e, Expr) - if isa(e, PhiNode) - add_curr_ssaflag!(sv, IR_FLAGS_REMOVABLE) - # Implement convergence for PhiNodes. In particular, PhiNodes need to tmerge over - # the incoming values from all iterations, but `abstract_eval_phi` will only tmerge - # over the first and last iterations. By tmerging in the current old_rt, we ensure that - # we will not lose an intermediate value. - rt = abstract_eval_phi(interp, e, vtypes, sv) - old_rt = sv.ssavaluetypes[sv.currpc] - rt = old_rt === NOT_FOUND ? 
rt : tmerge(typeinf_lattice(interp), old_rt, rt) - return RTEffects(rt, Union{}, EFFECTS_TOTAL) - end - (; rt, exct, effects, refinements) = abstract_eval_special_value(interp, e, vtypes, sv) - else - (; rt, exct, effects, refinements) = abstract_eval_statement_expr(interp, e, vtypes, sv) - if effects.noub === NOUB_IF_NOINBOUNDS - if has_curr_ssaflag(sv, IR_FLAG_INBOUNDS) - effects = Effects(effects; noub=ALWAYS_FALSE) - elseif !propagate_inbounds(sv) - # The callee read our inbounds flag, but unless we propagate inbounds, - # we ourselves don't read our parent's inbounds. - effects = Effects(effects; noub=ALWAYS_TRUE) - end - end - e = e::Expr - @assert !isa(rt, TypeVar) "unhandled TypeVar" - rt = maybe_singleton_const(rt) - if !isempty(sv.pclimitations) - if rt isa Const || rt === Union{} - empty!(sv.pclimitations) - else - rt = LimitedAccuracy(rt, sv.pclimitations) - sv.pclimitations = IdSet{InferenceState}() - end - end - end +function merge_override_effects!(interp::AbstractInterpreter, effects::Effects, sv::InferenceState) # N.B.: This only applies to the effects of the statement itself. # It is possible for arguments (GlobalRef/:static_parameter) to throw, # but these will be recomputed during SSA construction later. 
@@ -2979,8 +3041,11 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), effects = override_effects(effects, override) set_curr_ssaflag!(sv, flags_for_effects(effects), IR_FLAGS_EFFECTS) merge_effects!(interp, sv, effects) + return effects +end - return RTEffects(rt, exct, effects, refinements) +function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState) + @assert !isa(e, Union{Expr, PhiNode, NewvarNode}) end function override_effects(effects::Effects, override::EffectsOverride) @@ -3226,60 +3291,6 @@ function handle_control_backedge!(interp::AbstractInterpreter, frame::InferenceS return nothing end -struct BasicStmtChange - changes::Union{Nothing,StateUpdate} - rt::Any # extended lattice element or `nothing` - `nothing` if this statement may not be used as an SSA Value - exct::Any - # TODO effects::Effects - refinements # ::Union{Nothing,SlotRefinement,Vector{Any}} - function BasicStmtChange(changes::Union{Nothing,StateUpdate}, rt::Any, exct::Any, - refinements=nothing) - @nospecialize rt exct refinements - return new(changes, rt, exct, refinements) - end -end - -@inline function abstract_eval_basic_statement(interp::AbstractInterpreter, - @nospecialize(stmt), pc_vartable::VarTable, frame::InferenceState) - if isa(stmt, NewvarNode) - changes = StateUpdate(stmt.slot, VarState(Bottom, true)) - return BasicStmtChange(changes, nothing, Union{}) - elseif !isa(stmt, Expr) - (; rt, exct) = abstract_eval_statement(interp, stmt, pc_vartable, frame) - return BasicStmtChange(nothing, rt, exct) - end - changes = nothing - hd = stmt.head - if hd === :(=) - (; rt, exct, refinements) = abstract_eval_statement(interp, stmt.args[2], pc_vartable, frame) - if rt === Bottom - return BasicStmtChange(nothing, Bottom, exct, refinements) - end - lhs = stmt.args[1] - if isa(lhs, SlotNumber) - changes = StateUpdate(lhs, VarState(rt, false)) - elseif isa(lhs, GlobalRef) - 
handle_global_assignment!(interp, frame, lhs, rt) - elseif !isa(lhs, SSAValue) - merge_effects!(interp, frame, EFFECTS_UNKNOWN) - end - return BasicStmtChange(changes, rt, exct, refinements) - elseif hd === :method - fname = stmt.args[1] - if isa(fname, SlotNumber) - changes = StateUpdate(fname, VarState(Any, false)) - end - return BasicStmtChange(changes, nothing, Union{}) - elseif (hd === :code_coverage_effect || ( - hd !== :boundscheck && # :boundscheck can be narrowed to Bool - is_meta_expr(stmt))) - return BasicStmtChange(nothing, Nothing, Bottom) - else - (; rt, exct, refinements) = abstract_eval_statement(interp, stmt, pc_vartable, frame) - return BasicStmtChange(nothing, rt, exct, refinements) - end -end - function update_bbstate!(𝕃ᵢ::AbstractLattice, frame::InferenceState, bb::Int, vartable::VarTable) bbtable = frame.bb_vartables[bb] if bbtable === nothing @@ -3379,27 +3390,45 @@ function update_cycle_worklists!(callback, frame::InferenceState) end # make as much progress on `frame` as possible (without handling cycles) -function typeinf_local(interp::AbstractInterpreter, frame::InferenceState) +struct CurrentState + result::Future + currstate::VarTable + bbstart::Int + bbend::Int + CurrentState(result::Future, currstate::VarTable, bbstart::Int, bbend::Int) = new(result, currstate, bbstart, bbend) + CurrentState() = new() +end +function typeinf_local(interp::AbstractInterpreter, frame::InferenceState, nextresult::CurrentState) @assert !is_inferred(frame) W = frame.ip ssavaluetypes = frame.ssavaluetypes bbs = frame.cfg.blocks nbbs = length(bbs) 𝕃ᵢ = typeinf_lattice(interp) - + states = frame.bb_vartables currbb = frame.currbb + currpc = frame.currpc + + if isdefined(nextresult, :result) + # for reasons that are fairly unclear, some state is arbitrarily on the stack instead in the InferenceState as normal + bbstart = nextresult.bbstart + bbend = nextresult.bbend + currstate = nextresult.currstate + @goto injectresult + end + if currbb != 1 currbb = 
frame.currbb = _bits_findnext(W.bits, 1)::Int # next basic block end - - states = frame.bb_vartables currstate = copy(states[currbb]::VarTable) while currbb <= nbbs delete!(W, currbb) bbstart = first(bbs[currbb].stmts) bbend = last(bbs[currbb].stmts) - for currpc in bbstart:bbend + currpc = bbstart - 1 + while currpc < bbend + currpc += 1 frame.currpc = currpc empty_backedges!(frame, currpc) stmt = frame.src.code[currpc] @@ -3511,14 +3540,14 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState) return caller.ssavaluetypes[caller_pc] !== Any end end - ssavaluetypes[frame.currpc] = Any + ssavaluetypes[currpc] = Any @goto find_next_bb elseif isa(stmt, EnterNode) ssavaluetypes[currpc] = Any add_curr_ssaflag!(frame, IR_FLAG_NOTHROW) if isdefined(stmt, :scope) scopet = abstract_eval_value(interp, stmt.scope, currstate, frame) - handler = gethandler(frame, frame.currpc+1)::TryCatchFrame + handler = gethandler(frame, currpc + 1)::TryCatchFrame @assert handler.scopet !== nothing if !⊑(𝕃ᵢ, scopet, handler.scopet) handler.scopet = tmerge(𝕃ᵢ, scopet, handler.scopet) @@ -3537,8 +3566,91 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState) # Fall through terminator - treat as regular stmt end # Process non control-flow statements - (; changes, rt, exct, refinements) = abstract_eval_basic_statement(interp, - stmt, currstate, frame) + @assert isempty(frame.tasks) + rt = nothing + exct = Bottom + changes = nothing + refinements = nothing + effects = nothing + if isa(stmt, NewvarNode) + changes = StateUpdate(stmt.slot, VarState(Bottom, true)) + elseif isa(stmt, PhiNode) + add_curr_ssaflag!(frame, IR_FLAGS_REMOVABLE) + # Implement convergence for PhiNodes. In particular, PhiNodes need to tmerge over + # the incoming values from all iterations, but `abstract_eval_phi` will only tmerge + # over the first and last iterations. By tmerging in the current old_rt, we ensure that + # we will not lose an intermediate value. 
+ rt = abstract_eval_phi(interp, stmt, currstate, frame) + old_rt = frame.ssavaluetypes[currpc] + rt = old_rt === NOT_FOUND ? rt : tmerge(typeinf_lattice(interp), old_rt, rt) + else + lhs = nothing + if isexpr(stmt, :(=)) + lhs = stmt.args[1] + stmt = stmt.args[2] + end + if !isa(stmt, Expr) + (; rt, exct, effects, refinements) = abstract_eval_special_value(interp, stmt, currstate, frame) + else + hd = stmt.head + if hd === :method + fname = stmt.args[1] + if isa(fname, SlotNumber) + changes = StateUpdate(fname, VarState(Any, false)) + end + elseif (hd === :code_coverage_effect || ( + hd !== :boundscheck && # :boundscheck can be narrowed to Bool + is_meta_expr(stmt))) + rt = Nothing + else + result = abstract_eval_statement_expr(interp, stmt, currstate, frame)::Future + if !isready(result) || !isempty(frame.tasks) + return CurrentState(result, currstate, bbstart, bbend) + @label injectresult + # reload local variables + stmt = frame.src.code[currpc] + changes = nothing + lhs = nothing + if isexpr(stmt, :(=)) + lhs = stmt.args[1] + stmt = stmt.args[2] + end + result = nextresult.result::Future{RTEffects} + end + result = result[] + (; rt, exct, effects, refinements) = result + if effects.noub === NOUB_IF_NOINBOUNDS + if has_curr_ssaflag(frame, IR_FLAG_INBOUNDS) + effects = Effects(effects; noub=ALWAYS_FALSE) + elseif !propagate_inbounds(frame) + # The callee read our inbounds flag, but unless we propagate inbounds, + # we ourselves don't read our parent's inbounds. 
+ effects = Effects(effects; noub=ALWAYS_TRUE) + end + end + @assert !isa(rt, TypeVar) "unhandled TypeVar" + rt = maybe_singleton_const(rt) + if !isempty(frame.pclimitations) + if rt isa Const || rt === Union{} + empty!(frame.pclimitations) + else + rt = LimitedAccuracy(rt, frame.pclimitations) + frame.pclimitations = IdSet{InferenceState}() + end + end + end + end + effects === nothing || merge_override_effects!(interp, effects, frame) + if lhs !== nothing && rt !== Bottom + if isa(lhs, SlotNumber) + changes = StateUpdate(lhs, VarState(rt, false)) + elseif isa(lhs, GlobalRef) + handle_global_assignment!(interp, frame, lhs, rt) + elseif !isa(lhs, SSAValue) + merge_effects!(interp, frame, EFFECTS_UNKNOWN) + end + end + end if !has_curr_ssaflag(frame, IR_FLAG_NOTHROW) if exct !== Union{} update_exc_bestguess!(interp, exct, frame) @@ -3601,7 +3713,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState) end end # while currbb <= nbbs - nothing + return CurrentState() end function apply_refinement!(𝕃ᵢ::AbstractLattice, slot::SlotNumber, @nospecialize(newtyp), @@ -3652,31 +3764,81 @@ function condition_object_change(currstate::VarTable, condt::Conditional, end # make as much progress on `frame` as possible (by handling cycles) -function typeinf_nocycle(interp::AbstractInterpreter, frame::InferenceState) - typeinf_local(interp, frame) - @assert isempty(frame.ip) +warnlength::Int = 2500 +function typeinf(interp::AbstractInterpreter, frame::InferenceState) callstack = frame.callstack::Vector{AbsIntState} - frame.cycleid == length(callstack) && return true - - no_active_ips_in_callers = false - while true - # If the current frame is not the top part of a cycle, continue to the top of the cycle before resuming work - frame.cycleid == frame.frameid || return false - # If done, return and finalize this cycle - no_active_ips_in_callers && return true - # Otherwise, do at least one iteration over the entire current cycle - no_active_ips_in_callers = true - 
for i = reverse(frame.cycleid:length(callstack)) - caller = callstack[i]::InferenceState - if !isempty(caller.ip) - # Note that `typeinf_local(interp, caller)` can potentially modify the other frames - # `frame.cycleid`, which is why making incremental progress requires the - # outer while loop. - typeinf_local(interp, caller) - no_active_ips_in_callers = false - end - update_valid_age!(caller, frame.valid_worlds) + nextstates = CurrentState[] + takenext = frame.frameid + minwarn = warnlength + takeprev = 0 + while takenext >= frame.frameid + callee = takenext == 0 ? frame : callstack[takenext]::InferenceState + if !isempty(callstack) + if length(callstack) - frame.frameid >= minwarn + topmethod = callstack[1].linfo + topmethod.def isa Method || (topmethod = callstack[2].linfo) + print(Core.stderr, "info: inference of ", topmethod, " exceeding ", length(callstack), " frames (may be slow).\n") + minwarn *= 2 + end + topcallee = (callstack[end]::InferenceState) + if topcallee.cycleid != callee.cycleid + callee = topcallee + takenext = length(callstack) + end + end + nextstateid = takenext + 1 - frame.frameid + while length(nextstates) < nextstateid + push!(nextstates, CurrentState()) + end + if doworkloop(interp, callee) + # First drain the workloop. Note that since some scheduled work doesn't + # affect the result (e.g. cfunction or abstract_call_method on + # get_compileable_sig), but still must be finished up since it may see and + # change the local variables of the InferenceState at currpc, we do this + # even if the nextresult status is already completed. 
+ continue + elseif isdefined(nextstates[nextstateid], :result) || !isempty(callee.ip) + # Next make progress on this frame + prev = length(callee.tasks) + 1 + nextstates[nextstateid] = typeinf_local(interp, callee, nextstates[nextstateid]) + reverse!(callee.tasks, prev) + elseif callee.cycleid == length(callstack) + # With no active ip's and no cycles, frame is done + finish_nocycle(interp, callee) + callee.frameid == 0 && break + takenext = length(callstack) + nextstateid = takenext + 1 - frame.frameid + #@assert length(nextstates) == nextstateid + 1 + #@assert all(i -> !isdefined(nextstates[i], :result), nextstateid+1:length(nextstates)) + resize!(nextstates, nextstateid) + elseif callee.cycleid == callee.frameid + # If the current frame is the top part of a cycle, check if the whole cycle + # is done, and if not, pick the next item to work on. + no_active_ips_in_cycle = true + for i = callee.cycleid:length(callstack) + caller = callstack[i]::InferenceState + @assert caller.cycleid == callee.cycleid + if !isempty(caller.tasks) || isdefined(nextstates[i+1-frame.frameid], :result) || !isempty(caller.ip) + no_active_ips_in_cycle = false + break + end + end + if no_active_ips_in_cycle + finish_cycle(interp, callstack, callee.cycleid) + end + takenext = length(callstack) + nextstateid = takenext + 1 - frame.frameid + if no_active_ips_in_cycle + #@assert all(i -> !isdefined(nextstates[i], :result), nextstateid+1:length(nextstates)) + resize!(nextstates, nextstateid) + else + #@assert length(nextstates) == nextstateid + end + else + # Continue to the next frame in this cycle + takenext = takenext - 1 end end - return true + #@assert all(nextresult -> !isdefined(nextresult, :result), nextstates) + return is_inferred(frame) end diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl index 6953dea5b9bd7..05d95d1d5bdc7 100644 --- a/base/compiler/inferencestate.jl +++ b/base/compiler/inferencestate.jl @@ -251,6 +251,7 @@ mutable struct InferenceState 
stmt_info::Vector{CallInfo} #= intermediate states for interprocedural abstract interpretation =# + tasks::Vector{WorkThunk} pclimitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on currpc ssavalue limitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on return cycle_backedges::Vector{Tuple{InferenceState, Int}} # call-graph backedges connecting from callee to caller @@ -328,6 +329,7 @@ mutable struct InferenceState limitations = IdSet{InferenceState}() cycle_backedges = Vector{Tuple{InferenceState,Int}}() callstack = AbsIntState[] + tasks = WorkThunk[] valid_worlds = WorldRange(1, get_world_counter()) bestguess = Bottom @@ -351,7 +353,7 @@ mutable struct InferenceState this = new( mi, world, mod, sptypes, slottypes, src, cfg, method_info, currbb, currpc, ip, handler_info, ssavalue_uses, bb_vartables, ssavaluetypes, stmt_edges, stmt_info, - pclimitations, limitations, cycle_backedges, callstack, 0, 0, 0, + tasks, pclimitations, limitations, cycle_backedges, callstack, 0, 0, 0, result, unreachable, valid_worlds, bestguess, exc_bestguess, ipo_effects, restrict_abstract_call_sites, cache_mode, insert_coverage, interp) @@ -800,6 +802,7 @@ mutable struct IRInterpretationState const ssa_refined::BitSet const lazyreachability::LazyCFGReachability valid_worlds::WorldRange + const tasks::Vector{WorkThunk} const edges::Vector{Any} callstack #::Vector{AbsIntState} frameid::Int @@ -825,10 +828,11 @@ mutable struct IRInterpretationState ssa_refined = BitSet() lazyreachability = LazyCFGReachability(ir) valid_worlds = WorldRange(min_world, max_world == typemax(UInt) ? 
get_world_counter() : max_world) + tasks = WorkThunk[] edges = Any[] callstack = AbsIntState[] return new(method_info, ir, mi, world, curridx, argtypes_refined, ir.sptypes, tpdum, - ssa_refined, lazyreachability, valid_worlds, edges, callstack, 0, 0) + ssa_refined, lazyreachability, valid_worlds, tasks, edges, callstack, 0, 0) end end @@ -870,6 +874,7 @@ function print_callstack(frame::AbsIntState) print(frame_instance(sv)) is_cached(sv) || print(" [uncached]") sv.parentid == idx - 1 || print(" [parent=", sv.parentid, "]") + isempty(callers_in_cycle(sv)) || print(" [cycle=", sv.cycleid, "]") println() @assert sv.frameid == idx end @@ -994,7 +999,10 @@ of the same cycle, only if it is part of a cycle with multiple frames. function callers_in_cycle(sv::InferenceState) callstack = sv.callstack::Vector{AbsIntState} cycletop = cycleid = sv.cycleid - while cycletop < length(callstack) && (callstack[cycletop + 1]::InferenceState).cycleid == cycleid + while cycletop < length(callstack) + frame = callstack[cycletop + 1] + frame isa InferenceState || break + frame.cycleid == cycleid || break cycletop += 1 end return AbsIntCycle(callstack, cycletop == cycleid ? 0 : cycleid, cycletop) @@ -1054,6 +1062,7 @@ function merge_effects!(::AbstractInterpreter, caller::InferenceState, effects:: effects = Effects(effects; effect_free=ALWAYS_TRUE) end caller.ipo_effects = merge_effects(caller.ipo_effects, effects) + nothing end merge_effects!(::AbstractInterpreter, ::IRInterpretationState, ::Effects) = return @@ -1116,3 +1125,90 @@ function get_max_methods_for_module(mod::Module) max_methods < 0 && return nothing return max_methods end + +""" + Future{T} + +Delayed return value for a value of type `T`, similar to RefValue{T}, but +explicitly represents completed as a `Bool` rather than as `isdefined`. +Set once with `f[] = v` and accessed with `f[]` afterwards. 
+ +Can also be constructed with the `completed` flag value and a closure to +produce `x`, as well as the additional arguments to avoid always capturing the +same couple of values. +""" +struct Future{T} + later::Union{Nothing,RefValue{T}} + now::Union{Nothing,T} + Future{T}() where {T} = new{T}(RefValue{T}(), nothing) + Future{T}(x) where {T} = new{T}(nothing, x) + Future(x::T) where {T} = new{T}(nothing, x) +end +isready(f::Future) = f.later === nothing +getindex(f::Future{T}) where {T} = (later = f.later; later === nothing ? f.now::T : later[]) +setindex!(f::Future, v) = something(f.later)[] = v +convert(::Type{Future{T}}, x) where {T} = Future{T}(x) # support return type conversion +convert(::Type{Future{T}}, x::Future) where {T} = x::Future{T} +function Future{T}(f, immediate::Bool, interp::AbstractInterpreter, sv::AbsIntState) where {T} + if immediate + return Future{T}(f(interp, sv)) + else + @assert applicable(f, interp, sv) + result = Future{T}() + push!(sv.tasks, function (interp, sv) + result[] = f(interp, sv) + return true + end) + return result + end +end +function Future{T}(f, prev::Future{S}, interp::AbstractInterpreter, sv::AbsIntState) where {T, S} + later = prev.later + if later === nothing + return Future{T}(f(prev[], interp, sv)) + else + @assert Core._hasmethod(Tuple{Core.Typeof(f), S, typeof(interp), typeof(sv)}) + result = Future{T}() + push!(sv.tasks, function (interp, sv) + result[] = f(later[], interp, sv) # capture just later, instead of all of prev + return true + end) + return result + end +end + + +""" + doworkloop(args...) + +Run a task inside the abstract interpreter, returning false if there are none. +Tasks will be run in DFS post-order tree order, such that all child tasks will +be run in the order scheduled, prior to running any subsequent tasks. This +allows tasks to generate more child tasks, which will be run before anything else. +Each task will be run repeatedly when returning `false`, until it returns `true`.
+""" +function doworkloop(interp::AbstractInterpreter, sv::AbsIntState) + tasks = sv.tasks + prev = length(tasks) + prev == 0 && return false + task = pop!(tasks) + completed = task(interp, sv) + tasks = sv.tasks # allow dropping gc root over the previous call + completed isa Bool || throw(TypeError(:return, "", Bool, task)) # print the task on failure as part of the error message, instead of just "@ workloop:line" + completed || push!(tasks, task) + # efficient post-order visitor: items pushed are executed in reverse post order such + # that later items are executed before earlier ones, but are fully executed + # (including any dependencies scheduled by them) before going on to the next item + reverse!(tasks, #=start=#prev) + return true +end + + +#macro workthunk(name::Symbol, body) +# name = esc(name) +# body = esc(body) +# return replace_linenums!( +# :(function $name($(esc(interp)), $(esc(sv))) +# $body +# end), __source__) +#end diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 960da88ddffc8..fdcb4621c5c0f 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -1432,6 +1432,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr elseif isa(stmt, OldSSAValue) ssa_rename[idx] = ssa_rename[stmt.id] elseif isa(stmt, GotoNode) && cfg_transforms_enabled + stmt.label < 0 && (println(stmt); println(compact)) label = bb_rename_succ[stmt.label] @assert label > 0 ssa_rename[idx] = SSAValue(result_idx) diff --git a/base/compiler/ssair/irinterp.jl b/base/compiler/ssair/irinterp.jl index 1aeb87accbcd7..ca8ca770df413 100644 --- a/base/compiler/ssair/irinterp.jl +++ b/base/compiler/ssair/irinterp.jl @@ -51,8 +51,11 @@ end function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, irsv::IRInterpretationState) si = StmtInfo(true) # TODO better job here? 
- call = abstract_call(interp, arginfo, si, irsv) - irsv.ir.stmts[irsv.curridx][:info] = call.info + call = abstract_call(interp, arginfo, si, irsv)::Future + Future{Nothing}(call, interp, irsv) do call, interp, irsv + irsv.ir.stmts[irsv.curridx][:info] = call.info + nothing + end return call end @@ -143,7 +146,19 @@ function reprocess_instruction!(interp::AbstractInterpreter, inst::Instruction, head = stmt.head if (head === :call || head === :foreigncall || head === :new || head === :splatnew || head === :static_parameter || head === :isdefined || head === :boundscheck) - (; rt, effects) = abstract_eval_statement_expr(interp, stmt, nothing, irsv) + @assert isempty(irsv.tasks) # TODO: this whole function needs to be converted to a stackless design to be a valid AbsIntState, but this should work here for now + result = abstract_eval_statement_expr(interp, stmt, nothing, irsv) + reverse!(irsv.tasks) + while true + if length(irsv.callstack) > irsv.frameid + typeinf(interp, irsv.callstack[irsv.frameid + 1]) + elseif !doworkloop(interp, irsv) + break + end + end + @assert length(irsv.callstack) == irsv.frameid && isempty(irsv.tasks) + result isa Future && (result = result[]) + (; rt, effects) = result add_flag!(inst, flags_for_effects(effects)) elseif head === :invoke rt, (nothrow, noub) = abstract_eval_invoke_inst(interp, inst, irsv) @@ -293,7 +308,7 @@ function is_all_const_call(@nospecialize(stmt), interp::AbstractInterpreter, irs return true end -function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState; +function ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState; externally_refined::Union{Nothing,BitSet} = nothing) (; ir, tpdum, ssa_refined) = irsv @@ -449,18 +464,3 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR return Pair{Any,Tuple{Bool,Bool}}(maybe_singleton_const(ultimate_rt), (nothrow, noub)) end - -function 
ir_abstract_constant_propagation(interp::NativeInterpreter, irsv::IRInterpretationState) - if __measure_typeinf__[] - inf_frame = Timings.InferenceFrameInfo(irsv.mi, irsv.world, VarState[], Any[], length(irsv.ir.argtypes)) - Timings.enter_new_timer(inf_frame) - ret = _ir_abstract_constant_propagation(interp, irsv) - append!(inf_frame.slottypes, irsv.ir.argtypes) - Timings.exit_current_timer(inf_frame) - return ret - else - return _ir_abstract_constant_propagation(interp, irsv) - end -end -ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState) = - _ir_abstract_constant_propagation(interp, irsv) diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl index a4286177e93a4..268991282c483 100644 --- a/base/compiler/ssair/verify.jl +++ b/base/compiler/ssair/verify.jl @@ -1,9 +1,11 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license function maybe_show_ir(ir::IRCode) - if isdefined(Core, :Main) + if isdefined(Core, :Main) && isdefined(Core.Main, :Base) # ensure we use I/O that does not yield, as this gets called during compilation invokelatest(Core.Main.Base.show, Core.stdout, "text/plain", ir) + else + Core.show(ir) end end @@ -25,6 +27,7 @@ is_toplevel_expr_head(head::Symbol) = head === :global || head === :method || he is_value_pos_expr_head(head::Symbol) = head === :static_parameter function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, printed_use_idx::Int, print::Bool, isforeigncall::Bool, arg_idx::Int, allow_frontend_forms::Bool) if isa(op, SSAValue) + op.id > 0 || @verify_error "Def ($(op.id)) is invalid in final IR" if op.id > length(ir.stmts) def_bb = block_for_inst(ir.cfg, ir.new_nodes.info[op.id - length(ir.stmts)].pos) else diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index cc8ba227bd088..a6b7e53c6f320 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -1383,10 +1383,10 @@ end nargs = length(argtypes) 
if !isempty(argtypes) && isvarargtype(argtypes[nargs]) - nargs - 1 <= maxargs || return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) - nargs + 1 >= op_argi || return CallMeta(Any, Any, Effects(), NoCallInfo()) + nargs - 1 <= maxargs || return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())) + nargs + 1 >= op_argi || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) else - minargs <= nargs <= maxargs || return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) + minargs <= nargs <= maxargs || return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())) end 𝕃ᵢ = typeinf_lattice(interp) if ff === modifyfield! @@ -1417,15 +1417,22 @@ end op = unwrapva(argtypes[op_argi]) v = unwrapva(argtypes[v_argi]) callinfo = abstract_call(interp, ArgInfo(nothing, Any[op, TF, v]), StmtInfo(true), sv, #=max_methods=#1) - TF2 = tmeet(callinfo.rt, widenconst(TF)) - if TF2 === Bottom - RT = Bottom - elseif isconcretetype(RT) && has_nontrivial_extended_info(𝕃ᵢ, TF2) # isconcrete condition required to form a PartialStruct - RT = PartialStruct(RT, Any[TF, TF2]) + TF = Core.Box(TF) + RT = Core.Box(RT) + return Future{CallMeta}(callinfo, interp, sv) do callinfo, interp, sv + TF = TF.contents + RT = RT.contents + TF2 = tmeet(callinfo.rt, widenconst(TF)) + if TF2 === Bottom + RT = Bottom + elseif isconcretetype(RT) && has_nontrivial_extended_info(𝕃ᵢ, TF2) # isconcrete condition required to form a PartialStruct + RT = PartialStruct(RT, Any[TF, TF2]) + end + info = ModifyOpInfo(callinfo.info) + return CallMeta(RT, Any, Effects(), info) end - info = ModifyOpInfo(callinfo.info) end - return CallMeta(RT, Any, Effects(), info) + return Future(CallMeta(RT, Any, Effects(), info)) end # we could use tuple_tfunc instead of widenconst, but `o` is mutable, so that is unlikely to be beneficial @@ -2895,17 +2902,17 @@ end function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, sv::AbsIntState) UNKNOWN = CallMeta(Type, Any, 
Effects(EFFECTS_THROWS; nortcall=false), NoCallInfo()) if !(2 <= length(argtypes) <= 3) - return UNKNOWN + return Future(UNKNOWN) end tt = widenslotwrapper(argtypes[end]) if !isa(tt, Const) && !(isType(tt) && !has_free_typevars(tt)) - return UNKNOWN + return Future(UNKNOWN) end af_argtype = isa(tt, Const) ? tt.val : (tt::DataType).parameters[1] if !isa(af_argtype, DataType) || !(af_argtype <: Tuple) - return UNKNOWN + return Future(UNKNOWN) end if length(argtypes) == 3 @@ -2918,7 +2925,7 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s end if !(isa(aft, Const) || (isType(aft) && !has_free_typevars(aft)) || (isconcretetype(aft) && !(aft <: Builtin) && !iskindtype(aft))) - return UNKNOWN + return Future(UNKNOWN) end # effects are not an issue if we know this statement will get removed, but if it does not get removed, @@ -2926,7 +2933,7 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s RT_CALL_EFFECTS = Effects(EFFECTS_TOTAL; nortcall=false) if contains_is(argtypes_vec, Union{}) - return CallMeta(Const(Union{}), Union{}, RT_CALL_EFFECTS, NoCallInfo()) + return Future(CallMeta(Const(Union{}), Union{}, RT_CALL_EFFECTS, NoCallInfo())) end # Run the abstract_call without restricting abstract call @@ -2935,42 +2942,45 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s if isa(sv, InferenceState) old_restrict = sv.restrict_abstract_call_sites sv.restrict_abstract_call_sites = false - call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1) - sv.restrict_abstract_call_sites = old_restrict - else - call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1) - end - info = verbose_stmt_info(interp) ? 
MethodResultPure(ReturnTypeCallInfo(call.info)) : MethodResultPure() - rt = widenslotwrapper(call.rt) - if isa(rt, Const) - # output was computed to be constant - return CallMeta(Const(typeof(rt.val)), Union{}, RT_CALL_EFFECTS, info) - end - rt = widenconst(rt) - if rt === Bottom || (isconcretetype(rt) && !iskindtype(rt)) - # output cannot be improved so it is known for certain - return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info) - elseif isa(sv, InferenceState) && !isempty(sv.pclimitations) - # conservatively express uncertainty of this result - # in two ways: both as being a subtype of this, and - # because of LimitedAccuracy causes - return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info) - elseif isa(tt, Const) || isconstType(tt) - # input arguments were known for certain - # XXX: this doesn't imply we know anything about rt - return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info) - elseif isType(rt) - return CallMeta(Type{rt}, Union{}, RT_CALL_EFFECTS, info) - else - return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info) + end + call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1) + tt = Core.Box(tt) + return Future{CallMeta}(call, interp, sv) do call, interp, sv + if isa(sv, InferenceState) + sv.restrict_abstract_call_sites = old_restrict + end + info = verbose_stmt_info(interp) ? 
MethodResultPure(ReturnTypeCallInfo(call.info)) : MethodResultPure() + rt = widenslotwrapper(call.rt) + if isa(rt, Const) + # output was computed to be constant + return CallMeta(Const(typeof(rt.val)), Union{}, RT_CALL_EFFECTS, info) + end + rt = widenconst(rt) + if rt === Bottom || (isconcretetype(rt) && !iskindtype(rt)) + # output cannot be improved so it is known for certain + return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info) + elseif isa(sv, InferenceState) && !isempty(sv.pclimitations) + # conservatively express uncertainty of this result + # in two ways: both as being a subtype of this, and + # because of LimitedAccuracy causes + return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info) + elseif isa(tt.contents, Const) || isconstType(tt.contents) + # input arguments were known for certain + # XXX: this doesn't imply we know anything about rt + return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info) + elseif isType(rt) + return CallMeta(Type{rt}, Union{}, RT_CALL_EFFECTS, info) + else + return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info) + end end end # a simplified model of abstract_call_gf_by_type for applicable function abstract_applicable(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::AbsIntState, max_methods::Int) - length(argtypes) < 2 && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) - isvarargtype(argtypes[2]) && return CallMeta(Bool, Any, EFFECTS_THROWS, NoCallInfo()) + length(argtypes) < 2 && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())) + isvarargtype(argtypes[2]) && return Future(CallMeta(Bool, Any, EFFECTS_THROWS, NoCallInfo())) argtypes = argtypes[2:end] atype = argtypes_to_type(argtypes) matches = find_method_matches(interp, argtypes, atype; max_methods) @@ -2997,7 +3007,7 @@ function abstract_applicable(interp::AbstractInterpreter, argtypes::Vector{Any}, # added that did not intersect with any existing method add_uncovered_edges!(sv, matches, atype) end - return CallMeta(rt, 
Union{}, EFFECTS_TOTAL, NoCallInfo()) + return Future(CallMeta(rt, Union{}, EFFECTS_TOTAL, NoCallInfo())) end add_tfunc(applicable, 1, INT_INF, @nospecs((𝕃::AbstractLattice, f, args...)->Bool), 40) diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl index 315a068e611fe..77a2e02129ce4 100644 --- a/base/compiler/typeinfer.jl +++ b/base/compiler/typeinfer.jl @@ -56,7 +56,7 @@ end Timing(mi_info, start_time, cur_start_time, time, children) = Timing(mi_info, start_time, cur_start_time, time, children, nothing) Timing(mi_info, start_time) = Timing(mi_info, start_time, start_time, UInt64(0), Timing[]) -_time_ns() = ccall(:jl_hrtime, UInt64, ()) # Re-implemented here because Base not yet available. +_time_ns() = ccall(:jl_hrtime, UInt64, ()) # We keep a stack of the Timings for each of the MethodInstances currently being timed. # Since type inference currently operates via a depth-first search (during abstract @@ -77,114 +77,14 @@ const ROOTmi = Core.Compiler.specialize_method( Empty out the previously recorded type inference timings (`Core.Compiler._timings`), and start the ROOT() timer again. `ROOT()` measures all time spent _outside_ inference. """ -function reset_timings() - empty!(_timings) - push!(_timings, Timing( - # The MethodInstance for ROOT(), and default empty values for other fields. - InferenceFrameInfo(ROOTmi, 0x0, Core.Compiler.VarState[], Any[Core.Const(ROOT)], 1), - _time_ns())) - return nothing -end -reset_timings() - -# (This is split into a function so that it can be called both in this module, at the top -# of `enter_new_timer()`, and once at the Very End of the operation, by whoever started -# the operation and called `reset_timings()`.) -# NOTE: the @inline annotations here are not to make it faster, but to reduce the gap between -# timer manipulations and the tasks we're timing. 
-@inline function close_current_timer() - stop_time = _time_ns() - parent_timer = _timings[end] - accum_time = stop_time - parent_timer.cur_start_time - - # Add in accum_time ("modify" the immutable struct) - @inbounds begin - _timings[end] = Timing( - parent_timer.mi_info, - parent_timer.start_time, - parent_timer.cur_start_time, - parent_timer.time + accum_time, - parent_timer.children, - parent_timer.bt, - ) - end - return nothing -end - -@inline function enter_new_timer(frame) - # Very first thing, stop the active timer: get the current time and add in the - # time since it was last started to its aggregate exclusive time. - close_current_timer() - - mi_info = _typeinf_identifier(frame) - - # Start the new timer right before returning - push!(_timings, Timing(mi_info, UInt64(0))) - len = length(_timings) - new_timer = @inbounds _timings[len] - # Set the current time _after_ appending the node, to try to exclude the - # overhead from measurement. - start = _time_ns() - - @inbounds begin - _timings[len] = Timing( - new_timer.mi_info, - start, - start, - new_timer.time, - new_timer.children, - ) - end - - return nothing -end - -# _expected_frame_ is not needed within this function; it is used in the `@assert`, to -# assert that indeed we are always returning to a parent after finishing all of its -# children (that is, asserting that inference proceeds via depth-first-search). 
-@inline function exit_current_timer(_expected_frame_) - # Finish the new timer - stop_time = _time_ns() - - expected_mi_info = _typeinf_identifier(_expected_frame_) - - # Grab the new timer again because it might have been modified in _timings - # (since it's an immutable struct) - # And remove it from the current timings stack - new_timer = pop!(_timings) - Core.Compiler.@assert new_timer.mi_info.mi === expected_mi_info.mi - - # Prepare to unwind one level of the stack and record in the parent - parent_timer = _timings[end] - - accum_time = stop_time - new_timer.cur_start_time - # Add in accum_time ("modify" the immutable struct) - new_timer = Timing( - new_timer.mi_info, - new_timer.start_time, - new_timer.cur_start_time, - new_timer.time + accum_time, - new_timer.children, - parent_timer.mi_info.mi === ROOTmi ? backtrace() : nothing, - ) - # Record the final timing with the original parent timer - push!(parent_timer.children, new_timer) - - # And finally restart the parent timer: - len = length(_timings) - @inbounds begin - _timings[len] = Timing( - parent_timer.mi_info, - parent_timer.start_time, - _time_ns(), - parent_timer.time, - parent_timer.children, - parent_timer.bt, - ) - end - - return nothing -end +function reset_timings() end +push!(_timings, Timing( + # The MethodInstance for ROOT(), and default empty values for other fields. + InferenceFrameInfo(ROOTmi, 0x0, Core.Compiler.VarState[], Any[Core.Const(ROOT)], 1), + _time_ns())) +function close_current_timer() end +function enter_new_timer(frame) end +function exit_current_timer(_expected_frame_) end end # module Timings @@ -194,19 +94,7 @@ end # module Timings If set to `true`, record per-method-instance timings within type inference in the Compiler. """ __set_measure_typeinf(onoff::Bool) = __measure_typeinf__[] = onoff -const __measure_typeinf__ = fill(false) - -# Wrapper around `_typeinf` that optionally records the exclusive time for each invocation. 
-function typeinf(interp::AbstractInterpreter, frame::InferenceState) - if __measure_typeinf__[] - Timings.enter_new_timer(frame) - v = _typeinf(interp, frame) - Timings.exit_current_timer(frame) - return v - else - return _typeinf(interp, frame) - end -end +const __measure_typeinf__ = RefValue{Bool}(false) function finish!(interp::AbstractInterpreter, caller::InferenceState; can_discard_trees::Bool=may_discard_trees(interp)) @@ -258,19 +146,6 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState; return nothing end -function _typeinf(interp::AbstractInterpreter, frame::InferenceState) - typeinf_nocycle(interp, frame) || return false # frame is now part of a higher cycle - # with no active ip's, frame is done - frames = frame.callstack::Vector{AbsIntState} - if length(frames) == frame.cycleid - finish_nocycle(interp, frame) - else - @assert frame.cycleid != 0 - finish_cycle(interp, frames, frame.cycleid) - end - return true -end - function finish_nocycle(::AbstractInterpreter, frame::InferenceState) finishinfer!(frame, frame.interp) opt = frame.result.src @@ -762,16 +637,11 @@ function merge_call_chain!(interp::AbstractInterpreter, parent::InferenceState, add_cycle_backedge!(parent, child) parent.cycleid === ancestorid && break child = parent - parent = frame_parent(child) - while !isa(parent, InferenceState) - # XXX we may miss some edges here? - parent = frame_parent(parent::IRInterpretationState) - end + parent = frame_parent(child)::InferenceState end # ensure that walking the callstack has the same cycleid (DAG) for frame = reverse(ancestorid:length(frames)) - frame = frames[frame] - frame isa InferenceState || continue + frame = frames[frame]::InferenceState frame.cycleid == ancestorid && break @assert frame.cycleid > ancestorid frame.cycleid = ancestorid @@ -796,9 +666,9 @@ end # returned instead. 
function resolve_call_cycle!(interp::AbstractInterpreter, mi::MethodInstance, parent::AbsIntState) # TODO (#48913) implement a proper recursion handling for irinterp: - # This works just because currently the `:terminate` condition guarantees that - # irinterp doesn't fail into unresolved cycles, but it's not a good solution. - # We should revisit this once we have a better story for handling cycles in irinterp. + # This works currently just because the irinterp code doesn't get used much with + # `@assume_effects`, so it never sees a cycle normally, but that may not be a sustainable solution. + parent isa InferenceState || return false frames = parent.callstack::Vector{AbsIntState} uncached = false for frame = reverse(1:length(frames)) @@ -837,15 +707,43 @@ struct EdgeCallResult end # return cached result of regular inference -function return_cached_result(::AbstractInterpreter, codeinst::CodeInstance, caller::AbsIntState) +function return_cached_result(interp::AbstractInterpreter, method::Method, codeinst::CodeInstance, caller::AbsIntState, edgecycle::Bool, edgelimited::Bool) rt = cached_return_type(codeinst) effects = ipo_effects(codeinst) update_valid_age!(caller, WorldRange(min_world(codeinst), max_world(codeinst))) - return EdgeCallResult(rt, codeinst.exctype, codeinst.def, effects) + return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, EdgeCallResult(rt, codeinst.exctype, codeinst.def, effects), edgecycle, edgelimited)) +end + +function EdgeCall_to_MethodCall_Result(interp::AbstractInterpreter, sv::AbsIntState, method::Method, result::EdgeCallResult, edgecycle::Bool, edgelimited::Bool) + (; rt, exct, edge, effects, volatile_inf_result) = result + + if edge === nothing + edgecycle = edgelimited = true + end + + # we look for the termination effect override here as well, since the :terminates effect + # may have been tainted due to recursion at this point even if it's overridden + if is_effect_overridden(sv, :terminates_globally) + # this frame 
is known to terminate + effects = Effects(effects, terminates=true) + elseif is_effect_overridden(method, :terminates_globally) + # this edge is known to terminate + effects = Effects(effects; terminates=true) + elseif edgecycle + # Some sort of recursion was detected. + if edge !== nothing && !edgelimited && !is_edge_recursed(edge, sv) + # no `MethodInstance` cycles -- don't taint :terminate + else + # we cannot guarantee that the call will terminate + effects = Effects(effects; terminates=false) + end + end + + return MethodCallResult(rt, exct, edgecycle, edgelimited, edge, effects, volatile_inf_result) end # compute (and cache) an inferred AST and return the current best estimate of the result type -function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::AbsIntState) +function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::AbsIntState, edgecycle::Bool, edgelimited::Bool) mi = specialize_method(method, atype, sparams)::MethodInstance cache_mode = CACHE_MODE_GLOBAL # cache edge targets globally by default force_inline = is_stmt_inline(get_curr_ssaflag(caller)) @@ -859,13 +757,13 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize cache_mode = CACHE_MODE_VOLATILE else @assert codeinst.def === mi "MethodInstance for cached edge does not match" - return return_cached_result(interp, codeinst, caller) + return return_cached_result(interp, method, codeinst, caller, edgecycle, edgelimited) end end end if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 && !generating_output(#=incremental=#false) add_remark!(interp, caller, "[typeinf_edge] Inference is disabled for the target module") - return EdgeCallResult(Any, Any, nothing, Effects()) + return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, EdgeCallResult(Any, Any, nothing, Effects()), edgecycle, edgelimited)) end if !is_cached(caller) 
&& frame_parent(caller) === nothing # this caller exists to return to the user @@ -886,7 +784,7 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize cache_mode = CACHE_MODE_VOLATILE else @assert codeinst.def === mi "MethodInstance for cached edge does not match" - return return_cached_result(interp, codeinst, caller) + return return_cached_result(interp, method, codeinst, caller, edgecycle, edgelimited) end end end @@ -902,31 +800,40 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize if cache_mode == CACHE_MODE_GLOBAL engine_reject(interp, ci) end - return EdgeCallResult(Any, Any, nothing, Effects()) + return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, EdgeCallResult(Any, Any, nothing, Effects()), edgecycle, edgelimited)) end assign_parentchild!(frame, caller) - typeinf(interp, frame) - update_valid_age!(caller, frame.valid_worlds) - isinferred = is_inferred(frame) - edge = isinferred ? mi : nothing - effects = isinferred ? frame.result.ipo_effects : # effects are adjusted already within `finish` for ipo_effects - adjust_effects(effects_for_cycle(frame.ipo_effects), method) - exc_bestguess = refine_exception_type(frame.exc_bestguess, effects) - # propagate newly inferred source to the inliner, allowing efficient inlining w/o deserialization: - # note that this result is cached globally exclusively, so we can use this local result destructively - volatile_inf_result = isinferred ? 
VolatileInferenceResult(result) : nothing - return EdgeCallResult(frame.bestguess, exc_bestguess, edge, effects, volatile_inf_result) + # the actual inference task for this edge is going to be scheduled within `typeinf_local` via the callstack queue + # while splitting off the rest of the work for this caller into a separate workq thunk + let mresult = Future{MethodCallResult}() + push!(caller.tasks, function get_infer_result(interp, caller) + update_valid_age!(caller, frame.valid_worlds) + local isinferred = is_inferred(frame) + local edge = isinferred ? mi : nothing + local effects = isinferred ? frame.result.ipo_effects : # effects are adjusted already within `finish` for ipo_effects + adjust_effects(effects_for_cycle(frame.ipo_effects), method) + local exc_bestguess = refine_exception_type(frame.exc_bestguess, effects) + # propagate newly inferred source to the inliner, allowing efficient inlining w/o deserialization: + # note that this result is cached globally exclusively, so we can use this local result destructively + local volatile_inf_result = isinferred ? 
VolatileInferenceResult(result) : nothing + local edgeresult = EdgeCallResult(frame.bestguess, exc_bestguess, edge, effects, volatile_inf_result) + mresult[] = EdgeCall_to_MethodCall_Result(interp, caller, method, edgeresult, edgecycle, edgelimited) + return true + end) + return mresult + end elseif frame === true # unresolvable cycle add_remark!(interp, caller, "[typeinf_edge] Unresolvable cycle") - return EdgeCallResult(Any, Any, nothing, Effects()) + return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, EdgeCallResult(Any, Any, nothing, Effects()), edgecycle, edgelimited)) end # return the current knowledge about this cycle frame = frame::InferenceState update_valid_age!(caller, frame.valid_worlds) effects = adjust_effects(effects_for_cycle(frame.ipo_effects), method) exc_bestguess = refine_exception_type(frame.exc_bestguess, effects) - return EdgeCallResult(frame.bestguess, exc_bestguess, nothing, effects) + edgeresult = EdgeCallResult(frame.bestguess, exc_bestguess, nothing, effects) + return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, edgeresult, edgecycle, edgelimited)) end # The `:terminates` effect bit must be conservatively tainted unless recursion cycle has diff --git a/base/compiler/types.jl b/base/compiler/types.jl index b475e360dac02..c51785f23ea29 100644 --- a/base/compiler/types.jl +++ b/base/compiler/types.jl @@ -1,4 +1,12 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +# + +const WorkThunk = Any +# #@eval struct WorkThunk +# thunk::Core.OpaqueClosure{Tuple{Vector{Tasks}}, Bool} +# WorkThunk(work) = new($(Expr(:opaque_closure, :(Tuple{Vector{Tasks}}), :Bool, :Bool, :((tasks) -> work(tasks))))) # @opaque Vector{Tasks}->Bool (tasks)->work(tasks) +# end +# (p::WorkThunk)() = p.thunk() """ AbstractInterpreter diff --git a/base/reflection.jl b/base/reflection.jl index fe48b6f9aa6b9..df29b9a5b1a4e 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -2447,7 +2447,7 @@ true ``` """ function hasmethod(@nospecialize(f), @nospecialize(t)) - return Core._hasmethod(f, t isa Type ? t : to_tuple_type(t)) + return Core._hasmethod(signature_type(f, t)) end function Core.kwcall(kwargs::NamedTuple, ::typeof(hasmethod), @nospecialize(f), @nospecialize(t)) diff --git a/test/compiler/AbstractInterpreter.jl b/test/compiler/AbstractInterpreter.jl index bab4fe02a5168..009128b289ade 100644 --- a/test/compiler/AbstractInterpreter.jl +++ b/test/compiler/AbstractInterpreter.jl @@ -415,10 +415,13 @@ function CC.abstract_call(interp::NoinlineInterpreter, arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int) ret = @invoke CC.abstract_call(interp::CC.AbstractInterpreter, arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int) - if sv.mod in noinline_modules(interp) - return CC.CallMeta(ret.rt, ret.exct, ret.effects, NoinlineCallInfo(ret.info)) + return CC.Future{CC.CallMeta}(ret, interp, sv) do ret, interp, sv + if sv.mod in noinline_modules(interp) + (;rt, exct, effects, info) = ret + return CC.CallMeta(rt, exct, effects, NoinlineCallInfo(info)) + end + return ret end - return ret end function CC.src_inlining_policy(interp::NoinlineInterpreter, @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt32) diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index 46009e0790942..7c7726413004a 100644 --- a/test/compiler/inference.jl +++ 
b/test/compiler/inference.jl @@ -3887,113 +3887,6 @@ f_apply_cglobal(args...) = cglobal(args...) f37532(T, x) = (Core.bitcast(Ptr{T}, x); x) @test Base.return_types(f37532, Tuple{Any, Int}) == Any[Int] -# PR #37749 -# Helper functions for Core.Compiler.Timings. These are normally accessed via a package - -# usually (SnoopCompileCore). -function time_inference(f) - Core.Compiler.Timings.reset_timings() - Core.Compiler.__set_measure_typeinf(true) - f() - Core.Compiler.__set_measure_typeinf(false) - Core.Compiler.Timings.close_current_timer() - return Core.Compiler.Timings._timings[1] -end -function depth(t::Core.Compiler.Timings.Timing) - maximum(depth.(t.children), init=0) + 1 -end -function flatten_times(t::Core.Compiler.Timings.Timing) - collect(Iterators.flatten([(t.time => t.mi_info,), flatten_times.(t.children)...])) -end -# Some very limited testing of timing the type inference (#37749). -@testset "Core.Compiler.Timings" begin - # Functions that call each other - @eval module M1 - i(x) = x+5 - i2(x) = x+2 - h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2 - g(y::Integer, x) = h(Any[y]) + Int(x) - end - timing1 = time_inference() do - @eval M1.g(2, 3.0) - end - @test occursin(r"Core.Compiler.Timings.Timing\(InferenceFrameInfo for Core.Compiler.Timings.ROOT\(\)\) with \d+ children", sprint(show, timing1)) - # The last two functions to be inferred should be `i` and `i2`, inferred at runtime with - # their concrete types. 
- @test sort([mi_info.mi.def.name for (time,mi_info) in flatten_times(timing1)[end-1:end]]) == [:i, :i2] - @test all(child->isa(child.bt, Vector), timing1.children) - @test all(child->child.bt===nothing, timing1.children[1].children) - # Test the stacktrace - @test isa(stacktrace(timing1.children[1].bt), Vector{Base.StackTraces.StackFrame}) - # Test that inference has cached some of the Method Instances - timing2 = time_inference() do - @eval M1.g(2, 3.0) - end - @test length(flatten_times(timing2)) < length(flatten_times(timing1)) - # Printing of InferenceFrameInfo for mi.def isa Module - @eval module M2 - i(x) = x+5 - i2(x) = x+2 - h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2 - g(y::Integer, x) = h(Any[y]) + Int(x) - end - # BEGIN LINE NUMBER SENSITIVITY (adjust the line offset below as needed) - timingmod = time_inference() do - @eval @testset "Outer" begin - @testset "Inner" begin - for i = 1:2 M2.g(2, 3.0) end - end - end - end - @test occursin("thunk from $(@__MODULE__) starting at $(@__FILE__):$((@__LINE__) - 6)", string(timingmod.children)) - # END LINE NUMBER SENSITIVITY - - # Recursive function - @eval module _Recursive f(n::Integer) = n == 0 ? 0 : f(n-1) + 1 end - timing = time_inference() do - @eval _Recursive.f(Base.inferencebarrier(5)) - end - @test 2 <= depth(timing) <= 3 # root -> f (-> +) - @test 2 <= length(flatten_times(timing)) <= 3 # root, f, + - - # Functions inferred with multiple constants - @eval module C - i(x) = x === 0 ? 
0 : 1 / x - a(x) = i(0) * i(x) - b() = i(0) * i(1) * i(0) - function loopc(n) - s = 0 - for i = 1:n - s += i - end - return s - end - call_loopc() = loopc(5) - myfloor(::Type{T}, x) where T = floor(T, x) - d(x) = myfloor(Int16, x) - end - timing = time_inference() do - @eval C.a(2) - @eval C.b() - @eval C.call_loopc() - @eval C.d(3.2) - end - ft = flatten_times(timing) - @test !isempty(ft) - str = sprint(show, ft) - @test occursin("InferenceFrameInfo for /(1::$Int, ::$Int)", str) # inference constants - @test occursin("InferenceFrameInfo for Core.Compiler.Timings.ROOT()", str) # qualified - # loopc has internal slots, check constant printing in this case - sel = filter(ti -> ti.second.mi.def.name === :loopc, ft) - ifi = sel[end].second - @test length(ifi.slottypes) > ifi.nargs - str = sprint(show, sel) - @test occursin("InferenceFrameInfo for $(@__MODULE__).C.loopc(5::$Int)", str) - # check that types aren't double-printed as `T::Type{T}` - sel = filter(ti -> ti.second.mi.def.name === :myfloor, ft) - str = sprint(show, sel) - @test occursin("InferenceFrameInfo for $(@__MODULE__).C.myfloor(::Type{Int16}, ::Float64)", str) -end - # issue #37638 @test only(Base.return_types(() -> (nothing, Any[]...)[2])) isa Type From 1bd610f9ab9dd6e2145d1731c0fb8f7e84208876 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Tue, 1 Oct 2024 13:26:56 +0900 Subject: [PATCH 23/45] optimizer: simplify the finalizer inlining pass a bit (#55934) Minor adjustments have been made to the algorithm of the finalizer inlining pass. Previously, it required that the finalizer registration dominate all uses, but this is not always necessary as far as the finalizer inlining point dominates all the uses. So the check has been relaxed. Other minor fixes have been made as well, but their importance is low. 
--- base/compiler/optimize.jl | 2 +- base/compiler/ssair/inlining.jl | 1 - base/compiler/ssair/passes.jl | 103 +++++++++++++------------------- test/compiler/inline.jl | 2 - 4 files changed, 42 insertions(+), 66 deletions(-) diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 6b0cf981930ad..1971b47323f5d 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -647,7 +647,7 @@ end function refine_effects!(interp::AbstractInterpreter, sv::PostOptAnalysisState) if !is_effect_free(sv.result.ipo_effects) && sv.all_effect_free && !isempty(sv.ea_analysis_pending) ir = sv.ir - nargs = length(ir.argtypes) + nargs = let def = sv.result.linfo.def; isa(def, Method) ? Int(def.nargs) : 0; end estate = EscapeAnalysis.analyze_escapes(ir, nargs, optimizer_lattice(interp), GetNativeEscapeCache(interp)) argescapes = EscapeAnalysis.ArgEscapeCache(estate) stack_analysis_result!(sv.result, argescapes) diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 727e015b67062..9f250b156cd2f 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -1597,7 +1597,6 @@ function handle_finalizer_call!(ir::IRCode, idx::Int, stmt::Expr, info::Finalize push!(stmt.args, item1.invoke) elseif isa(item1, ConstantCase) push!(stmt.args, nothing) - push!(stmt.args, item1.val) end end return nothing diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 37d79e2bd7b0c..3981f7382d707 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -1564,10 +1564,12 @@ end is_nothrow(ir::IRCode, ssa::SSAValue) = has_flag(ir[ssa], IR_FLAG_NOTHROW) -function reachable_blocks(cfg::CFG, from_bb::Int, to_bb::Union{Nothing,Int} = nothing) +function reachable_blocks(cfg::CFG, from_bb::Int, to_bb::Int) worklist = Int[from_bb] visited = BitSet(from_bb) - if to_bb !== nothing + if to_bb == from_bb + return visited + else push!(visited, to_bb) end function visit!(bb::Int) @@ -1582,100 
+1584,78 @@ function reachable_blocks(cfg::CFG, from_bb::Int, to_bb::Union{Nothing,Int} = no return visited end -function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse::SSADefUse, +function try_resolve_finalizer!(ir::IRCode, alloc_idx::Int, finalizer_idx::Int, defuse::SSADefUse, inlining::InliningState, lazydomtree::LazyDomtree, lazypostdomtree::LazyPostDomtree, @nospecialize(info::CallInfo)) # For now, require that: # 1. The allocation dominates the finalizer registration - # 2. The finalizer registration dominates all uses reachable from the - # finalizer registration. - # 3. The insertion block for the finalizer is the post-dominator of all - # uses and the finalizer registration block. The insertion block must - # be dominated by the finalizer registration block. - # 4. The path from the finalizer registration to the finalizer inlining + # 2. The insertion block for the finalizer is the post-dominator of all + # uses (including the finalizer registration). + # 3. The path from the finalizer registration to the finalizer inlining # location is nothrow # - # TODO: We could relax item 3, by inlining the finalizer multiple times. + # TODO: We could relax the check 2, by inlining the finalizer multiple times. 
# Check #1: The allocation dominates the finalizer registration domtree = get!(lazydomtree) finalizer_bb = block_for_inst(ir, finalizer_idx) - alloc_bb = block_for_inst(ir, idx) + alloc_bb = block_for_inst(ir, alloc_idx) dominates(domtree, alloc_bb, finalizer_bb) || return nothing - bb_insert_block::Int = finalizer_bb - bb_insert_idx::Union{Int,Nothing} = finalizer_idx - function note_block_use!(usebb::Int, useidx::Int) - new_bb_insert_block = nearest_common_dominator(get!(lazypostdomtree), - bb_insert_block, usebb) - if new_bb_insert_block == bb_insert_block && bb_insert_idx !== nothing - bb_insert_idx = max(bb_insert_idx::Int, useidx) - elseif new_bb_insert_block == usebb - bb_insert_idx = useidx + # Check #2: The insertion block for the finalizer is the post-dominator of all uses + insert_bb::Int = finalizer_bb + insert_idx::Union{Int,Nothing} = finalizer_idx + function note_defuse!(x::Union{Int,SSAUse}) + defuse_idx = x isa SSAUse ? x.idx : x + defuse_idx == finalizer_idx && return nothing + defuse_bb = block_for_inst(ir, defuse_idx) + new_insert_bb = nearest_common_dominator(get!(lazypostdomtree), + insert_bb, defuse_bb) + if new_insert_bb == insert_bb && insert_idx !== nothing + insert_idx = max(insert_idx::Int, defuse_idx) + elseif new_insert_bb == defuse_bb + insert_idx = defuse_idx else - bb_insert_idx = nothing + insert_idx = nothing end - bb_insert_block = new_bb_insert_block + insert_bb = new_insert_bb nothing end - - # Collect all reachable blocks between the finalizer registration and the - # insertion point - blocks = reachable_blocks(ir.cfg, finalizer_bb, alloc_bb) - - # Check #2 - function check_defuse(x::Union{Int,SSAUse}) - duidx = x isa SSAUse ? 
x.idx : x - duidx == finalizer_idx && return true - bb = block_for_inst(ir, duidx) - # Not reachable from finalizer registration - we're ok - bb ∉ blocks && return true - note_block_use!(bb, duidx) - if dominates(domtree, finalizer_bb, bb) - return true - else - return false - end - end - all(check_defuse, defuse.uses) || return nothing - all(check_defuse, defuse.defs) || return nothing - bb_insert_block != 0 || return nothing # verify post-dominator of all uses exists - - # Check #3 - dominates(domtree, finalizer_bb, bb_insert_block) || return nothing + foreach(note_defuse!, defuse.uses) + foreach(note_defuse!, defuse.defs) + insert_bb != 0 || return nothing # verify post-dominator of all uses exists if !OptimizationParams(inlining.interp).assume_fatal_throw # Collect all reachable blocks between the finalizer registration and the # insertion point - blocks = finalizer_bb == bb_insert_block ? Int[finalizer_bb] : - reachable_blocks(ir.cfg, finalizer_bb, bb_insert_block) + blocks = reachable_blocks(ir.cfg, finalizer_bb, insert_bb) - # Check #4 - function check_range_nothrow(ir::IRCode, s::Int, e::Int) + # Check #3 + function check_range_nothrow(s::Int, e::Int) return all(s:e) do sidx::Int sidx == finalizer_idx && return true - sidx == idx && return true + sidx == alloc_idx && return true return is_nothrow(ir, SSAValue(sidx)) end end for bb in blocks range = ir.cfg.blocks[bb].stmts s, e = first(range), last(range) - if bb == bb_insert_block - bb_insert_idx === nothing && continue - e = bb_insert_idx + if bb == insert_bb + insert_idx === nothing && continue + e = insert_idx end if bb == finalizer_bb s = finalizer_idx end - check_range_nothrow(ir, s, e) || return nothing + check_range_nothrow(s, e) || return nothing end end # Ok, legality check complete. Figure out the exact statement where we're # going to inline the finalizer. - loc = bb_insert_idx === nothing ? 
first(ir.cfg.blocks[bb_insert_block].stmts) : bb_insert_idx::Int - attach_after = bb_insert_idx !== nothing + loc = insert_idx === nothing ? first(ir.cfg.blocks[insert_bb].stmts) : insert_idx::Int + attach_after = insert_idx !== nothing finalizer_stmt = ir[SSAValue(finalizer_idx)][:stmt] argexprs = Any[finalizer_stmt.args[2], finalizer_stmt.args[3]] @@ -1702,11 +1682,10 @@ function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse return nothing end -function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing, InliningState}) +function sroa_mutables!(ir::IRCode, defuses::IdDict{Int,Tuple{SPCSet,SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing,InliningState}) 𝕃ₒ = inlining === nothing ? SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp) lazypostdomtree = LazyPostDomtree(ir) for (defidx, (intermediaries, defuse)) in defuses - intermediaries = collect(intermediaries) # Check if there are any uses we did not account for. If so, the variable # escapes and we cannot eliminate the allocation. This works, because we're guaranteed # not to include any intermediaries that have dead uses. 
As a result, missing uses will only ever @@ -1906,7 +1885,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse end end -function form_new_preserves(origex::Expr, intermediates::Vector{Int}, new_preserves::Vector{Any}) +function form_new_preserves(origex::Expr, intermediaries::Union{Vector{Int},SPCSet}, new_preserves::Vector{Any}) newex = Expr(:foreigncall) nccallargs = length(origex.args[3]::SimpleVector) for i in 1:(6+nccallargs-1) @@ -1915,7 +1894,7 @@ function form_new_preserves(origex::Expr, intermediates::Vector{Int}, new_preser for i in (6+nccallargs):length(origex.args) x = origex.args[i] # don't need to preserve intermediaries - if isa(x, SSAValue) && x.id in intermediates + if isa(x, SSAValue) && x.id in intermediaries continue end push!(newex.args, x) diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index 80c8ddbb08c69..fceb920352482 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -1570,7 +1570,6 @@ let @test get_finalization_count() == 1000 end - function cfg_finalization7(io) for i = -999:1000 o = DoAllocWithField(0) @@ -1597,7 +1596,6 @@ let @test get_finalization_count() == 1000 end - # optimize `[push!|pushfirst!](::Vector{Any}, x...)` @testset "optimize `$f(::Vector{Any}, x...)`" for f = Any[push!, pushfirst!] @eval begin From 06e7b9d292ed4ced5b523fe94daef30332eabbd3 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Tue, 1 Oct 2024 10:13:29 +0530 Subject: [PATCH 24/45] Limit `@inbounds` to indexing in the dual-iterator branch in `copyto_unaliased!` (#55919) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This simplifies the `copyto_unaliased!` implementation where the source and destination have different `IndexStyle`s, and limits the `@inbounds` to only the indexing operation. In particular, the iteration over `eachindex(dest)` is not marked as `@inbounds` anymore.
This seems to help with performance when the destination uses Cartesian indexing. Reduced implementation of the branch: ```julia function copyto_proposed!(dest, src) axes(dest) == axes(src) || throw(ArgumentError("incompatible sizes")) iterdest, itersrc = eachindex(dest), eachindex(src) for (destind, srcind) in zip(iterdest, itersrc) @inbounds dest[destind] = src[srcind] end dest end function copyto_current!(dest, src) axes(dest) == axes(src) || throw(ArgumentError("incompatible sizes")) iterdest, itersrc = eachindex(dest), eachindex(src) ret = iterate(iterdest) @inbounds for a in src idx, state = ret::NTuple{2,Any} dest[idx] = a ret = iterate(iterdest, state) end dest end function copyto_current_limitinbounds!(dest, src) axes(dest) == axes(src) || throw(ArgumentError("incompatible sizes")) iterdest, itersrc = eachindex(dest), eachindex(src) ret = iterate(iterdest) for isrc in itersrc idx, state = ret::NTuple{2,Any} @inbounds dest[idx] = src[isrc] ret = iterate(iterdest, state) end dest end ``` ```julia julia> a = zeros(40000,4000); b = rand(size(a)...); julia> av = view(a, UnitRange.(axes(a))...); julia> @btime copyto_current!($av, $b); 617.704 ms (0 allocations: 0 bytes) julia> @btime copyto_current_limitinbounds!($av, $b); 304.146 ms (0 allocations: 0 bytes) julia> @btime copyto_proposed!($av, $b); 240.217 ms (0 allocations: 0 bytes) julia> versioninfo() Julia Version 1.12.0-DEV.1260 Commit 4a4ca9c8152 (2024-09-28 01:49 UTC) Build Info: Official https://julialang.org release Platform Info: OS: Linux (x86_64-linux-gnu) CPU: 8 × Intel(R) Core(TM) i5-10310U CPU @ 1.70GHz WORD_SIZE: 64 LLVM: libLLVM-18.1.7 (ORCJIT, skylake) Threads: 1 default, 0 interactive, 1 GC (on 8 virtual cores) Environment: JULIA_EDITOR = subl ``` I'm not quite certain why the proposed implementation here (`copyto_proposed!`) is even faster than `copyto_current_limitinbounds!`. In any case, `copyto_proposed!` is easier to read, so I'm not complaining. 
This fixes https://github.com/JuliaLang/julia/issues/53158 --- base/abstractarray.jl | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 754ab20660ab8..e877a87c2cdd1 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1101,11 +1101,8 @@ function copyto_unaliased!(deststyle::IndexStyle, dest::AbstractArray, srcstyle: end else # Dual-iterator implementation - ret = iterate(iterdest) - @inbounds for a in src - idx, state = ret::NTuple{2,Any} - dest[idx] = a - ret = iterate(iterdest, state) + for (Idest, Isrc) in zip(iterdest, itersrc) + @inbounds dest[Idest] = src[Isrc] end end end From 1cfda3f9b1a88c8f6069b2cec03fbc957f3ccd3f Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Tue, 1 Oct 2024 18:10:42 +0530 Subject: [PATCH 25/45] Strong zero in Diagonal triple multiplication (#55927) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, triple multiplication with a `LinearAlgebra.BandedMatrix` sandwiched between two `Diagonal`s isn't associative, as this is implemented using broadcasting, which doesn't assume a strong zero, whereas the two-term matrix multiplication does. ```julia julia> D = Diagonal(StepRangeLen(NaN, 0, 3)); julia> B = Bidiagonal(1:3, 1:2, :U); julia> D * B * D 3×3 Matrix{Float64}: NaN NaN NaN NaN NaN NaN NaN NaN NaN julia> (D * B) * D 3×3 Bidiagonal{Float64, Vector{Float64}}: NaN NaN ⋅ ⋅ NaN NaN ⋅ ⋅ NaN julia> D * (B * D) 3×3 Bidiagonal{Float64, Vector{Float64}}: NaN NaN ⋅ ⋅ NaN NaN ⋅ ⋅ NaN ``` This PR ensures that the 3-term multiplication is evaluated as a sequence of two-term multiplications, which fixes this issue. This also improves performance, as only the bands need to be evaluated now. 
```julia julia> D = Diagonal(1:1000); B = Bidiagonal(1:1000, 1:999, :U); julia> @btime $D * $B * $D; 656.364 μs (11 allocations: 7.63 MiB) # v"1.12.0-DEV.1262" 2.483 μs (12 allocations: 31.50 KiB) # This PR ``` --- stdlib/LinearAlgebra/src/special.jl | 2 ++ stdlib/LinearAlgebra/test/diagonal.jl | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/stdlib/LinearAlgebra/src/special.jl b/stdlib/LinearAlgebra/src/special.jl index 5a7c98cfdf32c..32a5476842933 100644 --- a/stdlib/LinearAlgebra/src/special.jl +++ b/stdlib/LinearAlgebra/src/special.jl @@ -112,6 +112,8 @@ for op in (:+, :-) end end +(*)(Da::Diagonal, A::BandedMatrix, Db::Diagonal) = _tri_matmul(Da, A, Db) + # disambiguation between triangular and banded matrices, banded ones "dominate" _mul!(C::AbstractMatrix, A::AbstractTriangular, B::BandedMatrix, alpha::Number, beta::Number) = @stable_muladdmul _mul!(C, A, B, MulAddMul(alpha, beta)) diff --git a/stdlib/LinearAlgebra/test/diagonal.jl b/stdlib/LinearAlgebra/test/diagonal.jl index dfb901908ba69..98f5498c71033 100644 --- a/stdlib/LinearAlgebra/test/diagonal.jl +++ b/stdlib/LinearAlgebra/test/diagonal.jl @@ -1265,6 +1265,17 @@ end @test *(Diagonal(ones(n)), Diagonal(1:n), Diagonal(ones(n)), Diagonal(1:n)) isa Diagonal end +@testset "triple multiplication with a sandwiched BandedMatrix" begin + D = Diagonal(StepRangeLen(NaN, 0, 4)); + B = Bidiagonal(1:4, 1:3, :U) + C = D * B * D + @test iszero(diag(C, 2)) + # test associativity + C1 = (D * B) * D + C2 = D * (B * D) + @test diag(C,2) == diag(C1,2) == diag(C2,2) +end + @testset "diagind" begin D = Diagonal(1:4) M = Matrix(D) From c3b7573c756ee1e6752f34fa8f1dce77bff4d6b7 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Tue, 1 Oct 2024 18:38:34 +0530 Subject: [PATCH 26/45] Fix dispatch on `alg` in Float16 Hermitian eigen (#55928) Currently, ```julia julia> using LinearAlgebra julia> A = Hermitian(reshape(Float16[1:16;], 4, 4)); julia> eigen(A).values |> typeof Vector{Float16} (alias for 
Array{Float16, 1}) julia> eigen(A, LinearAlgebra.QRIteration()).values |> typeof Vector{Float32} (alias for Array{Float32, 1}) ``` This PR moves the specialization on the `eltype` to an internal method, so that firstly all `alg`s dispatch to that method, and secondly, there are no ambiguities introduced by specializing the top-level `eigen`. The latter currently causes test failures in `StaticArrays` (https://github.com/JuliaArrays/StaticArrays.jl/actions/runs/11092206012/job/30816955210?pr=1279), and should be fixed by this PR. --- stdlib/LinearAlgebra/src/symmetriceigen.jl | 19 ++++++++++++------- stdlib/LinearAlgebra/test/symmetriceigen.jl | 5 +++++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/stdlib/LinearAlgebra/src/symmetriceigen.jl b/stdlib/LinearAlgebra/src/symmetriceigen.jl index fee524a702187..68a1b29f5dbc7 100644 --- a/stdlib/LinearAlgebra/src/symmetriceigen.jl +++ b/stdlib/LinearAlgebra/src/symmetriceigen.jl @@ -20,13 +20,6 @@ function eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, alg::Algo throw(ArgumentError("Unsupported value for `alg` keyword.")) end end -function eigen(A::RealHermSymComplexHerm{Float16}; sortby::Union{Function,Nothing}=nothing) - S = eigtype(eltype(A)) - E = eigen!(eigencopy_oftype(A, S), sortby=sortby) - values = convert(AbstractVector{Float16}, E.values) - vectors = convert(AbstractMatrix{isreal(E.vectors) ? Float16 : Complex{Float16}}, E.vectors) - return Eigen(values, vectors) -end """ eigen(A::Union{Hermitian, Symmetric}, alg::Algorithm = default_eigen_alg(A)) -> Eigen @@ -53,10 +46,22 @@ The default `alg` used may change in the future. The following functions are available for `Eigen` objects: [`inv`](@ref), [`det`](@ref), and [`isposdef`](@ref). 
""" function eigen(A::RealHermSymComplexHerm, alg::Algorithm = default_eigen_alg(A); sortby::Union{Function,Nothing}=nothing) + _eigen(A, alg; sortby) +end + +# we dispatch on the eltype in an internal method to avoid ambiguities +function _eigen(A::RealHermSymComplexHerm, alg::Algorithm; sortby) S = eigtype(eltype(A)) eigen!(eigencopy_oftype(A, S), alg; sortby) end +function _eigen(A::RealHermSymComplexHerm{Float16}, alg::Algorithm; sortby::Union{Function,Nothing}=nothing) + S = eigtype(eltype(A)) + E = eigen!(eigencopy_oftype(A, S), alg, sortby=sortby) + values = convert(AbstractVector{Float16}, E.values) + vectors = convert(AbstractMatrix{isreal(E.vectors) ? Float16 : Complex{Float16}}, E.vectors) + return Eigen(values, vectors) +end eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, irange::UnitRange) = Eigen(LAPACK.syevr!('V', 'I', A.uplo, A.data, 0.0, 0.0, irange.start, irange.stop, -1.0)...) diff --git a/stdlib/LinearAlgebra/test/symmetriceigen.jl b/stdlib/LinearAlgebra/test/symmetriceigen.jl index d55d1deb6bf33..71087ae4d8d24 100644 --- a/stdlib/LinearAlgebra/test/symmetriceigen.jl +++ b/stdlib/LinearAlgebra/test/symmetriceigen.jl @@ -171,6 +171,11 @@ end @test D isa Eigen{ComplexF16, Float16, Matrix{ComplexF16}, Vector{Float16}} @test D.values ≈ D32.values @test D.vectors ≈ D32.vectors + + # ensure that different algorithms dispatch correctly + λ, V = eigen(C, LinearAlgebra.QRIteration()) + @test λ isa Vector{Float16} + @test C * V ≈ V * Diagonal(λ) end @testset "complex Symmetric" begin From 4eb2e4787f67437d18738cff491f5aa4de6a6c03 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Tue, 1 Oct 2024 19:11:28 +0530 Subject: [PATCH 27/45] Remove specialized `ishermitian` method for `Diagonal{<:Real}` (#55948) The fallback method for `Diagonal{<:Number}` handles this already by checking that the `diag` is real, so we don't need this additional specialization. 
--- stdlib/LinearAlgebra/src/diagonal.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl index d762549a2b228..0a95bac5ffb93 100644 --- a/stdlib/LinearAlgebra/src/diagonal.jl +++ b/stdlib/LinearAlgebra/src/diagonal.jl @@ -227,7 +227,6 @@ Base._reverse(A::Diagonal, dims) = reverse!(Matrix(A); dims) Base._reverse(A::Diagonal, ::Colon) = Diagonal(reverse(A.diag)) Base._reverse!(A::Diagonal, ::Colon) = (reverse!(A.diag); A) -ishermitian(D::Diagonal{<:Real}) = true ishermitian(D::Diagonal{<:Number}) = isreal(D.diag) ishermitian(D::Diagonal) = all(ishermitian, D.diag) issymmetric(D::Diagonal{<:Number}) = true From 81ce6a41d737f15d8bbc2788190dcb5565e20b8b Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 1 Oct 2024 10:32:57 -0400 Subject: [PATCH 28/45] Fix logic in `?` docstring example (#55945) --- base/docs/basedocs.jl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl index e28b3a21659a8..a142ecffdb732 100644 --- a/base/docs/basedocs.jl +++ b/base/docs/basedocs.jl @@ -937,11 +937,14 @@ expression, rather than the side effects that evaluating `b` or `c` may have. See the manual section on [control flow](@ref man-conditional-evaluation) for more details. # Examples -``` +```jldoctest julia> x = 1; y = 2; -julia> x > y ? println("x is larger") : println("y is larger") -y is larger +julia> x > y ? println("x is larger") : println("x is not larger") +x is not larger + +julia> x > y ? "x is larger" : x == y ? 
"x and y are equal" : "y is larger" +"y is larger" ``` """ kw"?", kw"?:" From cf8df9a7a056d02d1953f1bb8d07946cc1ec6876 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Tue, 1 Oct 2024 23:41:09 +0900 Subject: [PATCH 29/45] fix `unwrap_macrocalls` (#55950) The implementation of `unwrap_macrocalls` has assumed that what `:macrocall` wraps is always an `Expr` object, but that is not necessarily correct: ```julia julia> Base.@assume_effects :nothrow @show 42 ERROR: LoadError: TypeError: in typeassert, expected Expr, got a value of type Int64 Stacktrace: [1] unwrap_macrocalls(ex::Expr) @ Base ./expr.jl:906 [2] var"@assume_effects"(__source__::LineNumberNode, __module__::Module, args::Vararg{Any}) @ Base ./expr.jl:756 in expression starting at REPL[1]:1 ``` This commit addresses this issue. --- base/expr.jl | 4 ++-- test/core.jl | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/base/expr.jl b/base/expr.jl index c4f64b89de8b6..478ccd7d7cc20 100644 --- a/base/expr.jl +++ b/base/expr.jl @@ -902,8 +902,8 @@ end unwrap_macrocalls(@nospecialize(x)) = x function unwrap_macrocalls(ex::Expr) inner = ex - while inner.head === :macrocall - inner = inner.args[end]::Expr + while isexpr(inner, :macrocall) + inner = inner.args[end] end return inner end diff --git a/test/core.jl b/test/core.jl index 1395817d8615e..62fde5261bfd3 100644 --- a/test/core.jl +++ b/test/core.jl @@ -8293,3 +8293,14 @@ end # to properly give error messages for basic kwargs... 
Core.eval(Core.Compiler, quote issue50174(;a=1) = a end) @test_throws MethodError Core.Compiler.issue50174(;b=2) + +let s = mktemp() do path, io + xxx = 42 + redirect_stdout(io) do + Base.@assume_effects :nothrow @show xxx + end + flush(io) + read(path, String) + end + @test strip(s) == "xxx = 42" +end From 75393f6618782c87d4b321bb587b375c0d52326a Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 1 Oct 2024 10:42:30 -0400 Subject: [PATCH 30/45] make faster BigFloats (#55906) We can coalesce the two required allocations for the MPFR BigFloat API design into one allocation, hopefully giving an easy performance boost. It would have been slightly easier and more efficient if MPFR BigFloat was already a VLA instead of containing a pointer here, but that does not prevent the optimization. --- base/Base.jl | 1 - base/mpfr.jl | 161 ++++++++++++++++-------- base/{rawbigints.jl => rawbigfloats.jl} | 68 ++++------ stdlib/Random/src/generation.jl | 2 +- test/dict.jl | 2 +- test/mpfr.jl | 6 +- 6 files changed, 138 insertions(+), 102 deletions(-) rename base/{rawbigints.jl => rawbigfloats.jl} (58%) diff --git a/base/Base.jl b/base/Base.jl index 10a8dd1532f92..23633f0b5138b 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -306,7 +306,6 @@ end include("hashing.jl") include("rounding.jl") include("div.jl") -include("rawbigints.jl") include("float.jl") include("twiceprecision.jl") include("complex.jl") diff --git a/base/mpfr.jl b/base/mpfr.jl index d393469aa26a1..9d1a0843ebe06 100644 --- a/base/mpfr.jl +++ b/base/mpfr.jl @@ -18,12 +18,10 @@ import setrounding, maxintfloat, widen, significand, frexp, tryparse, iszero, isone, big, _string_n, decompose, minmax, _precision_with_base_2, sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand, - uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask, - RawBigIntRoundingIncrementHelper, truncated, RawBigInt - + uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask using 
.Base.Libc -import ..Rounding: +import ..Rounding: Rounding, rounding_raw, setrounding_raw, rounds_to_nearest, rounds_away_from_zero, tie_breaker_is_to_even, correct_rounding_requires_increment @@ -39,7 +37,6 @@ else const libmpfr = "libmpfr.so.6" end - version() = VersionNumber(unsafe_string(ccall((:mpfr_get_version,libmpfr), Ptr{Cchar}, ()))) patches() = split(unsafe_string(ccall((:mpfr_get_patches,libmpfr), Ptr{Cchar}, ())),' ') @@ -120,44 +117,116 @@ const mpfr_special_exponent_zero = typemin(Clong) + true const mpfr_special_exponent_nan = mpfr_special_exponent_zero + true const mpfr_special_exponent_inf = mpfr_special_exponent_nan + true +struct BigFloatLayout + prec::Clong + sign::Cint + exp::Clong + d::Ptr{Limb} + # possible padding + p::Limb # Tuple{Vararg{Limb}} +end +const offset_prec = fieldoffset(BigFloatLayout, 1) % Int +const offset_sign = fieldoffset(BigFloatLayout, 2) % Int +const offset_exp = fieldoffset(BigFloatLayout, 3) % Int +const offset_d = fieldoffset(BigFloatLayout, 4) % Int +const offset_p_limbs = ((fieldoffset(BigFloatLayout, 5) % Int + sizeof(Limb) - 1) ÷ sizeof(Limb)) +const offset_p = offset_p_limbs * sizeof(Limb) + """ BigFloat <: AbstractFloat Arbitrary precision floating point number type. """ -mutable struct BigFloat <: AbstractFloat - prec::Clong - sign::Cint - exp::Clong - d::Ptr{Limb} - # _d::Buffer{Limb} # Julia gc handle for memory @ d - _d::String # Julia gc handle for memory @ d (optimized) +struct BigFloat <: AbstractFloat + d::Memory{Limb} # Not recommended for general use: # used internally by, e.g. 
deepcopy - global function _BigFloat(prec::Clong, sign::Cint, exp::Clong, d::String) - # ccall-based version, inlined below - #z = new(zero(Clong), zero(Cint), zero(Clong), C_NULL, d) - #ccall((:mpfr_custom_init,libmpfr), Cvoid, (Ptr{Limb}, Clong), d, prec) # currently seems to be a no-op in mpfr - #NAN_KIND = Cint(0) - #ccall((:mpfr_custom_init_set,libmpfr), Cvoid, (Ref{BigFloat}, Cint, Clong, Ptr{Limb}), z, NAN_KIND, prec, d) - #return z - return new(prec, sign, exp, pointer(d), d) - end + global _BigFloat(d::Memory{Limb}) = new(d) function BigFloat(; precision::Integer=_precision_with_base_2(BigFloat)) precision < 1 && throw(DomainError(precision, "`precision` cannot be less than 1.")) nb = ccall((:mpfr_custom_get_size,libmpfr), Csize_t, (Clong,), precision) - nb = (nb + Core.sizeof(Limb) - 1) ÷ Core.sizeof(Limb) # align to number of Limb allocations required for this - #d = Vector{Limb}(undef, nb) - d = _string_n(nb * Core.sizeof(Limb)) - EXP_NAN = mpfr_special_exponent_nan - return _BigFloat(Clong(precision), one(Cint), EXP_NAN, d) # +NAN + nl = (nb + offset_p + sizeof(Limb) - 1) ÷ Core.sizeof(Limb) # align to number of Limb allocations required for this + d = Memory{Limb}(undef, nl % Int) + # ccall-based version, inlined below + z = _BigFloat(d) # initialize to +NAN + #ccall((:mpfr_custom_init,libmpfr), Cvoid, (Ptr{Limb}, Clong), BigFloatData(d), prec) # currently seems to be a no-op in mpfr + #NAN_KIND = Cint(0) + #ccall((:mpfr_custom_init_set,libmpfr), Cvoid, (Ref{BigFloat}, Cint, Clong, Ptr{Limb}), z, NAN_KIND, prec, BigFloatData(d)) + z.prec = Clong(precision) + z.sign = one(Cint) + z.exp = mpfr_special_exponent_nan + return z end end -# The rounding mode here shouldn't matter. -significand_limb_count(x::BigFloat) = div(sizeof(x._d), sizeof(Limb), RoundToZero) +""" +Segment of raw words of bits interpreted as a big integer. Less +significant words come first. Each word is in machine-native bit-order. 
+""" +struct BigFloatData{Limb} + d::Memory{Limb} +end + +# BigFloat interface +@inline function Base.getproperty(x::BigFloat, s::Symbol) + d = getfield(x, :d) + p = Base.unsafe_convert(Ptr{Limb}, d) + if s === :prec + return GC.@preserve d unsafe_load(Ptr{Clong}(p) + offset_prec) + elseif s === :sign + return GC.@preserve d unsafe_load(Ptr{Cint}(p) + offset_sign) + elseif s === :exp + return GC.@preserve d unsafe_load(Ptr{Clong}(p) + offset_exp) + elseif s === :d + return BigFloatData(d) + else + return throw(FieldError(typeof(x), s)) + end +end + +@inline function Base.setproperty!(x::BigFloat, s::Symbol, v) + d = getfield(x, :d) + p = Base.unsafe_convert(Ptr{Limb}, d) + if s === :prec + return GC.@preserve d unsafe_store!(Ptr{Clong}(p) + offset_prec, v) + elseif s === :sign + return GC.@preserve d unsafe_store!(Ptr{Cint}(p) + offset_sign, v) + elseif s === :exp + return GC.@preserve d unsafe_store!(Ptr{Clong}(p) + offset_exp, v) + #elseif s === :d # not mutable + else + return throw(FieldError(x, s)) + end +end + +# Ref interface: make sure the conversion to C is done properly +Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ptr{BigFloat}) = error("not compatible with mpfr") +Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ref{BigFloat}) = error("not compatible with mpfr") +Base.cconvert(::Type{Ref{BigFloat}}, x::BigFloat) = x.d # BigFloatData is the Ref type for BigFloat +function Base.unsafe_convert(::Type{Ref{BigFloat}}, x::BigFloatData) + d = getfield(x, :d) + p = Base.unsafe_convert(Ptr{Limb}, d) + GC.@preserve d unsafe_store!(Ptr{Ptr{Limb}}(p) + offset_d, p + offset_p, :monotonic) # :monotonic ensure that TSAN knows that this isn't a data race + return Ptr{BigFloat}(p) +end +Base.unsafe_convert(::Type{Ptr{Limb}}, fd::BigFloatData) = Base.unsafe_convert(Ptr{Limb}, getfield(fd, :d)) + offset_p +function Base.setindex!(fd::BigFloatData, v, i) + d = getfield(fd, :d) + @boundscheck 1 <= i <= length(d) - offset_p_limbs || throw(BoundsError(fd, i)) + @inbounds d[i + 
offset_p_limbs] = v + return fd +end +function Base.getindex(fd::BigFloatData, i) + d = getfield(fd, :d) + @boundscheck 1 <= i <= length(d) - offset_p_limbs || throw(BoundsError(fd, i)) + @inbounds d[i + offset_p_limbs] +end +Base.length(fd::BigFloatData) = length(getfield(fd, :d)) - offset_p_limbs +Base.copyto!(fd::BigFloatData, limbs) = copyto!(getfield(fd, :d), offset_p_limbs + 1, limbs) # for Random + +include("rawbigfloats.jl") rounding_raw(::Type{BigFloat}) = something(Base.ScopedValues.get(CURRENT_ROUNDING_MODE), ROUNDING_MODE[]) setrounding_raw(::Type{BigFloat}, r::MPFRRoundingMode) = ROUNDING_MODE[]=r @@ -165,24 +234,12 @@ function setrounding_raw(f::Function, ::Type{BigFloat}, r::MPFRRoundingMode) Base.ScopedValues.@with(CURRENT_ROUNDING_MODE => r, f()) end - rounding(::Type{BigFloat}) = convert(RoundingMode, rounding_raw(BigFloat)) setrounding(::Type{BigFloat}, r::RoundingMode) = setrounding_raw(BigFloat, convert(MPFRRoundingMode, r)) setrounding(f::Function, ::Type{BigFloat}, r::RoundingMode) = setrounding_raw(f, BigFloat, convert(MPFRRoundingMode, r)) -# overload the definition of unsafe_convert to ensure that `x.d` is assigned -# it may have been dropped in the event that the BigFloat was serialized -Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ptr{BigFloat}) = x -@inline function Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ref{BigFloat}) - x = x[] - if x.d == C_NULL - x.d = pointer(x._d) - end - return convert(Ptr{BigFloat}, Base.pointer_from_objref(x)) -end - """ BigFloat(x::Union{Real, AbstractString} [, rounding::RoundingMode=rounding(BigFloat)]; [precision::Integer=precision(BigFloat)]) @@ -283,17 +340,18 @@ function BigFloat(x::Float64, r::MPFRRoundingMode=rounding_raw(BigFloat); precis nlimbs = (precision + 8*Core.sizeof(Limb) - 1) ÷ (8*Core.sizeof(Limb)) # Limb is a CLong which is a UInt32 on windows (thank M$) which makes this more complicated and slower. 
+ zd = z.d if Limb === UInt64 for i in 1:nlimbs-1 - unsafe_store!(z.d, 0x0, i) + @inbounds setindex!(zd, 0x0, i) end - unsafe_store!(z.d, val, nlimbs) + @inbounds setindex!(zd, val, nlimbs) else for i in 1:nlimbs-2 - unsafe_store!(z.d, 0x0, i) + @inbounds setindex!(zd, 0x0, i) end - unsafe_store!(z.d, val % UInt32, nlimbs-1) - unsafe_store!(z.d, (val >> 32) % UInt32, nlimbs) + @inbounds setindex!(zd, val % UInt32, nlimbs-1) + @inbounds setindex!(zd, (val >> 32) % UInt32, nlimbs) end z end @@ -440,12 +498,12 @@ function to_ieee754(::Type{T}, x::BigFloat, rm) where {T<:AbstractFloat} ret_u = if is_regular & !rounds_to_inf & !rounds_to_zero if !exp_is_huge_p # significand - v = RawBigInt{Limb}(x._d, significand_limb_count(x)) + v = x.d::BigFloatData len = max(ieee_precision + min(exp_diff, 0), 0)::Int signif = truncated(U, v, len) & significand_mask(T) # round up if necessary - rh = RawBigIntRoundingIncrementHelper(v, len) + rh = BigFloatDataRoundingIncrementHelper(v, len) incr = correct_rounding_requires_increment(rh, rm, sb) # exponent @@ -1193,10 +1251,8 @@ set_emin!(x) = check_exponent_err(ccall((:mpfr_set_emin, libmpfr), Cint, (Clong, function Base.deepcopy_internal(x::BigFloat, stackdict::IdDict) get!(stackdict, x) do - # d = copy(x._d) - d = x._d - d′ = GC.@preserve d unsafe_string(pointer(d), sizeof(d)) # creates a definitely-new String - y = _BigFloat(x.prec, x.sign, x.exp, d′) + d′ = copy(getfield(x, :d)) + y = _BigFloat(d′) #ccall((:mpfr_custom_move,libmpfr), Cvoid, (Ref{BigFloat}, Ptr{Limb}), y, d) # unnecessary return y end::BigFloat @@ -1210,7 +1266,8 @@ function decompose(x::BigFloat)::Tuple{BigInt, Int, Int} s.size = cld(x.prec, 8*sizeof(Limb)) # limbs b = s.size * sizeof(Limb) # bytes ccall((:__gmpz_realloc2, libgmp), Cvoid, (Ref{BigInt}, Culong), s, 8b) # bits - memcpy(s.d, x.d, b) + xd = x.d + GC.@preserve xd memcpy(s.d, Base.unsafe_convert(Ptr{Limb}, xd), b) s, x.exp - 8b, x.sign end diff --git a/base/rawbigints.jl b/base/rawbigfloats.jl similarity 
index 58% rename from base/rawbigints.jl rename to base/rawbigfloats.jl index a9bb18e163e2d..4377edfc463d8 100644 --- a/base/rawbigints.jl +++ b/base/rawbigfloats.jl @@ -1,41 +1,21 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -""" -Segment of raw words of bits interpreted as a big integer. Less -significant words come first. Each word is in machine-native bit-order. -""" -struct RawBigInt{T<:Unsigned} - d::String - word_count::Int - - function RawBigInt{T}(d::String, word_count::Int) where {T<:Unsigned} - new{T}(d, word_count) - end -end +# Some operations on BigFloat can be done more directly by treating the data portion ("BigFloatData") as a BigInt -elem_count(x::RawBigInt, ::Val{:words}) = x.word_count +elem_count(x::BigFloatData, ::Val{:words}) = length(x) elem_count(x::Unsigned, ::Val{:bits}) = sizeof(x) * 8 -word_length(::RawBigInt{T}) where {T} = elem_count(zero(T), Val(:bits)) -elem_count(x::RawBigInt{T}, ::Val{:bits}) where {T} = word_length(x) * elem_count(x, Val(:words)) +word_length(::BigFloatData{T}) where {T} = elem_count(zero(T), Val(:bits)) +elem_count(x::BigFloatData{T}, ::Val{:bits}) where {T} = word_length(x) * elem_count(x, Val(:words)) reversed_index(n::Int, i::Int) = n - i - 1 reversed_index(x, i::Int, v::Val) = reversed_index(elem_count(x, v), i)::Int -split_bit_index(x::RawBigInt, i::Int) = divrem(i, word_length(x), RoundToZero) - -function get_elem_words_raw(x::RawBigInt{T}, i::Int) where {T} - @boundscheck if (i < 0) || (elem_count(x, Val(:words)) ≤ i) - throw(BoundsError(x, i)) - end - d = x.d - j = i + 1 - (GC.@preserve d unsafe_load(Ptr{T}(pointer(d)), j))::T -end +split_bit_index(x::BigFloatData, i::Int) = divrem(i, word_length(x), RoundToZero) """ `i` is the zero-based index of the wanted word in `x`, starting from the less significant words. 
""" -function get_elem(x::RawBigInt, i::Int, ::Val{:words}, ::Val{:ascending}) - @inbounds @inline get_elem_words_raw(x, i) +function get_elem(x::BigFloatData{T}, i::Int, ::Val{:words}, ::Val{:ascending}) where {T} + @inbounds return x[i + 1]::T end function get_elem(x, i::Int, v::Val, ::Val{:descending}) @@ -43,9 +23,9 @@ function get_elem(x, i::Int, v::Val, ::Val{:descending}) get_elem(x, j, v, Val(:ascending)) end -word_is_nonzero(x::RawBigInt, i::Int, v::Val) = !iszero(get_elem(x, i, Val(:words), v)) +word_is_nonzero(x::BigFloatData, i::Int, v::Val) = !iszero(get_elem(x, i, Val(:words), v)) -word_is_nonzero(x::RawBigInt, v::Val) = let x = x +word_is_nonzero(x::BigFloatData, v::Val) = let x = x i -> word_is_nonzero(x, i, v) end @@ -53,7 +33,7 @@ end Returns a `Bool` indicating whether the `len` least significant words of `x` are nonzero. """ -function tail_is_nonzero(x::RawBigInt, len::Int, ::Val{:words}) +function tail_is_nonzero(x::BigFloatData, len::Int, ::Val{:words}) any(word_is_nonzero(x, Val(:ascending)), 0:(len - 1)) end @@ -61,7 +41,7 @@ end Returns a `Bool` indicating whether the `len` least significant bits of the `i`-th (zero-based index) word of `x` are nonzero. """ -function tail_is_nonzero(x::RawBigInt, len::Int, i::Int, ::Val{:word}) +function tail_is_nonzero(x::BigFloatData, len::Int, i::Int, ::Val{:word}) !iszero(len) && !iszero(get_elem(x, i, Val(:words), Val(:ascending)) << (word_length(x) - len)) end @@ -70,7 +50,7 @@ end Returns a `Bool` indicating whether the `len` least significant bits of `x` are nonzero. """ -function tail_is_nonzero(x::RawBigInt, len::Int, ::Val{:bits}) +function tail_is_nonzero(x::BigFloatData, len::Int, ::Val{:bits}) if 0 < len word_count, bit_count_in_word = split_bit_index(x, len) tail_is_nonzero(x, bit_count_in_word, word_count, Val(:word)) || @@ -90,7 +70,7 @@ end """ Returns a `Bool` that is the `i`-th (zero-based index) bit of `x`. 
""" -function get_elem(x::RawBigInt, i::Int, ::Val{:bits}, v::Val{:ascending}) +function get_elem(x::BigFloatData, i::Int, ::Val{:bits}, v::Val{:ascending}) vb = Val(:bits) if 0 ≤ i < elem_count(x, vb) word_index, bit_index_in_word = split_bit_index(x, i) @@ -106,7 +86,7 @@ Returns an integer of type `R`, consisting of the `len` most significant bits of `x`. If there are less than `len` bits in `x`, the least significant bits are zeroed. """ -function truncated(::Type{R}, x::RawBigInt, len::Int) where {R<:Integer} +function truncated(::Type{R}, x::BigFloatData, len::Int) where {R<:Integer} ret = zero(R) if 0 < len word_count, bit_count_in_word = split_bit_index(x, len) @@ -116,7 +96,7 @@ function truncated(::Type{R}, x::RawBigInt, len::Int) where {R<:Integer} for w ∈ 0:(word_count - 1) ret <<= k - if w < lenx + if w < lenx # if the output type is larger, truncate turns into zero-extend word = get_elem(x, w, vals...) ret |= R(word) end @@ -124,7 +104,7 @@ function truncated(::Type{R}, x::RawBigInt, len::Int) where {R<:Integer} if !iszero(bit_count_in_word) ret <<= bit_count_in_word - if word_count < lenx + if word_count < lenx # if the output type is larger, truncate turns into zero-extend wrd = get_elem(x, word_count, vals...) ret |= R(wrd >>> (k - bit_count_in_word)) end @@ -133,14 +113,14 @@ function truncated(::Type{R}, x::RawBigInt, len::Int) where {R<:Integer} ret::R end -struct RawBigIntRoundingIncrementHelper{T<:Unsigned} - n::RawBigInt{T} +struct BigFloatDataRoundingIncrementHelper{T<:Unsigned} + n::BigFloatData{T} trunc_len::Int final_bit::Bool round_bit::Bool - function RawBigIntRoundingIncrementHelper{T}(n::RawBigInt{T}, len::Int) where {T<:Unsigned} + function BigFloatDataRoundingIncrementHelper{T}(n::BigFloatData{T}, len::Int) where {T<:Unsigned} vals = (Val(:bits), Val(:descending)) f = get_elem(n, len - 1, vals...) r = get_elem(n, len , vals...) 
@@ -148,15 +128,15 @@ struct RawBigIntRoundingIncrementHelper{T<:Unsigned} end end -function RawBigIntRoundingIncrementHelper(n::RawBigInt{T}, len::Int) where {T<:Unsigned} - RawBigIntRoundingIncrementHelper{T}(n, len) +function BigFloatDataRoundingIncrementHelper(n::BigFloatData{T}, len::Int) where {T<:Unsigned} + BigFloatDataRoundingIncrementHelper{T}(n, len) end -(h::RawBigIntRoundingIncrementHelper)(::Rounding.FinalBit) = h.final_bit +(h::BigFloatDataRoundingIncrementHelper)(::Rounding.FinalBit) = h.final_bit -(h::RawBigIntRoundingIncrementHelper)(::Rounding.RoundBit) = h.round_bit +(h::BigFloatDataRoundingIncrementHelper)(::Rounding.RoundBit) = h.round_bit -function (h::RawBigIntRoundingIncrementHelper)(::Rounding.StickyBit) +function (h::BigFloatDataRoundingIncrementHelper)(::Rounding.StickyBit) v = Val(:bits) n = h.n tail_is_nonzero(n, elem_count(n, v) - h.trunc_len - 1, v) diff --git a/stdlib/Random/src/generation.jl b/stdlib/Random/src/generation.jl index d8bb48d2764d2..b605dff9e5d80 100644 --- a/stdlib/Random/src/generation.jl +++ b/stdlib/Random/src/generation.jl @@ -66,7 +66,7 @@ function _rand!(rng::AbstractRNG, z::BigFloat, sp::SamplerBigFloat) limbs[end] |= Limb_high_bit end z.sign = 1 - GC.@preserve limbs unsafe_copyto!(z.d, pointer(limbs), sp.nlimbs) + copyto!(z.d, limbs) randbool end diff --git a/test/dict.jl b/test/dict.jl index 13c60d5a6a053..909afb3607907 100644 --- a/test/dict.jl +++ b/test/dict.jl @@ -1049,7 +1049,7 @@ Dict(1 => rand(2,3), 'c' => "asdf") # just make sure this does not trigger a dep # issue #26939 d26939 = WeakKeyDict() - (@noinline d -> d[big"1.0" + 1.1] = 1)(d26939) + (@noinline d -> d[big"1" + 1] = 1)(d26939) GC.gc() # primarily to make sure this doesn't segfault @test count(d26939) == 0 @test length(d26939.ht) == 1 diff --git a/test/mpfr.jl b/test/mpfr.jl index 63da732df1c09..c212bdfc92821 100644 --- a/test/mpfr.jl +++ b/test/mpfr.jl @@ -1089,11 +1089,11 @@ end end end -@testset "RawBigInt truncation OOB read" begin 
+@testset "BigFloatData truncation OOB read" begin @testset "T: $T" for T ∈ (UInt8, UInt16, UInt32, UInt64, UInt128) - v = Base.RawBigInt{T}("a"^sizeof(T), 1) + v = Base.MPFR.BigFloatData{T}(fill(typemax(T), 1 + Base.MPFR.offset_p_limbs)) @testset "bit_count: $bit_count" for bit_count ∈ (0:10:80) - @test Base.truncated(UInt128, v, bit_count) isa Any + @test Base.MPFR.truncated(UInt128, v, bit_count) isa Any end end end From 03f8523f27b55f75e16ff1ef592c2bbb1eafd46c Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 1 Oct 2024 16:53:59 +0200 Subject: [PATCH 31/45] Add propagate_inbounds_meta to atomic genericmemory ops (#55902) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `memoryref(mem, i)` will otherwise emit a boundscheck. ``` ; │ @ /home/vchuravy/WorkstealingQueues/src/CLL.jl:53 within `setindex_atomic!` @ genericmemory.jl:329 ; │┌ @ boot.jl:545 within `memoryref` %ptls_field = getelementptr inbounds i8, ptr %tls_pgcstack, i64 16 %ptls_load = load ptr, ptr %ptls_field, align 8 %"box::GenericMemoryRef" = call noalias nonnull align 8 dereferenceable(32) ptr @ijl_gc_small_alloc(ptr %ptls_load, i32 552, i32 32, i64 23456076646928) #9 %"box::GenericMemoryRef.tag_addr" = getelementptr inbounds i64, ptr %"box::GenericMemoryRef", i64 -1 store atomic i64 23456076646928, ptr %"box::GenericMemoryRef.tag_addr" unordered, align 8 store ptr %memoryref_data, ptr %"box::GenericMemoryRef", align 8 %.repack8 = getelementptr inbounds { ptr, ptr }, ptr %"box::GenericMemoryRef", i64 0, i32 1 store ptr %memoryref_mem, ptr %.repack8, align 8 call void @ijl_bounds_error_int(ptr nonnull %"box::GenericMemoryRef", i64 %7) unreachable ``` For the Julia code: ```julia function Base.setindex_atomic!(buf::WSBuffer{T}, order::Symbol, val::T, idx::Int64) where T @inbounds Base.setindex_atomic!(buf.buffer, order, val,((idx - 1) & buf.mask) + 1) end ``` from 
https://github.com/gbaraldi/WorkstealingQueues.jl/blob/0ebc57237cf0c90feedf99e4338577d04b67805b/src/CLL.jl#L41 --- base/genericmemory.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/base/genericmemory.jl b/base/genericmemory.jl index 6537839320206..c4ebbc6ca14e1 100644 --- a/base/genericmemory.jl +++ b/base/genericmemory.jl @@ -320,11 +320,13 @@ end # get, set(once), modify, swap and replace at index, atomically function getindex_atomic(mem::GenericMemory, order::Symbol, i::Int) + @_propagate_inbounds_meta memref = memoryref(mem, i) return memoryrefget(memref, order, @_boundscheck) end function setindex_atomic!(mem::GenericMemory, order::Symbol, val, i::Int) + @_propagate_inbounds_meta T = eltype(mem) memref = memoryref(mem, i) return memoryrefset!( @@ -342,6 +344,7 @@ function setindexonce_atomic!( val, i::Int, ) + @_propagate_inbounds_meta T = eltype(mem) memref = memoryref(mem, i) return Core.memoryrefsetonce!( @@ -354,11 +357,13 @@ function setindexonce_atomic!( end function modifyindex_atomic!(mem::GenericMemory, order::Symbol, op, val, i::Int) + @_propagate_inbounds_meta memref = memoryref(mem, i) return Core.memoryrefmodify!(memref, op, val, order, @_boundscheck) end function swapindex_atomic!(mem::GenericMemory, order::Symbol, val, i::Int) + @_propagate_inbounds_meta T = eltype(mem) memref = memoryref(mem, i) return Core.memoryrefswap!( @@ -377,6 +382,7 @@ function replaceindex_atomic!( desired, i::Int, ) + @_propagate_inbounds_meta T = eltype(mem) memref = memoryref(mem, i) return Core.memoryrefreplace!( From dd310849adbf9f089d7e21c142b513deb8ff7b01 Mon Sep 17 00:00:00 2001 From: Neven Sajko Date: Tue, 1 Oct 2024 20:56:25 +0200 Subject: [PATCH 32/45] fix rounding mode in construction of `BigFloat` from pi (#55911) The default argument of the method was outdated, reading the global default rounding directly, bypassing the `ScopedValue` stuff. 
--- base/irrationals.jl | 2 +- test/rounding.jl | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/base/irrationals.jl b/base/irrationals.jl index eafe388162353..b3073c503238a 100644 --- a/base/irrationals.jl +++ b/base/irrationals.jl @@ -216,7 +216,7 @@ function irrational(sym, val, def) esym = esc(sym) qsym = esc(Expr(:quote, sym)) bigconvert = isa(def,Symbol) ? quote - function Base.BigFloat(::Irrational{$qsym}, r::MPFR.MPFRRoundingMode=MPFR.ROUNDING_MODE[]; precision=precision(BigFloat)) + function Base.BigFloat(::Irrational{$qsym}, r::MPFR.MPFRRoundingMode=Rounding.rounding_raw(BigFloat); precision=precision(BigFloat)) c = BigFloat(;precision=precision) ccall(($(string("mpfr_const_", def)), :libmpfr), Cint, (Ref{BigFloat}, MPFR.MPFRRoundingMode), c, r) diff --git a/test/rounding.jl b/test/rounding.jl index 76b15ec1d9118..6fad6f62e8dfe 100644 --- a/test/rounding.jl +++ b/test/rounding.jl @@ -470,3 +470,28 @@ end @test prevfloat(f) < i end end + +@testset "π to `BigFloat` with `setrounding`" begin + function irrational_to_big_float(c::AbstractIrrational) + BigFloat(c) + end + + function irrational_to_big_float_with_rounding_mode(c::AbstractIrrational, rm::RoundingMode) + f = () -> irrational_to_big_float(c) + setrounding(f, BigFloat, rm) + end + + function irrational_to_big_float_with_rounding_mode_and_precision(c::AbstractIrrational, rm::RoundingMode, prec::Int) + f = () -> irrational_to_big_float_with_rounding_mode(c, rm) + setprecision(f, BigFloat, prec) + end + + for c ∈ (π, MathConstants.γ, MathConstants.catalan) + for p ∈ 1:40 + @test ( + irrational_to_big_float_with_rounding_mode_and_precision(c, RoundDown, p) < c < + irrational_to_big_float_with_rounding_mode_and_precision(c, RoundUp, p) + ) + end + end +end From a45d701e216139a9ef6d5e1f674e943d18677c8d Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Wed, 2 Oct 2024 17:59:29 +0900 Subject: [PATCH 33/45] fix 
`nonsetable_type_hint_handler` (#55962) The current implementation is wrong, causing it to display inappropriate hints like the following: ```julia julia> s = Some("foo"); julia> s[] = "bar" ERROR: MethodError: no method matching setindex!(::Some{String}, ::String) The function `setindex!` exists, but no method is defined for this combination of argument types. You attempted to index the type String, rather than an instance of the type. Make sure you create the type using its constructor: d = String([...]) rather than d = String Stacktrace: [1] top-level scope @ REPL[2]:1 ``` --- base/errorshow.jl | 2 +- test/errorshow.jl | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/base/errorshow.jl b/base/errorshow.jl index 9c8aad8b6ee2c..20bdee1de6ec0 100644 --- a/base/errorshow.jl +++ b/base/errorshow.jl @@ -1052,7 +1052,7 @@ function nonsetable_type_hint_handler(io, ex, arg_types, kwargs) print(io, "\nAre you trying to index into an array? For multi-dimensional arrays, separate the indices with commas: ") printstyled(io, "a[1, 2]", color=:cyan) print(io, " rather than a[1][2]") - else isType(T) + elseif isType(T) Tx = T.parameters[1] print(io, "\nYou attempted to index the type $Tx, rather than an instance of the type. Make sure you create the type using its constructor: ") printstyled(io, "d = $Tx([...])", color=:cyan) diff --git a/test/errorshow.jl b/test/errorshow.jl index 3ede370553212..db22fea1131d1 100644 --- a/test/errorshow.jl +++ b/test/errorshow.jl @@ -739,8 +739,7 @@ end pop!(Base.Experimental._hint_handlers[DomainError]) # order is undefined, don't copy this struct ANumber <: Number end -let err_str - err_str = @except_str ANumber()(3 + 4) MethodError +let err_str = @except_str ANumber()(3 + 4) MethodError @test occursin("objects of type $(curmod_prefix)ANumber are not callable", err_str) @test count(==("Maybe you forgot to use an operator such as *, ^, %, / etc. 
?"), split(err_str, '\n')) == 1 # issue 40478 @@ -748,22 +747,25 @@ let err_str @test count(==("Maybe you forgot to use an operator such as *, ^, %, / etc. ?"), split(err_str, '\n')) == 1 end -let err_str - a = [1 2; 3 4]; +let a = [1 2; 3 4]; err_str = @except_str (a[1][2] = 5) MethodError @test occursin("\nAre you trying to index into an array? For multi-dimensional arrays, separate the indices with commas: ", err_str) @test occursin("a[1, 2]", err_str) @test occursin("rather than a[1][2]", err_str) end -let err_str - d = Dict +let d = Dict err_str = @except_str (d[1] = 5) MethodError @test occursin("\nYou attempted to index the type Dict, rather than an instance of the type. Make sure you create the type using its constructor: ", err_str) @test occursin("d = Dict([...])", err_str) @test occursin(" rather than d = Dict", err_str) end +let s = Some("foo") + err_str = @except_str (s[] = "bar") MethodError + @test !occursin("You attempted to index the type String", err_str) +end + # Execute backtrace once before checking formatting, see #38858 backtrace() From fbb3e1175d52abec0ff4ca83d8c9e126d9f8a06b Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Wed, 2 Oct 2024 07:27:35 -0400 Subject: [PATCH 34/45] REPL: make UndefVarError aware of imported modules (#55932) --- base/experimental.jl | 4 ++-- stdlib/REPL/src/REPL.jl | 12 +++++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/base/experimental.jl b/base/experimental.jl index 6e757e9fa0e5f..648b5da0ed9a1 100644 --- a/base/experimental.jl +++ b/base/experimental.jl @@ -319,9 +319,9 @@ function show_error_hints(io, ex, args...) for handler in hinters try @invokelatest handler(io, ex, args...) 
- catch err + catch tn = typeof(handler).name - @error "Hint-handler $handler for $(typeof(ex)) in $(tn.module) caused an error" + @error "Hint-handler $handler for $(typeof(ex)) in $(tn.module) caused an error" exception=current_exceptions() end end end diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl index 44fe0446240c6..272b907165341 100644 --- a/stdlib/REPL/src/REPL.jl +++ b/stdlib/REPL/src/REPL.jl @@ -74,7 +74,17 @@ end function _UndefVarError_warnfor(io::IO, m::Module, var::Symbol) Base.isbindingresolved(m, var) || return false (Base.isexported(m, var) || Base.ispublic(m, var)) || return false - print(io, "\nHint: a global variable of this name also exists in $m.") + active_mod = Base.active_module() + print(io, "\nHint: ") + if isdefined(active_mod, Symbol(m)) + print(io, "a global variable of this name also exists in $m.") + else + if Symbol(m) == var + print(io, "$m is loaded but not imported in the active module $active_mod.") + else + print(io, "a global variable of this name may be made accessible by importing $m in the current active module $active_mod") + end + end return true end From 5fc582b3fcc8adbd5e4b9a8df790a63fcb7f7a9c Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 3 Oct 2024 07:17:00 +0900 Subject: [PATCH 35/45] fix test/staged.jl (#55967) In particular, the implementation of `overdub_generator54341` was dangerous. This fixes it up. 
--- test/staged.jl | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/test/staged.jl b/test/staged.jl index aec4a3bf135d3..0112dd73b45f7 100644 --- a/test/staged.jl +++ b/test/staged.jl @@ -381,11 +381,18 @@ let @test length(ir.cfg.blocks) == 1 end +function generate_lambda_ex(world::UInt, source::LineNumberNode, + argnames::Core.SimpleVector, spnames::Core.SimpleVector, + body::Expr) + stub = Core.GeneratedFunctionStub(identity, argnames, spnames) + return stub(world, source, body) +end + # Test that `Core.CachedGenerator` works as expected struct Generator54916 <: Core.CachedGenerator end function (::Generator54916)(world::UInt, source::LineNumberNode, args...) - stub = Core.GeneratedFunctionStub(identity, Core.svec(:doit54916, :func, :arg), Core.svec()) - return stub(world, source, :(func(arg))) + return generate_lambda_ex(world, source, + Core.svec(:doit54916, :func, :arg), Core.svec(), :(func(arg))) end @eval function doit54916(func, arg) $(Expr(:meta, :generated, Generator54916())) @@ -412,8 +419,8 @@ function generator49715(world, source, self, f, tt) sig = Tuple{f, tt.parameters...} mi = Base._which(sig; world) error("oh no") - stub = Core.GeneratedFunctionStub(identity, Core.svec(:methodinstance, :ctx, :x, :f), Core.svec()) - stub(world, source, :(nothing)) + return generate_lambda_ex(world, source, + Core.svec(:doit49715, :f, :tt), Core.svec(), :(nothing)) end @eval function doit49715(f, tt) $(Expr(:meta, :generated, generator49715)) @@ -426,9 +433,10 @@ function overdubbee54341(a, b) a + b end const overdubee_codeinfo54341 = code_lowered(overdubbee54341, Tuple{Any, Any})[1] -function overdub_generator54341(world::UInt, source::LineNumberNode, args...) 
- if length(args) != 2 - :(error("Wrong number of arguments")) +function overdub_generator54341(world::UInt, source::LineNumberNode, selftype, fargtypes) + if length(fargtypes) != 2 + return generate_lambda_ex(world, source, + Core.svec(:overdub54341, :args), Core.svec(), :(error("Wrong number of arguments"))) else return copy(overdubee_codeinfo54341) end @@ -438,3 +446,4 @@ end $(Expr(:meta, :generated_only)) end @test overdub54341(1, 2) == 3 +@test_throws "Wrong number of arguments" overdub54341(1, 2, 3) From d19bb472fbc92c1d93645426025f77e449cee763 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Wed, 2 Oct 2024 19:46:24 -0400 Subject: [PATCH 36/45] Explicitly store a module's location (#55963) Revise wants to know what file a module's `module` definition is in. Currently it does this by looking at the source location for the implicitly generated `eval` method. This is terrible for two reasons: 1. The method may not exist if the module is a baremodule (which is not particularly common, which is probably why we haven't seen it). 2. The fact that the implicitly generated `eval` method has this location information is an implementation detail that I'd like to get rid of (#55949). This PR adds explicit file/line info to `Module`, so that Revise doesn't have to use the hack anymore. --- base/reflection.jl | 11 +++++++++++ src/jl_exported_funcs.inc | 1 + src/julia.h | 2 ++ src/module.c | 10 ++++++++++ src/staticdata.c | 3 +++ src/toplevel.c | 4 ++++ 6 files changed, 31 insertions(+) diff --git a/base/reflection.jl b/base/reflection.jl index f738ca1a618ae..80eeb4c4efb12 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -76,6 +76,17 @@ function fullname(m::Module) return (fullname(mp)..., mn) end +""" + moduleloc(m::Module) -> LineNumberNode + +Get the location of the `module` definition. 
+""" +function moduleloc(m::Module) + line = Ref{Int32}(0) + file = ccall(:jl_module_getloc, Ref{Symbol}, (Any, Ref{Int32}), m, line) + return LineNumberNode(Int(line[]), file) +end + """ names(x::Module; all::Bool=false, imported::Bool=false, usings::Bool=false) -> Vector{Symbol} diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 0c712ef37cb5b..a00a0171d23b7 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -316,6 +316,7 @@ XX(jl_module_name) \ XX(jl_module_names) \ XX(jl_module_parent) \ + XX(jl_module_getloc) \ XX(jl_module_public) \ XX(jl_module_public_p) \ XX(jl_module_use) \ diff --git a/src/julia.h b/src/julia.h index c6ff729a308eb..ed3d9bf825658 100644 --- a/src/julia.h +++ b/src/julia.h @@ -710,6 +710,8 @@ typedef struct _jl_module_t { struct _jl_module_t *parent; _Atomic(jl_svec_t*) bindings; _Atomic(jl_genericmemory_t*) bindingkeyset; // index lookup by name into bindings + jl_sym_t *file; + int32_t line; // hidden fields: arraylist_t usings; // modules with all bindings potentially imported jl_uuid_t build_id; diff --git a/src/module.c b/src/module.c index a6c05d279f5b0..f4da7e1e994de 100644 --- a/src/module.c +++ b/src/module.c @@ -52,6 +52,8 @@ JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, ui m->compile = -1; m->infer = -1; m->max_methods = -1; + m->file = name; // Using the name as a placeholder is better than nothing + m->line = 0; m->hash = parent == NULL ? 
bitmix(name->hash, jl_module_type->hash) : bitmix(name->hash, parent->hash); JL_MUTEX_INIT(&m->lock, "module->lock"); @@ -1179,6 +1181,14 @@ jl_module_t *jl_module_root(jl_module_t *m) } } +JL_DLLEXPORT jl_sym_t *jl_module_getloc(jl_module_t *m, int32_t *line) +{ + if (line) { + *line = m->line; + } + return m->file; +} + JL_DLLEXPORT jl_uuid_t jl_module_build_id(jl_module_t *m) { return m->build_id; } JL_DLLEXPORT jl_uuid_t jl_module_uuid(jl_module_t* m) { return m->uuid; } diff --git a/src/staticdata.c b/src/staticdata.c index aa9a16daab7a5..0a8cbe6db7c67 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -1259,6 +1259,9 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t jl_atomic_store_relaxed(&newm->bindingkeyset, NULL); arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, bindingkeyset))); arraylist_push(&s->relocs_list, (void*)backref_id(s, jl_atomic_load_relaxed(&m->bindingkeyset), s->link_ids_relocs)); + newm->file = NULL; + arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, file))); + arraylist_push(&s->relocs_list, (void*)backref_id(s, m->file, s->link_ids_relocs)); // write out the usings list memset(&newm->usings._space, 0, sizeof(newm->usings._space)); diff --git a/src/toplevel.c b/src/toplevel.c index 5d17a3fcf89a7..8caa8b086ec00 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -213,6 +213,10 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex form = NULL; } + newm->file = jl_symbol(filename); + jl_gc_wb_knownold(newm, newm->file); + newm->line = lineno; + for (int i = 0; i < jl_array_nrows(exprs); i++) { // process toplevel form ct->world_age = jl_atomic_load_acquire(&jl_world_counter); From 3034fc5e3f76d46e57409b4b098577dfa60cc3fa Mon Sep 17 00:00:00 2001 From: Dennis Hoelgaard Bal <61620837+KronosTheLate@users.noreply.github.com> Date: Thu, 3 Oct 2024 01:56:06 +0200 Subject: [PATCH 37/45] mergewith: add single argument example 
to docstring (#55964) I ran into this edge case. I thought it should be documented. --------- Co-authored-by: Lilith Orion Hafner --- base/abstractdict.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/base/abstractdict.jl b/base/abstractdict.jl index 62a5b3ee9e1b0..85a726b4cdbf4 100644 --- a/base/abstractdict.jl +++ b/base/abstractdict.jl @@ -392,6 +392,10 @@ Dict{String, Float64} with 3 entries: julia> ans == mergewith(+)(a, b) true + +julia> mergewith(-, Dict(), Dict(:a=>1)) # Combining function only used if key is present in both +Dict{Any, Any} with 1 entry: + :a => 1 ``` """ mergewith(combine, d::AbstractDict, others::AbstractDict...) = From 77c5875b3cbe85e7fb0bb5a7e796809c901ede95 Mon Sep 17 00:00:00 2001 From: Michael Cho Date: Wed, 2 Oct 2024 20:46:36 -0400 Subject: [PATCH 38/45] [build] avoid libedit linkage and align libccalllazy* SONAMEs (#55968) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While building the 1.11.0-rc4 in Homebrew[^1] in preparation for 1.11.0 release (and to confirm Sequoia successfully builds) I noticed some odd linkage for our Linux builds, which included: 1. LLVM libraries were linking to `libedit.so`, e.g. ``` Dynamic Section: NEEDED libedit.so.0 NEEDED libz.so.1 NEEDED libzstd.so.1 NEEDED libstdc++.so.6 NEEDED libm.so.6 NEEDED libgcc_s.so.1 NEEDED libc.so.6 NEEDED ld-linux-x86-64.so.2 SONAME libLLVM-16jl.so ``` CMakeCache.txt showed ``` //Use libedit if available. LLVM_ENABLE_LIBEDIT:BOOL=ON ``` Which might be overriding `HAVE_LIBEDIT` at https://github.com/JuliaLang/llvm-project/blob/julia-release/16.x/llvm/cmake/config-ix.cmake#L222-L225. So just added `LLVM_ENABLE_LIBEDIT` 2. 
Wasn't sure if there was a reason for this but `libccalllazy*` had mismatched SONAME: ```console ❯ objdump -p lib/julia/libccalllazy* | rg '\.so' lib/julia/libccalllazybar.so: file format elf64-x86-64 NEEDED ccalllazyfoo.so SONAME ccalllazybar.so lib/julia/libccalllazyfoo.so: file format elf64-x86-64 SONAME ccalllazyfoo.so ``` Modifying this, but can drop if intentional. --- [^1]: https://github.com/Homebrew/homebrew-core/pull/192116 --- deps/llvm.mk | 2 +- src/Makefile | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deps/llvm.mk b/deps/llvm.mk index 73697069a4fac..3f4bc3e6746f0 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -102,7 +102,7 @@ endif LLVM_CMAKE += -DLLVM_TOOLS_INSTALL_DIR=$(call rel_path,$(build_prefix),$(build_depsbindir)) LLVM_CMAKE += -DLLVM_UTILS_INSTALL_DIR=$(call rel_path,$(build_prefix),$(build_depsbindir)) LLVM_CMAKE += -DLLVM_INCLUDE_UTILS=ON -DLLVM_INSTALL_UTILS=ON -LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_ENABLE_BINDINGS=OFF -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERMINFO=Off -DHAVE_LIBEDIT=Off +LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_ENABLE_BINDINGS=OFF -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERMINFO=Off -DHAVE_LIBEDIT=Off -DLLVM_ENABLE_LIBEDIT=OFF ifeq ($(LLVM_ASSERTIONS), 1) LLVM_CMAKE += -DLLVM_ENABLE_ASSERTIONS:BOOL=ON endif # LLVM_ASSERTIONS diff --git a/src/Makefile b/src/Makefile index 52e673aa6cc1a..a6b1f433b73ce 100644 --- a/src/Makefile +++ b/src/Makefile @@ -287,10 +287,10 @@ endif $(INSTALL_NAME_CMD)libccalltest.$(SHLIB_EXT) $@ $(build_shlibdir)/libccalllazyfoo.$(SHLIB_EXT): $(SRCDIR)/ccalllazyfoo.c - @$(call PRINT_CC, $(CC) $(JCFLAGS) $(JL_CFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(call SONAME_FLAGS,ccalllazyfoo.$(SHLIB_EXT))) + @$(call PRINT_CC, $(CC) $(JCFLAGS) $(JL_CFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(call SONAME_FLAGS,libccalllazyfoo.$(SHLIB_EXT))) 
$(build_shlibdir)/libccalllazybar.$(SHLIB_EXT): $(SRCDIR)/ccalllazybar.c $(build_shlibdir)/libccalllazyfoo.$(SHLIB_EXT) - @$(call PRINT_CC, $(CC) $(JCFLAGS) $(JL_CFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(call SONAME_FLAGS,ccalllazybar.$(SHLIB_EXT)) -lccalllazyfoo) + @$(call PRINT_CC, $(CC) $(JCFLAGS) $(JL_CFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(call SONAME_FLAGS,libccalllazybar.$(SHLIB_EXT)) -lccalllazyfoo) $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/llvmcalltest.cpp $(LLVM_CONFIG_ABSOLUTE) @$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(FLAGS) $(CPPFLAGS) $(CXXFLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(NO_WHOLE_ARCHIVE) $(CG_LLVMLINK)) -lpthread From 234baad6c4406819af9778c1d4f753cd15f149a3 Mon Sep 17 00:00:00 2001 From: "David K. Zhang" Date: Thu, 3 Oct 2024 13:26:45 +0000 Subject: [PATCH 39/45] Add missing `copy!(::AbstractMatrix, ::UniformScaling)` method (#55970) Hi everyone! First PR to Julia here. It was noticed in a Slack thread yesterday that `copy!(A, I)` doesn't work, but `copyto!(A, I)` does. This PR adds the missing method for `copy!(::AbstractMatrix, ::UniformScaling)`, which simply defers to `copyto!`, and corresponding tests. I added a `compat` notice for Julia 1.12. --------- Co-authored-by: Lilith Orion Hafner --- stdlib/LinearAlgebra/src/uniformscaling.jl | 10 ++++++++++ stdlib/LinearAlgebra/test/uniformscaling.jl | 7 +++++++ 2 files changed, 17 insertions(+) diff --git a/stdlib/LinearAlgebra/src/uniformscaling.jl b/stdlib/LinearAlgebra/src/uniformscaling.jl index b75886b8d99fb..472ea53078f87 100644 --- a/stdlib/LinearAlgebra/src/uniformscaling.jl +++ b/stdlib/LinearAlgebra/src/uniformscaling.jl @@ -403,6 +403,16 @@ function copyto!(A::Tridiagonal, J::UniformScaling) return A end +""" + copy!(dest::AbstractMatrix, src::UniformScaling) + +Copies a [`UniformScaling`](@ref) onto a matrix. + +!!! 
compat "Julia 1.12" + This method is available as of Julia 1.12. +""" +Base.copy!(A::AbstractMatrix, J::UniformScaling) = copyto!(A, J) + function cond(J::UniformScaling{T}) where T onereal = inv(one(real(J.λ))) return J.λ ≠ zero(T) ? onereal : oftype(onereal, Inf) diff --git a/stdlib/LinearAlgebra/test/uniformscaling.jl b/stdlib/LinearAlgebra/test/uniformscaling.jl index 92547e8648d8a..d335cd6f63521 100644 --- a/stdlib/LinearAlgebra/test/uniformscaling.jl +++ b/stdlib/LinearAlgebra/test/uniformscaling.jl @@ -226,6 +226,13 @@ let @test copyto!(B, J) == [λ zero(λ)] end + @testset "copy!" begin + A = Matrix{Int}(undef, (3,3)) + @test copy!(A, I) == one(A) + B = Matrix{ComplexF64}(undef, (1,2)) + @test copy!(B, J) == [λ zero(λ)] + end + @testset "binary ops with vectors" begin v = complex.(randn(3), randn(3)) # As shown in #20423@GitHub, vector acts like x1 matrix when participating in linear algebra From be401635fe02b28ce994e2e3cae0733d101f8927 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Thu, 3 Oct 2024 08:28:32 -0500 Subject: [PATCH 40/45] Add forward progress update to NEWS.md (#54089) Closes #40009 which was left open because of the needs news tag. --------- Co-authored-by: Ian Butterworth --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index cc1bbc7449e5d..fb1fcf381cc7f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -37,6 +37,10 @@ Language changes expression within a given `:toplevel` expression to make use of macros defined earlier in the same `:toplevel` expression. ([#53515]) + - Trivial infinite loops (like `while true; end`) are no longer undefined + behavior. Infinite loops that actually do things (e.g. have side effects + or sleep) were never and are still not undefined behavior. 
([#52999]) + Compiler/Runtime improvements ----------------------------- From 6b9719f767d98fa7d6e0d86adf0e204ed226f90e Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Thu, 3 Oct 2024 11:22:29 -0400 Subject: [PATCH 41/45] Fix an intermittent test failure in `core` test (#55973) The test wants to assert that `Module` is not resolved in `Main`, but other tests do resolve this identifier, so the test can fail depending on test order (and I've been seeing such failures on CI recently). Fix that by running the test in a fresh subprocess. --- test/core.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/core.jl b/test/core.jl index 62fde5261bfd3..b27832209a835 100644 --- a/test/core.jl +++ b/test/core.jl @@ -1183,7 +1183,7 @@ end # Make sure that `Module` is not resolved to `Core.Module` during sysimg generation # so that users can define their own binding named `Module` in Main. -@test !Base.isbindingresolved(Main, :Module) +@test success(`$(Base.julia_cmd()) -e '@assert !Base.isbindingresolved(Main, :Module)'`) # Module() constructor @test names(Module(:anonymous), all = true, imported = true) == [:anonymous] From 42737f79e02bbaf444a9d93e6668b3c55cdb8a6e Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Thu, 3 Oct 2024 16:23:21 -0400 Subject: [PATCH 42/45] fix comma logic in time_print (#55977) Minor formatting fix --- base/timing.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/timing.jl b/base/timing.jl index 6d97d70d2f04c..9686c5b33bccd 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -206,7 +206,7 @@ function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_confl print(io, length(timestr) < 10 ? 
(" "^(10 - length(timestr))) : "") end print(io, timestr, " seconds") - parens = bytes != 0 || allocs != 0 || gctime > 0 || compile_time > 0 + parens = bytes != 0 || allocs != 0 || gctime > 0 || compile_time > 0 || lock_conflicts > 0 parens && print(io, " (") if bytes != 0 || allocs != 0 allocs, ma = prettyprint_getunits(allocs, length(_cnt_units), Int64(1000)) @@ -228,7 +228,7 @@ function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_confl print(io, ", ", lock_conflicts, " lock conflict$plural") end if compile_time > 0 - if bytes != 0 || allocs != 0 || gctime > 0 + if bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0 print(io, ", ") end print(io, Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time") From b6b5528da1ea8f322b80247ee4c6c3e65b5a236e Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Fri, 4 Oct 2024 06:01:42 +0900 Subject: [PATCH 43/45] optimizer: fix up the inlining algorithm to use correct `nargs`/`isva` (#55976) It appears that inlining.jl was not updated in JuliaLang/julia#54341. Specifically, using `nargs`/`isva` from `mi.def::Method` in `ir_prepare_inlining!` causes the following error to occur: ```julia function generate_lambda_ex(world::UInt, source::LineNumberNode, argnames, spnames, @nospecialize body) stub = Core.GeneratedFunctionStub(identity, Core.svec(argnames...), Core.svec(spnames...)) return stub(world, source, body) end function overdubbee54341(a, b) return a + b end const overdubee_codeinfo54341 = code_lowered(overdubbee54341, Tuple{Any, Any})[1] function overdub_generator54341(world::UInt, source::LineNumberNode, selftype, fargtypes) if length(fargtypes) != 2 return generate_lambda_ex(world, source, (:overdub54341, :args), (), :(error("Wrong number of arguments"))) else return copy(overdubee_codeinfo54341) end end @eval function overdub54341(args...) 
$(Expr(:meta, :generated, overdub_generator54341)) $(Expr(:meta, :generated_only)) end topfunc(x) = overdub54341(x, 2) ``` ```julia julia> topfunc(1) Internal error: during type inference of topfunc(Int64) Encountered unexpected error in runtime: BoundsError(a=Array{Any, 1}(dims=(2,), mem=Memory{Any}(8, 0x10632e780)[SSAValue(2), SSAValue(3), #, #, #, #, #, #]), i=(3,)) throw_boundserror at ./essentials.jl:14 getindex at ./essentials.jl:909 [inlined] ssa_substitute_op! at ./compiler/ssair/inlining.jl:1798 ssa_substitute_op! at ./compiler/ssair/inlining.jl:1852 ir_inline_item! at ./compiler/ssair/inlining.jl:386 ... ``` This commit updates the abstract interpretation and inlining algorithm to use the `nargs`/`isva` values held by `CodeInfo`. Similar modifications have also been made to EscapeAnalysis.jl. @nanosoldier `runbenchmarks("inference", vs=":master")` --- base/compiler/abstractinterpretation.jl | 2 +- base/compiler/inferencestate.jl | 25 +++++---- base/compiler/optimize.jl | 14 ++--- base/compiler/ssair/inlining.jl | 68 ++++++++++++++----------- base/compiler/ssair/passes.jl | 4 +- base/compiler/stmtinfo.jl | 1 + base/compiler/typeinfer.jl | 2 +- base/compiler/types.jl | 7 ++- test/compiler/EscapeAnalysis/EAUtils.jl | 11 ++-- test/staged.jl | 14 ++--- 10 files changed, 81 insertions(+), 67 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 96355f2a6b5dd..c8a25be422637 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -1282,7 +1282,7 @@ function semi_concrete_eval_call(interp::AbstractInterpreter, effects = Effects(effects; noub=ALWAYS_TRUE) end exct = refine_exception_type(result.exct, effects) - return ConstCallResults(rt, exct, SemiConcreteResult(mi, ir, effects), effects, mi) + return ConstCallResults(rt, exct, SemiConcreteResult(mi, ir, effects, spec_info(irsv)), effects, mi) end end end diff --git a/base/compiler/inferencestate.jl 
b/base/compiler/inferencestate.jl index 05d95d1d5bdc7..5f8fb82caaa34 100644 --- a/base/compiler/inferencestate.jl +++ b/base/compiler/inferencestate.jl @@ -236,7 +236,7 @@ mutable struct InferenceState slottypes::Vector{Any} src::CodeInfo cfg::CFG - method_info::MethodInfo + spec_info::SpecInfo #= intermediate states for local abstract interpretation =# currbb::Int @@ -294,7 +294,7 @@ mutable struct InferenceState sptypes = sptypes_from_meth_instance(mi) code = src.code::Vector{Any} cfg = compute_basic_blocks(code) - method_info = MethodInfo(src) + spec_info = SpecInfo(src) currbb = currpc = 1 ip = BitSet(1) # TODO BitSetBoundedMinPrioritySet(1) @@ -351,7 +351,7 @@ mutable struct InferenceState restrict_abstract_call_sites = isa(def, Module) this = new( - mi, world, mod, sptypes, slottypes, src, cfg, method_info, + mi, world, mod, sptypes, slottypes, src, cfg, spec_info, currbb, currpc, ip, handler_info, ssavalue_uses, bb_vartables, ssavaluetypes, stmt_edges, stmt_info, tasks, pclimitations, limitations, cycle_backedges, callstack, 0, 0, 0, result, unreachable, valid_worlds, bestguess, exc_bestguess, ipo_effects, @@ -791,7 +791,7 @@ end # TODO add `result::InferenceResult` and put the irinterp result into the inference cache? 
mutable struct IRInterpretationState - const method_info::MethodInfo + const spec_info::SpecInfo const ir::IRCode const mi::MethodInstance const world::UInt @@ -809,7 +809,7 @@ mutable struct IRInterpretationState parentid::Int function IRInterpretationState(interp::AbstractInterpreter, - method_info::MethodInfo, ir::IRCode, mi::MethodInstance, argtypes::Vector{Any}, + spec_info::SpecInfo, ir::IRCode, mi::MethodInstance, argtypes::Vector{Any}, world::UInt, min_world::UInt, max_world::UInt) curridx = 1 given_argtypes = Vector{Any}(undef, length(argtypes)) @@ -831,7 +831,7 @@ mutable struct IRInterpretationState tasks = WorkThunk[] edges = Any[] callstack = AbsIntState[] - return new(method_info, ir, mi, world, curridx, argtypes_refined, ir.sptypes, tpdum, + return new(spec_info, ir, mi, world, curridx, argtypes_refined, ir.sptypes, tpdum, ssa_refined, lazyreachability, valid_worlds, tasks, edges, callstack, 0, 0) end end @@ -845,14 +845,13 @@ function IRInterpretationState(interp::AbstractInterpreter, else isa(src, CodeInfo) || return nothing end - method_info = MethodInfo(src) + spec_info = SpecInfo(src) ir = inflate_ir(src, mi) argtypes = va_process_argtypes(optimizer_lattice(interp), argtypes, src.nargs, src.isva) - return IRInterpretationState(interp, method_info, ir, mi, argtypes, world, + return IRInterpretationState(interp, spec_info, ir, mi, argtypes, world, codeinst.min_world, codeinst.max_world) end - # AbsIntState # =========== @@ -927,11 +926,11 @@ is_constproped(::IRInterpretationState) = true is_cached(sv::InferenceState) = !iszero(sv.cache_mode & CACHE_MODE_GLOBAL) is_cached(::IRInterpretationState) = false -method_info(sv::InferenceState) = sv.method_info -method_info(sv::IRInterpretationState) = sv.method_info +spec_info(sv::InferenceState) = sv.spec_info +spec_info(sv::IRInterpretationState) = sv.spec_info -propagate_inbounds(sv::AbsIntState) = method_info(sv).propagate_inbounds -method_for_inference_limit_heuristics(sv::AbsIntState) = 
method_info(sv).method_for_inference_limit_heuristics +propagate_inbounds(sv::AbsIntState) = spec_info(sv).propagate_inbounds +method_for_inference_limit_heuristics(sv::AbsIntState) = spec_info(sv).method_for_inference_limit_heuristics frame_world(sv::InferenceState) = sv.world frame_world(sv::IRInterpretationState) = sv.world diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 1971b47323f5d..02f6b46e2e73f 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -644,10 +644,10 @@ function ((; code_cache)::GetNativeEscapeCache)(mi::MethodInstance) return false end -function refine_effects!(interp::AbstractInterpreter, sv::PostOptAnalysisState) +function refine_effects!(interp::AbstractInterpreter, opt::OptimizationState, sv::PostOptAnalysisState) if !is_effect_free(sv.result.ipo_effects) && sv.all_effect_free && !isempty(sv.ea_analysis_pending) ir = sv.ir - nargs = let def = sv.result.linfo.def; isa(def, Method) ? Int(def.nargs) : 0; end + nargs = Int(opt.src.nargs) estate = EscapeAnalysis.analyze_escapes(ir, nargs, optimizer_lattice(interp), GetNativeEscapeCache(interp)) argescapes = EscapeAnalysis.ArgEscapeCache(estate) stack_analysis_result!(sv.result, argescapes) @@ -939,7 +939,8 @@ function check_inconsistentcy!(sv::PostOptAnalysisState, scanner::BBScanner) end end -function ipo_dataflow_analysis!(interp::AbstractInterpreter, ir::IRCode, result::InferenceResult) +function ipo_dataflow_analysis!(interp::AbstractInterpreter, opt::OptimizationState, + ir::IRCode, result::InferenceResult) if !is_ipo_dataflow_analysis_profitable(result.ipo_effects) return false end @@ -967,13 +968,13 @@ function ipo_dataflow_analysis!(interp::AbstractInterpreter, ir::IRCode, result: end end - return refine_effects!(interp, sv) + return refine_effects!(interp, opt, sv) end # run the optimization work function optimize(interp::AbstractInterpreter, opt::OptimizationState, caller::InferenceResult) - @timeit "optimizer" ir = 
run_passes_ipo_safe(opt.src, opt, caller) - ipo_dataflow_analysis!(interp, ir, caller) + @timeit "optimizer" ir = run_passes_ipo_safe(opt.src, opt) + ipo_dataflow_analysis!(interp, opt, ir, caller) return finish(interp, opt, ir, caller) end @@ -995,7 +996,6 @@ matchpass(::Nothing, _, _) = false function run_passes_ipo_safe( ci::CodeInfo, sv::OptimizationState, - caller::InferenceResult, optimize_until = nothing, # run all passes by default ) __stage__ = 0 # used by @pass diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 9f250b156cd2f..5017b619469ff 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -12,6 +12,8 @@ struct InliningTodo mi::MethodInstance # The IR of the inlinee ir::IRCode + # The SpecInfo for the inlinee + spec_info::SpecInfo # The DebugInfo table for the inlinee di::DebugInfo # If the function being inlined is a single basic block we can use a @@ -20,8 +22,8 @@ struct InliningTodo # Effects of the call statement effects::Effects end -function InliningTodo(mi::MethodInstance, (ir, di)::Tuple{IRCode, DebugInfo}, effects::Effects) - return InliningTodo(mi, ir, di, linear_inline_eligible(ir), effects) +function InliningTodo(mi::MethodInstance, ir::IRCode, spec_info::SpecInfo, di::DebugInfo, effects::Effects) + return InliningTodo(mi, ir, spec_info, di, linear_inline_eligible(ir), effects) end struct ConstantCase @@ -321,7 +323,8 @@ function ir_inline_linetable!(debuginfo::DebugInfoStream, inlinee_debuginfo::Deb end function ir_prepare_inlining!(insert_node!::Inserter, inline_target::Union{IRCode, IncrementalCompact}, - ir::IRCode, di::DebugInfo, mi::MethodInstance, inlined_at::NTuple{3,Int32}, argexprs::Vector{Any}) + ir::IRCode, spec_info::SpecInfo, di::DebugInfo, mi::MethodInstance, + inlined_at::NTuple{3,Int32}, argexprs::Vector{Any}) def = mi.def::Method debuginfo = inline_target isa IRCode ? 
inline_target.debuginfo : inline_target.ir.debuginfo topline = new_inlined_at = ir_inline_linetable!(debuginfo, di, inlined_at) @@ -334,8 +337,8 @@ function ir_prepare_inlining!(insert_node!::Inserter, inline_target::Union{IRCod spvals_ssa = insert_node!( removable_if_unused(NewInstruction(Expr(:call, Core._compute_sparams, def, argexprs...), SimpleVector, topline))) end - if def.isva - nargs_def = Int(def.nargs::Int32) + if spec_info.isva + nargs_def = spec_info.nargs if nargs_def > 0 argexprs = fix_va_argexprs!(insert_node!, inline_target, argexprs, nargs_def, topline) end @@ -362,7 +365,7 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector item::InliningTodo, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) # Ok, do the inlining here inlined_at = compact.result[idx][:line] - ssa_substitute = ir_prepare_inlining!(InsertHere(compact), compact, item.ir, item.di, item.mi, inlined_at, argexprs) + ssa_substitute = ir_prepare_inlining!(InsertHere(compact), compact, item.ir, item.spec_info, item.di, item.mi, inlined_at, argexprs) boundscheck = has_flag(compact.result[idx], IR_FLAG_INBOUNDS) ? 
:off : boundscheck # If the iterator already moved on to the next basic block, @@ -860,15 +863,14 @@ function resolve_todo(mi::MethodInstance, result::Union{Nothing,InferenceResult, if inferred_result isa ConstantCase add_inlining_backedge!(et, mi) return inferred_result - end - if inferred_result isa InferredResult + elseif inferred_result isa InferredResult (; src, effects) = inferred_result elseif inferred_result isa CodeInstance src = @atomic :monotonic inferred_result.inferred effects = decode_effects(inferred_result.ipo_purity_bits) - else - src = nothing - effects = Effects() + else # there is no cached source available, bail out + return compileable_specialization(mi, Effects(), et, info; + compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes) end # the duplicated check might have been done already within `analyze_method!`, but still @@ -883,9 +885,12 @@ function resolve_todo(mi::MethodInstance, result::Union{Nothing,InferenceResult, compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes) add_inlining_backedge!(et, mi) - ir = inferred_result isa CodeInstance ? 
retrieve_ir_for_inlining(inferred_result, src) : - retrieve_ir_for_inlining(mi, src, preserve_local_sources) - return InliningTodo(mi, ir, effects) + if inferred_result isa CodeInstance + ir, spec_info, debuginfo = retrieve_ir_for_inlining(inferred_result, src) + else + ir, spec_info, debuginfo = retrieve_ir_for_inlining(mi, src, preserve_local_sources) + end + return InliningTodo(mi, ir, spec_info, debuginfo, effects) end # the special resolver for :invoke-d call @@ -901,23 +906,17 @@ function resolve_todo(mi::MethodInstance, @nospecialize(info::CallInfo), flag::U if cached_result isa ConstantCase add_inlining_backedge!(et, mi) return cached_result - end - if cached_result isa InferredResult - (; src, effects) = cached_result elseif cached_result isa CodeInstance src = @atomic :monotonic cached_result.inferred effects = decode_effects(cached_result.ipo_purity_bits) - else - src = nothing - effects = Effects() + else # there is no cached source available, bail out + return nothing end - preserve_local_sources = true src_inlining_policy(state.interp, src, info, flag) || return nothing - ir = cached_result isa CodeInstance ? 
retrieve_ir_for_inlining(cached_result, src) : - retrieve_ir_for_inlining(mi, src, preserve_local_sources) + ir, spec_info, debuginfo = retrieve_ir_for_inlining(cached_result, src) add_inlining_backedge!(et, mi) - return InliningTodo(mi, ir, effects) + return InliningTodo(mi, ir, spec_info, debuginfo, effects) end function validate_sparams(sparams::SimpleVector) @@ -971,22 +970,29 @@ function analyze_method!(match::MethodMatch, argtypes::Vector{Any}, return resolve_todo(mi, volatile_inf_result, info, flag, state; invokesig) end -function retrieve_ir_for_inlining(cached_result::CodeInstance, src::MaybeCompressed) - src = _uncompressed_ir(cached_result, src)::CodeInfo - return inflate_ir!(src, cached_result.def), src.debuginfo +function retrieve_ir_for_inlining(cached_result::CodeInstance, src::String) + src = _uncompressed_ir(cached_result, src) + return inflate_ir!(src, cached_result.def), SpecInfo(src), src.debuginfo +end +function retrieve_ir_for_inlining(cached_result::CodeInstance, src::CodeInfo) + return inflate_ir!(copy(src), cached_result.def), SpecInfo(src), src.debuginfo end function retrieve_ir_for_inlining(mi::MethodInstance, src::CodeInfo, preserve_local_sources::Bool) if preserve_local_sources src = copy(src) end - return inflate_ir!(src, mi), src.debuginfo + return inflate_ir!(src, mi), SpecInfo(src), src.debuginfo end function retrieve_ir_for_inlining(mi::MethodInstance, ir::IRCode, preserve_local_sources::Bool) if preserve_local_sources ir = copy(ir) end + # COMBAK this is not correct, we should make `InferenceResult` propagate `SpecInfo` + spec_info = let m = mi.def::Method + SpecInfo(Int(m.nargs), m.isva, false, nothing) + end ir.debuginfo.def = mi - return ir, DebugInfo(ir.debuginfo, length(ir.stmts)) + return ir, spec_info, DebugInfo(ir.debuginfo, length(ir.stmts)) end function handle_single_case!(todo::Vector{Pair{Int,Any}}, @@ -1466,8 +1472,8 @@ function semiconcrete_result_item(result::SemiConcreteResult, add_inlining_backedge!(et, mi) 
preserve_local_sources = OptimizationParams(state.interp).preserve_local_sources - ir = retrieve_ir_for_inlining(mi, result.ir, preserve_local_sources) - return InliningTodo(mi, ir, result.effects) + ir, _, debuginfo = retrieve_ir_for_inlining(mi, result.ir, preserve_local_sources) + return InliningTodo(mi, ir, result.spec_info, debuginfo, result.effects) end function handle_semi_concrete_result!(cases::Vector{InliningCase}, result::SemiConcreteResult, diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 3981f7382d707..e227249b48598 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -1532,7 +1532,7 @@ function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, end src_inlining_policy(inlining.interp, src, info, IR_FLAG_NULL) || return false - src, di = retrieve_ir_for_inlining(code, src) + src, spec_info, di = retrieve_ir_for_inlining(code, src) # For now: Require finalizer to only have one basic block length(src.cfg.blocks) == 1 || return false @@ -1542,7 +1542,7 @@ function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, # TODO: Should there be a special line number node for inlined finalizers? inline_at = ir[SSAValue(idx)][:line] - ssa_substitute = ir_prepare_inlining!(InsertBefore(ir, SSAValue(idx)), ir, src, di, mi, inline_at, argexprs) + ssa_substitute = ir_prepare_inlining!(InsertBefore(ir, SSAValue(idx)), ir, src, spec_info, di, mi, inline_at, argexprs) # TODO: Use the actual inliner here rather than open coding this special purpose inliner. 
ssa_rename = Vector{Any}(undef, length(src.stmts)) diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl index ac5ffbdd5d76d..9dba7a4459f9e 100644 --- a/base/compiler/stmtinfo.jl +++ b/base/compiler/stmtinfo.jl @@ -94,6 +94,7 @@ struct SemiConcreteResult <: ConstResult mi::MethodInstance ir::IRCode effects::Effects + spec_info::SpecInfo end # XXX Technically this does not represent a result of constant inference, but rather that of diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl index 77a2e02129ce4..8b85f7c6f35f1 100644 --- a/base/compiler/typeinfer.jl +++ b/base/compiler/typeinfer.jl @@ -941,7 +941,7 @@ function typeinf_ircode(interp::AbstractInterpreter, mi::MethodInstance, end (; result) = frame opt = OptimizationState(frame, interp) - ir = run_passes_ipo_safe(opt.src, opt, result, optimize_until) + ir = run_passes_ipo_safe(opt.src, opt, optimize_until) rt = widenconst(ignorelimited(result.result)) return ir, rt end diff --git a/base/compiler/types.jl b/base/compiler/types.jl index c51785f23ea29..ecf2417fd6199 100644 --- a/base/compiler/types.jl +++ b/base/compiler/types.jl @@ -41,11 +41,14 @@ struct StmtInfo used::Bool end -struct MethodInfo +struct SpecInfo + nargs::Int + isva::Bool propagate_inbounds::Bool method_for_inference_limit_heuristics::Union{Nothing,Method} end -MethodInfo(src::CodeInfo) = MethodInfo( +SpecInfo(src::CodeInfo) = SpecInfo( + Int(src.nargs), src.isva, src.propagate_inbounds, src.method_for_inference_limit_heuristics::Union{Nothing,Method}) diff --git a/test/compiler/EscapeAnalysis/EAUtils.jl b/test/compiler/EscapeAnalysis/EAUtils.jl index 188ec93ebc5be..b8ad4589db626 100644 --- a/test/compiler/EscapeAnalysis/EAUtils.jl +++ b/test/compiler/EscapeAnalysis/EAUtils.jl @@ -116,12 +116,14 @@ CC.get_inference_world(interp::EscapeAnalyzer) = interp.world CC.get_inference_cache(interp::EscapeAnalyzer) = interp.inf_cache CC.cache_owner(::EscapeAnalyzer) = EAToken() -function 
CC.ipo_dataflow_analysis!(interp::EscapeAnalyzer, ir::IRCode, caller::InferenceResult) +function CC.ipo_dataflow_analysis!(interp::EscapeAnalyzer, opt::OptimizationState, + ir::IRCode, caller::InferenceResult) # run EA on all frames that have been optimized - nargs = let def = caller.linfo.def; isa(def, Method) ? Int(def.nargs) : 0; end + nargs = Int(opt.src.nargs) + 𝕃ₒ = CC.optimizer_lattice(interp) get_escape_cache = GetEscapeCache(interp) estate = try - analyze_escapes(ir, nargs, CC.optimizer_lattice(interp), get_escape_cache) + analyze_escapes(ir, nargs, 𝕃ₒ, get_escape_cache) catch err @error "error happened within EA, inspect `Main.failed_escapeanalysis`" Main.failed_escapeanalysis = FailedAnalysis(ir, nargs, get_escape_cache) @@ -133,7 +135,8 @@ function CC.ipo_dataflow_analysis!(interp::EscapeAnalyzer, ir::IRCode, caller::I end record_escapes!(interp, caller, estate, ir) - @invoke CC.ipo_dataflow_analysis!(interp::AbstractInterpreter, ir::IRCode, caller::InferenceResult) + @invoke CC.ipo_dataflow_analysis!(interp::AbstractInterpreter, opt::OptimizationState, + ir::IRCode, caller::InferenceResult) end function record_escapes!(interp::EscapeAnalyzer, diff --git a/test/staged.jl b/test/staged.jl index 0112dd73b45f7..1b28144639f97 100644 --- a/test/staged.jl +++ b/test/staged.jl @@ -382,9 +382,8 @@ let end function generate_lambda_ex(world::UInt, source::LineNumberNode, - argnames::Core.SimpleVector, spnames::Core.SimpleVector, - body::Expr) - stub = Core.GeneratedFunctionStub(identity, argnames, spnames) + argnames, spnames, @nospecialize body) + stub = Core.GeneratedFunctionStub(identity, Core.svec(argnames...), Core.svec(spnames...)) return stub(world, source, body) end @@ -392,7 +391,7 @@ end struct Generator54916 <: Core.CachedGenerator end function (::Generator54916)(world::UInt, source::LineNumberNode, args...) 
return generate_lambda_ex(world, source, - Core.svec(:doit54916, :func, :arg), Core.svec(), :(func(arg))) + (:doit54916, :func, :arg), (), :(func(arg))) end @eval function doit54916(func, arg) $(Expr(:meta, :generated, Generator54916())) @@ -420,7 +419,7 @@ function generator49715(world, source, self, f, tt) mi = Base._which(sig; world) error("oh no") return generate_lambda_ex(world, source, - Core.svec(:doit49715, :f, :tt), Core.svec(), :(nothing)) + (:doit49715, :f, :tt), (), nothing) end @eval function doit49715(f, tt) $(Expr(:meta, :generated, generator49715)) @@ -436,7 +435,7 @@ const overdubee_codeinfo54341 = code_lowered(overdubbee54341, Tuple{Any, Any})[1 function overdub_generator54341(world::UInt, source::LineNumberNode, selftype, fargtypes) if length(fargtypes) != 2 return generate_lambda_ex(world, source, - Core.svec(:overdub54341, :args), Core.svec(), :(error("Wrong number of arguments"))) + (:overdub54341, :args), (), :(error("Wrong number of arguments"))) else return copy(overdubee_codeinfo54341) end @@ -446,4 +445,7 @@ end $(Expr(:meta, :generated_only)) end @test overdub54341(1, 2) == 3 +# check if the inlining pass handles `nargs`/`isva` correctly +@test first(only(code_typed((Int,Int)) do x, y; @inline overdub54341(x, y); end)) isa Core.CodeInfo +@test first(only(code_typed((Int,)) do x; @inline overdub54341(x, 1); end)) isa Core.CodeInfo @test_throws "Wrong number of arguments" overdub54341(1, 2, 3) From b9d9b69165493f6fc03870d975be05c67f14a30b Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Fri, 4 Oct 2024 09:02:29 +0530 Subject: [PATCH 44/45] Add `.zed` directory to `.gitignore` (#55974) Similar to the `vscode` config directory, we may ignore the `zed` directory as well. 
--- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 524a12d066c4d..80bdd67619454 100644 --- a/.gitignore +++ b/.gitignore @@ -34,6 +34,7 @@ .DS_Store .idea/* .vscode/* +.zed/* *.heapsnapshot .cache # Buildkite: Ignore the entire .buildkite directory From 636a35d83ca16d2077fc507701f41d50f409c7a5 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Fri, 4 Oct 2024 09:57:53 -0400 Subject: [PATCH 45/45] `@time` actually fix time report commas & add tests (#55982) https://github.com/JuliaLang/julia/pull/55977 looked simple but wasn't quite right because of a bad pattern in the lock conflicts report section. So fix and add tests. --- base/timing.jl | 7 +++++-- test/misc.jl | 9 +++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/base/timing.jl b/base/timing.jl index 9686c5b33bccd..b094aa230e1c2 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -206,7 +206,7 @@ function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_confl print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "") end print(io, timestr, " seconds") - parens = bytes != 0 || allocs != 0 || gctime > 0 || compile_time > 0 || lock_conflicts > 0 + parens = bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0 || compile_time > 0 parens && print(io, " (") if bytes != 0 || allocs != 0 allocs, ma = prettyprint_getunits(allocs, length(_cnt_units), Int64(1000)) @@ -224,8 +224,11 @@ function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_confl print(io, Ryu.writefixed(Float64(100*gctime/elapsedtime), 2), "% gc time") end if lock_conflicts > 0 + if bytes != 0 || allocs != 0 || gctime > 0 + print(io, ", ") + end plural = lock_conflicts == 1 ? 
"" : "s" - print(io, ", ", lock_conflicts, " lock conflict$plural") + print(io, lock_conflicts, " lock conflict$plural") end if compile_time > 0 if bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0 diff --git a/test/misc.jl b/test/misc.jl index 66b70956935cd..e089395ce4557 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -360,6 +360,15 @@ let foo() = 1 @test @timev foo() true end +# this is internal, but used for easy testing +@test sprint(Base.time_print, 1e9) == " 1.000000 seconds" +@test sprint(Base.time_print, 1e9, 111, 0, 222) == " 1.000000 seconds (222 allocations: 111 bytes)" +@test sprint(Base.time_print, 1e9, 111, 0.5e9, 222) == " 1.000000 seconds (222 allocations: 111 bytes, 50.00% gc time)" +@test sprint(Base.time_print, 1e9, 111, 0, 222, 333) == " 1.000000 seconds (222 allocations: 111 bytes, 333 lock conflicts)" +@test sprint(Base.time_print, 1e9, 0, 0, 0, 333) == " 1.000000 seconds (333 lock conflicts)" +@test sprint(Base.time_print, 1e9, 111, 0, 222, 333, 0.25e9) == " 1.000000 seconds (222 allocations: 111 bytes, 333 lock conflicts, 25.00% compilation time)" +@test sprint(Base.time_print, 1e9, 111, 0.5e9, 222, 333, 0.25e9, 0.175e9) == " 1.000000 seconds (222 allocations: 111 bytes, 50.00% gc time, 333 lock conflicts, 25.00% compilation time: 70% of which was recompilation)" + # @showtime @test @showtime true let foo() = true