From 05c8f3b40ab9ba090f304fffcd360120077f5365 Mon Sep 17 00:00:00 2001 From: Elliot Saba Date: Tue, 15 Nov 2022 09:36:14 -0800 Subject: [PATCH 01/57] Limit initial OpenBLAS thread count (#46844) * Limit initial OpenBLAS thread count We set OpenBLAS's initial thread count to `1` to prevent runaway allocation within OpenBLAS's initial thread startup. LinearAlgebra will later call `BLAS.set_num_threads()` to the actual value we require. * Support older names (cherry picked from commit 58b559f4a238faeeac03fbdec181ededd27053bc) --- stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl index f656621d957d6..c57dd15bb1930 100644 --- a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl +++ b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl @@ -37,6 +37,19 @@ function __init__() ENV["OPENBLAS_MAIN_FREE"] = "1" end + # Ensure that OpenBLAS does not grab a huge amount of memory at first, + # since it instantly allocates scratch buffer space for the number of + # threads it thinks it needs to use. + # X-ref: https://github.com/xianyi/OpenBLAS/blob/c43ec53bdd00d9423fc609d7b7ecb35e7bf41b85/README.md#setting-the-number-of-threads-using-environment-variables + # X-ref: https://github.com/JuliaLang/julia/issues/45434 + if !haskey(ENV, "OPENBLAS_NUM_THREADS") && + !haskey(ENV, "GOTO_NUM_THREADS") && + !haskey(ENV, "OMP_NUM_THREADS") + # We set this to `1` here, and then LinearAlgebra will update + # to the true value in its `__init__()` function. 
+ ENV["OPENBLAS_NUM_THREADS"] = "1" + end + global libopenblas_handle = dlopen(libopenblas) global libopenblas_path = dlpath(libopenblas_handle) global artifact_dir = dirname(Sys.BINDIR) From 9606388dba7d3acd698e436ca63a52255fb4e484 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Tue, 15 Nov 2022 19:01:25 -0500 Subject: [PATCH 02/57] fix #46778, precompile() for abstract but compileable signatures (#47259) (cherry picked from commit fe8113839eb7233a0af56737a581c659c9b88cea) --- contrib/generate_precompile.jl | 4 -- src/gf.c | 125 ++++++++++++++++++++++++++------- test/precompile.jl | 7 ++ 3 files changed, 107 insertions(+), 29 deletions(-) diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl index 295b24d22e1c7..53ca9403463b3 100644 --- a/contrib/generate_precompile.jl +++ b/contrib/generate_precompile.jl @@ -401,10 +401,6 @@ function generate_precompile_statements() end # println(ps) ps = Core.eval(PrecompileStagingArea, ps) - # XXX: precompile doesn't currently handle overloaded nospecialize arguments very well. - # Skipping them avoids the warning. - ms = length(ps) == 1 ? Base._methods_by_ftype(ps[1], 1, Base.get_world_counter()) : Base.methods(ps...) - ms isa Vector || continue precompile(ps...) n_succeeded += 1 print("\rExecuting precompile statements... $n_succeeded/$(length(statements))") diff --git a/src/gf.c b/src/gf.c index 3574aa1fde256..191a9321ba805 100644 --- a/src/gf.c +++ b/src/gf.c @@ -2255,6 +2255,39 @@ JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_methtable_t *mt, jl_t return is_compileable ? 
(jl_value_t*)tt : jl_nothing; } +// return a MethodInstance for a compileable method_match +jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache) +{ + jl_method_t *m = match->method; + jl_svec_t *env = match->sparams; + jl_tupletype_t *ti = match->spec_types; + jl_method_instance_t *mi = NULL; + if (jl_is_datatype(ti)) { + jl_methtable_t *mt = jl_method_get_table(m); + if ((jl_value_t*)mt != jl_nothing) { + // get the specialization without caching it + if (mt_cache && ((jl_datatype_t*)ti)->isdispatchtuple) { + // Since we also use this presence in the cache + // to trigger compilation when producing `.ji` files, + // inject it there now if we think it will be + // used via dispatch later (e.g. because it was hinted via a call to `precompile`) + JL_LOCK(&mt->writelock); + mi = cache_method(mt, &mt->cache, (jl_value_t*)mt, ti, m, world, min_valid, max_valid, env); + JL_UNLOCK(&mt->writelock); + } + else { + jl_value_t *tt = jl_normalize_to_compilable_sig(mt, ti, env, m); + JL_GC_PUSH1(&tt); + if (tt != jl_nothing) { + mi = jl_specializations_get_linfo(m, (jl_value_t*)tt, env); + } + JL_GC_POP(); + } + } + } + return mi; +} + // compile-time method lookup jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types JL_PROPAGATES_ROOT, size_t world, size_t *min_valid, size_t *max_valid, int mt_cache) { @@ -2274,36 +2307,78 @@ jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types JL_PROPAGATES *max_valid = max_valid2; if (matches == jl_false || jl_array_len(matches) != 1 || ambig) return NULL; - jl_value_t *tt = NULL; - JL_GC_PUSH2(&matches, &tt); + JL_GC_PUSH1(&matches); jl_method_match_t *match = (jl_method_match_t*)jl_array_ptr_ref(matches, 0); - jl_method_t *m = match->method; - jl_svec_t *env = match->sparams; - jl_tupletype_t *ti = match->spec_types; - jl_method_instance_t *nf = NULL; - if (jl_is_datatype(ti)) { - jl_methtable_t *mt = jl_method_get_table(m); - if 
((jl_value_t*)mt != jl_nothing) { - // get the specialization without caching it - if (mt_cache && ((jl_datatype_t*)ti)->isdispatchtuple) { - // Since we also use this presence in the cache - // to trigger compilation when producing `.ji` files, - // inject it there now if we think it will be - // used via dispatch later (e.g. because it was hinted via a call to `precompile`) - JL_LOCK(&mt->writelock); - nf = cache_method(mt, &mt->cache, (jl_value_t*)mt, ti, m, world, min_valid2, max_valid2, env); - JL_UNLOCK(&mt->writelock); - } - else { - tt = jl_normalize_to_compilable_sig(mt, ti, env, m); - if (tt != jl_nothing) { - nf = jl_specializations_get_linfo(m, (jl_value_t*)tt, env); + jl_method_instance_t *mi = jl_method_match_to_mi(match, world, min_valid2, max_valid2, mt_cache); + JL_GC_POP(); + return mi; +} + +// Get a MethodInstance for a precompile() call. This uses a special kind of lookup that +// tries to find a method for which the requested signature is compileable. +jl_method_instance_t *jl_get_compile_hint_specialization(jl_tupletype_t *types JL_PROPAGATES_ROOT, size_t world, size_t *min_valid, size_t *max_valid, int mt_cache) +{ + if (jl_has_free_typevars((jl_value_t*)types)) + return NULL; // don't poison the cache due to a malformed query + if (!jl_has_concrete_subtype((jl_value_t*)types)) + return NULL; + + size_t min_valid2 = 1; + size_t max_valid2 = ~(size_t)0; + int ambig = 0; + jl_value_t *matches = jl_matching_methods(types, jl_nothing, -1, 0, world, &min_valid2, &max_valid2, &ambig); + if (*min_valid < min_valid2) + *min_valid = min_valid2; + if (*max_valid > max_valid2) + *max_valid = max_valid2; + size_t i, n = jl_array_len(matches); + if (n == 0) + return NULL; + JL_GC_PUSH1(&matches); + jl_method_match_t *match = NULL; + if (n == 1) { + match = (jl_method_match_t*)jl_array_ptr_ref(matches, 0); + } + else { + // first, select methods for which `types` is compileable + size_t count = 0; + for (i = 0; i < n; i++) { + jl_method_match_t *match1 = 
(jl_method_match_t*)jl_array_ptr_ref(matches, i); + if (jl_isa_compileable_sig(types, match1->method)) + jl_array_ptr_set(matches, count++, (jl_value_t*)match1); + } + jl_array_del_end((jl_array_t*)matches, n - count); + n = count; + // now remove methods that are more specific than others in the list. + // this is because the intent of precompiling e.g. f(::DataType) is to + // compile that exact method if it exists, and not lots of f(::Type{X}) methods + int exclude; + count = 0; + for (i = 0; i < n; i++) { + jl_method_match_t *match1 = (jl_method_match_t*)jl_array_ptr_ref(matches, i); + exclude = 0; + for (size_t j = n-1; j > i; j--) { // more general methods are more likely to be at the end + jl_method_match_t *match2 = (jl_method_match_t*)jl_array_ptr_ref(matches, j); + if (jl_type_morespecific(match1->method->sig, match2->method->sig)) { + exclude = 1; + break; } } + if (!exclude) + jl_array_ptr_set(matches, count++, (jl_value_t*)match1); + if (count > 1) + break; } + // at this point if there are 0 matches left we found nothing, or if there are + // more than one the request is ambiguous and we ignore it.
+ if (count == 1) + match = (jl_method_match_t*)jl_array_ptr_ref(matches, 0); } + jl_method_instance_t *mi = NULL; + if (match != NULL) + mi = jl_method_match_to_mi(match, world, min_valid2, max_valid2, mt_cache); JL_GC_POP(); - return nf; + return mi; } static void _generate_from_hint(jl_method_instance_t *mi, size_t world) @@ -2370,7 +2445,7 @@ JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types) size_t world = jl_atomic_load_acquire(&jl_world_counter); size_t min_valid = 0; size_t max_valid = ~(size_t)0; - jl_method_instance_t *mi = jl_get_specialization1(types, world, &min_valid, &max_valid, 1); + jl_method_instance_t *mi = jl_get_compile_hint_specialization(types, world, &min_valid, &max_valid, 1); if (mi == NULL) return 0; JL_GC_PROMISE_ROOTED(mi); diff --git a/test/precompile.jl b/test/precompile.jl index 098d1ffbba231..5b49ad4a3b31a 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -1556,3 +1556,10 @@ end empty!(Base.DEPOT_PATH) append!(Base.DEPOT_PATH, original_depot_path) + +@testset "issue 46778" begin + f46778(::Any, ::Type{Int}) = 1 + f46778(::Any, ::DataType) = 2 + @test precompile(Tuple{typeof(f46778), Int, DataType}) + @test which(f46778, Tuple{Any,DataType}).specializations[1].cache.invoke != C_NULL +end From 0e17295c1fc0796af91a6821fb28e189054f8eaa Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Wed, 16 Nov 2022 11:17:34 -0500 Subject: [PATCH 03/57] fix #47410, syntax error with anonfn inside `elseif` and short-circuit op (#47499) (cherry picked from commit 5f256e7d6bbe8248b58af5c1abbf8f8715522b0b) --- src/julia-syntax.scm | 3 +++ test/syntax.jl | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index 49ba2fe728789..4a0407e019432 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -4404,6 +4404,9 @@ f(x) = yt(x) cnd))) (define (emit-cond cnd break-labels endl) (let* ((cnd (if (and (pair? cnd) (eq? 
(car cnd) 'block)) + (flatten-ex 'block cnd) + cnd)) + (cnd (if (and (pair? cnd) (eq? (car cnd) 'block)) (begin (if (length> cnd 2) (compile (butlast cnd) break-labels #f #f)) (last cnd)) cnd)) diff --git a/test/syntax.jl b/test/syntax.jl index 71c051040967c..cff8628290081 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -3422,3 +3422,10 @@ end # issue #46251 @test begin; global value = 1; (value, value += 1) end == (1, 2) @test begin; global value = 1; "($(value), $(value += 1))" end == "(1, 2)" + +# issue #47410 +# note `eval` is needed since this needs to be at the top level +@test eval(:(if false + elseif false || (()->true)() + 42 + end)) == 42 From 9dfd76b198c4f3dc05c741df5e436e959345fe8f Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 16 Nov 2022 16:17:00 -0500 Subject: [PATCH 04/57] ensure bindings handle write barriers for ty and globalref (#47580) This has probably been wrong for a long time (since being introduced in 79082468986). (cherry picked from commit b36951160ef0ba8c4641dd768cd7a1f5f570d0a9) --- src/builtins.c | 1 + src/gc.c | 11 ++++++++++- src/module.c | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/builtins.c b/src/builtins.c index 1ef284dc1d17b..bf9f886d92ba8 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -1252,6 +1252,7 @@ JL_CALLABLE(jl_f_set_binding_type) jl_errorf("cannot set type for global %s. 
It already has a value or is already set to a different type.", jl_symbol_name(b->name)); } + jl_gc_wb_binding(b, ty); return jl_nothing; } diff --git a/src/gc.c b/src/gc.c index 2ae7f677cbb75..15e3883df81ca 100644 --- a/src/gc.c +++ b/src/gc.c @@ -3203,8 +3203,17 @@ static void jl_gc_queue_remset(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp jl_binding_t *ptr = (jl_binding_t*)items[i]; // A null pointer can happen here when the binding is cleaned up // as an exception is thrown after it was already queued (#10221) + int bnd_refyoung = 0; jl_value_t *v = jl_atomic_load_relaxed(&ptr->value); - if (v != NULL && gc_mark_queue_obj(gc_cache, sp, v)) { + if (v != NULL && gc_mark_queue_obj(gc_cache, sp, v)) + bnd_refyoung = 1; + jl_value_t *ty = jl_atomic_load_relaxed(&ptr->ty); + if (ty != NULL && gc_mark_queue_obj(gc_cache, sp, ty)) + bnd_refyoung = 1; + jl_value_t *globalref = jl_atomic_load_relaxed(&ptr->globalref); + if (globalref != NULL && gc_mark_queue_obj(gc_cache, sp, globalref)) + bnd_refyoung = 1; + if (bnd_refyoung) { items[n_bnd_refyoung] = ptr; n_bnd_refyoung++; } diff --git a/src/module.c b/src/module.c index 1e1bf4d52436e..0dc5e20d18b89 100644 --- a/src/module.c +++ b/src/module.c @@ -449,7 +449,7 @@ JL_DLLEXPORT jl_value_t *jl_module_globalref(jl_module_t *m, jl_sym_t *var) if (jl_atomic_cmpswap_relaxed(&b->globalref, &globalref, newref)) { JL_GC_PROMISE_ROOTED(newref); globalref = newref; - jl_gc_wb(m, globalref); + jl_gc_wb_binding(b, globalref); } } JL_UNLOCK(&m->lock); // may GC From d85e9acb5afc4b1d21fe07f8a993a11e4ab7bad3 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sun, 20 Nov 2022 14:45:20 -0500 Subject: [PATCH 05/57] Turn on Intel jitevents by default on Linux (#47586) (cherry picked from commit bba41d41319aa898373784438bd38873eab1da41) --- Make.inc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Make.inc b/Make.inc index 5676f8c0a2878..323db88ed5876 100644 --- a/Make.inc +++ b/Make.inc @@ -77,7 +77,7 @@ 
JULIA_THREADS := 1 # Set to 1 to enable profiling with OProfile USE_OPROFILE_JITEVENTS ?= 0 -# USE_PERF_JITEVENTS defined below since default is OS specific +# USE_PERF_JITEVENTS and USE_INTEL_JITEVENTS are defined below since their defaults are OS specific # assume we don't have LIBSSP support in our compiler, will enable later if likely true HAVE_SSP := 0 @@ -442,8 +442,10 @@ endif # Set to 1 to enable profiling with perf ifeq ("$(OS)", "Linux") USE_PERF_JITEVENTS ?= 1 +USE_INTEL_JITEVENTS ?= 1 else USE_PERF_JITEVENTS ?= 0 +USE_INTEL_JITEVENTS ?= 0 endif JULIACODEGEN := LLVM From b5d6b03af6ee6c02c5ed2f9923d1df5e16da8862 Mon Sep 17 00:00:00 2001 From: Petr Vana Date: Mon, 21 Nov 2022 15:45:21 +0100 Subject: [PATCH 06/57] Doc: The default sorting alg. is stable from 1.9 (#47579) * Update doc/src/base/sort.md * Update docs: The default sorting alg. is stable * Compat 1.9 for QuickSort to be stable * Specify the default algorithm * Use example from InlineStrings.jl * Change example to jldoctest * Remove "*appear* to be stable." as slightly misleading. Co-authored-by: Lilith Orion Hafner (cherry picked from commit c5fe17b821b8af32ada7694bf874cb6eb1793d77) --- doc/src/base/sort.md | 80 ++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 33 deletions(-) diff --git a/doc/src/base/sort.md b/doc/src/base/sort.md index 9f00381ab892c..e93d9716b1487 100644 --- a/doc/src/base/sort.md +++ b/doc/src/base/sort.md @@ -141,53 +141,67 @@ There are currently four sorting algorithms available in base Julia: * [`PartialQuickSort(k)`](@ref) * [`MergeSort`](@ref) -`InsertionSort` is an O(n^2) stable sorting algorithm. It is efficient for very small `n`, and -is used internally by `QuickSort`. +`InsertionSort` is an O(n²) stable sorting algorithm. It is efficient for very small `n`, +and is used internally by `QuickSort`. -`QuickSort` is an O(n log n) sorting algorithm which is in-place, very fast, but not stable – -i.e.
elements which are considered equal will not remain in the same order in which they originally -appeared in the array to be sorted. `QuickSort` is the default algorithm for numeric values, including -integers and floats. +`QuickSort` is a very fast sorting algorithm with an average-case time complexity of +O(n log n). `QuickSort` is stable, i.e., elements considered equal will remain in the same +order. The worst-case complexity is O(n²), but this is vanishingly unlikely because the +pivot selection is randomized. -`PartialQuickSort(k)` is similar to `QuickSort`, but the output array is only sorted up to index -`k` if `k` is an integer, or in the range of `k` if `k` is an `OrdinalRange`. For example: +`PartialQuickSort(k::OrdinalRange)` is similar to `QuickSort`, but the output array is only +sorted in the range of `k`. For example: -```julia -x = rand(1:500, 100) -k = 50 -k2 = 50:100 -s = sort(x; alg=QuickSort) -ps = sort(x; alg=PartialQuickSort(k)) -qs = sort(x; alg=PartialQuickSort(k2)) -map(issorted, (s, ps, qs)) # => (true, false, false) -map(x->issorted(x[1:k]), (s, ps, qs)) # => (true, true, false) -map(x->issorted(x[k2]), (s, ps, qs)) # => (true, false, true) -s[1:k] == ps[1:k] # => true -s[k2] == qs[k2] # => true +```jldoctest +julia> x = rand(1:500, 100); + +julia> k = 50:100; + +julia> s1 = sort(x; alg=QuickSort); + +julia> s2 = sort(x; alg=PartialQuickSort(k)); + +julia> map(issorted, (s1, s2)) +(true, false) + +julia> map(x->issorted(x[k]), (s1, s2)) +(true, true) + +julia> s1[k] == s2[k] +true ``` +!!! compat "Julia 1.9" + The `QuickSort` and `PartialQuickSort` algorithms are stable since Julia 1.9. + `MergeSort` is an O(n log n) stable sorting algorithm but is not in-place – it requires a temporary array of half the size of the input array – and is typically not quite as fast as `QuickSort`. It is the default algorithm for non-numeric data.
-The default sorting algorithms are chosen on the basis that they are fast and stable, or *appear* -to be so. For numeric types indeed, `QuickSort` is selected as it is faster and indistinguishable -in this case from a stable sort (unless the array records its mutations in some way). The stability -property comes at a non-negligible cost, so if you don't need it, you may want to explicitly specify -your preferred algorithm, e.g. `sort!(v, alg=QuickSort)`. +The default sorting algorithms are chosen on the basis that they are fast and stable. +Usually, `QuickSort` is selected, but `InsertionSort` is preferred for small data. +You can also explicitly specify your preferred algorithm, e.g. +`sort!(v, alg=PartialQuickSort(10:20))`. -The mechanism by which Julia picks default sorting algorithms is implemented via the `Base.Sort.defalg` -function. It allows a particular algorithm to be registered as the default in all sorting functions -for specific arrays. For example, here are the two default methods from [`sort.jl`](https://github.com/JuliaLang/julia/blob/master/base/sort.jl): +The mechanism by which Julia picks default sorting algorithms is implemented via the +`Base.Sort.defalg` function. It allows a particular algorithm to be registered as the +default in all sorting functions for specific arrays. For example, here is the default +method from [`sort.jl`](https://github.com/JuliaLang/julia/blob/master/base/sort.jl): + +```julia +defalg(v::AbstractArray) = DEFAULT_STABLE +``` +You may change the default behavior for specific types by defining new methods for `defalg`. 
+For example, [InlineStrings.jl](https://github.com/JuliaStrings/InlineStrings.jl/blob/v1.3.2/src/InlineStrings.jl#L903) +defines the following method: ```julia -defalg(v::AbstractArray) = MergeSort -defalg(v::AbstractArray{<:Number}) = QuickSort +Base.Sort.defalg(::AbstractArray{<:Union{SmallInlineStrings, Missing}}) = InlineStringSort ``` -As for numeric arrays, choosing a non-stable default algorithm for array types for which the notion -of a stable sort is meaningless (i.e. when two values comparing equal can not be distinguished) -may make sense. +!!! compat "Julia 1.9" + The default sorting algorithm (returned by `Base.Sort.defalg`) is guaranteed + to be stable since Julia 1.9. Previous versions had unstable edge cases when sorting numeric arrays. ## Alternate orderings From 3bc94a9e0ed5ed4b57223f464ab003aa46883277 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Mon, 21 Nov 2022 13:31:33 -0800 Subject: [PATCH 07/57] update MPFR (#47659) checksums updated via approach described in #47174. 
(cherry picked from commit 59965205ccbdffb4e25e1b60f651ca9df79230a4) --- deps/checksums/mpfr | 68 ++++++++++++++++---------------- deps/mpfr.version | 6 +-- stdlib/MPFR_jll/Project.toml | 2 +- stdlib/MPFR_jll/test/runtests.jl | 2 +- 4 files changed, 37 insertions(+), 41 deletions(-) diff --git a/deps/checksums/mpfr b/deps/checksums/mpfr index d00b0133d36ea..99c02301251d8 100644 --- a/deps/checksums/mpfr +++ b/deps/checksums/mpfr @@ -1,34 +1,34 @@ -MPFR.v4.1.1+3.aarch64-apple-darwin.tar.gz/md5/cd774c829cb5d5f9908ef84966af75f0 -MPFR.v4.1.1+3.aarch64-apple-darwin.tar.gz/sha512/c20ba17da62facb9bd60dea6fd400a075027c1bb5ebb5c7d0e333dc348b72f17e1de3edca24dd74178ae98c86591d2c817a69e866fd2a8f6b10ee097091f8ffd -MPFR.v4.1.1+3.aarch64-linux-gnu.tar.gz/md5/b99df82089eb79447b8f17eed56c87eb -MPFR.v4.1.1+3.aarch64-linux-gnu.tar.gz/sha512/9935bda1d37a7947808c887e10762fc71307027c698a7b871cc02ae87c2f41cffee0400f453ae9940899bba515f104ea7a81610801919e2c74bdb67703756d7a -MPFR.v4.1.1+3.aarch64-linux-musl.tar.gz/md5/e41f04255e53a24f66c75a40c0d17279 -MPFR.v4.1.1+3.aarch64-linux-musl.tar.gz/sha512/56d08924f8ec0e2f48b8d052d4687b14230737e045ba2c70325271c07987212671254902229189e7ae6cabc80cd88613e442aec0a1ab6e191d4844a86cf9b2b0 -MPFR.v4.1.1+3.armv6l-linux-gnueabihf.tar.gz/md5/c20bb7471bffff3bcd6b2db75ec9dda6 -MPFR.v4.1.1+3.armv6l-linux-gnueabihf.tar.gz/sha512/33ba51c8f0a2412d99c747128d4c813e8b31872cc50e4b9edb133645aa1b993b84e174ffc63c61861e41527400ae22fc7cfb5983feaab3cd31ab1f7e412e8e91 -MPFR.v4.1.1+3.armv6l-linux-musleabihf.tar.gz/md5/acbf8b461679b65a72bb3c7973ac6d8a -MPFR.v4.1.1+3.armv6l-linux-musleabihf.tar.gz/sha512/2cac950fa45c09a316e71583c541b3cb9e556ac771807f2482c0051b43141eefb803974e4f9f57113e911992a5d2510ef783b9970b8eef000869d61b10a3ad8f -MPFR.v4.1.1+3.armv7l-linux-gnueabihf.tar.gz/md5/5fce89c30bb9e59b97cbe061b27b1b15 
-MPFR.v4.1.1+3.armv7l-linux-gnueabihf.tar.gz/sha512/e18267b2cbc7860c7121211ab8b081bf065b8b35831368df23b51b03a980f5083e505bafbc0351c6e8e7dd6d7d94c592c36b840e577a738116c83f2e93e2054c -MPFR.v4.1.1+3.armv7l-linux-musleabihf.tar.gz/md5/39a5e85cdcb8752b67aa4e4b6a756ae6 -MPFR.v4.1.1+3.armv7l-linux-musleabihf.tar.gz/sha512/a4ef907c80959c372f1b733457a9240e9a90879cd2eace95dc916f4234a430d8c1a7eb6bf7879be8fb016b59fea96bee47933c7f51f553e0351ab0ac578cc46b -MPFR.v4.1.1+3.i686-linux-gnu.tar.gz/md5/542abb1baf47807320049d484aa1ad5b -MPFR.v4.1.1+3.i686-linux-gnu.tar.gz/sha512/dc0fe265d3b89658d75bdedf53b3ee23250d7314d70d9d3ccbafe4172d0493da89620e39c48e60f5f7e56daf60226c4a7a814df5b213e4df71d6c29edab82012 -MPFR.v4.1.1+3.i686-linux-musl.tar.gz/md5/a32e2396d9410d4308141d1cbf9eb761 -MPFR.v4.1.1+3.i686-linux-musl.tar.gz/sha512/533981ce319d06bc4569a094f82d00f80e01e1336b52d95b79ac0dcc225bb08ce3593f261ab5b7c450e5596016b5ef906177eb96fc0e321ba95d54b5f1f9ce2e -MPFR.v4.1.1+3.i686-w64-mingw32.tar.gz/md5/20255e7daea1ea2b0f4edf7425545687 -MPFR.v4.1.1+3.i686-w64-mingw32.tar.gz/sha512/69f96bcc85ee53ca7ea0cc46cb719e9ee4dfdddd604e744bcf9668ae9217f00a9a039d2f9a065734038da716f4699f3d21cfcd2c56e209ddd57a1761f5005782 -MPFR.v4.1.1+3.powerpc64le-linux-gnu.tar.gz/md5/b791927fce9e496624b4edd38fd84b28 -MPFR.v4.1.1+3.powerpc64le-linux-gnu.tar.gz/sha512/b65e9fe22d0b7816203e000b3135ed9cf10345ad490ec15c792f14126a60ad362593567d9bb24d91b6602c3a9134a087d25a75f1719bfbd3f2ebaf2af32409e4 -MPFR.v4.1.1+3.x86_64-apple-darwin.tar.gz/md5/a17d9b178bc7c8f3705067464892d3e1 -MPFR.v4.1.1+3.x86_64-apple-darwin.tar.gz/sha512/e1a5c93212779ff9b66e7169cd33e47645d8256ea29ef4fb8f2bb98f9f7b2da38b7e11194e5be4386b9f16ce452a654b714f9bc62a214b93a05cb3e7cc9bcb1c -MPFR.v4.1.1+3.x86_64-linux-gnu.tar.gz/md5/bba0619a653df1ef6d780991d5afd161 -MPFR.v4.1.1+3.x86_64-linux-gnu.tar.gz/sha512/0a07cb559cb406c07ca9d209e2db6f31ea78c4e311e996dd47d670900d35ef305961d1c10aea04b63cf149d129f41d994e8a410ca06a2eb93e6c23443a3aff10 
-MPFR.v4.1.1+3.x86_64-linux-musl.tar.gz/md5/e48473dc33f5da91649e1f96f39f7c9f -MPFR.v4.1.1+3.x86_64-linux-musl.tar.gz/sha512/f0df45dce81051283d7663c1457a805559810df921c215ec9e1a7415fe5f6ab398f2ae2215ed71916a48aa955b986f3f1050df41390b1f8fbb33c7cdb85ff716 -MPFR.v4.1.1+3.x86_64-unknown-freebsd.tar.gz/md5/5e667fc1528a594658792696e36dc8b7 -MPFR.v4.1.1+3.x86_64-unknown-freebsd.tar.gz/sha512/2e6bf53e01d2bd99a2cdba057b59aaa827d08e049083172abc5c2d71b280307c5a6439ea2d68b8d306787255ee23e429ef68ac8f9c7ffb846e0ec32f59cc43c0 -MPFR.v4.1.1+3.x86_64-w64-mingw32.tar.gz/md5/c4a704a8b1ca6a37824f6e6c17991f27 -MPFR.v4.1.1+3.x86_64-w64-mingw32.tar.gz/sha512/4b9c7af4d8ec6780fd88fa6f5284b909eb9ed1d81efac5cf525f60ac32ccf7bc1ad970bde42f273f9c9ced9e12c3c6a21dd9f8a67510c06919285ae9e85f0e2a -mpfr-4.1.0.tar.bz2/md5/44b892bc5a45bafb4294d134e13aad1d -mpfr-4.1.0.tar.bz2/sha512/410208ee0d48474c1c10d3d4a59decd2dfa187064183b09358ec4c4666e34d74383128436b404123b831e585d81a9176b24c7ced9d913967c5fce35d4040a0b4 +MPFR.v4.1.1+4.aarch64-apple-darwin.tar.gz/md5/07c92f3104cf508189292287719e77df +MPFR.v4.1.1+4.aarch64-apple-darwin.tar.gz/sha512/75f828f39091abcb8c8742ba7ea2bea2affb1644848a4272ec39081d6ad1399f027c3371f922d424c5d8bc72b78b408ce78f53a3c7b784140b2002f140684665 +MPFR.v4.1.1+4.aarch64-linux-gnu.tar.gz/md5/a6f60de83c161fa401c5a49c283ee94a +MPFR.v4.1.1+4.aarch64-linux-gnu.tar.gz/sha512/1c3f52d0f3c9005f2290a7a632458486972f768a9772a55ec59438f5010441768e1351a1a23e4a0b1f341b038324ceea0032b1efc0a0ad017aacbf70cde2cafb +MPFR.v4.1.1+4.aarch64-linux-musl.tar.gz/md5/8e6bc4cf8b94bdbd08ec7428d29f75b7 +MPFR.v4.1.1+4.aarch64-linux-musl.tar.gz/sha512/08489b81aa665bb2eb62c6c804c1c041c90587a0df6004a10017a3490c4ad049511dcca29cf38dbaada44fbf783b2bd1a788797dc16f128adce77bef4ec9a4a3 +MPFR.v4.1.1+4.armv6l-linux-gnueabihf.tar.gz/md5/f6f7f3f264e7b48ee9a518f21a7249f5 
+MPFR.v4.1.1+4.armv6l-linux-gnueabihf.tar.gz/sha512/3b45907a1de70fcddf5a0fb90ce45d5dabf09f11b92e1174e2779a79b0991c75b1c9037981b0cd999f32cebfc358d311377af71130222a5b81dbb43c0a9ebe76 +MPFR.v4.1.1+4.armv6l-linux-musleabihf.tar.gz/md5/07304ab9676c39c56aad073f2825fd1d +MPFR.v4.1.1+4.armv6l-linux-musleabihf.tar.gz/sha512/3c7a872aab1baa4d1966cbf42cc09799944d319441f41df560632f5e4d9af9c71de25c714faab223aa1cf4e5ae09ff68c514d073711b07758e033cd492bf7eb7 +MPFR.v4.1.1+4.armv7l-linux-gnueabihf.tar.gz/md5/261482058f90306858833156bb332281 +MPFR.v4.1.1+4.armv7l-linux-gnueabihf.tar.gz/sha512/c0acb7f476a736360763e269fb7b309b9f8843d19a9931694bb01efe77e6fe4f12c969d9ae0e16c16cb14cd9a0d67ff91fa02ba141c3f2f7b908170cac216800 +MPFR.v4.1.1+4.armv7l-linux-musleabihf.tar.gz/md5/c61c6d04f3d4147b76480867e90d2262 +MPFR.v4.1.1+4.armv7l-linux-musleabihf.tar.gz/sha512/3e6cc63c7404899de3d4e4da208c40e363f427ce1bd4f0c1d5d04711870239240a8b98e4d152f6d78128e4430f703ab0debe6c35e6cd8ef80aa4a605105d619f +MPFR.v4.1.1+4.i686-linux-gnu.tar.gz/md5/0dff053d5488f220f94a56beae0bf4a4 +MPFR.v4.1.1+4.i686-linux-gnu.tar.gz/sha512/26c5c4b91998f5bcffcf5a873c451acab376efd25e13671ec5cb4f1316d1866cf7fc841f7aff17a339326ed1730b720be8ab39349ff5cee0619891925b4eb79e +MPFR.v4.1.1+4.i686-linux-musl.tar.gz/md5/2edb5f985db6b39115f13bd05d623677 +MPFR.v4.1.1+4.i686-linux-musl.tar.gz/sha512/207f346be68458aeadc803d0466eba428b63c7ee9c654b06c00ae4a7e2bbd01ab3644f1db1ef9870730937a37e658956bdc2fdcab70d4619e149574a48a7191d +MPFR.v4.1.1+4.i686-w64-mingw32.tar.gz/md5/7228b731bfb530c48d5afe7c5f51cccc +MPFR.v4.1.1+4.i686-w64-mingw32.tar.gz/sha512/faac80db43d5c252c8d7f90a56b832b6a8bd7543465dadc57dfc8590c6eb54e49c96d6b337b4caeeba73917440be512d115b54485de73b6194f67d67e3d11dce +MPFR.v4.1.1+4.powerpc64le-linux-gnu.tar.gz/md5/27e01308e698ddd83a68cd0fdbea318b +MPFR.v4.1.1+4.powerpc64le-linux-gnu.tar.gz/sha512/48718cff4df3e16c50d7ed47fc0a693699919b9033fd31084e125d8a7abb68cecfcf6e1b34be83f4b6ada9d168a01fc653b4e33e1b5021b3143e603b560a8225 
+MPFR.v4.1.1+4.x86_64-apple-darwin.tar.gz/md5/a91682cb62bd6c7f8acb36a33585867a +MPFR.v4.1.1+4.x86_64-apple-darwin.tar.gz/sha512/82d2ff90e1a8a358f2fab643dfc3ead84edc8fabcf956b7479c0a0b1005430187a5315951e1b160e843776233cb2d655b5a27cfd37691cfed42f9b89f824e525 +MPFR.v4.1.1+4.x86_64-linux-gnu.tar.gz/md5/d3a3c97177e554685882f7b9f3eb0ee8 +MPFR.v4.1.1+4.x86_64-linux-gnu.tar.gz/sha512/c7af9df8c12ea3d3f784a048aae7c630f07515b509d9d0a3e0003b9697a3370112c3507a39b442d80a5671df95c2fa6a20b446443ac4cb0d48f3108e21e0d755 +MPFR.v4.1.1+4.x86_64-linux-musl.tar.gz/md5/bda6453ee85bf43348c41ebfd4accc94 +MPFR.v4.1.1+4.x86_64-linux-musl.tar.gz/sha512/0e85dd4361a67c7fe91bf9fffaad0eddfc93d578b0452e662628124d1e7589502221f20919d442875c731f57678c87b30ccfa1e9a00a77a6b42740dce96fd410 +MPFR.v4.1.1+4.x86_64-unknown-freebsd.tar.gz/md5/b2e40a50e486991660c30985a0ee6214 +MPFR.v4.1.1+4.x86_64-unknown-freebsd.tar.gz/sha512/bfc3010b2c94384ca2050b41e08ca26b22c813c1f38b274074854430a736f0f45530ee0df36030cfa479950848d8623c4e9b07fc8de4f6fbfda31a98abc9a4c6 +MPFR.v4.1.1+4.x86_64-w64-mingw32.tar.gz/md5/1b87833f68846d342dbdf283f3d39170 +MPFR.v4.1.1+4.x86_64-w64-mingw32.tar.gz/sha512/5c85a5664b4106eae733be0a85e8ab645b93dd78983cab8741cc13451ea429cb432a783f5a3b2a815db9376eb8bf83a6649247ef028d6a7f5dab9e519a9005b4 +mpfr-4.1.1.tar.bz2/md5/48eea07f8bb60dd9bbec1ec37a749f24 +mpfr-4.1.1.tar.bz2/sha512/f0efefbfc4dec367cdab6299272062508ec80d53daa779fe05954cd626983277039a10d9d072ae686584f6ce75014ef2136e3f095128fa21fc994f7c6f33d674 diff --git a/deps/mpfr.version b/deps/mpfr.version index 63fed0f8504f4..5e9e119e3e3a1 100644 --- a/deps/mpfr.version +++ b/deps/mpfr.version @@ -2,8 +2,4 @@ MPFR_JLL_NAME := MPFR ## source build -MPFR_VER := 4.1.0 - -# Note: jll use a different version `4.1.1+1` ("stdlib/MPFR_jll/Project.toml") -# See notes in build_tarballs.jl -# https://github.com/JuliaPackaging/Yggdrasil/blob/3c877e18dd9bb9b2e79415e00f661a7e37b2aea9/M/MPFR/build_tarballs.jl#L40-L42 +MPFR_VER := 4.1.1 diff --git 
a/stdlib/MPFR_jll/Project.toml b/stdlib/MPFR_jll/Project.toml index e3b994a94e98e..560c25a556401 100644 --- a/stdlib/MPFR_jll/Project.toml +++ b/stdlib/MPFR_jll/Project.toml @@ -1,6 +1,6 @@ name = "MPFR_jll" uuid = "3a97d323-0669-5f0c-9066-3539efd106a3" -version = "4.1.1+3" +version = "4.1.1+4" [deps] GMP_jll = "781609d7-10c4-51f6-84f2-b8444358ff6d" diff --git a/stdlib/MPFR_jll/test/runtests.jl b/stdlib/MPFR_jll/test/runtests.jl index 68bb6d3ec40e4..31c4ed0702551 100644 --- a/stdlib/MPFR_jll/test/runtests.jl +++ b/stdlib/MPFR_jll/test/runtests.jl @@ -4,5 +4,5 @@ using Test, Libdl, MPFR_jll @testset "MPFR_jll" begin vn = VersionNumber(unsafe_string(ccall((:mpfr_get_version,libmpfr), Cstring, ()))) - @test vn == v"4.1.0" + @test vn == v"4.1.1" end From f8f0c6335dc89382b45d5b8f0d9d8e7d6672cbb6 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Mon, 21 Nov 2022 20:25:15 -0500 Subject: [PATCH 08/57] Add compat note for `sortperm(x; dims)` (#47657) (cherry picked from commit 4fa07cd0d07c0e2882a505c08a992f146d885ad4) --- base/sort.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/base/sort.jl b/base/sort.jl index b4227e6fb5d3e..e995a64a9f76f 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -1100,6 +1100,9 @@ ascending order. See also [`sortperm!`](@ref), [`partialsortperm`](@ref), [`invperm`](@ref), [`indexin`](@ref). To sort slices of an array, refer to [`sortslices`](@ref). +!!! compat "Julia 1.9" + The method accepting `dims` requires at least Julia 1.9. + # Examples ```jldoctest julia> v = [3, 1, 2]; @@ -1163,6 +1166,9 @@ end Like [`sortperm`](@ref), but accepts a preallocated index vector or array `ix` with the same `axes` as `A`. If `initialized` is `false` (the default), `ix` is initialized to contain the values `LinearIndices(A)`. +!!! compat "Julia 1.9" + The method accepting `dims` requires at least Julia 1.9. 
+ # Examples ```jldoctest julia> v = [3, 1, 2]; p = zeros(Int, 3); From 382661c40e2830afb035e8382a77fbe4a444a002 Mon Sep 17 00:00:00 2001 From: Petr Vana Date: Wed, 23 Nov 2022 05:02:38 +0100 Subject: [PATCH 09/57] build: add get-lld target (#47589) Fixes `make -C deps getall` (cherry picked from commit 3200219b1f7e2681ece9e4b99bda48586fab8a93) --- deps/llvm.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/deps/llvm.mk b/deps/llvm.mk index 5d297b6c369bf..c13551ee331ef 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -314,4 +314,5 @@ $(eval $(call bb-install,llvm-tools,LLVM_TOOLS,false,true)) endif # USE_BINARYBUILDER_LLVM +get-lld: get-llvm install-lld install-clang install-llvm-tools: install-llvm From 0f271d7af86973781c46cad1bc031710327acb8d Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Wed, 23 Nov 2022 16:26:11 -0300 Subject: [PATCH 10/57] Print the detailed type on heap snapshot (#47503) Fixes https://github.com/JuliaLang/julia/issues/47502 (cherry picked from commit 27ebaa7fd5854ae76cf68b273fafed3fe9fe4a19) --- .gitignore | 2 +- src/gc-heap-snapshot.cpp | 48 +++++++++++++++++++++++----------------- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/.gitignore b/.gitignore index 2780210c41a9b..836a35781cd6f 100644 --- a/.gitignore +++ b/.gitignore @@ -33,7 +33,7 @@ .DS_Store .idea/* .vscode/* - +*.heapsnapshot # Buildkite: Ignore the entire .buildkite directory /.buildkite diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 001f2ea74d092..ac2a046936452 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -124,7 +124,7 @@ HeapSnapshot *g_snapshot = nullptr; extern jl_mutex_t heapsnapshot_lock; void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one); -static inline void _record_gc_edge(const char *node_type, const char *edge_type, +static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, jl_value_t *b, size_t name_or_index) JL_NOTSAFEPOINT; void 
_record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT; void _add_internal_root(HeapSnapshot *snapshot); @@ -185,45 +185,56 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT // Insert a new Node size_t self_size = 0; - std::string type_name; StringRef name = ""; StringRef node_type = "object"; jl_datatype_t *type = (jl_datatype_t*)jl_typeof(a); if (jl_is_string(a)) { - node_type = "string"; + node_type = "String"; name = jl_string_data(a); self_size = jl_string_len(a); } else if (jl_is_symbol(a)) { - node_type = "symbol"; + node_type = "jl_sym_t"; name = jl_symbol_name((jl_sym_t*)a); self_size = name.size(); } else if (jl_is_simplevector(a)) { - node_type = "array"; + node_type = "jl_svec_t"; name = "SimpleVector"; self_size = sizeof(jl_svec_t) + sizeof(void*) * jl_svec_len(a); } else if (jl_is_module(a)) { + node_type = "jl_module_t"; name = jl_symbol_name_(((_jl_module_t*)a)->name); self_size = sizeof(jl_module_t); } else if (jl_is_task(a)) { + node_type = "jl_task_t"; name = "Task"; self_size = sizeof(jl_task_t); } else if (jl_is_datatype(a)) { - type_name = string("Type{") + string(jl_symbol_name_(((_jl_datatype_t*)a)->name->name)) + string("}"); - name = StringRef(type_name); - self_size = sizeof(jl_task_t); + ios_need_close = 1; + ios_mem(&str_, 0); + JL_STREAM* str = (JL_STREAM*)&str_; + jl_static_show(str, a); + name = StringRef((const char*)str_.buf, str_.size); + node_type = "jl_datatype_t"; + self_size = sizeof(jl_datatype_t); + } + else if (jl_is_array(a)){ + ios_need_close = 1; + ios_mem(&str_, 0); + JL_STREAM* str = (JL_STREAM*)&str_; + jl_static_show(str, (jl_value_t*)type); + name = StringRef((const char*)str_.buf, str_.size); + node_type = "jl_array_t"; + self_size = sizeof(jl_array_t); } else { - self_size = jl_is_array_type(type) - ? 
sizeof(jl_array_t) - : (size_t)jl_datatype_size(type); - + self_size = (size_t)jl_datatype_size(type); // print full type into ios buffer and get StringRef to it. // The ios is cleaned up below. ios_need_close = 1; @@ -371,13 +382,13 @@ void _gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT { - _record_gc_edge("array", "element", from, to, index); + _record_gc_edge("element", from, to, index); } void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void *slot) JL_NOTSAFEPOINT { string path = _fieldpath_for_slot(from, slot); - _record_gc_edge("object", "property", from, to, + _record_gc_edge("property", from, to, g_snapshot->names.find_or_create_string_id(path)); } @@ -395,7 +406,6 @@ void _gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_binding_ auto &from_node = g_snapshot->nodes[from_node_idx]; auto &to_node = g_snapshot->nodes[to_node_idx]; - from_node.type = g_snapshot->node_types.find_or_create_string_id("object"); _record_gc_just_edge("property", from_node, to_node_idx, g_snapshot->names.find_or_create_string_id("")); if (value_idx) _record_gc_just_edge("internal", to_node, value_idx, g_snapshot->names.find_or_create_string_id("value")); @@ -405,7 +415,7 @@ void _gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_binding_ void _gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT { - _record_gc_edge("object", "internal", from, to, + _record_gc_edge("internal", from, to, g_snapshot->names.find_or_create_string_id("")); } @@ -432,19 +442,17 @@ void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t byt } auto to_node_idx = record_pointer_to_gc_snapshot(to, bytes, alloc_kind); auto &from_node = g_snapshot->nodes[from_node_idx]; - from_node.type = g_snapshot->node_types.find_or_create_string_id("native"); 
_record_gc_just_edge("hidden", from_node, to_node_idx, name_or_idx); } -static inline void _record_gc_edge(const char *node_type, const char *edge_type, - jl_value_t *a, jl_value_t *b, size_t name_or_idx) JL_NOTSAFEPOINT +static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, + jl_value_t *b, size_t name_or_idx) JL_NOTSAFEPOINT { auto from_node_idx = record_node_to_gc_snapshot(a); auto to_node_idx = record_node_to_gc_snapshot(b); auto &from_node = g_snapshot->nodes[from_node_idx]; - from_node.type = g_snapshot->node_types.find_or_create_string_id(node_type); _record_gc_just_edge(edge_type, from_node, to_node_idx, name_or_idx); } From c615a49adf6945a81d0c044a11f5e06b2b869f93 Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Wed, 23 Nov 2022 17:11:39 -0500 Subject: [PATCH 11/57] Remove typeinfer lock altogether (#46825) * Remove typeinfer lock altogether * Don't remove the typeinf lock functions * Track reentrancy in current task state * Fix up some git status * Initialize task variables * Promise that jl_typeinf_func is rooted somewhere (cherry picked from commit 113efb6e0aa27879cb423ab323c0159911e4c5e7) --- base/compiler/typeinfer.jl | 4 +--- base/loading.jl | 2 +- doc/src/devdocs/locks.md | 1 + src/aotcompile.cpp | 3 +++ src/dump.c | 15 ++++++++++++-- src/gf.c | 42 ++++++++++++++++---------------------- src/jitlayers.cpp | 30 +++++++++++++++++---------- src/julia.h | 4 ++++ src/julia_internal.h | 2 +- src/task.c | 4 ++++ 10 files changed, 65 insertions(+), 42 deletions(-) diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl index db2cb901b42e3..0bc3d6a68e253 100644 --- a/base/compiler/typeinfer.jl +++ b/base/compiler/typeinfer.jl @@ -404,9 +404,7 @@ function cache_result!(interp::AbstractInterpreter, result::InferenceResult) if track_newly_inferred[] m = linfo.def if isa(m, Method) && m.module != Core - ccall(:jl_typeinf_lock_begin, Cvoid, ()) - push!(newly_inferred, linfo) - 
ccall(:jl_typeinf_lock_end, Cvoid, ()) + ccall(:jl_push_newly_inferred, Cvoid, (Any,), linfo) end end end diff --git a/base/loading.jl b/base/loading.jl index 1a933b274b7de..a5df7c24408ae 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1662,6 +1662,7 @@ function include_package_for_output(pkg::PkgId, input::String, depot_path::Vecto task_local_storage()[:SOURCE_PATH] = source end + ccall(:jl_set_newly_inferred, Cvoid, (Any,), Core.Compiler.newly_inferred) Core.Compiler.track_newly_inferred.x = true try Base.include(Base.__toplevel__, input) @@ -1672,7 +1673,6 @@ function include_package_for_output(pkg::PkgId, input::String, depot_path::Vecto finally Core.Compiler.track_newly_inferred.x = false end - ccall(:jl_set_newly_inferred, Cvoid, (Any,), Core.Compiler.newly_inferred) end const PRECOMPILE_TRACE_COMPILE = Ref{String}() diff --git a/doc/src/devdocs/locks.md b/doc/src/devdocs/locks.md index f2ddc26fb954d..9b2d992d8f5bb 100644 --- a/doc/src/devdocs/locks.md +++ b/doc/src/devdocs/locks.md @@ -42,6 +42,7 @@ The following is a leaf lock (level 2), and only acquires level 1 locks (safepoi > * typecache > * Module->lock > * JLDebuginfoPlugin::PluginMutex +> * newly_inferred_mutex The following is a level 3 lock, which can only acquire level 1 or level 2 locks internally: diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 2714bc664eb57..83e1c6d150430 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -274,6 +274,8 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm jl_code_info_t *src = NULL; JL_GC_PUSH1(&src); JL_LOCK(&jl_codegen_lock); + auto ct = jl_current_task; + ct->reentrant_codegen++; orc::ThreadSafeContext ctx; orc::ThreadSafeModule backing; if (!llvmmod) { @@ -425,6 +427,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm if (ctx.getContext()) { jl_ExecutionEngine->releaseContext(std::move(ctx)); } + ct->reentrant_codegen--; JL_UNLOCK(&jl_codegen_lock); // Might GC return 
(void*)data; } diff --git a/src/dump.c b/src/dump.c index 2a32d40e7a2a3..96c875c4ec7f5 100644 --- a/src/dump.c +++ b/src/dump.c @@ -158,6 +158,8 @@ static htable_t external_mis; // Inference tracks newly-inferred MethodInstances during precompilation // and registers them by calling jl_set_newly_inferred static jl_array_t *newly_inferred JL_GLOBALLY_ROOTED; +// Mutex for newly_inferred +static jl_mutex_t newly_inferred_mutex; // New roots to add to Methods. These can't be added until after // recaching is complete, so we have to hold on to them separately @@ -2894,14 +2896,23 @@ JL_DLLEXPORT void jl_init_restored_modules(jl_array_t *init_order) // --- entry points --- -// Register all newly-inferred MethodInstances -// This gets called as the final step of Base.include_package_for_output +// Register array of newly-inferred MethodInstances +// This gets called as the first step of Base.include_package_for_output JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t* _newly_inferred) { assert(_newly_inferred == NULL || jl_is_array(_newly_inferred)); newly_inferred = (jl_array_t*) _newly_inferred; } +JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t* linfo) +{ + JL_LOCK(&newly_inferred_mutex); + size_t end = jl_array_len(newly_inferred); + jl_array_grow_end(newly_inferred, 1); + jl_arrayset(newly_inferred, linfo, end); + JL_UNLOCK(&newly_inferred_mutex); +} + // Serialize the modules in `worklist` to file `fname` JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist) { diff --git a/src/gf.c b/src/gf.c index 191a9321ba805..0bce672ca729c 100644 --- a/src/gf.c +++ b/src/gf.c @@ -279,8 +279,8 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force) JL_TIMING(INFERENCE); if (jl_typeinf_func == NULL) return NULL; - static int in_inference; - if (in_inference > 2) + jl_task_t *ct = jl_current_task; + if (ct->reentrant_inference > 2) return NULL; jl_code_info_t *src = NULL; @@ -300,7 +300,6 @@ jl_code_info_t 
*jl_type_infer(jl_method_instance_t *mi, size_t world, int force) jl_printf(JL_STDERR, "\n"); } #endif - jl_task_t *ct = jl_current_task; int last_errno = errno; #ifdef _OS_WINDOWS_ DWORD last_error = GetLastError(); @@ -308,7 +307,7 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force) size_t last_age = ct->world_age; ct->world_age = jl_typeinf_world; mi->inInference = 1; - in_inference++; + ct->reentrant_inference++; JL_TRY { src = (jl_code_info_t*)jl_apply(fargs, 3); } @@ -329,7 +328,7 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force) src = NULL; } ct->world_age = last_age; - in_inference--; + ct->reentrant_inference--; mi->inInference = 0; #ifdef _OS_WINDOWS_ SetLastError(last_error); @@ -544,7 +543,7 @@ static int reset_mt_caches(jl_methtable_t *mt, void *env) } -jl_function_t *jl_typeinf_func = NULL; +jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED = NULL; JL_DLLEXPORT size_t jl_typeinf_world = 1; JL_DLLEXPORT void jl_set_typeinf_func(jl_value_t *f) @@ -3416,44 +3415,39 @@ int jl_has_concrete_subtype(jl_value_t *typ) return ((jl_datatype_t*)typ)->has_concrete_subtype; } -// TODO: separate the codegen and typeinf locks -// currently using a coarser lock seems like -// the best way to avoid acquisition priority -// ordering violations -//static jl_mutex_t typeinf_lock; #define typeinf_lock jl_codegen_lock -static jl_mutex_t inference_timing_mutex; -static uint64_t inference_start_time = 0; -static uint8_t inference_is_measuring_compile_time = 0; - JL_DLLEXPORT void jl_typeinf_timing_begin(void) { if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) { - JL_LOCK_NOGC(&inference_timing_mutex); - if (inference_is_measuring_compile_time++ == 0) { - inference_start_time = jl_hrtime(); - } - JL_UNLOCK_NOGC(&inference_timing_mutex); + jl_task_t *ct = jl_current_task; + if (ct->inference_start_time == 0 && ct->reentrant_inference == 1) + ct->inference_start_time = jl_hrtime(); } } JL_DLLEXPORT 
void jl_typeinf_timing_end(void) { - JL_LOCK_NOGC(&inference_timing_mutex); - if (--inference_is_measuring_compile_time == 0) { - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - inference_start_time)); + jl_task_t *ct = jl_current_task; + if (ct->inference_start_time != 0 && ct->reentrant_inference == 1) { + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - ct->inference_start_time)); + ct->inference_start_time = 0; } - JL_UNLOCK_NOGC(&inference_timing_mutex); } JL_DLLEXPORT void jl_typeinf_lock_begin(void) { JL_LOCK(&typeinf_lock); + //Although this is claiming to be a typeinfer lock, it is actually + //affecting the codegen lock count, not type inference's inferencing count + jl_task_t *ct = jl_current_task; + ct->reentrant_codegen++; } JL_DLLEXPORT void jl_typeinf_lock_end(void) { + jl_task_t *ct = jl_current_task; + ct->reentrant_codegen--; JL_UNLOCK(&typeinf_lock); } diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index e612c39ca97d2..b6a30d3380b27 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -295,7 +295,8 @@ const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysi extern "C" JL_DLLEXPORT int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *sysimg, jl_value_t *declrt, jl_value_t *sigt) { - JL_LOCK(&jl_codegen_lock); + auto ct = jl_current_task; + ct->reentrant_codegen++; uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) @@ -311,6 +312,7 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void * backing = jl_create_llvm_module("cextern", pparams ? pparams->tsctx : ctx, pparams ? 
pparams->imaging : imaging_default()); into = &backing; } + JL_LOCK(&jl_codegen_lock); jl_codegen_params_t params(into->getContext()); if (pparams == NULL) pparams = ¶ms; @@ -330,12 +332,12 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void * if (success && llvmmod == NULL) jl_ExecutionEngine->addModule(std::move(*into)); } - if (jl_codegen_lock.count == 1 && measure_compile_time_enabled) + JL_UNLOCK(&jl_codegen_lock); + if (!--ct->reentrant_codegen && measure_compile_time_enabled) jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); if (ctx.getContext()) { jl_ExecutionEngine->releaseContext(std::move(ctx)); } - JL_UNLOCK(&jl_codegen_lock); return success; } @@ -386,7 +388,8 @@ void jl_extern_c_impl(jl_value_t *declrt, jl_tupletype_t *sigt) extern "C" JL_DLLEXPORT jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world) { - JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion + auto ct = jl_current_task; + ct->reentrant_codegen++; uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); bool is_recompile = false; @@ -395,6 +398,7 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES // if we don't have any decls already, try to generate it now jl_code_info_t *src = NULL; JL_GC_PUSH1(&src); + JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion jl_value_t *ci = jl_rettype_inferred(mi, world, world); jl_code_instance_t *codeinst = (ci == jl_nothing ? 
NULL : (jl_code_instance_t*)ci); if (codeinst) { @@ -437,13 +441,13 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES else { codeinst = NULL; } - if (jl_codegen_lock.count == 1 && measure_compile_time_enabled) { + JL_UNLOCK(&jl_codegen_lock); + if (!--ct->reentrant_codegen && measure_compile_time_enabled) { uint64_t t_comp = jl_hrtime() - compiler_start_time; if (is_recompile) jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, t_comp); jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, t_comp); } - JL_UNLOCK(&jl_codegen_lock); JL_GC_POP(); return codeinst; } @@ -454,11 +458,13 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec) if (jl_atomic_load_relaxed(&unspec->invoke) != NULL) { return; } - JL_LOCK(&jl_codegen_lock); + auto ct = jl_current_task; + ct->reentrant_codegen++; uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); + JL_LOCK(&jl_codegen_lock); if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) { jl_code_info_t *src = NULL; JL_GC_PUSH1(&src); @@ -486,9 +492,9 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec) } JL_GC_POP(); } - if (jl_codegen_lock.count == 1 && measure_compile_time_enabled) - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); JL_UNLOCK(&jl_codegen_lock); // Might GC + if (!--ct->reentrant_codegen && measure_compile_time_enabled) + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); } @@ -508,11 +514,13 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world, // normally we prevent native code from being generated for these functions, // (using sentinel value `1` instead) // so create an exception here so we can print pretty our lies - JL_LOCK(&jl_codegen_lock); // also disables finalizers, 
to prevent any unexpected recursion + auto ct = jl_current_task; + ct->reentrant_codegen++; uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); + JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr); if (specfptr == 0) { jl_code_info_t *src = jl_type_infer(mi, world, 0); @@ -536,7 +544,7 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world, } JL_GC_POP(); } - if (measure_compile_time_enabled) + if (!--ct->reentrant_codegen && measure_compile_time_enabled) jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); JL_UNLOCK(&jl_codegen_lock); } diff --git a/src/julia.h b/src/julia.h index bd9e1b116f8d3..3a30a2e1c59d8 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1765,6 +1765,7 @@ JL_DLLEXPORT void jl_save_system_image(const char *fname); JL_DLLEXPORT void jl_restore_system_image(const char *fname); JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len); JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t *newly_inferred); +JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t *linfo); JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist); JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods); JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, size_t sz, jl_array_t *depmods); @@ -1935,6 +1936,9 @@ typedef struct _jl_task_t { jl_ucontext_t ctx; void *stkbuf; // malloc'd memory (either copybuf or stack) size_t bufsz; // actual sizeof stkbuf + uint64_t inference_start_time; // time when inference started + unsigned int reentrant_inference; // How many times we've reentered inference + unsigned int reentrant_codegen; // How many times we've reentered codegen 
unsigned int copy_stack:31; // sizeof stack for copybuf unsigned int started:1; } jl_task_t; diff --git a/src/julia_internal.h b/src/julia_internal.h index 77bc7a14f933b..fc30ed09277c4 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -292,7 +292,7 @@ void print_func_loc(JL_STREAM *s, jl_method_t *m); extern jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT size_t jl_page_size; -extern jl_function_t *jl_typeinf_func; +extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT size_t jl_typeinf_world; extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED; extern jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED; diff --git a/src/task.c b/src/task.c index 1f7bf027f032c..81b90a832e2dd 100644 --- a/src/task.c +++ b/src/task.c @@ -938,6 +938,8 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion t->threadpoolid = ct->threadpoolid; t->ptls = NULL; t->world_age = ct->world_age; + t->reentrant_codegen = 0; + t->reentrant_inference = 0; #ifdef COPY_STACKS if (!t->copy_stack) { @@ -1523,6 +1525,8 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) ct->sticky = 1; ct->ptls = ptls; ct->world_age = 1; // OK to run Julia code on this task + ct->reentrant_codegen = 0; + ct->reentrant_inference = 0; ptls->root_task = ct; jl_atomic_store_relaxed(&ptls->current_task, ct); JL_GC_PROMISE_ROOTED(ct); From 3b414c6567b854192f321d514ca28701b180d3c4 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Thu, 24 Nov 2022 12:30:00 +0100 Subject: [PATCH 12/57] Fix regression in generic_bitcast with Union{} arguments. 
(#47605) (cherry picked from commit 726bbd7afda4373e10b8ab1eac9dfb53c81c8755) --- src/intrinsics.cpp | 7 ++++++- test/compiler/codegen.jl | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 7893a37664508..38d923cb5a99e 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -1134,7 +1134,12 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs); for (size_t i = 0; i < nargs; ++i) { - argv[i] = emit_expr(ctx, args[i + 1]); + jl_cgval_t arg = emit_expr(ctx, args[i + 1]); + if (arg.typ == jl_bottom_type) { + // intrinsics generally don't handle buttom values, so bail out early + return jl_cgval_t(); + } + argv[i] = arg; } // this forces everything to use runtime-intrinsics (e.g. for testing) diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl index 4bc7eb8f6d856..11cbd21b793a1 100644 --- a/test/compiler/codegen.jl +++ b/test/compiler/codegen.jl @@ -785,3 +785,8 @@ f_isa_type(@nospecialize(x)) = isa(x, Type) # Issue #47247 f47247(a::Ref{Int}, b::Nothing) = setfield!(a, :x, b) @test_throws TypeError f47247(Ref(5), nothing) + +@testset "regression in generic_bitcast: should support Union{} values" begin + f(x) = Core.bitcast(UInt64, x) + @test occursin("llvm.trap", get_llvm(f, Tuple{Union{}})) +end From aba112cc68f9278e0730e18ae3ddb8bf27d30639 Mon Sep 17 00:00:00 2001 From: Elliot Saba Date: Thu, 24 Nov 2022 07:42:49 -0800 Subject: [PATCH 13/57] Filesystem: `rm(; recursive=true)` should ignore `UV_EACCES` (#47668) The command-line program `rm` has no problem deleting an empty directory that we do not have listing permissions on, so we should follow suit. 
Example: ``` mktempdir() do dir mkpath("$(dir)/foo") chmod("$(dir)/foo", 0o200) rm(dir; recursive=true) end ``` (cherry picked from commit d0a211a9209d25b1297693c562fc3a679204a0c6) --- base/file.jl | 2 +- test/file.jl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/base/file.jl b/base/file.jl index d57b17354eb1f..b761e1d65ccb5 100644 --- a/base/file.jl +++ b/base/file.jl @@ -294,7 +294,7 @@ function rm(path::AbstractString; force::Bool=false, recursive::Bool=false) rm(joinpath(path, p), force=force, recursive=true) end catch err - if !(force && isa(err, IOError) && err.code==Base.UV_EACCES) + if !(isa(err, IOError) && err.code==Base.UV_EACCES) rethrow(err) end end diff --git a/test/file.jl b/test/file.jl index c0cdc0a8eacd5..7ca49fe3a065b 100644 --- a/test/file.jl +++ b/test/file.jl @@ -1520,11 +1520,11 @@ if !Sys.iswindows() chmod(joinpath(d, "empty_outer", "empty_inner"), 0o333) # Test that an empty directory, even when we can't read its contents, is deletable - rm(joinpath(d, "empty_outer"); recursive=true, force=true) + rm(joinpath(d, "empty_outer"); recursive=true) @test !isdir(joinpath(d, "empty_outer")) # But a non-empty directory is not - @test_throws Base.IOError rm(joinpath(d, "nonempty"); recursive=true, force=true) + @test_throws Base.IOError rm(joinpath(d, "nonempty"); recursive=true) chmod(joinpath(d, "nonempty"), 0o777) rm(joinpath(d, "nonempty"); recursive=true, force=true) @test !isdir(joinpath(d, "nonempty")) From 24505fcb06f8010ffd6239e27ece9cf93b08f0f4 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Sat, 26 Nov 2022 06:47:30 +0600 Subject: [PATCH 14/57] Fix overflow in pow5 (#47511) Fixup for #46764 (cherry picked from commit 02aa0b08665c5d5ff34ec344c21ba17c0f8d6a07) --- base/ryu/utils.jl | 2 +- test/ryu.jl | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/base/ryu/utils.jl b/base/ryu/utils.jl index e87d245aa4ee8..4fe0b7d397d07 100644 --- a/base/ryu/utils.jl +++ b/base/ryu/utils.jl @@ -64,7 
+64,7 @@ lengthforindex(idx) = div(((Int64(16 * idx) * 1292913986) >> 32) + 1 + 16 + 8, 9 Return `true` if `5^p` is a divisor of `x`. """ -pow5(x, p) = x % (5^p) == 0 +pow5(x, p) = x % (UInt64(5)^p) == 0 """ Ryu.pow2(x, p) diff --git a/test/ryu.jl b/test/ryu.jl index cf60e4867e236..0b10bd7e49ba5 100644 --- a/test/ryu.jl +++ b/test/ryu.jl @@ -52,6 +52,11 @@ end @test "2.305843009213694e40" == Ryu.writeshortest(Core.bitcast(Float64, 0x4850F0CF064DD592)) end +@testset "pow5 overflow (#47464)" begin + @test "4.6458339e+63" == Ryu.writeexp(4.645833859177319e63, 7) + @test "4.190673780e+40" == Ryu.writeexp(4.190673779576499e40, 9) +end + @testset "OutputLength" begin @test "1.0" == Ryu.writeshortest(1.0) # already tested in Basic @test "1.2" == Ryu.writeshortest(1.2) From 6fb45be1540f84f6d642f9655fbf427327771b19 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sun, 27 Nov 2022 18:21:46 -0500 Subject: [PATCH 15/57] Fix GCExt test (#47699) * Add test/gcext to out-of-tree * Disable gcext test that uses jl_gc_internal_obj_base_ptr (cherry picked from commit 5495b8d67a66720559cfd8c13ebb315a80e4e579) --- Makefile | 2 +- src/julia_gcext.h | 2 ++ test/gcext/LocalTest.jl | 20 ++++++++++---------- test/gcext/Makefile | 2 +- test/gcext/gcext-test.jl | 11 +++++++---- test/gcext/gcext.c | 1 + 6 files changed, 22 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 15a8cd1c855f9..c17d6ce694c5f 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ all: debug release # sort is used to remove potential duplicates DIRS := $(sort $(build_bindir) $(build_depsbindir) $(build_libdir) $(build_private_libdir) $(build_libexecdir) $(build_includedir) $(build_includedir)/julia $(build_sysconfdir)/julia $(build_datarootdir)/julia $(build_datarootdir)/julia/stdlib $(build_man1dir)) ifneq ($(BUILDROOT),$(JULIAHOME)) -BUILDDIRS := $(BUILDROOT) $(addprefix $(BUILDROOT)/,base src src/flisp src/support src/clangsa cli doc deps stdlib test test/clangsa test/embedding 
test/llvmpasses) +BUILDDIRS := $(BUILDROOT) $(addprefix $(BUILDROOT)/,base src src/flisp src/support src/clangsa cli doc deps stdlib test test/clangsa test/embedding test/gcext test/llvmpasses) BUILDDIRMAKE := $(addsuffix /Makefile,$(BUILDDIRS)) $(BUILDROOT)/sysimage.mk DIRS := $(DIRS) $(BUILDDIRS) $(BUILDDIRMAKE): | $(BUILDDIRS) diff --git a/src/julia_gcext.h b/src/julia_gcext.h index 6523198474771..669e80d069fa4 100644 --- a/src/julia_gcext.h +++ b/src/julia_gcext.h @@ -120,6 +120,8 @@ JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void); // external allocations may not all be valid objects and that for those, // the user *must* validate that they have a proper type, i.e. that // jl_typeof(obj) is an actual type object. +// +// NOTE: Only valid to call from within a GC context. JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p); // Return a non-null pointer to the start of the stack area if the task diff --git a/test/gcext/LocalTest.jl b/test/gcext/LocalTest.jl index f73b4b47e8023..e2ee94e765321 100644 --- a/test/gcext/LocalTest.jl +++ b/test/gcext/LocalTest.jl @@ -54,13 +54,13 @@ function set_aux_root(n :: Int, x :: String) return ccall(:set_aux_root, Nothing, (UInt, String), n, x) end -function internal_obj_scan(p :: Any) - if ccall(:internal_obj_scan, Cint, (Any,), p) == 0 - global internal_obj_scan_failures += 1 - end -end +# function internal_obj_scan(p :: Any) +# if ccall(:internal_obj_scan, Cint, (Any,), p) == 0 +# global internal_obj_scan_failures += 1 +# end +# end -global internal_obj_scan_failures = 0 +# global internal_obj_scan_failures = 0 for i in 0:1000 set_aux_root(i, string(i)) @@ -70,12 +70,12 @@ function test() local stack = make() for i in 1:100000 push(stack, string(i, base=2)) - internal_obj_scan(top(stack)) + # internal_obj_scan(top(stack)) end for i in 1:1000 local stack2 = make() - internal_obj_scan(stack2) - internal_obj_scan(blob(stack2)) + # internal_obj_scan(stack2) + # internal_obj_scan(blob(stack2)) while 
!empty(stack) push(stack2, pop(stack)) end @@ -98,5 +98,5 @@ end print(gc_counter_full(), " full collections.\n") print(gc_counter_inc(), " partial collections.\n") print(num_obj_sweeps(), " object sweeps.\n") -print(internal_obj_scan_failures, " internal object scan failures.\n") +# print(internal_obj_scan_failures, " internal object scan failures.\n") print(corrupted_roots, " corrupted auxiliary roots.\n") diff --git a/test/gcext/Makefile b/test/gcext/Makefile index 7cb602572e3c5..b3314d1f9b32b 100644 --- a/test/gcext/Makefile +++ b/test/gcext/Makefile @@ -41,7 +41,7 @@ $(BIN)/gcext-debug$(EXE): $(SRCDIR)/gcext.c ifneq ($(abspath $(BIN)),$(abspath $(SRCDIR))) # for demonstration purposes, our demo code is also installed # in $BIN, although this would likely not be typical -$(BIN)/LocalModule.jl: $(SRCDIR)/LocalModule.jl +$(BIN)/LocalTest.jl: $(SRCDIR)/LocalTest.jl cp $< $@ endif diff --git a/test/gcext/gcext-test.jl b/test/gcext/gcext-test.jl index e6f3e3663ff0e..0dc9bbadd92b5 100644 --- a/test/gcext/gcext-test.jl +++ b/test/gcext/gcext-test.jl @@ -31,12 +31,15 @@ end errlines = fetch(err_task) lines = fetch(out_task) @test length(errlines) == 0 - @test length(lines) == 6 + # @test length(lines) == 6 + @test length(lines) == 5 @test checknum(lines[2], r"([0-9]+) full collections", n -> n >= 10) @test checknum(lines[3], r"([0-9]+) partial collections", n -> n > 0) @test checknum(lines[4], r"([0-9]+) object sweeps", n -> n > 0) - @test checknum(lines[5], r"([0-9]+) internal object scan failures", - n -> n == 0) - @test checknum(lines[6], r"([0-9]+) corrupted auxiliary roots", + # @test checknum(lines[5], r"([0-9]+) internal object scan failures", + # n -> n == 0) + # @test checknum(lines[6], r"([0-9]+) corrupted auxiliary roots", + # n -> n == 0) + @test checknum(lines[5], r"([0-9]+) corrupted auxiliary roots", n -> n == 0) end diff --git a/test/gcext/gcext.c b/test/gcext/gcext.c index 7f2986d8f1f57..842d6004ab965 100644 --- a/test/gcext/gcext.c +++ 
b/test/gcext/gcext.c @@ -307,6 +307,7 @@ static size_t gc_alloc_size(jl_value_t *val) int internal_obj_scan(jl_value_t *val) { + // FIXME: `jl_gc_internal_obj_base_ptr` is not allowed to be called from outside GC if (jl_gc_internal_obj_base_ptr(val) == val) { size_t size = gc_alloc_size(val); char *addr = (char *)val; From 0865ae0017e262f385ebf375284938bf39d40f78 Mon Sep 17 00:00:00 2001 From: Rashid Rafeek Date: Mon, 28 Nov 2022 12:14:05 +0530 Subject: [PATCH 16/57] Fix REPL keybinding CTRL-Q for stdlib methods (#47637) (cherry picked from commit 7514bcf0bda547012f19a071daa132c3e8e97613) --- stdlib/REPL/src/REPL.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl index 708a4f895573a..4c83cdf33508d 100644 --- a/stdlib/REPL/src/REPL.jl +++ b/stdlib/REPL/src/REPL.jl @@ -1248,7 +1248,7 @@ function setup_interface( @goto writeback end try - InteractiveUtils.edit(linfos[n][1], linfos[n][2]) + InteractiveUtils.edit(Base.fixup_stdlib_path(linfos[n][1]), linfos[n][2]) catch ex ex isa ProcessFailedException || ex isa Base.IOError || ex isa SystemError || rethrow() @info "edit failed" _exception=ex From cfbb86a6b758ae0345ee392971e2f45e3637d069 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Mon, 28 Nov 2022 12:17:27 +0100 Subject: [PATCH 17/57] fix 5-arg `mul!` for vectors of vectors (#47665) Co-authored-by: N5N3 <2642243996@qq.com> (cherry picked from commit 902e8a7c2f7ba45aa35b8f5de4c2840a306a1958) --- stdlib/LinearAlgebra/src/matmul.jl | 2 +- stdlib/LinearAlgebra/test/matmul.jl | 52 +++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/stdlib/LinearAlgebra/src/matmul.jl b/stdlib/LinearAlgebra/src/matmul.jl index 3e034ce87ede0..6d00b950525e6 100644 --- a/stdlib/LinearAlgebra/src/matmul.jl +++ b/stdlib/LinearAlgebra/src/matmul.jl @@ -807,7 +807,7 @@ function generic_matvecmul!(C::AbstractVector{R}, tA, A::AbstractVecOrMat, B::Ab end for k = 1:mB aoffs = 
(k-1)*Astride - b = _add(B[k], false) + b = _add(B[k]) for i = 1:mA C[i] += A[aoffs + i] * b end diff --git a/stdlib/LinearAlgebra/test/matmul.jl b/stdlib/LinearAlgebra/test/matmul.jl index cf0295ce552b5..0150c4c2efdc8 100644 --- a/stdlib/LinearAlgebra/test/matmul.jl +++ b/stdlib/LinearAlgebra/test/matmul.jl @@ -156,6 +156,58 @@ end end end +@testset "generic_matvecmul for vectors of vectors" begin + @testset "matrix of scalars" begin + u = [[1, 2], [3, 4]] + A = [1 2; 3 4] + v = [[0, 0], [0, 0]] + Au = [[7, 10], [15, 22]] + @test A * u == Au + mul!(v, A, u) + @test v == Au + mul!(v, A, u, 2, -1) + @test v == Au + end + + @testset "matrix of matrices" begin + u = [[1, 2], [3, 4]] + A = Matrix{Matrix{Int}}(undef, 2, 2) + A[1, 1] = [1 2; 3 4] + A[1, 2] = [5 6; 7 8] + A[2, 1] = [9 10; 11 12] + A[2, 2] = [13 14; 15 16] + v = [[0, 0], [0, 0]] + Au = [[44, 64], [124, 144]] + @test A * u == Au + mul!(v, A, u) + @test v == Au + mul!(v, A, u, 2, -1) + @test v == Au + end +end + +@testset "generic_matmatmul for matrices of vectors" begin + B = Matrix{Vector{Int}}(undef, 2, 2) + B[1, 1] = [1, 2] + B[2, 1] = [3, 4] + B[1, 2] = [5, 6] + B[2, 2] = [7, 8] + A = [1 2; 3 4] + C = Matrix{Vector{Int}}(undef, 2, 2) + AB = Matrix{Vector{Int}}(undef, 2, 2) + AB[1, 1] = [7, 10] + AB[2, 1] = [15, 22] + AB[1, 2] = [19, 22] + AB[2, 2] = [43, 50] + @test A * B == AB + mul!(C, A, B) + @test C == AB + mul!(C, A, B, 2, -1) + @test C == AB + LinearAlgebra._generic_matmatmul!(C, 'N', 'N', A, B, LinearAlgebra.MulAddMul(2, -1)) + @test C == AB +end + @testset "fallbacks & such for BlasFloats" begin AA = rand(Float64, 6, 6) BB = rand(Float64, 6, 6) From 93587d7c1015efcd4c5184e9c42684382f1f9ab2 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 7 Dec 2022 16:01:47 +0100 Subject: [PATCH 18/57] Add support for "package extensions" to code loading (#47695) * Add support for "glue packages" to code loading This allows packages to define "glue packages" which are modules that are automatically 
loaded when a set of other packages are loaded into the Julia session. (cherry picked from commit 495a004bda33e284b0acc612f5ced9ba1eb9a777) --- NEWS.md | 4 + base/loading.jl | 243 ++++++++++++++++-- doc/src/manual/code-loading.md | 39 +++ test/loading.jl | 31 +++ .../project/Extensions/ExtDep.jl/Project.toml | 3 + .../Extensions/ExtDep.jl/src/ExtDep.jl | 5 + test/project/Extensions/ExtDep2/Project.toml | 3 + .../project/Extensions/ExtDep2/src/ExtDep2.jl | 5 + .../HasDepWithExtensions.jl/Manifest.toml | 25 ++ .../HasDepWithExtensions.jl/Project.toml | 8 + .../src/HasDepWithExtensions.jl | 13 + .../Extensions/HasExtensions.jl/Manifest.toml | 7 + .../Extensions/HasExtensions.jl/Project.toml | 11 + .../HasExtensions.jl/ext/Extension.jl | 13 + .../ext/ExtensionFolder/ExtensionFolder.jl | 9 + .../HasExtensions.jl/src/HasExtensions.jl | 10 + 16 files changed, 406 insertions(+), 23 deletions(-) create mode 100644 test/project/Extensions/ExtDep.jl/Project.toml create mode 100644 test/project/Extensions/ExtDep.jl/src/ExtDep.jl create mode 100644 test/project/Extensions/ExtDep2/Project.toml create mode 100644 test/project/Extensions/ExtDep2/src/ExtDep2.jl create mode 100644 test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml create mode 100644 test/project/Extensions/HasDepWithExtensions.jl/Project.toml create mode 100644 test/project/Extensions/HasDepWithExtensions.jl/src/HasDepWithExtensions.jl create mode 100644 test/project/Extensions/HasExtensions.jl/Manifest.toml create mode 100644 test/project/Extensions/HasExtensions.jl/Project.toml create mode 100644 test/project/Extensions/HasExtensions.jl/ext/Extension.jl create mode 100644 test/project/Extensions/HasExtensions.jl/ext/ExtensionFolder/ExtensionFolder.jl create mode 100644 test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl diff --git a/NEWS.md b/NEWS.md index 92f13913d997c..2320f5af907a5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -98,6 +98,10 @@ Standard library changes #### Package Manager +- 
"Package Extensions": support for loading a piece of code based on other + packages being loaded in the Julia session. + This has similar applications as the Requires.jl package but also + supports precompilation and setting compatibility. #### LinearAlgebra * The methods `a / b` and `b \ a` with `a` a scalar and `b` a vector, which were equivalent to `a * pinv(b)`, diff --git a/base/loading.jl b/base/loading.jl index a5df7c24408ae..8846b5f197c94 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -564,7 +564,7 @@ function manifest_deps_get(env::String, where::PkgId, name::String)::Union{Nothi return PkgId(pkg_uuid, name) end # look for manifest file and `where` stanza - return explicit_manifest_deps_get(project_file, uuid, name) + return explicit_manifest_deps_get(project_file, where, name) elseif project_file # if env names a directory, search it return implicit_manifest_deps_get(env, where, name) @@ -578,7 +578,7 @@ function manifest_uuid_path(env::String, pkg::PkgId)::Union{Nothing,String,Missi proj = project_file_name_uuid(project_file, pkg.name) if proj == pkg # if `pkg` matches the project, return the project itself - return project_file_path(project_file, pkg.name) + return project_file_path(project_file) end # look for manifest file and `where` stanza return explicit_manifest_uuid_path(project_file, pkg) @@ -598,7 +598,7 @@ function project_file_name_uuid(project_file::String, name::String)::PkgId return PkgId(uuid, name) end -function project_file_path(project_file::String, name::String) +function project_file_path(project_file::String) d = parsed_toml(project_file) joinpath(dirname(project_file), get(d, "path", "")::String) end @@ -716,7 +716,7 @@ end # find `where` stanza and return the PkgId for `name` # return `nothing` if it did not find `where` (indicating caller should continue searching) -function explicit_manifest_deps_get(project_file::String, where::UUID, name::String)::Union{Nothing,PkgId} +function 
explicit_manifest_deps_get(project_file::String, where::PkgId, name::String)::Union{Nothing,PkgId} manifest_file = project_file_manifest_path(project_file) manifest_file === nothing && return nothing # manifest not found--keep searching LOAD_PATH d = get_deps(parsed_toml(manifest_file)) @@ -728,16 +728,15 @@ function explicit_manifest_deps_get(project_file::String, where::UUID, name::Str entry = entry::Dict{String, Any} uuid = get(entry, "uuid", nothing)::Union{String, Nothing} uuid === nothing && continue - if UUID(uuid) === where + if UUID(uuid) === where.uuid found_where = true # deps is either a list of names (deps = ["DepA", "DepB"]) or # a table of entries (deps = {"DepA" = "6ea...", "DepB" = "55d..."} deps = get(entry, "deps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing} - deps === nothing && continue if deps isa Vector{String} found_name = name in deps break - else + elseif deps isa Dict{String, Any} deps = deps::Dict{String, Any} for (dep, uuid) in deps uuid::String @@ -746,6 +745,36 @@ function explicit_manifest_deps_get(project_file::String, where::UUID, name::Str end end end + else # Check for extensions + extensions = get(entry, "extensions", nothing) + if extensions !== nothing + if haskey(extensions, where.name) && where.uuid == uuid5(UUID(uuid), where.name) + found_where = true + if name == dep_name + return PkgId(UUID(uuid), name) + end + exts = extensions[where.name]::Union{String, Vector{String}} + if (exts isa String && name == exts) || (exts isa Vector{String} && name in exts) + weakdeps = get(entry, "weakdeps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing} + if weakdeps !== nothing + if weakdeps isa Vector{String} + found_name = name in weakdeps + break + elseif weakdeps isa Dict{String, Any} + weakdeps = weakdeps::Dict{String, Any} + for (dep, uuid) in weakdeps + uuid::String + if dep === name + return PkgId(UUID(uuid), name) + end + end + end + end + end + # `name` is not an ext, do standard lookup as if this was 
the parent + return identify_package(PkgId(UUID(uuid), dep_name), name) + end + end end end end @@ -769,13 +798,27 @@ function explicit_manifest_uuid_path(project_file::String, pkg::PkgId)::Union{No d = get_deps(parsed_toml(manifest_file)) entries = get(d, pkg.name, nothing)::Union{Nothing, Vector{Any}} - entries === nothing && return nothing # TODO: allow name to mismatch? - for entry in entries - entry = entry::Dict{String, Any} - uuid = get(entry, "uuid", nothing)::Union{Nothing, String} - uuid === nothing && continue - if UUID(uuid) === pkg.uuid - return explicit_manifest_entry_path(manifest_file, pkg, entry) + if entries !== nothing + for entry in entries + entry = entry::Dict{String, Any} + uuid = get(entry, "uuid", nothing)::Union{Nothing, String} + uuid === nothing && continue + if UUID(uuid) === pkg.uuid + return explicit_manifest_entry_path(manifest_file, pkg, entry) + end + end + end + # Extensions + for (name, entries::Vector{Any}) in d + for entry in entries + uuid = get(entry, "uuid", nothing)::Union{Nothing, String} + extensions = get(entry, "extensions", nothing)::Union{Nothing, Dict{String, Any}} + if extensions !== nothing && haskey(extensions, pkg.name) && uuid !== nothing && uuid5(UUID(uuid), pkg.name) == pkg.uuid + p = normpath(dirname(locate_package(PkgId(UUID(uuid), name))), "..") + extfiledir = joinpath(p, "ext", pkg.name, pkg.name * ".jl") + isfile(extfiledir) && return extfiledir + return joinpath(p, "ext", pkg.name * ".jl") + end end end return nothing @@ -958,6 +1001,7 @@ end function run_package_callbacks(modkey::PkgId) assert_havelock(require_lock) unlock(require_lock) + run_extension_callbacks() try for callback in package_callbacks invokelatest(callback, modkey) @@ -972,6 +1016,154 @@ function run_package_callbacks(modkey::PkgId) nothing end + +############## +# Extensions # +############## + +mutable struct ExtensionId + const id::PkgId # Could be symbol? 
+ const parentid::PkgId + const triggers::Vector{PkgId} # What packages have to be loaded for the extension to get loaded + triggered::Bool + succeeded::Bool +end + +const EXT_DORMITORY = ExtensionId[] + +function insert_extension_triggers(pkg::PkgId) + pkg.uuid === nothing && return + for env in load_path() + insert_extension_triggers(env, pkg) + break # For now, only insert triggers for packages in the first load_path. + end +end + +function insert_extension_triggers(env::String, pkg::PkgId)::Union{Nothing,Missing} + project_file = env_project_file(env) + if project_file isa String + manifest_file = project_file_manifest_path(project_file) + manifest_file === nothing && return + d = get_deps(parsed_toml(manifest_file)) + for (dep_name, entries) in d + entries::Vector{Any} + for entry in entries + entry = entry::Dict{String, Any} + uuid = get(entry, "uuid", nothing)::Union{String, Nothing} + uuid === nothing && continue + if UUID(uuid) == pkg.uuid + weakdeps = get(entry, "weakdeps", nothing)::Union{Nothing, Vector{String}, Dict{String,Any}} + extensions = get(entry, "extensions", nothing)::Union{Nothing, Dict{String, Any}} + extensions === nothing && return + weakdeps === nothing && return + if weakdeps isa Dict{String, Any} + return _insert_extension_triggers(pkg, extensions, weakdeps) + end + + d_weakdeps = Dict{String, String}() + for (dep_name, entries) in d + dep_name in weakdeps || continue + entries::Vector{Any} + if length(entries) != 1 + error("expected a single entry for $(repr(name)) in $(repr(project_file))") + end + entry = first(entries)::Dict{String, Any} + uuid = get(entry, "uuid", nothing)::Union{String, Nothing} + d_weakdeps[dep_name] = uuid + end + @assert length(d_weakdeps) == length(weakdeps) + return _insert_extension_triggers(pkg, extensions, d_weakdeps) + end + end + end + end + return nothing +end + +function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, <:Any}, weakdeps::Dict{String, <:Any}) + for (ext::String, 
triggers::Union{String, Vector{String}}) in extensions + triggers isa String && (triggers = [triggers]) + triggers_id = PkgId[] + id = PkgId(uuid5(parent.uuid, ext), ext) + for trigger in triggers + # TODO: Better error message if this lookup fails? + uuid_trigger = UUID(weakdeps[trigger]::String) + push!(triggers_id, PkgId(uuid_trigger, trigger)) + end + gid = ExtensionId(id, parent, triggers_id, false, false) + push!(EXT_DORMITORY, gid) + end +end + +function run_extension_callbacks(; force::Bool=false) + try + # TODO, if `EXT_DORMITORY` becomes very long, do something smarter + for extid in EXT_DORMITORY + extid.succeeded && continue + !force && extid.triggered && continue + if all(x -> haskey(Base.loaded_modules, x), extid.triggers) + ext_not_allowed_load = nothing + extid.triggered = true + # It is possible that some of the triggers were loaded in an environment + # below the one of the parent. This will cause a load failure when the + # pkg ext tries to load the triggers. Therefore, check this first + # before loading the pkg ext. + for trigger in extid.triggers + pkgenv = Base.identify_package_env(extid.id, trigger.name) + if pkgenv === nothing + ext_not_allowed_load = trigger + break + else + pkg, env = pkgenv + path = Base.locate_package(pkg, env) + if path === nothing + ext_not_allowed_load = trigger + break + end + end + end + if ext_not_allowed_load !== nothing + @debug "Extension $(extid.id.name) of $(extid.parentid.name) not loaded due to \ + $(ext_not_allowed_load.name) loaded in environment lower in load path" + else + require(extid.id) + @debug "Extension $(extid.id.name) of $(extid.parentid.name) loaded" + end + extid.succeeded = true + end + end + catch + # Try to continue loading if loading an extension errors + errs = current_exceptions() + @error "Error during loading of extension" exception=errs + end + nothing +end + +""" + load_extensions() + +Loads all the (not yet loaded) extensions that have their extension-dependencies loaded. 
+This is used in cases where the automatic loading of an extension failed +due to some problem with the extension. Instead of restarting the Julia session, +the extension can be fixed, and this function run. +""" +retry_load_extensions() = run_extension_callbacks(; force=true) + +""" + get_extension(parent::Module, extension::Symbol) + +Return the module for `extension` of `parent` or return `nothing` if the extension is not loaded. +""" +get_extension(parent::Module, ext::Symbol) = get_extension(PkgId(parent), ext) +function get_extension(parentid::PkgId, ext::Symbol) + parentid.uuid === nothing && return nothing + extid = PkgId(uuid5(parentid.uuid, string(ext)), string(ext)) + return get(loaded_modules, extid, nothing) +end + +# End extensions + # loads a precompile cache file, after checking stale_cachefile tests function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt64) assert_havelock(require_lock) @@ -995,6 +1187,7 @@ function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt64) notify(loading, loaded, all=true) end if loaded isa Module + insert_extension_triggers(modkey) run_package_callbacks(modkey) end end @@ -1035,6 +1228,7 @@ function _tryrequire_from_serialized(modkey::PkgId, path::String, sourcepath::St notify(loading, loaded, all=true) end if loaded isa Module + insert_extension_triggers(modkey) run_package_callbacks(modkey) end end @@ -1239,7 +1433,7 @@ function require(into::Module, mod::Symbol) LOADING_CACHE[] = LoadingCache() try uuidkey_env = identify_package_env(into, String(mod)) - # Core.println("require($(PkgId(into)), $mod) -> $uuidkey from env \"$env\"") + # Core.println("require($(PkgId(into)), $mod) -> $uuidkey_env") if uuidkey_env === nothing where = PkgId(into) if where.uuid === nothing @@ -1279,14 +1473,6 @@ function require(into::Module, mod::Symbol) end end -mutable struct PkgOrigin - path::Union{String,Nothing} - cachepath::Union{String,Nothing} - version::Union{VersionNumber,Nothing} -end -PkgOrigin() = 
PkgOrigin(nothing, nothing, nothing) -const pkgorigins = Dict{PkgId,PkgOrigin}() - require(uuidkey::PkgId) = @lock require_lock _require_prelocked(uuidkey) function _require_prelocked(uuidkey::PkgId, env=nothing) @@ -1297,6 +1483,7 @@ function _require_prelocked(uuidkey::PkgId, env=nothing) error("package `$(uuidkey.name)` did not define the expected \ module `$(uuidkey.name)`, check for typos in package module name") end + insert_extension_triggers(uuidkey) # After successfully loading, notify downstream consumers run_package_callbacks(uuidkey) else @@ -1305,6 +1492,14 @@ function _require_prelocked(uuidkey::PkgId, env=nothing) return newm end +mutable struct PkgOrigin + path::Union{String,Nothing} + cachepath::Union{String,Nothing} + version::Union{VersionNumber,Nothing} +end +PkgOrigin() = PkgOrigin(nothing, nothing, nothing) +const pkgorigins = Dict{PkgId,PkgOrigin}() + const loaded_modules = Dict{PkgId,Module}() const loaded_modules_order = Vector{Module}() const module_keys = IdDict{Module,PkgId}() # the reverse @@ -1479,6 +1674,7 @@ function _require_from_serialized(uuidkey::PkgId, path::String) set_pkgorigin_version_path(uuidkey, nothing) newm = _tryrequire_from_serialized(uuidkey, path) newm isa Module || throw(newm) + insert_extension_triggers(uuidkey) # After successfully loading, notify downstream consumers run_package_callbacks(uuidkey) return newm @@ -1711,6 +1907,7 @@ function create_expr_cache(pkg::PkgId, input::String, output::String, concrete_d "w", stdout) # write data over stdin to avoid the (unlikely) case of exceeding max command line size write(io.in, """ + empty!(Base.EXT_DORMITORY) # If we have a custom sysimage with `EXT_DORMITORY` prepopulated Base.include_package_for_output($(pkg_str(pkg)), $(repr(abspath(input))), $(repr(depot_path)), $(repr(dl_load_path)), $(repr(load_path)), $deps, $(repr(source_path(nothing)))) """) diff --git a/doc/src/manual/code-loading.md b/doc/src/manual/code-loading.md index d6f359f83d5cb..f9575b0159d8c 100644 
--- a/doc/src/manual/code-loading.md +++ b/doc/src/manual/code-loading.md @@ -348,7 +348,46 @@ The subscripted `rootsᵢ`, `graphᵢ` and `pathsᵢ` variables correspond to th 2. Packages in non-primary environments can end up using incompatible versions of their dependencies even if their own environments are entirely compatible. This can happen when one of their dependencies is shadowed by a version in an earlier environment in the stack (either by graph or path, or both). Since the primary environment is typically the environment of a project you're working on, while environments later in the stack contain additional tools, this is the right trade-off: it's better to break your development tools but keep the project working. When such incompatibilities occur, you'll typically want to upgrade your dev tools to versions that are compatible with the main project. +### "Extension"s +An "extension" is a module that is automatically loaded when a specified set of other packages (its "extension dependencies") are loaded in the current Julia session. The extension dependencies of an extension are a subset of those packages listed under the `[weakdeps]` section of a Project file. Extensions are defined under the `[extensions]` section in the project file: + +```toml +name = "MyPackage" + +[weakdeps] +ExtDep = "c9a23..." # uuid +OtherExtDep = "862e..." # uuid + +[extensions] +BarExt = ["ExtDep", "OtherExtDep"] +FooExt = "ExtDep" +... +``` + +The keys under `extensions` are the name of the extensions. +They are loaded when all the packages on the right hand side (the extension dependencies) of that extension are loaded. +If an extension only has one extension dependency the list of extension dependencies can be written as just a string for brevity. +The location for the entry point of the extension is either in `ext/FooExt.jl` or `ext/FooExt/FooExt.jl` for +extension `FooExt`. 
+The content of an extension is often structured as: + +``` +module FooExt + +# Load main package and extension dependencies +using MyPackage, ExtDep + +# Extend functionality in main package with types from the extension dependencies +MyPackage.func(x::ExtDep.SomeStruct) = ... + +end +``` + +When a package with extensions is added to an environment, the `weakdeps` and `extensions` sections +are stored in the manifest file in the section for that package. The dependency lookup rules for +a package are the same as for its "parent" except that the listed extension dependencies are also considered as +dependencies. ### Package/Environment Preferences Preferences are dictionaries of metadata that influence package behavior within an environment. diff --git a/test/loading.jl b/test/loading.jl index d057f0b3c3702..99f39ae237532 100644 --- a/test/loading.jl +++ b/test/loading.jl @@ -991,5 +991,36 @@ end end end +@testset "Extensions" begin + old_depot_path = copy(DEPOT_PATH) + try + tmp = mktempdir() + push!(empty!(DEPOT_PATH), joinpath(tmp, "depot")) + + proj = joinpath(@__DIR__, "project", "Extensions", "HasDepWithExtensions.jl") + for i in 1:2 # Once when requiring precomilation, once where it is already precompiled + cmd = `$(Base.julia_cmd()) --project=$proj --startup-file=no -e ' + begin + using HasExtensions + # Base.get_extension(HasExtensions, :Extension) === nothing || error("unexpectedly got an extension") + HasExtensions.ext_loaded && error("ext_loaded set") + using HasDepWithExtensions + # Base.get_extension(HasExtensions, :Extension).extvar == 1 || error("extvar in Extension not set") + HasExtensions.ext_loaded || error("ext_loaded not set") + HasExtensions.ext_folder_loaded && error("ext_folder_loaded set") + HasDepWithExtensions.do_something() || error("do_something errored") + using ExtDep2 + HasExtensions.ext_folder_loaded || error("ext_folder_loaded not set") + + end + '` + @test success(cmd) + end + finally + copy!(DEPOT_PATH, old_depot_path) + end 
+end + + empty!(Base.DEPOT_PATH) append!(Base.DEPOT_PATH, original_depot_path) diff --git a/test/project/Extensions/ExtDep.jl/Project.toml b/test/project/Extensions/ExtDep.jl/Project.toml new file mode 100644 index 0000000000000..93c5e3925f06b --- /dev/null +++ b/test/project/Extensions/ExtDep.jl/Project.toml @@ -0,0 +1,3 @@ +name = "ExtDep" +uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c" +version = "0.1.0" diff --git a/test/project/Extensions/ExtDep.jl/src/ExtDep.jl b/test/project/Extensions/ExtDep.jl/src/ExtDep.jl new file mode 100644 index 0000000000000..f0ca8c62d04b2 --- /dev/null +++ b/test/project/Extensions/ExtDep.jl/src/ExtDep.jl @@ -0,0 +1,5 @@ +module ExtDep + +struct ExtDepStruct end + +end # module ExtDep diff --git a/test/project/Extensions/ExtDep2/Project.toml b/test/project/Extensions/ExtDep2/Project.toml new file mode 100644 index 0000000000000..b25b99615b185 --- /dev/null +++ b/test/project/Extensions/ExtDep2/Project.toml @@ -0,0 +1,3 @@ +name = "ExtDep2" +uuid = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" +version = "0.1.0" diff --git a/test/project/Extensions/ExtDep2/src/ExtDep2.jl b/test/project/Extensions/ExtDep2/src/ExtDep2.jl new file mode 100644 index 0000000000000..969905e25992f --- /dev/null +++ b/test/project/Extensions/ExtDep2/src/ExtDep2.jl @@ -0,0 +1,5 @@ +module ExtDep2 + +greet() = print("Hello World!") + +end # module ExtDep2 diff --git a/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml b/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml new file mode 100644 index 0000000000000..c96e3ef508ca8 --- /dev/null +++ b/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml @@ -0,0 +1,25 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.10.0-DEV" +manifest_format = "2.0" +project_hash = "7cbe1857ecc6692a8cc8be428a5ad5073531ff98" + +[[deps.ExtDep]] +path = "../ExtDep.jl" +uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c" +version = "0.1.0" + +[[deps.ExtDep2]] +path = 
"../ExtDep2" +uuid = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" +version = "0.1.0" + +[[deps.HasExtensions]] +weakdeps = ["ExtDep", "ExtDep2"] +path = "../HasExtensions.jl" +uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8" +version = "0.1.0" + + [deps.HasExtensions.extensions] + Extension = "ExtDep" + ExtensionFolder = ["ExtDep", "ExtDep2"] diff --git a/test/project/Extensions/HasDepWithExtensions.jl/Project.toml b/test/project/Extensions/HasDepWithExtensions.jl/Project.toml new file mode 100644 index 0000000000000..8f308a9fbee72 --- /dev/null +++ b/test/project/Extensions/HasDepWithExtensions.jl/Project.toml @@ -0,0 +1,8 @@ +name = "HasDepWithExtensions" +uuid = "d4ef3d4a-8e22-4710-85d8-c6cf2eb9efca" +version = "0.1.0" + +[deps] +ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" +ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" +HasExtensions = "4d3288b3-3afc-4bb6-85f3-489fffe514c8" diff --git a/test/project/Extensions/HasDepWithExtensions.jl/src/HasDepWithExtensions.jl b/test/project/Extensions/HasDepWithExtensions.jl/src/HasDepWithExtensions.jl new file mode 100644 index 0000000000000..d64cbc680e3a5 --- /dev/null +++ b/test/project/Extensions/HasDepWithExtensions.jl/src/HasDepWithExtensions.jl @@ -0,0 +1,13 @@ +module HasDepWithExtensions + +using HasExtensions: HasExtensions, HasExtensionsStruct +using ExtDep: ExtDepStruct +# Loading ExtDep makes the extension "Extension" load + +function do_something() + HasExtensions.foo(HasExtensionsStruct()) == 1 || error() + HasExtensions.foo(ExtDepStruct()) == 2 || error() + return true +end + +end # module diff --git a/test/project/Extensions/HasExtensions.jl/Manifest.toml b/test/project/Extensions/HasExtensions.jl/Manifest.toml new file mode 100644 index 0000000000000..55f7958701a75 --- /dev/null +++ b/test/project/Extensions/HasExtensions.jl/Manifest.toml @@ -0,0 +1,7 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.10.0-DEV" +manifest_format = "2.0" +project_hash = 
"c87947f1f1f070eea848950c304d668a112dec3d" + +[deps] diff --git a/test/project/Extensions/HasExtensions.jl/Project.toml b/test/project/Extensions/HasExtensions.jl/Project.toml new file mode 100644 index 0000000000000..72577de36d65d --- /dev/null +++ b/test/project/Extensions/HasExtensions.jl/Project.toml @@ -0,0 +1,11 @@ +name = "HasExtensions" +uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8" +version = "0.1.0" + +[weakdeps] +ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" +ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" + +[extensions] +Extension = "ExtDep" +ExtensionFolder = ["ExtDep", "ExtDep2"] diff --git a/test/project/Extensions/HasExtensions.jl/ext/Extension.jl b/test/project/Extensions/HasExtensions.jl/ext/Extension.jl new file mode 100644 index 0000000000000..9216c403a485a --- /dev/null +++ b/test/project/Extensions/HasExtensions.jl/ext/Extension.jl @@ -0,0 +1,13 @@ +module Extension + +using HasExtensions, ExtDep + +HasExtensions.foo(::ExtDep.ExtDepStruct) = 2 + +function __init__() + HasExtensions.ext_loaded = true +end + +const extvar = 1 + +end diff --git a/test/project/Extensions/HasExtensions.jl/ext/ExtensionFolder/ExtensionFolder.jl b/test/project/Extensions/HasExtensions.jl/ext/ExtensionFolder/ExtensionFolder.jl new file mode 100644 index 0000000000000..1fb90d7989ca9 --- /dev/null +++ b/test/project/Extensions/HasExtensions.jl/ext/ExtensionFolder/ExtensionFolder.jl @@ -0,0 +1,9 @@ +module ExtensionFolder + +using ExtDep, ExtDep2, HasExtensions + +function __init__() + HasExtensions.ext_folder_loaded = true +end + +end diff --git a/test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl b/test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl new file mode 100644 index 0000000000000..dbfaeec4f8812 --- /dev/null +++ b/test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl @@ -0,0 +1,10 @@ +module HasExtensions + +struct HasExtensionsStruct end + +foo(::HasExtensionsStruct) = 1 + +ext_loaded = false +ext_folder_loaded = false + +end 
# module From dc1369e9219195c1b9da4d85d2a145fdc3ab5bd5 Mon Sep 17 00:00:00 2001 From: DilumAluthgeBot <43731525+DilumAluthgeBot@users.noreply.github.com> Date: Wed, 7 Dec 2022 16:00:25 -0500 Subject: [PATCH 19/57] =?UTF-8?q?=F0=9F=A4=96=20Bump=20the=20Pkg=20stdlib?= =?UTF-8?q?=20from=20ed6a5497e=20to=205d8b9ddb8=20(#47828)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Dilum Aluthge (cherry picked from commit db00cc1a8455ceb4a8dc3cb8dd6ded3d62e46dcb) --- .../Pkg-5d8b9ddb89ef7eff7c4d032cd4a7e33778c0bbde.tar.gz/md5 | 1 + .../Pkg-5d8b9ddb89ef7eff7c4d032cd4a7e33778c0bbde.tar.gz/sha512 | 1 + .../Pkg-ed6a5497e46ed541b2718c404c0f468b7f92263a.tar.gz/md5 | 1 - .../Pkg-ed6a5497e46ed541b2718c404c0f468b7f92263a.tar.gz/sha512 | 1 - stdlib/Pkg.version | 2 +- 5 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 deps/checksums/Pkg-5d8b9ddb89ef7eff7c4d032cd4a7e33778c0bbde.tar.gz/md5 create mode 100644 deps/checksums/Pkg-5d8b9ddb89ef7eff7c4d032cd4a7e33778c0bbde.tar.gz/sha512 delete mode 100644 deps/checksums/Pkg-ed6a5497e46ed541b2718c404c0f468b7f92263a.tar.gz/md5 delete mode 100644 deps/checksums/Pkg-ed6a5497e46ed541b2718c404c0f468b7f92263a.tar.gz/sha512 diff --git a/deps/checksums/Pkg-5d8b9ddb89ef7eff7c4d032cd4a7e33778c0bbde.tar.gz/md5 b/deps/checksums/Pkg-5d8b9ddb89ef7eff7c4d032cd4a7e33778c0bbde.tar.gz/md5 new file mode 100644 index 0000000000000..24682e718f701 --- /dev/null +++ b/deps/checksums/Pkg-5d8b9ddb89ef7eff7c4d032cd4a7e33778c0bbde.tar.gz/md5 @@ -0,0 +1 @@ +e0841b6343d50524c3bf694cab48ac16 diff --git a/deps/checksums/Pkg-5d8b9ddb89ef7eff7c4d032cd4a7e33778c0bbde.tar.gz/sha512 b/deps/checksums/Pkg-5d8b9ddb89ef7eff7c4d032cd4a7e33778c0bbde.tar.gz/sha512 new file mode 100644 index 0000000000000..15829d6b80fa3 --- /dev/null +++ b/deps/checksums/Pkg-5d8b9ddb89ef7eff7c4d032cd4a7e33778c0bbde.tar.gz/sha512 @@ -0,0 +1 @@ 
+89ed36a9e9b4b297d9480474401b2b337d736bc307684bb4d35841159400ff651d5fc57d7cd643a0d4a9dbd01d2773e86e32b3cbfb9e5a8df5dac64990ea99d0 diff --git a/deps/checksums/Pkg-ed6a5497e46ed541b2718c404c0f468b7f92263a.tar.gz/md5 b/deps/checksums/Pkg-ed6a5497e46ed541b2718c404c0f468b7f92263a.tar.gz/md5 deleted file mode 100644 index 8e1c22b677fcd..0000000000000 --- a/deps/checksums/Pkg-ed6a5497e46ed541b2718c404c0f468b7f92263a.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -4fe1e70708ff64fae949facfa3a7d419 diff --git a/deps/checksums/Pkg-ed6a5497e46ed541b2718c404c0f468b7f92263a.tar.gz/sha512 b/deps/checksums/Pkg-ed6a5497e46ed541b2718c404c0f468b7f92263a.tar.gz/sha512 deleted file mode 100644 index 72bc2e7bdaf20..0000000000000 --- a/deps/checksums/Pkg-ed6a5497e46ed541b2718c404c0f468b7f92263a.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -806b5e215a4670b6bceaa85b20ebf305f07fd84700e02f2471ed52c18ee01323dd151141efff1904678aedbf832b72c6ab9fb031ea30189c897d934870c99c35 diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version index faff813896433..9e91595d927d0 100644 --- a/stdlib/Pkg.version +++ b/stdlib/Pkg.version @@ -1,4 +1,4 @@ PKG_BRANCH = master -PKG_SHA1 = ed6a5497e46ed541b2718c404c0f468b7f92263a +PKG_SHA1 = 5d8b9ddb89ef7eff7c4d032cd4a7e33778c0bbde PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1 From 9b9a703a11df6f99fce026450bb89cf45a3940c6 Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Sat, 26 Nov 2022 04:58:10 -0500 Subject: [PATCH 20/57] Fix and simplify inference timing logic (#47711) * Fix and simplify inference timing logic * Reduce task struct size (cherry picked from commit 88a0627003c45ddac304b7be933c93caae8ae6b3) --- src/gf.c | 14 ++++++++------ src/julia.h | 4 ++-- src/task.c | 2 ++ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/gf.c b/src/gf.c index 0bce672ca729c..0e98f2a140d4a 100644 --- a/src/gf.c +++ b/src/gf.c @@ -3419,18 +3419,20 @@ 
int jl_has_concrete_subtype(jl_value_t *typ) JL_DLLEXPORT void jl_typeinf_timing_begin(void) { - if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) { - jl_task_t *ct = jl_current_task; - if (ct->inference_start_time == 0 && ct->reentrant_inference == 1) - ct->inference_start_time = jl_hrtime(); + jl_task_t *ct = jl_current_task; + if (ct->reentrant_inference == 1) { + ct->inference_start_time = jl_hrtime(); } } JL_DLLEXPORT void jl_typeinf_timing_end(void) { jl_task_t *ct = jl_current_task; - if (ct->inference_start_time != 0 && ct->reentrant_inference == 1) { - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - ct->inference_start_time)); + if (ct->reentrant_inference == 1) { + if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) { + uint64_t inftime = jl_hrtime() - ct->inference_start_time; + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, inftime); + } ct->inference_start_time = 0; } } diff --git a/src/julia.h b/src/julia.h index 3a30a2e1c59d8..5ac6bdc36e181 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1937,8 +1937,8 @@ typedef struct _jl_task_t { void *stkbuf; // malloc'd memory (either copybuf or stack) size_t bufsz; // actual sizeof stkbuf uint64_t inference_start_time; // time when inference started - unsigned int reentrant_inference; // How many times we've reentered inference - unsigned int reentrant_codegen; // How many times we've reentered codegen + uint16_t reentrant_inference; // How many times we've reentered inference + uint16_t reentrant_codegen; // How many times we've reentered codegen unsigned int copy_stack:31; // sizeof stack for copybuf unsigned int started:1; } jl_task_t; diff --git a/src/task.c b/src/task.c index 81b90a832e2dd..a5ebc1ce26005 100644 --- a/src/task.c +++ b/src/task.c @@ -940,6 +940,7 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion t->world_age = ct->world_age; t->reentrant_codegen = 0; t->reentrant_inference = 0; + t->inference_start_time 
= 0; #ifdef COPY_STACKS if (!t->copy_stack) { @@ -1527,6 +1528,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) ct->world_age = 1; // OK to run Julia code on this task ct->reentrant_codegen = 0; ct->reentrant_inference = 0; + ct->inference_start_time = 0; ptls->root_task = ct; jl_atomic_store_relaxed(&ptls->current_task, ct); JL_GC_PROMISE_ROOTED(ct); From 1b85c1f8f2322e9b2d2b99befb6238f7acc691b9 Mon Sep 17 00:00:00 2001 From: apaz Date: Mon, 28 Nov 2022 14:33:45 -0600 Subject: [PATCH 21/57] Fix nth_methtable tparam of -1 when n==0 (#47666) Fixes #47625 (cherry picked from commit 3f9409c8b4e9c82d567f1dd3ac9980fad1872a3c) --- src/method.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/method.c b/src/method.c index ec49fdf32a193..6f240efaa0b7a 100644 --- a/src/method.c +++ b/src/method.c @@ -911,7 +911,7 @@ static jl_methtable_t *nth_methtable(jl_value_t *a JL_PROPAGATES_ROOT, int n) JL if (mt != NULL) return mt; } - if (jl_is_tuple_type(a)) { + else if (jl_is_tuple_type(a)) { if (jl_nparams(a) >= n) return nth_methtable(jl_tparam(a, n - 1), 0); } From 81f8582fb5f651a95f7973b79bd815fe9540520d Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 29 Nov 2022 18:48:15 -0500 Subject: [PATCH 22/57] Fix generator-invocation legality check for varargs generators (#47739) This code was introduced by me back in #31025 to speed up evaluation of generated functions that didn't make use of all of their arguments to make generation decisions. However, it neglected to take into account the possibility that the generator could be varargs. As a result, an unfortunate coincidence of an unused slot in the correct position could have allowed expansion of generators that were not supposed to be expandable. This can cause incorrect inference with all the usual consequences. However, fortunately this coincidence appears to be pretty rare. 
Fixes https://github.com/JuliaDebug/CassetteOverlay.jl/issues/12 (cherry picked from commit 328dd578958d9c2a22ddb11970324ecd04e94314) --- base/reflection.jl | 14 +++++++++++++- test/staged.jl | 15 +++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/base/reflection.jl b/base/reflection.jl index 1c7cc44845158..1adc69291934e 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -1164,13 +1164,25 @@ function may_invoke_generator(method::Method, @nospecialize(atype), sparams::Sim end end end - for i = 1:length(at.parameters) + non_va_args = method.isva ? method.nargs - 1 : method.nargs + for i = 1:non_va_args if !isdispatchelem(at.parameters[i]) if (ast_slotflag(code, 1 + i + nsparams) & SLOT_USED) != 0 return false end end end + if method.isva + # If the va argument is used, we need to ensure that all arguments that + # contribute to the va tuple are dispatch elements + if (ast_slotflag(code, 1 + method.nargs + nsparams) & SLOT_USED) != 0 + for i = (non_va_args+1):length(at.parameters) + if !isdispatchelem(at.parameters[i]) + return false + end + end + end + end return true end diff --git a/test/staged.jl b/test/staged.jl index b99ef46a2bc1e..516baea93ec04 100644 --- a/test/staged.jl +++ b/test/staged.jl @@ -305,3 +305,18 @@ end end @test f33243() === 2 @test x33243 === 2 + +# https://github.com/JuliaDebug/CassetteOverlay.jl/issues/12 +# generated function with varargs and unfortunately placed unused slot +@generated function f_vararg_generated(args...) + :($args) +end +g_vararg_generated() = f_vararg_generated((;), (;), Base.inferencebarrier((;))) +let tup = g_vararg_generated() + @test !any(==(Any), tup) + # This is just to make sure that the test is actually testing what we want - + # the test only works if there's an unused slot that matches the position of the + # inferencebarrier argument above (N.B.
the generator function itself + # shifts everything over by 1) + @test code_lowered(first(methods(f_vararg_generated)).generator.gen)[1].slotflags[5] == UInt8(0x00) +end From 9da50d2e86214ba8d4b162414d4bccd2f3b2ff78 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Wed, 30 Nov 2022 07:37:13 +0100 Subject: [PATCH 23/57] Bump libuv. (#47707) Adapts to the final version of the constrained/available memory APIs. (cherry picked from commit 41b73e2c71ca844a372af2b75325940cfcb681b6) --- base/sysinfo.jl | 9 ++--- deps/checksums/libuv | 64 +++++++++++++++++------------------ deps/libuv.version | 2 +- src/gc.c | 2 +- stdlib/LibUV_jll/Project.toml | 2 +- 5 files changed, 40 insertions(+), 39 deletions(-) diff --git a/base/sysinfo.jl b/base/sysinfo.jl index 85bf53efba1f2..be11d5fb1cc98 100644 --- a/base/sysinfo.jl +++ b/base/sysinfo.jl @@ -279,11 +279,12 @@ This amount may be constrained, e.g., by Linux control groups. For the unconstra amount, see `Sys.physical_memory()`. """ function total_memory() - memory = ccall(:uv_get_constrained_memory, UInt64, ()) - if memory == 0 - return total_physical_memory() + constrained = ccall(:uv_get_constrained_memory, UInt64, ()) + physical = total_physical_memory() + if 0 < constrained <= physical + return constrained else - return memory + return physical end end diff --git a/deps/checksums/libuv b/deps/checksums/libuv index 844b063287c6d..81bc2178963d3 100644 --- a/deps/checksums/libuv +++ b/deps/checksums/libuv @@ -1,34 +1,34 @@ -LibUV.v2.0.1+11.aarch64-apple-darwin.tar.gz/md5/60c0a26acbd9c6d35743c19ac917f9b9 -LibUV.v2.0.1+11.aarch64-apple-darwin.tar.gz/sha512/4f62658c10486040ffe04e8e694fbcdb2a07340d8f1d18b703598141f5b377c421e06b7896dc0be8472c6c9f748ff44be109db99304b0442f10eb878bf2af1df -LibUV.v2.0.1+11.aarch64-linux-gnu.tar.gz/md5/215a204f1fb13a8d1fc9b26106814bee -LibUV.v2.0.1+11.aarch64-linux-gnu.tar.gz/sha512/3f20dc865a1ebae98ac75581585c5057b6c27bbfe084580274089f3103b4ad5fceee7dd5822b6f1cee4dfdfe027a379ea5116e37ca331845108380d6c2ecf63f 
-LibUV.v2.0.1+11.aarch64-linux-musl.tar.gz/md5/b618837c1c2ff1e64578ae043c0a00c3 -LibUV.v2.0.1+11.aarch64-linux-musl.tar.gz/sha512/7a82709a183977237f76cc0048034522466843d583519cec95fc7dd39cab1891b397052c6deb69b8d6fab6d0f57c91b642431b579bfb6c790881509b8daaa24c -LibUV.v2.0.1+11.armv6l-linux-gnueabihf.tar.gz/md5/f09464b716b779b6cccc8e8103313acb -LibUV.v2.0.1+11.armv6l-linux-gnueabihf.tar.gz/sha512/7c39685bbb9beb39670c94a3dea0cfac8685c9ff1116026784e68610d9314c281690f87bba918dfcc60f39e3f5c54ce432ab7365f785510be4108fa2454905dc -LibUV.v2.0.1+11.armv6l-linux-musleabihf.tar.gz/md5/6a483f49e053a1d796c2280a165e5cdd -LibUV.v2.0.1+11.armv6l-linux-musleabihf.tar.gz/sha512/16d6ade651018b20e2b465ee9beab6d6442a8d3942249a90def2797ac2b2c0376173eb9411f26cdd3f82ae9798640f819e139dd3cd70ce7e4684f6154f68fbfa -LibUV.v2.0.1+11.armv7l-linux-gnueabihf.tar.gz/md5/d3c6110ba03be6136d0c0a3740b2bc21 -LibUV.v2.0.1+11.armv7l-linux-gnueabihf.tar.gz/sha512/a41c26cd52c82804bf14d783965ebf4893db0cae7319d9840777485a328237e9f7c54aa3c2dc9a0ee39f98db430b8616de6f60906fbd00771f9a50e989e68fde -LibUV.v2.0.1+11.armv7l-linux-musleabihf.tar.gz/md5/a302e22ac3bc6d0909cd1b2a90c712ac -LibUV.v2.0.1+11.armv7l-linux-musleabihf.tar.gz/sha512/dd0291b86e11dbf7a8cf5b22f862bb0a93dcfd0d5ae009fe0c53f569d012bc2ea4895976c699aabd79ce05f4ae6161ce56263859c1994ea696e50f918fc2f51b -LibUV.v2.0.1+11.i686-linux-gnu.tar.gz/md5/d3b8cfaee74da3f4ba58c6845345ebfe -LibUV.v2.0.1+11.i686-linux-gnu.tar.gz/sha512/9623b84f6411f9b7c5a67f5e346d6661f00103a8417e22018b513efa3b8904268c57c7de21cc2f66a55727060436159f70727beed49b7efc882befd4d399332d -LibUV.v2.0.1+11.i686-linux-musl.tar.gz/md5/0e04697b85d2798c19f56e437eb55e56 -LibUV.v2.0.1+11.i686-linux-musl.tar.gz/sha512/75373bb5a5e3dd8f3fa4a85664bcfa0c651a793d8b104264eafa9626520cfb936025d4b1540c8e6d16a73468b7a1068a5ab4fb3b37762404d1ef7225a85e1664 -LibUV.v2.0.1+11.i686-w64-mingw32.tar.gz/md5/617dfd4290517837ad4c709dc4301733 
-LibUV.v2.0.1+11.i686-w64-mingw32.tar.gz/sha512/7069f8bbb876ab5e2a7f0d79f4a297cd7984e1a83eadb1f91f5de86afc951b38e5bf2641883a4b7f327eabbc2f25434453b855ff7d537d30cc5ae6c8a00341d4 -LibUV.v2.0.1+11.powerpc64le-linux-gnu.tar.gz/md5/70f16a63097a353fa45971d3e4313da4 -LibUV.v2.0.1+11.powerpc64le-linux-gnu.tar.gz/sha512/ecc9f39fef7e9917dbadf4a7fd7966d06fb240f73cc2df021d9b8fa1951655d078782f17948abbfb5a21f2b7fcd9c7390af0a05610a9b952d55d53b6826ec312 -LibUV.v2.0.1+11.x86_64-apple-darwin.tar.gz/md5/17fee1aaeb6947614705120a62a21fa4 -LibUV.v2.0.1+11.x86_64-apple-darwin.tar.gz/sha512/cf4c80e797e3d68f54916bae6163d948f0a300f201f2b8209310970751d68eef6c29da571721aa98794c9ae30f7dc655385a5091c716e0402d3241342a1d9544 -LibUV.v2.0.1+11.x86_64-linux-gnu.tar.gz/md5/7e2cfbd1d4cdf2afec2ab18f0f75e812 -LibUV.v2.0.1+11.x86_64-linux-gnu.tar.gz/sha512/8551dbaf242c859010481e12864d75e8df01c69a90b94293402881b50e32105add7f7fdae455144076a2169f37e5796eb528d8ef6fc02226fbbb9d0f1bc6f6d3 -LibUV.v2.0.1+11.x86_64-linux-musl.tar.gz/md5/3879f86977865ceac0ea36e3f563be73 -LibUV.v2.0.1+11.x86_64-linux-musl.tar.gz/sha512/0831c0606e9bed4f819cb8f2abba464c9e0034533abdb5bf6e6e92b9f37644103c39adc4498db5128395dc65da28c93d7cd01bfc474985fa5dd660b04ca14cc1 -LibUV.v2.0.1+11.x86_64-unknown-freebsd.tar.gz/md5/288d9ab3dd95028568880838462c1f35 -LibUV.v2.0.1+11.x86_64-unknown-freebsd.tar.gz/sha512/ac0366d8eb4d0908d5ea55105dc608418455bc601fc22058512e228225cbd1ad2c778f7838b9d2374a6f1661e386f4121bae0f4cecaa18a4ba70a3a743318e24 -LibUV.v2.0.1+11.x86_64-w64-mingw32.tar.gz/md5/2b390151d13474968444b0f07adc92c0 -LibUV.v2.0.1+11.x86_64-w64-mingw32.tar.gz/sha512/6c56a7ab3e28ebcc7e55917b5ba051b4725ca77752b5206f865b306e905d119170cd0bb4e117c7352a95aa13b814ec5e15547ec3904615b561775a17e6993741 +LibUV.v2.0.1+13.aarch64-apple-darwin.tar.gz/md5/1a58ce9dc88984c3b5f7df97af6cbf83 
+LibUV.v2.0.1+13.aarch64-apple-darwin.tar.gz/sha512/2bfd482ac759ac88d885371854affa8e358a10fea6c7756e0d1b366bc82ecbea56bdf24ca634525fb2a6fc2b3a5c77b07a4c6dec2923d8bffe2bc962bd3e7f84 +LibUV.v2.0.1+13.aarch64-linux-gnu.tar.gz/md5/7f270dd1e3046c8db432e350dd5cf114 +LibUV.v2.0.1+13.aarch64-linux-gnu.tar.gz/sha512/c0debcf17b54ba9f1588d4b267d610751f739d8ff96936c9d5fb6d8742039f8736c63fa70037322705569e221d73fb83c03b6ba9fb4454442fffd3a9f1a1a2da +LibUV.v2.0.1+13.aarch64-linux-musl.tar.gz/md5/07f56c32d5a2c12e6c351cf9f705631c +LibUV.v2.0.1+13.aarch64-linux-musl.tar.gz/sha512/8037d7aa0cb06850f055fd19cebdcfcf3146dde0d12768a9669bf05dcab91fdf3708798203258cb3f452158bdec7faae41e6afbb0e60b21403e683db3e23a1c9 +LibUV.v2.0.1+13.armv6l-linux-gnueabihf.tar.gz/md5/5558a7f68c7c375f40bc64da59fef0ad +LibUV.v2.0.1+13.armv6l-linux-gnueabihf.tar.gz/sha512/92ed6601cb5aa9a3ea2478a1485849543c9e847c8e85542e72f372a2d37c4c8b90f5ecb1bee1e462db31e1e8dba460f584b3cca9c833989c2b9ee404e355654e +LibUV.v2.0.1+13.armv6l-linux-musleabihf.tar.gz/md5/de6bfb7f0c0468b79e8895f166fb6340 +LibUV.v2.0.1+13.armv6l-linux-musleabihf.tar.gz/sha512/7948d007171bf57b827b489f3627ac74df447f4d696e8226e54e95ef0c8eed5a5ddbf758fbad841bc367f78cd61e6a5899eb478003dca3a79cb494b38cab830b +LibUV.v2.0.1+13.armv7l-linux-gnueabihf.tar.gz/md5/5be35de1d881f80981647c369b9b4ec8 +LibUV.v2.0.1+13.armv7l-linux-gnueabihf.tar.gz/sha512/458e5058ea4e794e0dc790da4c98569676056bac336df69762e8ccfec8f2955dcc55e8d090daa1b191c0ffa41392a04530c9bc28aa27cf411c1df2f1ba14bb97 +LibUV.v2.0.1+13.armv7l-linux-musleabihf.tar.gz/md5/8d034490da1ec2ef3dd3c69336177654 +LibUV.v2.0.1+13.armv7l-linux-musleabihf.tar.gz/sha512/7f595a8ab8b664d229cf6144e9ed1b5936ba8aaa70b92611ddb85bbe9046bb1b94d8417355a5abf058fb00023d4d56be0b2ddfd5dba896cd7b64e84e32dbfc5a +LibUV.v2.0.1+13.i686-linux-gnu.tar.gz/md5/ccb9aba78456c99b8473e8ddd328f90e 
+LibUV.v2.0.1+13.i686-linux-gnu.tar.gz/sha512/d382d90137db308933257a75e51d90988d6d07663b3b2915478547127d32f73ae6cdb4575d5ee20758f8850c7e85908fe4710c053cb361826621f22bc5b6502d +LibUV.v2.0.1+13.i686-linux-musl.tar.gz/md5/5ade48f16aa26bb68dc046d285c73043 +LibUV.v2.0.1+13.i686-linux-musl.tar.gz/sha512/f5728a5dc567268e59aa2697deb793ae427e11dcb6796c577e3da3ac24225ece5d4a6c4f903d4a7b184d3c3a3c8c1586c34b97e4a75de0a4e23ace720020fa8c +LibUV.v2.0.1+13.i686-w64-mingw32.tar.gz/md5/399d6fbe54dcfb2f997f276cd38fd185 +LibUV.v2.0.1+13.i686-w64-mingw32.tar.gz/sha512/55707e02a4b5bdf9c94683dbaaea1cac58f7735d5ae22009c219ea61ddfab1fe19b9bc6e830fc32207efc588c27f92770d2441b972f351a1bb3fdbbf5671a58b +LibUV.v2.0.1+13.powerpc64le-linux-gnu.tar.gz/md5/26656d4eaae8739099c55054bad54f57 +LibUV.v2.0.1+13.powerpc64le-linux-gnu.tar.gz/sha512/f85f8cfd91e7b1b02b073931ef9a3bb05620641d18ada039744a92b8c40e5a3de8d7c5efa7189b88baf1eb11fbcf9e6d16031b86e40f99f1b7cfebb0f5c5adf1 +LibUV.v2.0.1+13.x86_64-apple-darwin.tar.gz/md5/c7da6b91394a20c43acdf6f680cb62e2 +LibUV.v2.0.1+13.x86_64-apple-darwin.tar.gz/sha512/238d22bd299ae3b0dfd24a5b38d6d0d07b751fb301487a2d1d2f5313ae3596f33492388ea9fbff549293787505fc527e174ebcd4068f1bda43b40bc19e016d89 +LibUV.v2.0.1+13.x86_64-linux-gnu.tar.gz/md5/8c8913068263257cce5042b725918e0e +LibUV.v2.0.1+13.x86_64-linux-gnu.tar.gz/sha512/a848381012d5a20a0c881f5835e479cfff811928ce508cc57041d69668782f2135c14c7e5388e7dbf693ae57aa1825d911f6f450b9e909cce45487b03a581a23 +LibUV.v2.0.1+13.x86_64-linux-musl.tar.gz/md5/16747c066b6d7fe56850c77f66ea7478 +LibUV.v2.0.1+13.x86_64-linux-musl.tar.gz/sha512/833a02f9191edf3b56f1e02f5671f22de6cb27ec3c9f770530ec95d8da7ba0b9c05bcdf6b094224ea8e43ba70918e1599f3237bd98900763daef80c327d3d2de +LibUV.v2.0.1+13.x86_64-unknown-freebsd.tar.gz/md5/71f7d9d9234a0623c4b2ee3a44089b62 +LibUV.v2.0.1+13.x86_64-unknown-freebsd.tar.gz/sha512/e73911c3ec35a2201d42c035ecc86e8bd860604b950cb1b7784ff49e27ef5ac9b1da09b59d359ff25b093b87593a8305105bc43711c12eb9654972e280c26d3c 
+LibUV.v2.0.1+13.x86_64-w64-mingw32.tar.gz/md5/471d20fa2eac6bfd5d7cdb1b7f58c602 +LibUV.v2.0.1+13.x86_64-w64-mingw32.tar.gz/sha512/3f5ad55268184227378ddcfed0146bf0386c8cf468bc53a348d21195d818db4db768be61fd23e1ee2ecbb52f073815884a04a923d815b9b5992825d144c0633a libuv-e6f0e4900e195c8352f821abe2b3cffc3089547b.tar.gz/md5/c4465d7bff6610761cf37a1e8e3da08c libuv-e6f0e4900e195c8352f821abe2b3cffc3089547b.tar.gz/sha512/3347668b2b377704f3188e8901b130e891d19ac944ab3b7c1f4939d7afa119afff7dc10feaa2a518ec4122968147e31eb8932c6dfc1142a58a4828488f343191 diff --git a/deps/libuv.version b/deps/libuv.version index e4b277e36b099..01bf4fecc6dc6 100644 --- a/deps/libuv.version +++ b/deps/libuv.version @@ -4,4 +4,4 @@ LIBUV_JLL_NAME := LibUV ## source build LIBUV_VER := 2 LIBUV_BRANCH=julia-uv2-1.44.2 -LIBUV_SHA1=e6f0e4900e195c8352f821abe2b3cffc3089547b +LIBUV_SHA1=2723e256e952be0b015b3c0086f717c3d365d97e diff --git a/src/gc.c b/src/gc.c index 15e3883df81ca..3f80e83d28c1b 100644 --- a/src/gc.c +++ b/src/gc.c @@ -3681,7 +3681,7 @@ void jl_gc_init(void) #ifdef _P64 total_mem = uv_get_total_memory(); uint64_t constrained_mem = uv_get_constrained_memory(); - if (constrained_mem != 0) + if (constrained_mem > 0 && constrained_mem < total_mem) total_mem = constrained_mem; #endif diff --git a/stdlib/LibUV_jll/Project.toml b/stdlib/LibUV_jll/Project.toml index 6f68176fc97e7..2954809921440 100644 --- a/stdlib/LibUV_jll/Project.toml +++ b/stdlib/LibUV_jll/Project.toml @@ -1,6 +1,6 @@ name = "LibUV_jll" uuid = "183b4373-6708-53ba-ad28-60e28bb38547" -version = "2.0.1+11" +version = "2.0.1+13" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" From fcf02e8e6f0efa477b05e25055a62789730a6ec5 Mon Sep 17 00:00:00 2001 From: Max Horn Date: Wed, 30 Nov 2022 18:16:18 +0100 Subject: [PATCH 24/57] Provider cycleclock() for 32bit ARM targets (#47358) Based on https://github.com/google/benchmark/blob/main/src/cycleclock.h (cherry picked from commit 060a4920a03d062ee42c911ea262f53e3da45bbe) --- src/julia_internal.h 
| 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/julia_internal.h b/src/julia_internal.h index fc30ed09277c4..8bcc2efb8d4a6 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -20,6 +20,9 @@ #else #define sleep(x) Sleep(1000*x) #endif +#if defined(_CPU_ARM_) +#include <sys/time.h> +#endif #ifdef __cplusplus extern "C" { @@ -216,6 +219,26 @@ static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT int64_t virtual_timer_value; __asm__ volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); return virtual_timer_value; +#elif defined(_CPU_ARM_) + // V6 is the earliest arch that has a standard cyclecount +#if (__ARM_ARCH >= 6) + uint32_t pmccntr; + uint32_t pmuseren; + uint32_t pmcntenset; + // Read the user mode perf monitor counter access permissions. + asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren)); + if (pmuseren & 1) { // Allows reading perfmon counters for user mode code. + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset)); + if (pmcntenset & 0x80000000ul) { // Is it counting? + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr)); + // The counter is set up to count every 64th cycle + return (int64_t)(pmccntr) * 64; // Should optimize to << 6 + } + } +#endif + struct timeval tv; + gettimeofday(&tv, NULL); + return (int64_t)(tv.tv_sec) * 1000000 + tv.tv_usec; #elif defined(_CPU_PPC64_) // This returns a time-base, which is not always precisely a cycle-count. // https://reviews.llvm.org/D78084 From 628c953f6b7425cbd1c24c12032c22d1f6f07eca Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 30 Nov 2022 14:19:17 -0500 Subject: [PATCH 25/57] strengthen setglobal to default to release-consume ordering (#47742) In looking at a TSAN report recently, I noticed that globals were getting stored as atomic-unordered (since c92ab5e79ea #44182), instead of atomic-release as intended (since 46135dfce9074e5bf94eb277de28a33cad9cc14f #45484).
(cherry picked from commit f4534d16b47d11ce18902ff4cd8ac0936e5ce971) --- src/builtins.c | 2 +- src/codegen.cpp | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/builtins.c b/src/builtins.c index bf9f886d92ba8..90cc544b47986 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -1201,7 +1201,7 @@ JL_CALLABLE(jl_f_getglobal) JL_CALLABLE(jl_f_setglobal) { - enum jl_memory_order order = jl_memory_order_monotonic; + enum jl_memory_order order = jl_memory_order_release; JL_NARGS(setglobal!, 3, 4); if (nargs == 4) { JL_TYPECHK(setglobal!, symbol, args[3]); diff --git a/src/codegen.cpp b/src/codegen.cpp index f02815df37e73..c938651059ca7 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -2879,6 +2879,7 @@ static bool emit_f_opglobal(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, const jl_cgval_t &sym = argv[2]; const jl_cgval_t &val = argv[3]; enum jl_memory_order order = jl_memory_order_unspecified; + assert(f == jl_builtin_setglobal && modifyop == nullptr && "unimplemented"); if (nargs == 4) { const jl_cgval_t &arg4 = argv[4]; @@ -2888,7 +2889,7 @@ static bool emit_f_opglobal(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, return false; } else - order = jl_memory_order_monotonic; + order = jl_memory_order_release; if (order == jl_memory_order_invalid || order == jl_memory_order_notatomic) { emit_atomic_error(ctx, order == jl_memory_order_invalid ? "invalid atomic ordering" : "setglobal!: module binding cannot be written non-atomically"); @@ -4686,7 +4687,7 @@ static void emit_assignment(jl_codectx_t &ctx, jl_value_t *l, jl_value_t *r, ssi bp = global_binding_pointer(ctx, jl_globalref_mod(l), jl_globalref_name(l), &bnd, true); } if (bp != NULL) { - emit_globalset(ctx, bnd, bp, rval_info, AtomicOrdering::Unordered); + emit_globalset(ctx, bnd, bp, rval_info, AtomicOrdering::Release); // Global variable. Does not need debug info because the debugger knows about // its memory location. 
} From dfec1608d2777f1478224cedbacab622755cbf61 Mon Sep 17 00:00:00 2001 From: Simeon Schaub Date: Thu, 1 Dec 2022 21:42:20 +0100 Subject: [PATCH 26/57] fix unescaping in `global` expressions (#47719) This fixes some issues around macro hygiene in `global` expressions. Apparently we always treat l-values in global expressions as being escaped, but we still need to be careful to handle type annotations and destructuring correctly. (cherry picked from commit cc25a1369472756c63c4da81abbc106e2790b4f0) --- src/macroexpand.scm | 29 +++++++++++++++++++++-------- test/syntax.jl | 23 +++++++++++++++++++++++ 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/src/macroexpand.scm b/src/macroexpand.scm index 516dd9b29f354..2933ca4888c4e 100644 --- a/src/macroexpand.scm +++ b/src/macroexpand.scm @@ -183,6 +183,19 @@ (cadr e) e)) +(define (unescape-global-lhs e env m parent-scope inarg) + (cond ((not (pair? e)) e) + ((eq? (car e) 'escape) (cadr e)) + ((memq (car e) '(parameters tuple)) + (list* (car e) (map (lambda (e) + (unescape-global-lhs e env m parent-scope inarg)) + (cdr e)))) + ((and (memq (car e) '(|::| kw)) (length= e 3)) + (list (car e) (unescape-global-lhs (cadr e) env m parent-scope inarg) + (resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg))) + (else + (resolve-expansion-vars-with-new-env e env m parent-scope inarg)))) + (define (typedef-expr-name e) (cond ((atom? e) e) ((or (eq? (car e) 'curly) (eq? (car e) '<:)) (typedef-expr-name (cadr e))) @@ -344,14 +357,14 @@ (m (cadr scope)) (parent-scope (cdr parent-scope))) (resolve-expansion-vars-with-new-env (cadr e) env m parent-scope inarg)))) - ((global) (let ((arg (cadr e))) - (cond ((symbol? arg) e) - ((assignment? 
arg) - `(global - (= ,(unescape (cadr arg)) - ,(resolve-expansion-vars-with-new-env (caddr arg) env m parent-scope inarg)))) - (else - `(global ,(resolve-expansion-vars-with-new-env arg env m parent-scope inarg)))))) + ((global) + `(global + ,@(map (lambda (arg) + (if (assignment? arg) + `(= ,(unescape-global-lhs (cadr arg) env m parent-scope inarg) + ,(resolve-expansion-vars-with-new-env (caddr arg) env m parent-scope inarg)) + (unescape-global-lhs arg env m parent-scope inarg))) + (cdr e)))) ((using import export meta line inbounds boundscheck loopinfo inline noinline) (map unescape e)) ((macrocall) e) ; invalid syntax anyways, so just act like it's quoted. ((symboliclabel) e) diff --git a/test/syntax.jl b/test/syntax.jl index cff8628290081..fe9f6c43332e5 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -3429,3 +3429,26 @@ end elseif false || (()->true)() 42 end)) == 42 + +macro _macroexpand(x, m=__module__) + :($__source__; macroexpand($m, Expr(:var"hygienic-scope", $(esc(Expr(:quote, x))), $m))) +end + +@testset "unescaping in :global expressions" begin + m = @__MODULE__ + @test @_macroexpand(global x::T) == :(global x::$(GlobalRef(m, :T))) + @test @_macroexpand(global (x, $(esc(:y)))) == :(global (x, y)) + @test @_macroexpand(global (x::S, $(esc(:y))::$(esc(:T)))) == + :(global (x::$(GlobalRef(m, :S)), y::T)) + @test @_macroexpand(global (; x, $(esc(:y)))) == :(global (; x, y)) + @test @_macroexpand(global (; x::S, $(esc(:y))::$(esc(:T)))) == + :(global (; x::$(GlobalRef(m, :S)), y::T)) + + @test @_macroexpand(global x::T = a) == :(global x::$(GlobalRef(m, :T)) = $(GlobalRef(m, :a))) + @test @_macroexpand(global (x, $(esc(:y))) = a) == :(global (x, y) = $(GlobalRef(m, :a))) + @test @_macroexpand(global (x::S, $(esc(:y))::$(esc(:T))) = a) == + :(global (x::$(GlobalRef(m, :S)), y::T) = $(GlobalRef(m, :a))) + @test @_macroexpand(global (; x, $(esc(:y))) = a) == :(global (; x, y) = $(GlobalRef(m, :a))) + @test @_macroexpand(global (; x::S, 
$(esc(:y))::$(esc(:T))) = a) == + :(global (; x::$(GlobalRef(m, :S)), y::T) = $(GlobalRef(m, :a))) +end From d8cbffd57284060cb420b2571233da5fc5073e59 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Sat, 3 Dec 2022 06:19:20 +0600 Subject: [PATCH 27/57] Refactor and document sorting dispatch (#47383) * create an internal `_sort!` function and use it (rename the existing `_sort!` to `__sort!`) * test for several bugs that slipped through the test suite * Give each sorting pass and DEFAULT_STABLE a docstring * add pretty printing for the new algorithms that are much more flexible than the old ones * fix unexpected allocations in Radix Sort fixes #47474 in this PR rather than separate to avoid dealing with the merge * support and test backwards compatibility with packages that depend on sorting internals * support 3-, 5-, and 6-argument sort! for backwards compatibility * overhaul scratch space handling make _sort! return scratch space rather than sorted vector so that things like IEEEFloatOptimization can re-use the scratch space allocated on their first recursive call * test handling -0.0 in IEEEFloatOptimization * fix and test bug where countsort's correct overflow behavior triggers error due to unexpected promotion to UInt (cherry picked from commit cee0a0494c70208b6cd5a32ccdf75d954a429870) --- base/sort.jl | 1287 ++++++++++++++++++++++++++++++----------------- test/sorting.jl | 171 ++++++- 2 files changed, 968 insertions(+), 490 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index e995a64a9f76f..932da36b9e1d6 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -86,7 +86,7 @@ issorted(itr; issorted(itr, ord(lt,by,rev,order)) function partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}, o::Ordering) - sort!(v, _PartialQuickSort(k), o) + _sort!(v, _PartialQuickSort(k), o, (;)) maybeview(v, k) end @@ -407,112 +407,315 @@ function insorted end insorted(x, v::AbstractVector; kw...)
= !isempty(searchsorted(v, x; kw...)) insorted(x, r::AbstractRange) = in(x, r) -## sorting algorithms ## +## Alternative keyword management -abstract type Algorithm end +macro getkw(syms...) + getters = (getproperty(Sort, Symbol(:_, sym)) for sym in syms) + Expr(:block, (:($(esc(:((kw, $sym) = $getter(v, o, kw))))) for (sym, getter) in zip(syms, getters))...) +end -struct InsertionSortAlg <: Algorithm end -struct MergeSortAlg <: Algorithm end -struct AdaptiveSortAlg <: Algorithm end +for (sym, deps, exp, type) in [ + (:lo, (), :(firstindex(v)), Integer), + (:hi, (), :(lastindex(v)), Integer), + (:mn, (), :(throw(ArgumentError("mn is needed but has not been computed"))), :(eltype(v))), + (:mx, (), :(throw(ArgumentError("mx is needed but has not been computed"))), :(eltype(v))), + (:scratch, (), nothing, :(Union{Nothing, Vector})), # could have different eltype + (:allow_legacy_dispatch, (), true, Bool)] + usym = Symbol(:_, sym) + @eval function $usym(v, o, kw) + # using missing instead of nothing because scratch could === nothing. + res = get(kw, $(Expr(:quote, sym)), missing) + res !== missing && return kw, res::$type + @getkw $(deps...) + $sym = $exp + (;kw..., $sym), $sym::$type + end +end + +## Scratch space management """ - PartialQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}) + make_scratch(scratch::Union{Nothing, Vector}, T::Type, len::Integer) -Indicate that a sorting function should use the partial quick sort algorithm. +Returns `(s, t)` where `t` is an `AbstractVector` of type `T` with length at least `len` +that is backed by the `Vector` `s`. If `scratch !== nothing`, then `s === scratch`. -Partial quick sort finds and sorts the elements that would end up in positions -`lo:hi` using [`QuickSort`](@ref). +This function will allocate a new vector if `scratch === nothing`, `resize!` `scratch` if it +is too short, and `reinterpret` `scratch` if its eltype is not `T`. 
+""" +function make_scratch(scratch::Nothing, T::Type, len::Integer) + s = Vector{T}(undef, len) + s, s +end +function make_scratch(scratch::Vector{T}, ::Type{T}, len::Integer) where T + len > length(scratch) && resize!(scratch, len) + scratch, scratch +end +function make_scratch(scratch::Vector, T::Type, len::Integer) + len_bytes = len * sizeof(T) + len_scratch = div(len_bytes, sizeof(eltype(scratch))) + len_scratch > length(scratch) && resize!(scratch, len_scratch) + scratch, reinterpret(T, scratch) +end + + +## sorting algorithm components ## -Characteristics: - * *stable*: preserves the ordering of elements which compare equal - (e.g. "a" and "A" in a sort of letters which ignores case). - * *not in-place* in memory. - * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). """ -struct PartialQuickSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}} <: Algorithm - lo::L - hi::H + _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw; t, offset) + +An internal function that sorts `v` using the algorithm `a` under the ordering `o`, +subject to specifications provided in `kw` (such as `lo` and `hi` in which case it only +sorts `view(v, lo:hi)`) + +Returns a scratch space if provided or constructed during the sort, or `nothing` if +no scratch space is present. + +!!! note + `_sort!` modifies but does not return `v`. + +A returned scratch space will be a `Vector{T}` where `T` is usually the eltype of `v`. There +are some exceptions, for example if `eltype(v) == Union{Missing, T}` then the scratch space +may be be a `Vector{T}` due to `MissingOptimization` changing the eltype of `v` to `T`. + +`t` is an appropriate scratch space for the algorithm at hand, to be accessed as +`t[i + offset]`. `t` is used for an algorithm to pass a scratch space back to itself in +internal or recursive calls. +""" +function _sort! end + +abstract type Algorithm end + + +""" + MissingOptimization(next) <: Algorithm + +Filter out missing values. 
+ +Missing values are placed after other values according to `DirectOrdering`s. This pass puts +them there and passes on a view into the original vector that excludes the missing values. +This pass is triggered for both `sort([1, missing, 3])` and `sortperm([1, missing, 3])`. +""" +struct MissingOptimization{T <: Algorithm} <: Algorithm + next::T end -PartialQuickSort(k::Integer) = PartialQuickSort(missing, k) -PartialQuickSort(k::OrdinalRange) = PartialQuickSort(first(k), last(k)) -_PartialQuickSort(k::Integer) = PartialQuickSort(k, k) -_PartialQuickSort(k::OrdinalRange) = PartialQuickSort(k) + +struct WithoutMissingVector{T, U} <: AbstractVector{T} + data::U + function WithoutMissingVector(data; unsafe=false) + if !unsafe && any(ismissing, data) + throw(ArgumentError("data must not contain missing values")) + end + new{nonmissingtype(eltype(data)), typeof(data)}(data) + end +end +Base.@propagate_inbounds function Base.getindex(v::WithoutMissingVector, i::Integer) + out = v.data[i] + @assert !(out isa Missing) + out::eltype(v) +end +Base.@propagate_inbounds function Base.setindex!(v::WithoutMissingVector{T}, x::T, i) where T + v.data[i] = x + v +end +Base.size(v::WithoutMissingVector) = size(v.data) """ - InsertionSort + send_to_end!(f::Function, v::AbstractVector; [lo, hi]) -Indicate that a sorting function should use the insertion sort algorithm. +Send every element of `v` for which `f` returns `true` to the end of the vector and return +the index of the last element for which `f` returns `false`. -Insertion sort traverses the collection one element at a time, inserting -each element into its correct, sorted position in the output vector. +`send_to_end!(f, v, lo, hi)` is equivalent to `send_to_end!(f, view(v, lo:hi))+lo-1` -Characteristics: - * *stable*: preserves the ordering of elements which - compare equal (e.g. "a" and "A" in a sort of letters - which ignores case). - * *in-place* in memory.
- * *quadratic performance* in the number of elements to be sorted: - it is well-suited to small collections but should not be used for large ones. +Preserves the order of the elements that are not sent to the end. """ -const InsertionSort = InsertionSortAlg() +function send_to_end!(f::F, v::AbstractVector; lo=firstindex(v), hi=lastindex(v)) where F <: Function + i = lo + @inbounds while i <= hi && !f(v[i]) + i += 1 + end + j = i + 1 + @inbounds while j <= hi + if !f(v[j]) + v[i], v[j] = v[j], v[i] + i += 1 + end + j += 1 + end + i - 1 +end +""" + send_to_end!(f::Function, v::AbstractVector, o::DirectOrdering[, end_stable]; lo, hi) +Return `(a, b)` where `v[a:b]` are the elements that are not sent to the end. + +If `o isa ReverseOrdering` then the "end" of `v` is `v[lo]`. + +If `end_stable` is set, the elements that are sent to the end are stable instead of the +elements that are not """ - QuickSort +@inline send_to_end!(f::F, v::AbstractVector, ::ForwardOrdering, end_stable=false; lo, hi) where F <: Function = + end_stable ? (lo, hi-send_to_end!(!f, view(v, hi:-1:lo))) : (lo, send_to_end!(f, v; lo, hi)) +@inline send_to_end!(f::F, v::AbstractVector, ::ReverseOrdering, end_stable=false; lo, hi) where F <: Function = + end_stable ? 
(send_to_end!(!f, v; lo, hi)+1, hi) : (hi-send_to_end!(f, view(v, hi:-1:lo))+1, hi) + + +function _sort!(v::AbstractVector, a::MissingOptimization, o::Ordering, kw) + @getkw lo hi + if nonmissingtype(eltype(v)) != eltype(v) && o isa DirectOrdering + lo, hi = send_to_end!(ismissing, v, o; lo, hi) + _sort!(WithoutMissingVector(v, unsafe=true), a.next, o, (;kw..., lo, hi)) + elseif eltype(v) <: Integer && o isa Perm{DirectOrdering} && nonmissingtype(eltype(o.data)) != eltype(o.data) + lo, hi = send_to_end!(i -> ismissing(@inbounds o.data[i]), v, o) + _sort!(v, a.next, Perm(o.order, WithoutMissingVector(o.data, unsafe=true)), (;kw..., lo, hi)) + else + _sort!(v, a.next, o, kw) + end +end -Indicate that a sorting function should use the quick sort algorithm. -Quick sort picks a pivot element, partitions the array based on the pivot, -and then sorts the elements before and after the pivot recursively. +""" + IEEEFloatOptimization(next) <: Algorithm -Characteristics: - * *stable*: preserves the ordering of elements which compare equal - (e.g. "a" and "A" in a sort of letters which ignores case). - * *not in-place* in memory. - * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). - * *good performance* for almost all large collections. - * *quadratic worst case runtime* in pathological cases - (vanishingly rare for non-malicious input) +Move NaN values to the end, partition by sign, and reinterpret the rest as unsigned integers. + +IEEE floating point numbers (`Float64`, `Float32`, and `Float16`) compare the same as +unsigned integers with the bits with a few exceptions. This pass + +This pass is triggered for both `sort([1.0, NaN, 3.0])` and `sortperm([1.0, NaN, 3.0])`. 
""" -const QuickSort = PartialQuickSort(missing, missing) -const QuickSortAlg = PartialQuickSort{Missing, Missing} # Exists for backward compatibility +struct IEEEFloatOptimization{T <: Algorithm} <: Algorithm + next::T +end + +UIntType(::Type{Float16}) = UInt16 +UIntType(::Type{Float32}) = UInt32 +UIntType(::Type{Float64}) = UInt64 +after_zero(::ForwardOrdering, x) = !signbit(x) +after_zero(::ReverseOrdering, x) = signbit(x) +is_concrete_IEEEFloat(T::Type) = T <: Base.IEEEFloat && isconcretetype(T) +function _sort!(v::AbstractVector, a::IEEEFloatOptimization, o::Ordering, kw) + @getkw lo hi + if is_concrete_IEEEFloat(eltype(v)) && o isa DirectOrdering + lo, hi = send_to_end!(isnan, v, o, true; lo, hi) + iv = reinterpret(UIntType(eltype(v)), v) + j = send_to_end!(x -> after_zero(o, x), v; lo, hi) + scratch = _sort!(iv, a.next, Reverse, (;kw..., lo, hi=j)) + if scratch === nothing # Union split + _sort!(iv, a.next, Forward, (;kw..., lo=j+1, hi, scratch)) + else + _sort!(iv, a.next, Forward, (;kw..., lo=j+1, hi, scratch)) + end + elseif eltype(v) <: Integer && o isa Perm && o.order isa DirectOrdering && is_concrete_IEEEFloat(eltype(o.data)) + lo, hi = send_to_end!(i -> isnan(@inbounds o.data[i]), v, o.order, true; lo, hi) + ip = reinterpret(UIntType(eltype(o.data)), o.data) + j = send_to_end!(i -> after_zero(o.order, @inbounds o.data[i]), v; lo, hi) + scratch = _sort!(v, a.next, Perm(Reverse, ip), (;kw..., lo, hi=j)) + if scratch === nothing # Union split + _sort!(v, a.next, Perm(Forward, ip), (;kw..., lo=j+1, hi, scratch)) + else + _sort!(v, a.next, Perm(Forward, ip), (;kw..., lo=j+1, hi, scratch)) + end + else + _sort!(v, a.next, o, kw) + end +end + """ - MergeSort + BoolOptimization(next) <: Algorithm -Indicate that a sorting function should use the merge sort algorithm. +Sort `AbstractVector{Bool}`s using a specialized version of counting sort. 
-Merge sort divides the collection into subcollections and -repeatedly merges them, sorting each subcollection at each step, -until the entire collection has been recombined in sorted form. +Accesses each element at most twice (one read and one write), and performs at most two +comparisons. +""" +struct BoolOptimization{T <: Algorithm} <: Algorithm + next::T +end +_sort!(v::AbstractVector, a::BoolOptimization, o::Ordering, kw) = _sort!(v, a.next, o, kw) +function _sort!(v::AbstractVector{Bool}, ::BoolOptimization, o::Ordering, kw) + first = lt(o, false, true) ? false : lt(o, true, false) ? true : return v + @getkw lo hi scratch + count = 0 + @inbounds for i in lo:hi + if v[i] == first + count += 1 + end + end + @inbounds v[lo:lo+count-1] .= first + @inbounds v[lo+count:hi] .= !first + scratch +end -Characteristics: - * *stable*: preserves the ordering of elements which compare - equal (e.g. "a" and "A" in a sort of letters which ignores - case). - * *not in-place* in memory. - * *divide-and-conquer* sort strategy. + +""" + IsUIntMappable(yes, no) <: Algorithm + +Determines if the elements of a vector can be mapped to unsigned integers while preserving +their order under the specified ordering. + +If they can be, dispatch to the `yes` algorithm and record the unsigned integer type that +the elements may be mapped to. Otherwise dispatch to the `no` algorithm. """ -const MergeSort = MergeSortAlg() +struct IsUIntMappable{T <: Algorithm, U <: Algorithm} <: Algorithm + yes::T + no::U +end +function _sort!(v::AbstractVector, a::IsUIntMappable, o::Ordering, kw) + if UIntMappable(eltype(v), o) !== nothing + _sort!(v, a.yes, o, kw) + else + _sort!(v, a.no, o, kw) + end +end + """ - AdaptiveSort + Small{N}(small=SMALL_ALGORITHM, big) <: Algorithm -Indicate that a sorting function should use the fastest available stable algorithm. +Sort inputs with `length(lo:hi) <= N` using the `small` algorithm. Otherwise use the `big` +algorithm. 
+""" +struct Small{N, T <: Algorithm, U <: Algorithm} <: Algorithm + small::T + big::U +end +Small{N}(small, big) where N = Small{N, typeof(small), typeof(big)}(small, big) +Small{N}(big) where N = Small{N}(SMALL_ALGORITHM, big) +function _sort!(v::AbstractVector, a::Small{N}, o::Ordering, kw) where N + @getkw lo hi + if (hi-lo) < N + _sort!(v, a.small, o, kw) + else + _sort!(v, a.big, o, kw) + end +end + + +struct InsertionSortAlg <: Algorithm end -Currently, AdaptiveSort uses - * [`InsertionSort`](@ref) for short vectors - * [`QuickSort`](@ref) for vectors that are not [`UIntMappable`](@ref) - * Radix sort for long vectors - * Counting sort for vectors of integers spanning a short range """ -const AdaptiveSort = AdaptiveSortAlg() + InsertionSort -const DEFAULT_UNSTABLE = AdaptiveSort -const DEFAULT_STABLE = AdaptiveSort -const SMALL_ALGORITHM = InsertionSort -const SMALL_THRESHOLD = 20 +Use the insertion sort algorithm. -function sort!(v::AbstractVector, lo::Integer, hi::Integer, ::InsertionSortAlg, o::Ordering) +Insertion sort traverses the collection one element at a time, inserting +each element into its correct, sorted position in the output vector. + +Characteristics: +* *stable*: preserves the ordering of elements which compare equal +(e.g. "a" and "A" in a sort of letters which ignores case). +* *in-place* in memory. +* *quadratic performance* in the number of elements to be sorted: +it is well-suited to small collections but should not be used for large ones. 
+""" +const InsertionSort = InsertionSortAlg() +const SMALL_ALGORITHM = InsertionSortAlg() + +function _sort!(v::AbstractVector, ::InsertionSortAlg, o::Ordering, kw) + @getkw lo hi scratch lo_plus_1 = (lo + 1)::Integer @inbounds for i = lo_plus_1:hi j = i @@ -527,9 +730,249 @@ function sort!(v::AbstractVector, lo::Integer, hi::Integer, ::InsertionSortAlg, end v[j] = x end - return v + scratch +end + + +""" + CheckSorted(next) <: Algorithm + +Check if the input is already sorted and for large inputs, also check if it is +reverse-sorted. The reverse-sorted check is unstable. +""" +struct CheckSorted{T <: Algorithm} <: Algorithm + next::T +end +function _sort!(v::AbstractVector, a::CheckSorted, o::Ordering, kw) + @getkw lo hi scratch + + # For most arrays, a presorted check is cheap (overhead < 5%) and for most large + # arrays it is essentially free (<1%). + _issorted(v, lo, hi, o) && return scratch + + # For most large arrays, a reverse-sorted check is essentially free (overhead < 1%) + if hi-lo >= 500 && _issorted(v, lo, hi, ReverseOrdering(o)) + # If reversing is valid, do so. This does violates stability. + reverse!(v, lo, hi) + return scratch + end + + _sort!(v, a.next, o, kw) +end + + +""" + ComputeExtrema(next) <: Algorithm + +Compute the extrema of the input under the provided order. + +If the minimum is no less than the maximum, then the input is already sorted. Otherwise, +dispatch to the `next` algorithm. +""" +struct ComputeExtrema{T <: Algorithm} <: Algorithm + next::T +end +function _sort!(v::AbstractVector, a::ComputeExtrema, o::Ordering, kw) + @getkw lo hi scratch + mn = mx = v[lo] + @inbounds for i in (lo+1):hi + vi = v[i] + lt(o, vi, mn) && (mn = vi) + lt(o, mx, vi) && (mx = vi) + end + mn, mx + + lt(o, mn, mx) || return scratch # all same + + _sort!(v, a.next, o, (;kw..., mn, mx)) +end + + +""" + ConsiderCountingSort(counting=CountingSort(), next) <: Algorithm + +If the input's range is small enough, use the `counting` algorithm. 
Otherwise, dispatch to +the `next` algorithm. + +For most types, the threshold is if the range is shorter than half the length, but for types +larger than Int64, bitshifts are expensive and RadixSort is not viable, so the threshold is +much more generous. +""" +struct ConsiderCountingSort{T <: Algorithm, U <: Algorithm} <: Algorithm + counting::T + next::U +end +ConsiderCountingSort(next) = ConsiderCountingSort(CountingSort(), next) +function _sort!(v::AbstractVector{<:Integer}, a::ConsiderCountingSort, o::DirectOrdering, kw) + @getkw lo hi mn mx + range = maybe_unsigned(o === Reverse ? mn-mx : mx-mn) + + if range < (sizeof(eltype(v)) > 8 ? 5(hi-lo)-100 : div(hi-lo, 2)) + _sort!(v, a.counting, o, kw) + else + _sort!(v, a.next, o, kw) + end +end +_sort!(v::AbstractVector, a::ConsiderCountingSort, o::Ordering, kw) = _sort!(v, a.next, o, kw) + + +""" + CountingSort <: Algorithm + +Use the counting sort algorithm. + +`CountingSort` is an algorithm for sorting integers that runs in Θ(length + range) time and +space. It counts the number of occurrences of each value in the input and then iterates +through those counts repopulating the input with the values in sorted order. +""" +struct CountingSort <: Algorithm end +maybe_reverse(o::ForwardOrdering, x) = x +maybe_reverse(o::ReverseOrdering, x) = reverse(x) +function _sort!(v::AbstractVector{<:Integer}, ::CountingSort, o::DirectOrdering, kw) + @getkw lo hi mn mx scratch + range = o === Reverse ? mn-mx : mx-mn + offs = 1 - (o === Reverse ? 
mx : mn) + + counts = fill(0, range+1) # TODO use scratch (but be aware of type stability) + @inbounds for i = lo:hi + counts[v[i] + offs] += 1 + end + + idx = lo + @inbounds for i = maybe_reverse(o, 1:range+1) + lastidx = idx + counts[i] - 1 + val = i-offs + for j = idx:lastidx + v[j] = val + end + idx = lastidx + 1 + end + + scratch +end + + +""" + ConsiderRadixSort(radix=RadixSort(), next) <: Algorithm + +If the number of bits in the input's range is small enough and the input supports efficient +bitshifts, use the `radix` algorithm. Otherwise, dispatch to the `next` algorithm. +""" +struct ConsiderRadixSort{T <: Algorithm, U <: Algorithm} <: Algorithm + radix::T + next::U +end +ConsiderRadixSort(next) = ConsiderRadixSort(RadixSort(), next) +function _sort!(v::AbstractVector, a::ConsiderRadixSort, o::DirectOrdering, kw) + @getkw lo hi mn mx + urange = uint_map(mx, o)-uint_map(mn, o) + bits = unsigned(8sizeof(urange) - leading_zeros(urange)) + if sizeof(eltype(v)) <= 8 && bits+70 < 22log(hi-lo) + _sort!(v, a.radix, o, kw) + else + _sort!(v, a.next, o, kw) + end +end + + +""" + RadixSort <: Algorithm + +Use the radix sort algorithm. + +`RadixSort` is a stable least significant bit first radix sort algorithm that runs in +`O(length * log(range))` time and linear space. + +It first sorts the entire vector by the last `chunk_size` bits, then by the second +to last `chunk_size` bits, and so on. Stability means that it will not reorder two elements +that compare equal. This is essential so that the order introduced by earlier, +less significant passes is preserved by later passes. + +Each pass divides the input into `2^chunk_size == mask+1` buckets. 
To do this, it + * counts the number of entries that fall into each bucket + * uses those counts to compute the indices to move elements of those buckets into + * moves elements into the computed indices in the swap array + * switches the swap and working array + +`chunk_size` is larger for larger inputs and determined by an empirical heuristic. +""" +struct RadixSort <: Algorithm end +function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering, kw) + @getkw lo hi mn mx scratch + umn = uint_map(mn, o) + urange = uint_map(mx, o)-umn + bits = unsigned(8sizeof(urange) - leading_zeros(urange)) + + # At this point, we are committed to radix sort. + u = uint_map!(v, lo, hi, o) + + # we subtract umn to avoid radixing over unnecessary bits. For example, + # Int32[3, -1, 2] uint_maps to UInt32[0x80000003, 0x7fffffff, 0x80000002] + # which uses all 32 bits, but once we subtract umn = 0x7fffffff, we are left with + # UInt32[0x00000004, 0x00000000, 0x00000003] which uses only 3 bits, and + # Float32[2.012, 400.0, 12.345] uint_maps to UInt32[0x3fff3b63, 0x3c37ffff, 0x414570a4] + # which is reduced to UInt32[0x03c73b64, 0x00000000, 0x050d70a5] using only 26 bits. + # the overhead for this subtraction is small enough that it is worthwhile in many cases. + + # this is faster than u[lo:hi] .-= umn as of v1.9.0-DEV.100 + @inbounds for i in lo:hi + u[i] -= umn + end + + scratch, t = make_scratch(scratch, eltype(v), hi-lo+1) + tu = reinterpret(eltype(u), t) + if radix_sort!(u, lo, hi, bits, tu, 1-lo) + uint_unmap!(v, u, lo, hi, o, umn) + else + uint_unmap!(v, tu, lo, hi, o, umn, 1-lo) + end + scratch end + +""" + PartialQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}, next::Algorithm) <: Algorithm + +Indicate that a sorting function should use the partial quick sort algorithm. + +Partial quick sort finds and sorts the elements that would end up in positions `lo:hi` using +[`QuickSort`](@ref). 
It is recursive and uses the `next` algorithm for small chunks + +Characteristics: + * *stable*: preserves the ordering of elements which compare equal + (e.g. "a" and "A" in a sort of letters which ignores case). + * *not in-place* in memory. + * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). +""" +struct PartialQuickSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}, T<:Algorithm} <: Algorithm + lo::L + hi::H + next::T +end +PartialQuickSort(k::Integer) = PartialQuickSort(missing, k, SMALL_ALGORITHM) +PartialQuickSort(k::OrdinalRange) = PartialQuickSort(first(k), last(k), SMALL_ALGORITHM) +_PartialQuickSort(k::Integer) = InitialOptimizations(PartialQuickSort(k:k)) +_PartialQuickSort(k::OrdinalRange) = InitialOptimizations(PartialQuickSort(k)) + +""" + QuickSort + +Indicate that a sorting function should use the quick sort algorithm. + +Quick sort picks a pivot element, partitions the array based on the pivot, +and then sorts the elements before and after the pivot recursively. + +Characteristics: + * *stable*: preserves the ordering of elements which compare equal + (e.g. "a" and "A" in a sort of letters which ignores case). + * *not in-place* in memory. + * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). + * *good performance* for almost all large collections. 
+ * *quadratic worst case runtime* in pathological cases + (vanishingly rare for non-malicious input) +""" +const QuickSort = PartialQuickSort(missing, missing, SMALL_ALGORITHM) + # select a pivot for QuickSort # # This method is redefined to rand(lo:hi) in Random.jl @@ -542,147 +985,127 @@ select_pivot(lo::Integer, hi::Integer) = typeof(hi-lo)(hash(lo) % (hi-lo+1)) + l # # returns (pivot, pivot_index) where pivot_index is the location the pivot # should end up, but does not set t[pivot_index] = pivot -function partition!(t::AbstractVector, lo::Integer, hi::Integer, o::Ordering, v::AbstractVector, rev::Bool) +function partition!(t::AbstractVector, lo::Integer, hi::Integer, offset::Integer, o::Ordering, v::AbstractVector, rev::Bool) pivot_index = select_pivot(lo, hi) - trues = 0 @inbounds begin pivot = v[pivot_index] while lo < pivot_index x = v[lo] fx = rev ? !lt(o, x, pivot) : lt(o, pivot, x) - t[(fx ? hi : lo) - trues] = x - trues += fx + t[(fx ? hi : lo) - offset] = x + offset += fx lo += 1 end while lo < hi x = v[lo+1] fx = rev ? lt(o, pivot, x) : !lt(o, x, pivot) - t[(fx ? hi : lo) - trues] = x - trues += fx + t[(fx ? hi : lo) - offset] = x + offset += fx lo += 1 end end - # pivot_index = lo-trues + # pivot_index = lo-offset # t[pivot_index] is whatever it was before # t[pivot_index] >* pivot, reverse stable - pivot, lo-trues + pivot, lo-offset end -function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort, - o::Ordering, t::AbstractVector=similar(v), swap=false, rev=false; - check_presorted=true) +function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering, kw; + t=nothing, offset=nothing, swap=false, rev=false) + @getkw lo hi scratch - if check_presorted && !rev && !swap - # Even if we are only sorting a short region, we can only short-circuit if the whole - # vector is presorted. A weaker condition is possible, but unlikely to be useful. 
- if _issorted(v, lo, hi, o) - return v - elseif _issorted(v, lo, hi, Lt((x, y) -> !lt(o, x, y))) - # Reverse only if necessary. Using issorted(..., Reverse(o)) would violate stability. - return reverse!(v, lo, hi) - end + if t === nothing + scratch, t = make_scratch(scratch, eltype(v), hi-lo+1) + offset = 1-lo + kw = (;kw..., scratch) end while lo < hi && hi - lo > SMALL_THRESHOLD - pivot, j = swap ? partition!(v, lo, hi, o, t, rev) : partition!(t, lo, hi, o, v, rev) + pivot, j = swap ? partition!(v, lo+offset, hi+offset, offset, o, t, rev) : partition!(t, lo, hi, -offset, o, v, rev) + j -= !swap*offset @inbounds v[j] = pivot swap = !swap # For QuickSort, a.lo === a.hi === missing, so the first two branches get skipped if !ismissing(a.lo) && j <= a.lo # Skip sorting the lower part - swap && copyto!(v, lo, t, lo, j-lo) + swap && copyto!(v, lo, t, lo+offset, j-lo) rev && reverse!(v, lo, j-1) lo = j+1 rev = !rev elseif !ismissing(a.hi) && a.hi <= j # Skip sorting the upper part - swap && copyto!(v, j+1, t, j+1, hi-j) + swap && copyto!(v, j+1, t, j+1+offset, hi-j) rev || reverse!(v, j+1, hi) hi = j-1 elseif j-lo < hi-j # Sort the lower part recursively because it is smaller. Recursing on the # smaller part guarantees O(log(n)) stack space even on pathological inputs. 
- sort!(v, lo, j-1, a, o, t, swap, rev; check_presorted=false) + _sort!(v, a, o, (;kw..., lo, hi=j-1); t, offset, swap, rev) lo = j+1 rev = !rev else # Sort the higher part recursively - sort!(v, j+1, hi, a, o, t, swap, !rev; check_presorted=false) + _sort!(v, a, o, (;kw..., lo=j+1, hi); t, offset, swap, rev=!rev) hi = j-1 end end - hi < lo && return v - swap && copyto!(v, lo, t, lo, hi-lo+1) + hi < lo && return scratch + swap && copyto!(v, lo, t, lo+offset, hi-lo+1) rev && reverse!(v, lo, hi) - sort!(v, lo, hi, SMALL_ALGORITHM, o) + _sort!(v, a.next, o, (;kw..., lo, hi)) end -function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering, - t0::Union{AbstractVector{T}, Nothing}=nothing) where T - @inbounds if lo < hi - hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) - m = midpoint(lo, hi) - - t = t0 === nothing ? similar(v, m-lo+1) : t0 - length(t) < m-lo+1 && resize!(t, m-lo+1) - require_one_based_indexing(t) +""" + StableCheckSorted(next) <: Algorithm - sort!(v, lo, m, a, o, t) - sort!(v, m+1, hi, a, o, t) +Check if an input is sorted and/or reverse-sorted. - i, j = 1, lo - while j <= m - t[i] = v[j] - i += 1 - j += 1 - end - - i, k = 1, lo - while k < j <= hi - if lt(o, v[j], t[i]) - v[k] = v[j] - j += 1 - else - v[k] = t[i] - i += 1 - end - k += 1 - end - while k < j - v[k] = t[i] - k += 1 - i += 1 - end +The definition of reverse-sorted is that for every pair of adjacent elements, the latter is +less than the former. This is stricter than `issorted(v, Reverse(o))` to avoid swapping pairs +of elements that compare equal. +""" +struct StableCheckSorted{T<:Algorithm} <: Algorithm + next::T +end +function _sort!(v::AbstractVector, a::StableCheckSorted, o::Ordering, kw) + @getkw lo hi scratch + if _issorted(v, lo, hi, o) + return scratch + elseif _issorted(v, lo, hi, Lt((x, y) -> !lt(o, x, y))) + # Reverse only if necessary. Using issorted(..., Reverse(o)) would violate stability. 
+ reverse!(v, lo, hi) + return scratch end - return v + _sort!(v, a.next, o, kw) end -# This is a stable least significant bit first radix sort. -# -# That is, it first sorts the entire vector by the last chunk_size bits, then by the second -# to last chunk_size bits, and so on. Stability means that it will not reorder two elements -# that compare equal. This is essential so that the order introduced by earlier, -# less significant passes is preserved by later passes. -# -# Each pass divides the input into 2^chunk_size == mask+1 buckets. To do this, it -# * counts the number of entries that fall into each bucket -# * uses those counts to compute the indices to move elements of those buckets into -# * moves elements into the computed indices in the swap array -# * switches the swap and working array -# -# In the case of an odd number of passes, the returned vector will === the input vector t, -# not v. This is one of the many reasons radix_sort! is not exported. + +# The return value indicates whether v is sorted (true) or t is sorted (false) +# This is one of the many reasons radix_sort! is not exported. function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsigned, - t::AbstractVector{U}, chunk_size=radix_chunk_size_heuristic(lo, hi, bits)) where U <: Unsigned + t::AbstractVector{U}, offset::Integer, + chunk_size=radix_chunk_size_heuristic(lo, hi, bits)) where U <: Unsigned # bits is unsigned for performance reasons. 
- mask = UInt(1) << chunk_size - 1 - counts = Vector{Int}(undef, mask+2) - - @inbounds for shift in 0:chunk_size:bits-1 - + counts = Vector{Int}(undef, 1 << chunk_size + 1) # TODO use scratch for this + + shift = 0 + while true + @noinline radix_sort_pass!(t, lo, hi, offset, counts, v, shift, chunk_size) + # the latest data resides in t + shift += chunk_size + shift < bits || return false + @noinline radix_sort_pass!(v, lo+offset, hi+offset, -offset, counts, t, shift, chunk_size) + # the latest data resides in v + shift += chunk_size + shift < bits || return true + end +end +function radix_sort_pass!(t, lo, hi, offset, counts, v, shift, chunk_size) + mask = UInt(1) << chunk_size - 1 # mask is defined in pass so that the compiler + @inbounds begin # ↳ knows it's shape # counts[2:mask+2] will store the number of elements that fall into each bucket. # if chunk_size = 8, counts[2] is bucket 0x00 and counts[257] is bucket 0xff. counts .= 0 @@ -703,15 +1126,10 @@ function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsig x = v[k] # lookup the element i = (x >> shift)&mask + 1 # compute its bucket's index for this pass j = counts[i] # lookup the target index - t[j] = x # put the element where it belongs + t[j + offset] = x # put the element where it belongs counts[i] = j + 1 # increment the target index for the next end # ↳ element in this bucket - - v, t = t, v # swap the now sorted destination vector t back into primary vector v - end - - v end function radix_chunk_size_heuristic(lo::Integer, hi::Integer, bits::Unsigned) # chunk_size is the number of bits to radix over at once. @@ -726,23 +1144,6 @@ function radix_chunk_size_heuristic(lo::Integer, hi::Integer, bits::Unsigned) UInt8(cld(bits, cld(bits, guess))) end -# For AbstractVector{Bool}, counting sort is always best. -# This is an implementation of counting sort specialized for Bools. -# Accepts unused scratch space to avoid method ambiguity. 
-function sort!(v::AbstractVector{Bool}, lo::Integer, hi::Integer, ::AdaptiveSortAlg, o::Ordering, - t::Union{AbstractVector{Bool}, Nothing}=nothing) - first = lt(o, false, true) ? false : lt(o, true, false) ? true : return v - count = 0 - @inbounds for i in lo:hi - if v[i] == first - count += 1 - end - end - @inbounds v[lo:lo+count-1] .= first - @inbounds v[lo+count:hi] .= !first - v -end - maybe_unsigned(x::Integer) = x # this is necessary to avoid calling unsigned on BigInt maybe_unsigned(x::BitSigned) = unsigned(x) function _extrema(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) @@ -761,129 +1162,152 @@ function _issorted(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) end true end -function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, ::AdaptiveSortAlg, o::Ordering, - t::Union{AbstractVector{T}, Nothing}=nothing) where T - # if the sorting task is not UIntMappable, then we can't radix sort or sort_int_range! - # so we skip straight to the fallback algorithm which is comparison based. - U = UIntMappable(eltype(v), o) - U === nothing && return sort!(v, lo, hi, QuickSort, o) - - # to avoid introducing excessive detection costs for the trivial sorting problem - # and to avoid overflow, we check for small inputs before any other runtime checks - hi <= lo && return v - lenm1 = maybe_unsigned(hi-lo) # adding 1 would risk overflow - # only count sort on a short range can compete with insertion sort when lenm1 < 40 - # and the optimization is not worth the detection cost, so we use insertion sort. - lenm1 < 40 && return sort!(v, lo, hi, SMALL_ALGORITHM, o) - # For most arrays, a presorted check is cheap (overhead < 5%) and for most large - # arrays it is essentially free (<1%). 
Insertion sort runs in a fast O(n) on presorted - # input and this guarantees presorted input will always be efficiently handled - _issorted(v, lo, hi, o) && return v - - # For large arrays, a reverse-sorted check is essentially free (overhead < 1%) - if lenm1 >= 500 && _issorted(v, lo, hi, ReverseOrdering(o)) - # If reversing is valid, do so. This does not violate stability - # because being UIntMappable implies a linear order. - reverse!(v, lo, hi) - return v - end - # UInt128 does not support fast bit shifting so we never - # dispatch to radix sort but we may still perform count sort - if sizeof(U) > 8 - if T <: Integer && o isa DirectOrdering - v_min, v_max = _extrema(v, lo, hi, Forward) - v_range = maybe_unsigned(v_max-v_min) - v_range == 0 && return v # all same - - # we know lenm1 ≥ 40, so this will never underflow. - # if lenm1 > 3.7e18 (59 exabytes), then this may incorrectly dispatch to fallback - if v_range < 5lenm1-100 # count sort will outperform comparison sort if v's range is small - return sort_int_range!(v, Int(v_range+1), v_min, o === Forward ? identity : reverse, lo, hi) - end - end - return sort!(v, lo, hi, QuickSort, o; check_presorted=false) - end +## default sorting policy ## - v_min, v_max = _extrema(v, lo, hi, o) - lt(o, v_min, v_max) || return v # all same - if T <: Integer && o isa DirectOrdering - R = o === Reverse - v_range = maybe_unsigned(R ? v_min-v_max : v_max-v_min) - if v_range < div(lenm1, 2) # count sort will be superior if v's range is very small - return sort_int_range!(v, Int(v_range+1), R ? v_max : v_min, R ? 
reverse : identity, lo, hi) - end - end +""" + InitialOptimizations(next) <: Algorithm - u_min, u_max = uint_map(v_min, o), uint_map(v_max, o) - u_range = maybe_unsigned(u_max-u_min) - if u_range < div(lenm1, 2) # count sort will be superior if u's range is very small - u = uint_map!(v, lo, hi, o) - sort_int_range!(u, Int(u_range+1), u_min, identity, lo, hi) - return uint_unmap!(v, u, lo, hi, o) - end +Attempt to apply a suite of low-cost optimizations to the input vector before sorting. - # if u's range is small, then once we subtract out v_min, we'll get a vector like - # UInt16[0x001a, 0x0015, 0x0006, 0x001b, 0x0008, 0x000c, 0x0001, 0x000e, 0x001c, 0x0009] - # where we only need to radix over the last few bits (5, in the example). - bits = unsigned(8sizeof(u_range) - leading_zeros(u_range)) - - # radix sort runs in O(bits * lenm1), quick sort runs in O(lenm1 * log(lenm1)). - # dividing both sides by lenm1 and introducing empirical constant factors yields - # the following heuristic for when QuickSort is faster than RadixSort - if 22log(lenm1) < bits + 70 - return if lenm1 > 80 - sort!(v, lo, hi, QuickSort, o; check_presorted=false) - else - sort!(v, lo, hi, SMALL_ALGORITHM, o) - end - end +`InitialOptimizations` is an implementation detail and subject to change or removal in +future versions of Julia. - # At this point, we are committed to radix sort. - u = uint_map!(v, lo, hi, o) +If `next` is stable, then `InitialOptimizations(next)` is also stable. - # we subtract u_min to avoid radixing over unnecessary bits. For example, - # Int32[3, -1, 2] uint_maps to UInt32[0x80000003, 0x7fffffff, 0x80000002] - # which uses all 32 bits, but once we subtract u_min = 0x7fffffff, we are left with - # UInt32[0x00000004, 0x00000000, 0x00000003] which uses only 3 bits, and - # Float32[2.012, 400.0, 12.345] uint_maps to UInt32[0x3fff3b63, 0x3c37ffff, 0x414570a4] - # which is reduced to UInt32[0x03c73b64, 0x00000000, 0x050d70a5] using only 26 bits. 
- # the overhead for this subtraction is small enough that it is worthwhile in many cases. +The specific optimizations attempted by `InitialOptimizations` are +[`MissingOptimization`](@ref), [`BoolOptimization`](@ref), dispatch to +[`InsertionSort`](@ref) for inputs with `length <= 10`, and [`IEEEFloatOptimization`](@ref). +""" +InitialOptimizations(next) = MissingOptimization( + BoolOptimization( + Small{10}( + IEEEFloatOptimization( + next)))) +""" + DEFAULT_STABLE - # this is faster than u[lo:hi] .-= u_min as of v1.9.0-DEV.100 - @inbounds for i in lo:hi - u[i] -= u_min - end +The default sorting algorithm. - len = lenm1 + 1 - if t !== nothing && checkbounds(Bool, t, lo:hi) # Fully preallocated and aligned scratch space - u2 = radix_sort!(u, lo, hi, bits, reinterpret(U, t)) - uint_unmap!(v, u2, lo, hi, o, u_min) - elseif t !== nothing && (applicable(resize!, t, len) || length(t) >= len) # Viable scratch space - length(t) >= len || resize!(t, len) - t1 = axes(t, 1) isa OneTo ? t : view(t, firstindex(t):lastindex(t)) - u2 = radix_sort!(view(u, lo:hi), 1, len, bits, reinterpret(U, t1)) - uint_unmap!(view(v, lo:hi), u2, 1, len, o, u_min) - else # No viable scratch space - u2 = radix_sort!(u, lo, hi, bits, similar(u)) - uint_unmap!(v, u2, lo, hi, o, u_min) - end -end +This algorithm is guaranteed to be stable (i.e. it will not reorder elements that compare +equal). It makes an effort to be fast for most inputs. -## generic sorting methods ## +The algorithms used by `DEFAULT_STABLE` are an implementation detail. See extended help +for the current dispatch system. -defalg(v::AbstractArray) = DEFAULT_STABLE +# Extended Help -function sort!(v::AbstractVector{T}, alg::Algorithm, - order::Ordering, t::Union{AbstractVector{T}, Nothing}=nothing) where T - sort!(v, firstindex(v), lastindex(v), alg, order, t) -end +`DEFAULT_STABLE` is composed of two parts: the [`InitialOptimizations`](@ref) and a hybrid +of Radix, Insertion, Counting, Quick sorts. 
+ +We begin with MissingOptimization because it has no runtime cost when it is not +triggered and can enable other optimizations to be applied later. For example, +BoolOptimization cannot apply to an `AbstractVector{Union{Missing, Bool}}`, but after +[`MissingOptimization`](@ref) is applied, that input will be converted into an +`AbstractVector{Bool}`. + +We next apply [`BoolOptimization`](@ref) because it also has no runtime cost when it is not +triggered and when it is triggered, it is an incredibly efficient algorithm (sorting `Bool`s +is quite easy). + +Next, we dispatch to [`InsertionSort`](@ref) for inputs with `length <= 10`. This dispatch +occurs before the [`IEEEFloatOptimization`](@ref) pass because the +[`IEEEFloatOptimization`](@ref)s are not beneficial for very small inputs. + +To conclude the [`InitialOptimizations`](@ref), we apply [`IEEEFloatOptimization`](@ref). + +After these optimizations, we branch on whether radix sort and related algorithms can be +applied to the input vector and ordering. We conduct this branch by testing if +`UIntMappable(v, order) !== nothing`. That is, we see if we know of a reversible mapping +from `eltype(v)` to `UInt` that preserves the ordering `order`. We perform this check after +the initial optimizations because they can change the input vector's type and ordering to +make them `UIntMappable`. + +If the input is not [`UIntMappable`](@ref), then we perform a presorted check and dispatch +to [`QuickSort`](@ref). + +Otherwise, we dispatch to [`InsertionSort`](@ref) for inputs with `length <= 40` and then +perform a presorted check ([`CheckSorted`](@ref)). + +We check for short inputs before performing the presorted check to avoid the overhead of the +check for small inputs. Because the alternate dispatch is to [`InsertionSort`](@ref) which +has efficient `O(n)` runtime on presorted inputs, the check is not necessary for small +inputs.
+ +We check if the input is reverse-sorted for long vectors (more than 500 elements) because +the check is essentially free unless the input is almost entirely reverse sorted. + +Note that once the input is determined to be [`UIntMappable`](@ref), we know the order forms +a [total order](https://en.wikipedia.org/wiki/Total_order) over the inputs and so it is impossible to +perform an unstable sort because no two elements can compare equal unless they _are_ equal, +in which case switching them is undetectable. We utilize this fact to perform a more +aggressive reverse sorted check that will reverse the vector `[3, 2, 2, 1]`. + +After these potential fast-paths are tried and failed, we [`ComputeExtrema`](@ref) of the +input. This computation has a fairly fast `O(n)` runtime, but we still try to delay it until +it is necessary. + +Next, we [`ConsiderCountingSort`](@ref). If the range of the input is small compared to its +length, we apply [`CountingSort`](@ref). + +Next, we [`ConsiderRadixSort`](@ref). This is similar to the dispatch to counting sort, +but we consider the number of _bits_ in the range, rather than the range itself. +Consequently, we apply [`RadixSort`](@ref) for any reasonably long inputs that reach this +stage. -function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, alg::Algorithm, - order::Ordering, t::Union{AbstractVector{T}, Nothing}=nothing) where T - sort!(v, lo, hi, alg, order) +Finally, if the input has length less than 80, we dispatch to [`InsertionSort`](@ref) and +otherwise we dispatch to [`QuickSort`](@ref). +""" +const DEFAULT_STABLE = InitialOptimizations( + IsUIntMappable( + Small{40}( + CheckSorted( + ComputeExtrema( + ConsiderCountingSort( + ConsiderRadixSort( + Small{80}( + QuickSort)))))), + StableCheckSorted( + QuickSort))) +""" + DEFAULT_UNSTABLE + +An efficient sorting algorithm. + +The algorithms used by `DEFAULT_UNSTABLE` are an implementation detail.
They are currently +the same as those used by [`DEFAULT_STABLE`](@ref), but this is subject to change in future. +""" +const DEFAULT_UNSTABLE = DEFAULT_STABLE +const SMALL_THRESHOLD = 20 + +function Base.show(io::IO, alg::Algorithm) + print_tree(io, alg, 0) end +function print_tree(io::IO, alg::Algorithm, cols::Int) + print(io, " "^cols) + show_type(io, alg) + print(io, '(') + for (i, name) in enumerate(fieldnames(typeof(alg))) + arg = getproperty(alg, name) + i > 1 && print(io, ',') + if arg isa Algorithm + println(io) + print_tree(io, arg, cols+1) + else + i > 1 && print(io, ' ') + print(io, arg) + end + end + print(io, ')') +end +show_type(io::IO, alg::Algorithm) = Base.show_type_name(io, typeof(alg).name) +show_type(io::IO, alg::Small{N}) where N = print(io, "Base.Sort.Small{$N}") + +defalg(v::AbstractArray) = DEFAULT_STABLE +defalg(v::AbstractArray{<:Union{Number, Missing}}) = DEFAULT_UNSTABLE +defalg(v::AbstractArray{Missing}) = DEFAULT_UNSTABLE # for method disambiguation +defalg(v::AbstractArray{Union{}}) = DEFAULT_UNSTABLE # for method disambiguation """ sort!(v; alg::Algorithm=defalg(v), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) @@ -931,31 +1355,9 @@ function sort!(v::AbstractVector{T}; by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, - scratch::Union{AbstractVector{T}, Nothing}=nothing) where T - sort!(v, alg, ord(lt,by,rev,order), scratch) -end - -# sort! 
for vectors of few unique integers -function sort_int_range!(x::AbstractVector{<:Integer}, rangelen, minval, maybereverse, - lo=firstindex(x), hi=lastindex(x)) - offs = 1 - minval - - counts = fill(0, rangelen) - @inbounds for i = lo:hi - counts[x[i] + offs] += 1 - end - - idx = lo - @inbounds for i = maybereverse(1:rangelen) - lastidx = idx + counts[i] - 1 - val = i-offs - for j = idx:lastidx - x[j] = val - end - idx = lastidx + 1 - end - - return x + scratch::Union{Vector{T}, Nothing}=nothing) where T + _sort!(v, alg, ord(lt,by,rev,order), (;scratch)) + v end """ @@ -1081,7 +1483,7 @@ function partialsortperm!(ix::AbstractVector{<:Integer}, v::AbstractVector, end # do partial quicksort - sort!(ix, _PartialQuickSort(k), Perm(ord(lt, by, rev, order), v)) + _sort!(ix, _PartialQuickSort(k), Perm(ord(lt, by, rev, order), v), (;)) maybeview(ix, k) end @@ -1141,7 +1543,7 @@ function sortperm(A::AbstractArray; by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, - scratch::Union{AbstractVector{<:Integer}, Nothing}=nothing, + scratch::Union{Vector{<:Integer}, Nothing}=nothing, dims...) #to optionally specify dims argument ordr = ord(lt,by,rev,order) if ordr === Forward && isa(A,Vector) && eltype(A)<:Integer @@ -1205,7 +1607,7 @@ function sortperm!(ix::AbstractArray{T}, A::AbstractArray; rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, initialized::Bool=false, - scratch::Union{AbstractVector{T}, Nothing}=nothing, + scratch::Union{Vector{T}, Nothing}=nothing, dims...) 
where T <: Integer #to optionally specify dims argument (typeof(A) <: AbstractVector) == (:dims in keys(dims)) && throw(ArgumentError("Dims argument incorrect for type $(typeof(A))")) axes(ix) == axes(A) || throw(ArgumentError("index array must have the same size/axes as the source array, $(axes(ix)) != $(axes(A))")) @@ -1278,7 +1680,7 @@ function sort(A::AbstractArray{T}; by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, - scratch::Union{AbstractVector{T}, Nothing}=similar(A, size(A, dims))) where T + scratch::Union{Vector{T}, Nothing}=nothing) where T dim = dims order = ord(lt,by,rev,order) n = length(axes(A, dim)) @@ -1295,14 +1697,27 @@ function sort(A::AbstractArray{T}; end end -@noinline function sort_chunks!(Av, n, alg, order, t) +@noinline function sort_chunks!(Av, n, alg, order, scratch) inds = LinearIndices(Av) - for s = first(inds):n:last(inds) - sort!(Av, s, s+n-1, alg, order, t) + sort_chunks!(Av, n, alg, order, scratch, first(inds), last(inds)) +end + +@noinline function sort_chunks!(Av, n, alg, order, scratch::Nothing, fst, lst) + for lo = fst:n:lst + s = _sort!(Av, alg, order, (; lo, hi=lo+n-1, scratch)) + s !== nothing && return sort_chunks!(Av, n, alg, order, s, lo+n, lst) end Av end +@noinline function sort_chunks!(Av, n, alg, order, scratch::AbstractVector, fst, lst) + for lo = fst:n:lst + _sort!(Av, alg, order, (; lo, hi=lo+n-1, scratch)) + end + Av +end + + """ sort!(A; dims::Integer, alg::Algorithm=defalg(A), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) @@ -1338,14 +1753,14 @@ function sort!(A::AbstractArray{T}; lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, - order::Ordering=Forward, - scratch::Union{AbstractVector{T}, Nothing}=similar(A, size(A, dims))) where T - _sort!(A, Val(dims), alg, ord(lt, by, rev, order), scratch) + order::Ordering=Forward, # TODO stop eagerly over-allocating. 
+ scratch::Union{Vector{T}, Nothing}=similar(A, size(A, dims))) where T + __sort!(A, Val(dims), alg, ord(lt, by, rev, order), scratch) end -function _sort!(A::AbstractArray{T}, ::Val{K}, +function __sort!(A::AbstractArray{T}, ::Val{K}, alg::Algorithm, order::Ordering, - scratch::Union{AbstractVector{T}, Nothing}) where {K,T} + scratch::Union{Vector{T}, Nothing}) where {K,T} nd = ndims(A) 1 <= K <= nd || throw(ArgumentError("dimension out of range")) @@ -1353,7 +1768,7 @@ function _sort!(A::AbstractArray{T}, ::Val{K}, remdims = ntuple(i -> i == K ? 1 : axes(A, i), nd) for idx in CartesianIndices(remdims) Av = view(A, ntuple(i -> i == K ? Colon() : idx[i], nd)...) - sort!(Av, alg, order, scratch) + sort!(Av; alg, order, scratch) end A end @@ -1436,175 +1851,109 @@ function uint_map!(v::AbstractVector, lo::Integer, hi::Integer, order::Ordering) end function uint_unmap!(v::AbstractVector, u::AbstractVector{U}, lo::Integer, hi::Integer, - order::Ordering, offset::U=zero(U)) where U <: Unsigned + order::Ordering, offset::U=zero(U), + index_offset::Integer=0) where U <: Unsigned @inbounds for i in lo:hi - v[i] = uint_unmap(eltype(v), u[i]+offset, order) + v[i] = uint_unmap(eltype(v), u[i+index_offset]+offset, order) end v end -## fast clever sorting for floats ## - -module Float -using ..Sort -using ...Order -using Base: IEEEFloat - -import Core.Intrinsics: slt_int -import ..Sort: sort!, UIntMappable, uint_map, uint_unmap -import ...Order: lt, DirectOrdering - -# fpsort is not safe for vectors of mixed bitwidth such as Vector{Union{Float32, Float64}}. -# This type allows us to dispatch only when it is safe to do so. See #42739 for more info. 
-const FPSortable = Union{ - AbstractVector{Union{Float16, Missing}}, - AbstractVector{Union{Float32, Missing}}, - AbstractVector{Union{Float64, Missing}}, - AbstractVector{Float16}, - AbstractVector{Float32}, - AbstractVector{Float64}, - AbstractVector{Missing}} -struct Left <: Ordering end -struct Right <: Ordering end +### Unused constructs for backward compatibility ### -left(::DirectOrdering) = Left() -right(::DirectOrdering) = Right() +struct MergeSortAlg{T <: Algorithm} <: Algorithm + next::T +end -left(o::Perm) = Perm(left(o.order), o.data) -right(o::Perm) = Perm(right(o.order), o.data) +""" + MergeSort -lt(::Left, x::T, y::T) where {T<:IEEEFloat} = slt_int(y, x) -lt(::Right, x::T, y::T) where {T<:IEEEFloat} = slt_int(x, y) +Indicate that a sorting function should use the merge sort algorithm. -uint_map(x::Float16, ::Left) = ~reinterpret(UInt16, x) -uint_unmap(::Type{Float16}, u::UInt16, ::Left) = reinterpret(Float16, ~u) -uint_map(x::Float16, ::Right) = reinterpret(UInt16, x) -uint_unmap(::Type{Float16}, u::UInt16, ::Right) = reinterpret(Float16, u) -UIntMappable(::Type{Float16}, ::Union{Left, Right}) = UInt16 +Merge sort divides the collection into subcollections and +repeatedly merges them, sorting each subcollection at each step, +until the entire collection has been recombined in sorted form. -uint_map(x::Float32, ::Left) = ~reinterpret(UInt32, x) -uint_unmap(::Type{Float32}, u::UInt32, ::Left) = reinterpret(Float32, ~u) -uint_map(x::Float32, ::Right) = reinterpret(UInt32, x) -uint_unmap(::Type{Float32}, u::UInt32, ::Right) = reinterpret(Float32, u) -UIntMappable(::Type{Float32}, ::Union{Left, Right}) = UInt32 +Characteristics: + * *stable*: preserves the ordering of elements which compare + equal (e.g. "a" and "A" in a sort of letters which ignores + case). + * *not in-place* in memory. + * *divide-and-conquer* sort strategy. 
+""" +const MergeSort = MergeSortAlg(SMALL_ALGORITHM) -uint_map(x::Float64, ::Left) = ~reinterpret(UInt64, x) -uint_unmap(::Type{Float64}, u::UInt64, ::Left) = reinterpret(Float64, ~u) -uint_map(x::Float64, ::Right) = reinterpret(UInt64, x) -uint_unmap(::Type{Float64}, u::UInt64, ::Right) = reinterpret(Float64, u) -UIntMappable(::Type{Float64}, ::Union{Left, Right}) = UInt64 +function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw; t=nothing, offset=nothing) + @getkw lo hi scratch + @inbounds if lo < hi + hi-lo <= SMALL_THRESHOLD && return _sort!(v, a.next, o, kw) -isnan(o::DirectOrdering, x::IEEEFloat) = (x!=x) -isnan(o::DirectOrdering, x::Missing) = false -isnan(o::Perm, i::Integer) = isnan(o.order,o.data[i]) + m = midpoint(lo, hi) -ismissing(o::DirectOrdering, x::IEEEFloat) = false -ismissing(o::DirectOrdering, x::Missing) = true -ismissing(o::Perm, i::Integer) = ismissing(o.order,o.data[i]) + if t === nothing + scratch, t = make_scratch(scratch, eltype(v), m-lo+1) + end -allowsmissing(::AbstractVector{T}, ::DirectOrdering) where {T} = T >: Missing -allowsmissing(::AbstractVector{<:Integer}, - ::Perm{<:DirectOrdering,<:AbstractVector{T}}) where {T} = - T >: Missing + _sort!(v, a, o, (;kw..., hi=m, scratch); t, offset) + _sort!(v, a, o, (;kw..., lo=m+1, scratch); t, offset) -function specials2left!(testf::Function, v::AbstractVector, o::Ordering, - lo::Integer=firstindex(v), hi::Integer=lastindex(v)) - i = lo - @inbounds while i <= hi && testf(o,v[i]) - i += 1 - end - j = i + 1 - @inbounds while j <= hi - if testf(o,v[j]) - v[i], v[j] = v[j], v[i] + i, j = 1, lo + while j <= m + t[i] = v[j] i += 1 + j += 1 end - j += 1 - end - return i, hi -end -function specials2right!(testf::Function, v::AbstractVector, o::Ordering, - lo::Integer=firstindex(v), hi::Integer=lastindex(v)) - i = hi - @inbounds while lo <= i && testf(o,v[i]) - i -= 1 - end - j = i - 1 - @inbounds while lo <= j - if testf(o,v[j]) - v[i], v[j] = v[j], v[i] - i -= 1 + + i, k = 1, lo + 
while k < j <= hi + if lt(o, v[j], t[i]) + v[k] = v[j] + j += 1 + else + v[k] = t[i] + i += 1 + end + k += 1 + end + while k < j + v[k] = t[i] + k += 1 + i += 1 end - j -= 1 end - return lo, i + + scratch end -function specials2left!(v::AbstractVector, a::Algorithm, o::Ordering) - lo, hi = firstindex(v), lastindex(v) - if allowsmissing(v, o) - i, _ = specials2left!((v, o) -> ismissing(v, o) || isnan(v, o), v, o, lo, hi) - sort!(v, lo, i-1, a, o) - return i, hi - else - return specials2left!(isnan, v, o, lo, hi) - end +# Support 3-, 5-, and 6-argument versions of sort! for calling into the internals in the old way +sort!(v::AbstractVector, a::Algorithm, o::Ordering) = sort!(v, firstindex(v), lastindex(v), a, o) +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering) + _sort!(v, a, o, (; lo, hi, allow_legacy_dispatch=false)) + v end -function specials2right!(v::AbstractVector, a::Algorithm, o::Ordering) - lo, hi = firstindex(v), lastindex(v) - if allowsmissing(v, o) - _, i = specials2right!((v, o) -> ismissing(v, o) || isnan(v, o), v, o, lo, hi) - sort!(v, i+1, hi, a, o) - return lo, i - else - return specials2right!(isnan, v, o, lo, hi) - end +sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering, _) = sort!(v, lo, hi, a, o) +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering, scratch::Vector) + _sort!(v, a, o, (; lo, hi, scratch, allow_legacy_dispatch=false)) + v end -specials2end!(v::AbstractVector, a::Algorithm, o::ForwardOrdering) = - specials2right!(v, a, o) -specials2end!(v::AbstractVector, a::Algorithm, o::ReverseOrdering) = - specials2left!(v, a, o) -specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ForwardOrdering}) = - specials2right!(v, a, o) -specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ReverseOrdering}) = - specials2left!(v, a, o) - -issignleft(o::ForwardOrdering, x::IEEEFloat) = lt(o, x, zero(x)) 
-issignleft(o::ReverseOrdering, x::IEEEFloat) = lt(o, x, -zero(x)) -issignleft(o::Perm, i::Integer) = issignleft(o.order, o.data[i]) - -function fpsort!(v::AbstractVector{T}, a::Algorithm, o::Ordering, - t::Union{AbstractVector{T}, Nothing}=nothing) where T - # fpsort!'s optimizations speed up comparisons, of which there are O(nlogn). - # The overhead is O(n). For n < 10, it's not worth it. - length(v) < 10 && return sort!(v, firstindex(v), lastindex(v), SMALL_ALGORITHM, o, t) - - i, j = lo, hi = specials2end!(v,a,o) - @inbounds while true - while i <= j && issignleft(o,v[i]); i += 1; end - while i <= j && !issignleft(o,v[j]); j -= 1; end - i <= j || break - v[i], v[j] = v[j], v[i] - i += 1; j -= 1 +# Support dispatch on custom algorithms in the old way +# sort!(::AbstractVector, ::Integer, ::Integer, ::MyCustomAlgorithm, ::Ordering) = ... +function _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw) + @getkw lo hi scratch allow_legacy_dispatch + if allow_legacy_dispatch + sort!(v, lo, hi, a, o) + scratch + else + # This error prevents infinite recursion for unknown algorithms + throw(ArgumentError("Base.Sort._sort!(::$(typeof(v)), ::$(typeof(a)), ::$(typeof(o))) is not defined")) end - sort!(v, lo, j, a, left(o), t) - sort!(v, i, hi, a, right(o), t) - return v -end - - -function sort!(v::FPSortable, a::Algorithm, o::DirectOrdering, - t::Union{FPSortable, Nothing}=nothing) - fpsort!(v, a, o, t) -end -function sort!(v::AbstractVector{T}, a::Algorithm, o::Perm{<:DirectOrdering,<:FPSortable}, - t::Union{AbstractVector{T}, Nothing}=nothing) where T <: Union{Signed, Unsigned} - fpsort!(v, a, o, t) end -end # module Sort.Float +# Keep old internal types so that people can keep dispatching with +# sort!(::AbstractVector, ::Integer, ::Integer, ::Base.QuickSortAlg, ::Ordering) = ... 
+const QuickSortAlg = typeof(QuickSort) end # module Sort diff --git a/test/sorting.jl b/test/sorting.jl index 4a0299b2217c2..37bad7d23c94b 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -79,8 +79,9 @@ end end @testset "stability" begin - for Alg in [InsertionSort, MergeSort, QuickSort, Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, - PartialQuickSort(missing, 1729), PartialQuickSort(1729, missing)] + for Alg in [InsertionSort, MergeSort, QuickSort, Base.DEFAULT_STABLE, + PartialQuickSort(missing, 1729, Base.Sort.SMALL_ALGORITHM), + PartialQuickSort(1729, missing, Base.Sort.SMALL_ALGORITHM)] @test issorted(sort(1:2000, alg=Alg, by=x->0)) @test issorted(sort(1:2000, alg=Alg, by=x->x÷100)) end @@ -534,11 +535,11 @@ end @test issorted(a) a = view([9:-1:0;], :)::SubArray - Base.Sort.sort_int_range!(a, 10, 0, identity) # test it supports non-Vector + Base.Sort._sort!(a, Base.Sort.CountingSort(), Base.Forward, (; mn=0, mx=9)) # test it supports non-Vector @test issorted(a) a = OffsetArray([9:-1:0;], -5) - Base.Sort.sort_int_range!(a, 10, 0, identity) + Base.Sort._sort!(a, Base.Sort.CountingSort(), Base.Forward, (; mn=0, mx=9)) @test issorted(a) end @@ -632,9 +633,9 @@ end @testset "uint mappings" begin #Construct value lists - floats = [T[-π, -1.0, -1/π, 1/π, 1.0, π, -0.0, 0.0, Inf, -Inf, NaN, -NaN, - prevfloat(T(0)), nextfloat(T(0)), prevfloat(T(Inf)), nextfloat(T(-Inf))] - for T in [Float16, Float32, Float64]] + floats = [reinterpret(U, vcat(T[-π, -1.0, -1/π, 1/π, 1.0, π, -0.0, 0.0, Inf, -Inf, NaN, -NaN, + prevfloat(T(0)), nextfloat(T(0)), prevfloat(T(Inf)), nextfloat(T(-Inf))], randnans(4))) + for (U, T) in [(UInt16, Float16), (UInt32, Float32), (UInt64, Float64)]] ints = [T[17, -T(17), 0, -one(T), 1, typemax(T), typemin(T), typemax(T)-1, typemin(T)+1] for T in Base.BitInteger_types] @@ -650,21 +651,18 @@ end UIntN(::Val{8}) = UInt64 UIntN(::Val{16}) = UInt128 map(vals) do x + x isa Base.ReinterpretArray && return T = eltype(x) U = UIntN(Val(sizeof(T))) 
append!(x, rand(T, 4)) append!(x, reinterpret.(T, rand(U, 4))) - if T <: AbstractFloat - mask = reinterpret(U, T(NaN)) - append!(x, reinterpret.(T, mask .| rand(U, 4))) - end end for x in vals T = eltype(x) U = UIntN(Val(sizeof(T))) - for order in [Forward, Reverse, Base.Sort.Float.Left(), Base.Sort.Float.Right(), By(Forward, identity)] - if order isa Base.Order.By || ((T <: AbstractFloat) == (order isa DirectOrdering)) + for order in [Forward, Reverse, By(Forward, identity)] + if order isa Base.Order.By @test Base.Sort.UIntMappable(T, order) === nothing continue end @@ -681,10 +679,6 @@ end for a in x for b in x - if order === Base.Sort.Float.Left() || order === Base.Sort.Float.Right() - # Left and Right orderings guarantee homogeneous sign and no NaNs - (isnan(a) || isnan(b) || signbit(a) != signbit(b)) && continue - end @test Base.Order.lt(order, a, b) === Base.Order.lt(Forward, Base.Sort.uint_map(a, order), Base.Sort.uint_map(b, order)) end end @@ -705,7 +699,7 @@ end # Nevertheless, it still works... for alg in [InsertionSort, MergeSort, QuickSort, - Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] @test sort(v, alg=alg, lt = <=) == s end @test partialsort(v, 172, lt = <=) == s[172] @@ -716,7 +710,7 @@ end # this invalid lt order. perm = reverse(sortperm(v, rev=true)) for alg in [InsertionSort, MergeSort, QuickSort, - Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] @test sort(1:n, alg=alg, lt = (i,j) -> v[i]<=v[j]) == perm end @test partialsort(1:n, 172, lt = (i,j) -> v[i]<=v[j]) == perm[172] @@ -724,7 +718,7 @@ end # lt can be very poorly behaved and sort will still permute its input in some way. 
for alg in [InsertionSort, MergeSort, QuickSort, - Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] @test sort!(sort(v, alg=alg, lt = (x,y) -> rand([false, true]))) == s end @test partialsort(v, 172, lt = (x,y) -> rand([false, true])) ∈ 1:5 @@ -739,7 +733,6 @@ end @test issorted(k[idx], rev=true) end -# This testset is at the end of the file because it is slow @testset "sort(x; scratch)" begin for n in [1,10,100,1000] v = rand(n) @@ -770,6 +763,142 @@ end end end +@testset "Unions with missing" begin + @test issorted(sort(shuffle!(vcat(fill(missing, 10), rand(Int, 100))))) +end + +@testset "Specific algorithms" begin + let + requires_uint_mappable = Union{Base.Sort.RadixSort, Base.Sort.ConsiderRadixSort, + Base.Sort.CountingSort, Base.Sort.ConsiderCountingSort, + typeof(Base.Sort.DEFAULT_STABLE.next.next.big.next.yes), + typeof(Base.Sort.DEFAULT_STABLE.next.next.big.next.yes.big), + typeof(Base.Sort.DEFAULT_STABLE.next.next.big.next.yes.big.next)} + + function test_alg(kw, alg, float=true) + for order in [Base.Forward, Base.Reverse, Base.By(x -> x^2)] + order isa Base.By && alg isa requires_uint_mappable && continue + for n in [1,7,179,1312] + + n == 1 && alg isa Base.Sort.RadixSort && continue + + x = rand(1:n+1, n) + y = sort(x; order) + @test Base.Sort._sort!(x, alg, order, (;kw(y)...)) !== x + @test all(y .=== x) + + alg isa requires_uint_mappable && continue + + x = randn(n) + y = sort(x; order) + @test Base.Sort._sort!(x, alg, order, (;kw(y)...)) !== x + @test all(y .=== x) + end + end + end + test_alg(alg) = test_alg(x -> (), alg) + + function test_alg_rec(alg, extrema=false) + if extrema + test_alg(alg) do y + (;mn=first(y),mx=last(y)) + end + else + test_alg(alg) + end + extrema |= alg isa Base.Sort.ComputeExtrema + for name in fieldnames(typeof(alg)) + a = getfield(alg, name) + a isa Base.Sort.Algorithm && test_alg_rec(a, extrema) + end + end + + test_alg_rec(Base.DEFAULT_STABLE) + end +end 
+ +@testset "show(::Algorithm)" begin + @test eval(Meta.parse(string(Base.DEFAULT_STABLE))) === Base.DEFAULT_STABLE + lines = split(string(Base.DEFAULT_STABLE), '\n') + @test 10 < maximum(length, lines) < 100 + @test 1 < length(lines) < 30 +end + +@testset "Extensibility" begin + # Defining new algorithms & backwards compatibility with packages that use sorting internals + + struct MyFirstAlg <: Base.Sort.Algorithm end + + @test_throws ArgumentError sort([1,2,3], alg=MyFirstAlg()) # not a stack overflow error + + v = shuffle(vcat(fill(missing, 10), rand(Int, 100))) + + # The pre 1.9 dispatch method + function Base.sort!(v::AbstractVector{Int}, lo::Integer, hi::Integer, ::MyFirstAlg, o::Base.Order.Ordering) + v[lo:hi] .= 7 + end + @test sort([1,2,3], alg=MyFirstAlg()) == [7,7,7] + @test all(sort(v, alg=Base.Sort.InitialOptimizations(MyFirstAlg())) .=== vcat(fill(7, 100), fill(missing, 10))) + + # Using the old hook with old entry-point + @test sort!([3,1,2], MyFirstAlg(), Base.Forward) == [7,7,7] + @test sort!([3,1,2], 1, 3, MyFirstAlg(), Base.Forward) == [7,7,7] + + # Use the pre 1.9 entry-point into the internals + function Base.sort!(v::AbstractVector{Int}, lo::Integer, hi::Integer, ::MyFirstAlg, o::Base.Order.Ordering) + sort!(v, lo, hi, Base.DEFAULT_STABLE, o) + end + @test sort([3,1,2], alg=MyFirstAlg()) == [1,2,3] + @test issorted(sort(v, alg=Base.Sort.InitialOptimizations(MyFirstAlg()))) + + # Another pre 1.9 entry-point into the internals + @test issorted(sort!(rand(100), InsertionSort, Base.Order.Forward)) + + struct MySecondAlg <: Base.Sort.Algorithm end + # A new dispatch method + function Base.Sort._sort!(v::AbstractVector, ::MySecondAlg, o::Base.Order.Ordering, kw) + Base.Sort.@getkw lo hi + v[lo:hi] .= 9 + end + @test sort([1,2,3], alg=MySecondAlg()) == [9,9,9] + @test all(sort(v, alg=Base.Sort.InitialOptimizations(MySecondAlg())) .=== vcat(fill(9, 100), fill(missing, 10))) +end + +@testset "sort!(v, lo, hi, alg, order)" begin + v = 
Vector{Float64}(undef, 4000) + for alg in [MergeSort, QuickSort, InsertionSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + rand!(v) + sort!(v, 1, 2000, alg, Base.Forward) + @test issorted(v[1:2000]) + @test !issorted(v) + + sort!(v, 2001, 4000, alg, Base.Forward) + @test issorted(v[1:2000]) + @test issorted(v[2001:4000]) + @test !issorted(v) + + sort!(v, 1001, 3000, alg, Base.Forward) + @test issorted(v[1:1000]) + @test issorted(v[1001:3000]) + @test issorted(v[3001:4000]) + @test !issorted(v[1:2000]) + @test !issorted(v[2001:4000]) + @test !issorted(v) + end +end + +@testset "IEEEFloatOptimization with -0.0" begin + x = vcat(round.(100 .* randn(1000)) ./ 100) # Also test lots of duplicates + x[rand(1:1000, 5)] .= 0.0 + x[rand(1:1000, 5)] .= -0.0 # To be sure that -0.0 is present + @test issorted(sort!(x)) +end + +@testset "Count sort near the edge of its range" begin + @test issorted(sort(rand(typemin(Int):typemin(Int)+100, 1000))) + @test issorted(sort(rand(typemax(Int)-100:typemax(Int), 1000))) +end + # This testset is at the end of the file because it is slow. 
@testset "searchsorted" begin numTypes = [ Int8, Int16, Int32, Int64, Int128, From 930314e68744fe5eb146b0da0f2f51a962d543c1 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sat, 3 Dec 2022 18:26:47 -0500 Subject: [PATCH 28/57] Comment out test in subtype that causes hang due to StackOverflow(#47792) (cherry picked from commit 327b7acb8da726fcafec37a388fa58132a3032ce) --- test/subtype.jl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/test/subtype.jl b/test/subtype.jl index 23aabf38e4fa1..70f3dd864cdbe 100644 --- a/test/subtype.jl +++ b/test/subtype.jl @@ -2201,11 +2201,12 @@ T46784{B<:Val, M<:AbstractMatrix} = Tuple{<:Union{B, <:Val{<:B}}, M, Union{Abstr @testset "known subtype/intersect issue" begin #issue 45874 - let S = Pair{Val{P}, AbstractVector{<:Union{P,<:AbstractMatrix{P}}}} where P, - T = Pair{Val{R}, AbstractVector{<:Union{P,<:AbstractMatrix{P}}}} where {P,R} - @test_broken S <: T - @test_broken typeintersect(S,T) === S - end + # Causes a hang due to jl_critical_error calling back into malloc... 
+ # let S = Pair{Val{P}, AbstractVector{<:Union{P,<:AbstractMatrix{P}}}} where P, + # T = Pair{Val{R}, AbstractVector{<:Union{P,<:AbstractMatrix{P}}}} where {P,R} + # @test_broken S <: T + # @test_broken typeintersect(S,T) === S + # end #issue 44395 @test_broken typeintersect( From 01ae8b7cf20b1e6e9e3a2d6d4da24f55142b44ef Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sat, 3 Dec 2022 20:46:27 -0500 Subject: [PATCH 29/57] Prioritize build_dir for generated headers (#47783) (cherry picked from commit 0feaf5cc3a6cec0a4f056e4e72ed6469769268a4) --- src/julia.h | 6 ++++-- src/julia_internal.h | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/julia.h b/src/julia.h index 5ac6bdc36e181..cd2e1e0480e2d 100644 --- a/src/julia.h +++ b/src/julia.h @@ -4,7 +4,9 @@ #define JULIA_H #ifdef LIBRARY_EXPORTS -#include "jl_internal_funcs.inc" +// Generated file, needs to be searched in include paths so that the builddir +// retains priority +#include #undef jl_setjmp #undef jl_longjmp #undef jl_egal @@ -2183,7 +2185,7 @@ JL_DLLEXPORT int jl_generating_output(void) JL_NOTSAFEPOINT; #define JL_OPTIONS_USE_COMPILED_MODULES_NO 0 // Version information -#include "julia_version.h" +#include // Generated file JL_DLLEXPORT extern int jl_ver_major(void); JL_DLLEXPORT extern int jl_ver_minor(void); diff --git a/src/julia_internal.h b/src/julia_internal.h index 8bcc2efb8d4a6..b8a30801a66fe 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -1647,7 +1647,9 @@ JL_DLLEXPORT uint16_t julia__truncdfhf2(double param) JL_NOTSAFEPOINT; #endif #ifdef USE_DTRACE -#include "uprobes.h.gen" +// Generated file, needs to be searched in include paths so that the builddir +// retains priority +#include // uprobes.h.gen on systems with DTrace, is auto-generated to include // `JL_PROBE_{PROBE}` and `JL_PROBE_{PROBE}_ENABLED()` macros for every probe From dba443de64ed2d3519361c2d01e04ce221045d86 Mon Sep 17 00:00:00 2001 From: Elliot Saba Date: Mon, 5 Dec 2022 14:40:25 
-0500 Subject: [PATCH 30/57] Set `OPENBLAS_NUM_THREADS=1` on local Distributed workers (#47803) This should prevent LinearAlgebra from trying to increase our OpenBLAS thread count in its `__init__()` method when we're not trying to enable threaded BLAS. (cherry picked from commit a8b399416208d91061324814ff8ae080a918e48b) --- stdlib/Distributed/src/managers.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/stdlib/Distributed/src/managers.jl b/stdlib/Distributed/src/managers.jl index 03adfd1371d15..57f58598e85dc 100644 --- a/stdlib/Distributed/src/managers.jl +++ b/stdlib/Distributed/src/managers.jl @@ -487,6 +487,13 @@ function launch(manager::LocalManager, params::Dict, launched::Array, c::Conditi if get(env, "JULIA_DEPOT_PATH", nothing) === nothing env["JULIA_DEPOT_PATH"] = join(DEPOT_PATH, pathsep) end + + # If we haven't explicitly asked for threaded BLAS, prevent OpenBLAS from starting + # up with multiple threads, thereby sucking up a bunch of wasted memory on Windows. + if !params[:enable_threaded_blas] && + get(env, "OPENBLAS_NUM_THREADS", nothing) === nothing + env["OPENBLAS_NUM_THREADS"] = "1" + end # Set the active project on workers using JULIA_PROJECT. # Users can opt-out of this by (i) passing `env = ...` or (ii) passing # `--project=...` as `exeflags` to addprocs(...). 
From ce7a372555b90ae881df24466133e64f2d6c6877 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Tue, 6 Dec 2022 09:49:40 -0300 Subject: [PATCH 31/57] Add native julia fmod (#47501) * Add native julia rem Co-authored-by: Alex Arslan (cherry picked from commit cf5ae0369ceae078cf6a29d7aa34f48a5a53531e) --- base/float.jl | 106 ++++++++++++++++++++++++++++++++++++++++- test/numbers.jl | 123 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 227 insertions(+), 2 deletions(-) diff --git a/base/float.jl b/base/float.jl index 75a2e0fcacc44..6109710d7a851 100644 --- a/base/float.jl +++ b/base/float.jl @@ -101,6 +101,8 @@ exponent_one(::Type{Float16}) = 0x3c00 exponent_half(::Type{Float16}) = 0x3800 significand_mask(::Type{Float16}) = 0x03ff +mantissa(x::T) where {T} = reinterpret(Unsigned, x) & significand_mask(T) + for T in (Float16, Float32, Float64) @eval significand_bits(::Type{$T}) = $(trailing_ones(significand_mask(T))) @eval exponent_bits(::Type{$T}) = $(sizeof(T)*8 - significand_bits(T) - 1) @@ -414,9 +416,109 @@ muladd(x::T, y::T, z::T) where {T<:IEEEFloat} = muladd_float(x, y, z) # TODO: faster floating point fld? # TODO: faster floating point mod? 
-rem(x::T, y::T) where {T<:IEEEFloat} = rem_float(x, y) +function unbiased_exponent(x::T) where {T<:IEEEFloat} + return (reinterpret(Unsigned, x) & exponent_mask(T)) >> significand_bits(T) +end + +function explicit_mantissa_noinfnan(x::T) where {T<:IEEEFloat} + m = mantissa(x) + issubnormal(x) || (m |= significand_mask(T) + uinttype(T)(1)) + return m +end + +function _to_float(number::U, ep) where {U<:Unsigned} + F = floattype(U) + S = signed(U) + epint = unsafe_trunc(S,ep) + lz::signed(U) = unsafe_trunc(S, Core.Intrinsics.ctlz_int(number) - U(exponent_bits(F))) + number <<= lz + epint -= lz + bits = U(0) + if epint >= 0 + bits = number & significand_mask(F) + bits |= ((epint + S(1)) << significand_bits(F)) & exponent_mask(F) + else + bits = (number >> -epint) & significand_mask(F) + end + return reinterpret(F, bits) +end + +@assume_effects :terminates_locally :nothrow function rem_internal(x::T, y::T) where {T<:IEEEFloat} + xuint = reinterpret(Unsigned, x) + yuint = reinterpret(Unsigned, y) + if xuint <= yuint + if xuint < yuint + return x + end + return zero(T) + end + + e_x = unbiased_exponent(x) + e_y = unbiased_exponent(y) + # Most common case where |y| is "very normal" and |x/y| < 2^EXPONENT_WIDTH + if e_y > (significand_bits(T)) && (e_x - e_y) <= (exponent_bits(T)) + m_x = explicit_mantissa_noinfnan(x) + m_y = explicit_mantissa_noinfnan(y) + d = urem_int((m_x << (e_x - e_y)), m_y) + iszero(d) && return zero(T) + return _to_float(d, e_y - uinttype(T)(1)) + end + # Both are subnormals + if e_x == 0 && e_y == 0 + return reinterpret(T, urem_int(xuint, yuint) & significand_mask(T)) + end + + m_x = explicit_mantissa_noinfnan(x) + e_x -= uinttype(T)(1) + m_y = explicit_mantissa_noinfnan(y) + lz_m_y = uinttype(T)(exponent_bits(T)) + if e_y > 0 + e_y -= uinttype(T)(1) + else + m_y = mantissa(y) + lz_m_y = Core.Intrinsics.ctlz_int(m_y) + end + + tz_m_y = Core.Intrinsics.cttz_int(m_y) + sides_zeroes_cnt = lz_m_y + tz_m_y + + # n>0 + exp_diff = e_x - e_y + # Shift hy 
right until the end or n = 0 + right_shift = min(exp_diff, tz_m_y) + m_y >>= right_shift + exp_diff -= right_shift + e_y += right_shift + # Shift hx left until the end or n = 0 + left_shift = min(exp_diff, uinttype(T)(exponent_bits(T))) + m_x <<= left_shift + exp_diff -= left_shift + + m_x = urem_int(m_x, m_y) + iszero(m_x) && return zero(T) + iszero(exp_diff) && return _to_float(m_x, e_y) + + while exp_diff > sides_zeroes_cnt + exp_diff -= sides_zeroes_cnt + m_x <<= sides_zeroes_cnt + m_x = urem_int(m_x, m_y) + end + m_x <<= exp_diff + m_x = urem_int(m_x, m_y) + return _to_float(m_x, e_y) +end + +function rem(x::T, y::T) where {T<:IEEEFloat} + if isfinite(x) && !iszero(x) && isfinite(y) && !iszero(y) + return copysign(rem_internal(abs(x), abs(y)), x) + elseif isinf(x) || isnan(y) || iszero(y) # y can still be Inf + return T(NaN) + else + return x + end +end -function mod(x::T, y::T) where T<:AbstractFloat +function mod(x::T, y::T) where {T<:AbstractFloat} r = rem(x,y) if r == 0 copysign(r,y) diff --git a/test/numbers.jl b/test/numbers.jl index 70f5f6f346d30..870acd37c089c 100644 --- a/test/numbers.jl +++ b/test/numbers.jl @@ -2929,3 +2929,126 @@ end @test false == ceil(Bool, -0.7) end end + +@testset "modf" begin + @testset "remd" begin + denorm_min = nextfloat(0.0) + minfloat = floatmin(Float64) + maxfloat = floatmax(Float64) + values = [3.0,denorm_min,-denorm_min, minfloat, + -minfloat, maxfloat, -maxfloat] + # rem (0, y) == 0 for y != 0. + for val in values + @test isequal(rem(0.0, val), 0.0) + end + # rem (-0, y) == -0 for y != 0. 
+ for val in values + @test isequal(rem(-0.0, val), -0.0) + end + # rem (+Inf, y) == NaN + values2 = [3.0,-1.1,0.0,-0.0,denorm_min,minfloat, + maxfloat,Inf,-Inf] + for val in values2 + @test isequal(rem(Inf, val), NaN) + end + # rem (-Inf, y) == NaN + for val in values2 + @test isequal(rem(-Inf, val), NaN) + end + # rem (x, +0) == NaN + values3 = values2[begin:end-2] + for val in values3 + @test isequal(rem(val, 0.0), NaN) + end + # rem (x, -0) == NaN + for val in values3 + @test isequal(rem(val, -0.0), NaN) + end + # rem (x, +Inf) == x for x not infinite. + @test isequal(rem(0.0, Inf), 0.0) + @test isequal(rem(-0.0, Inf), -0.0) + @test isequal(rem(denorm_min, Inf), denorm_min) + @test isequal(rem(minfloat, Inf), minfloat) + @test isequal(rem(maxfloat, Inf), maxfloat) + @test isequal(rem(3.0, Inf), 3.0) + # rem (x, -Inf) == x for x not infinite. + @test isequal(rem(0.0, -Inf), 0.0) + @test isequal(rem(-0.0, -Inf), -0.0) + @test isequal(rem(denorm_min, -Inf), denorm_min) + @test isequal(rem(minfloat, -Inf), minfloat) + @test isequal(rem(maxfloat, -Inf), maxfloat) + @test isequal(rem(3.0, -Inf), 3.0) + #NaN tests + @test isequal(rem(0.0, NaN), NaN) + @test isequal(rem(1.0, NaN), NaN) + @test isequal(rem(Inf, NaN), NaN) + @test isequal(rem(NaN, 0.0), NaN) + @test isequal(rem(NaN, 1.0), NaN) + @test isequal(rem(NaN, Inf), NaN) + @test isequal(rem(NaN, NaN), NaN) + #Sign tests + @test isequal(rem(6.5, 2.25), 2.0) + @test isequal(rem(-6.5, 2.25), -2.0) + @test isequal(rem(6.5, -2.25), 2.0) + @test isequal(rem(-6.5, -2.25), -2.0) + values4 = [maxfloat,-maxfloat,minfloat,-minfloat, + denorm_min, -denorm_min] + for val in values4 + @test isequal(rem(maxfloat,val), 0.0) + end + for val in values4 + @test isequal(rem(-maxfloat,val), -0.0) + end + @test isequal(rem(minfloat, maxfloat), minfloat) + @test isequal(rem(minfloat, -maxfloat), minfloat) + values5 = values4[begin+2:end] + for val in values5 + @test isequal(rem(minfloat,val), 0.0) + end + @test isequal(rem(-minfloat, 
maxfloat), -minfloat) + @test isequal(rem(-minfloat, -maxfloat), -minfloat) + for val in values5 + @test isequal(rem(-minfloat,val), -0.0) + end + values6 = values4[begin:end-2] + for val in values6 + @test isequal(rem(denorm_min,val), denorm_min) + end + @test isequal(rem(denorm_min, denorm_min), 0.0) + @test isequal(rem(denorm_min, -denorm_min), 0.0) + for val in values6 + @test isequal(rem(-denorm_min,val), -denorm_min) + end + @test isequal(rem(-denorm_min, denorm_min), -0.0) + @test isequal(rem(-denorm_min, -denorm_min), -0.0) + #Max value tests + values7 = [0x3p-1074,-0x3p-1074,0x3p-1073,-0x3p-1073] + for val in values7 + @test isequal(rem(0x1p1023,val), 0x1p-1073) + end + @test isequal(rem(0x1p1023, 0x3p-1022), 0x1p-1021) + @test isequal(rem(0x1p1023, -0x3p-1022), 0x1p-1021) + for val in values7 + @test isequal(rem(-0x1p1023,val), -0x1p-1073) + end + @test isequal(rem(-0x1p1023, 0x3p-1022), -0x1p-1021) + @test isequal(rem(-0x1p1023, -0x3p-1022), -0x1p-1021) + + end + + @testset "remf" begin + @test isequal(rem(Float32(0x1p127), Float32(0x3p-149)), Float32(0x1p-149)) + @test isequal(rem(Float32(0x1p127), -Float32(0x3p-149)), Float32(0x1p-149)) + @test isequal(rem(Float32(0x1p127), Float32(0x3p-148)), Float32(0x1p-147)) + @test isequal(rem(Float32(0x1p127), -Float32(0x3p-148)), Float32(0x1p-147)) + @test isequal(rem(Float32(0x1p127), Float32(0x3p-126)), Float32(0x1p-125)) + @test isequal(rem(Float32(0x1p127), -Float32(0x3p-126)), Float32(0x1p-125)) + @test isequal(rem(-Float32(0x1p127), Float32(0x3p-149)), -Float32(0x1p-149)) + @test isequal(rem(-Float32(0x1p127), -Float32(0x3p-149)), -Float32(0x1p-149)) + @test isequal(rem(-Float32(0x1p127), Float32(0x3p-148)), -Float32(0x1p-147)) + @test isequal(rem(-Float32(0x1p127), -Float32(0x3p-148)), -Float32(0x1p-147)) + @test isequal(rem(-Float32(0x1p127), Float32(0x3p-126)), -Float32(0x1p-125)) + @test isequal(rem(-Float32(0x1p127), -Float32(0x3p-126)), -Float32(0x1p-125)) + end + +end From 
712a12312d69f70d8427f19ed3248fa04b0477a7 Mon Sep 17 00:00:00 2001 From: jonathan-conder-sm <63538679+jonathan-conder-sm@users.noreply.github.com> Date: Wed, 7 Dec 2022 13:47:25 +0000 Subject: [PATCH 32/57] Fix libjulia install name and libjulia-internal rpath on OS X (#47220) (cherry picked from commit de4f1c3176e3766c6f7304dcac404dbaffb831c7) --- Makefile | 14 ++++++++++++-- cli/Makefile | 4 ++-- src/Makefile | 7 +++++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index c17d6ce694c5f..7159fa1854fe7 100644 --- a/Makefile +++ b/Makefile @@ -371,8 +371,18 @@ endif fi; endif - # Set rpath for libjulia-internal, which is moving from `../lib` to `../lib/julia`. We only need to do this for Linux/FreeBSD -ifneq (,$(findstring $(OS),Linux FreeBSD)) + # Set rpath for libjulia-internal, which is moving from `../lib` to `../lib/julia`. +ifeq ($(OS), Darwin) +ifneq ($(DARWIN_FRAMEWORK),1) +ifeq ($(JULIA_BUILD_MODE),release) + install_name_tool -add_rpath @loader_path/$(reverse_private_libdir_rel)/ $(DESTDIR)$(private_libdir)/libjulia-internal.$(SHLIB_EXT) + install_name_tool -add_rpath @loader_path/$(reverse_private_libdir_rel)/ $(DESTDIR)$(private_libdir)/libjulia-codegen.$(SHLIB_EXT) +else ifeq ($(JULIA_BUILD_MODE),debug) + install_name_tool -add_rpath @loader_path/$(reverse_private_libdir_rel)/ $(DESTDIR)$(private_libdir)/libjulia-internal-debug.$(SHLIB_EXT) + install_name_tool -add_rpath @loader_path/$(reverse_private_libdir_rel)/ $(DESTDIR)$(private_libdir)/libjulia-codegen-debug.$(SHLIB_EXT) +endif +endif +else ifneq (,$(findstring $(OS),Linux FreeBSD)) ifeq ($(JULIA_BUILD_MODE),release) $(PATCHELF) --set-rpath '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-internal.$(SHLIB_EXT) $(PATCHELF) --set-rpath '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-codegen.$(SHLIB_EXT) diff --git a/cli/Makefile b/cli/Makefile index 58c1f82f48662..7ba238e0deccc 100644 --- 
a/cli/Makefile +++ b/cli/Makefile @@ -113,7 +113,7 @@ endif $(build_shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_OBJS) $(SRCDIR)/list_strip_symbols.h | $(build_shlibdir) $(build_libdir) @$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -DLIBRARY_EXPORTS -shared $(SHIPFLAGS) $(LIB_OBJS) -o $@ \ $(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(call SONAME_FLAGS,libjulia.$(JL_MAJOR_SHLIB_EXT))) - @$(INSTALL_NAME_CMD)libjulia.$(SHLIB_EXT) $@ + @$(INSTALL_NAME_CMD)libjulia.$(JL_MAJOR_SHLIB_EXT) $@ ifeq ($(OS), WINNT) @# Note that if the objcopy command starts getting too long, we can use `@file` to read @# command-line options from `file` instead. @@ -123,7 +123,7 @@ endif $(build_shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_DOBJS) $(SRCDIR)/list_strip_symbols.h | $(build_shlibdir) $(build_libdir) @$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -DLIBRARY_EXPORTS -shared $(DEBUGFLAGS) $(LIB_DOBJS) -o $@ \ $(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(call SONAME_FLAGS,libjulia-debug.$(JL_MAJOR_SHLIB_EXT))) - @$(INSTALL_NAME_CMD)libjulia-debug.$(SHLIB_EXT) $@ + @$(INSTALL_NAME_CMD)libjulia-debug.$(JL_MAJOR_SHLIB_EXT) $@ ifeq ($(OS), WINNT) @$(call PRINT_ANALYZE, $(OBJCOPY) $(build_libdir)/$(notdir $@).tmp.a $(STRIP_EXPORTED_FUNCS) $(build_libdir)/$(notdir $@).a && rm $(build_libdir)/$(notdir $@).tmp.a) endif diff --git a/src/Makefile b/src/Makefile index 886a0a546ff3a..371cbfc2d921e 100644 --- a/src/Makefile +++ b/src/Makefile @@ -150,6 +150,9 @@ CLANG_LDFLAGS := $(LLVM_LDFLAGS) ifeq ($(OS), Darwin) CLANG_LDFLAGS += -Wl,-undefined,dynamic_lookup OSLIBS += $(SRCDIR)/mach_dyld_atfork.tbd +LIBJULIA_PATH_REL := @rpath/libjulia +else +LIBJULIA_PATH_REL := libjulia endif COMMON_LIBPATHS := -L$(build_libdir) -L$(build_shlibdir) @@ -174,8 +177,8 @@ SHIPFLAGS += "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys.$(SHLIB_ DEBUGFLAGS += "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys-debug.$(SHLIB_EXT)\"" # Add SONAME 
defines so we can embed proper `dlopen()` calls. -SHIPFLAGS += "-DJL_LIBJULIA_SONAME=\"libjulia.$(JL_MAJOR_SHLIB_EXT)\"" "-DJL_LIBJULIA_INTERNAL_SONAME=\"libjulia-internal.$(JL_MAJOR_SHLIB_EXT)\"" -DEBUGFLAGS += "-DJL_LIBJULIA_SONAME=\"libjulia-debug.$(JL_MAJOR_SHLIB_EXT)\"" "-DJL_LIBJULIA_INTERNAL_SONAME=\"libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT)\"" +SHIPFLAGS += "-DJL_LIBJULIA_SONAME=\"$(LIBJULIA_PATH_REL).$(JL_MAJOR_SHLIB_EXT)\"" +DEBUGFLAGS += "-DJL_LIBJULIA_SONAME=\"$(LIBJULIA_PATH_REL)-debug.$(JL_MAJOR_SHLIB_EXT)\"" ifeq ($(USE_CROSS_FLISP), 1) FLISPDIR := $(BUILDDIR)/flisp/host From d561447cfcee9bf11148d50c6e7c099c3f6fded0 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 7 Feb 2022 05:19:24 -0600 Subject: [PATCH 33/57] Replace the `.ji` serialization with sysimage format This unifies two serializers, `dump.c` (used for packages) and `staticdata.c` (used for system images). It adopts the `staticdata` strategy, adding support for external linkage, uniquing of MethodInstances & types, method extensions, external specializations, and invalidation. This lays the groundwork for native code caching as done with system images. 
Co-authored-by: Valentin Churavy Co-authored-by: Jameson Nash Co-authored-by: Tim Holy (cherry picked from commit cbfdb3facd0f2ece4088f43ef97533e9e0921081) --- base/compiler/typeinfer.jl | 8 +- base/loading.jl | 83 +- deps/llvm.mk | 2 +- src/Makefile | 7 +- src/aotcompile.cpp | 27 +- src/clangsa/GCChecker.cpp | 2 +- src/codegen-stubs.c | 4 +- src/codegen.cpp | 14 +- src/datatype.c | 2 +- src/dlload.c | 2 +- src/dump.c | 3577 --------------------------------- src/gc.c | 25 +- src/gf.c | 2 +- src/init.c | 6 +- src/ircode.c | 132 ++ src/jitlayers.h | 1 - src/jl_exported_funcs.inc | 7 +- src/julia.expmap | 1 + src/julia.h | 19 +- src/julia_internal.h | 62 +- src/llvm-multiversioning.cpp | 55 +- src/method.c | 2 +- src/module.c | 25 +- src/precompile.c | 118 +- src/processor.cpp | 9 +- src/processor.h | 1 + src/processor_arm.cpp | 23 + src/processor_fallback.cpp | 25 + src/processor_x86.cpp | 22 + src/rtutils.c | 6 + src/staticdata.c | 2317 +++++++++++++++------ src/staticdata_utils.c | 1279 ++++++++++++ src/subtype.c | 4 +- src/support/arraylist.h | 2 +- src/support/rle.h | 9 +- src/threading.c | 2 + stdlib/LLD_jll/src/LLD_jll.jl | 1 - stdlib/Profile/src/Allocs.jl | 6 +- test/precompile.jl | 29 +- 39 files changed, 3485 insertions(+), 4433 deletions(-) delete mode 100644 src/dump.c create mode 100644 src/staticdata_utils.c diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl index 0bc3d6a68e253..b14350cfa1ee8 100644 --- a/base/compiler/typeinfer.jl +++ b/base/compiler/typeinfer.jl @@ -1,8 +1,8 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -# Tracking of newly-inferred MethodInstances during precompilation +# Tracking of newly-inferred CodeInstances during precompilation const track_newly_inferred = RefValue{Bool}(false) -const newly_inferred = MethodInstance[] +const newly_inferred = CodeInstance[] # build (and start inferring) the inference frame for the top-level MethodInstance function typeinf(interp::AbstractInterpreter, result::InferenceResult, cache::Symbol) @@ -400,11 +400,11 @@ function cache_result!(interp::AbstractInterpreter, result::InferenceResult) # TODO: also don't store inferred code if we've previously decided to interpret this function if !already_inferred inferred_result = transform_result_for_cache(interp, linfo, valid_worlds, result) - code_cache(interp)[linfo] = CodeInstance(result, inferred_result, valid_worlds) + code_cache(interp)[linfo] = ci = CodeInstance(result, inferred_result, valid_worlds) if track_newly_inferred[] m = linfo.def if isa(m, Method) && m.module != Core - ccall(:jl_push_newly_inferred, Cvoid, (Any,), linfo) + ccall(:jl_push_newly_inferred, Cvoid, (Any,), ci) end end end diff --git a/base/loading.jl b/base/loading.jl index 8846b5f197c94..ea350ff72d960 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -941,7 +941,7 @@ function _include_from_serialized(pkg::PkgId, path::String, depmods::Vector{Any} end @debug "Loading cache file $path for $pkg" - sv = ccall(:jl_restore_incremental, Any, (Cstring, Any), path, depmods) + sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint), path, depmods, false) if isa(sv, Exception) return sv end @@ -1165,7 +1165,7 @@ end # End extensions # loads a precompile cache file, after checking stale_cachefile tests -function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt64) +function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt128) assert_havelock(require_lock) loaded = nothing if root_module_exists(modkey) @@ -1214,7 +1214,7 @@ function 
_tryrequire_from_serialized(modkey::PkgId, path::String, sourcepath::St for i in 1:length(depmods) dep = depmods[i] dep isa Module && continue - _, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt64} + _, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt128} @assert root_module_exists(depkey) dep = root_module(depkey) depmods[i] = dep @@ -1246,7 +1246,7 @@ function _tryrequire_from_serialized(pkg::PkgId, path::String) local depmodnames io = open(path, "r") try - isvalid_cache_header(io) || return ArgumentError("Invalid header in cache file $path.") + iszero(isvalid_cache_header(io)) && return ArgumentError("Invalid header in cache file $path.") depmodnames = parse_cache_header(io)[3] isvalid_file_crc(io) || return ArgumentError("Invalid checksum in cache file $path.") finally @@ -1268,7 +1268,7 @@ end # returns `nothing` if require found a precompile cache for this sourcepath, but couldn't load it # returns the set of modules restored if the cache load succeeded -@constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, build_id::UInt64) +@constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, build_id::UInt128) assert_havelock(require_lock) paths = find_all_in_cache_path(pkg) for path_to_try in paths::Vector{String} @@ -1281,7 +1281,7 @@ end for i in 1:length(staledeps) dep = staledeps[i] dep isa Module && continue - modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt64} + modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128} modpaths = find_all_in_cache_path(modkey) modfound = false for modpath_to_try in modpaths::Vector{String} @@ -1295,7 +1295,7 @@ end break end if !modfound - @debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $modbuild_id is missing from the cache." + @debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $(UUID(modbuild_id)) is missing from the cache." 
staledeps = true break end @@ -1347,7 +1347,7 @@ const package_callbacks = Any[] const include_callbacks = Any[] # used to optionally track dependencies when requiring a module: -const _concrete_dependencies = Pair{PkgId,UInt64}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them +const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them const _require_dependencies = Any[] # a list of (mod, path, mtime) tuples that are the file dependencies of the module currently being precompiled const _track_dependencies = Ref(false) # set this to true to track the list of file dependencies function _include_dependency(mod::Module, _path::AbstractString) @@ -1601,7 +1601,7 @@ function _require(pkg::PkgId, env=nothing) # attempt to load the module file via the precompile cache locations if JLOptions().use_compiled_modules != 0 - m = _require_search_from_serialized(pkg, path, UInt64(0)) + m = _require_search_from_serialized(pkg, path, UInt128(0)) if m isa Module return m end @@ -1611,7 +1611,7 @@ function _require(pkg::PkgId, env=nothing) # but it was not handled by the precompile loader, complain for (concrete_pkg, concrete_build_id) in _concrete_dependencies if pkg == concrete_pkg - @warn """Module $(pkg.name) with build ID $concrete_build_id is missing from the cache. + @warn """Module $(pkg.name) with build ID $((UUID(concrete_build_id))) is missing from the cache. 
This may mean $pkg does not support precompilation but is imported by a module that does.""" if JLOptions().incremental != 0 # during incremental precompilation, this should be fail-fast @@ -1982,9 +1982,13 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in close(tmpio) p = create_expr_cache(pkg, path, tmppath, concrete_deps, internal_stderr, internal_stdout) if success(p) - # append checksum to the end of the .ji file: - open(tmppath, "a+") do f - write(f, _crc32c(seekstart(f))) + # append extra crc to the end of the .ji file: + open(tmppath, "r+") do f + if iszero(isvalid_cache_header(f)) + error("Invalid header for $pkg in new cache file $(repr(tmppath)).") + end + seekstart(f) + write(f, _crc32c(f)) end # inherit permission from the source file (and make them writable) chmod(tmppath, filemode(path) & 0o777 | 0o200) @@ -2004,7 +2008,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in end end - # this is atomic according to POSIX: + # this is atomic according to POSIX (not Win32): rename(tmppath, cachefile; force=true) return cachefile end @@ -2014,13 +2018,16 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in if p.exitcode == 125 return PrecompilableError() else - error("Failed to precompile $pkg to $tmppath.") + error("Failed to precompile $pkg to $(repr(tmppath)).") end end -module_build_id(m::Module) = ccall(:jl_module_build_id, UInt64, (Any,), m) +function module_build_id(m::Module) + hi, lo = ccall(:jl_module_build_id, NTuple{2,UInt64}, (Any,), m) + return (UInt128(hi) << 64) | lo +end -isvalid_cache_header(f::IOStream) = (0 != ccall(:jl_read_verify_header, Cint, (Ptr{Cvoid},), f.ios)) +isvalid_cache_header(f::IOStream) = ccall(:jl_read_verify_header, UInt64, (Ptr{Cvoid},), f.ios) # returns checksum id or zero isvalid_file_crc(f::IOStream) = (_crc32c(seekstart(f), filesize(f) - 4) == read(f, UInt32)) struct CacheHeaderIncludes @@ -2094,13 +2101,14 @@ function 
parse_cache_header(f::IO) totbytes -= 8 @assert totbytes == 0 "header of cache file appears to be corrupt (totbytes == $(totbytes))" # read the list of modules that are required to be present during loading - required_modules = Vector{Pair{PkgId, UInt64}}() + required_modules = Vector{Pair{PkgId, UInt128}}() while true n = read(f, Int32) n == 0 && break sym = String(read(f, n)) # module name uuid = UUID((read(f, UInt64), read(f, UInt64))) # pkg UUID - build_id = read(f, UInt64) # build id + build_id = UInt128(read(f, UInt64)) << 64 + build_id |= read(f, UInt64) push!(required_modules, PkgId(uuid, sym) => build_id) end return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash @@ -2109,17 +2117,17 @@ end function parse_cache_header(cachefile::String; srcfiles_only::Bool=false) io = open(cachefile, "r") try - !isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile.")) + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) ret = parse_cache_header(io) srcfiles_only || return ret - modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash = ret + _, (includes, _), _, srctextpos, _... 
= ret srcfiles = srctext_files(io, srctextpos) delidx = Int[] for (i, chi) in enumerate(includes) chi.filename ∈ srcfiles || push!(delidx, i) end deleteat!(includes, delidx) - return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash + return ret finally close(io) end @@ -2127,11 +2135,11 @@ end -preferences_hash(f::IO) = parse_cache_header(f)[end] +preferences_hash(f::IO) = parse_cache_header(f)[6] function preferences_hash(cachefile::String) io = open(cachefile, "r") try - if !isvalid_cache_header(io) + if iszero(isvalid_cache_header(io)) throw(ArgumentError("Invalid header in cache file $cachefile.")) end return preferences_hash(io) @@ -2142,14 +2150,14 @@ end function cache_dependencies(f::IO) - defs, (includes, requires), modules, srctextpos, prefs, prefs_hash = parse_cache_header(f) + _, (includes, _), modules, _... = parse_cache_header(f) return modules, map(chi -> (chi.filename, chi.mtime), includes) # return just filename and mtime end function cache_dependencies(cachefile::String) io = open(cachefile, "r") try - !isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile.")) + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) return cache_dependencies(io) finally close(io) @@ -2157,7 +2165,7 @@ function cache_dependencies(cachefile::String) end function read_dependency_src(io::IO, filename::AbstractString) - modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash = parse_cache_header(io) + srctextpos = parse_cache_header(io)[4] srctextpos == 0 && error("no source-text stored in cache file") seek(io, srctextpos) return _read_dependency_src(io, filename) @@ -2180,7 +2188,7 @@ end function read_dependency_src(cachefile::String, filename::AbstractString) io = open(cachefile, "r") try - !isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile.")) + iszero(isvalid_cache_header(io)) && 
throw(ArgumentError("Invalid header in cache file $cachefile.")) return read_dependency_src(io, filename) finally close(io) @@ -2370,12 +2378,13 @@ get_compiletime_preferences(::Nothing) = String[] # returns true if it "cachefile.ji" is stale relative to "modpath.jl" and build_id for modkey # otherwise returns the list of dependencies to also check @constprop :none function stale_cachefile(modpath::String, cachefile::String; ignore_loaded::Bool = false) - return stale_cachefile(PkgId(""), UInt64(0), modpath, cachefile; ignore_loaded) + return stale_cachefile(PkgId(""), UInt128(0), modpath, cachefile; ignore_loaded) end -@constprop :none function stale_cachefile(modkey::PkgId, build_id::UInt64, modpath::String, cachefile::String; ignore_loaded::Bool = false) +@constprop :none function stale_cachefile(modkey::PkgId, build_id::UInt128, modpath::String, cachefile::String; ignore_loaded::Bool = false) io = open(cachefile, "r") try - if !isvalid_cache_header(io) + checksum = isvalid_cache_header(io) + if iszero(checksum) @debug "Rejecting cache file $cachefile due to it containing an invalid cache header" return true # invalid cache file end @@ -2388,9 +2397,12 @@ end @debug "Rejecting cache file $cachefile for $modkey since it is for $id instead" return true end - if build_id != UInt64(0) && id.second != build_id - @debug "Ignoring cache file $cachefile for $modkey since it is does not provide desired build_id" - return true + if build_id != UInt128(0) + id_build = (UInt128(checksum) << 64) | id.second + if id_build != build_id + @debug "Ignoring cache file $cachefile for $modkey ($((UUID(id_build)))) since it is does not provide desired build_id ($((UUID(build_id))))" + return true + end end id = id.first modules = Dict{PkgId, UInt64}(modules) @@ -2430,11 +2442,12 @@ end for (req_key, req_build_id) in _concrete_dependencies build_id = get(modules, req_key, UInt64(0)) if build_id !== UInt64(0) + build_id |= UInt128(checksum) << 64 if build_id === req_build_id 
skip_timecheck = true break end - @debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $build_id) for $req_key (want $req_build_id)" + @debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $((UUID(build_id)))) for $req_key (want $(UUID(req_build_id)))" return true # cachefile doesn't provide the required version of the dependency end end diff --git a/deps/llvm.mk b/deps/llvm.mk index c13551ee331ef..78d037ec126d0 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -308,8 +308,8 @@ LLVM_TOOLS_JLL_TAGS := -llvm_version+$(LLVM_VER_MAJ) endif $(eval $(call bb-install,llvm,LLVM,false,true)) -$(eval $(call bb-install,clang,CLANG,false,true)) $(eval $(call bb-install,lld,LLD,false,true)) +$(eval $(call bb-install,clang,CLANG,false,true)) $(eval $(call bb-install,llvm-tools,LLVM_TOOLS,false,true)) endif # USE_BINARYBUILDER_LLVM diff --git a/src/Makefile b/src/Makefile index 371cbfc2d921e..380d2687e75a1 100644 --- a/src/Makefile +++ b/src/Makefile @@ -42,7 +42,7 @@ endif SRCS := \ jltypes gf typemap smallintset ast builtins module interpreter symbol \ - dlload sys init task array dump staticdata toplevel jl_uv datatype \ + dlload sys init task array staticdata toplevel jl_uv datatype \ simplevector runtime_intrinsics precompile jloptions \ threading partr stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \ jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \ @@ -294,7 +294,6 @@ $(BUILDDIR)/codegen.o $(BUILDDIR)/codegen.dbg.obj: $(addprefix $(SRCDIR)/,\ $(BUILDDIR)/datatype.o $(BUILDDIR)/datatype.dbg.obj: $(SRCDIR)/support/htable.h $(SRCDIR)/support/htable.inc $(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,debuginfo.h processor.h jitlayers.h debug-registry.h) $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h -$(BUILDDIR)/dump.o $(BUILDDIR)/dump.dbg.obj: $(addprefix $(SRCDIR)/,common_symbols1.inc 
common_symbols2.inc builtin_proto.h serialize.h) $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h @@ -320,7 +319,7 @@ $(BUILDDIR)/llvm-remove-addrspaces.o $(BUILDDIR)/llvm-remove-addrspaces.dbg.obj: $(BUILDDIR)/llvm-ptls.o $(BUILDDIR)/llvm-ptls.dbg.obj: $(SRCDIR)/codegen_shared.h $(BUILDDIR)/processor.o $(BUILDDIR)/processor.dbg.obj: $(addprefix $(SRCDIR)/,processor_*.cpp processor.h features_*.h) $(BUILDDIR)/signal-handling.o $(BUILDDIR)/signal-handling.dbg.obj: $(addprefix $(SRCDIR)/,signals-*.c) -$(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/processor.h $(SRCDIR)/builtin_proto.h +$(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/staticdata_utils.c $(SRCDIR)/processor.h $(SRCDIR)/builtin_proto.h $(BUILDDIR)/toplevel.o $(BUILDDIR)/toplevel.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/ircode.o $(BUILDDIR)/ircode.dbg.obj: $(SRCDIR)/serialize.h $(BUILDDIR)/pipeline.o $(BUILDDIR)/pipeline.dbg.obj: $(SRCDIR)/passes.h $(SRCDIR)/jitlayers.h @@ -456,7 +455,7 @@ SA_EXCEPTIONS-jloptions.c := -Xanalyzer -analyzer-config -Xana SA_EXCEPTIONS-subtype.c := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core.uninitialized.Assign;core.UndefinedBinaryOperatorResult" SA_EXCEPTIONS-codegen.c := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core" # these need to be annotated (and possibly fixed) -SKIP_IMPLICIT_ATOMICS := dump.c module.c staticdata.c codegen.cpp +SKIP_IMPLICIT_ATOMICS := module.c staticdata.c codegen.cpp # these need to be annotated (and possibly fixed) SKIP_GC_CHECK := codegen.cpp rtutils.c diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 83e1c6d150430..26ba66fa96737 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -92,7 +92,7 @@ typedef struct { std::vector jl_sysimg_fvars; std::vector 
jl_sysimg_gvars; std::map> jl_fvar_map; - std::map jl_value_to_llvm; // uses 1-based indexing + std::vector jl_value_to_llvm; } jl_native_code_desc_t; extern "C" JL_DLLEXPORT @@ -110,17 +110,12 @@ void jl_get_function_id_impl(void *native_code, jl_code_instance_t *codeinst, } extern "C" JL_DLLEXPORT -int32_t jl_get_llvm_gv_impl(void *native_code, jl_value_t *p) +void jl_get_llvm_gvs_impl(void *native_code, arraylist_t *gvs) { - // map a jl_value_t memory location to a GlobalVariable + // map a memory location (jl_value_t or jl_binding_t) to a GlobalVariable jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; - if (data) { - auto it = data->jl_value_to_llvm.find(p); - if (it != data->jl_value_to_llvm.end()) { - return it->second; - } - } - return 0; + arraylist_grow(gvs, data->jl_value_to_llvm.size()); + memcpy(gvs->items, data->jl_value_to_llvm.data(), gvs->len * sizeof(void*)); } extern "C" JL_DLLEXPORT @@ -148,7 +143,6 @@ static void emit_offset_table(Module &mod, const std::vector &vars { // Emit a global variable with all the variable addresses. // The cloning pass will convert them into offsets. - assert(!vars.empty()); size_t nvars = vars.size(); std::vector addrs(nvars); for (size_t i = 0; i < nvars; i++) { @@ -258,9 +252,9 @@ static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance // this builds the object file portion of the sysimage files for fast startup, and can // also be used be extern consumers like GPUCompiler.jl to obtain a module containing // all reachable & inferrrable functions. The `policy` flag switches between the default -// mode `0`, the extern mode `1`, and imaging mode `2`. +// mode `0`, the extern mode `1`. 
extern "C" JL_DLLEXPORT -void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy) +void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode) { ++CreateNativeCalls; CreateNativeMax.updateMax(jl_array_len(methods)); @@ -268,7 +262,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm cgparams = &jl_default_cgparams; jl_native_code_desc_t *data = new jl_native_code_desc_t; CompilationPolicy policy = (CompilationPolicy) _policy; - bool imaging = imaging_default() || policy == CompilationPolicy::ImagingMode; + bool imaging = imaging_default() || _imaging_mode == 1; jl_workqueue_t emitted; jl_method_instance_t *mi = NULL; jl_code_info_t *src = NULL; @@ -342,10 +336,11 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm // process the globals array, before jl_merge_module destroys them std::vector gvars; + data->jl_value_to_llvm.resize(params.globals.size()); for (auto &global : params.globals) { + data->jl_value_to_llvm.at(gvars.size()) = global.first; gvars.push_back(std::string(global.second->getName())); - data->jl_value_to_llvm[global.first] = gvars.size(); } CreateNativeMethods += emitted.size(); @@ -575,7 +570,7 @@ void jl_dump_native_impl(void *native_code, Type *T_psize = T_size->getPointerTo(); // add metadata information - if (imaging_default()) { + if (imaging_default() || jl_options.outputo) { emit_offset_table(*dataM, data->jl_sysimg_gvars, "jl_sysimg_gvars", T_psize); emit_offset_table(*dataM, data->jl_sysimg_fvars, "jl_sysimg_fvars", T_psize); diff --git a/src/clangsa/GCChecker.cpp b/src/clangsa/GCChecker.cpp index 34821d6bac9cb..513e6db606eb8 100644 --- a/src/clangsa/GCChecker.cpp +++ b/src/clangsa/GCChecker.cpp @@ -1332,7 +1332,7 @@ bool GCChecker::evalCall(const CallEvent &Call, CheckerContext &C) const { } else if (name == 
"JL_GC_PUSH1" || name == "JL_GC_PUSH2" || name == "JL_GC_PUSH3" || name == "JL_GC_PUSH4" || name == "JL_GC_PUSH5" || name == "JL_GC_PUSH6" || - name == "JL_GC_PUSH7") { + name == "JL_GC_PUSH7" || name == "JL_GC_PUSH8") { ProgramStateRef State = C.getState(); // Transform slots to roots, transform values to rooted unsigned NumArgs = CE->getNumArgs(); diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c index 1f209f36291a2..01324e349f08f 100644 --- a/src/codegen-stubs.c +++ b/src/codegen-stubs.c @@ -13,7 +13,7 @@ JL_DLLEXPORT void jl_dump_native_fallback(void *native_code, const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, const char *sysimg_data, size_t sysimg_len) UNAVAILABLE -JL_DLLEXPORT int32_t jl_get_llvm_gv_fallback(void *native_code, jl_value_t *p) UNAVAILABLE +JL_DLLEXPORT void jl_get_llvm_gvs_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE JL_DLLEXPORT void jl_extern_c_fallback(jl_function_t *f, jl_value_t *rt, jl_value_t *argt, char *name) UNAVAILABLE JL_DLLEXPORT jl_value_t *jl_dump_method_asm_fallback(jl_method_instance_t *linfo, size_t world, @@ -66,7 +66,7 @@ JL_DLLEXPORT size_t jl_jit_total_bytes_fallback(void) return 0; } -JL_DLLEXPORT void *jl_create_native_fallback(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmctxt, const jl_cgparams_t *cgparams, int _policy) UNAVAILABLE +JL_DLLEXPORT void *jl_create_native_fallback(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode) UNAVAILABLE JL_DLLEXPORT void jl_dump_compiles_fallback(void *s) { diff --git a/src/codegen.cpp b/src/codegen.cpp index c938651059ca7..cdc5e00a26281 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -2222,7 +2222,8 @@ static void visitLine(jl_codectx_t &ctx, uint64_t *ptr, Value *addend, const cha static void coverageVisitLine(jl_codectx_t &ctx, StringRef filename, int line) { - assert(!ctx.emission_context.imaging); + if (ctx.emission_context.imaging) 
+ return; // TODO if (filename == "" || filename == "none" || filename == "no file" || filename == "" || line < 0) return; visitLine(ctx, jl_coverage_data_pointer(filename, line), ConstantInt::get(getInt64Ty(ctx.builder.getContext()), 1), "lcnt"); @@ -2232,7 +2233,8 @@ static void coverageVisitLine(jl_codectx_t &ctx, StringRef filename, int line) static void mallocVisitLine(jl_codectx_t &ctx, StringRef filename, int line, Value *sync) { - assert(!ctx.emission_context.imaging); + if (ctx.emission_context.imaging) + return; // TODO if (filename == "" || filename == "none" || filename == "no file" || filename == "" || line < 0) return; Value *addend = sync @@ -4021,6 +4023,8 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const std::string name; StringRef protoname; bool need_to_emit = true; + // TODO: We should check if the code is available externally + // and then emit a trampoline. if (ctx.use_cache) { // optimization: emit the correct name immediately, if we know it // TODO: use `emitted` map here too to try to consolidate names? @@ -6786,7 +6790,7 @@ static jl_llvm_functions_t }(); std::string wrapName; - raw_string_ostream(wrapName) << "jfptr_" << unadorned_name << "_" << globalUniqueGeneratedNames++; + raw_string_ostream(wrapName) << "jfptr_" << unadorned_name << "_" << globalUniqueGeneratedNames++; declarations.functionObject = wrapName; (void)gen_invoke_wrapper(lam, jlrettype, returninfo, retarg, declarations.functionObject, M, ctx.emission_context); // TODO: add attributes: maybe_mark_argument_dereferenceable(Arg, argType) @@ -8261,6 +8265,10 @@ void jl_compile_workqueue( StringRef preal_decl = ""; bool preal_specsig = false; auto invoke = jl_atomic_load_relaxed(&codeinst->invoke); + // TODO: available_extern + // We need to emit a trampoline that loads the target address in an extern_module from a GV + // Right now we will unecessarily emit a function we have already compiled in a native module + // again in a calling module. 
if (params.cache && invoke != NULL) { auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr); if (invoke == jl_fptr_args_addr) { diff --git a/src/datatype.c b/src/datatype.c index 0dcae8a6dec98..24b3c3ab6c1fb 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -72,7 +72,7 @@ JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *modu jl_atomic_store_relaxed(&tn->cache, jl_emptysvec); jl_atomic_store_relaxed(&tn->linearcache, jl_emptysvec); tn->names = NULL; - tn->hash = bitmix(bitmix(module ? module->build_id : 0, name->hash), 0xa1ada1da); + tn->hash = bitmix(bitmix(module ? module->build_id.lo : 0, name->hash), 0xa1ada1da); tn->_reserved = 0; tn->abstract = abstract; tn->mutabl = mutabl; diff --git a/src/dlload.c b/src/dlload.c index 57310c18b0e46..dd5d75da31a34 100644 --- a/src/dlload.c +++ b/src/dlload.c @@ -73,7 +73,7 @@ const char *jl_crtdll_name = CRTDLL_BASENAME ".dll"; #define JL_RTLD(flags, FLAG) (flags & JL_RTLD_ ## FLAG ? RTLD_ ## FLAG : 0) #ifdef _OS_WINDOWS_ -static void win32_formatmessage(DWORD code, char *reason, int len) JL_NOTSAFEPOINT +void win32_formatmessage(DWORD code, char *reason, int len) JL_NOTSAFEPOINT { DWORD res; LPWSTR errmsg; diff --git a/src/dump.c b/src/dump.c deleted file mode 100644 index 96c875c4ec7f5..0000000000000 --- a/src/dump.c +++ /dev/null @@ -1,3577 +0,0 @@ -// This file is a part of Julia. License is MIT: https://julialang.org/license - -/* - saving and restoring precompiled modules (.ji files) -*/ -#include -#include - -#include "julia.h" -#include "julia_internal.h" -#include "julia_gcext.h" -#include "builtin_proto.h" -#include "serialize.h" - -#ifndef _OS_WINDOWS_ -#include -#endif - -#include "valgrind.h" -#include "julia_assert.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// This file, together with ircode.c, allows (de)serialization between -// modules and *.ji cache files. 
`jl_save_incremental` gets called as the final step -// during package precompilation, and `_jl_restore_incremental` by `using SomePkg` -// whenever `SomePkg` has not yet been loaded. - -// Types, methods, and method instances form a graph that may have cycles, so -// serialization has to break these cycles. This is handled via "backreferences," -// referring to already (de)serialized items by an index. It is critial to ensure -// that the indexes of these backreferences align precisely during serialization -// and deserialization, to ensure that these integer indexes mean the same thing -// under both circumstances. Consequently, if you are modifying this file, be -// careful to match the sequence, if necessary reserving space for something that will -// be updated later. - -// It is also necessary to save & restore references to externally-defined -// objects, e.g., for package methods that call methods defined in Base or -// elsewhere. Consequently during deserialization there's a distinction between -// "reference" types, methods, and method instances (essentially like a -// GlobalRef), and "recached" version that refer to the actual entity in the -// running session. As a concrete example, types have a module in which they are -// defined, but once defined those types can be used by any dependent package. -// We don't store the full type definition again in that dependent package, we -// just encode a reference to that type. In the running session, such references -// are merely pointers to the type-cache, but the specific address is obviously -// not likely to be reproducible across sessions (it will differ between the -// session in which you precompile and the session in which you're using the -// package). 
Hence, during serialization we recode them as "verbose" references -// (that follow Julia syntax to allow them to be reconstructed), but on -// deserialization we have to replace those verbose references with the -// appropriate pointer in the user's running session. We complete -// deserialization before beginning the process of recaching, because we need -// the backreferences during deserialization and the actual objects during -// recaching. - -// Finally, because our backedge graph is not bidirectional, special handling is -// required to identify backedges from external methods that call internal methods. -// These get set aside and restored at the end of deserialization. - -// In broad terms, the major steps in serialization are: -// - starting from a "worklist" of modules, write the header. This stores things -// like the Julia build this was precompiled for, the package dependencies, -// the list of include files, file modification times, etc. -// - gather the collection of items to be written to this precompile file. This -// includes accessible from the module's binding table (if they are owned by a -// worklist module), but also includes things like methods added to external -// functions, instances of external methods that were newly type-inferred -// while precompiling a worklist module, and backedges of callees that were -// called by methods in this package. By and large, these latter items are not -// referenced by the module(s) in the package, and so these have to be -// extracted by traversing the entire system searching for things that do link -// back to a module in the worklist. -// - serialize all the items. The first time we encounter an item, we serialized -// it, and on future references (pointers) to that item we replace them with -// with a backreference. `jl_serialize_*` functions handle this work. -// - write source text for the files that defined the package. This is primarily -// to support Revise.jl. 
- -// Deserialization is the mirror image of serialization, but in some ways is -// trickier: -// - we have to merge items into the running session (recaching as described -// above) and handle cases like having two dependent packages caching the same -// MethodInstance of a dependency -// - we have to check for invalidation---the user might have loaded other -// packages that define methods that supersede some of the dispatches chosen -// when the package was precompiled, or this package might define methods that -// supersede dispatches for previously-loaded packages. These two -// possibilities are checked during backedge and method insertion, -// respectively. -// Both of these mean that deserialization requires one to look up a lot of -// things in the running session; for example, for invalidation checks we have -// to do type-intersection between signatures used for MethodInstances and the -// current session's full MethodTable. In practice, such steps dominate package -// loading time (it has very little to do with I/O or deserialization -// performance). Paradoxically, sometimes storing more code in a package can -// lead to faster performance: references to things in the same .ji file can be -// precomputed, but external references have to be looked up. You can see this -// effect in the benchmarks for #43990, where storing external MethodInstances -// and CodeInstances (more code than was stored previously) actually decreased -// load times for many packages. - -// Note that one should prioritize deserialization performance over serialization performance, -// since deserialization may be performed much more often than serialization. -// Certain items are preprocessed during serialization to save work when they are -// later deserialized. 
- - -// TODO: put WeakRefs on the weak_refs list during deserialization -// TODO: handle finalizers - -// type => tag hash for a few core types (e.g., Expr, PhiNode, etc) -static htable_t ser_tag; -// tag => type mapping, the reverse of ser_tag -static jl_value_t *deser_tag[256]; -// hash of some common symbols, encoded as CommonSym_tag plus 1 byte -static htable_t common_symbol_tag; -static jl_value_t *deser_symbols[256]; - -// table of all objects that have been deserialized, indexed by pos -// (the order in the serializer stream). the low -// bit is reserved for flagging certain entries and pos is -// left shift by 1 -static htable_t backref_table; // pos = backref_table[obj] -static int backref_table_numel; -static arraylist_t backref_list; // obj = backref_list[pos] - -// set of all CodeInstances yet to be (in)validated -static htable_t new_code_instance_validate; - -// list of (jl_value_t **loc, size_t pos) entries -// for anything that was flagged by the deserializer for later -// type-rewriting of some sort. pos is the index in backref_list. -static arraylist_t flagref_list; -// ref => value hash for looking up the "real" entity from -// the deserialized ref. Used for entities that must be unique, -// like types, methods, and method instances -static htable_t uniquing_table; - -// list of (size_t pos, itemkey) entries -// for the serializer to mark values in need of rework -// during deserialization later -// This includes items that need rehashing (IdDict, TypeMapLevels) -// and modules. -static arraylist_t reinit_list; - -// list of modules being serialized -// This is not quite globally rooted, but we take care to only -// ever assigned rooted values here. 
-static jl_array_t *serializer_worklist JL_GLOBALLY_ROOTED; -// The set of external MethodInstances we want to serialize -// (methods owned by other modules that were first inferred for a -// module currently being serialized) -static htable_t external_mis; -// Inference tracks newly-inferred MethodInstances during precompilation -// and registers them by calling jl_set_newly_inferred -static jl_array_t *newly_inferred JL_GLOBALLY_ROOTED; -// Mutex for newly_inferred -static jl_mutex_t newly_inferred_mutex; - -// New roots to add to Methods. These can't be added until after -// recaching is complete, so we have to hold on to them separately -// Stored as method => (worklist_key, newroots) -// The worklist_key is the uuid of the module that triggered addition -// of `newroots`. This is needed because CodeInstances reference -// their roots by "index", and we use a bipartite index -// (module_uuid, integer_index) to make indexes "relocatable" -// (meaning that users can load modules in different orders and -// so the absolute integer index of a root is not reproducible). -// See the "root blocks" section of method.c for more detail. 
-static htable_t queued_method_roots; - -// inverse of backedges graph (caller=>callees hash) -jl_array_t *edges_map JL_GLOBALLY_ROOTED; // rooted for the duration of our uses of this - -// list of requested ccallable signatures -static arraylist_t ccallable_list; - -typedef struct { - ios_t *s; - jl_ptls_t ptls; - jl_array_t *loaded_modules_array; -} jl_serializer_state; - -static jl_value_t *jl_idtable_type = NULL; -static jl_typename_t *jl_idtable_typename = NULL; -static jl_value_t *jl_bigint_type = NULL; -static int gmp_limb_size = 0; - -static void write_float64(ios_t *s, double x) JL_NOTSAFEPOINT -{ - write_uint64(s, *((uint64_t*)&x)); -} - -void *jl_lookup_ser_tag(jl_value_t *v) -{ - return ptrhash_get(&ser_tag, v); -} - -void *jl_lookup_common_symbol(jl_value_t *v) -{ - return ptrhash_get(&common_symbol_tag, v); -} - -jl_value_t *jl_deser_tag(uint8_t tag) -{ - return deser_tag[tag]; -} - -jl_value_t *jl_deser_symbol(uint8_t tag) -{ - return deser_symbols[tag]; -} - -uint64_t jl_worklist_key(jl_array_t *worklist) -{ - assert(jl_is_array(worklist)); - size_t len = jl_array_len(worklist); - if (len > 0) { - jl_module_t *topmod = (jl_module_t*)jl_array_ptr_ref(worklist, len-1); - assert(jl_is_module(topmod)); - return topmod->build_id; - } - return 0; -} - -// --- serialize --- - -#define jl_serialize_value(s, v) jl_serialize_value_((s), (jl_value_t*)(v), 0) -static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED; - -static void jl_serialize_cnull(jl_serializer_state *s, jl_value_t *t) -{ - backref_table_numel++; - write_uint8(s->s, TAG_CNULL); - jl_serialize_value(s, t); -} - -static int module_in_worklist(jl_module_t *mod) JL_NOTSAFEPOINT -{ - int i, l = jl_array_len(serializer_worklist); - for (i = 0; i < l; i++) { - jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(serializer_worklist, i); - if (jl_is_module(workmod) && jl_is_submodule(mod, workmod)) - return 1; - } - return 0; -} - -static int 
method_instance_in_queue(jl_method_instance_t *mi) -{ - return ptrhash_get(&external_mis, mi) != HT_NOTFOUND; -} - -// compute whether a type references something internal to worklist -// and thus could not have existed before deserialize -// and thus does not need delayed unique-ing -static int type_in_worklist(jl_datatype_t *dt) JL_NOTSAFEPOINT -{ - if (module_in_worklist(dt->name->module)) - return 1; - int i, l = jl_svec_len(dt->parameters); - for (i = 0; i < l; i++) { - jl_value_t *p = jl_unwrap_unionall(jl_tparam(dt, i)); - // TODO: what about Union and TypeVar?? - if (type_in_worklist((jl_datatype_t*)(jl_is_datatype(p) ? p : jl_typeof(p)))) - return 1; - } - return 0; -} - -static int type_recursively_external(jl_datatype_t *dt); - -static int type_parameter_recursively_external(jl_value_t *p0) JL_NOTSAFEPOINT -{ - if (!jl_is_concrete_type(p0)) - return 0; - jl_datatype_t *p = (jl_datatype_t*)p0; - //while (jl_is_unionall(p)) { - // if (!type_parameter_recursively_external(((jl_unionall_t*)p)->var->lb)) - // return 0; - // if (!type_parameter_recursively_external(((jl_unionall_t*)p)->var->ub)) - // return 0; - // p = (jl_datatype_t*)((jl_unionall_t*)p)->body; - //} - if (module_in_worklist(p->name->module)) - return 0; - if (p->name->wrapper != (jl_value_t*)p0) { - if (!type_recursively_external(p)) - return 0; - } - return 1; -} - -// returns true if all of the parameters are tag 6 or 7 -static int type_recursively_external(jl_datatype_t *dt) JL_NOTSAFEPOINT -{ - if (!dt->isconcretetype) - return 0; - if (jl_svec_len(dt->parameters) == 0) - return 1; - - int i, l = jl_svec_len(dt->parameters); - for (i = 0; i < l; i++) { - if (!type_parameter_recursively_external(jl_tparam(dt, i))) - return 0; - } - return 1; -} - -static void mark_backedges_in_worklist(jl_method_instance_t *mi, htable_t *visited, int found) -{ - int oldfound = (char*)ptrhash_get(visited, mi) - (char*)HT_NOTFOUND; - if (oldfound < 3) - return; // not in-progress - ptrhash_put(visited, mi, 
(void*)((char*)HT_NOTFOUND + 1 + found)); -#ifndef NDEBUG - jl_module_t *mod = mi->def.module; - if (jl_is_method(mod)) - mod = ((jl_method_t*)mod)->module; - assert(jl_is_module(mod)); - assert(!mi->precompiled && !module_in_worklist(mod)); - assert(mi->backedges); -#endif - size_t i = 0, n = jl_array_len(mi->backedges); - while (i < n) { - jl_method_instance_t *be; - i = get_next_edge(mi->backedges, i, NULL, &be); - mark_backedges_in_worklist(be, visited, found); - } -} - -// When we infer external method instances, ensure they link back to the -// package. Otherwise they might be, e.g., for external macros -static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, int depth) -{ - jl_module_t *mod = mi->def.module; - if (jl_is_method(mod)) - mod = ((jl_method_t*)mod)->module; - assert(jl_is_module(mod)); - if (mi->precompiled || module_in_worklist(mod)) { - return 1; - } - if (!mi->backedges) { - return 0; - } - void **bp = ptrhash_bp(visited, mi); - // HT_NOTFOUND: not yet analyzed - // HT_NOTFOUND + 1: no link back - // HT_NOTFOUND + 2: does link back - // HT_NOTFOUND + 3 + depth: in-progress - int found = (char*)*bp - (char*)HT_NOTFOUND; - if (found) - return found - 1; - *bp = (void*)((char*)HT_NOTFOUND + 3 + depth); // preliminarily mark as in-progress - size_t i = 0, n = jl_array_len(mi->backedges); - int cycle = 0; - while (i < n) { - jl_method_instance_t *be; - i = get_next_edge(mi->backedges, i, NULL, &be); - int child_found = has_backedge_to_worklist(be, visited, depth + 1); - if (child_found == 1) { - found = 1; - break; - } - else if (child_found >= 2 && child_found - 2 < cycle) { - // record the cycle will resolve at depth "cycle" - cycle = child_found - 2; - assert(cycle); - } - } - if (!found && cycle && cycle != depth) - return cycle + 2; - bp = ptrhash_bp(visited, mi); // re-acquire since rehashing might change the location - *bp = (void*)((char*)HT_NOTFOUND + 1 + found); - if (cycle) { - // If we are the top of the current 
cycle, now mark all other parts of - // our cycle by re-walking the backedges graph and marking all WIP - // items as found. - // Be careful to only re-walk as far as we had originally scanned above. - // Or if we found a backedge, also mark all of the other parts of the - // cycle as also having an backedge. - n = i; - i = 0; - while (i < n) { - jl_method_instance_t *be; - i = get_next_edge(mi->backedges, i, NULL, &be); - mark_backedges_in_worklist(be, visited, found); - } - } - return found; -} - -// given the list of MethodInstances that were inferred during the -// build, select those that are external and have at least one -// relocatable CodeInstance and are inferred to be called from the worklist -// or explicitly added by a precompile statement. -// Also prepares external_mis for method_instance_in_queue queries. -static jl_array_t *queue_external_mis(jl_array_t *list) -{ - if (list == NULL) - return NULL; - size_t i, n = 0; - htable_t visited; - assert(jl_is_array(list)); - size_t n0 = jl_array_len(list); - htable_new(&visited, n0); - for (i = 0; i < n0; i++) { - jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(list, i); - assert(jl_is_method_instance(mi)); - if (jl_is_method(mi->def.value)) { - jl_method_t *m = mi->def.method; - if (!module_in_worklist(m->module)) { - jl_code_instance_t *ci = mi->cache; - while (ci) { - if (ci->max_world == ~(size_t)0 && ci->relocatability && ci->inferred) - break; - ci = jl_atomic_load_relaxed(&ci->next); - } - if (ci && ptrhash_get(&external_mis, mi) == HT_NOTFOUND) { - int found = has_backedge_to_worklist(mi, &visited, 1); - assert(found == 0 || found == 1); - if (found == 1) { - ptrhash_put(&external_mis, mi, ci); - n++; - } - } - } - } - } - htable_free(&visited); - if (n == 0) - return NULL; - jl_array_t *mi_list = jl_alloc_vec_any(n); - n = 0; - for (size_t i = 0; i < external_mis.size; i += 2) { - void *ci = external_mis.table[i+1]; - if (ci != HT_NOTFOUND) { - jl_array_ptr_set(mi_list, n++, 
(jl_value_t*)ci); - } - } - assert(n == jl_array_len(mi_list)); - return mi_list; -} - -static void jl_serialize_datatype(jl_serializer_state *s, jl_datatype_t *dt) JL_GC_DISABLED -{ - int tag = 0; - int internal = module_in_worklist(dt->name->module); - if (!internal && jl_unwrap_unionall(dt->name->wrapper) == (jl_value_t*)dt) { - tag = 6; // external primary type - } - else if (jl_is_tuple_type(dt) ? !dt->isconcretetype : dt->hasfreetypevars) { - tag = 0; // normal struct - } - else if (internal) { - if (jl_unwrap_unionall(dt->name->wrapper) == (jl_value_t*)dt) // comes up often since functions create types - tag = 5; // internal, and not in the typename cache - else - tag = 10; // anything else that's internal (just may need recaching) - } - else if (type_recursively_external(dt)) { - tag = 7; // external type that can be immediately recreated (with apply_type) - } - else if (type_in_worklist(dt)) { - tag = 11; // external, but definitely new (still needs caching, but not full unique-ing) - } - else { - // this is eligible for (and possibly requires) unique-ing later, - // so flag this in the backref table as special - uintptr_t *bp = (uintptr_t*)ptrhash_bp(&backref_table, dt); - assert(*bp != (uintptr_t)HT_NOTFOUND); - *bp |= 1; - tag = 12; - } - - write_uint8(s->s, TAG_DATATYPE); - write_uint8(s->s, tag); - if (tag == 6 || tag == 7) { - // for tag==6, copy its typevars in case there are references to them elsewhere - jl_serialize_value(s, dt->name); - jl_serialize_value(s, dt->parameters); - return; - } - - int has_instance = (dt->instance != NULL); - int has_layout = (dt->layout != NULL); - write_uint8(s->s, has_layout | (has_instance << 1)); - write_uint8(s->s, dt->hasfreetypevars - | (dt->isconcretetype << 1) - | (dt->isdispatchtuple << 2) - | (dt->isbitstype << 3) - | (dt->zeroinit << 4) - | (dt->has_concrete_subtype << 5) - | (dt->cached_by_hash << 6) - | (dt->isprimitivetype << 7)); - write_int32(s->s, dt->hash); - - if (has_layout) { - uint8_t layout = 
0; - if (dt->layout == ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type))->layout) { - layout = 1; - } - else if (dt->layout == jl_nothing_type->layout) { - layout = 2; - } - else if (dt->layout == ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_pointer_type))->layout) { - layout = 3; - } - write_uint8(s->s, layout); - if (layout == 0) { - uint32_t nf = dt->layout->nfields; - uint32_t np = dt->layout->npointers; - size_t fieldsize = jl_fielddesc_size(dt->layout->fielddesc_type); - ios_write(s->s, (const char*)dt->layout, sizeof(*dt->layout)); - size_t fldsize = nf * fieldsize; - if (dt->layout->first_ptr != -1) - fldsize += np << dt->layout->fielddesc_type; - ios_write(s->s, (const char*)(dt->layout + 1), fldsize); - } - } - - if (has_instance) - jl_serialize_value(s, dt->instance); - jl_serialize_value(s, dt->name); - jl_serialize_value(s, dt->parameters); - jl_serialize_value(s, dt->super); - jl_serialize_value(s, dt->types); -} - -static void jl_serialize_module(jl_serializer_state *s, jl_module_t *m) -{ - write_uint8(s->s, TAG_MODULE); - jl_serialize_value(s, m->name); - size_t i; - if (!module_in_worklist(m)) { - if (m == m->parent) { - // top-level module - write_int8(s->s, 2); - int j = 0; - for (i = 0; i < jl_array_len(s->loaded_modules_array); i++) { - jl_module_t *mi = (jl_module_t*)jl_array_ptr_ref(s->loaded_modules_array, i); - if (!module_in_worklist(mi)) { - if (m == mi) { - write_int32(s->s, j); - return; - } - j++; - } - } - assert(0 && "top level module not found in modules array"); - } - else { - write_int8(s->s, 1); - jl_serialize_value(s, m->parent); - } - return; - } - write_int8(s->s, 0); - jl_serialize_value(s, m->parent); - void **table = m->bindings.table; - for (i = 0; i < m->bindings.size; i += 2) { - if (table[i+1] != HT_NOTFOUND) { - jl_serialize_value(s, (jl_value_t*)table[i]); - jl_binding_t *b = (jl_binding_t*)table[i+1]; - jl_serialize_value(s, b->name); - jl_value_t *e = jl_atomic_load_relaxed(&b->value); - if 
(!b->constp && e && jl_is_cpointer(e) && jl_unbox_voidpointer(e) != (void*)-1 && jl_unbox_voidpointer(e) != NULL) - // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE) - jl_serialize_cnull(s, jl_typeof(e)); - else - jl_serialize_value(s, e); - jl_serialize_value(s, jl_atomic_load_relaxed(&b->globalref)); - jl_serialize_value(s, b->owner); - jl_serialize_value(s, jl_atomic_load_relaxed(&b->ty)); - write_int8(s->s, (b->deprecated<<3) | (b->constp<<2) | (b->exportp<<1) | (b->imported)); - } - } - jl_serialize_value(s, NULL); - write_int32(s->s, m->usings.len); - for(i=0; i < m->usings.len; i++) { - jl_serialize_value(s, (jl_value_t*)m->usings.items[i]); - } - write_uint8(s->s, m->istopmod); - write_uint64(s->s, m->uuid.hi); - write_uint64(s->s, m->uuid.lo); - write_uint64(s->s, m->build_id); - write_int32(s->s, m->counter); - write_int32(s->s, m->nospecialize); - write_uint8(s->s, m->optlevel); - write_uint8(s->s, m->compile); - write_uint8(s->s, m->infer); - write_uint8(s->s, m->max_methods); -} - -static int jl_serialize_generic(jl_serializer_state *s, jl_value_t *v) JL_GC_DISABLED -{ - if (v == NULL) { - write_uint8(s->s, TAG_NULL); - return 1; - } - - void *tag = ptrhash_get(&ser_tag, v); - if (tag != HT_NOTFOUND) { - uint8_t t8 = (intptr_t)tag; - if (t8 <= LAST_TAG) - write_uint8(s->s, 0); - write_uint8(s->s, t8); - return 1; - } - - if (jl_is_symbol(v)) { - void *idx = ptrhash_get(&common_symbol_tag, v); - if (idx != HT_NOTFOUND) { - write_uint8(s->s, TAG_COMMONSYM); - write_uint8(s->s, (uint8_t)(size_t)idx); - return 1; - } - } - else if (v == (jl_value_t*)jl_core_module) { - write_uint8(s->s, TAG_CORE); - return 1; - } - else if (v == (jl_value_t*)jl_base_module) { - write_uint8(s->s, TAG_BASE); - return 1; - } - - if (jl_typeis(v, jl_string_type) && jl_string_len(v) == 0) { - jl_serialize_value(s, jl_an_empty_string); - return 1; - } - else if (!jl_is_uint8(v)) { - void **bp = ptrhash_bp(&backref_table, v); - if (*bp != HT_NOTFOUND) { - 
uintptr_t pos = (char*)*bp - (char*)HT_NOTFOUND - 1; - if (pos < 65536) { - write_uint8(s->s, TAG_SHORT_BACKREF); - write_uint16(s->s, pos); - } - else { - write_uint8(s->s, TAG_BACKREF); - write_int32(s->s, pos); - } - return 1; - } - intptr_t pos = backref_table_numel++; - if (((jl_datatype_t*)(jl_typeof(v)))->name == jl_idtable_typename) { - // will need to rehash this, later (after types are fully constructed) - arraylist_push(&reinit_list, (void*)pos); - arraylist_push(&reinit_list, (void*)1); - } - if (jl_is_module(v)) { - jl_module_t *m = (jl_module_t*)v; - if (module_in_worklist(m) && !module_in_worklist(m->parent)) { - // will need to reinsert this into parent bindings, later (in case of any errors during reinsert) - arraylist_push(&reinit_list, (void*)pos); - arraylist_push(&reinit_list, (void*)2); - } - } - // TypeMapLevels need to be rehashed - if (jl_is_mtable(v)) { - arraylist_push(&reinit_list, (void*)pos); - arraylist_push(&reinit_list, (void*)3); - } - pos <<= 1; - ptrhash_put(&backref_table, v, (char*)HT_NOTFOUND + pos + 1); - } - - return 0; -} - -static void jl_serialize_code_instance(jl_serializer_state *s, jl_code_instance_t *codeinst, - int skip_partial_opaque, int force) JL_GC_DISABLED -{ - if (!force && jl_serialize_generic(s, (jl_value_t*)codeinst)) { - return; - } - assert(codeinst != NULL); // handle by jl_serialize_generic, but this makes clang-sa happy - - int validate = 0; - if (codeinst->max_world == ~(size_t)0 && codeinst->inferred) - // TODO: also check if this object is part of the codeinst cache and in edges_map - validate = 1; // can check on deserialize if this cache entry is still valid - int flags = validate << 0; - if (codeinst->invoke == jl_fptr_const_return) - flags |= 1 << 2; - if (codeinst->precompile) - flags |= 1 << 3; - - // CodeInstances with PartialOpaque return type are currently not allowed - // to be cached. We skip them in serialization here, forcing them to - // be re-infered on reload. 
- int write_ret_type = validate || codeinst->min_world == 0; - if (write_ret_type && codeinst->rettype_const && - jl_typeis(codeinst->rettype_const, jl_partial_opaque_type)) { - if (skip_partial_opaque) { - jl_serialize_code_instance(s, codeinst->next, skip_partial_opaque, 0); - return; - } - else { - jl_error("Cannot serialize CodeInstance with PartialOpaque rettype"); - } - } - - write_uint8(s->s, TAG_CODE_INSTANCE); - write_uint8(s->s, flags); - write_uint32(s->s, codeinst->ipo_purity_bits); - write_uint32(s->s, jl_atomic_load_relaxed(&codeinst->purity_bits)); - jl_serialize_value(s, (jl_value_t*)codeinst->def); - if (write_ret_type) { - jl_serialize_value(s, jl_atomic_load_relaxed(&codeinst->inferred)); - jl_serialize_value(s, codeinst->rettype_const); - jl_serialize_value(s, codeinst->rettype); - jl_serialize_value(s, codeinst->argescapes); - } - else { - // skip storing useless data - jl_serialize_value(s, NULL); - jl_serialize_value(s, NULL); - jl_serialize_value(s, jl_any_type); - jl_serialize_value(s, jl_nothing); - } - write_uint8(s->s, codeinst->relocatability); - jl_serialize_code_instance(s, codeinst->next, skip_partial_opaque, 0); -} - -enum METHOD_SERIALIZATION_MODE { - METHOD_INTERNAL = 1, - METHOD_EXTERNAL_MT = 2, - METHOD_HAS_NEW_ROOTS = 4, -}; - -static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED -{ - if (jl_serialize_generic(s, v)) { - return; - } - - size_t i; - if (jl_is_svec(v)) { - size_t l = jl_svec_len(v); - if (l <= 255) { - write_uint8(s->s, TAG_SVEC); - write_uint8(s->s, (uint8_t)l); - } - else { - write_uint8(s->s, TAG_LONG_SVEC); - write_int32(s->s, l); - } - for (i = 0; i < l; i++) { - jl_serialize_value(s, jl_svecref(v, i)); - } - } - else if (jl_is_symbol(v)) { - size_t l = strlen(jl_symbol_name((jl_sym_t*)v)); - if (l <= 255) { - write_uint8(s->s, TAG_SYMBOL); - write_uint8(s->s, (uint8_t)l); - } - else { - write_uint8(s->s, TAG_LONG_SYMBOL); - write_int32(s->s, l); - } - 
ios_write(s->s, jl_symbol_name((jl_sym_t*)v), l); - } - else if (jl_is_array(v)) { - jl_array_t *ar = (jl_array_t*)v; - jl_value_t *et = jl_tparam0(jl_typeof(ar)); - int isunion = jl_is_uniontype(et); - if (ar->flags.ndims == 1 && ar->elsize <= 0x1f) { - write_uint8(s->s, TAG_ARRAY1D); - write_uint8(s->s, (ar->flags.ptrarray << 7) | (ar->flags.hasptr << 6) | (isunion << 5) | (ar->elsize & 0x1f)); - } - else { - write_uint8(s->s, TAG_ARRAY); - write_uint16(s->s, ar->flags.ndims); - write_uint16(s->s, (ar->flags.ptrarray << 15) | (ar->flags.hasptr << 14) | (isunion << 13) | (ar->elsize & 0x1fff)); - } - for (i = 0; i < ar->flags.ndims; i++) - jl_serialize_value(s, jl_box_long(jl_array_dim(ar,i))); - jl_serialize_value(s, jl_typeof(ar)); - size_t l = jl_array_len(ar); - if (ar->flags.ptrarray) { - for (i = 0; i < l; i++) { - jl_value_t *e = jl_array_ptr_ref(v, i); - if (e && jl_is_cpointer(e) && jl_unbox_voidpointer(e) != (void*)-1 && jl_unbox_voidpointer(e) != NULL) - // reset Ptr elements to C_NULL (but keep MAP_FAILED / INVALID_HANDLE) - jl_serialize_cnull(s, jl_typeof(e)); - else - jl_serialize_value(s, e); - } - } - else if (ar->flags.hasptr) { - const char *data = (const char*)jl_array_data(ar); - uint16_t elsz = ar->elsize; - size_t j, np = ((jl_datatype_t*)et)->layout->npointers; - for (i = 0; i < l; i++) { - const char *start = data; - for (j = 0; j < np; j++) { - uint32_t ptr = jl_ptr_offset((jl_datatype_t*)et, j); - const jl_value_t *const *fld = &((const jl_value_t *const *)data)[ptr]; - if ((const char*)fld != start) - ios_write(s->s, start, (const char*)fld - start); - JL_GC_PROMISE_ROOTED(*fld); - jl_serialize_value(s, *fld); - start = (const char*)&fld[1]; - } - data += elsz; - if (data != start) - ios_write(s->s, start, data - start); - } - } - else if (jl_is_cpointer_type(et)) { - // reset Ptr elements to C_NULL - const void **data = (const void**)jl_array_data(ar); - for (i = 0; i < l; i++) { - const void *e = data[i]; - if (e != (void*)-1) - e = 
NULL; - ios_write(s->s, (const char*)&e, sizeof(e)); - } - } - else { - ios_write(s->s, (char*)jl_array_data(ar), l * ar->elsize); - if (jl_array_isbitsunion(ar)) - ios_write(s->s, jl_array_typetagdata(ar), l); - } - } - else if (jl_is_datatype(v)) { - jl_serialize_datatype(s, (jl_datatype_t*)v); - } - else if (jl_is_unionall(v)) { - write_uint8(s->s, TAG_UNIONALL); - jl_datatype_t *d = (jl_datatype_t*)jl_unwrap_unionall(v); - if (jl_is_datatype(d) && d->name->wrapper == v && - !module_in_worklist(d->name->module)) { - write_uint8(s->s, 1); - jl_serialize_value(s, d->name->module); - jl_serialize_value(s, d->name->name); - } - else { - write_uint8(s->s, 0); - jl_serialize_value(s, ((jl_unionall_t*)v)->var); - jl_serialize_value(s, ((jl_unionall_t*)v)->body); - } - } - else if (jl_is_typevar(v)) { - write_uint8(s->s, TAG_TVAR); - jl_serialize_value(s, ((jl_tvar_t*)v)->name); - jl_serialize_value(s, ((jl_tvar_t*)v)->lb); - jl_serialize_value(s, ((jl_tvar_t*)v)->ub); - } - else if (jl_is_method(v)) { - write_uint8(s->s, TAG_METHOD); - jl_method_t *m = (jl_method_t*)v; - uint64_t key = 0; - int serialization_mode = 0, nwithkey = 0; - if (m->is_for_opaque_closure || module_in_worklist(m->module)) - serialization_mode |= METHOD_INTERNAL; - if (!(serialization_mode & METHOD_INTERNAL)) { - key = jl_worklist_key(serializer_worklist); - nwithkey = nroots_with_key(m, key); - if (nwithkey > 0) - serialization_mode |= METHOD_HAS_NEW_ROOTS; - } - if (!(serialization_mode & METHOD_INTERNAL)) { - // flag this in the backref table as special - uintptr_t *bp = (uintptr_t*)ptrhash_bp(&backref_table, v); - assert(*bp != (uintptr_t)HT_NOTFOUND); - *bp |= 1; - } - jl_serialize_value(s, (jl_value_t*)m->sig); - jl_serialize_value(s, (jl_value_t*)m->module); - if (m->external_mt != NULL) { - assert(jl_typeis(m->external_mt, jl_methtable_type)); - jl_methtable_t *mt = (jl_methtable_t*)m->external_mt; - if (!module_in_worklist(mt->module)) { - serialization_mode |= METHOD_EXTERNAL_MT; - } - 
} - write_uint8(s->s, serialization_mode); - if (serialization_mode & METHOD_EXTERNAL_MT) { - // We reference this method table by module and binding - jl_methtable_t *mt = (jl_methtable_t*)m->external_mt; - jl_serialize_value(s, mt->module); - jl_serialize_value(s, mt->name); - } - else { - jl_serialize_value(s, (jl_value_t*)m->external_mt); - } - if (!(serialization_mode & METHOD_INTERNAL)) { - if (serialization_mode & METHOD_HAS_NEW_ROOTS) { - // Serialize the roots that belong to key - write_uint64(s->s, key); - write_int32(s->s, nwithkey); - rle_iter_state rootiter = rle_iter_init(0); - uint64_t *rletable = NULL; - size_t nblocks2 = 0, nroots = jl_array_len(m->roots); - if (m->root_blocks) { - rletable = (uint64_t*)jl_array_data(m->root_blocks); - nblocks2 = jl_array_len(m->root_blocks); - } - // this visits every item, if it becomes a bottleneck we could hop blocks - while (rle_iter_increment(&rootiter, nroots, rletable, nblocks2)) - if (rootiter.key == key) - jl_serialize_value(s, jl_array_ptr_ref(m->roots, rootiter.i)); - } - return; - } - jl_serialize_value(s, m->specializations); - jl_serialize_value(s, jl_atomic_load_relaxed(&m->speckeyset)); - jl_serialize_value(s, (jl_value_t*)m->name); - jl_serialize_value(s, (jl_value_t*)m->file); - write_int32(s->s, m->line); - write_int32(s->s, m->called); - write_int32(s->s, m->nargs); - write_int32(s->s, m->nospecialize); - write_int32(s->s, m->nkw); - write_int8(s->s, m->isva); - write_int8(s->s, m->pure); - write_int8(s->s, m->is_for_opaque_closure); - write_int8(s->s, m->constprop); - write_uint8(s->s, m->purity.bits); - jl_serialize_value(s, (jl_value_t*)m->slot_syms); - jl_serialize_value(s, (jl_value_t*)m->roots); - jl_serialize_value(s, (jl_value_t*)m->root_blocks); - write_int32(s->s, m->nroots_sysimg); - jl_serialize_value(s, (jl_value_t*)m->ccallable); - jl_serialize_value(s, (jl_value_t*)m->source); - jl_serialize_value(s, (jl_value_t*)m->unspecialized); - jl_serialize_value(s, 
(jl_value_t*)m->generator); - jl_serialize_value(s, (jl_value_t*)m->invokes); - jl_serialize_value(s, (jl_value_t*)m->recursion_relation); - } - else if (jl_is_method_instance(v)) { - jl_method_instance_t *mi = (jl_method_instance_t*)v; - if (jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure) { - jl_error("unimplemented: serialization of MethodInstances for OpaqueClosure"); - } - write_uint8(s->s, TAG_METHOD_INSTANCE); - int internal = 0; - if (!jl_is_method(mi->def.method)) - internal = 1; - else if (module_in_worklist(mi->def.method->module)) - internal = 2; - write_uint8(s->s, internal); - if (!internal) { - // also flag this in the backref table as special - uintptr_t *bp = (uintptr_t*)ptrhash_bp(&backref_table, v); - assert(*bp != (uintptr_t)HT_NOTFOUND); - *bp |= 1; - } - if (internal == 1) - jl_serialize_value(s, (jl_value_t*)mi->uninferred); - jl_serialize_value(s, (jl_value_t*)mi->specTypes); - jl_serialize_value(s, mi->def.value); - if (!internal) - return; - jl_serialize_value(s, (jl_value_t*)mi->sparam_vals); - jl_array_t *backedges = mi->backedges; - if (backedges) { - // filter backedges to only contain pointers - // to items that we will actually store (internal >= 2) - size_t ins = 0, i = 0, l = jl_array_len(backedges); - jl_value_t **b_edges = (jl_value_t**)jl_array_data(backedges); - jl_value_t *invokeTypes; - jl_method_instance_t *backedge; - while (i < l) { - i = get_next_edge(backedges, i, &invokeTypes, &backedge); - if (module_in_worklist(backedge->def.method->module) || method_instance_in_queue(backedge)) { - if (invokeTypes) - b_edges[ins++] = invokeTypes; - b_edges[ins++] = (jl_value_t*)backedge; - } - } - if (ins != l) - jl_array_del_end(backedges, l - ins); - if (ins == 0) - backedges = NULL; - } - jl_serialize_value(s, (jl_value_t*)backedges); - jl_serialize_value(s, (jl_value_t*)NULL); //callbacks - jl_serialize_code_instance(s, mi->cache, 1, 0); - } - else if (jl_is_code_instance(v)) { - jl_serialize_code_instance(s, 
(jl_code_instance_t*)v, 0, 1); - } - else if (jl_typeis(v, jl_module_type)) { - jl_serialize_module(s, (jl_module_t*)v); - } - else if (jl_typeis(v, jl_task_type)) { - jl_error("Task cannot be serialized"); - } - else if (jl_typeis(v, jl_opaque_closure_type)) { - jl_error("Live opaque closures cannot be serialized"); - } - else if (jl_typeis(v, jl_string_type)) { - write_uint8(s->s, TAG_STRING); - write_int32(s->s, jl_string_len(v)); - ios_write(s->s, jl_string_data(v), jl_string_len(v)); - } - else if (jl_typeis(v, jl_int64_type)) { - void *data = jl_data_ptr(v); - if (*(int64_t*)data >= INT16_MIN && *(int64_t*)data <= INT16_MAX) { - write_uint8(s->s, TAG_SHORTER_INT64); - write_uint16(s->s, (uint16_t)*(int64_t*)data); - } - else if (*(int64_t*)data >= S32_MIN && *(int64_t*)data <= S32_MAX) { - write_uint8(s->s, TAG_SHORT_INT64); - write_int32(s->s, (int32_t)*(int64_t*)data); - } - else { - write_uint8(s->s, TAG_INT64); - write_uint64(s->s, *(int64_t*)data); - } - } - else if (jl_typeis(v, jl_int32_type)) { - void *data = jl_data_ptr(v); - if (*(int32_t*)data >= INT16_MIN && *(int32_t*)data <= INT16_MAX) { - write_uint8(s->s, TAG_SHORT_INT32); - write_uint16(s->s, (uint16_t)*(int32_t*)data); - } - else { - write_uint8(s->s, TAG_INT32); - write_int32(s->s, *(int32_t*)data); - } - } - else if (jl_typeis(v, jl_uint8_type)) { - write_uint8(s->s, TAG_UINT8); - write_int8(s->s, *(int8_t*)jl_data_ptr(v)); - } - else if (jl_is_cpointer(v) && jl_unbox_voidpointer(v) == NULL) { - write_uint8(s->s, TAG_CNULL); - jl_serialize_value(s, jl_typeof(v)); - return; - } - else if (jl_bigint_type && jl_typeis(v, jl_bigint_type)) { - write_uint8(s->s, TAG_SHORT_GENERAL); - write_uint8(s->s, jl_datatype_size(jl_bigint_type)); - jl_serialize_value(s, jl_bigint_type); - jl_value_t *sizefield = jl_get_nth_field(v, 1); - jl_serialize_value(s, sizefield); - void *data = jl_unbox_voidpointer(jl_get_nth_field(v, 2)); - int32_t sz = jl_unbox_int32(sizefield); - size_t nb = (sz == 0 ? 
1 : (sz < 0 ? -sz : sz)) * gmp_limb_size; - ios_write(s->s, (char*)data, nb); - } - else { - jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); - if (v == t->instance) { - if (!type_in_worklist(t)) { - // also flag this in the backref table as special - // if it might not be unique (is external) - uintptr_t *bp = (uintptr_t*)ptrhash_bp(&backref_table, v); - assert(*bp != (uintptr_t)HT_NOTFOUND); - *bp |= 1; - } - write_uint8(s->s, TAG_SINGLETON); - jl_serialize_value(s, t); - return; - } - assert(!t->instance && "detected singleton construction corruption"); - - if (t == jl_typename_type) { - void *bttag = ptrhash_get(&ser_tag, ((jl_typename_t*)t)->wrapper); - if (bttag != HT_NOTFOUND) { - write_uint8(s->s, TAG_BITYPENAME); - write_uint8(s->s, (uint8_t)(intptr_t)bttag); - return; - } - } - size_t tsz = jl_datatype_size(t); - if (tsz <= 255) { - write_uint8(s->s, TAG_SHORT_GENERAL); - write_uint8(s->s, tsz); - } - else { - write_uint8(s->s, TAG_GENERAL); - write_int32(s->s, tsz); - } - jl_serialize_value(s, t); - if (t == jl_typename_type) { - jl_typename_t *tn = (jl_typename_t*)v; - int internal = module_in_worklist(tn->module); - write_uint8(s->s, internal); - jl_serialize_value(s, tn->module); - jl_serialize_value(s, tn->name); - if (internal) { - jl_serialize_value(s, tn->names); - jl_serialize_value(s, tn->wrapper); - jl_serialize_value(s, tn->mt); - ios_write(s->s, (char*)&tn->hash, sizeof(tn->hash)); - write_uint8(s->s, tn->abstract | (tn->mutabl << 1) | (tn->mayinlinealloc << 2)); - write_uint8(s->s, tn->max_methods); - if (!tn->abstract) - write_uint16(s->s, tn->n_uninitialized); - size_t nb = tn->atomicfields ? (jl_svec_len(tn->names) + 31) / 32 * sizeof(uint32_t) : 0; - write_int32(s->s, nb); - if (nb) - ios_write(s->s, (char*)tn->atomicfields, nb); - nb = tn->constfields ? 
(jl_svec_len(tn->names) + 31) / 32 * sizeof(uint32_t) : 0; - write_int32(s->s, nb); - if (nb) - ios_write(s->s, (char*)tn->constfields, nb); - } - return; - } - - if (jl_is_foreign_type(t)) { - jl_error("Cannot serialize instances of foreign datatypes"); - } - - char *data = (char*)jl_data_ptr(v); - size_t i, j, np = t->layout->npointers; - uint32_t nf = t->layout->nfields; - char *last = data; - for (i = 0, j = 0; i < nf+1; i++) { - char *ptr = data + (i < nf ? jl_field_offset(t, i) : jl_datatype_size(t)); - if (j < np) { - char *prevptr = (char*)&((jl_value_t**)data)[jl_ptr_offset(t, j)]; - while (ptr > prevptr) { - // previous field contained pointers; write them and their interleaved data - if (prevptr > last) - ios_write(s->s, last, prevptr - last); - jl_value_t *e = *(jl_value_t**)prevptr; - JL_GC_PROMISE_ROOTED(e); - if (t->name->mutabl && e && jl_field_isptr(t, i - 1) && jl_is_cpointer(e) && - jl_unbox_voidpointer(e) != (void*)-1 && jl_unbox_voidpointer(e) != NULL) - // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE) - jl_serialize_cnull(s, jl_typeof(e)); - else - jl_serialize_value(s, e); - last = prevptr + sizeof(jl_value_t*); - j++; - if (j < np) - prevptr = (char*)&((jl_value_t**)data)[jl_ptr_offset(t, j)]; - else - break; - } - } - if (i == nf) - break; - if (t->name->mutabl && jl_is_cpointer_type(jl_field_type(t, i)) && *(void**)ptr != (void*)-1) { - if (ptr > last) - ios_write(s->s, last, ptr - last); - char *n = NULL; - ios_write(s->s, (char*)&n, sizeof(n)); - last = ptr + sizeof(n); - } - } - char *ptr = data + jl_datatype_size(t); - if (ptr > last) - ios_write(s->s, last, ptr - last); - } -} - - -// Create the forward-edge map (caller => callees) -// the intent of these functions is to invert the backedges tree -// for anything that points to a method not part of the worklist -// -// from MethodTables -static void jl_collect_missing_backedges(jl_methtable_t *mt) -{ - jl_array_t *backedges = mt->backedges; - if (backedges) { - 
size_t i, l = jl_array_len(backedges); - for (i = 1; i < l; i += 2) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(backedges, i); - jl_value_t *missing_callee = jl_array_ptr_ref(backedges, i - 1); // signature of abstract callee - jl_array_t *edges = (jl_array_t*)jl_eqtable_get(edges_map, (jl_value_t*)caller, NULL); - if (edges == NULL) { - edges = jl_alloc_vec_any(0); - JL_GC_PUSH1(&edges); - edges_map = jl_eqtable_put(edges_map, (jl_value_t*)caller, (jl_value_t*)edges, NULL); - JL_GC_POP(); - } - jl_array_ptr_1d_push(edges, NULL); - jl_array_ptr_1d_push(edges, missing_callee); - } - } -} - - -// from MethodInstances -static void collect_backedges(jl_method_instance_t *callee, int internal) JL_GC_DISABLED -{ - jl_array_t *backedges = callee->backedges; - if (backedges) { - size_t i = 0, l = jl_array_len(backedges); - while (i < l) { - jl_value_t *invokeTypes; - jl_method_instance_t *caller; - i = get_next_edge(backedges, i, &invokeTypes, &caller); - jl_array_t *edges = (jl_array_t*)jl_eqtable_get(edges_map, (jl_value_t*)caller, NULL); - if (edges == NULL) { - edges = jl_alloc_vec_any(0); - JL_GC_PUSH1(&edges); - edges_map = jl_eqtable_put(edges_map, (jl_value_t*)caller, (jl_value_t*)edges, NULL); - JL_GC_POP(); - } - jl_array_ptr_1d_push(edges, invokeTypes); - jl_array_ptr_1d_push(edges, (jl_value_t*)callee); - } - } -} - - -// For functions owned by modules not on the worklist, call this on each method. -// - if the method is owned by a worklist module, add it to the list of things to be -// fully serialized -// - Collect all backedges (may be needed later when we invert this list). 
-static int jl_collect_methcache_from_mod(jl_typemap_entry_t *ml, void *closure) JL_GC_DISABLED -{ - jl_array_t *s = (jl_array_t*)closure; - jl_method_t *m = ml->func.method; - if (s && module_in_worklist(m->module)) { - jl_array_ptr_1d_push(s, (jl_value_t*)m); - jl_array_ptr_1d_push(s, (jl_value_t*)ml->simplesig); - } - jl_svec_t *specializations = m->specializations; - size_t i, l = jl_svec_len(specializations); - for (i = 0; i < l; i++) { - jl_method_instance_t *callee = (jl_method_instance_t*)jl_svecref(specializations, i); - if ((jl_value_t*)callee != jl_nothing) - collect_backedges(callee, !s); - } - return 1; -} - -static void jl_collect_methtable_from_mod(jl_array_t *s, jl_methtable_t *mt) JL_GC_DISABLED -{ - jl_typemap_visitor(mt->defs, jl_collect_methcache_from_mod, (void*)s); -} - -// Collect methods of external functions defined by modules in the worklist -// "extext" = "extending external" -// Also collect relevant backedges -static void jl_collect_extext_methods_from_mod(jl_array_t *s, jl_module_t *m) JL_GC_DISABLED -{ - if (s && module_in_worklist(m)) - s = NULL; // do not collect any methods - size_t i; - void **table = m->bindings.table; - for (i = 1; i < m->bindings.size; i += 2) { - if (table[i] != HT_NOTFOUND) { - jl_binding_t *b = (jl_binding_t*)table[i]; - if (b->owner == m && b->value && b->constp) { - jl_value_t *bv = jl_unwrap_unionall(b->value); - if (jl_is_datatype(bv)) { - jl_typename_t *tn = ((jl_datatype_t*)bv)->name; - if (tn->module == m && tn->name == b->name && tn->wrapper == b->value) { - jl_methtable_t *mt = tn->mt; - if (mt != NULL && - (jl_value_t*)mt != jl_nothing && - (mt != jl_type_type_mt && mt != jl_nonfunction_mt)) { - assert(mt->module == tn->module); - jl_collect_methtable_from_mod(s, mt); - if (s) - jl_collect_missing_backedges(mt); - } - } - } - else if (jl_is_module(b->value)) { - jl_module_t *child = (jl_module_t*)b->value; - if (child != m && child->parent == m && child->name == b->name) { - // this is the 
original/primary binding for the submodule - jl_collect_extext_methods_from_mod(s, (jl_module_t*)b->value); - } - } - else if (jl_is_mtable(b->value)) { - jl_methtable_t *mt = (jl_methtable_t*)b->value; - if (mt->module == m && mt->name == b->name) { - // this is probably an external method table, so let's assume so - // as there is no way to precisely distinguish them, - // and the rest of this serializer does not bother - // to handle any method tables specially - jl_collect_methtable_from_mod(s, (jl_methtable_t*)bv); - } - } - } - } - } -} - -static void jl_record_edges(jl_method_instance_t *caller, arraylist_t *wq, jl_array_t *edges) JL_GC_DISABLED -{ - jl_array_t *callees = (jl_array_t*)jl_eqtable_pop(edges_map, (jl_value_t*)caller, NULL, NULL); - if (callees != NULL) { - jl_array_ptr_1d_push(edges, (jl_value_t*)caller); - jl_array_ptr_1d_push(edges, (jl_value_t*)callees); - size_t i, l = jl_array_len(callees); - for (i = 1; i < l; i += 2) { - jl_method_instance_t *c = (jl_method_instance_t*)jl_array_ptr_ref(callees, i); - if (c && jl_is_method_instance(c)) { - arraylist_push(wq, c); - } - } - } -} - - -// Extract `edges` and `ext_targets` from `edges_map` -// `edges` = [caller1, targets_indexes1, ...], the list of methods and their edges -// `ext_targets` is [invokesig1, callee1, matches1, ...], the edges for each target -static void jl_collect_edges(jl_array_t *edges, jl_array_t *ext_targets) -{ - size_t world = jl_atomic_load_acquire(&jl_world_counter); - arraylist_t wq; - arraylist_new(&wq, 0); - void **table = (void**)jl_array_data(edges_map); // edges is caller => callees - size_t table_size = jl_array_len(edges_map); - for (size_t i = 0; i < table_size; i += 2) { - assert(table == jl_array_data(edges_map) && table_size == jl_array_len(edges_map) && - "edges_map changed during iteration"); - jl_method_instance_t *caller = (jl_method_instance_t*)table[i]; - jl_array_t *callees = (jl_array_t*)table[i + 1]; - if (callees == NULL) - continue; - 
assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); - if (module_in_worklist(caller->def.method->module) || - method_instance_in_queue(caller)) { - jl_record_edges(caller, &wq, edges); - } - } - while (wq.len) { - jl_method_instance_t *caller = (jl_method_instance_t*)arraylist_pop(&wq); - jl_record_edges(caller, &wq, edges); - } - arraylist_free(&wq); - edges_map = NULL; - htable_t edges_map2; - htable_new(&edges_map2, 0); - htable_t edges_ids; - size_t l = jl_array_len(edges); - htable_new(&edges_ids, l); - for (size_t i = 0; i < l / 2; i++) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, i * 2); - void *target = (void*)((char*)HT_NOTFOUND + i + 1); - ptrhash_put(&edges_ids, (void*)caller, target); - } - // process target list to turn it into a memoized validity table - // and compute the old methods list, ready for serialization - jl_value_t *matches = NULL; - jl_array_t *callee_ids = NULL; - JL_GC_PUSH2(&matches, &callee_ids); - for (size_t i = 0; i < l; i += 2) { - jl_array_t *callees = (jl_array_t*)jl_array_ptr_ref(edges, i + 1); - size_t l = jl_array_len(callees); - callee_ids = jl_alloc_array_1d(jl_array_int32_type, l + 1); - int32_t *idxs = (int32_t*)jl_array_data(callee_ids); - idxs[0] = 0; - size_t nt = 0; - for (size_t j = 0; j < l; j += 2) { - jl_value_t *invokeTypes = jl_array_ptr_ref(callees, j); - jl_value_t *callee = jl_array_ptr_ref(callees, j + 1); - assert(callee && "unsupported edge"); - - if (jl_is_method_instance(callee)) { - jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); - if (module_in_worklist(mt->module)) - continue; - } - - // (nullptr, c) => call - // (invokeTypes, c) => invoke - // (nullptr, invokeTypes) => missing call - // (invokeTypes, nullptr) => missing invoke (unused--inferred as Any) - void *target = ptrhash_get(&edges_map2, invokeTypes ? 
(void*)invokeTypes : (void*)callee); - if (target == HT_NOTFOUND) { - size_t min_valid = 0; - size_t max_valid = ~(size_t)0; - if (invokeTypes) { - jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); - if ((jl_value_t*)mt == jl_nothing) { - callee_ids = NULL; // invalid - break; - } - else { - matches = jl_gf_invoke_lookup_worlds(invokeTypes, (jl_value_t*)mt, world, &min_valid, &max_valid); - if (matches == jl_nothing) { - callee_ids = NULL; // invalid - break; - } - matches = (jl_value_t*)((jl_method_match_t*)matches)->method; - } - } - else { - jl_value_t *sig; - if (jl_is_method_instance(callee)) - sig = ((jl_method_instance_t*)callee)->specTypes; - else - sig = callee; - int ambig = 0; - matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, - -1, 0, world, &min_valid, &max_valid, &ambig); - if (matches == jl_false) { - callee_ids = NULL; // invalid - break; - } - size_t k; - for (k = 0; k < jl_array_len(matches); k++) { - jl_method_match_t *match = (jl_method_match_t *)jl_array_ptr_ref(matches, k); - jl_array_ptr_set(matches, k, match->method); - } - } - jl_array_ptr_1d_push(ext_targets, invokeTypes); - jl_array_ptr_1d_push(ext_targets, callee); - jl_array_ptr_1d_push(ext_targets, matches); - target = (void*)((char*)HT_NOTFOUND + jl_array_len(ext_targets) / 3); - ptrhash_put(&edges_map2, (void*)callee, target); - } - idxs[++nt] = (char*)target - (char*)HT_NOTFOUND - 1; - } - jl_array_ptr_set(edges, i + 1, callee_ids); // swap callees for ids - if (!callee_ids) - continue; - idxs[0] = nt; - // record place of every method in edges - // add method edges to the callee_ids list - for (size_t j = 0; j < l; j += 2) { - jl_value_t *callee = jl_array_ptr_ref(callees, j + 1); - if (callee && jl_is_method_instance(callee)) { - void *target = ptrhash_get(&edges_ids, (void*)callee); - if (target != HT_NOTFOUND) { - idxs[++nt] = (char*)target - (char*)HT_NOTFOUND - 1; - } - } - } - jl_array_del_end(callee_ids, l - nt); - } - 
JL_GC_POP(); - htable_free(&edges_map2); -} - -// serialize information about all loaded modules -static void write_mod_list(ios_t *s, jl_array_t *a) -{ - size_t i; - size_t len = jl_array_len(a); - for (i = 0; i < len; i++) { - jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(a, i); - assert(jl_is_module(m)); - if (!module_in_worklist(m)) { - const char *modname = jl_symbol_name(m->name); - size_t l = strlen(modname); - write_int32(s, l); - ios_write(s, modname, l); - write_uint64(s, m->uuid.hi); - write_uint64(s, m->uuid.lo); - write_uint64(s, m->build_id); - } - } - write_int32(s, 0); -} - -// "magic" string and version header of .ji file -static const int JI_FORMAT_VERSION = 11; -static const char JI_MAGIC[] = "\373jli\r\n\032\n"; // based on PNG signature -static const uint16_t BOM = 0xFEFF; // byte-order marker -static void write_header(ios_t *s) -{ - ios_write(s, JI_MAGIC, strlen(JI_MAGIC)); - write_uint16(s, JI_FORMAT_VERSION); - ios_write(s, (char *) &BOM, 2); - write_uint8(s, sizeof(void*)); - ios_write(s, JL_BUILD_UNAME, strlen(JL_BUILD_UNAME)+1); - ios_write(s, JL_BUILD_ARCH, strlen(JL_BUILD_ARCH)+1); - ios_write(s, JULIA_VERSION_STRING, strlen(JULIA_VERSION_STRING)+1); - const char *branch = jl_git_branch(), *commit = jl_git_commit(); - ios_write(s, branch, strlen(branch)+1); - ios_write(s, commit, strlen(commit)+1); -} - -// serialize information about the result of deserializing this file -static void write_work_list(ios_t *s) -{ - int i, l = jl_array_len(serializer_worklist); - for (i = 0; i < l; i++) { - jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(serializer_worklist, i); - if (workmod->parent == jl_main_module || workmod->parent == workmod) { - size_t l = strlen(jl_symbol_name(workmod->name)); - write_int32(s, l); - ios_write(s, jl_symbol_name(workmod->name), l); - write_uint64(s, workmod->uuid.hi); - write_uint64(s, workmod->uuid.lo); - write_uint64(s, workmod->build_id); - } - } - write_int32(s, 0); -} - -static void 
write_module_path(ios_t *s, jl_module_t *depmod) JL_NOTSAFEPOINT -{ - if (depmod->parent == jl_main_module || depmod->parent == depmod) - return; - const char *mname = jl_symbol_name(depmod->name); - size_t slen = strlen(mname); - write_module_path(s, depmod->parent); - write_int32(s, slen); - ios_write(s, mname, slen); -} - -// Cache file header -// Serialize the global Base._require_dependencies array of pathnames that -// are include dependencies. Also write Preferences and return -// the location of the srctext "pointer" in the header index. -static int64_t write_dependency_list(ios_t *s, jl_array_t **udepsp) -{ - int64_t initial_pos = 0; - int64_t pos = 0; - static jl_array_t *deps = NULL; - if (!deps) - deps = (jl_array_t*)jl_get_global(jl_base_module, jl_symbol("_require_dependencies")); - - // unique(deps) to eliminate duplicates while preserving order: - // we preserve order so that the topmost included .jl file comes first - static jl_value_t *unique_func = NULL; - if (!unique_func) - unique_func = jl_get_global(jl_base_module, jl_symbol("unique")); - jl_value_t *uniqargs[2] = {unique_func, (jl_value_t*)deps}; - jl_task_t *ct = jl_current_task; - size_t last_age = ct->world_age; - ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - jl_array_t *udeps = (*udepsp = deps && unique_func ? 
(jl_array_t*)jl_apply(uniqargs, 2) : NULL); - ct->world_age = last_age; - - // write a placeholder for total size so that we can quickly seek past all of the - // dependencies if we don't need them - initial_pos = ios_pos(s); - write_uint64(s, 0); - if (udeps) { - size_t i, l = jl_array_len(udeps); - for (i = 0; i < l; i++) { - jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); - jl_value_t *dep = jl_fieldref(deptuple, 1); // file abspath - size_t slen = jl_string_len(dep); - write_int32(s, slen); - ios_write(s, jl_string_data(dep), slen); - write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 2))); // mtime - jl_module_t *depmod = (jl_module_t*)jl_fieldref(deptuple, 0); // evaluating module - jl_module_t *depmod_top = depmod; - while (depmod_top->parent != jl_main_module && depmod_top->parent != depmod_top) - depmod_top = depmod_top->parent; - unsigned provides = 0; - size_t j, lj = jl_array_len(serializer_worklist); - for (j = 0; j < lj; j++) { - jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(serializer_worklist, j); - if (workmod->parent == jl_main_module || workmod->parent == workmod) { - ++provides; - if (workmod == depmod_top) { - write_int32(s, provides); - write_module_path(s, depmod); - break; - } - } - } - write_int32(s, 0); - } - write_int32(s, 0); // terminator, for ease of reading - - // Calculate Preferences hash for current package. 
- jl_value_t *prefs_hash = NULL; - jl_value_t *prefs_list = NULL; - JL_GC_PUSH1(&prefs_list); - if (jl_base_module) { - // Toplevel module is the module we're currently compiling, use it to get our preferences hash - jl_value_t * toplevel = (jl_value_t*)jl_get_global(jl_base_module, jl_symbol("__toplevel__")); - jl_value_t * prefs_hash_func = jl_get_global(jl_base_module, jl_symbol("get_preferences_hash")); - jl_value_t * get_compiletime_prefs_func = jl_get_global(jl_base_module, jl_symbol("get_compiletime_preferences")); - - if (toplevel && prefs_hash_func && get_compiletime_prefs_func) { - // Temporary invoke in newest world age - size_t last_age = ct->world_age; - ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - - // call get_compiletime_prefs(__toplevel__) - jl_value_t *args[3] = {get_compiletime_prefs_func, (jl_value_t*)toplevel, NULL}; - prefs_list = (jl_value_t*)jl_apply(args, 2); - - // Call get_preferences_hash(__toplevel__, prefs_list) - args[0] = prefs_hash_func; - args[2] = prefs_list; - prefs_hash = (jl_value_t*)jl_apply(args, 3); - - // Reset world age to normal - ct->world_age = last_age; - } - } - - // If we successfully got the preferences, write it out, otherwise write `0` for this `.ji` file. - if (prefs_hash != NULL && prefs_list != NULL) { - size_t i, l = jl_array_len(prefs_list); - for (i = 0; i < l; i++) { - jl_value_t *pref_name = jl_array_ptr_ref(prefs_list, i); - size_t slen = jl_string_len(pref_name); - write_int32(s, slen); - ios_write(s, jl_string_data(pref_name), slen); - } - write_int32(s, 0); // terminator - write_uint64(s, jl_unbox_uint64(prefs_hash)); - } else { - // This is an error path, but let's at least generate a valid `.ji` file. - // We declare an empty list of preference names, followed by a zero-hash. - // The zero-hash is not what would be generated for an empty set of preferences, - // and so this `.ji` file will be invalidated by a future non-erroring pass - // through this function. 
- write_int32(s, 0); - write_uint64(s, 0); - } - JL_GC_POP(); // for prefs_list - - // write a dummy file position to indicate the beginning of the source-text - pos = ios_pos(s); - ios_seek(s, initial_pos); - write_uint64(s, pos - initial_pos); - ios_seek(s, pos); - write_uint64(s, 0); - } - return pos; -} - -// --- deserialize --- - -static jl_value_t *jl_deserialize_value(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED; - -static jl_value_t *jl_deserialize_datatype(jl_serializer_state *s, int pos, jl_value_t **loc) JL_GC_DISABLED -{ - assert(pos == backref_list.len - 1 && "nothing should have been deserialized since assigning pos"); - int tag = read_uint8(s->s); - if (tag == 6 || tag == 7) { - jl_typename_t *name = (jl_typename_t*)jl_deserialize_value(s, NULL); - jl_value_t *dtv = name->wrapper; - jl_svec_t *parameters = (jl_svec_t*)jl_deserialize_value(s, NULL); - dtv = jl_apply_type(dtv, jl_svec_data(parameters), jl_svec_len(parameters)); - backref_list.items[pos] = dtv; - return dtv; - } - if (!(tag == 0 || tag == 5 || tag == 10 || tag == 11 || tag == 12)) { - assert(0 && "corrupt deserialization state"); - abort(); - } - jl_datatype_t *dt = jl_new_uninitialized_datatype(); - backref_list.items[pos] = dt; - if (loc != NULL && loc != HT_NOTFOUND) - *loc = (jl_value_t*)dt; - uint8_t flags = read_uint8(s->s); - uint8_t memflags = read_uint8(s->s); - int has_layout = flags & 1; - int has_instance = (flags >> 1) & 1; - dt->hasfreetypevars = memflags & 1; - dt->isconcretetype = (memflags >> 1) & 1; - dt->isdispatchtuple = (memflags >> 2) & 1; - dt->isbitstype = (memflags >> 3) & 1; - dt->zeroinit = (memflags >> 4) & 1; - dt->has_concrete_subtype = (memflags >> 5) & 1; - dt->cached_by_hash = (memflags >> 6) & 1; - dt->isprimitivetype = (memflags >> 7) & 1; - dt->hash = read_int32(s->s); - - if (has_layout) { - uint8_t layout = read_uint8(s->s); - if (layout == 1) { - dt->layout = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type))->layout; - } 
- else if (layout == 2) { - dt->layout = jl_nothing_type->layout; - } - else if (layout == 3) { - dt->layout = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_pointer_type))->layout; - } - else { - assert(layout == 0); - jl_datatype_layout_t buffer; - ios_readall(s->s, (char*)&buffer, sizeof(buffer)); - uint32_t nf = buffer.nfields; - uint32_t np = buffer.npointers; - uint8_t fielddesc_type = buffer.fielddesc_type; - size_t fielddesc_size = nf > 0 ? jl_fielddesc_size(fielddesc_type) : 0; - size_t fldsize = nf * fielddesc_size; - if (buffer.first_ptr != -1) - fldsize += np << fielddesc_type; - jl_datatype_layout_t *layout = (jl_datatype_layout_t*)jl_gc_perm_alloc( - sizeof(jl_datatype_layout_t) + fldsize, - 0, 4, 0); - *layout = buffer; - ios_readall(s->s, (char*)(layout + 1), fldsize); - dt->layout = layout; - } - } - - if (tag == 10 || tag == 11 || tag == 12) { - assert(pos > 0); - arraylist_push(&flagref_list, loc == HT_NOTFOUND ? NULL : loc); - arraylist_push(&flagref_list, (void*)(uintptr_t)pos); - ptrhash_put(&uniquing_table, dt, NULL); - } - - if (has_instance) { - assert(dt->isconcretetype && "there shouldn't be an instance on an abstract type"); - dt->instance = jl_deserialize_value(s, &dt->instance); - jl_gc_wb(dt, dt->instance); - } - dt->name = (jl_typename_t*)jl_deserialize_value(s, (jl_value_t**)&dt->name); - jl_gc_wb(dt, dt->name); - dt->parameters = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&dt->parameters); - jl_gc_wb(dt, dt->parameters); - dt->super = (jl_datatype_t*)jl_deserialize_value(s, (jl_value_t**)&dt->super); - jl_gc_wb(dt, dt->super); - dt->types = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&dt->types); - if (dt->types) jl_gc_wb(dt, dt->types); - - return (jl_value_t*)dt; -} - -static jl_value_t *jl_deserialize_value_svec(jl_serializer_state *s, uint8_t tag, jl_value_t **loc) JL_GC_DISABLED -{ - size_t i, len; - if (tag == TAG_SVEC) - len = read_uint8(s->s); - else - len = read_int32(s->s); - jl_svec_t *sv = 
jl_alloc_svec(len); - if (loc != NULL) - *loc = (jl_value_t*)sv; - arraylist_push(&backref_list, (jl_value_t*)sv); - jl_value_t **data = jl_svec_data(sv); - for (i = 0; i < len; i++) { - data[i] = jl_deserialize_value(s, &data[i]); - } - return (jl_value_t*)sv; -} - -static jl_value_t *jl_deserialize_value_symbol(jl_serializer_state *s, uint8_t tag) JL_GC_DISABLED -{ - size_t len; - if (tag == TAG_SYMBOL) - len = read_uint8(s->s); - else - len = read_int32(s->s); - char *name = (char*)(len >= 256 ? malloc_s(len + 1) : alloca(len + 1)); - ios_readall(s->s, name, len); - name[len] = '\0'; - jl_value_t *sym = (jl_value_t*)jl_symbol(name); - if (len >= 256) - free(name); - arraylist_push(&backref_list, sym); - return sym; -} - -static jl_value_t *jl_deserialize_value_array(jl_serializer_state *s, uint8_t tag) JL_GC_DISABLED -{ - int16_t i, ndims; - int isptr, isunion, hasptr, elsize; - if (tag == TAG_ARRAY1D) { - ndims = 1; - elsize = read_uint8(s->s); - isptr = (elsize >> 7) & 1; - hasptr = (elsize >> 6) & 1; - isunion = (elsize >> 5) & 1; - elsize = elsize & 0x1f; - } - else { - ndims = read_uint16(s->s); - elsize = read_uint16(s->s); - isptr = (elsize >> 15) & 1; - hasptr = (elsize >> 14) & 1; - isunion = (elsize >> 13) & 1; - elsize = elsize & 0x1fff; - } - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, NULL); - size_t *dims = (size_t*)alloca(ndims * sizeof(size_t)); - for (i = 0; i < ndims; i++) { - dims[i] = jl_unbox_long(jl_deserialize_value(s, NULL)); - } - jl_array_t *a = jl_new_array_for_deserialization( - (jl_value_t*)NULL, ndims, dims, !isptr, hasptr, isunion, elsize); - backref_list.items[pos] = a; - jl_value_t *aty = jl_deserialize_value(s, &jl_astaggedvalue(a)->type); - jl_set_typeof(a, aty); - if (a->flags.ptrarray) { - jl_value_t **data = (jl_value_t**)jl_array_data(a); - size_t i, numel = jl_array_len(a); - for (i = 0; i < numel; i++) { - data[i] = jl_deserialize_value(s, &data[i]); - //if (data[i]) // not needed because `a` is new 
(gc is disabled) - // jl_gc_wb(a, data[i]); - } - assert(jl_astaggedvalue(a)->bits.gc == GC_CLEAN); // gc is disabled - } - else if (a->flags.hasptr) { - size_t i, numel = jl_array_len(a); - char *data = (char*)jl_array_data(a); - uint16_t elsz = a->elsize; - jl_datatype_t *et = (jl_datatype_t*)jl_tparam0(jl_typeof(a)); - size_t j, np = et->layout->npointers; - for (i = 0; i < numel; i++) { - char *start = data; - for (j = 0; j < np; j++) { - uint32_t ptr = jl_ptr_offset(et, j); - jl_value_t **fld = &((jl_value_t**)data)[ptr]; - if ((char*)fld != start) - ios_readall(s->s, start, (const char*)fld - start); - *fld = jl_deserialize_value(s, fld); - //if (*fld) // not needed because `a` is new (gc is disabled) - // jl_gc_wb(a, *fld); - start = (char*)&fld[1]; - } - data += elsz; - if (data != start) - ios_readall(s->s, start, data - start); - } - assert(jl_astaggedvalue(a)->bits.gc == GC_CLEAN); // gc is disabled - } - else { - size_t extra = jl_array_isbitsunion(a) ? jl_array_len(a) : 0; - size_t tot = jl_array_len(a) * a->elsize + extra; - ios_readall(s->s, (char*)jl_array_data(a), tot); - } - return (jl_value_t*)a; -} - -static jl_value_t *jl_deserialize_value_method(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED -{ - jl_method_t *m = - (jl_method_t*)jl_gc_alloc(s->ptls, sizeof(jl_method_t), - jl_method_type); - memset(m, 0, sizeof(jl_method_t)); - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, m); - m->sig = (jl_value_t*)jl_deserialize_value(s, (jl_value_t**)&m->sig); - jl_gc_wb(m, m->sig); - m->module = (jl_module_t*)jl_deserialize_value(s, (jl_value_t**)&m->module); - jl_gc_wb(m, m->module); - int serialization_mode = read_uint8(s->s); - if (serialization_mode & METHOD_EXTERNAL_MT) { - jl_module_t *mt_mod = (jl_module_t*)jl_deserialize_value(s, NULL); - jl_sym_t *mt_name = (jl_sym_t*)jl_deserialize_value(s, NULL); - m->external_mt = jl_get_global(mt_mod, mt_name); - jl_gc_wb(m, m->external_mt); - assert(jl_typeis(m->external_mt, 
jl_methtable_type)); - } - else { - m->external_mt = jl_deserialize_value(s, &m->external_mt); - jl_gc_wb(m, m->external_mt); - } - if (!(serialization_mode & METHOD_INTERNAL)) { - assert(loc != NULL && loc != HT_NOTFOUND); - arraylist_push(&flagref_list, loc); - arraylist_push(&flagref_list, (void*)pos); - if (serialization_mode & METHOD_HAS_NEW_ROOTS) { - uint64_t key = read_uint64(s->s); - int i, nnew = read_int32(s->s); - jl_array_t *newroots = jl_alloc_vec_any(nnew); - jl_value_t **data = (jl_value_t**)jl_array_data(newroots); - for (i = 0; i < nnew; i++) - data[i] = jl_deserialize_value(s, &(data[i])); - // Storing the new roots in `m->roots` risks losing them due to recaching - // (which replaces pointers to `m` with ones to the "live" method). - // Put them in separate storage so we can find them later. - assert(ptrhash_get(&queued_method_roots, m) == HT_NOTFOUND); - // In storing the key, on 32-bit platforms we need two slots. Might as well do this for all platforms. - jl_svec_t *qmrval = jl_alloc_svec_uninit(3); // GC is disabled - jl_svec_data(qmrval)[0] = (jl_value_t*)(uintptr_t)(key & ((((uint64_t)1) << 32) - 1)); // lo bits - jl_svec_data(qmrval)[1] = (jl_value_t*)(uintptr_t)((key >> 32) & ((((uint64_t)1) << 32) - 1)); // hi bits - jl_svec_data(qmrval)[2] = (jl_value_t*)newroots; - ptrhash_put(&queued_method_roots, m, qmrval); - } - return (jl_value_t*)m; - } - m->specializations = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&m->specializations); - jl_gc_wb(m, m->specializations); - jl_array_t *speckeyset = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&m->speckeyset); - jl_atomic_store_relaxed(&m->speckeyset, speckeyset); - jl_gc_wb(m, speckeyset); - m->name = (jl_sym_t*)jl_deserialize_value(s, NULL); - jl_gc_wb(m, m->name); - m->file = (jl_sym_t*)jl_deserialize_value(s, NULL); - m->line = read_int32(s->s); - m->primary_world = jl_atomic_load_acquire(&jl_world_counter); - m->deleted_world = ~(size_t)0; - m->called = read_int32(s->s); - 
m->nargs = read_int32(s->s); - m->nospecialize = read_int32(s->s); - m->nkw = read_int32(s->s); - m->isva = read_int8(s->s); - m->pure = read_int8(s->s); - m->is_for_opaque_closure = read_int8(s->s); - m->constprop = read_int8(s->s); - m->purity.bits = read_uint8(s->s); - m->slot_syms = jl_deserialize_value(s, (jl_value_t**)&m->slot_syms); - jl_gc_wb(m, m->slot_syms); - m->roots = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&m->roots); - if (m->roots) - jl_gc_wb(m, m->roots); - m->root_blocks = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&m->root_blocks); - if (m->root_blocks) - jl_gc_wb(m, m->root_blocks); - m->nroots_sysimg = read_int32(s->s); - m->ccallable = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&m->ccallable); - if (m->ccallable) { - jl_gc_wb(m, m->ccallable); - arraylist_push(&ccallable_list, m->ccallable); - } - m->source = jl_deserialize_value(s, &m->source); - if (m->source) - jl_gc_wb(m, m->source); - m->unspecialized = (jl_method_instance_t*)jl_deserialize_value(s, (jl_value_t**)&m->unspecialized); - if (m->unspecialized) - jl_gc_wb(m, m->unspecialized); - m->generator = jl_deserialize_value(s, (jl_value_t**)&m->generator); - if (m->generator) - jl_gc_wb(m, m->generator); - m->invokes = jl_deserialize_value(s, (jl_value_t**)&m->invokes); - jl_gc_wb(m, m->invokes); - m->recursion_relation = jl_deserialize_value(s, (jl_value_t**)&m->recursion_relation); - if (m->recursion_relation) - jl_gc_wb(m, m->recursion_relation); - JL_MUTEX_INIT(&m->writelock); - return (jl_value_t*)m; -} - -static jl_value_t *jl_deserialize_value_method_instance(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED -{ - jl_method_instance_t *mi = - (jl_method_instance_t*)jl_gc_alloc(s->ptls, sizeof(jl_method_instance_t), - jl_method_instance_type); - memset(mi, 0, sizeof(jl_method_instance_t)); - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, mi); - int internal = read_uint8(s->s); - if (internal == 1) { - mi->uninferred = 
jl_deserialize_value(s, &mi->uninferred); - jl_gc_wb(mi, mi->uninferred); - } - mi->specTypes = (jl_value_t*)jl_deserialize_value(s, (jl_value_t**)&mi->specTypes); - jl_gc_wb(mi, mi->specTypes); - mi->def.value = jl_deserialize_value(s, &mi->def.value); - jl_gc_wb(mi, mi->def.value); - - if (!internal) { - assert(loc != NULL && loc != HT_NOTFOUND); - arraylist_push(&flagref_list, loc); - arraylist_push(&flagref_list, (void*)pos); - return (jl_value_t*)mi; - } - - mi->sparam_vals = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&mi->sparam_vals); - jl_gc_wb(mi, mi->sparam_vals); - mi->backedges = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&mi->backedges); - if (mi->backedges) - jl_gc_wb(mi, mi->backedges); - mi->callbacks = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&mi->callbacks); - if (mi->callbacks) - jl_gc_wb(mi, mi->callbacks); - mi->cache = (jl_code_instance_t*)jl_deserialize_value(s, (jl_value_t**)&mi->cache); - if (mi->cache) - jl_gc_wb(mi, mi->cache); - return (jl_value_t*)mi; -} - -static jl_value_t *jl_deserialize_value_code_instance(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED -{ - jl_code_instance_t *codeinst = - (jl_code_instance_t*)jl_gc_alloc(s->ptls, sizeof(jl_code_instance_t), jl_code_instance_type); - memset(codeinst, 0, sizeof(jl_code_instance_t)); - arraylist_push(&backref_list, codeinst); - int flags = read_uint8(s->s); - int validate = (flags >> 0) & 3; - int constret = (flags >> 2) & 1; - codeinst->ipo_purity_bits = read_uint32(s->s); - jl_atomic_store_relaxed(&codeinst->purity_bits, read_uint32(s->s)); - codeinst->def = (jl_method_instance_t*)jl_deserialize_value(s, (jl_value_t**)&codeinst->def); - jl_gc_wb(codeinst, codeinst->def); - jl_value_t *inferred = jl_deserialize_value(s, NULL); - jl_atomic_store_release(&codeinst->inferred, inferred); - jl_gc_wb(codeinst, inferred); - codeinst->rettype_const = jl_deserialize_value(s, &codeinst->rettype_const); - if (codeinst->rettype_const) - jl_gc_wb(codeinst, 
codeinst->rettype_const); - codeinst->rettype = jl_deserialize_value(s, &codeinst->rettype); - jl_gc_wb(codeinst, codeinst->rettype); - codeinst->argescapes = jl_deserialize_value(s, &codeinst->argescapes); - jl_gc_wb(codeinst, codeinst->argescapes); - if (constret) - codeinst->invoke = jl_fptr_const_return; - if ((flags >> 3) & 1) - codeinst->precompile = 1; - codeinst->relocatability = read_uint8(s->s); - assert(codeinst->relocatability <= 1); - codeinst->next = (jl_code_instance_t*)jl_deserialize_value(s, (jl_value_t**)&codeinst->next); - jl_gc_wb(codeinst, codeinst->next); - if (validate) { - codeinst->min_world = jl_atomic_load_acquire(&jl_world_counter); - ptrhash_put(&new_code_instance_validate, codeinst, (void*)(~(uintptr_t)HT_NOTFOUND)); // "HT_FOUND" - } - return (jl_value_t*)codeinst; -} - -static jl_value_t *jl_deserialize_value_module(jl_serializer_state *s) JL_GC_DISABLED -{ - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, NULL); - jl_sym_t *mname = (jl_sym_t*)jl_deserialize_value(s, NULL); - int ref_only = read_uint8(s->s); - if (ref_only) { - jl_value_t *m_ref; - if (ref_only == 1) - m_ref = jl_get_global((jl_module_t*)jl_deserialize_value(s, NULL), mname); - else - m_ref = jl_array_ptr_ref(s->loaded_modules_array, read_int32(s->s)); - backref_list.items[pos] = m_ref; - return m_ref; - } - jl_module_t *m = jl_new_module(mname); - backref_list.items[pos] = m; - m->parent = (jl_module_t*)jl_deserialize_value(s, (jl_value_t**)&m->parent); - jl_gc_wb(m, m->parent); - - while (1) { - jl_sym_t *asname = (jl_sym_t*)jl_deserialize_value(s, NULL); - if (asname == NULL) - break; - jl_binding_t *b = jl_get_binding_wr(m, asname, 1); - b->name = (jl_sym_t*)jl_deserialize_value(s, (jl_value_t**)&b->name); - jl_value_t *bvalue = jl_deserialize_value(s, (jl_value_t**)&b->value); - *(jl_value_t**)&b->value = bvalue; - if (bvalue != NULL) jl_gc_wb(m, bvalue); - jl_value_t *bglobalref = jl_deserialize_value(s, (jl_value_t**)&b->globalref); - 
*(jl_value_t**)&b->globalref = bglobalref; - if (bglobalref != NULL) jl_gc_wb(m, bglobalref); - b->owner = (jl_module_t*)jl_deserialize_value(s, (jl_value_t**)&b->owner); - if (b->owner != NULL) jl_gc_wb(m, b->owner); - jl_value_t *bty = jl_deserialize_value(s, (jl_value_t**)&b->ty); - *(jl_value_t**)&b->ty = bty; - int8_t flags = read_int8(s->s); - b->deprecated = (flags>>3) & 1; - b->constp = (flags>>2) & 1; - b->exportp = (flags>>1) & 1; - b->imported = (flags) & 1; - } - size_t i = m->usings.len; - size_t ni = read_int32(s->s); - arraylist_grow(&m->usings, ni); - ni += i; - while (i < ni) { - m->usings.items[i] = jl_deserialize_value(s, (jl_value_t**)&m->usings.items[i]); - i++; - } - m->istopmod = read_uint8(s->s); - m->uuid.hi = read_uint64(s->s); - m->uuid.lo = read_uint64(s->s); - m->build_id = read_uint64(s->s); - m->counter = read_int32(s->s); - m->nospecialize = read_int32(s->s); - m->optlevel = read_int8(s->s); - m->compile = read_int8(s->s); - m->infer = read_int8(s->s); - m->max_methods = read_int8(s->s); - m->primary_world = jl_atomic_load_acquire(&jl_world_counter); - return (jl_value_t*)m; -} - -static jl_value_t *jl_deserialize_value_singleton(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED -{ - jl_value_t *v = (jl_value_t*)jl_gc_alloc(s->ptls, 0, NULL); - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, (void*)v); - // TODO: optimize the case where the value can easily be obtained - // from an external module (tag == 6) as dt->instance - assert(loc != HT_NOTFOUND); - // if loc == NULL, then the caller can't provide the address where the instance will be - // stored. this happens if a field might store a 0-size value, but the field itself is - // not 0 size, e.g. 
`::Union{Int,Nothing}` - if (loc != NULL) { - arraylist_push(&flagref_list, loc); - arraylist_push(&flagref_list, (void*)pos); - } - jl_datatype_t *dt = (jl_datatype_t*)jl_deserialize_value(s, (jl_value_t**)HT_NOTFOUND); // no loc, since if dt is replaced, then dt->instance would be also - jl_set_typeof(v, dt); - if (dt->instance == NULL) - return v; - return dt->instance; -} - -static void jl_deserialize_struct(jl_serializer_state *s, jl_value_t *v) JL_GC_DISABLED -{ - jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(v); - char *data = (char*)jl_data_ptr(v); - size_t i, np = dt->layout->npointers; - char *start = data; - for (i = 0; i < np; i++) { - uint32_t ptr = jl_ptr_offset(dt, i); - jl_value_t **fld = &((jl_value_t**)data)[ptr]; - if ((char*)fld != start) - ios_readall(s->s, start, (const char*)fld - start); - *fld = jl_deserialize_value(s, fld); - //if (*fld)// a is new (gc is disabled) - // jl_gc_wb(a, *fld); - start = (char*)&fld[1]; - } - data += jl_datatype_size(dt); - if (data != start) - ios_readall(s->s, start, data - start); - if (dt == jl_typemap_entry_type) { - jl_typemap_entry_t *entry = (jl_typemap_entry_t*)v; - if (entry->max_world == ~(size_t)0) { - if (entry->min_world > 1) { - // update world validity to reflect current state of the counter - entry->min_world = jl_atomic_load_acquire(&jl_world_counter); - } - } - else { - // garbage entry - delete it :( - entry->min_world = 1; - entry->max_world = 0; - } - } else if (dt == jl_globalref_type) { - jl_globalref_t *r = (jl_globalref_t*)v; - jl_binding_t *b = jl_get_binding_if_bound(r->mod, r->name); - r->bnd_cache = b && b->value ? b : NULL; - } -} - -static jl_value_t *jl_deserialize_value_any(jl_serializer_state *s, uint8_t tag, jl_value_t **loc) JL_GC_DISABLED -{ - int32_t sz = (tag == TAG_SHORT_GENERAL ? 
read_uint8(s->s) : read_int32(s->s)); - jl_value_t *v = jl_gc_alloc(s->ptls, sz, NULL); - jl_set_typeof(v, (void*)(intptr_t)0x50); - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, v); - jl_datatype_t *dt = (jl_datatype_t*)jl_deserialize_value(s, &jl_astaggedvalue(v)->type); - assert(sz != 0 || loc); - if (dt == jl_typename_type) { - int internal = read_uint8(s->s); - jl_typename_t *tn; - if (internal) { - tn = (jl_typename_t*)jl_gc_alloc( - s->ptls, sizeof(jl_typename_t), jl_typename_type); - memset(tn, 0, sizeof(jl_typename_t)); - tn->cache = jl_emptysvec; // the cache is refilled later (tag 5) - tn->linearcache = jl_emptysvec; // the cache is refilled later (tag 5) - backref_list.items[pos] = tn; - } - jl_module_t *m = (jl_module_t*)jl_deserialize_value(s, NULL); - jl_sym_t *sym = (jl_sym_t*)jl_deserialize_value(s, NULL); - if (internal) { - tn->module = m; - tn->name = sym; - tn->names = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&tn->names); - jl_gc_wb(tn, tn->names); - tn->wrapper = jl_deserialize_value(s, &tn->wrapper); - jl_gc_wb(tn, tn->wrapper); - tn->Typeofwrapper = NULL; - tn->mt = (jl_methtable_t*)jl_deserialize_value(s, (jl_value_t**)&tn->mt); - jl_gc_wb(tn, tn->mt); - ios_read(s->s, (char*)&tn->hash, sizeof(tn->hash)); - int8_t flags = read_int8(s->s); - tn->_reserved = 0; - tn->abstract = flags & 1; - tn->mutabl = (flags>>1) & 1; - tn->mayinlinealloc = (flags>>2) & 1; - tn->max_methods = read_uint8(s->s); - if (tn->abstract) - tn->n_uninitialized = 0; - else - tn->n_uninitialized = read_uint16(s->s); - size_t nfields = read_int32(s->s); - if (nfields) { - tn->atomicfields = (uint32_t*)malloc(nfields); - ios_read(s->s, (char*)tn->atomicfields, nfields); - } - nfields = read_int32(s->s); - if (nfields) { - tn->constfields = (uint32_t*)malloc(nfields); - ios_read(s->s, (char*)tn->constfields, nfields); - } - } - else { - jl_datatype_t *dt = (jl_datatype_t*)jl_unwrap_unionall(jl_get_global(m, sym)); - assert(jl_is_datatype(dt)); 
- tn = dt->name; - backref_list.items[pos] = tn; - } - return (jl_value_t*)tn; - } - jl_set_typeof(v, dt); - if ((jl_value_t*)dt == jl_bigint_type) { - jl_value_t *sizefield = jl_deserialize_value(s, NULL); - int32_t sz = jl_unbox_int32(sizefield); - int32_t nw = (sz == 0 ? 1 : (sz < 0 ? -sz : sz)); - size_t nb = nw * gmp_limb_size; - void *buf = jl_gc_counted_malloc(nb); - if (buf == NULL) - jl_throw(jl_memory_exception); - ios_readall(s->s, (char*)buf, nb); - jl_set_nth_field(v, 0, jl_box_int32(nw)); - jl_set_nth_field(v, 1, sizefield); - jl_set_nth_field(v, 2, jl_box_voidpointer(buf)); - } - else { - jl_deserialize_struct(s, v); - } - return v; -} - -static jl_value_t *jl_deserialize_value(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED -{ - assert(!ios_eof(s->s)); - jl_value_t *v; - size_t n; - uintptr_t pos; - uint8_t tag = read_uint8(s->s); - if (tag > LAST_TAG) - return deser_tag[tag]; - switch (tag) { - case TAG_NULL: return NULL; - case 0: - tag = read_uint8(s->s); - return deser_tag[tag]; - case TAG_BACKREF: JL_FALLTHROUGH; case TAG_SHORT_BACKREF: ; - uintptr_t offs = (tag == TAG_BACKREF) ? 
read_int32(s->s) : read_uint16(s->s); - int isflagref = 0; - isflagref = !!(offs & 1); - offs >>= 1; - // assert(offs >= 0); // offs is unsigned so this is always true - assert(offs < backref_list.len); - jl_value_t *bp = (jl_value_t*)backref_list.items[offs]; - assert(bp); - if (isflagref && loc != HT_NOTFOUND) { - if (loc != NULL) { - // as in jl_deserialize_value_singleton, the caller won't have a place to - // store this reference given a field type like Union{Int,Nothing} - arraylist_push(&flagref_list, loc); - arraylist_push(&flagref_list, (void*)(uintptr_t)-1); - } - } - return (jl_value_t*)bp; - case TAG_SVEC: JL_FALLTHROUGH; case TAG_LONG_SVEC: - return jl_deserialize_value_svec(s, tag, loc); - case TAG_COMMONSYM: - return deser_symbols[read_uint8(s->s)]; - case TAG_SYMBOL: JL_FALLTHROUGH; case TAG_LONG_SYMBOL: - return jl_deserialize_value_symbol(s, tag); - case TAG_ARRAY: JL_FALLTHROUGH; case TAG_ARRAY1D: - return jl_deserialize_value_array(s, tag); - case TAG_UNIONALL: - pos = backref_list.len; - arraylist_push(&backref_list, NULL); - if (read_uint8(s->s)) { - jl_module_t *m = (jl_module_t*)jl_deserialize_value(s, NULL); - jl_sym_t *sym = (jl_sym_t*)jl_deserialize_value(s, NULL); - jl_value_t *v = jl_get_global(m, sym); - assert(jl_is_unionall(v)); - backref_list.items[pos] = v; - return v; - } - v = jl_gc_alloc(s->ptls, sizeof(jl_unionall_t), jl_unionall_type); - backref_list.items[pos] = v; - ((jl_unionall_t*)v)->var = (jl_tvar_t*)jl_deserialize_value(s, (jl_value_t**)&((jl_unionall_t*)v)->var); - jl_gc_wb(v, ((jl_unionall_t*)v)->var); - ((jl_unionall_t*)v)->body = jl_deserialize_value(s, &((jl_unionall_t*)v)->body); - jl_gc_wb(v, ((jl_unionall_t*)v)->body); - return v; - case TAG_TVAR: - v = jl_gc_alloc(s->ptls, sizeof(jl_tvar_t), jl_tvar_type); - jl_tvar_t *tv = (jl_tvar_t*)v; - arraylist_push(&backref_list, tv); - tv->name = (jl_sym_t*)jl_deserialize_value(s, NULL); - jl_gc_wb(tv, tv->name); - tv->lb = jl_deserialize_value(s, &tv->lb); - 
jl_gc_wb(tv, tv->lb); - tv->ub = jl_deserialize_value(s, &tv->ub); - jl_gc_wb(tv, tv->ub); - return (jl_value_t*)tv; - case TAG_METHOD: - return jl_deserialize_value_method(s, loc); - case TAG_METHOD_INSTANCE: - return jl_deserialize_value_method_instance(s, loc); - case TAG_CODE_INSTANCE: - return jl_deserialize_value_code_instance(s, loc); - case TAG_MODULE: - return jl_deserialize_value_module(s); - case TAG_SHORTER_INT64: - v = jl_box_int64((int16_t)read_uint16(s->s)); - arraylist_push(&backref_list, v); - return v; - case TAG_SHORT_INT64: - v = jl_box_int64(read_int32(s->s)); - arraylist_push(&backref_list, v); - return v; - case TAG_INT64: - v = jl_box_int64((int64_t)read_uint64(s->s)); - arraylist_push(&backref_list, v); - return v; - case TAG_SHORT_INT32: - v = jl_box_int32((int16_t)read_uint16(s->s)); - arraylist_push(&backref_list, v); - return v; - case TAG_INT32: - v = jl_box_int32(read_int32(s->s)); - arraylist_push(&backref_list, v); - return v; - case TAG_UINT8: - return jl_box_uint8(read_uint8(s->s)); - case TAG_SINGLETON: - return jl_deserialize_value_singleton(s, loc); - case TAG_CORE: - return (jl_value_t*)jl_core_module; - case TAG_BASE: - return (jl_value_t*)jl_base_module; - case TAG_CNULL: - v = jl_gc_alloc(s->ptls, sizeof(void*), NULL); - jl_set_typeof(v, (void*)(intptr_t)0x50); - *(void**)v = NULL; - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, v); - jl_set_typeof(v, jl_deserialize_value(s, &jl_astaggedvalue(v)->type)); - return v; - case TAG_BITYPENAME: - v = deser_tag[read_uint8(s->s)]; - return (jl_value_t*)((jl_datatype_t*)jl_unwrap_unionall(v))->name; - case TAG_STRING: - n = read_int32(s->s); - v = jl_alloc_string(n); - arraylist_push(&backref_list, v); - ios_readall(s->s, jl_string_data(v), n); - return v; - case TAG_DATATYPE: - pos = backref_list.len; - arraylist_push(&backref_list, NULL); - return jl_deserialize_datatype(s, pos, loc); - default: - assert(tag == TAG_GENERAL || tag == TAG_SHORT_GENERAL); - return 
jl_deserialize_value_any(s, tag, loc); - } -} - -// Add methods to external (non-worklist-owned) functions -static void jl_insert_methods(jl_array_t *list) -{ - size_t i, l = jl_array_len(list); - for (i = 0; i < l; i += 2) { - jl_method_t *meth = (jl_method_t*)jl_array_ptr_ref(list, i); - assert(jl_is_method(meth)); - assert(!meth->is_for_opaque_closure); - jl_tupletype_t *simpletype = (jl_tupletype_t*)jl_array_ptr_ref(list, i + 1); - jl_methtable_t *mt = jl_method_get_table(meth); - assert((jl_value_t*)mt != jl_nothing); - jl_method_table_insert(mt, meth, simpletype); - } -} - -int remove_code_instance_from_validation(jl_code_instance_t *codeinst) -{ - return ptrhash_remove(&new_code_instance_validate, codeinst); -} - -// verify that these edges intersect with the same methods as before -static jl_array_t *jl_verify_edges(jl_array_t *targets) -{ - size_t world = jl_atomic_load_acquire(&jl_world_counter); - size_t i, l = jl_array_len(targets) / 3; - jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, l); - memset(jl_array_data(valids), 1, l); - jl_value_t *loctag = NULL; - jl_value_t *matches = NULL; - JL_GC_PUSH3(&valids, &matches, &loctag); - for (i = 0; i < l; i++) { - jl_value_t *invokesig = jl_array_ptr_ref(targets, i * 3); - jl_value_t *callee = jl_array_ptr_ref(targets, i * 3 + 1); - jl_value_t *expected = jl_array_ptr_ref(targets, i * 3 + 2); - int valid = 1; - size_t min_valid = 0; - size_t max_valid = ~(size_t)0; - if (invokesig) { - assert(callee && "unsupported edge"); - jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); - if ((jl_value_t*)mt == jl_nothing) { - valid = 0; - } - else { - matches = jl_gf_invoke_lookup_worlds(invokesig, (jl_value_t*)mt, world, &min_valid, &max_valid); - if (matches == jl_nothing) { - valid = 0; - } - else { - matches = (jl_value_t*)((jl_method_match_t*)matches)->method; - if (matches != expected) { - valid = 0; - } - } - } - } - else { - jl_value_t *sig; - if 
(jl_is_method_instance(callee)) - sig = ((jl_method_instance_t*)callee)->specTypes; - else - sig = callee; - assert(jl_is_array(expected)); - int ambig = 0; - // TODO: possibly need to included ambiguities too (for the optimizer correctness)? - matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, - -1, 0, world, &min_valid, &max_valid, &ambig); - if (matches == jl_false) { - valid = 0; - } - else { - // setdiff!(matches, expected) - size_t j, k, ins = 0; - if (jl_array_len(matches) != jl_array_len(expected)) { - valid = 0; - } - for (k = 0; k < jl_array_len(matches); k++) { - jl_method_t *match = ((jl_method_match_t*)jl_array_ptr_ref(matches, k))->method; - size_t l = jl_array_len(expected); - for (j = 0; j < l; j++) - if (match == (jl_method_t*)jl_array_ptr_ref(expected, j)) - break; - if (j == l) { - // intersection has a new method or a method was - // deleted--this is now probably no good, just invalidate - // everything about it now - valid = 0; - if (!_jl_debug_method_invalidation) - break; - jl_array_ptr_set(matches, ins++, match); - } - } - if (!valid && _jl_debug_method_invalidation) - jl_array_del_end((jl_array_t*)matches, jl_array_len(matches) - ins); - } - } - jl_array_uint8_set(valids, i, valid); - if (!valid && _jl_debug_method_invalidation) { - jl_array_ptr_1d_push(_jl_debug_method_invalidation, invokesig ? (jl_value_t*)invokesig : callee); - loctag = jl_cstr_to_string("insert_backedges_callee"); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - loctag = jl_box_int32((int32_t)i); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, matches); - } - //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)invokesig); - //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)callee); - //ios_puts(valid ? 
"valid\n" : "INVALID\n", ios_stderr); - } - JL_GC_POP(); - return valids; -} - -// Combine all edges relevant to a method into the visited table -void jl_verify_methods(jl_array_t *edges, jl_array_t *valids, htable_t *visited) -{ - jl_value_t *loctag = NULL; - JL_GC_PUSH1(&loctag); - size_t i, l = jl_array_len(edges) / 2; - htable_new(visited, l); - for (i = 0; i < l; i++) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); - assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); - jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1); - assert(jl_typeis((jl_value_t*)callee_ids, jl_array_int32_type)); - int valid = 1; - if (callee_ids == NULL) { - // serializing the edges had failed - valid = 0; - } - else { - int32_t *idxs = (int32_t*)jl_array_data(callee_ids); - size_t j; - for (j = 0; valid && j < idxs[0]; j++) { - int32_t idx = idxs[j + 1]; - valid = jl_array_uint8_ref(valids, idx); - if (!valid && _jl_debug_method_invalidation) { - jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller); - loctag = jl_cstr_to_string("verify_methods"); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - loctag = jl_box_int32((int32_t)idx); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - } - } - } - ptrhash_put(visited, caller, (void*)(((char*)HT_NOTFOUND) + valid + 1)); - //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)caller); - //ios_puts(valid ? 
"valid\n" : "INVALID\n", ios_stderr); - // HT_NOTFOUND: valid (no invalid edges) - // HT_NOTFOUND + 1: invalid - // HT_NOTFOUND + 2: need to scan - // HT_NOTFOUND + 3 + depth: in-progress - } - JL_GC_POP(); -} - - -// Propagate the result of cycle-resolution to all edges (recursively) -static int mark_edges_in_worklist(jl_array_t *edges, int idx, jl_method_instance_t *cycle, htable_t *visited, int found) -{ - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, idx * 2); - int oldfound = (char*)ptrhash_get(visited, caller) - (char*)HT_NOTFOUND; - if (oldfound < 3) - return 0; // not in-progress - if (!found) { - ptrhash_remove(visited, (void*)caller); - } - else { - ptrhash_put(visited, (void*)caller, (void*)((char*)HT_NOTFOUND + 1 + found)); - } - jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, idx * 2 + 1); - assert(jl_typeis((jl_value_t*)callee_ids, jl_array_int32_type)); - int32_t *idxs = (int32_t*)jl_array_data(callee_ids); - size_t i, badidx = 0, n = jl_array_len(callee_ids); - for (i = idxs[0] + 1; i < n; i++) { - if (mark_edges_in_worklist(edges, idxs[i], cycle, visited, found) && badidx == 0) - badidx = i - idxs[0]; - } - if (_jl_debug_method_invalidation) { - jl_value_t *loctag = NULL; - JL_GC_PUSH1(&loctag); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller); - loctag = jl_cstr_to_string("verify_methods"); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - jl_method_instance_t *callee = cycle; - if (badidx--) - callee = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * badidx); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)callee); - JL_GC_POP(); - } - return 1; -} - - -// Visit the entire call graph, starting from edges[idx] to determine if that method is valid -static int jl_verify_graph_edge(jl_array_t *edges, int idx, htable_t *visited, int depth) -{ - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, idx * 2); - 
assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); - int found = (char*)ptrhash_get(visited, (void*)caller) - (char*)HT_NOTFOUND; - if (found == 0) - return 1; // valid - if (found == 1) - return 0; // invalid - if (found != 2) - return found - 1; // depth - found = 0; - ptrhash_put(visited, (void*)caller, (void*)((char*)HT_NOTFOUND + 3 + depth)); // change 2 to in-progress at depth - jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, idx * 2 + 1); - assert(jl_typeis((jl_value_t*)callee_ids, jl_array_int32_type)); - int32_t *idxs = (int32_t*)jl_array_data(callee_ids); - int cycle = 0; - size_t i, n = jl_array_len(callee_ids); - for (i = idxs[0] + 1; i < n; i++) { - int32_t idx = idxs[i]; - int child_found = jl_verify_graph_edge(edges, idx, visited, depth + 1); - if (child_found == 0) { - found = 1; - if (_jl_debug_method_invalidation) { - jl_value_t *loctag = NULL; - JL_GC_PUSH1(&loctag); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller); - loctag = jl_cstr_to_string("verify_methods"); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, jl_array_ptr_ref(edges, idx * 2)); - JL_GC_POP(); - } - break; - } - else if (child_found >= 2 && child_found - 2 < cycle) { - // record the cycle will resolve at depth "cycle" - cycle = child_found - 2; - assert(cycle); - } - } - if (!found) { - if (cycle && cycle != depth) - return cycle + 2; - ptrhash_remove(visited, (void*)caller); - } - else { // found invalid - ptrhash_put(visited, (void*)caller, (void*)((char*)HT_NOTFOUND + 1 + found)); - } - if (cycle) { - // If we are the top of the current cycle, now mark all other parts of - // our cycle by re-walking the backedges graph and marking all WIP - // items as found. - // Be careful to only re-walk as far as we had originally scanned above. - // Or if we found a backedge, also mark all of the other parts of the - // cycle as also having an backedge. 
- n = i; - for (i = idxs[0] + 1; i < n; i++) { - mark_edges_in_worklist(edges, idxs[i], caller, visited, found); - } - } - return found ? 0 : 1; -} - -// Visit all entries in edges, verify if they are valid -static jl_array_t *jl_verify_graph(jl_array_t *edges, htable_t *visited) -{ - size_t i, n = jl_array_len(edges) / 2; - jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, n); - JL_GC_PUSH1(&valids); - int8_t *valids_data = (int8_t*)jl_array_data(valids); - for (i = 0; i < n; i++) { - valids_data[i] = jl_verify_graph_edge(edges, i, visited, 1); - } - JL_GC_POP(); - return valids; -} - -// Restore backedges to external targets -// `edges` = [caller1, targets_indexes1, ...], the list of worklist-owned methods calling external methods. -// `ext_targets` is [invokesig1, callee1, matches1, ...], the global set of non-worklist callees of worklist-owned methods. -static void jl_insert_backedges(jl_array_t *edges, jl_array_t *ext_targets, jl_array_t *mi_list) -{ - // determine which CodeInstance objects are still valid in our image - size_t world = jl_atomic_load_acquire(&jl_world_counter); - jl_array_t *valids = jl_verify_edges(ext_targets); - JL_GC_PUSH1(&valids); - htable_t visited; - htable_new(&visited, 0); - jl_verify_methods(edges, valids, &visited); - valids = jl_verify_graph(edges, &visited); - size_t i, l = jl_array_len(edges) / 2; - - // next build a map from external_mis to their CodeInstance for insertion - if (mi_list == NULL) { - htable_reset(&visited, 0); - } - else { - size_t i, l = jl_array_len(mi_list); - htable_reset(&visited, l); - for (i = 0; i < l; i++) { - jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(mi_list, i); - ptrhash_put(&visited, (void*)ci->def, (void*)ci); - } - } - - // next disable any invalid codes, so we do not try to enable them - for (i = 0; i < l; i++) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); - assert(jl_is_method_instance(caller) && 
jl_is_method(caller->def.method)); - int valid = jl_array_uint8_ref(valids, i); - if (valid) - continue; - void *ci = ptrhash_get(&visited, (void*)caller); - if (ci != HT_NOTFOUND) { - assert(jl_is_code_instance(ci)); - remove_code_instance_from_validation((jl_code_instance_t*)ci); // mark it as handled - } - else { - jl_code_instance_t *codeinst = caller->cache; - while (codeinst) { - remove_code_instance_from_validation(codeinst); // should be left invalid - codeinst = jl_atomic_load_relaxed(&codeinst->next); - } - } - } - - // finally enable any applicable new codes - for (i = 0; i < l; i++) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); - int valid = jl_array_uint8_ref(valids, i); - if (!valid) - continue; - // if this callee is still valid, add all the backedges - jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1); - int32_t *idxs = (int32_t*)jl_array_data(callee_ids); - for (size_t j = 0; j < idxs[0]; j++) { - int32_t idx = idxs[j + 1]; - jl_value_t *invokesig = jl_array_ptr_ref(ext_targets, idx * 3); - jl_value_t *callee = jl_array_ptr_ref(ext_targets, idx * 3 + 1); - if (callee && jl_is_method_instance(callee)) { - jl_method_instance_add_backedge((jl_method_instance_t*)callee, invokesig, caller); - } - else { - jl_value_t *sig = callee == NULL ? invokesig : callee; - jl_methtable_t *mt = jl_method_table_for(sig); - // FIXME: rarely, `callee` has an unexpected `Union` signature, - // see https://github.com/JuliaLang/julia/pull/43990#issuecomment-1030329344 - // Fix the issue and turn this back into an `assert((jl_value_t*)mt != jl_nothing)` - // This workaround exposes us to (rare) 265-violations. 
- if ((jl_value_t*)mt != jl_nothing) - jl_method_table_add_backedge(mt, sig, (jl_value_t*)caller); - } - } - // then enable it - void *ci = ptrhash_get(&visited, (void*)caller); - if (ci != HT_NOTFOUND) { - // have some new external code to use - assert(jl_is_code_instance(ci)); - jl_code_instance_t *codeinst = (jl_code_instance_t*)ci; - remove_code_instance_from_validation(codeinst); // mark it as handled - assert(codeinst->min_world >= world && codeinst->inferred); - codeinst->max_world = ~(size_t)0; - if (jl_rettype_inferred(caller, world, ~(size_t)0) == jl_nothing) { - jl_mi_cache_insert(caller, codeinst); - } - } - else { - jl_code_instance_t *codeinst = caller->cache; - while (codeinst) { - if (remove_code_instance_from_validation(codeinst)) { // mark it as handled - assert(codeinst->min_world >= world && codeinst->inferred); - codeinst->max_world = ~(size_t)0; - } - codeinst = jl_atomic_load_relaxed(&codeinst->next); - } - } - } - - htable_free(&visited); - JL_GC_POP(); -} - -static void validate_new_code_instances(void) -{ - size_t world = jl_atomic_load_acquire(&jl_world_counter); - size_t i; - for (i = 0; i < new_code_instance_validate.size; i += 2) { - if (new_code_instance_validate.table[i+1] != HT_NOTFOUND) { - jl_code_instance_t *ci = (jl_code_instance_t*)new_code_instance_validate.table[i]; - JL_GC_PROMISE_ROOTED(ci); // TODO: this needs a root (or restructuring to avoid it) - assert(ci->min_world >= world && ci->inferred); - ci->max_world = ~(size_t)0; - jl_method_instance_t *caller = ci->def; - if (jl_rettype_inferred(caller, world, ~(size_t)0) == jl_nothing) { - jl_mi_cache_insert(caller, ci); - } - //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)caller); - //ios_puts("FREE\n", ios_stderr); - } - } -} - -static jl_value_t *read_verify_mod_list(ios_t *s, jl_array_t *mod_list) -{ - if (!jl_main_module->build_id) { - return jl_get_exceptionf(jl_errorexception_type, - "Main module uuid state is invalid for module deserialization."); - } - size_t 
i, l = jl_array_len(mod_list); - for (i = 0; ; i++) { - size_t len = read_int32(s); - if (len == 0 && i == l) - return NULL; // success - if (len == 0 || i == l) - return jl_get_exceptionf(jl_errorexception_type, "Wrong number of entries in module list."); - char *name = (char*)alloca(len + 1); - ios_readall(s, name, len); - name[len] = '\0'; - jl_uuid_t uuid; - uuid.hi = read_uint64(s); - uuid.lo = read_uint64(s); - uint64_t build_id = read_uint64(s); - jl_sym_t *sym = _jl_symbol(name, len); - jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(mod_list, i); - if (!m || !jl_is_module(m) || m->uuid.hi != uuid.hi || m->uuid.lo != uuid.lo || m->name != sym || m->build_id != build_id) { - return jl_get_exceptionf(jl_errorexception_type, - "Invalid input in module list: expected %s.", name); - } - } -} - -static int readstr_verify(ios_t *s, const char *str) -{ - size_t i, len = strlen(str); - for (i = 0; i < len; ++i) - if ((char)read_uint8(s) != str[i]) - return 0; - return 1; -} - -JL_DLLEXPORT int jl_read_verify_header(ios_t *s) -{ - uint16_t bom; - return (readstr_verify(s, JI_MAGIC) && - read_uint16(s) == JI_FORMAT_VERSION && - ios_read(s, (char *) &bom, 2) == 2 && bom == BOM && - read_uint8(s) == sizeof(void*) && - readstr_verify(s, JL_BUILD_UNAME) && !read_uint8(s) && - readstr_verify(s, JL_BUILD_ARCH) && !read_uint8(s) && - readstr_verify(s, JULIA_VERSION_STRING) && !read_uint8(s) && - readstr_verify(s, jl_git_branch()) && !read_uint8(s) && - readstr_verify(s, jl_git_commit()) && !read_uint8(s)); -} - -static void jl_finalize_serializer(jl_serializer_state *s) -{ - size_t i, l; - // save module initialization order - if (jl_module_init_order != NULL) { - l = jl_array_len(jl_module_init_order); - for (i = 0; i < l; i++) { - // verify that all these modules were saved - assert(ptrhash_get(&backref_table, jl_array_ptr_ref(jl_module_init_order, i)) != HT_NOTFOUND); - } - } - jl_serialize_value(s, jl_module_init_order); - - // record list of reinitialization functions - 
l = reinit_list.len; - for (i = 0; i < l; i += 2) { - write_int32(s->s, (int)((uintptr_t) reinit_list.items[i])); - write_int32(s->s, (int)((uintptr_t) reinit_list.items[i+1])); - } - write_int32(s->s, -1); -} - -static void jl_reinit_item(jl_value_t *v, int how, arraylist_t *tracee_list) -{ - JL_TRY { - switch (how) { - case 1: { // rehash IdDict - jl_array_t **a = (jl_array_t**)v; - // Assume *a don't need a write barrier - *a = jl_idtable_rehash(*a, jl_array_len(*a)); - jl_gc_wb(v, *a); - break; - } - case 2: { // reinsert module v into parent (const) - jl_module_t *mod = (jl_module_t*)v; - if (mod->parent == mod) // top level modules handled by loader - break; - jl_binding_t *b = jl_get_binding_wr(mod->parent, mod->name, 1); // this can throw - jl_declare_constant(b); // this can also throw - if (b->value != NULL) { - if (!jl_is_module(b->value)) { - jl_errorf("Invalid redefinition of constant %s.", - jl_symbol_name(mod->name)); // this also throws - } - if (jl_generating_output() && jl_options.incremental) { - jl_errorf("Cannot replace module %s during incremental precompile.", jl_symbol_name(mod->name)); - } - jl_printf(JL_STDERR, "WARNING: replacing module %s.\n", jl_symbol_name(mod->name)); - } - b->value = v; - jl_gc_wb_binding(b, v); - break; - } - case 3: { // rehash MethodTable - jl_methtable_t *mt = (jl_methtable_t*)v; - if (tracee_list) - arraylist_push(tracee_list, mt); - break; - } - default: - assert(0 && "corrupt deserialization state"); - abort(); - } - } - JL_CATCH { - jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: error while reinitializing value "); - jl_static_show((JL_STREAM*)STDERR_FILENO, v); - jl_printf((JL_STREAM*)STDERR_FILENO, ":\n"); - jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception()); - jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); - jlbacktrace(); // written to STDERR_FILENO - } -} - -static jl_array_t *jl_finalize_deserializer(jl_serializer_state *s, arraylist_t *tracee_list) -{ - jl_array_t *init_order = 
(jl_array_t*)jl_deserialize_value(s, NULL); - - // run reinitialization functions - int pos = read_int32(s->s); - while (pos != -1) { - jl_reinit_item((jl_value_t*)backref_list.items[pos], read_int32(s->s), tracee_list); - pos = read_int32(s->s); - } - return init_order; -} - -JL_DLLEXPORT void jl_init_restored_modules(jl_array_t *init_order) -{ - int i, l = jl_array_len(init_order); - for (i = 0; i < l; i++) { - jl_value_t *mod = jl_array_ptr_ref(init_order, i); - if (!jl_generating_output() || jl_options.incremental) { - jl_module_run_initializer((jl_module_t*)mod); - } - else { - if (jl_module_init_order == NULL) - jl_module_init_order = jl_alloc_vec_any(0); - jl_array_ptr_1d_push(jl_module_init_order, mod); - } - } -} - - -// --- entry points --- - -// Register array of newly-inferred MethodInstances -// This gets called as the first step of Base.include_package_for_output -JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t* _newly_inferred) -{ - assert(_newly_inferred == NULL || jl_is_array(_newly_inferred)); - newly_inferred = (jl_array_t*) _newly_inferred; -} - -JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t* linfo) -{ - JL_LOCK(&newly_inferred_mutex); - size_t end = jl_array_len(newly_inferred); - jl_array_grow_end(newly_inferred, 1); - jl_arrayset(newly_inferred, linfo, end); - JL_UNLOCK(&newly_inferred_mutex); -} - -// Serialize the modules in `worklist` to file `fname` -JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist) -{ - JL_TIMING(SAVE_MODULE); - jl_task_t *ct = jl_current_task; - ios_t f; - if (ios_file(&f, fname, 1, 1, 1, 1) == NULL) { - jl_printf(JL_STDERR, "Cannot open cache file \"%s\" for writing.\n", fname); - return 1; - } - - jl_array_t *mod_array = NULL, *udeps = NULL; - jl_array_t *extext_methods = NULL, *mi_list = NULL; - jl_array_t *ext_targets = NULL, *edges = NULL; - JL_GC_PUSH7(&mod_array, &udeps, &extext_methods, &mi_list, &ext_targets, &edges, &edges_map); - - mod_array = jl_get_loaded_modules(); // 
__toplevel__ modules loaded in this session (from Base.loaded_modules_array) - assert(jl_precompile_toplevel_module == NULL); - jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1); - - serializer_worklist = worklist; - write_header(&f); - // write description of contents (name, uuid, buildid) - write_work_list(&f); - // Determine unique (module, abspath, mtime) dependencies for the files defining modules in the worklist - // (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header. - // Also write Preferences. - int64_t srctextpos = write_dependency_list(&f, &udeps); // srctextpos: position of srctext entry in header index (update later) - // write description of requirements for loading (modules that must be pre-loaded if initialization is to succeed) - // this can return errors during deserialize, - // best to keep it early (before any actual initialization) - write_mod_list(&f, mod_array); - - arraylist_new(&reinit_list, 0); - htable_new(&backref_table, 5000); - htable_new(&external_mis, newly_inferred ? jl_array_len(newly_inferred) : 0); - ptrhash_put(&backref_table, jl_main_module, (char*)HT_NOTFOUND + 1); - backref_table_numel = 1; - jl_idtable_type = jl_base_module ? jl_get_global(jl_base_module, jl_symbol("IdDict")) : NULL; - jl_idtable_typename = jl_base_module ? ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_idtable_type))->name : NULL; - jl_bigint_type = jl_base_module ? 
jl_get_global(jl_base_module, jl_symbol("BigInt")) : NULL; - if (jl_bigint_type) { - gmp_limb_size = jl_unbox_long(jl_get_global((jl_module_t*)jl_get_global(jl_base_module, jl_symbol("GMP")), - jl_symbol("BITS_PER_LIMB"))) / 8; - } - - jl_gc_enable_finalizers(ct, 0); // make sure we don't run any Julia code concurrently after this point - - // Save the inferred code from newly inferred, external methods - mi_list = queue_external_mis(newly_inferred); - - edges_map = jl_alloc_vec_any(0); - extext_methods = jl_alloc_vec_any(0); // [method1, simplesig1, ...], worklist-owned "extending external" methods added to functions owned by modules outside the worklist - size_t i, len = jl_array_len(mod_array); - for (i = 0; i < len; i++) { - jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(mod_array, i); - assert(jl_is_module(m)); - if (m->parent == m) // some toplevel modules (really just Base) aren't actually - jl_collect_extext_methods_from_mod(extext_methods, m); - } - jl_collect_methtable_from_mod(extext_methods, jl_type_type_mt); - jl_collect_missing_backedges(jl_type_type_mt); - jl_collect_methtable_from_mod(extext_methods, jl_nonfunction_mt); - jl_collect_missing_backedges(jl_nonfunction_mt); - // jl_collect_extext_methods_from_mod and jl_collect_missing_backedges also accumulate data in edges_map. - // Process this to extract `edges` and `ext_targets`. - ext_targets = jl_alloc_vec_any(0); // [invokesig1, callee1, matches1, ...] non-worklist callees of worklist-owned methods - // ordinary dispatch: invokesig=NULL, callee is MethodInstance - // `invoke` dispatch: invokesig is signature, callee is MethodInstance - // abstract call: callee is signature - edges = jl_alloc_vec_any(0); // [caller1, ext_targets_indexes1, ...] 
for worklist-owned methods calling external methods - jl_collect_edges(edges, ext_targets); - - jl_serializer_state s = { - &f, - jl_current_task->ptls, - mod_array - }; - jl_serialize_value(&s, worklist); // serialize module-owned items (those accessible from the bindings table) - jl_serialize_value(&s, extext_methods); // serialize new worklist-owned methods for external functions - - // The next three allow us to restore code instances, if still valid - jl_serialize_value(&s, mi_list); - jl_serialize_value(&s, edges); - jl_serialize_value(&s, ext_targets); - jl_finalize_serializer(&s); - serializer_worklist = NULL; - - htable_free(&backref_table); - htable_free(&external_mis); - arraylist_free(&reinit_list); - - jl_gc_enable_finalizers(ct, 1); // make sure we don't run any Julia code concurrently before this point - - // Write the source-text for the dependent files - if (udeps) { - // Go back and update the source-text position to point to the current position - int64_t posfile = ios_pos(&f); - ios_seek(&f, srctextpos); - write_uint64(&f, posfile); - ios_seek_end(&f); - // Each source-text file is written as - // int32: length of abspath - // char*: abspath - // uint64: length of src text - // char*: src text - // At the end we write int32(0) as a terminal sentinel. 
- len = jl_array_len(udeps); - ios_t srctext; - for (i = 0; i < len; i++) { - jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); - jl_value_t *depmod = jl_fieldref(deptuple, 0); // module - // Dependencies declared with `include_dependency` are excluded - // because these may not be Julia code (and could be huge) - if (depmod != (jl_value_t*)jl_main_module) { - jl_value_t *dep = jl_fieldref(deptuple, 1); // file abspath - const char *depstr = jl_string_data(dep); - if (!depstr[0]) - continue; - ios_t *srctp = ios_file(&srctext, depstr, 1, 0, 0, 0); - if (!srctp) { - jl_printf(JL_STDERR, "WARNING: could not cache source text for \"%s\".\n", - jl_string_data(dep)); - continue; - } - size_t slen = jl_string_len(dep); - write_int32(&f, slen); - ios_write(&f, depstr, slen); - posfile = ios_pos(&f); - write_uint64(&f, 0); // placeholder for length of this file in bytes - uint64_t filelen = (uint64_t) ios_copyall(&f, &srctext); - ios_close(&srctext); - ios_seek(&f, posfile); - write_uint64(&f, filelen); - ios_seek_end(&f); - } - } - } - write_int32(&f, 0); // mark the end of the source text - ios_close(&f); - JL_GC_POP(); - jl_precompile_toplevel_module = NULL; - - return 0; -} - -#ifndef JL_NDEBUG -// skip the performance optimizations of jl_types_equal and just use subtyping directly -// one of these types is invalid - that's why we're doing the recache type operation -static int jl_invalid_types_equal(jl_datatype_t *a, jl_datatype_t *b) -{ - return jl_subtype((jl_value_t*)a, (jl_value_t*)b) && jl_subtype((jl_value_t*)b, (jl_value_t*)a); -} -STATIC_INLINE jl_value_t *verify_type(jl_value_t *v) JL_NOTSAFEPOINT -{ - assert(v && jl_typeof(v) && jl_typeof(jl_typeof(v)) == (jl_value_t*)jl_datatype_type); - return v; -} -#endif - - -static jl_datatype_t *recache_datatype(jl_datatype_t *dt) JL_GC_DISABLED; - -static jl_value_t *recache_type(jl_value_t *p) JL_GC_DISABLED -{ - if (jl_is_datatype(p)) { - jl_datatype_t *pdt = (jl_datatype_t*)p; - if (ptrhash_get(&uniquing_table, 
p) != HT_NOTFOUND) { - p = (jl_value_t*)recache_datatype(pdt); - } - else { - jl_svec_t *tt = pdt->parameters; - // ensure all type parameters are recached - size_t i, l = jl_svec_len(tt); - for (i = 0; i < l; i++) - jl_svecset(tt, i, recache_type(jl_svecref(tt, i))); - ptrhash_put(&uniquing_table, p, p); // ensures this algorithm isn't too exponential - } - } - else if (jl_is_typevar(p)) { - jl_tvar_t *ptv = (jl_tvar_t*)p; - ptv->lb = recache_type(ptv->lb); - ptv->ub = recache_type(ptv->ub); - } - else if (jl_is_uniontype(p)) { - jl_uniontype_t *pu = (jl_uniontype_t*)p; - pu->a = recache_type(pu->a); - pu->b = recache_type(pu->b); - } - else if (jl_is_unionall(p)) { - jl_unionall_t *pa = (jl_unionall_t*)p; - pa->var = (jl_tvar_t*)recache_type((jl_value_t*)pa->var); - pa->body = recache_type(pa->body); - } - else { - jl_datatype_t *pt = (jl_datatype_t*)jl_typeof(p); - jl_datatype_t *cachep = recache_datatype(pt); - if (cachep->instance) - p = cachep->instance; - else if (pt != cachep) - jl_set_typeof(p, cachep); - } - return p; -} - -// Extract pre-existing datatypes from cache, and insert new types into cache -// insertions also update uniquing_table -static jl_datatype_t *recache_datatype(jl_datatype_t *dt) JL_GC_DISABLED -{ - jl_datatype_t *t; // the type after unique'ing - assert(verify_type((jl_value_t*)dt)); - t = (jl_datatype_t*)ptrhash_get(&uniquing_table, dt); - if (t == HT_NOTFOUND) - return dt; - if (t != NULL) - return t; - - jl_svec_t *tt = dt->parameters; - // recache all type parameters - size_t i, l = jl_svec_len(tt); - for (i = 0; i < l; i++) - jl_svecset(tt, i, recache_type(jl_svecref(tt, i))); - - // then recache the type itself - if (jl_svec_len(tt) == 0) { // jl_cache_type doesn't work if length(parameters) == 0 - t = dt; - } - else { - t = jl_lookup_cache_type_(dt); - if (t == NULL) { - jl_cache_type_(dt); - t = dt; - } - assert(t->hash == dt->hash); - assert(jl_invalid_types_equal(t, dt)); - } - ptrhash_put(&uniquing_table, dt, t); - return 
t; -} - -// Recache everything from flagref_list except methods and method instances -// Cleans out any handled items so that anything left in flagref_list still needs future processing -static void jl_recache_types(void) JL_GC_DISABLED -{ - size_t i; - // first rewrite all the unique'd objects - for (i = 0; i < flagref_list.len; i += 2) { - jl_value_t **loc = (jl_value_t**)flagref_list.items[i + 0]; - int offs = (int)(intptr_t)flagref_list.items[i + 1]; - jl_value_t *o = loc ? *loc : (jl_value_t*)backref_list.items[offs]; - if (!jl_is_method(o) && !jl_is_method_instance(o)) { - jl_datatype_t *dt; - jl_value_t *v; - if (jl_is_datatype(o)) { - dt = (jl_datatype_t*)o; - v = dt->instance; - } - else { - dt = (jl_datatype_t*)jl_typeof(o); - v = o; - } - jl_datatype_t *t = recache_datatype(dt); // get or create cached type (also updates uniquing_table) - if ((jl_value_t*)dt == o && t != dt) { - assert(!type_in_worklist(dt)); - if (loc) - *loc = (jl_value_t*)t; - if (offs > 0) - backref_list.items[offs] = t; - } - if (v == o && t->instance != v) { - assert(t->instance); - assert(loc); - *loc = t->instance; - if (offs > 0) - backref_list.items[offs] = t->instance; - } - } - } - // invalidate the old datatypes to help catch errors - for (i = 0; i < uniquing_table.size; i += 2) { - jl_datatype_t *o = (jl_datatype_t*)uniquing_table.table[i]; // deserialized ref - jl_datatype_t *t = (jl_datatype_t*)uniquing_table.table[i + 1]; // the real type - if (o != t) { - assert(t != NULL && jl_is_datatype(o)); - if (t->instance != o->instance) - jl_set_typeof(o->instance, (void*)(intptr_t)0x20); - jl_set_typeof(o, (void*)(intptr_t)0x10); - } - } - // then do a cleanup pass to drop these from future iterations of flagref_list - i = 0; - while (i < flagref_list.len) { - jl_value_t **loc = (jl_value_t**)flagref_list.items[i + 0]; - int offs = (int)(intptr_t)flagref_list.items[i + 1]; - jl_value_t *o = loc ? 
*loc : (jl_value_t*)backref_list.items[offs]; - if (jl_is_method(o) || jl_is_method_instance(o)) { - i += 2; - } - else { - // delete this item from the flagref list, so it won't be re-encountered later - flagref_list.len -= 2; - if (i >= flagref_list.len) - break; - flagref_list.items[i + 0] = flagref_list.items[flagref_list.len + 0]; // move end-of-list here (executes a `reverse()`) - flagref_list.items[i + 1] = flagref_list.items[flagref_list.len + 1]; - } - } -} - -// look up a method from a previously deserialized dependent module -static jl_method_t *jl_lookup_method(jl_methtable_t *mt, jl_datatype_t *sig, size_t world) -{ - if (world < jl_main_module->primary_world) - world = jl_main_module->primary_world; - struct jl_typemap_assoc search = {(jl_value_t*)sig, world, NULL, 0, ~(size_t)0}; - jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(mt->defs, &search, /*offs*/0, /*subtype*/0); - return (jl_method_t*)entry->func.value; -} - -static jl_method_t *jl_recache_method(jl_method_t *m) -{ - assert(!m->is_for_opaque_closure); - assert(jl_is_method(m)); - jl_datatype_t *sig = (jl_datatype_t*)m->sig; - jl_methtable_t *mt = jl_method_get_table(m); - assert((jl_value_t*)mt != jl_nothing); - jl_set_typeof(m, (void*)(intptr_t)0x30); // invalidate the old value to help catch errors - return jl_lookup_method(mt, sig, m->module->primary_world); -} - -static jl_value_t *jl_recache_other_(jl_value_t *o); - -static jl_method_instance_t *jl_recache_method_instance(jl_method_instance_t *mi) -{ - jl_method_t *m = mi->def.method; - m = (jl_method_t*)jl_recache_other_((jl_value_t*)m); - assert(jl_is_method(m)); - jl_datatype_t *argtypes = (jl_datatype_t*)mi->specTypes; - jl_set_typeof(mi, (void*)(intptr_t)0x40); // invalidate the old value to help catch errors - jl_svec_t *env = jl_emptysvec; - jl_value_t *ti = jl_type_intersection_env((jl_value_t*)argtypes, (jl_value_t*)m->sig, &env); - //assert(ti != jl_bottom_type); (void)ti; - if (ti == jl_bottom_type) - env = 
jl_emptysvec; // the intersection may fail now if the type system had made an incorrect subtype env in the past - jl_method_instance_t *_new = jl_specializations_get_linfo(m, (jl_value_t*)argtypes, env); - return _new; -} - -static jl_value_t *jl_recache_other_(jl_value_t *o) -{ - jl_value_t *newo = (jl_value_t*)ptrhash_get(&uniquing_table, o); - if (newo != HT_NOTFOUND) - return newo; - if (jl_is_method(o)) { - // lookup the real Method based on the placeholder sig - newo = (jl_value_t*)jl_recache_method((jl_method_t*)o); - ptrhash_put(&uniquing_table, newo, newo); - } - else if (jl_is_method_instance(o)) { - // lookup the real MethodInstance based on the placeholder specTypes - newo = (jl_value_t*)jl_recache_method_instance((jl_method_instance_t*)o); - } - else { - abort(); - } - ptrhash_put(&uniquing_table, o, newo); - return newo; -} - -static void jl_recache_other(void) -{ - size_t i = 0; - while (i < flagref_list.len) { - jl_value_t **loc = (jl_value_t**)flagref_list.items[i + 0]; - int offs = (int)(intptr_t)flagref_list.items[i + 1]; - jl_value_t *o = loc ? *loc : (jl_value_t*)backref_list.items[offs]; - i += 2; - jl_value_t *newo = jl_recache_other_(o); - if (loc) - *loc = newo; - if (offs > 0) - backref_list.items[offs] = newo; - } - flagref_list.len = 0; -} - -// Wait to copy roots until recaching is done -// This is because recaching requires that all pointers to methods and methodinstances -// stay at their source location as recorded by flagref_list. Once recaching is complete, -// they can be safely copied over. 
-static void jl_copy_roots(void) -{ - size_t i, j, l; - for (i = 0; i < queued_method_roots.size; i+=2) { - jl_method_t *m = (jl_method_t*)queued_method_roots.table[i]; - m = (jl_method_t*)ptrhash_get(&uniquing_table, m); - jl_svec_t *keyroots = (jl_svec_t*)queued_method_roots.table[i+1]; - if (keyroots != HT_NOTFOUND) { - uint64_t key = (uint64_t)(uintptr_t)jl_svec_ref(keyroots, 0) | ((uint64_t)(uintptr_t)jl_svec_ref(keyroots, 1) << 32); - jl_array_t *roots = (jl_array_t*)jl_svec_ref(keyroots, 2); - assert(jl_is_array(roots)); - l = jl_array_len(roots); - for (j = 0; j < l; j++) { - jl_value_t *r = jl_array_ptr_ref(roots, j); - jl_value_t *newr = (jl_value_t*)ptrhash_get(&uniquing_table, r); - if (newr != HT_NOTFOUND) { - jl_array_ptr_set(roots, j, newr); - } - } - jl_append_method_roots(m, key, roots); - } - } -} - -static int trace_method(jl_typemap_entry_t *entry, void *closure) -{ - jl_call_tracer(jl_newmeth_tracer, (jl_value_t*)entry->func.method); - return 1; -} - -// Restore module(s) from a cache file f -static jl_value_t *_jl_restore_incremental(ios_t *f, jl_array_t *mod_array) -{ - JL_TIMING(LOAD_MODULE); - jl_task_t *ct = jl_current_task; - if (ios_eof(f) || !jl_read_verify_header(f)) { - ios_close(f); - return jl_get_exceptionf(jl_errorexception_type, - "Precompile file header verification checks failed."); - } - { // skip past the mod list - size_t len; - while ((len = read_int32(f))) - ios_skip(f, len + 3 * sizeof(uint64_t)); - } - { // skip past the dependency list - size_t deplen = read_uint64(f); - ios_skip(f, deplen); - } - - jl_bigint_type = jl_base_module ? 
jl_get_global(jl_base_module, jl_symbol("BigInt")) : NULL; - if (jl_bigint_type) { - gmp_limb_size = jl_unbox_long(jl_get_global((jl_module_t*)jl_get_global(jl_base_module, jl_symbol("GMP")), - jl_symbol("BITS_PER_LIMB"))) / 8; - } - - // verify that the system state is valid - jl_value_t *verify_fail = read_verify_mod_list(f, mod_array); - if (verify_fail) { - ios_close(f); - return verify_fail; - } - - // prepare to deserialize - int en = jl_gc_enable(0); - jl_gc_enable_finalizers(ct, 0); - jl_atomic_fetch_add(&jl_world_counter, 1); // reserve a world age for the deserialization - - arraylist_new(&backref_list, 4000); - arraylist_push(&backref_list, jl_main_module); - arraylist_new(&flagref_list, 0); - htable_new(&queued_method_roots, 0); - htable_new(&new_code_instance_validate, 0); - arraylist_new(&ccallable_list, 0); - htable_new(&uniquing_table, 0); - - jl_serializer_state s = { - f, - ct->ptls, - mod_array - }; - jl_array_t *restored = (jl_array_t*)jl_deserialize_value(&s, (jl_value_t**)&restored); - serializer_worklist = restored; - assert(jl_typeis((jl_value_t*)restored, jl_array_any_type)); - - // See explanation in jl_save_incremental for variables of the same names - jl_value_t *extext_methods = jl_deserialize_value(&s, &extext_methods); - jl_value_t *mi_list = jl_deserialize_value(&s, &mi_list); // reload MIs stored by queue_external_mis - jl_value_t *edges = jl_deserialize_value(&s, &edges); - jl_value_t *ext_targets = jl_deserialize_value(&s, &ext_targets); - - arraylist_t *tracee_list = NULL; - if (jl_newmeth_tracer) // debugging - tracee_list = arraylist_new((arraylist_t*)malloc_s(sizeof(arraylist_t)), 0); - - // at this point, the AST is fully reconstructed, but still completely disconnected - // now all of the interconnects will be created - jl_recache_types(); // make all of the types identities correct - jl_insert_methods((jl_array_t*)extext_methods); // hook up extension methods for external generic functions (needs to be after recache types) 
- jl_recache_other(); // make all of the other objects identities correct (needs to be after insert methods) - jl_copy_roots(); // copying new roots of external methods (must wait until recaching is complete) - htable_free(&uniquing_table); - jl_array_t *init_order = jl_finalize_deserializer(&s, tracee_list); // done with f and s (needs to be after recache) - if (init_order == NULL) - init_order = (jl_array_t*)jl_an_empty_vec_any; - assert(jl_typeis((jl_value_t*)init_order, jl_array_any_type)); - - JL_GC_PUSH5(&init_order, &restored, &edges, &ext_targets, &mi_list); - jl_gc_enable(en); // subtyping can allocate a lot, not valid before recache-other - - jl_insert_backedges((jl_array_t*)edges, (jl_array_t*)ext_targets, (jl_array_t*)mi_list); // restore external backedges (needs to be last) - // check new CodeInstances and validate any that lack external backedges - validate_new_code_instances(); - - serializer_worklist = NULL; - htable_free(&new_code_instance_validate); - arraylist_free(&flagref_list); - arraylist_free(&backref_list); - htable_free(&queued_method_roots); - ios_close(f); - - jl_gc_enable_finalizers(ct, 1); // make sure we don't run any Julia code concurrently before this point - if (tracee_list) { - jl_methtable_t *mt; - while ((mt = (jl_methtable_t*)arraylist_pop(tracee_list)) != NULL) { - JL_GC_PROMISE_ROOTED(mt); - jl_typemap_visitor(mt->defs, trace_method, NULL); - } - arraylist_free(tracee_list); - free(tracee_list); - } - for (int i = 0; i < ccallable_list.len; i++) { - jl_svec_t *item = (jl_svec_t*)ccallable_list.items[i]; - JL_GC_PROMISE_ROOTED(item); - int success = jl_compile_extern_c(NULL, NULL, NULL, jl_svecref(item, 0), jl_svecref(item, 1)); - if (!success) - jl_safe_printf("@ccallable was already defined for this method name\n"); - } - arraylist_free(&ccallable_list); - jl_value_t *ret = (jl_value_t*)jl_svec(2, restored, init_order); - JL_GC_POP(); - - return (jl_value_t*)ret; -} - -JL_DLLEXPORT jl_value_t 
*jl_restore_incremental_from_buf(const char *buf, size_t sz, jl_array_t *mod_array) -{ - ios_t f; - ios_static_buffer(&f, (char*)buf, sz); - return _jl_restore_incremental(&f, mod_array); -} - -JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *mod_array) -{ - ios_t f; - if (ios_file(&f, fname, 1, 0, 0, 0) == NULL) { - return jl_get_exceptionf(jl_errorexception_type, - "Cache file \"%s\" not found.\n", fname); - } - return _jl_restore_incremental(&f, mod_array); -} - -// --- init --- - -void jl_init_serializer(void) -{ - jl_task_t *ct = jl_current_task; - htable_new(&ser_tag, 0); - htable_new(&common_symbol_tag, 0); - htable_new(&backref_table, 0); - - void *vals[] = { jl_emptysvec, jl_emptytuple, jl_false, jl_true, jl_nothing, jl_any_type, - jl_call_sym, jl_invoke_sym, jl_invoke_modify_sym, jl_goto_ifnot_sym, jl_return_sym, jl_symbol("tuple"), - jl_an_empty_string, jl_an_empty_vec_any, - - // empirical list of very common symbols - #include "common_symbols1.inc" - - jl_box_int32(0), jl_box_int32(1), jl_box_int32(2), - jl_box_int32(3), jl_box_int32(4), jl_box_int32(5), - jl_box_int32(6), jl_box_int32(7), jl_box_int32(8), - jl_box_int32(9), jl_box_int32(10), jl_box_int32(11), - jl_box_int32(12), jl_box_int32(13), jl_box_int32(14), - jl_box_int32(15), jl_box_int32(16), jl_box_int32(17), - jl_box_int32(18), jl_box_int32(19), jl_box_int32(20), - - jl_box_int64(0), jl_box_int64(1), jl_box_int64(2), - jl_box_int64(3), jl_box_int64(4), jl_box_int64(5), - jl_box_int64(6), jl_box_int64(7), jl_box_int64(8), - jl_box_int64(9), jl_box_int64(10), jl_box_int64(11), - jl_box_int64(12), jl_box_int64(13), jl_box_int64(14), - jl_box_int64(15), jl_box_int64(16), jl_box_int64(17), - jl_box_int64(18), jl_box_int64(19), jl_box_int64(20), - - jl_bool_type, jl_linenumbernode_type, jl_pinode_type, - jl_upsilonnode_type, jl_type_type, jl_bottom_type, jl_ref_type, - jl_pointer_type, jl_abstractarray_type, jl_nothing_type, - jl_vararg_type, - jl_densearray_type, 
jl_function_type, jl_typename_type, - jl_builtin_type, jl_task_type, jl_uniontype_type, - jl_array_any_type, jl_intrinsic_type, - jl_abstractslot_type, jl_methtable_type, jl_typemap_level_type, - jl_voidpointer_type, jl_newvarnode_type, jl_abstractstring_type, - jl_array_symbol_type, jl_anytuple_type, jl_tparam0(jl_anytuple_type), - jl_emptytuple_type, jl_array_uint8_type, jl_code_info_type, - jl_typeofbottom_type, jl_typeofbottom_type->super, - jl_namedtuple_type, jl_array_int32_type, - jl_typedslot_type, jl_uint32_type, jl_uint64_type, - jl_type_type_mt, jl_nonfunction_mt, - jl_opaque_closure_type, - - ct->ptls->root_task, - - NULL }; - - // more common symbols, less common than those above. will get 2-byte encodings. - void *common_symbols[] = { - #include "common_symbols2.inc" - NULL - }; - - deser_tag[TAG_SYMBOL] = (jl_value_t*)jl_symbol_type; - deser_tag[TAG_SSAVALUE] = (jl_value_t*)jl_ssavalue_type; - deser_tag[TAG_DATATYPE] = (jl_value_t*)jl_datatype_type; - deser_tag[TAG_SLOTNUMBER] = (jl_value_t*)jl_slotnumber_type; - deser_tag[TAG_SVEC] = (jl_value_t*)jl_simplevector_type; - deser_tag[TAG_ARRAY] = (jl_value_t*)jl_array_type; - deser_tag[TAG_EXPR] = (jl_value_t*)jl_expr_type; - deser_tag[TAG_PHINODE] = (jl_value_t*)jl_phinode_type; - deser_tag[TAG_PHICNODE] = (jl_value_t*)jl_phicnode_type; - deser_tag[TAG_STRING] = (jl_value_t*)jl_string_type; - deser_tag[TAG_MODULE] = (jl_value_t*)jl_module_type; - deser_tag[TAG_TVAR] = (jl_value_t*)jl_tvar_type; - deser_tag[TAG_METHOD_INSTANCE] = (jl_value_t*)jl_method_instance_type; - deser_tag[TAG_METHOD] = (jl_value_t*)jl_method_type; - deser_tag[TAG_CODE_INSTANCE] = (jl_value_t*)jl_code_instance_type; - deser_tag[TAG_GLOBALREF] = (jl_value_t*)jl_globalref_type; - deser_tag[TAG_INT32] = (jl_value_t*)jl_int32_type; - deser_tag[TAG_INT64] = (jl_value_t*)jl_int64_type; - deser_tag[TAG_UINT8] = (jl_value_t*)jl_uint8_type; - deser_tag[TAG_LINEINFO] = (jl_value_t*)jl_lineinfonode_type; - deser_tag[TAG_UNIONALL] = 
(jl_value_t*)jl_unionall_type; - deser_tag[TAG_GOTONODE] = (jl_value_t*)jl_gotonode_type; - deser_tag[TAG_QUOTENODE] = (jl_value_t*)jl_quotenode_type; - deser_tag[TAG_GOTOIFNOT] = (jl_value_t*)jl_gotoifnot_type; - deser_tag[TAG_RETURNNODE] = (jl_value_t*)jl_returnnode_type; - deser_tag[TAG_ARGUMENT] = (jl_value_t*)jl_argument_type; - - intptr_t i = 0; - while (vals[i] != NULL) { - deser_tag[LAST_TAG+1+i] = (jl_value_t*)vals[i]; - i += 1; - } - assert(LAST_TAG+1+i < 256); - - for (i = 2; i < 256; i++) { - if (deser_tag[i]) - ptrhash_put(&ser_tag, deser_tag[i], (void*)i); - } - - i = 2; - while (common_symbols[i-2] != NULL) { - ptrhash_put(&common_symbol_tag, common_symbols[i-2], (void*)i); - deser_symbols[i] = (jl_value_t*)common_symbols[i-2]; - i += 1; - } - assert(i <= 256); -} - -#ifdef __cplusplus -} -#endif diff --git a/src/gc.c b/src/gc.c index 3f80e83d28c1b..17ad622900a43 100644 --- a/src/gc.c +++ b/src/gc.c @@ -173,6 +173,11 @@ jl_gc_num_t gc_num = {0}; static size_t last_long_collect_interval; int gc_n_threads; jl_ptls_t* gc_all_tls_states; +const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 +JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) +{ + return jl_buff_tag; +} pagetable_t memory_map; @@ -1759,14 +1764,6 @@ JL_DLLEXPORT void jl_gc_queue_binding(jl_binding_t *bnd) static void *volatile gc_findval; // for usage from gdb, for finding the gc-root for a value #endif -static void *sysimg_base; -static void *sysimg_end; -void jl_gc_set_permalloc_region(void *start, void *end) -{ - sysimg_base = start; - sysimg_end = end; -} - // Handle the case where the stack is only partially copied. 
STATIC_INLINE uintptr_t gc_get_stack_addr(void *_addr, uintptr_t offset, @@ -2551,7 +2548,7 @@ module_binding: { jl_binding_t *b = *begin; if (b == (jl_binding_t*)HT_NOTFOUND) continue; - if ((void*)b >= sysimg_base && (void*)b < sysimg_end) { + if (jl_object_in_image((jl_value_t*)b)) { jl_taggedvalue_t *buf = jl_astaggedvalue(b); uintptr_t tag = buf->header; uint8_t bits; @@ -2676,7 +2673,7 @@ mark: { jl_datatype_t *vt = (jl_datatype_t*)tag; int foreign_alloc = 0; int update_meta = __likely(!meta_updated && !gc_verifying); - if (update_meta && (void*)o >= sysimg_base && (void*)o < sysimg_end) { + if (update_meta && jl_object_in_image(new_obj)) { foreign_alloc = 1; update_meta = 0; } @@ -3025,6 +3022,8 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) } if (_jl_debug_method_invalidation != NULL) gc_mark_queue_obj(gc_cache, sp, _jl_debug_method_invalidation); + if (jl_build_ids != NULL) + gc_mark_queue_obj(gc_cache, sp, jl_build_ids); // constants gc_mark_queue_obj(gc_cache, sp, jl_emptytuple_type); @@ -4085,8 +4084,6 @@ JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void) JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) { - static_assert(jl_buff_tag % GC_PAGE_SZ == 0, - "jl_buff_tag must be a multiple of GC_PAGE_SZ"); if (jl_is_initialized()) { int result = jl_atomic_fetch_or(&support_conservative_marking, 1); if (!result) { @@ -4193,8 +4190,8 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) valid_object: // We have to treat objects with type `jl_buff_tag` differently, // as they must not be passed to the usual marking functions. - // Note that jl_buff_tag is a multiple of GC_PAGE_SZ, thus it - // cannot be a type reference. + // Note that jl_buff_tag is real pointer into libjulia, + // thus it cannot be a type reference. 
if ((cell->header & ~(uintptr_t) 3) == jl_buff_tag) return NULL; return jl_valueof(cell); diff --git a/src/gf.c b/src/gf.c index 0e98f2a140d4a..d9bb6994e8ea7 100644 --- a/src/gf.c +++ b/src/gf.c @@ -459,7 +459,7 @@ static int get_method_unspec_list(jl_typemap_entry_t *def, void *closure) return 1; } -static int foreach_mtable_in_module( +int foreach_mtable_in_module( jl_module_t *m, int (*visit)(jl_methtable_t *mt, void *env), void *env) diff --git a/src/init.c b/src/init.c index 926aa05062926..89f4153ff1538 100644 --- a/src/init.c +++ b/src/init.c @@ -783,6 +783,10 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) jl_install_default_signal_handlers(); jl_gc_init(); + + arraylist_new(&jl_linkage_blobs, 0); + arraylist_new(&jl_image_relocs, 0); + jl_ptls_t ptls = jl_init_threadtls(0); #pragma GCC diagnostic push #if defined(_COMPILER_GCC_) && __GNUC__ >= 12 @@ -808,7 +812,7 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_ jl_restore_system_image(jl_options.image_file); } else { jl_init_types(); - jl_global_roots_table = jl_alloc_vec_any(16); + jl_global_roots_table = jl_alloc_vec_any(0); jl_init_codegen(); } diff --git a/src/ircode.c b/src/ircode.c index 1c857051217d0..9f71d8e8dd28c 100644 --- a/src/ircode.c +++ b/src/ircode.c @@ -29,6 +29,34 @@ typedef struct { uint8_t relocatability; } jl_ircode_state; +// type => tag hash for a few core types (e.g., Expr, PhiNode, etc) +static htable_t ser_tag; +// tag => type mapping, the reverse of ser_tag +static jl_value_t *deser_tag[256]; +// hash of some common symbols, encoded as CommonSym_tag plus 1 byte +static htable_t common_symbol_tag; +static jl_value_t *deser_symbols[256]; + +void *jl_lookup_ser_tag(jl_value_t *v) +{ + return ptrhash_get(&ser_tag, v); +} + +void *jl_lookup_common_symbol(jl_value_t *v) +{ + return ptrhash_get(&common_symbol_tag, v); +} + +jl_value_t *jl_deser_tag(uint8_t tag) +{ + return deser_tag[tag]; +} + +jl_value_t *jl_deser_symbol(uint8_t tag) +{ + return 
deser_symbols[tag]; +} + // --- encoding --- #define jl_encode_value(s, v) jl_encode_value_((s), (jl_value_t*)(v), 0) @@ -1020,6 +1048,110 @@ JL_DLLEXPORT jl_value_t *jl_uncompress_argname_n(jl_value_t *syms, size_t i) return jl_nothing; } +void jl_init_serializer(void) +{ + jl_task_t *ct = jl_current_task; + htable_new(&ser_tag, 0); + htable_new(&common_symbol_tag, 0); + + void *vals[] = { jl_emptysvec, jl_emptytuple, jl_false, jl_true, jl_nothing, jl_any_type, + jl_call_sym, jl_invoke_sym, jl_invoke_modify_sym, jl_goto_ifnot_sym, jl_return_sym, jl_symbol("tuple"), + jl_an_empty_string, jl_an_empty_vec_any, + + // empirical list of very common symbols + #include "common_symbols1.inc" + + jl_box_int32(0), jl_box_int32(1), jl_box_int32(2), + jl_box_int32(3), jl_box_int32(4), jl_box_int32(5), + jl_box_int32(6), jl_box_int32(7), jl_box_int32(8), + jl_box_int32(9), jl_box_int32(10), jl_box_int32(11), + jl_box_int32(12), jl_box_int32(13), jl_box_int32(14), + jl_box_int32(15), jl_box_int32(16), jl_box_int32(17), + jl_box_int32(18), jl_box_int32(19), jl_box_int32(20), + + jl_box_int64(0), jl_box_int64(1), jl_box_int64(2), + jl_box_int64(3), jl_box_int64(4), jl_box_int64(5), + jl_box_int64(6), jl_box_int64(7), jl_box_int64(8), + jl_box_int64(9), jl_box_int64(10), jl_box_int64(11), + jl_box_int64(12), jl_box_int64(13), jl_box_int64(14), + jl_box_int64(15), jl_box_int64(16), jl_box_int64(17), + jl_box_int64(18), jl_box_int64(19), jl_box_int64(20), + + jl_bool_type, jl_linenumbernode_type, jl_pinode_type, + jl_upsilonnode_type, jl_type_type, jl_bottom_type, jl_ref_type, + jl_pointer_type, jl_abstractarray_type, jl_nothing_type, + jl_vararg_type, + jl_densearray_type, jl_function_type, jl_typename_type, + jl_builtin_type, jl_task_type, jl_uniontype_type, + jl_array_any_type, jl_intrinsic_type, + jl_abstractslot_type, jl_methtable_type, jl_typemap_level_type, + jl_voidpointer_type, jl_newvarnode_type, jl_abstractstring_type, + jl_array_symbol_type, jl_anytuple_type, 
jl_tparam0(jl_anytuple_type), + jl_emptytuple_type, jl_array_uint8_type, jl_code_info_type, + jl_typeofbottom_type, jl_typeofbottom_type->super, + jl_namedtuple_type, jl_array_int32_type, + jl_typedslot_type, jl_uint32_type, jl_uint64_type, + jl_type_type_mt, jl_nonfunction_mt, + jl_opaque_closure_type, + + ct->ptls->root_task, + + NULL }; + + // more common symbols, less common than those above. will get 2-byte encodings. + void *common_symbols[] = { + #include "common_symbols2.inc" + NULL + }; + + deser_tag[TAG_SYMBOL] = (jl_value_t*)jl_symbol_type; + deser_tag[TAG_SSAVALUE] = (jl_value_t*)jl_ssavalue_type; + deser_tag[TAG_DATATYPE] = (jl_value_t*)jl_datatype_type; + deser_tag[TAG_SLOTNUMBER] = (jl_value_t*)jl_slotnumber_type; + deser_tag[TAG_SVEC] = (jl_value_t*)jl_simplevector_type; + deser_tag[TAG_ARRAY] = (jl_value_t*)jl_array_type; + deser_tag[TAG_EXPR] = (jl_value_t*)jl_expr_type; + deser_tag[TAG_PHINODE] = (jl_value_t*)jl_phinode_type; + deser_tag[TAG_PHICNODE] = (jl_value_t*)jl_phicnode_type; + deser_tag[TAG_STRING] = (jl_value_t*)jl_string_type; + deser_tag[TAG_MODULE] = (jl_value_t*)jl_module_type; + deser_tag[TAG_TVAR] = (jl_value_t*)jl_tvar_type; + deser_tag[TAG_METHOD_INSTANCE] = (jl_value_t*)jl_method_instance_type; + deser_tag[TAG_METHOD] = (jl_value_t*)jl_method_type; + deser_tag[TAG_CODE_INSTANCE] = (jl_value_t*)jl_code_instance_type; + deser_tag[TAG_GLOBALREF] = (jl_value_t*)jl_globalref_type; + deser_tag[TAG_INT32] = (jl_value_t*)jl_int32_type; + deser_tag[TAG_INT64] = (jl_value_t*)jl_int64_type; + deser_tag[TAG_UINT8] = (jl_value_t*)jl_uint8_type; + deser_tag[TAG_LINEINFO] = (jl_value_t*)jl_lineinfonode_type; + deser_tag[TAG_UNIONALL] = (jl_value_t*)jl_unionall_type; + deser_tag[TAG_GOTONODE] = (jl_value_t*)jl_gotonode_type; + deser_tag[TAG_QUOTENODE] = (jl_value_t*)jl_quotenode_type; + deser_tag[TAG_GOTOIFNOT] = (jl_value_t*)jl_gotoifnot_type; + deser_tag[TAG_RETURNNODE] = (jl_value_t*)jl_returnnode_type; + deser_tag[TAG_ARGUMENT] = 
(jl_value_t*)jl_argument_type; + + intptr_t i = 0; + while (vals[i] != NULL) { + deser_tag[LAST_TAG+1+i] = (jl_value_t*)vals[i]; + i += 1; + } + assert(LAST_TAG+1+i < 256); + + for (i = 2; i < 256; i++) { + if (deser_tag[i]) + ptrhash_put(&ser_tag, deser_tag[i], (void*)i); + } + + i = 2; + while (common_symbols[i-2] != NULL) { + ptrhash_put(&common_symbol_tag, common_symbols[i-2], (void*)i); + deser_symbols[i] = (jl_value_t*)common_symbols[i-2]; + i += 1; + } + assert(i <= 256); +} + #ifdef __cplusplus } #endif diff --git a/src/jitlayers.h b/src/jitlayers.h index ba38abff0d6f4..77ac5d64bb46d 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -220,7 +220,6 @@ jl_llvm_functions_t jl_emit_codeinst( enum CompilationPolicy { Default = 0, Extern = 1, - ImagingMode = 2 }; typedef std::map> jl_workqueue_t; diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 80b56f735b68b..67392b106cc66 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -399,7 +399,7 @@ XX(jl_resolve_globals_in_ir) \ XX(jl_restore_excstack) \ XX(jl_restore_incremental) \ - XX(jl_restore_incremental_from_buf) \ + XX(jl_restore_package_image_from_file) \ XX(jl_restore_system_image) \ XX(jl_restore_system_image_data) \ XX(jl_rethrow) \ @@ -407,8 +407,6 @@ XX(jl_rettype_inferred) \ XX(jl_running_on_valgrind) \ XX(jl_safe_printf) \ - XX(jl_save_incremental) \ - XX(jl_save_system_image) \ XX(jl_SC_CLK_TCK) \ XX(jl_set_ARGS) \ XX(jl_set_const) \ @@ -519,6 +517,7 @@ XX(jl_vexceptionf) \ XX(jl_vprintf) \ XX(jl_wakeup_thread) \ + XX(jl_write_compiler_output) \ XX(jl_yield) \ #define JL_RUNTIME_EXPORTED_FUNCS_WIN(XX) \ @@ -534,7 +533,7 @@ YY(jl_get_llvm_module) \ YY(jl_get_LLVM_VERSION) \ YY(jl_dump_native) \ - YY(jl_get_llvm_gv) \ + YY(jl_get_llvm_gvs) \ YY(jl_dump_function_asm) \ YY(jl_LLVMCreateDisasm) \ YY(jl_LLVMDisasmInstruction) \ diff --git a/src/julia.expmap b/src/julia.expmap index 41299aa808572..35cc5eac48b6a 100644 --- a/src/julia.expmap +++ b/src/julia.expmap 
@@ -5,6 +5,7 @@ asprintf; bitvector_*; ios_*; + arraylist_grow; small_arraylist_grow; jl_*; ijl_*; diff --git a/src/julia.h b/src/julia.h index cd2e1e0480e2d..ee4ca50356756 100644 --- a/src/julia.h +++ b/src/julia.h @@ -317,7 +317,7 @@ typedef struct _jl_method_t { jl_array_t *roots; // pointers in generated code (shared to reduce memory), or null // Identify roots by module-of-origin. We only track the module for roots added during incremental compilation. // May be NULL if no external roots have been added, otherwise it's a Vector{UInt64} - jl_array_t *root_blocks; // RLE (build_id, offset) pairs (even/odd indexing) + jl_array_t *root_blocks; // RLE (build_id.lo, offset) pairs (even/odd indexing) int32_t nroots_sysimg; // # of roots stored in the system image jl_svec_t *ccallable; // svec(rettype, sig) if a ccallable entry point is requested for this @@ -593,7 +593,7 @@ typedef struct _jl_module_t { // hidden fields: htable_t bindings; arraylist_t usings; // modules with all bindings potentially imported - uint64_t build_id; + jl_uuid_t build_id; jl_uuid_t uuid; size_t primary_world; _Atomic(uint32_t) counter; @@ -842,6 +842,7 @@ extern void JL_GC_PUSH3(void *, void *, void *) JL_NOTSAFEPOINT; extern void JL_GC_PUSH4(void *, void *, void *, void *) JL_NOTSAFEPOINT; extern void JL_GC_PUSH5(void *, void *, void *, void *, void *) JL_NOTSAFEPOINT; extern void JL_GC_PUSH7(void *, void *, void *, void *, void *, void *, void *) JL_NOTSAFEPOINT; +extern void JL_GC_PUSH8(void *, void *, void *, void *, void *, void *, void *, void *) JL_NOTSAFEPOINT; extern void _JL_GC_PUSHARGS(jl_value_t **, size_t) JL_NOTSAFEPOINT; // This is necessary, because otherwise the analyzer considers this undefined // behavior and terminates the exploration @@ -881,6 +882,9 @@ extern void JL_GC_POP() JL_NOTSAFEPOINT; #define JL_GC_PUSH7(arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(7), jl_pgcstack, arg1, arg2, arg3, arg4, arg5, arg6, arg7}; \ 
jl_pgcstack = (jl_gcframe_t*)__gc_stkf; +#define JL_GC_PUSH8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ + void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(8), jl_pgcstack, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8}; \ + jl_pgcstack = (jl_gcframe_t*)__gc_stkf; #define JL_GC_PUSHARGS(rts_var,n) \ @@ -1762,15 +1766,14 @@ JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void); JL_DLLEXPORT int jl_deserialize_verify_header(ios_t *s); JL_DLLEXPORT void jl_preload_sysimg_so(const char *fname); JL_DLLEXPORT void jl_set_sysimg_so(void *handle); -JL_DLLEXPORT ios_t *jl_create_system_image(void *); -JL_DLLEXPORT void jl_save_system_image(const char *fname); +JL_DLLEXPORT ios_t *jl_create_system_image(void *, jl_array_t *worklist); JL_DLLEXPORT void jl_restore_system_image(const char *fname); JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len); +JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods, int complete); + JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t *newly_inferred); -JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t *linfo); -JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist); -JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods); -JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, size_t sz, jl_array_t *depmods); +JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t *ci); +JL_DLLEXPORT void jl_write_compiler_output(void); // parsing JL_DLLEXPORT jl_value_t *jl_parse_all(const char *text, size_t text_len, diff --git a/src/julia_internal.h b/src/julia_internal.h index b8a30801a66fe..9410cdc300cad 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -313,6 +313,9 @@ extern tracer_cb jl_newmeth_tracer; void jl_call_tracer(tracer_cb callback, jl_value_t *tracee); void print_func_loc(JL_STREAM *s, jl_method_t *m); extern jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED; +extern arraylist_t 
jl_linkage_blobs; // external linkage: sysimg/pkgimages +extern jl_array_t *jl_build_ids JL_GLOBALLY_ROOTED; // external linkage: corresponding build_ids +extern arraylist_t jl_image_relocs; // external linkage: sysimg/pkgimages extern JL_DLLEXPORT size_t jl_page_size; extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED; @@ -483,9 +486,12 @@ JL_DLLEXPORT jl_value_t *jl_gc_alloc(jl_ptls_t ptls, size_t sz, void *ty); # define jl_gc_alloc(ptls, sz, ty) jl_gc_alloc_(ptls, sz, ty) #endif -// jl_buff_tag must be a multiple of GC_PAGE_SZ so that it can't be -// confused for an actual type reference. -#define jl_buff_tag ((uintptr_t)0x4eadc000) +// jl_buff_tag must be an actual pointer here, so it cannot be confused for an actual type reference. +// defined as uint64_t[3] so that we can get the right alignment of this and a "type tag" on it +const extern uint64_t _jl_buff_tag[3]; +#define jl_buff_tag ((uintptr_t)LLT_ALIGN((uintptr_t)&_jl_buff_tag[1],16)) +JL_DLLEXPORT uintptr_t jl_get_buff_tag(void); + typedef void jl_gc_tracked_buffer_t; // For the benefit of the static analyzer STATIC_INLINE jl_gc_tracked_buffer_t *jl_gc_alloc_buf(jl_ptls_t ptls, size_t sz) { @@ -634,9 +640,9 @@ void push_edge(jl_array_t *list, jl_value_t *invokesig, jl_method_instance_t *ca JL_DLLEXPORT void jl_add_method_root(jl_method_t *m, jl_module_t *mod, jl_value_t* root); void jl_append_method_roots(jl_method_t *m, uint64_t modid, jl_array_t* roots); -int get_root_reference(rle_reference *rr, jl_method_t *m, size_t i); -jl_value_t *lookup_root(jl_method_t *m, uint64_t key, int index); -int nroots_with_key(jl_method_t *m, uint64_t key); +int get_root_reference(rle_reference *rr, jl_method_t *m, size_t i) JL_NOTSAFEPOINT; +jl_value_t *lookup_root(jl_method_t *m, uint64_t key, int index) JL_NOTSAFEPOINT; +int nroots_with_key(jl_method_t *m, uint64_t key) JL_NOTSAFEPOINT; int jl_valid_type_param(jl_value_t *v); @@ -716,6 +722,7 @@ jl_expr_t *jl_exprn(jl_sym_t *head, size_t n); jl_function_t 
*jl_new_generic_function(jl_sym_t *name, jl_module_t *module); jl_function_t *jl_new_generic_function_with_supertype(jl_sym_t *name, jl_module_t *module, jl_datatype_t *st); int jl_foreach_reachable_mtable(int (*visit)(jl_methtable_t *mt, void *env), void *env); +int foreach_mtable_in_module(jl_module_t *m, int (*visit)(jl_methtable_t *mt, void *env), void *env); void jl_init_main_module(void); JL_DLLEXPORT int jl_is_submodule(jl_module_t *child, jl_module_t *parent) JL_NOTSAFEPOINT; jl_array_t *jl_get_loaded_modules(void); @@ -926,7 +933,7 @@ typedef DWORD jl_pgcstack_key_t; #else typedef jl_gcframe_t ***(*jl_pgcstack_key_t)(void) JL_NOTSAFEPOINT; #endif -JL_DLLEXPORT void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k); +JL_DLLEXPORT void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k) JL_NOTSAFEPOINT; #if !defined(_OS_WINDOWS_) && !defined(__APPLE__) && !defined(JL_DISABLE_LIBUNWIND) extern pthread_mutex_t in_signal_lock; @@ -944,7 +951,38 @@ static inline void jl_set_gc_and_wait(void) jl_atomic_store_release(&ct->ptls->gc_state, state); } #endif -void jl_gc_set_permalloc_region(void *start, void *end); + +// Query if a Julia object is if a permalloc region (due to part of a sys- pkg-image) +STATIC_INLINE size_t n_linkage_blobs(void) JL_NOTSAFEPOINT +{ + if (!jl_build_ids) + return 0; + assert(jl_is_array(jl_build_ids)); + return jl_array_len(jl_build_ids); +} + +// TODO: Makes this a binary search +STATIC_INLINE size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT { + size_t i, nblobs = n_linkage_blobs(); + assert(jl_linkage_blobs.len == 2*nblobs); + for (i = 0; i < nblobs; i++) { + uintptr_t left = (uintptr_t)jl_linkage_blobs.items[2*i]; + uintptr_t right = (uintptr_t)jl_linkage_blobs.items[2*i + 1]; + if (left < (uintptr_t)v && (uintptr_t)v <= right) { + // the last object may be a singleton (v is shifted by a type tag, so we use exclusive bounds here) + break; + } + } + return i; +} + +STATIC_INLINE uint8_t 
jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT { + size_t blob = external_blob_index(v); + if (blob == n_linkage_blobs()) { + return 0; + } + return 1; +} typedef struct { LLVMOrcThreadSafeModuleRef TSM; @@ -958,11 +996,11 @@ JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, char raw_mc, const char JL_DLLEXPORT jl_value_t *jl_dump_function_ir(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo); JL_DLLEXPORT jl_value_t *jl_dump_function_asm(jl_llvmf_dump_t *dump, char raw_mc, const char* asm_variant, const char *debuginfo, char binary); -void *jl_create_native(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int policy); +void *jl_create_native(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int policy, int imaging_mode); void jl_dump_native(void *native_code, const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, const char *sysimg_data, size_t sysimg_len); -int32_t jl_get_llvm_gv(void *native_code, jl_value_t *p) JL_NOTSAFEPOINT; +void jl_get_llvm_gvs(void *native_code, arraylist_t *gvs); JL_DLLEXPORT void jl_get_function_id(void *native_code, jl_code_instance_t *ncode, int32_t *func_idx, int32_t *specfunc_idx); @@ -1249,6 +1287,7 @@ extern void *jl_ntdll_handle; extern void *jl_kernel32_handle; extern void *jl_crtdll_handle; extern void *jl_winsock_handle; +void win32_formatmessage(DWORD code, char *reason, int len) JL_NOTSAFEPOINT; #endif JL_DLLEXPORT void *jl_get_library_(const char *f_lib, int throw_err); @@ -1589,7 +1628,6 @@ void jl_register_fptrs(uint64_t sysimage_base, const struct _jl_sysimg_fptrs_t * jl_method_instance_t **linfos, size_t n); void jl_write_coverage_data(const char*); void jl_write_malloc_log(void); -void jl_write_compiler_output(void); #if jl_has_builtin(__builtin_unreachable) || defined(_COMPILER_GCC_) || defined(_COMPILER_INTEL_) # define jl_unreachable() 
__builtin_unreachable() @@ -1642,6 +1680,8 @@ JL_DLLEXPORT uint16_t julia__truncdfhf2(double param) JL_NOTSAFEPOINT; //JL_DLLEXPORT uint16_t julia__floatunsihf(uint32_t n) JL_NOTSAFEPOINT; //JL_DLLEXPORT uint16_t julia__floatundihf(uint64_t n) JL_NOTSAFEPOINT; +JL_DLLEXPORT uint32_t jl_crc32c(uint32_t crc, const char *buf, size_t len); + #ifdef __cplusplus } #endif diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp index 7e0bed6276a2d..8cabfeb334096 100644 --- a/src/llvm-multiversioning.cpp +++ b/src/llvm-multiversioning.cpp @@ -304,23 +304,31 @@ static inline std::vector consume_gv(Module &M, const char *name, bool allow // Strip them from the Module so that it's easier to handle the uses. GlobalVariable *gv = M.getGlobalVariable(name); assert(gv && gv->hasInitializer()); - auto *ary = cast(gv->getInitializer()); - unsigned nele = ary->getNumOperands(); + ArrayType *Ty = cast(gv->getInitializer()->getType()); + unsigned nele = Ty->getArrayNumElements(); std::vector res(nele); - unsigned i = 0; - while (i < nele) { - llvm::Value *val = ary->getOperand(i)->stripPointerCasts(); - if (allow_bad_fvars && (!isa(val) || (isa(val) && cast(val)->isDeclaration()))) { - // Shouldn't happen in regular use, but can happen in bugpoint. - nele--; - continue; + ConstantArray *ary = nullptr; + if (gv->getInitializer()->isNullValue()) { + for (unsigned i = 0; i < nele; ++i) + res[i] = cast(Constant::getNullValue(Ty->getArrayElementType())); + } + else { + ary = cast(gv->getInitializer()); + unsigned i = 0; + while (i < nele) { + llvm::Value *val = ary->getOperand(i)->stripPointerCasts(); + if (allow_bad_fvars && (!isa(val) || (isa(val) && cast(val)->isDeclaration()))) { + // Shouldn't happen in regular use, but can happen in bugpoint. 
+ nele--; + continue; + } + res[i++] = cast(val); } - res[i++] = cast(val); + res.resize(nele); } - res.resize(nele); assert(gv->use_empty()); gv->eraseFromParent(); - if (ary->use_empty()) + if (ary && ary->use_empty()) ary->destroyConstant(); return res; } @@ -925,17 +933,24 @@ Constant *CloneCtx::emit_offset_table(const std::vector &vars, StringRef nam { auto T_int32 = Type::getInt32Ty(M.getContext()); auto T_size = getSizeTy(M.getContext()); - assert(!vars.empty()); - add_comdat(GlobalAlias::create(T_size, 0, GlobalVariable::ExternalLinkage, - name + "_base", - ConstantExpr::getBitCast(vars[0], T_size->getPointerTo()), &M)); - auto vbase = ConstantExpr::getPtrToInt(vars[0], T_size); uint32_t nvars = vars.size(); + Constant *base = nullptr; + if (nvars > 0) { + base = ConstantExpr::getBitCast(vars[0], T_size->getPointerTo()); + add_comdat(GlobalAlias::create(T_size, 0, GlobalVariable::ExternalLinkage, + name + "_base", + base, &M)); + } else { + base = ConstantExpr::getNullValue(T_size->getPointerTo()); + } + auto vbase = ConstantExpr::getPtrToInt(base, T_size); std::vector offsets(nvars + 1); offsets[0] = ConstantInt::get(T_int32, nvars); - offsets[1] = ConstantInt::get(T_int32, 0); - for (uint32_t i = 1; i < nvars; i++) - offsets[i + 1] = get_ptrdiff32(vars[i], vbase); + if (nvars > 0) { + offsets[1] = ConstantInt::get(T_int32, 0); + for (uint32_t i = 1; i < nvars; i++) + offsets[i + 1] = get_ptrdiff32(vars[i], vbase); + } ArrayType *vars_type = ArrayType::get(T_int32, nvars + 1); add_comdat(new GlobalVariable(M, vars_type, true, GlobalVariable::ExternalLinkage, diff --git a/src/method.c b/src/method.c index 6f240efaa0b7a..852c5bf25659a 100644 --- a/src/method.c +++ b/src/method.c @@ -1188,7 +1188,7 @@ JL_DLLEXPORT void jl_add_method_root(jl_method_t *m, jl_module_t *mod, jl_value_ uint64_t modid = 0; if (mod) { assert(jl_is_module(mod)); - modid = mod->build_id; + modid = mod->build_id.lo; } assert(jl_is_method(m)); prepare_method_for_roots(m, modid); diff 
--git a/src/module.c b/src/module.c index 0dc5e20d18b89..605bcd3c2b773 100644 --- a/src/module.c +++ b/src/module.c @@ -23,9 +23,10 @@ JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, uint8_t default_names) m->istopmod = 0; m->uuid = uuid_zero; static unsigned int mcounter; // simple counter backup, in case hrtime is not incrementing - m->build_id = jl_hrtime() + (++mcounter); - if (!m->build_id) - m->build_id++; // build id 0 is invalid + m->build_id.lo = jl_hrtime() + (++mcounter); + if (!m->build_id.lo) + m->build_id.lo++; // build id 0 is invalid + m->build_id.hi = ~(uint64_t)0; m->primary_world = 0; m->counter = 1; m->nospecialize = 0; @@ -936,7 +937,7 @@ JL_DLLEXPORT jl_value_t *jl_module_names(jl_module_t *m, int all, int imported) JL_DLLEXPORT jl_sym_t *jl_module_name(jl_module_t *m) { return m->name; } JL_DLLEXPORT jl_module_t *jl_module_parent(jl_module_t *m) { return m->parent; } -JL_DLLEXPORT uint64_t jl_module_build_id(jl_module_t *m) { return m->build_id; } +JL_DLLEXPORT jl_uuid_t jl_module_build_id(jl_module_t *m) { return m->build_id; } JL_DLLEXPORT jl_uuid_t jl_module_uuid(jl_module_t* m) { return m->uuid; } // TODO: make this part of the module constructor and read-only? 
@@ -972,6 +973,22 @@ JL_DLLEXPORT void jl_clear_implicit_imports(jl_module_t *m) JL_UNLOCK(&m->lock); } +JL_DLLEXPORT void jl_init_restored_modules(jl_array_t *init_order) +{ + int i, l = jl_array_len(init_order); + for (i = 0; i < l; i++) { + jl_value_t *mod = jl_array_ptr_ref(init_order, i); + if (!jl_generating_output() || jl_options.incremental) { + jl_module_run_initializer((jl_module_t*)mod); + } + else { + if (jl_module_init_order == NULL) + jl_module_init_order = jl_alloc_vec_any(0); + jl_array_ptr_1d_push(jl_module_init_order, mod); + } + } +} + #ifdef __cplusplus } #endif diff --git a/src/precompile.c b/src/precompile.c index d5d8416c1097b..9c9c79b154a32 100644 --- a/src/precompile.c +++ b/src/precompile.c @@ -21,17 +21,14 @@ JL_DLLEXPORT int jl_generating_output(void) } static void *jl_precompile(int all); +static void *jl_precompile_worklist(jl_array_t *worklist); -void jl_write_compiler_output(void) +JL_DLLEXPORT void jl_write_compiler_output(void) { if (!jl_generating_output()) { return; } - void *native_code = NULL; - if (!jl_options.incremental) - native_code = jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL); - if (!jl_module_init_order) { jl_printf(JL_STDERR, "WARNING: --output requested, but no modules defined during run\n"); return; @@ -60,46 +57,51 @@ void jl_write_compiler_output(void) } } + assert(jl_precompile_toplevel_module == NULL); + void *native_code = NULL; + if (jl_options.outputo || jl_options.outputbc || jl_options.outputunoptbc || jl_options.outputasm) { + if (jl_options.incremental) + jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1); + native_code = jl_options.incremental ? 
jl_precompile_worklist(worklist) : jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL); + if (jl_options.incremental) + jl_precompile_toplevel_module = NULL; + } + if (jl_options.incremental) { - if (jl_options.outputji) - if (jl_save_incremental(jl_options.outputji, worklist)) - jl_exit(1); if (jl_options.outputbc || jl_options.outputunoptbc) jl_printf(JL_STDERR, "WARNING: incremental output to a .bc file is not implemented\n"); - if (jl_options.outputo) - jl_printf(JL_STDERR, "WARNING: incremental output to a .o file is not implemented\n"); if (jl_options.outputasm) jl_printf(JL_STDERR, "WARNING: incremental output to a .s file is not implemented\n"); + if (jl_options.outputo) { + jl_printf(JL_STDERR, "WARNING: incremental output to a .o file is not implemented\n"); + } } - else { - ios_t *s = NULL; - if (jl_options.outputo || jl_options.outputbc || jl_options.outputunoptbc || jl_options.outputasm) - s = jl_create_system_image(native_code); - if (jl_options.outputji) { - if (s == NULL) { - jl_save_system_image(jl_options.outputji); - } - else { - ios_t f; - if (ios_file(&f, jl_options.outputji, 1, 1, 1, 1) == NULL) - jl_errorf("cannot open system image file \"%s\" for writing", jl_options.outputji); - ios_write(&f, (const char*)s->buf, (size_t)s->size); - ios_close(&f); - } - } + ios_t *s = jl_create_system_image(native_code, jl_options.incremental ? 
worklist : NULL); - if (jl_options.outputo || jl_options.outputbc || jl_options.outputunoptbc || jl_options.outputasm) { - assert(s); - jl_dump_native(native_code, - jl_options.outputbc, - jl_options.outputunoptbc, - jl_options.outputo, - jl_options.outputasm, - (const char*)s->buf, (size_t)s->size); - jl_postoutput_hook(); - } + if (jl_options.outputji) { + ios_t f; + if (ios_file(&f, jl_options.outputji, 1, 1, 1, 1) == NULL) + jl_errorf("cannot open system image file \"%s\" for writing", jl_options.outputji); + ios_write(&f, (const char*)s->buf, (size_t)s->size); + ios_close(&f); } + + if (native_code) { + jl_dump_native(native_code, + jl_options.outputbc, + jl_options.outputunoptbc, + jl_options.outputo, + jl_options.outputasm, + (const char*)s->buf, (size_t)s->size); + jl_postoutput_hook(); + } + + if (s) { + ios_close(s); + free(s); + } + for (size_t i = 0; i < jl_current_modules.size; i += 2) { if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { jl_printf(JL_STDERR, "\nWARNING: detected unclosed module: "); @@ -340,16 +342,11 @@ static int precompile_enq_all_specializations_(jl_methtable_t *mt, void *env) return jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), precompile_enq_all_specializations__, env); } -static void *jl_precompile(int all) +static void *jl_precompile_(jl_array_t *m) { - // array of MethodInstances and ccallable aliases to include in the output - jl_array_t *m = jl_alloc_vec_any(0); jl_array_t *m2 = NULL; jl_method_instance_t *mi = NULL; - JL_GC_PUSH3(&m, &m2, &mi); - if (all) - jl_compile_all_defs(m); - jl_foreach_reachable_mtable(precompile_enq_all_specializations_, m); + JL_GC_PUSH2(&m2, &mi); m2 = jl_alloc_vec_any(0); for (size_t i = 0; i < jl_array_len(m); i++) { jl_value_t *item = jl_array_ptr_ref(m, i); @@ -368,8 +365,39 @@ static void *jl_precompile(int all) jl_array_ptr_1d_push(m2, item); } } - m = NULL; - void *native_code = jl_create_native(m2, NULL, NULL, 0); + void *native_code = jl_create_native(m2, NULL, NULL, 0, 1); + 
JL_GC_POP(); + return native_code; +} + +static void *jl_precompile(int all) +{ + // array of MethodInstances and ccallable aliases to include in the output + jl_array_t *m = jl_alloc_vec_any(0); + JL_GC_PUSH1(&m); + if (all) + jl_compile_all_defs(m); + jl_foreach_reachable_mtable(precompile_enq_all_specializations_, m); + void *native_code = jl_precompile_(m); + JL_GC_POP(); + return native_code; +} + +static void *jl_precompile_worklist(jl_array_t *worklist) +{ + if (!worklist) + return NULL; + // this "found" array will contain function + // type signatures that were inferred but haven't been compiled + jl_array_t *m = jl_alloc_vec_any(0); + JL_GC_PUSH1(&m); + size_t i, nw = jl_array_len(worklist); + for (i = 0; i < nw; i++) { + jl_module_t *mod = (jl_module_t*)jl_array_ptr_ref(worklist, i); + assert(jl_is_module(mod)); + foreach_mtable_in_module(mod, precompile_enq_all_specializations_, m); + } + void *native_code = jl_precompile_(m); JL_GC_POP(); return native_code; } diff --git a/src/processor.cpp b/src/processor.cpp index b9dfc2b7f0b4e..df114b4d80257 100644 --- a/src/processor.cpp +++ b/src/processor.cpp @@ -627,10 +627,14 @@ static inline jl_sysimg_fptrs_t parse_sysimg(void *hdl, F &&callback) // .data base char *data_base; - jl_dlsym(hdl, "jl_sysimg_gvars_base", (void**)&data_base, 1); + if (!jl_dlsym(hdl, "jl_sysimg_gvars_base", (void**)&data_base, 0)) { + data_base = NULL; + } // .text base char *text_base; - jl_dlsym(hdl, "jl_sysimg_fvars_base", (void**)&text_base, 1); + if (!jl_dlsym(hdl, "jl_sysimg_fvars_base", (void**)&text_base, 0)) { + text_base = NULL; + } res.base = text_base; int32_t *offsets; @@ -713,6 +717,7 @@ static inline jl_sysimg_fptrs_t parse_sysimg(void *hdl, F &&callback) if (reloc_idx == idx) { found = true; auto slot = (const void**)(data_base + reloc_slots[reloc_i * 2 + 1]); + assert(slot); *slot = offset + res.base; } else if (reloc_idx > idx) { diff --git a/src/processor.h b/src/processor.h index f3b571cf9b937..43c009ba72648 
100644 --- a/src/processor.h +++ b/src/processor.h @@ -164,6 +164,7 @@ typedef struct _jl_sysimg_fptrs_t { * Return the data about the function pointers selected. */ jl_sysimg_fptrs_t jl_init_processor_sysimg(void *hdl); +jl_sysimg_fptrs_t jl_init_processor_pkgimg(void *hdl); // Return the name of the host CPU as a julia string. JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void); diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index ea8dddf629d62..a46db93488770 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -1586,6 +1586,20 @@ static uint32_t sysimg_init_cb(const void *id) return match.best_idx; } +static uint32_t pkgimg_init_cb(const void *id) +{ + TargetData target = jit_targets.front(); + auto pkgimg = deserialize_target_data((const uint8_t*)id); + for (auto &t: pkgimg) { + if (auto nname = normalize_cpu_name(t.name)) { + t.name = nname; + } + } + auto match = match_sysimg_targets(pkgimg, target, max_vector_size); + + return match.best_idx; +} + static void ensure_jit_target(bool imaging) { auto &cmdline = get_cmdline_targets(); @@ -1788,6 +1802,15 @@ jl_sysimg_fptrs_t jl_init_processor_sysimg(void *hdl) return parse_sysimg(hdl, sysimg_init_cb); } +jl_sysimg_fptrs_t jl_init_processor_pkgimg(void *hdl) +{ + if (jit_targets.empty()) + jl_error("JIT targets not initialized"); + if (jit_targets.size() > 1) + jl_error("Expected only one JIT target"); + return parse_sysimg(hdl, pkgimg_init_cb); +} + std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) { ensure_jit_target(imaging); diff --git a/src/processor_fallback.cpp b/src/processor_fallback.cpp index 1f314eb460f0f..3160bd0ba6750 100644 --- a/src/processor_fallback.cpp +++ b/src/processor_fallback.cpp @@ -51,6 +51,22 @@ static uint32_t sysimg_init_cb(const void *id) return best_idx; } +static uint32_t pkgimg_init_cb(const void *id) +{ + TargetData<1> target = jit_targets.front(); + // Find the last name match or use the default one. 
+ uint32_t best_idx = 0; + auto pkgimg = deserialize_target_data<1>((const uint8_t*)id); + for (uint32_t i = 0; i < pkgimg.size(); i++) { + auto &imgt = pkgimg[i]; + if (imgt.name == target.name) { + best_idx = i; + } + } + + return best_idx; +} + static void ensure_jit_target(bool imaging) { auto &cmdline = get_cmdline_targets(); @@ -103,6 +119,15 @@ jl_sysimg_fptrs_t jl_init_processor_sysimg(void *hdl) return parse_sysimg(hdl, sysimg_init_cb); } +jl_sysimg_fptrs_t jl_init_processor_pkgimg(void *hdl) +{ + if (jit_targets.empty()) + jl_error("JIT targets not initialized"); + if (jit_targets.size() > 1) + jl_error("Expected only one JIT target"); + return parse_sysimg(hdl, pkgimg_init_cb); +} + std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) { ensure_jit_target(imaging); diff --git a/src/processor_x86.cpp b/src/processor_x86.cpp index 77ee5afaf5e85..b73838a55777e 100644 --- a/src/processor_x86.cpp +++ b/src/processor_x86.cpp @@ -878,6 +878,19 @@ static uint32_t sysimg_init_cb(const void *id) return match.best_idx; } +static uint32_t pkgimg_init_cb(const void *id) +{ + TargetData target = jit_targets.front(); + auto pkgimg = deserialize_target_data((const uint8_t*)id); + for (auto &t: pkgimg) { + if (auto nname = normalize_cpu_name(t.name)) { + t.name = nname; + } + } + auto match = match_sysimg_targets(pkgimg, target, max_vector_size); + return match.best_idx; +} + static void ensure_jit_target(bool imaging) { auto &cmdline = get_cmdline_targets(); @@ -1018,6 +1031,15 @@ jl_sysimg_fptrs_t jl_init_processor_sysimg(void *hdl) return parse_sysimg(hdl, sysimg_init_cb); } +jl_sysimg_fptrs_t jl_init_processor_pkgimg(void *hdl) +{ + if (jit_targets.empty()) + jl_error("JIT targets not initialized"); + if (jit_targets.size() > 1) + jl_error("Expected only one JIT target"); + return parse_sysimg(hdl, pkgimg_init_cb); +} + extern "C" JL_DLLEXPORT std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) { ensure_jit_target(imaging); diff --git a/src/rtutils.c 
b/src/rtutils.c index 497b348f871d5..f34303b9aeea5 100644 --- a/src/rtutils.c +++ b/src/rtutils.c @@ -708,6 +708,12 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt n += jl_static_show_x(out, (jl_value_t*)vt, depth); n += jl_printf(out, ">"); } + else if (vt == (jl_datatype_t*)jl_buff_tag) { + n += jl_printf(out, "", (void*)v); + } + else if (vt == (jl_datatype_t*)(uintptr_t)(0xbabababababababaull & ~15)) { + n += jl_printf(out, "", (void*)v); + } // These need to be special cased because they // exist only by pointer identity in early startup else if (v == (jl_value_t*)jl_simplevector_type) { diff --git a/src/staticdata.c b/src/staticdata.c index 5e005ff462e3b..4457c51fa03f1 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -3,33 +3,24 @@ /* saving and restoring system images - This performs serialization and deserialization of in-memory data. The dump.c file is similar, but has less complete coverage: - dump.c has no knowledge of native code (and simply discards it), whereas this supports native code caching in .o files. - Duplication is avoided by elevating the .o-serialized versions of global variables and native-compiled functions to become - the authoritative source for such entities in the system image, with references to these objects appropriately inserted into - the (de)serialized version of Julia's internal data. This makes deserialization simple and fast: we only need to deal with - pointer relocation, registering with the garbage collector, and making note of special internal types. During serialization, - we also need to pay special attention to things like builtin functions, C-implemented types (those in jltypes.c), the metadata - for documentation, optimal layouts, integration with native system image generation, and preparing other preprocessing - directives. - - dump.c has capabilities missing from this serializer, most notably the ability to handle external references. 
This is not needed - for system images as they are self-contained. However, it would be needed to support incremental compilation of packages. + This performs serialization and deserialization of system and package images. It creates and saves a compact binary + blob, making deserialization "simple" and fast: we "only" need to deal with uniquing, pointer relocation, + method root insertion, registering with the garbage collector, making note of special internal types, and + backedges/invalidation. Special objects include things like builtin functions, C-implemented types (those in jltypes.c), + the metadata for documentation, optimal layouts, integration with native system image generation, and preparing other + preprocessing directives. During serialization, the flow has several steps: - - step 1 inserts relevant items into `backref_table`, an `obj` => `id::Int` mapping. `id` is assigned by - order of insertion. This is effectively a recursive traversal, singling out items like pointers and symbols - that need restoration when the system image is loaded. This stage is implemented by `jl_serialize_value` - and its callees; while it would be simplest to use recursion, this risks stack overflow, so recursion is mimicked + - step 1 inserts relevant items into `serialization_order`, an `obj` => `id::Int` mapping. `id` is assigned by + order of insertion. This stage is implemented by `jl_queue_for_serialization` and its callees; + while it would be simplest to use recursion, this risks stack overflow, so recursion is mimicked using a work-queue managed by `jl_serialize_reachable`. - It's worth emphasizing that despite the name `jl_serialize_value`, the only goal of this stage is to - insert objects into `backref_table`. The entire system gets inserted, either directly or indirectly via - fields of other objects. Objects requiring pointer relocation or gc registration must be inserted directly. - In later stages, such objects get referenced by their `id`. 
+ It's worth emphasizing that the only goal of this stage is to insert objects into `serialization_order`. + In later stages, such objects get written in order of `id`. - - step 2 (the biggest of four steps) takes all items in `backref_table` and actually serializes them ordered + - step 2 (the biggest of four steps) takes all items in `serialization_order` and actually serializes them ordered by `id`. The system is serialized into several distinct streams (see `jl_serializer_state`), a "main stream" (the `s` field) as well as parallel streams for writing specific categories of additional internal data (e.g., global data invisible to codegen, as well as deserialization "touch-up" tables, see below). These different streams @@ -46,14 +37,36 @@ one of the corresponding categorical list, then `index = t << RELOC_TAG_OFFSET + i`. The simplest source for the details of this encoding can be found in the pair of functions `get_reloc_for_item` and `get_item_for_reloc`. + `uniquing` also holds the serialized location of external DataTypes, MethodInstances, and singletons + in the serialized blob (i.e., new-at-the-time-of-serialization specializations). + Most of step 2 is handled by `jl_write_values`, followed by special handling of the dedicated parallel streams. - step 3 combines the different sections (fields of `jl_serializer_state`) into one - - step 4 writes the values of the hard-coded tagged items and `reinit_list`/`ccallable_list` - -The tables written to the serializer stream make deserialization fairly straightforward. Much of the "real work" is -done by `get_item_for_reloc`. + - step 4 writes the values of the hard-coded tagged items and `ccallable_list` + +Much of the "real work" during deserialization is done by `get_item_for_reloc`. 
But a few items require specific +attention: +- uniquing: during deserialization, the target item (an "external" type or MethodInstance) must be checked against + the running system to see whether such an object already exists (i.e., whether some other previously-loaded package + or workload has created such types/MethodInstances previously) or whether it needs to be created de-novo. + In either case, all references at `location` must be updated to the one in the running system. + `new_dt_objs` is a hash set of newly allocated datatype-reachable objects +- method root insertion: when new specializations generate new roots, these roots must be inserted into + method root tables +- backedges & invalidation: external edges have to be checked against the running system and any invalidations executed. + +Encoding of a pointer: +- in the location of the pointer, we initially write zero padding +- for both relocs_list and gctags_list, we write loc/backrefid (for gctags_list this is handled by the caller of write_gctaggedfield, + for relocs_list it's handled by write_pointerfield) +- when writing to disk, both call get_reloc_for_item, and its return value (subject to modification by gc bits) + ends up being written into the data stream (s->s), and the data stream's position written to s->relocs + +External links: +- location holds the offset +- loc/0 in relocs_list */ #include @@ -74,6 +87,8 @@ done by `get_item_for_reloc`. 
#include "valgrind.h" #include "julia_assert.h" +#include "staticdata_utils.c" + #ifdef __cplusplus extern "C" { #endif @@ -271,23 +286,27 @@ static uintptr_t nsym_tag; // array of definitions for the predefined tagged object types // (reverse of symbol_table) static arraylist_t deser_sym; - -// table of all objects that are serialized -static htable_t backref_table; -static int backref_table_numel; -static arraylist_t layout_table; // cache of `position(s)` for each `id` in `backref_table` +// Predefined tags that do not have special handling in `externally_linked` +static htable_t external_objects; + +static htable_t serialization_order; // to break cycles, mark all objects that are serialized +static htable_t unique_ready; // as we serialize types, we need to know if all reachable objects are also already serialized. This tracks whether `immediate` has been set for all of them. +static htable_t nullptrs; +static htable_t bindings; // because they are not first-class objects +// FIFO queue for objects to be serialized. Anything requiring fixup upon deserialization +// must be "toplevel" in this queue. For types, parameters and field types must appear +// before the "wrapper" type so they can be properly recached against the running system. 
+static arraylist_t serialization_queue; +static arraylist_t layout_table; // cache of `position(s)` for each `id` in `serialization_order` static arraylist_t object_worklist; // used to mimic recursion by jl_serialize_reachable -// Both `reinit_list` and `ccallable_list` are lists of (size_t pos, code) entries -// for the serializer to mark values in need of rework during deserialization -// codes: -// 1: typename (reinit_list) -// 2: module (reinit_list) -// 3: method (ccallable_list) -static arraylist_t reinit_list; - -// @ccallable entry points to install -static arraylist_t ccallable_list; +// Permanent list of void* (begin, end+1) pairs of system/package images we've loaded previously +// togther with their module build_ids (used for external linkage) +// jl_linkage_blobs.items[2i:2i+1] correspond to jl_build_ids[i] (0-offset indexing) +// TODO: Keep this sorted so that we can use binary-search +arraylist_t jl_linkage_blobs; +arraylist_t jl_image_relocs; +jl_array_t *jl_build_ids JL_GLOBALLY_ROOTED = NULL; // hash of definitions for predefined function pointers static htable_t fptr_to_id; @@ -296,7 +315,12 @@ void *native_functions; // opaque jl_native_code_desc_t blob used for fetching // table of struct field addresses to rewrite during saving static htable_t field_replace; -static htable_t layout_cache; +typedef struct { + uint64_t base; + uintptr_t *gvars_base; + int32_t *gvars_offsets; + jl_sysimg_fptrs_t fptrs; +} jl_image_t; // array of definitions for the predefined function pointers // (reverse of fptr_to_id) @@ -325,26 +349,42 @@ typedef struct { ios_t *fptr_record; // serialized array mapping fptrid => spos arraylist_t relocs_list; // a list of (location, target) pairs, see description at top arraylist_t gctags_list; // " + arraylist_t uniquing_types; // a list of locations that reference types that must be de-duplicated + arraylist_t uniquing_objs; // a list of locations that reference non-types that must be de-duplicated + arraylist_t 
fixup_types; // a list of locations of types requiring (re)caching + arraylist_t fixup_objs; // a list of locations of objects requiring (re)caching + arraylist_t ccallable_list; // @ccallable entry points to install + // record of build_ids for all external linkages, in order of serialization for the current sysimg/pkgimg + // conceptually, the base pointer for the jth externally-linked item is determined from + // i = findfirst(==(link_ids[j]), jl_build_ids) + // blob_base = jl_linkage_blobs.items[2i] # 0-offset indexing + // We need separate lists since they are intermingled at creation but split when written. + jl_array_t *link_ids_relocs; + jl_array_t *link_ids_gctags; + jl_array_t *link_ids_gvars; jl_ptls_t ptls; + htable_t callers_with_edges; + jl_image_t *image; + int8_t incremental; } jl_serializer_state; static jl_value_t *jl_idtable_type = NULL; static jl_typename_t *jl_idtable_typename = NULL; static jl_value_t *jl_bigint_type = NULL; static int gmp_limb_size = 0; - static jl_sym_t *jl_docmeta_sym = NULL; // Tags of category `t` are located at offsets `t << RELOC_TAG_OFFSET` // Consequently there is room for 2^RELOC_TAG_OFFSET pointers, etc enum RefTags { - DataRef, // mutable data - ConstDataRef, // constant data (e.g., layouts) - TagRef, // items serialized via their tags - SymbolRef, // symbols - BindingRef, // module bindings - FunctionRef, // generic functions - BuiltinFunctionRef // builtin functions + DataRef, // mutable data + ConstDataRef, // constant data (e.g., layouts) + TagRef, // items serialized via their tags + SymbolRef, // symbols + BindingRef, // module bindings + FunctionRef, // generic functions + BuiltinFunctionRef, // builtin functions + ExternalLinkage // items defined externally (used when serializing packages) }; // calling conventions for internal entry points. 
@@ -383,17 +423,29 @@ static void write_reloc_t(ios_t *s, uintptr_t reloc_id) JL_NOTSAFEPOINT } } -// --- Static Compile --- +static int jl_is_binding(uintptr_t v) JL_NOTSAFEPOINT +{ + return jl_typeis(v, (jl_datatype_t*)jl_buff_tag); +} + +// Reporting to PkgCacheInspector +typedef struct { + size_t sysdata; + size_t isbitsdata; + size_t symboldata; + size_t tagslist; + size_t reloclist; + size_t gvarlist; + size_t fptrlist; +} pkgcachesizes; +// --- Static Compile --- static void *jl_sysimg_handle = NULL; -static uint64_t sysimage_base = 0; -static uintptr_t *sysimg_gvars_base = NULL; -static const int32_t *sysimg_gvars_offsets = NULL; -static jl_sysimg_fptrs_t sysimg_fptrs; +static jl_image_t sysimage; -static inline uintptr_t *sysimg_gvars(uintptr_t *base, size_t idx) +static inline uintptr_t *sysimg_gvars(uintptr_t *base, int32_t *offsets, size_t idx) { - return base + sysimg_gvars_offsets[idx] / sizeof(base[0]); + return base + offsets[idx] / sizeof(base[0]); } JL_DLLEXPORT int jl_running_on_valgrind(void) @@ -406,10 +458,10 @@ static void jl_load_sysimg_so(void) int imaging_mode = jl_generating_output() && !jl_options.incremental; // in --build mode only use sysimg data, not precompiled native code if (!imaging_mode && jl_options.use_sysimage_native_code==JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES) { - jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_base", (void **)&sysimg_gvars_base, 1); - jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_offsets", (void **)&sysimg_gvars_offsets, 1); - sysimg_gvars_offsets += 1; - assert(sysimg_fptrs.base); + jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_base", (void **)&sysimage.gvars_base, 1); + jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_offsets", (void **)&sysimage.gvars_offsets, 1); + sysimage.gvars_offsets += 1; + assert(sysimage.fptrs.base); void *pgcstack_func_slot; jl_dlsym(jl_sysimg_handle, "jl_pgcstack_func_slot", &pgcstack_func_slot, 1); @@ -422,19 +474,19 @@ static void jl_load_sysimg_so(void) *tls_offset_idx = 
(uintptr_t)(jl_tls_offset == -1 ? 0 : jl_tls_offset); #ifdef _OS_WINDOWS_ - sysimage_base = (intptr_t)jl_sysimg_handle; + sysimage.base = (intptr_t)jl_sysimg_handle; #else Dl_info dlinfo; - if (dladdr((void*)sysimg_gvars_base, &dlinfo) != 0) { - sysimage_base = (intptr_t)dlinfo.dli_fbase; + if (dladdr((void*)sysimage.gvars_base, &dlinfo) != 0) { + sysimage.base = (intptr_t)dlinfo.dli_fbase; } else { - sysimage_base = 0; + sysimage.base = 0; } #endif } else { - memset(&sysimg_fptrs, 0, sizeof(sysimg_fptrs)); + memset(&sysimage.fptrs, 0, sizeof(sysimage.fptrs)); } const char *sysimg_data; jl_dlsym(jl_sysimg_handle, "jl_system_image_data", (void **)&sysimg_data, 1); @@ -446,6 +498,94 @@ static void jl_load_sysimg_so(void) // --- serializer --- +#define NBOX_C 1024 + +static int jl_needs_serialization(jl_serializer_state *s, jl_value_t *v) +{ + // ignore items that are given a special relocation representation + if (s->incremental && jl_object_in_image(v)) + return 0; + + if (v == NULL || jl_is_symbol(v) || v == jl_nothing) { + return 0; + } + else if (jl_typeis(v, jl_int64_type)) { + int64_t i64 = *(int64_t*)v + NBOX_C / 2; + if ((uint64_t)i64 < NBOX_C) + return 0; + } + else if (jl_typeis(v, jl_int32_type)) { + int32_t i32 = *(int32_t*)v + NBOX_C / 2; + if ((uint32_t)i32 < NBOX_C) + return 0; + } + else if (jl_typeis(v, jl_uint8_type)) { + return 0; + } + else if (jl_typeis(v, jl_task_type)) { + return 0; + } + + return 1; +} + + +static int caching_tag(jl_value_t *v) JL_NOTSAFEPOINT +{ + if (jl_is_method_instance(v)) { + jl_method_instance_t *mi = (jl_method_instance_t*)v; + jl_value_t *m = mi->def.value; + if (jl_is_method(m) && jl_object_in_image(m)) + return 1 + type_in_worklist(mi->specTypes); + } + if (jl_is_datatype(v)) { + jl_datatype_t *dt = (jl_datatype_t*)v; + if (jl_is_tuple_type(dt) ? 
!dt->isconcretetype : dt->hasfreetypevars) + return 0; // aka !is_cacheable from jltypes.c + if (jl_object_in_image((jl_value_t*)dt->name)) + return 1 + type_in_worklist(v); + } + jl_value_t *dtv = jl_typeof(v); + if (jl_is_datatype_singleton((jl_datatype_t*)dtv)) { + return 1 - type_in_worklist(dtv); // these are already recached in the datatype in the image + } + return 0; +} + +static int needs_recaching(jl_value_t *v) JL_NOTSAFEPOINT +{ + return caching_tag(v) == 2; +} + +static int needs_uniquing(jl_value_t *v) JL_NOTSAFEPOINT +{ + assert(!jl_object_in_image(v)); + return caching_tag(v) == 1; +} + +static void record_field_change(jl_value_t **addr, jl_value_t *newval) JL_NOTSAFEPOINT +{ + ptrhash_put(&field_replace, (void*)addr, newval); +} + +static jl_value_t *get_replaceable_field(jl_value_t **addr, int mutabl) JL_GC_DISABLED +{ + jl_value_t *fld = (jl_value_t*)ptrhash_get(&field_replace, addr); + if (fld == HT_NOTFOUND) { + fld = *addr; + if (mutabl && fld && jl_is_cpointer_type(jl_typeof(fld)) && jl_unbox_voidpointer(fld) != NULL && jl_unbox_voidpointer(fld) != (void*)(uintptr_t)-1) { + void **nullval = ptrhash_bp(&nullptrs, (void*)jl_typeof(fld)); + if (*nullval == HT_NOTFOUND) { + void *C_NULL = NULL; + *nullval = (void*)jl_new_bits(jl_typeof(fld), &C_NULL); + } + fld = (jl_value_t*)*nullval; + } + return fld; + } + return fld; +} + static uintptr_t jl_fptr_id(void *fptr) { void **pbp = ptrhash_bp(&fptr_to_id, fptr); @@ -455,113 +595,126 @@ static uintptr_t jl_fptr_id(void *fptr) return *(uintptr_t*)pbp; } -#define jl_serialize_value(s, v) jl_serialize_value_(s,(jl_value_t*)(v),1) -static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int recursive); +// `jl_queue_for_serialization` adds items to `serialization_order` +#define jl_queue_for_serialization(s, v) jl_queue_for_serialization_((s), (jl_value_t*)(v), 1, 0) +static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate); -static void 
jl_serialize_module(jl_serializer_state *s, jl_module_t *m) +static void jl_queue_module_for_serialization(jl_serializer_state *s, jl_module_t *m) { - jl_serialize_value(s, m->name); - jl_serialize_value(s, m->parent); + jl_queue_for_serialization(s, m->name); + jl_queue_for_serialization(s, m->parent); size_t i; void **table = m->bindings.table; for (i = 0; i < m->bindings.size; i += 2) { if (table[i+1] != HT_NOTFOUND) { - jl_serialize_value(s, (jl_value_t*)table[i]); + jl_queue_for_serialization(s, (jl_value_t*)table[i]); jl_binding_t *b = (jl_binding_t*)table[i+1]; - jl_serialize_value(s, b->name); + ptrhash_put(&bindings, b, (void*)(uintptr_t)-1); + jl_queue_for_serialization(s, b->name); + jl_value_t *value; if (jl_docmeta_sym && b->name == jl_docmeta_sym && jl_options.strip_metadata) - jl_serialize_value(s, jl_nothing); + value = jl_nothing; else - jl_serialize_value(s, jl_atomic_load_relaxed(&b->value)); - jl_serialize_value(s, jl_atomic_load_relaxed(&b->globalref)); - jl_serialize_value(s, b->owner); - jl_serialize_value(s, jl_atomic_load_relaxed(&b->ty)); + value = get_replaceable_field((jl_value_t**)&b->value, !b->constp); + jl_queue_for_serialization(s, value); + jl_queue_for_serialization(s, jl_atomic_load_relaxed(&b->globalref)); + jl_queue_for_serialization(s, b->owner); + jl_queue_for_serialization(s, jl_atomic_load_relaxed(&b->ty)); } } for (i = 0; i < m->usings.len; i++) { - jl_serialize_value(s, (jl_value_t*)m->usings.items[i]); + jl_queue_for_serialization(s, (jl_value_t*)m->usings.items[i]); } } -static jl_value_t *get_replaceable_field(jl_value_t **addr) +// Anything that requires uniquing or fixing during deserialization needs to be "toplevel" +// in serialization (i.e., have its own entry in `serialization_order`). Consequently, +// objects that act as containers for other potentially-"problematic" objects must add such "children" +// to the queue. +// Most objects use preorder traversal. 
But things that need uniquing require postorder: +// you want to handle uniquing of `Dict{String,Float64}` before you tackle `Vector{Dict{String,Float64}}`. +// Uniquing is done in `serialization_order`, so the very first mention of such an object must +// be the "source" rather than merely a cross-reference. +static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) { - jl_value_t *fld = (jl_value_t*)ptrhash_get(&field_replace, addr); - if (fld == HT_NOTFOUND) - return *addr; - return fld; -} - -#define NBOX_C 1024 - -static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int recursive) -{ - // ignore items that are given a special representation - if (v == NULL || jl_is_symbol(v) || v == jl_nothing) { - return; - } - else if (jl_typeis(v, jl_task_type)) { - if (v == (jl_value_t*)s->ptls->root_task) { - jl_serialize_value(s, ((jl_task_t*)v)->tls); - return; + jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); + jl_queue_for_serialization_(s, (jl_value_t*)t, 1, immediate); + + if (!recursive) + goto done_fields; + + if (s->incremental && jl_is_datatype(v) && immediate) { + jl_datatype_t *dt = (jl_datatype_t*)v; + // ensure super is queued (though possibly not yet handled, since it may have cycles) + jl_queue_for_serialization_(s, (jl_value_t*)dt->super, 1, 1); + // ensure all type parameters are recached + jl_queue_for_serialization_(s, (jl_value_t*)dt->parameters, 1, 1); + jl_value_t *singleton = dt->instance; + if (singleton && needs_uniquing(singleton)) { + assert(jl_needs_serialization(s, singleton)); // should be true, since we visited dt + // do not visit dt->instance for our template object as it leads to unwanted cycles here + // (it may get serialized from elsewhere though) + record_field_change(&dt->instance, jl_nothing); + } + immediate = 0; // do not handle remaining fields immediately (just field types remains) + } + if (s->incremental && jl_is_method_instance(v)) { + if 
(needs_uniquing(v)) { + // we only need 3 specific fields of this (the rest are not used) + jl_method_instance_t *mi = (jl_method_instance_t*)v; + jl_queue_for_serialization(s, mi->def.value); + jl_queue_for_serialization(s, mi->specTypes); + jl_queue_for_serialization(s, (jl_value_t*)mi->sparam_vals); + recursive = 0; + goto done_fields; + } + else if (needs_recaching(v)) { + // we only need 3 specific fields of this (the rest are restored afterward, if valid) + jl_method_instance_t *mi = (jl_method_instance_t*)v; + record_field_change((jl_value_t**)&mi->uninferred, NULL); + record_field_change((jl_value_t**)&mi->backedges, NULL); + record_field_change((jl_value_t**)&mi->callbacks, NULL); + record_field_change((jl_value_t**)&mi->cache, NULL); } } - else if (jl_typeis(v, jl_int64_type)) { - int64_t i64 = *(int64_t*)v + NBOX_C / 2; - if ((uint64_t)i64 < NBOX_C) - return; - } - else if (jl_typeis(v, jl_int32_type)) { - int32_t i32 = *(int32_t*)v + NBOX_C / 2; - if ((uint32_t)i32 < NBOX_C) - return; - } - else if (jl_typeis(v, jl_uint8_type)) { - return; - } - arraylist_push(&object_worklist, (void*)((uintptr_t)v | recursive)); -} - -static void jl_serialize_value__(jl_serializer_state *s, jl_value_t *v, int recursive) -{ - void **bp = ptrhash_bp(&backref_table, v); - if (*bp != HT_NOTFOUND) { - return; + if (jl_is_typename(v)) { + jl_typename_t *tn = (jl_typename_t*)v; + // don't recurse into several fields (yet) + jl_queue_for_serialization_(s, (jl_value_t*)tn->cache, 0, 1); + jl_queue_for_serialization_(s, (jl_value_t*)tn->linearcache, 0, 1); + if (s->incremental) { + assert(!jl_object_in_image((jl_value_t*)tn->module)); + assert(!jl_object_in_image((jl_value_t*)tn->wrapper)); + } } - size_t item = ++backref_table_numel; - assert(item < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many items to serialize"); - char *pos = (char*)HT_NOTFOUND + item; - *bp = (void*)pos; - - // some values have special representations - jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); - 
jl_serialize_value(s, t); + if (immediate) // must be things that can be recursively handled, and valid as type parameters + assert(jl_is_immutable(t) || jl_is_typevar(v) || jl_is_symbol(v) || jl_is_svec(v)); - if (t->layout->npointers == 0) { - // skip it + const jl_datatype_layout_t *layout = t->layout; + if (layout->npointers == 0) { + // bitstypes do not require recursion } else if (jl_is_svec(v)) { - if (!recursive) - return; size_t i, l = jl_svec_len(v); jl_value_t **data = jl_svec_data(v); for (i = 0; i < l; i++) { - jl_serialize_value(s, data[i]); + jl_queue_for_serialization_(s, data[i], 1, immediate); } } else if (jl_is_array(v)) { jl_array_t *ar = (jl_array_t*)v; - jl_serialize_value(s, jl_typeof(ar)); + const char *data = (const char*)jl_array_data(ar); if (ar->flags.ptrarray) { size_t i, l = jl_array_len(ar); for (i = 0; i < l; i++) { - jl_serialize_value(s, jl_array_ptr_ref(ar, i)); + jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[i], 1); + jl_queue_for_serialization_(s, fld, 1, immediate); } } else if (ar->flags.hasptr) { - const char *data = (const char*)jl_array_data(ar); uint16_t elsz = ar->elsize; size_t i, l = jl_array_len(ar); jl_datatype_t *et = (jl_datatype_t*)jl_tparam0(jl_typeof(ar)); @@ -569,46 +722,90 @@ static void jl_serialize_value__(jl_serializer_state *s, jl_value_t *v, int recu for (i = 0; i < l; i++) { for (j = 0; j < np; j++) { uint32_t ptr = jl_ptr_offset(et, j); - jl_value_t *fld = ((jl_value_t**)data)[ptr]; - JL_GC_PROMISE_ROOTED(fld); - jl_serialize_value(s, fld); + jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr], 1); + jl_queue_for_serialization_(s, fld, 1, immediate); } data += elsz; } } } else if (jl_typeis(v, jl_module_type)) { - jl_serialize_module(s, (jl_module_t*)v); + jl_queue_module_for_serialization(s, (jl_module_t*)v); } - else if (jl_is_typename(v)) { - jl_typename_t *tn = (jl_typename_t*)v; - jl_serialize_value(s, tn->name); - jl_serialize_value(s, tn->module); - 
jl_serialize_value(s, tn->names); - jl_serialize_value(s, tn->wrapper); - jl_serialize_value(s, tn->Typeofwrapper); - jl_serialize_value_(s, (jl_value_t*)tn->cache, 0); - jl_serialize_value_(s, (jl_value_t*)tn->linearcache, 0); - jl_serialize_value(s, tn->mt); - jl_serialize_value(s, tn->partial); - } - else if (t->layout->nfields > 0) { - if (jl_typeis(v, jl_globalref_type)) { - // Don't save the cached binding reference in staticdata - ((jl_globalref_t*)v)->bnd_cache = NULL; - } + else if (layout->nfields > 0) { char *data = (char*)jl_data_ptr(v); - size_t i, np = t->layout->npointers; + size_t i, np = layout->npointers; for (i = 0; i < np; i++) { uint32_t ptr = jl_ptr_offset(t, i); - jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr]); - jl_serialize_value(s, fld); + jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr], t->name->mutabl); + jl_queue_for_serialization_(s, fld, 1, immediate); } } + +done_fields: ; + + // We've encountered an item we need to cache + void **bp = ptrhash_bp(&serialization_order, v); + assert(*bp != (void*)(uintptr_t)-1); + if (s->incremental) { + void **bp2 = ptrhash_bp(&unique_ready, v); + if (*bp2 == HT_NOTFOUND) + assert(*bp == (void*)(uintptr_t)-2); + else if (*bp != (void*)(uintptr_t)-2) + return; + } + else { + assert(*bp == (void*)(uintptr_t)-2); + } + arraylist_push(&serialization_queue, (void*) v); + size_t idx = serialization_queue.len - 1; + assert(serialization_queue.len < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many items to serialize"); + + *bp = (void*)((char*)HT_NOTFOUND + 1 + idx); +} + +static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) +{ + if (!jl_needs_serialization(s, v)) + return; + + jl_value_t *t = jl_typeof(v); + // Items that require postorder traversal must visit their children prior to insertion into + // the worklist/serialization_order (and also before their first use) + if (s->incremental && !immediate) { + if 
(jl_is_datatype(t) && needs_uniquing(v)) + immediate = 1; + if (jl_is_datatype_singleton((jl_datatype_t*)t) && needs_uniquing(v)) + immediate = 1; + } + + void **bp = ptrhash_bp(&serialization_order, v); + if (*bp == HT_NOTFOUND) { + *bp = (void*)(uintptr_t)(immediate ? -2 : -1); + } + else { + if (!s->incremental || !immediate || !recursive) + return; + void **bp2 = ptrhash_bp(&unique_ready, v); + if (*bp2 == HT_NOTFOUND) + *bp2 = v; // now is unique_ready + else { + assert(*bp != (void*)(uintptr_t)-1); + return; // already was unique_ready + } + assert(*bp != (void*)(uintptr_t)-2); // should be unique_ready then + if (*bp == (void*)(uintptr_t)-1) + *bp = (void*)(uintptr_t)-2; // now immediate + } + + if (immediate) + jl_insert_into_serialization_queue(s, v, recursive, immediate); + else + arraylist_push(&object_worklist, (void*)v); } // Do a pre-order traversal of the to-serialize worklist, in the identical order -// to the calls to jl_serialize_value would occur in a purely recursive +// to the calls to jl_queue_for_serialization would occur in a purely recursive // implementation, but without potentially running out of stack. 
static void jl_serialize_reachable(jl_serializer_state *s) { @@ -623,10 +820,16 @@ static void jl_serialize_reachable(jl_serializer_state *s) object_worklist.items[j] = tmp; } prevlen = --object_worklist.len; - uintptr_t v = (uintptr_t)object_worklist.items[prevlen]; - int recursive = v & 1; - v &= ~(uintptr_t)1; // untag v - jl_serialize_value__(s, (jl_value_t*)v, recursive); + jl_value_t *v = (jl_value_t*)object_worklist.items[prevlen]; + void **bp = ptrhash_bp(&serialization_order, (void*)v); + assert(*bp != HT_NOTFOUND && *bp != (void*)(uintptr_t)-2); + if (*bp == (void*)(uintptr_t)-1) { // might have been eagerly handled for post-order while in the lazy pre-order queue + *bp = (void*)(uintptr_t)-2; + jl_insert_into_serialization_queue(s, v, 1, 0); + } + else { + assert(s->incremental); + } } } @@ -640,19 +843,6 @@ static void ios_ensureroom(ios_t *s, size_t newsize) JL_NOTSAFEPOINT } } -// Maybe encode a global variable. `gid` is the LLVM index, 0 if the object is not serialized -// in the generated code (and thus not a gvar from that standpoint, maybe only stored in the internal-data sysimg). -// `reloc_id` is the RefTags-encoded `target`. -static void record_gvar(jl_serializer_state *s, int gid, uintptr_t reloc_id) JL_NOTSAFEPOINT -{ - if (gid == 0) - return; - ios_ensureroom(s->gvar_record, gid * sizeof(reloc_t)); - ios_seek(s->gvar_record, (gid - 1) * sizeof(reloc_t)); - write_reloc_t(s->gvar_record, reloc_id); -} - - static void write_padding(ios_t *s, size_t nb) JL_NOTSAFEPOINT { static const char zeros[16] = {0}; @@ -671,11 +861,34 @@ static void write_pointer(ios_t *s) JL_NOTSAFEPOINT write_uint(s, 0); } -// Return the integer `id` for `v`. 
Generically this is looked up in `backref_table`, +// Records the buildid holding `v` and returns the tagged offset within the corresponding image +static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_array_t *link_ids) { + size_t i = external_blob_index(v); + if (i < n_linkage_blobs()) { + assert(link_ids && jl_is_array(link_ids)); + assert(jl_build_ids && jl_is_array(jl_build_ids)); + uint64_t *build_id_data = (uint64_t*)jl_array_data(jl_build_ids); + // We found the sysimg/pkg that this item links against + // Store the image key in `link_ids` + jl_array_grow_end(link_ids, 1); + uint64_t *link_id_data = (uint64_t*)jl_array_data(link_ids); + link_id_data[jl_array_len(link_ids)-1] = build_id_data[i]; + // Compute the relocation code + size_t offset = (uintptr_t)v - (uintptr_t)jl_linkage_blobs.items[2*i]; + offset /= sizeof(void*); + assert(offset < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to external image too large"); + // jl_printf(JL_STDOUT, "External link %ld against blob %d with key %ld at position 0x%lx with offset 0x%lx to \n", jl_array_len(link_ids), i, build_id_data[i>>1], ios_pos(s->s), offset); + // jl_(v); + return ((uintptr_t)ExternalLinkage << RELOC_TAG_OFFSET) + offset; + } + return 0; +} + +// Return the integer `id` for `v`. Generically this is looked up in `serialization_order`, // but symbols, small integers, and a couple of special items (`nothing` and the root Task) // have special handling. 
-#define backref_id(s, v) _backref_id(s, (jl_value_t*)(v)) -static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v) JL_NOTSAFEPOINT +#define backref_id(s, v, link_ids) _backref_id(s, (jl_value_t*)(v), link_ids) +static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t *link_ids) JL_NOTSAFEPOINT { assert(v != NULL && "cannot get backref to NULL object"); void *idx = HT_NOTFOUND; @@ -712,21 +925,44 @@ static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v) JL_NOTSAFEPO uint8_t u8 = *(uint8_t*)v; return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + u8 + 2 + NBOX_C + NBOX_C; } + if (s->incremental && jl_object_in_image(v)) { + assert(link_ids); + uintptr_t item = add_external_linkage(s, v, link_ids); + assert(item && "no external linkage identified"); + return item; + } if (idx == HT_NOTFOUND) { - idx = ptrhash_get(&backref_table, v); - assert(idx != HT_NOTFOUND && "object missed during jl_serialize_value pass"); + idx = ptrhash_get(&serialization_order, v); + if (idx == HT_NOTFOUND) { + jl_(jl_typeof(v)); + jl_(v); + } + assert(idx != HT_NOTFOUND && "object missed during jl_queue_for_serialization pass"); + assert(idx != (void*)(uintptr_t)-1 && "object missed during jl_insert_into_serialization_queue pass"); + assert(idx != (void*)(uintptr_t)-2 && "object missed during jl_insert_into_serialization_queue pass"); } return (char*)idx - 1 - (char*)HT_NOTFOUND; } +static void record_uniquing(jl_serializer_state *s, jl_value_t *fld, uintptr_t offset) JL_NOTSAFEPOINT +{ + if (s->incremental && jl_needs_serialization(s, fld) && needs_uniquing(fld)) { + if (jl_is_datatype(fld) || jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(fld))) + arraylist_push(&s->uniquing_types, (void*)(uintptr_t)offset); + else + arraylist_push(&s->uniquing_objs, (void*)(uintptr_t)offset); + } +} + // Save blank space in stream `s` for a pointer `fld`, storing both location and target // in `relocs_list`. 
static void write_pointerfield(jl_serializer_state *s, jl_value_t *fld) JL_NOTSAFEPOINT { if (fld != NULL) { arraylist_push(&s->relocs_list, (void*)(uintptr_t)ios_pos(s->s)); - arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); + arraylist_push(&s->relocs_list, (void*)backref_id(s, fld, s->link_ids_relocs)); + record_uniquing(s, fld, ios_pos(s->s)); } write_pointer(s->s); } @@ -735,26 +971,29 @@ static void write_pointerfield(jl_serializer_state *s, jl_value_t *fld) JL_NOTSA // in `gctags_list`. static void write_gctaggedfield(jl_serializer_state *s, uintptr_t ref) JL_NOTSAFEPOINT { + // jl_printf(JL_STDOUT, "gctaggedfield: position %p, value 0x%lx\n", (void*)(uintptr_t)ios_pos(s->s), ref); arraylist_push(&s->gctags_list, (void*)(uintptr_t)ios_pos(s->s)); arraylist_push(&s->gctags_list, (void*)ref); write_pointer(s->s); } // Special handling from `jl_write_values` for modules -static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t *m) +static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t *m) JL_GC_DISABLED { size_t reloc_offset = ios_pos(s->s); size_t tot = sizeof(jl_module_t); ios_write(s->s, (char*)m, tot); // raw memory dump of the `jl_module_t` structure + // will need to recreate the binding table for this + arraylist_push(&s->fixup_objs, (void*)reloc_offset); // Handle the fields requiring special attention jl_module_t *newm = (jl_module_t*)&s->s->buf[reloc_offset]; newm->name = NULL; arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, name))); - arraylist_push(&s->relocs_list, (void*)backref_id(s, m->name)); + arraylist_push(&s->relocs_list, (void*)backref_id(s, m->name, s->link_ids_relocs)); newm->parent = NULL; arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, parent))); - arraylist_push(&s->relocs_list, (void*)backref_id(s, m->parent)); + arraylist_push(&s->relocs_list, (void*)backref_id(s, m->parent, s->link_ids_relocs)); newm->primary_world = 
jl_atomic_load_acquire(&jl_world_counter); // write out the bindings table as a list @@ -771,13 +1010,14 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t write_gctaggedfield(s, (uintptr_t)BindingRef << RELOC_TAG_OFFSET); tot += sizeof(void*); size_t binding_reloc_offset = ios_pos(s->s); - record_gvar(s, jl_get_llvm_gv(native_functions, (jl_value_t*)b), - ((uintptr_t)DataRef << RELOC_TAG_OFFSET) + binding_reloc_offset); + ptrhash_put(&bindings, b, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + binding_reloc_offset)); write_pointerfield(s, (jl_value_t*)b->name); + jl_value_t *value; if (jl_docmeta_sym && b->name == jl_docmeta_sym && jl_options.strip_metadata) - write_pointerfield(s, jl_nothing); + value = jl_nothing; else - write_pointerfield(s, jl_atomic_load_relaxed(&b->value)); + value = get_replaceable_field((jl_value_t**)&b->value, !b->constp); + write_pointerfield(s, value); write_pointerfield(s, jl_atomic_load_relaxed(&b->globalref)); write_pointerfield(s, (jl_value_t*)b->owner); write_pointerfield(s, jl_atomic_load_relaxed(&b->ty)); @@ -802,7 +1042,7 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t size_t i; for (i = 0; i < m->usings.len; i++) { arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, usings._space[i]))); - arraylist_push(&s->relocs_list, (void*)backref_id(s, m->usings._space[i])); + arraylist_push(&s->relocs_list, (void*)backref_id(s, m->usings._space[i], s->link_ids_relocs)); } } else { @@ -821,92 +1061,74 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t } } -#if 0 -static size_t jl_sort_size(jl_datatype_t *dt) +static void record_gvars(jl_serializer_state *s, arraylist_t *globals) JL_NOTSAFEPOINT { - if (dt == jl_simplevector_type) - return SIZE_MAX - 5; - if (dt == jl_string_type) - return SIZE_MAX - 4; - if (dt->name == jl_array_typename) - return SIZE_MAX - 3; - if (dt == jl_datatype_type) - return SIZE_MAX - 2; 
- if (dt == jl_module_type) - return SIZE_MAX - 1; - return jl_datatype_size(dt); -} -#endif - -// Used by `qsort` to order `backref_table` by `id` -static int sysimg_sort_order(const void *pa, const void *pb) -{ - uintptr_t sa = ((uintptr_t*)pa)[1]; - uintptr_t sb = ((uintptr_t*)pb)[1]; - return (sa > sb ? 1 : (sa < sb ? -1 : 0)); -#if 0 - jl_value_t *a = *(jl_value_t**)pa; - jl_datatype_t *tya = (jl_datatype_t*)jl_typeof(a); - size_t sa = jl_sort_size(tya); - jl_value_t *b = *(jl_value_t**)pb; - jl_datatype_t *tyb = (jl_datatype_t*)jl_typeof(b); - size_t sb = jl_sort_size(tyb); - if (sa == sb) { - sa = tya->uid; - sb = tyb->uid; - } - return (sa > sb ? 1 : (sa < sb ? -1 : 0)); -#endif + for (size_t i = 0; i < globals->len; i++) { + void *g = globals->items[i]; + if (jl_is_binding((uintptr_t)g)) { + if (!ptrhash_has(&bindings, g)) { + // need to deal with foreign bindings here too + assert(s->incremental); + jl_binding_t *b = (jl_binding_t*)g; + jl_value_t *gr = jl_module_globalref(b->owner, b->name); + jl_queue_for_serialization(s, gr); + } + continue; + } + assert(!ptrhash_has(&bindings, g)); + jl_queue_for_serialization(s, g); + } } jl_value_t *jl_find_ptr = NULL; -// The main function for serializing all the items queued in `backref_table` -static void jl_write_values(jl_serializer_state *s) +// The main function for serializing all the items queued in `serialization_order` +// (They are also stored in `serialization_queue` which is order-preserving, unlike the hash table used +// for `serialization_order`). 
+static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED { - arraylist_t objects_list; - arraylist_new(&objects_list, backref_table_numel * 2); + size_t l = serialization_queue.len; arraylist_new(&layout_table, 0); - arraylist_grow(&layout_table, backref_table_numel); - memset(layout_table.items, 0, backref_table_numel * sizeof(void*)); - - // Order `backref_table` by `id` - size_t i, len = backref_table.size; - void **p = backref_table.table; - for (i = 0; i < len; i += 2) { - char *reloc_id = (char*)p[i + 1]; - if (reloc_id != HT_NOTFOUND) { - jl_value_t *v = (jl_value_t*)p[i]; - uintptr_t item = reloc_id - 1 - (char*)HT_NOTFOUND; - objects_list.items[objects_list.len++] = (void*)v; - objects_list.items[objects_list.len++] = (void*)item; - } - } - assert(backref_table_numel * 2 == objects_list.len); - qsort(objects_list.items, backref_table_numel, sizeof(void*) * 2, sysimg_sort_order); + arraylist_grow(&layout_table, l * 2); + memset(layout_table.items, 0, l * 2 * sizeof(void*)); // Serialize all entries - for (i = 0, len = backref_table_numel * 2; i < len; i += 2) { - jl_value_t *v = (jl_value_t*)objects_list.items[i]; // the object + for (size_t item = 0; item < l; item++) { + jl_value_t *v = (jl_value_t*)serialization_queue.items[item]; // the object JL_GC_PROMISE_ROOTED(v); - uintptr_t item = (uintptr_t)objects_list.items[i + 1]; // the id + assert(!(s->incremental && jl_object_in_image(v))); jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); assert((t->instance == NULL || t->instance == v) && "detected singleton construction corruption"); // realign stream to expected gc alignment (16 bytes) uintptr_t skip_header_pos = ios_pos(s->s) + sizeof(jl_taggedvalue_t); write_padding(s->s, LLT_ALIGN(skip_header_pos, 16) - skip_header_pos); + // write header - write_gctaggedfield(s, backref_id(s, t)); + if (s->incremental && jl_needs_serialization(s, (jl_value_t*)t) && needs_uniquing((jl_value_t*)t)) + arraylist_push(&s->uniquing_types, 
(void*)(uintptr_t)(ios_pos(s->s)|1)); + write_gctaggedfield(s, backref_id(s, t, s->link_ids_gctags)); size_t reloc_offset = ios_pos(s->s); assert(item < layout_table.len && layout_table.items[item] == NULL); - layout_table.items[item] = (void*)reloc_offset; // store the inverse mapping of `backref_table` (`id` => object) - record_gvar(s, jl_get_llvm_gv(native_functions, v), ((uintptr_t)DataRef << RELOC_TAG_OFFSET) + reloc_offset); + layout_table.items[item] = (void*)reloc_offset; // store the inverse mapping of `serialization_order` (`id` => object-as-streampos) + + if (s->incremental && needs_uniquing(v)) { + if (jl_is_method_instance(v)) { + jl_method_instance_t *mi = (jl_method_instance_t*)v; + write_pointerfield(s, mi->def.value); + write_pointerfield(s, mi->specTypes); + write_pointerfield(s, (jl_value_t*)mi->sparam_vals); + continue; + } + else if (!jl_is_datatype(v)) { + assert(jl_is_datatype_singleton(t) && "unreachable"); + } + } + else if (s->incremental && needs_recaching(v)) { + arraylist_push(jl_is_datatype(v) ? 
&s->fixup_types : &s->fixup_objs, (void*)reloc_offset); + } // write data - if (jl_is_cpointer(v)) { - write_pointer(s->s); - } - else if (jl_is_array(v)) { + if (jl_is_array(v)) { // Internal data for types in julia.h with `jl_array_t` field(s) #define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes) jl_array_t *ar = (jl_array_t*)v; @@ -947,10 +1169,15 @@ static void jl_write_values(jl_serializer_state *s) arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + data)); // relocation target if (jl_is_cpointer_type(et)) { - // reset Ptr elements to C_NULL + // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE) + const intptr_t *data = (const intptr_t*)jl_array_data(ar); size_t i; - for (i = 0; i < alen; i++) - write_pointer(s->const_data); + for (i = 0; i < alen; i++) { + if (data[i] != -1) + write_pointer(s->const_data); + else + ios_write(s->const_data, (char*)&data[i], sizeof(data[i])); + } } else { if (isbitsunion) { @@ -966,11 +1193,11 @@ static void jl_write_values(jl_serializer_state *s) // Pointer eltypes are encoded in the mutable data section size_t data = LLT_ALIGN(ios_pos(s->s), alignment_amt); size_t padding_amt = data - ios_pos(s->s); - write_padding(s->s, padding_amt); headersize += padding_amt; newa->data = (void*)headersize; // relocation offset arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item)); // relocation target + write_padding(s->s, padding_amt); if (ar->flags.hasptr) { // copy all of the data first const char *data = (const char*)jl_array_data(ar); @@ -982,22 +1209,22 @@ static void jl_write_values(jl_serializer_state *s) for (i = 0; i < alen; i++) { for (j = 0; j < np; j++) { size_t offset = i * elsz + 
jl_ptr_offset(((jl_datatype_t*)et), j) * sizeof(jl_value_t*); - jl_value_t *fld = *(jl_value_t**)&data[offset]; + jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset], 1); + size_t fld_pos = reloc_offset + headersize + offset; if (fld != NULL) { - arraylist_push(&s->relocs_list, (void*)(uintptr_t)(reloc_offset + headersize + offset)); // relocation location - arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); // relocation target - memset(&s->s->buf[reloc_offset + headersize + offset], 0, sizeof(fld)); // relocation offset (none) - } - else { - assert(*(jl_value_t**)&s->s->buf[reloc_offset + headersize + offset] == NULL); + arraylist_push(&s->relocs_list, (void*)(uintptr_t)fld_pos); // relocation location + arraylist_push(&s->relocs_list, (void*)backref_id(s, fld, s->link_ids_relocs)); // relocation target + record_uniquing(s, fld, fld_pos); } + memset(&s->s->buf[fld_pos], 0, sizeof(fld)); // relocation offset (none) } } } else { + jl_value_t **data = (jl_value_t**)jl_array_data(ar); size_t i; for (i = 0; i < alen; i++) { - jl_value_t *e = jl_array_ptr_ref(v, i); + jl_value_t *e = get_replaceable_field(&data[i], 1); write_pointerfield(s, e); } } @@ -1005,19 +1232,16 @@ static void jl_write_values(jl_serializer_state *s) } else if (jl_typeis(v, jl_module_type)) { jl_write_module(s, item, (jl_module_t*)v); - // will need to recreate the binding table for this - arraylist_push(&reinit_list, (void*)item); - arraylist_push(&reinit_list, (void*)2); } else if (jl_typeis(v, jl_task_type)) { jl_error("Task cannot be serialized"); } else if (jl_is_svec(v)) { ios_write(s->s, (char*)v, sizeof(void*)); - size_t i, l = jl_svec_len(v); + size_t ii, l = jl_svec_len(v); assert(l > 0 || (jl_svec_t*)v == jl_emptysvec); - for (i = 0; i < l; i++) { - write_pointerfield(s, jl_svecref(v, i)); + for (ii = 0; ii < l; ii++) { + write_pointerfield(s, jl_svecref(v, ii)); } } else if (jl_is_string(v)) { @@ -1025,6 +1249,8 @@ static void 
jl_write_values(jl_serializer_state *s) write_uint8(s->s, '\0'); // null-terminated strings for easier C-compatibility } else if (jl_datatype_nfields(t) == 0) { + // The object has no fields, so we just snapshot its byte representation + assert(!t->layout->npointers); assert(t->layout->npointers == 0); ios_write(s->s, (char*)v, jl_datatype_size(t)); } @@ -1057,8 +1283,8 @@ static void jl_write_values(jl_serializer_state *s) write_padding(s->s, offset - tot); tot = offset; size_t fsz = jl_field_size(t, i); - if (t->name->mutabl && jl_is_cpointer_type(jl_field_type(t, i))) { - // reset Ptr fields to C_NULL + if (t->name->mutabl && jl_is_cpointer_type(jl_field_type(t, i)) && *(intptr_t*)slot != -1) { + // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE) assert(!jl_field_isptr(t, i)); write_pointer(s->s); } @@ -1071,22 +1297,46 @@ static void jl_write_values(jl_serializer_state *s) size_t np = t->layout->npointers; for (i = 0; i < np; i++) { size_t offset = jl_ptr_offset(t, i) * sizeof(jl_value_t*); - jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset]); + jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset], t->name->mutabl); + size_t fld_pos = offset + reloc_offset; if (fld != NULL) { - arraylist_push(&s->relocs_list, (void*)(uintptr_t)(offset + reloc_offset)); // relocation location - arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); // relocation target + arraylist_push(&s->relocs_list, (void*)(uintptr_t)(fld_pos)); // relocation location + arraylist_push(&s->relocs_list, (void*)backref_id(s, fld, s->link_ids_relocs)); // relocation target + record_uniquing(s, fld, fld_pos); } - memset(&s->s->buf[offset + reloc_offset], 0, sizeof(fld)); // relocation offset (none) + memset(&s->s->buf[fld_pos], 0, sizeof(fld)); // relocation offset (none) } // A few objects need additional handling beyond the generic serialization above - if (jl_is_method(v)) { - write_padding(s->s, sizeof(jl_method_t) - tot); - if 
(((jl_method_t*)v)->ccallable) { - arraylist_push(&ccallable_list, (void*)item); - arraylist_push(&ccallable_list, (void*)3); + + if (s->incremental && jl_typeis(v, jl_typemap_entry_type)) { + jl_typemap_entry_t *newentry = (jl_typemap_entry_t*)&s->s->buf[reloc_offset]; + if (newentry->max_world == ~(size_t)0) { + if (newentry->min_world > 1) { + newentry->min_world = ~(size_t)0; + arraylist_push(&s->fixup_objs, (void*)reloc_offset); + } + } + else { + // garbage newentry - delete it :( + newentry->min_world = 1; + newentry->max_world = 0; } } + else if (jl_is_method(v)) { + write_padding(s->s, sizeof(jl_method_t) - tot); // hidden fields + jl_method_t *m = (jl_method_t*)v; + jl_method_t *newm = (jl_method_t*)&s->s->buf[reloc_offset]; + if (s->incremental) { + if (newm->deleted_world != ~(size_t)0) + newm->deleted_world = 1; + else + arraylist_push(&s->fixup_objs, (void*)reloc_offset); + newm->primary_world = ~(size_t)0; + } + if (m->ccallable) + arraylist_push(&s->ccallable_list, (void*)reloc_offset); + } else if (jl_is_method_instance(v)) { jl_method_instance_t *newmi = (jl_method_instance_t*)&s->s->buf[reloc_offset]; newmi->precompiled = 0; @@ -1096,6 +1346,22 @@ static void jl_write_values(jl_serializer_state *s) jl_code_instance_t *m = (jl_code_instance_t*)v; jl_code_instance_t *newm = (jl_code_instance_t*)&s->s->buf[reloc_offset]; + if (s->incremental) { + arraylist_push(&s->fixup_objs, (void*)reloc_offset); + if (m->min_world > 1) + newm->min_world = ~(size_t)0; // checks that we reprocess this upon deserialization + if (m->max_world != ~(size_t)0) + newm->max_world = 0; + else { + if (m->inferred && ptrhash_has(&s->callers_with_edges, m->def)) + newm->max_world = 1; // sentinel value indicating this will need validation + if (m->min_world > 0 && m->inferred) { + // TODO: also check if this object is part of the codeinst cache + // will check on deserialize if this cache entry is still valid + } + } + } + newm->invoke = NULL; newm->isspecsig = 0; 
newm->specptr.fptr = NULL; @@ -1156,36 +1422,33 @@ static void jl_write_values(jl_serializer_state *s) arraylist_push(&s->relocs_list, (void*)(((uintptr_t)BuiltinFunctionRef << RELOC_TAG_OFFSET) + builtin_id - 2)); // relocation target } } + else if (jl_is_globalref(v)) { + jl_globalref_t *newg = (jl_globalref_t*)&s->s->buf[reloc_offset]; + // Don't save the cached binding reference in staticdata + // TODO: this should be a relocation pointing to the binding in the new image + newg->bnd_cache = NULL; + if (s->incremental) + arraylist_push(&s->fixup_objs, (void*)reloc_offset); + } else if (jl_is_datatype(v)) { jl_datatype_t *dt = (jl_datatype_t*)v; jl_datatype_t *newdt = (jl_datatype_t*)&s->s->buf[reloc_offset]; - if (dt->layout != NULL) { - newdt->layout = NULL; + if (dt->layout != NULL) { + size_t nf = dt->layout->nfields; + size_t np = dt->layout->npointers; + size_t fieldsize = jl_fielddesc_size(dt->layout->fielddesc_type); char *flddesc = (char*)dt->layout; - void* reloc_from = (void*)(reloc_offset + offsetof(jl_datatype_t, layout)); - void* reloc_to; - - void** bp = ptrhash_bp(&layout_cache, flddesc); - if (*bp == HT_NOTFOUND) { - int64_t streampos = ios_pos(s->const_data); - uintptr_t align = LLT_ALIGN(streampos, sizeof(void*)); - uintptr_t layout = align / sizeof(void*); - *bp = reloc_to = (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + layout); - - size_t fieldsize = jl_fielddesc_size(dt->layout->fielddesc_type); - size_t layoutsize = sizeof(jl_datatype_layout_t) + dt->layout->nfields * fieldsize; - if (dt->layout->first_ptr != -1) - layoutsize += dt->layout->npointers << dt->layout->fielddesc_type; - write_padding(s->const_data, align - streampos); - ios_write(s->const_data, flddesc, layoutsize); - } - else { - reloc_to = *bp; - } - - arraylist_push(&s->relocs_list, reloc_from); - arraylist_push(&s->relocs_list, reloc_to); + size_t fldsize = sizeof(jl_datatype_layout_t) + nf * fieldsize; + if (dt->layout->first_ptr != -1) + fldsize += np << 
dt->layout->fielddesc_type; + uintptr_t layout = LLT_ALIGN(ios_pos(s->const_data), sizeof(void*)); + write_padding(s->const_data, layout - ios_pos(s->const_data)); // realign stream + newdt->layout = NULL; // relocation offset + layout /= sizeof(void*); + arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_datatype_t, layout))); // relocation location + arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + layout)); // relocation target + ios_write(s->const_data, flddesc, fldsize); } } else if (jl_is_typename(v)) { @@ -1214,8 +1477,7 @@ static void jl_write_values(jl_serializer_state *s) } else if (((jl_datatype_t*)(jl_typeof(v)))->name == jl_idtable_typename) { // will need to rehash this, later (after types are fully constructed) - arraylist_push(&reinit_list, (void*)item); - arraylist_push(&reinit_list, (void*)1); + arraylist_push(&s->fixup_objs, (void*)reloc_offset); } else { write_padding(s->s, jl_datatype_size(t) - tot); @@ -1224,61 +1486,11 @@ static void jl_write_values(jl_serializer_state *s) } } - -// Record all symbols that get referenced by the generated code -// and queue them for pointer relocation -static void jl_write_gv_syms(jl_serializer_state *s, jl_sym_t *v) -{ - // since symbols are static, they might not have had a - // reference anywhere in the code image other than here - int32_t gv = jl_get_llvm_gv(native_functions, (jl_value_t*)v); - if (gv != 0) { - uintptr_t item = backref_id(s, v); - assert(item >> RELOC_TAG_OFFSET == SymbolRef); - record_gvar(s, gv, item); - } - if (v->left) - jl_write_gv_syms(s, v->left); - if (v->right) - jl_write_gv_syms(s, v->right); -} - -// Record all hardcoded-tagged items that get referenced by -// the generated code and queue them for pointer relocation -static void jl_write_gv_tagref(jl_serializer_state *s, jl_value_t *v) -{ - int32_t gv = jl_get_llvm_gv(native_functions, (jl_value_t*)v); - if (gv != 0) { - uintptr_t item = backref_id(s, v); - assert(item >> 
RELOC_TAG_OFFSET == TagRef); - record_gvar(s, gv, item); - } -} -static void jl_write_gv_tagrefs(jl_serializer_state *s) -{ - // this also ensures all objects referenced in the code have - // references in the system image to their global variable - // since codegen knows that some integer boxes are static, - // they might not have had a reference anywhere in the code - // image other than here - size_t i; - jl_write_gv_tagref(s, (jl_value_t*)s->ptls->root_task); - jl_write_gv_tagref(s, s->ptls->root_task->tls); - jl_write_gv_tagref(s, jl_nothing); - for (i = 0; i < NBOX_C; i++) { - jl_write_gv_tagref(s, jl_box_int32((int32_t)i - NBOX_C / 2)); - jl_write_gv_tagref(s, jl_box_int64((int64_t)i - NBOX_C / 2)); - } - for (i = 0; i < 256; i++) { - jl_write_gv_tagref(s, jl_box_uint8(i)); - } -} - // In deserialization, create Symbols and set up the // index for backreferencing static void jl_read_symbols(jl_serializer_state *s) { - assert(deser_sym.len == nsym_tag); + assert(deser_sym.len == 0); uintptr_t base = (uintptr_t)&s->symbols->buf[0]; uintptr_t end = base + s->symbols->size; while (base < end) { @@ -1330,6 +1542,8 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset) case FunctionRef: assert(offset < JL_API_MAX && "unknown function pointer id"); break; + case ExternalLinkage: + break; case DataRef: default: assert(0 && "corrupt relocation item id"); @@ -1341,7 +1555,7 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset) } // Compute target location at deserialization -static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t base, size_t size, uintptr_t reloc_id) +static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t base, size_t size, uintptr_t reloc_id, jl_array_t *link_ids, int *link_index) { enum RefTags tag = (enum RefTags)(reloc_id >> RELOC_TAG_OFFSET); size_t offset = (reloc_id & (((uintptr_t)1 << RELOC_TAG_OFFSET) - 1)); @@ -1379,11 +1593,11 @@ static inline 
uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas case FunctionRef: switch ((jl_callingconv_t)offset) { case JL_API_BOXED: - if (sysimg_fptrs.base) + if (s->image->fptrs.base) return (uintptr_t)jl_fptr_args; JL_FALLTHROUGH; case JL_API_WITH_PARAMETERS: - if (sysimg_fptrs.base) + if (s->image->fptrs.base) return (uintptr_t)jl_fptr_sparam; return (uintptr_t)NULL; case JL_API_CONST: @@ -1397,17 +1611,35 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas //default: assert("corrupt relocation item id"); } + case ExternalLinkage: + assert(link_ids); + assert(link_index); + assert(jl_build_ids); + uint64_t *link_id_data = (uint64_t*)jl_array_data(link_ids); + uint64_t *build_id_data = (uint64_t*)jl_array_data(jl_build_ids); + assert(0 <= *link_index && *link_index < jl_array_len(link_ids)); + uint64_t build_id = link_id_data[*link_index]; + *link_index += 1; + size_t i = 0, nids = jl_array_len(jl_build_ids); + while (i < nids) { + if (build_id == build_id_data[i]) + break; + i++; + } + assert(i < nids); + assert(2*i < jl_linkage_blobs.len); + return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*sizeof(void*); } abort(); } -static void jl_write_reloclist(ios_t *s, char *base, size_t size, arraylist_t *list) +static void jl_write_offsetlist(ios_t *s, char *base, size_t size, arraylist_t *list) { for (size_t i = 0; i < list->len; i += 2) { size_t last_pos = i ? 
(size_t)list->items[i - 2] : 0; size_t pos = (size_t)list->items[i]; - size_t item = (size_t)list->items[i + 1]; + size_t item = (size_t)list->items[i + 1]; // item is tagref-encoded uintptr_t *pv = (uintptr_t*)(base + pos); assert(pos < size && pos != 0); *pv = get_reloc_for_item(item, *pv); @@ -1434,19 +1666,32 @@ static void jl_write_reloclist(ios_t *s, char *base, size_t size, arraylist_t *l } +static void jl_write_arraylist(ios_t *s, arraylist_t *list) +{ + write_uint(s, list->len); + ios_write(s, (const char*)list->items, list->len * sizeof(void*)); +} + static void jl_write_relocations(jl_serializer_state *s) { char *base = &s->s->buf[0]; - jl_write_reloclist(s->relocs, base, s->s->size, &s->gctags_list); - jl_write_reloclist(s->relocs, base, s->s->size, &s->relocs_list); + jl_write_offsetlist(s->relocs, base, s->s->size, &s->gctags_list); + jl_write_offsetlist(s->relocs, base, s->s->size, &s->relocs_list); + if (s->incremental) { + jl_write_arraylist(s->relocs, &s->uniquing_types); + jl_write_arraylist(s->relocs, &s->uniquing_objs); + jl_write_arraylist(s->relocs, &s->fixup_types); + } + jl_write_arraylist(s->relocs, &s->fixup_objs); } -static void jl_read_reloclist(jl_serializer_state *s, uint8_t bits) +static void jl_read_reloclist(jl_serializer_state *s, jl_array_t *link_ids, uint8_t bits) { uintptr_t base = (uintptr_t)s->s->buf; size_t size = s->s->size; uintptr_t last_pos = 0; uint8_t *current = (uint8_t *)(s->relocs->buf + s->relocs->bpos); + int link_index = 0; while (1) { // Read the offset of the next object size_t pos_diff = 0; @@ -1468,40 +1713,58 @@ static void jl_read_reloclist(jl_serializer_state *s, uint8_t bits) last_pos = pos; uintptr_t *pv = (uintptr_t *)(base + pos); uintptr_t v = *pv; - v = get_item_for_reloc(s, base, size, v); + v = get_item_for_reloc(s, base, size, v, link_ids, &link_index); *pv = v | bits; } + assert(!link_ids || link_index == jl_array_len(link_ids)); +} + +static void jl_read_arraylist(ios_t *s, arraylist_t *list) +{ 
+ size_t list_len = read_uint(s); + arraylist_new(list, 0); + arraylist_grow(list, list_len); + ios_read(s, (char*)list->items, list_len * sizeof(void*)); } -static char *sysimg_base; -static char *sysimg_relocs; void gc_sweep_sysimg(void) { - if (!sysimg_relocs) + size_t nblobs = n_linkage_blobs(); + if (nblobs == 0) return; - uintptr_t base = (uintptr_t)sysimg_base; - uintptr_t last_pos = 0; - uint8_t *current = (uint8_t *)sysimg_relocs; - while (1) { - // Read the offset of the next object - size_t pos_diff = 0; - size_t cnt = 0; + assert(jl_linkage_blobs.len == 2*nblobs); + assert(jl_image_relocs.len == nblobs); + for (size_t i = 0; i < 2*nblobs; i+=2) { + reloc_t *relocs = (reloc_t*)jl_image_relocs.items[i>>1]; + if (!relocs) + continue; + uintptr_t base = (uintptr_t)jl_linkage_blobs.items[i]; + uintptr_t last_pos = 0; + uint8_t *current = (uint8_t *)relocs; while (1) { - int8_t c = *current++; - pos_diff |= ((size_t)c & 0x7F) << (7 * cnt++); - if ((c >> 7) == 0) + // Read the offset of the next object + size_t pos_diff = 0; + size_t cnt = 0; + while (1) { + int8_t c = *current++; + pos_diff |= ((size_t)c & 0x7F) << (7 * cnt++); + if ((c >> 7) == 0) + break; + } + if (pos_diff == 0) break; - } - if (pos_diff == 0) - break; - uintptr_t pos = last_pos + pos_diff; - last_pos = pos; - jl_taggedvalue_t *o = (jl_taggedvalue_t *)(base + pos); - o->bits.gc = GC_OLD; + uintptr_t pos = last_pos + pos_diff; + last_pos = pos; + jl_taggedvalue_t *o = (jl_taggedvalue_t *)(base + pos); + o->bits.gc = GC_OLD; + } } } +// jl_write_value and jl_read_value are used for storing Julia objects that are adjuncts to +// the image proper. For example, new methods added to external callables require +// insertion into the appropriate method table. 
#define jl_write_value(s, v) _jl_write_value((s), (jl_value_t*)(v)) static void _jl_write_value(jl_serializer_state *s, jl_value_t *v) { @@ -1509,12 +1772,11 @@ static void _jl_write_value(jl_serializer_state *s, jl_value_t *v) write_reloc_t(s->s, 0); return; } - uintptr_t item = backref_id(s, v); + uintptr_t item = backref_id(s, v, NULL); uintptr_t reloc = get_reloc_for_item(item, 0); write_reloc_t(s->s, reloc); } - static jl_value_t *jl_read_value(jl_serializer_state *s) { uintptr_t base = (uintptr_t)&s->s->buf[0]; @@ -1523,16 +1785,44 @@ static jl_value_t *jl_read_value(jl_serializer_state *s) s->s->bpos += sizeof(reloc_t); if (offset == 0) return NULL; - return (jl_value_t*)get_item_for_reloc(s, base, size, offset); + return (jl_value_t*)get_item_for_reloc(s, base, size, offset, NULL, NULL); +} + +// The next two, `jl_read_offset` and `jl_delayed_reloc`, are essentially a split version +// of `jl_read_value` that allows usage of the relocation data rather than passing NULL +// to `get_item_for_reloc`. +// This works around what would otherwise be an order-dependency conundrum: objects +// that may require relocation data have to be inserted into `serialization_order`, +// and that may include some of the adjunct data that gets serialized via +// `jl_write_value`. But we can't interpret them properly until we read the relocation +// data, and that happens after we pull items out of the serialization stream. 
+static uintptr_t jl_read_offset(jl_serializer_state *s) +{ + uintptr_t base = (uintptr_t)&s->s->buf[0]; + uintptr_t offset = *(reloc_t*)(base + (uintptr_t)s->s->bpos); + s->s->bpos += sizeof(reloc_t); + return offset; } +static jl_value_t *jl_delayed_reloc(jl_serializer_state *s, uintptr_t offset) JL_GC_DISABLED +{ + if (!offset) + return NULL; + uintptr_t base = (uintptr_t)&s->s->buf[0]; + size_t size = s->s->size; + int link_index = 0; + jl_value_t *ret = (jl_value_t*)get_item_for_reloc(s, base, size, offset, s->link_ids_relocs, &link_index); + assert(link_index < jl_array_len(s->link_ids_relocs)); + return ret; +} -static void jl_update_all_fptrs(jl_serializer_state *s) + +static void jl_update_all_fptrs(jl_serializer_state *s, jl_image_t *image) { - jl_sysimg_fptrs_t fvars = sysimg_fptrs; + jl_sysimg_fptrs_t fvars = image->fptrs; // make these NULL now so we skip trying to restore GlobalVariable pointers later - sysimg_gvars_base = NULL; - sysimg_fptrs.base = NULL; + image->gvars_base = NULL; + image->fptrs.base = NULL; if (fvars.base == NULL) return; int sysimg_fvars_max = s->fptr_record->size / sizeof(void*); @@ -1577,152 +1867,112 @@ static void jl_update_all_fptrs(jl_serializer_state *s) } } // Tell LLVM about the native code - jl_register_fptrs(sysimage_base, &fvars, linfos, sysimg_fvars_max); + jl_register_fptrs(image->base, &fvars, linfos, sysimg_fvars_max); } +static void write_gvars(jl_serializer_state *s, arraylist_t *globals) JL_NOTSAFEPOINT +{ + ios_ensureroom(s->gvar_record, globals->len * sizeof(reloc_t)); + for (size_t i = 0; i < globals->len; i++) { + void *g = globals->items[i]; + if (jl_is_binding((uintptr_t)g)) { + jl_binding_t *b = (jl_binding_t*)g; + void *reloc = ptrhash_get(&bindings, g); + if (reloc != HT_NOTFOUND) { + assert(reloc != (void*)(uintptr_t)-1); + write_reloc_t(s->gvar_record, (uintptr_t)reloc); + continue; + } + // need to deal with foreign bindings here too + assert(s->incremental); + arraylist_push(&s->uniquing_objs, 
(void*)((i << 2) | 2)); // mark as gvar && !tag + g = (void*)jl_module_globalref(b->owner, b->name); + } + uintptr_t item = backref_id(s, g, s->link_ids_gvars); + uintptr_t reloc = get_reloc_for_item(item, 0); + write_reloc_t(s->gvar_record, reloc); + record_uniquing(s, (jl_value_t*)g, ((i << 2) | 2)); // mark as gvar && !tag + } +} // Pointer relocation for native-code referenced global variables -static void jl_update_all_gvars(jl_serializer_state *s) +static void jl_update_all_gvars(jl_serializer_state *s, jl_image_t *image) { - if (sysimg_gvars_base == NULL) + if (image->gvars_base == NULL) return; - size_t gvname_index = 0; + size_t i = 0; + size_t l = s->gvar_record->size / sizeof(reloc_t); uintptr_t base = (uintptr_t)&s->s->buf[0]; size_t size = s->s->size; reloc_t *gvars = (reloc_t*)&s->gvar_record->buf[0]; - reloc_t *end = gvars + s->gvar_record->size / sizeof(reloc_t); - while (gvars < end) { - uintptr_t offset = *gvars; - if (offset) { - uintptr_t v = get_item_for_reloc(s, base, size, offset); - *sysimg_gvars(sysimg_gvars_base, gvname_index) = v; - } - gvname_index += 1; - gvars++; + int link_index = 0; + for (i = 0; i < l; i++) { + uintptr_t offset = gvars[i]; + uintptr_t v = get_item_for_reloc(s, base, size, offset, s->link_ids_gvars, &link_index); + uintptr_t *gv = sysimg_gvars(image->gvars_base, image->gvars_offsets, i); + *gv = v; } + assert(!s->link_ids_gvars || link_index == jl_array_len(s->link_ids_gvars)); } - -// Reinitialization -static void jl_finalize_serializer(jl_serializer_state *s, arraylist_t *list) +static void jl_root_new_gvars(jl_serializer_state *s, jl_image_t *image) { - size_t i, l; - - // record list of reinitialization functions - l = list->len; - for (i = 0; i < l; i += 2) { - size_t item = (size_t)list->items[i]; - size_t reloc_offset = (size_t)layout_table.items[item]; - assert(reloc_offset != 0); - write_reloc_t(s->s, reloc_offset); - write_uint8(s->s, (uintptr_t)list->items[i + 1]); + if (image->gvars_base == NULL) + 
return; + size_t i = 0; + size_t l = s->gvar_record->size / sizeof(reloc_t); + for (i = 0; i < l; i++) { + uintptr_t *gv = sysimg_gvars(image->gvars_base, image->gvars_offsets, i); + uintptr_t v = *gv; + if (!jl_is_binding(v)) + v = (uintptr_t)jl_as_global_root((jl_value_t*)v); + *gv = v; } - write_reloc_t(s->s, 0); } -static void jl_reinit_item(jl_value_t *v, uint8_t how) JL_GC_DISABLED +static void jl_compile_extern(jl_method_t *m, void *sysimg_handle) JL_GC_DISABLED { - switch (how) { - case 1: { // rehash IdDict - jl_array_t **a = (jl_array_t**)v; - assert(jl_is_array(*a)); - // Assume *a don't need a write barrier - *a = jl_idtable_rehash(*a, jl_array_len(*a)); - jl_gc_wb(v, *a); - break; - } - case 2: { // rebuild the binding table for module v - jl_module_t *mod = (jl_module_t*)v; - assert(jl_is_module(mod)); - size_t nbindings = mod->bindings.size; - htable_new(&mod->bindings, nbindings); - struct binding { - jl_sym_t *asname; - uintptr_t tag; - jl_binding_t b; - } *b; - b = (struct binding*)&mod[1]; - while (nbindings > 0) { - ptrhash_put(&mod->bindings, b->asname, &b->b); - b += 1; - nbindings -= 1; - } - if (mod->usings.items != &mod->usings._space[0]) { - void **newitems = (void**)malloc_s(mod->usings.max * sizeof(void*)); - memcpy(newitems, mod->usings.items, mod->usings.len * sizeof(void*)); - mod->usings.items = newitems; - } - break; - } - case 3: { // install ccallable entry point in JIT - jl_svec_t *sv = ((jl_method_t*)v)->ccallable; - int success = jl_compile_extern_c(NULL, NULL, jl_sysimg_handle, jl_svecref(sv, 0), jl_svecref(sv, 1)); - assert(success); (void)success; - break; - } - default: - assert(0 && "corrupt deserialization state"); - abort(); - } + // install ccallable entry point in JIT + jl_svec_t *sv = m->ccallable; + int success = jl_compile_extern_c(NULL, NULL, sysimg_handle, jl_svecref(sv, 0), jl_svecref(sv, 1)); + if (!success) + jl_safe_printf("WARNING: @ccallable was already defined for this method name\n"); // enjoy a very bad 
time + assert(success || !sysimg_handle); } -static void jl_finalize_deserializer(jl_serializer_state *s) JL_GC_DISABLED +static void jl_reinit_ccallable(arraylist_t *ccallable_list, char *base, void *sysimg_handle) { - // run reinitialization functions - uintptr_t base = (uintptr_t)&s->s->buf[0]; - while (1) { - size_t offset; - if (sizeof(reloc_t) <= 4) { - offset = read_uint32(s->s); - } - else { - offset = read_uint64(s->s); - } - if (offset == 0) - break; - jl_value_t *v = (jl_value_t*)(base + offset); - jl_reinit_item(v, read_uint8(s->s)); + for (size_t i = 0; i < ccallable_list->len; i++) { + uintptr_t item = (uintptr_t)ccallable_list->items[i]; + jl_method_t *m = (jl_method_t*)(base + item); + jl_compile_extern(m, sysimg_handle); } } - -// Code below helps slim down the images -static void jl_scan_type_cache_gv(jl_serializer_state *s, jl_svec_t *cache) -{ - size_t l = jl_svec_len(cache), i; - for (i = 0; i < l; i++) { - jl_value_t *ti = jl_svecref(cache, i); - if (ti == jl_nothing) - continue; - if (jl_get_llvm_gv(native_functions, ti)) { - jl_serialize_value(s, ti); - } - else if (jl_is_datatype(ti)) { - jl_value_t *singleton = ((jl_datatype_t*)ti)->instance; - if (singleton && jl_get_llvm_gv(native_functions, singleton)) - jl_serialize_value(s, ti); - } - } -} - -// remove cached types not referenced in the stream +// Code below helps slim down the images by +// removing cached types not referenced in the stream static jl_svec_t *jl_prune_type_cache_hash(jl_svec_t *cache) JL_GC_DISABLED { size_t l = jl_svec_len(cache), i; + if (l == 0) + return cache; for (i = 0; i < l; i++) { jl_value_t *ti = jl_svecref(cache, i); if (ti == jl_nothing) continue; - if (ptrhash_get(&backref_table, ti) == HT_NOTFOUND) + if (ptrhash_get(&serialization_order, ti) == HT_NOTFOUND) jl_svecset(cache, i, jl_nothing); } - void *idx = ptrhash_get(&backref_table, cache); - ptrhash_remove(&backref_table, cache); + void *idx = ptrhash_get(&serialization_order, cache); + assert(idx != 
HT_NOTFOUND && idx != (void*)(uintptr_t)-1); + assert(serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] == cache); cache = cache_rehash_set(cache, l); - ptrhash_put(&backref_table, cache, idx); + // redirect all references to the old cache to relocate to the new cache object + ptrhash_put(&serialization_order, cache, idx); + serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] = cache; return cache; } @@ -1733,7 +1983,7 @@ static void jl_prune_type_cache_linear(jl_svec_t *cache) jl_value_t *ti = jl_svecref(cache, i); if (ti == jl_nothing) break; - if (ptrhash_get(&backref_table, ti) != HT_NOTFOUND) + if (ptrhash_get(&serialization_order, ti) != HT_NOTFOUND) jl_svecset(cache, ins++, ti); } while (ins < l) @@ -1776,11 +2026,6 @@ static jl_value_t *strip_codeinfo_meta(jl_method_t *m, jl_value_t *ci_, int orig return ret; } -static void record_field_change(jl_value_t **addr, jl_value_t *newval) -{ - ptrhash_put(&field_replace, (void*)addr, newval); -} - static void strip_specializations_(jl_method_instance_t *mi) { assert(jl_is_method_instance(mi)); @@ -1865,6 +2110,7 @@ static void jl_strip_all_codeinfos(void) // triggering non-relocatability of compressed CodeInfos. // Set the number of such roots in each method when the sysimg is // serialized. 
+// TODO: move this to `jl_write_values` static int set_nroots_sysimg__(jl_typemap_entry_t *def, void *_env) { jl_method_t *m = def->func.method; @@ -1884,9 +2130,6 @@ static void jl_set_nroots_sysimg(void) // --- entry points --- -static void jl_init_serializer2(int); -static void jl_cleanup_serializer2(void); - jl_array_t *jl_global_roots_table; static jl_mutex_t global_roots_lock; @@ -1930,33 +2173,93 @@ JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val JL_MAYBE_UNROOTED) return val; } -static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED +static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *newly_inferred, uint64_t worklist_key, + /* outputs */ jl_array_t **extext_methods, + jl_array_t **new_specializations, jl_array_t **method_roots_list, + jl_array_t **ext_targets, jl_array_t **edges) { - jl_gc_collect(JL_GC_FULL); - jl_gc_collect(JL_GC_INCREMENTAL); // sweep finalizers - JL_TIMING(SYSIMG_DUMP); + // extext_methods: [method1, ...], worklist-owned "extending external" methods added to functions owned by modules outside the worklist + // ext_targets: [invokesig1, callee1, matches1, ...] non-worklist callees of worklist-owned methods + // ordinary dispatch: invokesig=NULL, callee is MethodInstance + // `invoke` dispatch: invokesig is signature, callee is MethodInstance + // abstract call: callee is signature + // edges: [caller1, ext_targets_indexes1, ...] 
for worklist-owned methods calling external methods + + assert(edges_map == NULL); + JL_GC_PUSH1(&edges_map); + + // Save the inferred code from newly inferred, external methods + htable_new(&external_mis, 0); // we need external_mis until after `jl_collect_edges` finishes + *new_specializations = queue_external_cis(newly_inferred); + // Collect the new method roots + htable_t methods_with_newspecs; + htable_new(&methods_with_newspecs, 0); + jl_collect_methods(&methods_with_newspecs, *new_specializations); + *method_roots_list = jl_alloc_vec_any(0); + jl_collect_new_roots(*method_roots_list, &methods_with_newspecs, worklist_key); + htable_free(&methods_with_newspecs); + + // Collect method extensions and edges data + edges_map = jl_alloc_vec_any(0); + *extext_methods = jl_alloc_vec_any(0); + size_t i, len = jl_array_len(mod_array); + for (i = 0; i < len; i++) { + jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(mod_array, i); + assert(jl_is_module(m)); + if (m->parent == m) // some toplevel modules (really just Base) aren't actually + jl_collect_extext_methods_from_mod(*extext_methods, m); + } + jl_collect_methtable_from_mod(*extext_methods, jl_type_type_mt); + jl_collect_missing_backedges(jl_type_type_mt); + jl_collect_methtable_from_mod(*extext_methods, jl_nonfunction_mt); + jl_collect_missing_backedges(jl_nonfunction_mt); + // jl_collect_extext_methods_from_mod and jl_collect_missing_backedges also accumulate data in callers_with_edges. + // Process this to extract `edges` and `ext_targets`. 
+ *ext_targets = jl_alloc_vec_any(0); + *edges = jl_alloc_vec_any(0); + jl_collect_edges(*edges, *ext_targets); + htable_free(&external_mis); + assert(edges_map == NULL); // jl_collect_edges clears this when done - htable_new(&field_replace, 10000); + JL_GC_POP(); +} + +// In addition to the system image (where `worklist = NULL`), this can also save incremental images with external linkage +static void jl_save_system_image_to_stream(ios_t *f, + jl_array_t *worklist, jl_array_t *extext_methods, + jl_array_t *new_specializations, jl_array_t *method_roots_list, + jl_array_t *ext_targets, jl_array_t *edges) JL_GC_DISABLED +{ + htable_new(&field_replace, 0); // strip metadata and IR when requested if (jl_options.strip_metadata || jl_options.strip_ir) jl_strip_all_codeinfos(); - jl_set_nroots_sysimg(); + if (worklist == NULL) + jl_set_nroots_sysimg(); int en = jl_gc_enable(0); - jl_init_serializer2(1); - htable_reset(&backref_table, 250000); - arraylist_new(&reinit_list, 0); - arraylist_new(&ccallable_list, 0); + nsym_tag = 0; + htable_new(&symbol_table, 0); + htable_new(&fptr_to_id, sizeof(id_to_fptrs) / sizeof(*id_to_fptrs)); + uintptr_t i; + for (i = 0; id_to_fptrs[i] != NULL; i++) { + ptrhash_put(&fptr_to_id, (void*)(uintptr_t)id_to_fptrs[i], (void*)(i + 2)); + } + htable_new(&serialization_order, 25000); + htable_new(&unique_ready, 0); + htable_new(&nullptrs, 0); + htable_new(&bindings, 0); arraylist_new(&object_worklist, 0); - backref_table_numel = 0; + arraylist_new(&serialization_queue, 0); ios_t sysimg, const_data, symbols, relocs, gvar_record, fptr_record; - ios_mem(&sysimg, 1000000); - ios_mem(&const_data, 100000); - ios_mem(&symbols, 100000); - ios_mem(&relocs, 100000); - ios_mem(&gvar_record, 100000); - ios_mem(&fptr_record, 100000); + ios_mem(&sysimg, 0); + ios_mem(&const_data, 0); + ios_mem(&symbols, 0); + ios_mem(&relocs, 0); + ios_mem(&gvar_record, 0); + ios_mem(&fptr_record, 0); jl_serializer_state s; + s.incremental = !(worklist == NULL); s.s = 
&sysimg; s.const_data = &const_data; s.symbols = &symbols; @@ -1966,16 +2269,31 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED s.ptls = jl_current_task->ptls; arraylist_new(&s.relocs_list, 0); arraylist_new(&s.gctags_list, 0); - jl_value_t **const*const tags = get_tags(); - - // empty!(Core.ARGS) - if (jl_core_module != NULL) { - jl_array_t *args = (jl_array_t*)jl_get_global(jl_core_module, jl_symbol("ARGS")); - if (args != NULL) { - jl_array_del_end(args, jl_array_len(args)); + arraylist_new(&s.uniquing_types, 0); + arraylist_new(&s.uniquing_objs, 0); + arraylist_new(&s.fixup_types, 0); + arraylist_new(&s.fixup_objs, 0); + arraylist_new(&s.ccallable_list, 0); + s.link_ids_relocs = jl_alloc_array_1d(jl_array_uint64_type, 0); + s.link_ids_gctags = jl_alloc_array_1d(jl_array_uint64_type, 0); + s.link_ids_gvars = jl_alloc_array_1d(jl_array_uint64_type, 0); + htable_new(&s.callers_with_edges, 0); + jl_value_t **const*const tags = get_tags(); // worklist == NULL ? get_tags() : NULL; + + arraylist_t gvars; + arraylist_new(&gvars, 0); + if (native_functions) + jl_get_llvm_gvs(native_functions, &gvars); + + if (worklist == NULL) { + // empty!(Core.ARGS) + if (jl_core_module != NULL) { + jl_array_t *args = (jl_array_t*)jl_get_global(jl_core_module, jl_symbol("ARGS")); + if (args != NULL) { + jl_array_del_end(args, jl_array_len(args)); + } } } - jl_idtable_type = jl_base_module ? jl_get_global(jl_base_module, jl_symbol("IdDict")) : NULL; jl_idtable_typename = jl_base_module ? ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_idtable_type))->name : NULL; jl_bigint_type = jl_base_module ? 
jl_get_global(jl_base_module, jl_symbol("BigInt")) : NULL; @@ -1992,44 +2310,63 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED { // step 1: record values (recursively) that need to go in the image size_t i; - for (i = 0; tags[i] != NULL; i++) { - jl_value_t *tag = *tags[i]; - jl_serialize_value(&s, tag); + if (worklist == NULL) { + for (i = 0; tags[i] != NULL; i++) { + jl_value_t *tag = *tags[i]; + jl_queue_for_serialization(&s, tag); + } + jl_queue_for_serialization(&s, jl_global_roots_table); + jl_queue_for_serialization(&s, s.ptls->root_task->tls); } - jl_serialize_value(&s, jl_global_roots_table); - jl_serialize_reachable(&s); - // step 1.1: check for values only found in the generated code - arraylist_t typenames; - arraylist_new(&typenames, 0); - for (i = 0; i < backref_table.size; i += 2) { - jl_typename_t *tn = (jl_typename_t*)backref_table.table[i]; - if (tn == HT_NOTFOUND || !jl_is_typename(tn)) - continue; - arraylist_push(&typenames, tn); + else { + // To ensure we don't have to manually update the list, go through all tags and queue any that are not otherwise + // judged to be externally-linked + htable_new(&external_objects, NUM_TAGS); + for (size_t i = 0; tags[i] != NULL; i++) { + jl_value_t *tag = *tags[i]; + ptrhash_put(&external_objects, tag, tag); + } + // Queue the worklist itself as the first item we serialize + jl_queue_for_serialization(&s, worklist); + jl_queue_for_serialization(&s, jl_module_init_order); + // Classify the CodeInstances with respect to their need for validation + classify_callers(&s.callers_with_edges, edges); } - for (i = 0; i < typenames.len; i++) { - jl_typename_t *tn = (jl_typename_t*)typenames.items[i]; - jl_scan_type_cache_gv(&s, tn->cache); - jl_scan_type_cache_gv(&s, tn->linearcache); + // step 1.1: as needed, serialize the data needed for insertion into the running system + if (extext_methods) { + assert(ext_targets); + assert(edges); + // Queue method extensions + 
jl_queue_for_serialization(&s, extext_methods); + // Queue the new specializations + jl_queue_for_serialization(&s, new_specializations); + // Queue the new roots + jl_queue_for_serialization(&s, method_roots_list); + // Queue the edges + jl_queue_for_serialization(&s, ext_targets); + jl_queue_for_serialization(&s, edges); } jl_serialize_reachable(&s); - // step 1.2: prune (garbage collect) some special weak references from + // step 1.2: now that we have marked all bindings (badly), ensure all gvars are part of the sysimage + record_gvars(&s, &gvars); + jl_serialize_reachable(&s); + // step 1.3: prune (garbage collect) some special weak references from // built-in type caches - for (i = 0; i < typenames.len; i++) { - jl_typename_t *tn = (jl_typename_t*)typenames.items[i]; - tn->cache = jl_prune_type_cache_hash(tn->cache); - jl_gc_wb(tn, tn->cache); - jl_prune_type_cache_linear(tn->linearcache); + for (i = 0; i < serialization_queue.len; i++) { + jl_typename_t *tn = (jl_typename_t*)serialization_queue.items[i]; + if (jl_is_typename(tn)) { + tn->cache = jl_prune_type_cache_hash(tn->cache); + jl_gc_wb(tn, tn->cache); + jl_prune_type_cache_linear(tn->linearcache); + } } - arraylist_free(&typenames); } { // step 2: build all the sysimg sections write_padding(&sysimg, sizeof(uintptr_t)); jl_write_values(&s); + write_gvars(&s, &gvars); jl_write_relocations(&s); - jl_write_gv_syms(&s, jl_get_root_symbol()); - jl_write_gv_tagrefs(&s); } if (sysimg.size > ((uintptr_t)1 << RELOC_TAG_OFFSET)) { @@ -2050,8 +2387,10 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED ); jl_exit(1); } + htable_free(&s.callers_with_edges); // step 3: combine all of the sections into one file + assert(ios_pos(f) % JL_CACHE_BYTE_ALIGNMENT == 0); write_uint(f, sysimg.size - sizeof(uintptr_t)); ios_seek(&sysimg, sizeof(uintptr_t)); ios_copyall(f, &sysimg); @@ -2089,56 +2428,181 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED ios_close(&fptr_record); { // step 
4: record locations of special roots - s.s = f; write_padding(f, LLT_ALIGN(ios_pos(f), 8) - ios_pos(f)); - size_t i; - for (i = 0; tags[i] != NULL; i++) { - jl_value_t *tag = *tags[i]; - jl_write_value(&s, tag); + s.s = f; + if (worklist == NULL) { + size_t i; + for (i = 0; tags[i] != NULL; i++) { + jl_value_t *tag = *tags[i]; + jl_write_value(&s, tag); + } + jl_write_value(&s, jl_global_roots_table); + jl_write_value(&s, s.ptls->root_task->tls); + write_uint32(f, jl_get_gs_ctr()); + write_uint(f, jl_atomic_load_acquire(&jl_world_counter)); + write_uint(f, jl_typeinf_world); } - jl_write_value(&s, jl_global_roots_table); - jl_write_value(&s, s.ptls->root_task->tls); - write_uint32(f, jl_get_gs_ctr()); - write_uint(f, jl_atomic_load_acquire(&jl_world_counter)); - write_uint(f, jl_typeinf_world); - jl_finalize_serializer(&s, &reinit_list); - jl_finalize_serializer(&s, &ccallable_list); - } + else { + jl_write_value(&s, worklist); + // save module initialization order + if (jl_module_init_order != NULL) { + size_t i, l = jl_array_len(jl_module_init_order); + for (i = 0; i < l; i++) { + // verify that all these modules were saved + assert(ptrhash_get(&serialization_order, jl_array_ptr_ref(jl_module_init_order, i)) != HT_NOTFOUND); + } + } + jl_write_value(&s, jl_module_init_order); + jl_write_value(&s, extext_methods); + jl_write_value(&s, new_specializations); + jl_write_value(&s, method_roots_list); + jl_write_value(&s, ext_targets); + jl_write_value(&s, edges); + } + write_uint32(f, jl_array_len(s.link_ids_gctags)); + ios_write(f, (char*)jl_array_data(s.link_ids_gctags), jl_array_len(s.link_ids_gctags)*sizeof(uint64_t)); + write_uint32(f, jl_array_len(s.link_ids_relocs)); + ios_write(f, (char*)jl_array_data(s.link_ids_relocs), jl_array_len(s.link_ids_relocs)*sizeof(uint64_t)); + write_uint32(f, jl_array_len(s.link_ids_gvars)); + ios_write(f, (char*)jl_array_data(s.link_ids_gvars), jl_array_len(s.link_ids_gvars)*sizeof(uint64_t)); + jl_write_arraylist(s.s, 
&s.ccallable_list); + } + // Write the build_id key + uint64_t buildid = 0; + if (worklist) + buildid = jl_worklist_key(worklist); + write_uint32(f, buildid >> 32); + write_uint32(f, buildid & (((uint64_t)1 << 32) - 1)); assert(object_worklist.len == 0); arraylist_free(&object_worklist); + arraylist_free(&serialization_queue); arraylist_free(&layout_table); - arraylist_free(&reinit_list); - arraylist_free(&ccallable_list); + arraylist_free(&s.ccallable_list); arraylist_free(&s.relocs_list); arraylist_free(&s.gctags_list); + arraylist_free(&gvars); htable_free(&field_replace); - jl_cleanup_serializer2(); + if (worklist) + htable_free(&external_objects); + htable_free(&serialization_order); + htable_free(&unique_ready); + htable_free(&nullptrs); + htable_free(&bindings); + htable_free(&symbol_table); + htable_free(&fptr_to_id); + nsym_tag = 0; jl_gc_enable(en); } -JL_DLLEXPORT ios_t *jl_create_system_image(void *_native_data) +static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_array_t **mod_array, jl_array_t **udeps, int64_t *srctextpos, int64_t *checksumpos) { + *mod_array = jl_get_loaded_modules(); // __toplevel__ modules loaded in this session (from Base.loaded_modules_array) + assert(jl_precompile_toplevel_module == NULL); + jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1); + + write_header(f); + // last word of the header is the checksumpos + *checksumpos = ios_pos(f) - sizeof(uint64_t); + // write description of contents (name, uuid, buildid) + write_worklist_for_header(f, worklist); + // Determine unique (module, abspath, mtime) dependencies for the files defining modules in the worklist + // (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header. + // Also write Preferences. 
+ // last word of the dependency list is the end of the data / start of the srctextpos + *srctextpos = write_dependency_list(f, worklist, udeps); // srctextpos: position of srctext entry in header index (update later) + // write description of requirements for loading (modules that must be pre-loaded if initialization is to succeed) + // this can return errors during deserialize, + // best to keep it early (before any actual initialization) + write_mod_list(f, *mod_array); +} + + +JL_DLLEXPORT ios_t *jl_create_system_image(void *_native_data, jl_array_t *worklist) +{ + jl_gc_collect(JL_GC_FULL); + jl_gc_collect(JL_GC_INCREMENTAL); // sweep finalizers + JL_TIMING(SYSIMG_DUMP); + + jl_task_t *ct = jl_current_task; ios_t *f = (ios_t*)malloc_s(sizeof(ios_t)); ios_mem(f, 0); + jl_array_t *mod_array = NULL, *udeps = NULL, *extext_methods = NULL, *new_specializations = NULL; + jl_array_t *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL; + JL_GC_PUSH7(&mod_array, &udeps, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges); + int64_t srctextpos = 0; + int64_t checksumpos = 0; + int64_t datastartpos = 0; + if (worklist) { + jl_write_header_for_incremental(f, worklist, &mod_array, &udeps, &srctextpos, &checksumpos); + jl_gc_enable_finalizers(ct, 0); // make sure we don't run any Julia code concurrently after this point + jl_prepare_serialization_data(mod_array, newly_inferred, jl_worklist_key(worklist), &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges); + write_padding(f, LLT_ALIGN(ios_pos(f), JL_CACHE_BYTE_ALIGNMENT) - ios_pos(f)); + datastartpos = ios_pos(f); + } native_functions = _native_data; - jl_save_system_image_to_stream(f); + jl_save_system_image_to_stream(f, worklist, extext_methods, new_specializations, method_roots_list, ext_targets, edges); + native_functions = NULL; + if (worklist) { + jl_gc_enable_finalizers(ct, 1); // make sure we don't run any Julia code concurrently before this point + 
// Go back and update the checksum in the header + int64_t dataendpos = ios_pos(f); + uint32_t checksum = jl_crc32c(0, &f->buf[datastartpos], dataendpos - datastartpos); + ios_seek(f, checksumpos); + write_uint64(f, checksum | ((uint64_t)0xfafbfcfd << 32)); + ios_seek(f, srctextpos); + write_uint64(f, dataendpos); + // Write the source-text for the dependent files + // Go back and update the source-text position to point to the current position + if (udeps) { + ios_seek_end(f); + // Each source-text file is written as + // int32: length of abspath + // char*: abspath + // uint64: length of src text + // char*: src text + // At the end we write int32(0) as a terminal sentinel. + size_t len = jl_array_len(udeps); + ios_t srctext; + for (size_t i = 0; i < len; i++) { + jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); + jl_value_t *depmod = jl_fieldref(deptuple, 0); // module + // Dependencies declared with `include_dependency` are excluded + // because these may not be Julia code (and could be huge) + if (depmod != (jl_value_t*)jl_main_module) { + jl_value_t *dep = jl_fieldref(deptuple, 1); // file abspath + const char *depstr = jl_string_data(dep); + if (!depstr[0]) + continue; + ios_t *srctp = ios_file(&srctext, depstr, 1, 0, 0, 0); + if (!srctp) { + jl_printf(JL_STDERR, "WARNING: could not cache source text for \"%s\".\n", + jl_string_data(dep)); + continue; + } + size_t slen = jl_string_len(dep); + write_int32(f, slen); + ios_write(f, depstr, slen); + int64_t posfile = ios_pos(f); + write_uint64(f, 0); // placeholder for length of this file in bytes + uint64_t filelen = (uint64_t) ios_copyall(f, &srctext); + ios_close(&srctext); + ios_seek(f, posfile); + write_uint64(f, filelen); + ios_seek_end(f); + } + } + } + write_int32(f, 0); // mark the end of the source text + jl_precompile_toplevel_module = NULL; + } + + JL_GC_POP(); return f; } JL_DLLEXPORT size_t ios_write_direct(ios_t *dest, ios_t *src); -JL_DLLEXPORT void jl_save_system_image(const char *fname) -{ - 
ios_t f; - if (ios_file(&f, fname, 1, 1, 1, 1) == NULL) { - jl_errorf("cannot open system image file \"%s\" for writing", fname); - } - JL_SIGATOMIC_BEGIN(); - jl_save_system_image_to_stream(&f); - ios_close(&f); - JL_SIGATOMIC_END(); -} // Takes in a path of the form "usr/lib/julia/sys.so" (jl_restore_system_image should be passed the same string) JL_DLLEXPORT void jl_preload_sysimg_so(const char *fname) @@ -2164,16 +2628,31 @@ JL_DLLEXPORT void jl_set_sysimg_so(void *handle) if (jl_options.cpu_target == NULL) jl_options.cpu_target = "native"; jl_sysimg_handle = handle; - sysimg_fptrs = jl_init_processor_sysimg(handle); + sysimage.fptrs = jl_init_processor_sysimg(handle); } -static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED +#ifndef JL_NDEBUG +// skip the performance optimizations of jl_types_equal and just use subtyping directly +// one of these types is invalid - that's why we're doing the recache type operation +// static int jl_invalid_types_equal(jl_datatype_t *a, jl_datatype_t *b) +// { +// return jl_subtype((jl_value_t*)a, (jl_value_t*)b) && jl_subtype((jl_value_t*)b, (jl_value_t*)a); +// } +#endif + +static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl_array_t *depmods, uint64_t checksum, + /* outputs */ jl_array_t **restored, jl_array_t **init_order, + jl_array_t **extext_methods, + jl_array_t **new_specializations, jl_array_t **method_roots_list, + jl_array_t **ext_targets, jl_array_t **edges, + char **base, arraylist_t *ccallable_list, pkgcachesizes *cachesizes) JL_GC_DISABLED { JL_TIMING(SYSIMG_LOAD); int en = jl_gc_enable(0); - jl_init_serializer2(0); ios_t sysimg, const_data, symbols, relocs, gvar_record, fptr_record; jl_serializer_state s; + s.incremental = restored != NULL; // jl_linkage_blobs.len > 0; + s.image = image; s.s = NULL; s.const_data = &const_data; s.symbols = &symbols; @@ -2183,7 +2662,11 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED s.ptls = 
jl_current_task->ptls; arraylist_new(&s.relocs_list, 0); arraylist_new(&s.gctags_list, 0); + s.link_ids_relocs = s.link_ids_gctags = s.link_ids_gvars = NULL; jl_value_t **const*const tags = get_tags(); + htable_t new_dt_objs; + htable_new(&new_dt_objs, 0); + arraylist_new(&deser_sym, 0); // step 1: read section map assert(ios_pos(f) == 0 && f->bm == bm_mem); @@ -2221,27 +2704,67 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED ios_skip(f, sizeof_fptr_record); // step 2: get references to special values - s.s = f; ios_seek(f, LLT_ALIGN(ios_pos(f), 8)); assert(!ios_eof(f)); - size_t i; - for (i = 0; tags[i] != NULL; i++) { - jl_value_t **tag = tags[i]; - *tag = jl_read_value(&s); - } - jl_global_roots_table = (jl_array_t*)jl_read_value(&s); - // set typeof extra-special values now that we have the type set by tags above - jl_astaggedvalue(jl_current_task)->header = (uintptr_t)jl_task_type | jl_astaggedvalue(jl_current_task)->header; - jl_astaggedvalue(jl_nothing)->header = (uintptr_t)jl_nothing_type | jl_astaggedvalue(jl_nothing)->header; - s.ptls->root_task->tls = jl_read_value(&s); - jl_gc_wb(s.ptls->root_task, s.ptls->root_task->tls); - jl_init_int32_int64_cache(); - jl_init_box_caches(); - - uint32_t gs_ctr = read_uint32(f); - jl_atomic_store_release(&jl_world_counter, read_uint(f)); - jl_typeinf_world = read_uint(f); - jl_set_gs_ctr(gs_ctr); + s.s = f; + uintptr_t offset_restored = 0, offset_init_order = 0, offset_extext_methods = 0, offset_new_specializations = 0, offset_method_roots_list = 0; + uintptr_t offset_ext_targets = 0, offset_edges = 0; + if (!s.incremental) { + size_t i; + for (i = 0; tags[i] != NULL; i++) { + jl_value_t **tag = tags[i]; + *tag = jl_read_value(&s); + } + jl_global_roots_table = (jl_array_t*)jl_read_value(&s); + // set typeof extra-special values now that we have the type set by tags above + jl_astaggedvalue(jl_current_task)->header = (uintptr_t)jl_task_type | jl_astaggedvalue(jl_current_task)->header; + 
jl_astaggedvalue(jl_nothing)->header = (uintptr_t)jl_nothing_type | jl_astaggedvalue(jl_nothing)->header; + s.ptls->root_task->tls = jl_read_value(&s); + jl_gc_wb(s.ptls->root_task, s.ptls->root_task->tls); + jl_init_int32_int64_cache(); + jl_init_box_caches(); + + uint32_t gs_ctr = read_uint32(f); + jl_atomic_store_release(&jl_world_counter, read_uint(f)); + jl_typeinf_world = read_uint(f); + jl_set_gs_ctr(gs_ctr); + } + else { + jl_atomic_fetch_add(&jl_world_counter, 1); + offset_restored = jl_read_offset(&s); + offset_init_order = jl_read_offset(&s); + offset_extext_methods = jl_read_offset(&s); + offset_new_specializations = jl_read_offset(&s); + offset_method_roots_list = jl_read_offset(&s); + offset_ext_targets = jl_read_offset(&s); + offset_edges = jl_read_offset(&s); + } + size_t nlinks_gctags = read_uint32(f); + if (nlinks_gctags > 0) { + s.link_ids_gctags = jl_alloc_array_1d(jl_array_uint64_type, nlinks_gctags); + ios_read(f, (char*)jl_array_data(s.link_ids_gctags), nlinks_gctags * sizeof(uint64_t)); + } + size_t nlinks_relocs = read_uint32(f); + if (nlinks_relocs > 0) { + s.link_ids_relocs = jl_alloc_array_1d(jl_array_uint64_type, nlinks_relocs); + ios_read(f, (char*)jl_array_data(s.link_ids_relocs), nlinks_relocs * sizeof(uint64_t)); + } + size_t nlinks_gvars = read_uint32(f); + if (nlinks_gvars > 0) { + s.link_ids_gvars = jl_alloc_array_1d(jl_array_uint64_type, nlinks_gvars); + ios_read(f, (char*)jl_array_data(s.link_ids_gvars), nlinks_gvars * sizeof(uint64_t)); + } + jl_read_arraylist(s.s, ccallable_list ? 
ccallable_list : &s.ccallable_list); + if (s.incremental) { + assert(restored && init_order && extext_methods && new_specializations && method_roots_list && ext_targets && edges); + *restored = (jl_array_t*)jl_delayed_reloc(&s, offset_restored); + *init_order = (jl_array_t*)jl_delayed_reloc(&s, offset_init_order); + *extext_methods = (jl_array_t*)jl_delayed_reloc(&s, offset_extext_methods); + *new_specializations = (jl_array_t*)jl_delayed_reloc(&s, offset_new_specializations); + *method_roots_list = (jl_array_t*)jl_delayed_reloc(&s, offset_method_roots_list); + *ext_targets = (jl_array_t*)jl_delayed_reloc(&s, offset_ext_targets); + *edges = (jl_array_t*)jl_delayed_reloc(&s, offset_edges); + } s.s = NULL; // step 3: apply relocations @@ -2249,26 +2772,333 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED jl_read_symbols(&s); ios_close(&symbols); - sysimg_base = &sysimg.buf[0]; - sysimg_relocs = &relocs.buf[0]; - jl_gc_set_permalloc_region((void*)sysimg_base, (void*)(sysimg_base + sysimg.size)); + char *image_base = (char*)&sysimg.buf[0]; + reloc_t *relocs_base = (reloc_t*)&relocs.buf[0]; + if (base) + *base = image_base; s.s = &sysimg; - jl_read_reloclist(&s, GC_OLD); // gctags + jl_read_reloclist(&s, s.link_ids_gctags, GC_OLD); // gctags size_t sizeof_tags = ios_pos(&relocs); (void)sizeof_tags; - jl_read_reloclist(&s, 0); // general relocs + jl_read_reloclist(&s, s.link_ids_relocs, 0); // general relocs + // s.link_ids_gvars will be processed in `jl_update_all_gvars` + jl_update_all_gvars(&s, image); // gvars relocs + if (s.incremental) { + jl_read_arraylist(s.relocs, &s.uniquing_types); + jl_read_arraylist(s.relocs, &s.uniquing_objs); + jl_read_arraylist(s.relocs, &s.fixup_types); + } + else { + arraylist_new(&s.uniquing_types, 0); + arraylist_new(&s.uniquing_objs, 0); + arraylist_new(&s.fixup_types, 0); + } + jl_read_arraylist(s.relocs, &s.fixup_objs); + // Perform the uniquing of objects that we don't "own" and consequently can't 
promise + // weren't created by some other package before this one got loaded: + // - iterate through all objects that need to be uniqued. The first encounter has to be the + // "reconstructable blob". We either look up the object (if something has created it previously) + // or construct it for the first time, crucially outside the pointer range of any pkgimage. + // This ensures it stays unique-worthy. + // - after we've stored the address of the "real" object (which for convenience we do among the data + // written to allow lookup/reconstruction), then we have to update references to that "reconstructable blob": + // instead of performing the relocation within the package image, we instead (re)direct all references + // to the external object. + arraylist_t cleanup_list; + arraylist_new(&cleanup_list, 0); + arraylist_t delay_list; + arraylist_new(&delay_list, 0); + for (size_t i = 0; i < s.uniquing_types.len; i++) { + uintptr_t item = (uintptr_t)s.uniquing_types.items[i]; + // check whether we are operating on the typetag + // (needing to ignore GC bits) or a regular field + int tag = (item & 1) == 1; + // check whether this is a gvar index + int gvar = (item & 2) == 2; + item &= ~(uintptr_t)3; + uintptr_t *pfld; + jl_value_t **obj, *newobj; + if (gvar) { + if (image->gvars_base == NULL) + continue; + item >>= 2; + assert(item < s.gvar_record->size / sizeof(reloc_t)); + pfld = sysimg_gvars(image->gvars_base, image->gvars_offsets, item); + obj = *(jl_value_t***)pfld; + assert(tag == 0); + } + else { + pfld = (uintptr_t*)(image_base + item); + if (tag) + obj = (jl_value_t**)jl_typeof(jl_valueof(pfld)); + else + obj = *(jl_value_t***)pfld; + if ((char*)obj > (char*)pfld) { + assert(tag == 0); + arraylist_push(&delay_list, pfld); + arraylist_push(&delay_list, obj); + ptrhash_put(&new_dt_objs, (void*)obj, obj); // mark obj as invalid + *pfld = (uintptr_t)NULL; + continue; + } + } + jl_value_t *otyp = jl_typeof(obj); // the original type of the object that was written 
here + assert(image_base < (char*)obj && (char*)obj <= image_base + sizeof_sysimg + sizeof(uintptr_t)); + if (otyp == (jl_value_t*)jl_datatype_type) { + jl_datatype_t *dt = (jl_datatype_t*)obj[0], *newdt; + if (jl_is_datatype(dt)) { + newdt = dt; // already done + } + else { + dt = (jl_datatype_t*)obj; + arraylist_push(&cleanup_list, (void*)obj); + ptrhash_remove(&new_dt_objs, (void*)obj); // unmark obj as invalid before must_be_new_dt + if (must_be_new_dt((jl_value_t*)dt, &new_dt_objs, image_base, sizeof_sysimg)) + newdt = NULL; + else + newdt = jl_lookup_cache_type_(dt); + if (newdt == NULL) { + // make a non-owned copy of obj so we don't accidentally + // assume this is the unique copy later + newdt = jl_new_uninitialized_datatype(); + jl_astaggedvalue(newdt)->bits.gc = GC_OLD; + // leave most fields undefined for now, but we may need instance later, + // and we overwrite the name field (field 0) now so preserve it too + if (dt->instance) { + assert(dt->instance == jl_nothing); + newdt->instance = dt->instance = jl_gc_permobj(0, newdt); + } + static_assert(offsetof(jl_datatype_t, name) == 0, ""); + newdt->name = dt->name; + ptrhash_put(&new_dt_objs, (void*)newdt, dt); + } + else { + assert(newdt->hash == dt->hash); + } + obj[0] = (jl_value_t*)newdt; + } + newobj = (jl_value_t*)newdt; + } + else { + assert(!(image_base < (char*)otyp && (char*)otyp <= image_base + sizeof_sysimg + sizeof(uintptr_t))); + assert(jl_is_datatype_singleton((jl_datatype_t*)otyp) && "unreachable"); + newobj = ((jl_datatype_t*)otyp)->instance; + assert(newobj != jl_nothing); + arraylist_push(&cleanup_list, (void*)obj); + } + if (tag) + *pfld = (uintptr_t)newobj | GC_OLD; + else + *pfld = (uintptr_t)newobj; + assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg + sizeof(uintptr_t))); + assert(jl_typeis(obj, otyp)); + } + // A few fields (reached via super) might be self-recursive. This is rare, but handle them now. 
+ // They cannot be instances though, since the type must fully exist before the singleton field can be allocated + for (size_t i = 0; i < delay_list.len; ) { + uintptr_t *pfld = (uintptr_t*)delay_list.items[i++]; + jl_value_t **obj = (jl_value_t **)delay_list.items[i++]; + assert(jl_is_datatype(obj)); + jl_datatype_t *dt = (jl_datatype_t*)obj[0]; + assert(jl_is_datatype(dt)); + jl_value_t *newobj = (jl_value_t*)dt; + *pfld = (uintptr_t)newobj; + assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg + sizeof(uintptr_t))); + } + arraylist_free(&delay_list); + // now that all the fields of dt are assigned and unique, copy them into + // their final newdt memory location: this ensures we do not accidentally + // think this pkg image has the singular unique copy of it + void **table = new_dt_objs.table; + for (size_t i = 0; i < new_dt_objs.size; i += 2) { + void *dt = table[i + 1]; + if (dt != HT_NOTFOUND) { + jl_datatype_t *newdt = (jl_datatype_t*)table[i]; + jl_typename_t *name = newdt->name; + static_assert(offsetof(jl_datatype_t, name) == 0, ""); + assert(*(void**)dt == (void*)newdt); + *newdt = *(jl_datatype_t*)dt; // copy the datatype fields (except field 1, which we corrupt above) + newdt->name = name; + } + } + // we should never see these pointers again, so scramble their memory, so any attempt to look at them crashes + for (size_t i = 0; i < cleanup_list.len; i++) { + void *item = cleanup_list.items[i]; + jl_taggedvalue_t *o = jl_astaggedvalue(item); + jl_value_t *t = jl_typeof(item); // n.b. 
might be 0xbabababa already + if (t == (jl_value_t*)jl_datatype_type) + memset(o, 0xba, sizeof(jl_value_t*) + sizeof(jl_datatype_t)); + else + memset(o, 0xba, sizeof(jl_value_t*) + 0); // singleton + } + arraylist_grow(&cleanup_list, -cleanup_list.len); + // finally cache all our new types now + for (size_t i = 0; i < new_dt_objs.size; i += 2) { + void *dt = table[i + 1]; + if (dt != HT_NOTFOUND) { + jl_datatype_t *newdt = (jl_datatype_t*)table[i]; + jl_cache_type_(newdt); + } + } + for (size_t i = 0; i < s.fixup_types.len; i++) { + uintptr_t item = (uintptr_t)s.fixup_types.items[i]; + jl_value_t *obj = (jl_value_t*)(image_base + item); + assert(jl_is_datatype(obj)); + jl_cache_type_((jl_datatype_t*)obj); + } + // Perform fixups: things like updating world ages, inserting methods & specializations, etc. + size_t world = jl_atomic_load_acquire(&jl_world_counter); + for (size_t i = 0; i < s.uniquing_objs.len; i++) { + uintptr_t item = (uintptr_t)s.uniquing_objs.items[i]; + // check whether this is a gvar index + int gvar = (item & 2) == 2; + item &= ~(uintptr_t)3; + uintptr_t *pfld; + jl_value_t **obj, *newobj; + if (gvar) { + if (image->gvars_base == NULL) + continue; + item >>= 2; + assert(item < s.gvar_record->size / sizeof(reloc_t)); + pfld = sysimg_gvars(image->gvars_base, image->gvars_offsets, item); + obj = *(jl_value_t***)pfld; + } + else { + pfld = (uintptr_t*)(image_base + item); + obj = *(jl_value_t***)pfld; + } + jl_value_t *otyp = jl_typeof(obj); // the original type of the object that was written here + if (otyp == (jl_value_t*)jl_method_instance_type) { + assert(image_base < (char*)obj && (char*)obj <= image_base + sizeof_sysimg + sizeof(uintptr_t)); + jl_value_t *m = obj[0]; + if (jl_is_method_instance(m)) { + newobj = m; // already done + } + else { + arraylist_push(&cleanup_list, (void*)obj); + jl_value_t *specTypes = obj[1]; + jl_value_t *sparams = obj[2]; + newobj = (jl_value_t*)jl_specializations_get_linfo((jl_method_t*)m, specTypes, 
(jl_svec_t*)sparams); + obj[0] = newobj; + } + } + else if (otyp == (jl_value_t*)jl_globalref_type) { + // this actually needs a binding_t object at that gvar slot if we encountered it in the uniquing_objs + jl_globalref_t *g = (jl_globalref_t*)obj; + jl_binding_t *b = jl_get_binding_if_bound(g->mod, g->name); + assert(b); // XXX: actually this is probably quite buggy, since julia's handling of global resolution is rather bad + newobj = (jl_value_t*)b; + } + else { + abort(); // should be unreachable + } + *pfld = (uintptr_t)newobj; + assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg + sizeof(uintptr_t))); + assert(jl_typeis(obj, otyp)); + } + arraylist_free(&s.uniquing_types); + arraylist_free(&s.uniquing_objs); + for (size_t i = 0; i < cleanup_list.len; i++) { + void *item = cleanup_list.items[i]; + jl_taggedvalue_t *o = jl_astaggedvalue(item); + jl_value_t *t = jl_typeof(item); + if (t == (jl_value_t*)jl_method_instance_type) + memset(o, 0xba, sizeof(jl_value_t*) * 3); // only specTypes and sparams fields stored + } + arraylist_free(&cleanup_list); + for (size_t i = 0; i < s.fixup_objs.len; i++) { + uintptr_t item = (uintptr_t)s.fixup_objs.items[i]; + jl_value_t *obj = (jl_value_t*)(image_base + item); + if (jl_typeis(obj, jl_typemap_entry_type)) { + jl_typemap_entry_t *entry = (jl_typemap_entry_t*)obj; + entry->min_world = world; + } + else if (jl_is_method(obj)) { + jl_method_t *m = (jl_method_t*)obj; + m->primary_world = world; + } + else if (jl_is_method_instance(obj)) { + jl_method_instance_t *newobj = jl_specializations_get_or_insert((jl_method_instance_t*)obj); + assert(newobj == (jl_method_instance_t*)obj); // strict insertion expected + (void)newobj; + } + else if (jl_is_code_instance(obj)) { + jl_code_instance_t *ci = (jl_code_instance_t*)obj; + assert(s.incremental); + ci->min_world = world; + if (ci->max_world == 1) { // sentinel value: has edges to external callables + ptrhash_put(&new_code_instance_validate, ci, 
(void*)(~(uintptr_t)HT_NOTFOUND)); // "HT_FOUND" + } + else if (ci->max_world) { + // It's valid, but it may not be connected + if (!ci->def->cache) + ci->def->cache = ci; + } + else { + // Ensure this code instance is not connected + if (ci->def->cache == ci) + ci->def->cache = NULL; + } + } + else if (jl_is_globalref(obj)) { + continue; // wait until all the module binding tables have been initialized + } + else if (jl_is_module(obj)) { + // rebuild the binding table for module v + // TODO: maybe want to delay this more, but that only strongly matters for async / thread safety + // and we are already bad at that + jl_module_t *mod = (jl_module_t*)obj; + mod->build_id.hi = checksum; + size_t nbindings = mod->bindings.size; + htable_new(&mod->bindings, nbindings); + struct binding { + jl_sym_t *asname; + uintptr_t tag; + jl_binding_t b; + } *b; + b = (struct binding*)&mod[1]; + while (nbindings > 0) { + ptrhash_put(&mod->bindings, b->asname, &b->b); + b += 1; + nbindings -= 1; + } + if (mod->usings.items != &mod->usings._space[0]) { + void **newitems = (void**)malloc_s(mod->usings.max * sizeof(void*)); + memcpy(newitems, mod->usings.items, mod->usings.len * sizeof(void*)); + mod->usings.items = newitems; + } + } + else { + // rehash IdDict + //assert(((jl_datatype_t*)(jl_typeof(obj)))->name == jl_idtable_typename); + jl_array_t **a = (jl_array_t**)obj; + assert(jl_typeis(*a, jl_array_any_type)); + *a = jl_idtable_rehash(*a, jl_array_len(*a)); + jl_gc_wb(obj, *a); + } + } + // Now pick up the globalref binding pointer field, when we can + for (size_t i = 0; i < s.fixup_objs.len; i++) { + uintptr_t item = (uintptr_t)s.fixup_objs.items[i]; + jl_value_t *obj = (jl_value_t*)(image_base + item); + if (jl_is_globalref(obj)) { + jl_globalref_t *r = (jl_globalref_t*)obj; + jl_binding_t *b = jl_get_binding_if_bound(r->mod, r->name); + r->bnd_cache = b && b->value ? 
b : NULL; + } + } + arraylist_free(&s.fixup_types); + arraylist_free(&s.fixup_objs); + + if (s.incremental) + jl_root_new_gvars(&s, image); ios_close(&relocs); ios_close(&const_data); - jl_update_all_gvars(&s); // gvars relocs ios_close(&gvar_record); - s.s = NULL; - jl_kwcall_mt = ((jl_datatype_t*)jl_typeof(jl_kwcall_func))->name->mt; + htable_free(&new_dt_objs); - s.s = f; - // reinit items except ccallables - jl_finalize_deserializer(&s); s.s = NULL; if (0) { @@ -2288,21 +3118,166 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED (unsigned)sizeof_gvar_record, (unsigned)sizeof_fptr_record); } + if (cachesizes) { + cachesizes->sysdata = sizeof_sysimg; + cachesizes->isbitsdata = sizeof_constdata; + cachesizes->symboldata = sizeof_symbols; + cachesizes->tagslist = sizeof_tags; + cachesizes->reloclist = sizeof_relocations - sizeof_tags; + cachesizes->gvarlist = sizeof_gvar_record; + cachesizes->fptrlist = sizeof_fptr_record; + } + if (!s.incremental) + jl_init_codegen(); s.s = &sysimg; - jl_init_codegen(); - jl_update_all_fptrs(&s); // fptr relocs and registration - // reinit ccallables, which require codegen to be initialized - s.s = f; - jl_finalize_deserializer(&s); + jl_update_all_fptrs(&s, image); // fptr relocs and registration + if (!ccallable_list) { + // TODO: jl_sysimg_handle or img_handle? 
+ jl_reinit_ccallable(&s.ccallable_list, image_base, jl_sysimg_handle); + arraylist_free(&s.ccallable_list); + } + s.s = NULL; ios_close(&fptr_record); ios_close(&sysimg); - s.s = NULL; - jl_gc_reset_alloc_count(); + if (!s.incremental) + jl_gc_reset_alloc_count(); + arraylist_free(&deser_sym); + + // Prepare for later external linkage against the sysimg + // Also sets up images for protection against garbage collection + arraylist_push(&jl_linkage_blobs, (void*)image_base); + arraylist_push(&jl_linkage_blobs, (void*)(image_base + sizeof_sysimg + sizeof(uintptr_t))); + arraylist_push(&jl_image_relocs, (void*)relocs_base); + + // jl_printf(JL_STDOUT, "%ld blobs to link against\n", jl_linkage_blobs.len >> 1); + uint64_t buildid = (((uint64_t)read_uint32(f)) << 32) | read_uint32(f); + if (!jl_build_ids) + jl_build_ids = jl_alloc_array_1d(jl_array_uint64_type, 0); + jl_array_grow_end(jl_build_ids, 1); + uint64_t *build_id_data = (uint64_t*)jl_array_data(jl_build_ids); + build_id_data[jl_array_len(jl_build_ids)-1] = buildid; jl_gc_enable(en); - jl_cleanup_serializer2(); +} + +static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_t *checksum, int64_t *dataendpos) +{ + if (ios_eof(f) || 0 == (*checksum = jl_read_verify_header(f)) || (*checksum >> 32 != 0xfafbfcfd)) { + return jl_get_exceptionf(jl_errorexception_type, + "Precompile file header verification checks failed."); + } + { // skip past the mod list + size_t len; + while ((len = read_int32(f))) + ios_skip(f, len + 3 * sizeof(uint64_t)); + } + { // skip past the dependency list + size_t deplen = read_uint64(f); + ios_skip(f, deplen - sizeof(uint64_t)); + *dataendpos = read_uint64(f); + } + + // verify that the system state is valid + return read_verify_mod_list(f, depmods); +} + +// TODO?: refactor to make it easier to create the "package inspector" +static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *image, jl_array_t *depmods, int complete) +{ + uint64_t checksum 
= 0; + int64_t dataendpos = 0; + jl_value_t *verify_fail = jl_validate_cache_file(f, depmods, &checksum, &dataendpos); + if (verify_fail) + return verify_fail; + + jl_value_t *restored = NULL; + jl_array_t *init_order = NULL, *extext_methods = NULL, *new_specializations = NULL, *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL; + jl_svec_t *cachesizes_sv = NULL; + char *base; + arraylist_t ccallable_list; + JL_GC_PUSH8(&restored, &init_order, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges, &cachesizes_sv); + + { // make a permanent in-memory copy of f (excluding the header) + ios_bufmode(f, bm_none); + JL_SIGATOMIC_BEGIN(); + size_t len_begin = LLT_ALIGN(ios_pos(f), JL_CACHE_BYTE_ALIGNMENT); + assert(len_begin > 0 && len_begin < dataendpos); + size_t len = dataendpos - len_begin; + char *sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); + ios_seek(f, len_begin); + if (ios_readall(f, sysimg, len) != len || jl_crc32c(0, sysimg, len) != (uint32_t)checksum) { + restored = jl_get_exceptionf(jl_errorexception_type, "Error reading system image file."); + JL_SIGATOMIC_END(); + } + else { + ios_close(f); + ios_static_buffer(f, sysimg, len); + htable_new(&new_code_instance_validate, 0); + pkgcachesizes cachesizes; + jl_restore_system_image_from_stream_(f, image, depmods, checksum, (jl_array_t**)&restored, &init_order, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges, &base, &ccallable_list, &cachesizes); + JL_SIGATOMIC_END(); + + // Insert method extensions + jl_insert_methods(extext_methods); + // No special processing of `new_specializations` is required because recaching handled it + // Add roots to methods + jl_copy_roots(method_roots_list, jl_worklist_key((jl_array_t*)restored)); + // Handle edges + jl_insert_backedges((jl_array_t*)edges, (jl_array_t*)ext_targets, (jl_array_t*)new_specializations); // restore external backedges (needs to be last) + // check new CodeInstances and validate any that 
lack external backedges + validate_new_code_instances(); + // reinit ccallables + jl_reinit_ccallable(&ccallable_list, base, NULL); + arraylist_free(&ccallable_list); + htable_free(&new_code_instance_validate); + if (complete) { + cachesizes_sv = jl_alloc_svec_uninit(7); + jl_svec_data(cachesizes_sv)[0] = jl_box_long(cachesizes.sysdata); + jl_svec_data(cachesizes_sv)[1] = jl_box_long(cachesizes.isbitsdata); + jl_svec_data(cachesizes_sv)[2] = jl_box_long(cachesizes.symboldata); + jl_svec_data(cachesizes_sv)[3] = jl_box_long(cachesizes.tagslist); + jl_svec_data(cachesizes_sv)[4] = jl_box_long(cachesizes.reloclist); + jl_svec_data(cachesizes_sv)[5] = jl_box_long(cachesizes.gvarlist); + jl_svec_data(cachesizes_sv)[6] = jl_box_long(cachesizes.fptrlist); + restored = (jl_value_t*)jl_svec(8, restored, init_order, extext_methods, new_specializations, method_roots_list, + ext_targets, edges, cachesizes_sv); + } else + restored = (jl_value_t*)jl_svec(2, restored, init_order); + } + } + + JL_GC_POP(); + return restored; +} + +static void jl_restore_system_image_from_stream(ios_t *f, jl_image_t *image) +{ + uint64_t checksum = 0; // TODO: make this real + jl_restore_system_image_from_stream_(f, image, NULL, checksum, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); +} + +JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, jl_image_t *image, size_t sz, jl_array_t *depmods, int complete) +{ + ios_t f; + ios_static_buffer(&f, (char*)buf, sz); + jl_value_t *ret = jl_restore_package_image_from_stream(&f, image, depmods, complete); + ios_close(&f); + return ret; +} + +JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods, int complete) +{ + ios_t f; + if (ios_file(&f, fname, 1, 0, 0, 0) == NULL) { + return jl_get_exceptionf(jl_errorexception_type, + "Cache file \"%s\" not found.\n", fname); + } + jl_image_t pkgimage = {}; + jl_value_t *ret = jl_restore_package_image_from_stream(&f, &pkgimage, depmods, complete); + 
ios_close(&f); + return ret; } // TODO: need to enforce that the alignment of the buffer is suitable for vectors @@ -2332,7 +3307,7 @@ JL_DLLEXPORT void jl_restore_system_image(const char *fname) jl_errorf("Error reading system image file."); ios_close(&f); ios_static_buffer(&f, sysimg, len); - jl_restore_system_image_from_stream(&f); + jl_restore_system_image_from_stream(&f, &sysimage); ios_close(&f); JL_SIGATOMIC_END(); } @@ -2343,38 +3318,52 @@ JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len) ios_t f; JL_SIGATOMIC_BEGIN(); ios_static_buffer(&f, (char*)buf, len); - jl_restore_system_image_from_stream(&f); + jl_restore_system_image_from_stream(&f, &sysimage); ios_close(&f); JL_SIGATOMIC_END(); } -// --- init --- - -static void jl_init_serializer2(int for_serialize) +JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, jl_array_t *depmods) { - if (for_serialize) { - htable_new(&symbol_table, 0); - htable_new(&fptr_to_id, sizeof(id_to_fptrs) / sizeof(*id_to_fptrs)); - htable_new(&backref_table, 0); - htable_new(&layout_cache, 0); - uintptr_t i; - for (i = 0; id_to_fptrs[i] != NULL; i++) { - ptrhash_put(&fptr_to_id, (void*)(uintptr_t)id_to_fptrs[i], (void*)(i + 2)); - } + void *pkgimg_handle = jl_dlopen(fname, JL_RTLD_LAZY); + if (!pkgimg_handle) { +#ifdef _OS_WINDOWS_ + int err; + char reason[256]; + err = GetLastError(); + win32_formatmessage(err, reason, sizeof(reason)); +#else + const char *reason = dlerror(); +#endif + jl_errorf("Error opening package file %s: %s\n", fname, reason); } - else { - arraylist_new(&deser_sym, 0); + const char *pkgimg_data; + jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); + size_t *plen; + jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); + + jl_image_t pkgimage; + pkgimage.fptrs = jl_init_processor_pkgimg(pkgimg_handle); + if (!jl_dlsym(pkgimg_handle, "jl_sysimg_gvars_base", (void **)&pkgimage.gvars_base, 0)) { + pkgimage.gvars_base = 
NULL; } - nsym_tag = 0; -} + jl_dlsym(pkgimg_handle, "jl_sysimg_gvars_offsets", (void **)&pkgimage.gvars_offsets, 1); + pkgimage.gvars_offsets += 1; + jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_data, &pkgimage, *plen, depmods, 0); -static void jl_cleanup_serializer2(void) -{ - htable_reset(&symbol_table, 0); - htable_reset(&fptr_to_id, 0); - htable_reset(&backref_table, 0); - htable_reset(&layout_cache, 0); - arraylist_free(&deser_sym); + void *pgcstack_func_slot; + jl_dlsym(pkgimg_handle, "jl_pgcstack_func_slot", &pgcstack_func_slot, 0); + if (pgcstack_func_slot) { // Empty package images might miss these + void *pgcstack_key_slot; + jl_dlsym(pkgimg_handle, "jl_pgcstack_key_slot", &pgcstack_key_slot, 1); + jl_pgcstack_getkey((jl_get_pgcstack_func**)pgcstack_func_slot, (jl_pgcstack_key_t*)pgcstack_key_slot); + + size_t *tls_offset_idx; + jl_dlsym(pkgimg_handle, "jl_tls_offset", (void **)&tls_offset_idx, 1); + *tls_offset_idx = (uintptr_t)(jl_tls_offset == -1 ? 0 : jl_tls_offset); + } + + return mod; } #ifdef __cplusplus diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c new file mode 100644 index 0000000000000..3d02dddbd5a70 --- /dev/null +++ b/src/staticdata_utils.c @@ -0,0 +1,1279 @@ +static htable_t new_code_instance_validate; +static htable_t external_mis; + +// inverse of backedges graph (caller=>callees hash) +jl_array_t *edges_map JL_GLOBALLY_ROOTED = NULL; // rooted for the duration of our uses of this + +static void write_float64(ios_t *s, double x) JL_NOTSAFEPOINT +{ + write_uint64(s, *((uint64_t*)&x)); +} + +// Decide if `t` must be new, because it points to something new. 
+// If it is new, the object (in particular, the super field) might not be entirely
+// valid for the cache, so we want to finish transforming it before attempting
+// to look in the cache for it
+//
+// Arguments:
+//   t             - the type (or type component) being examined
+//   news          - pointer hash table; any `t` (or `typeof(t)`) present as a key is "new"
+//   image_base    - start address of the image being restored
+//   sizeof_sysimg - with `image_base`, bounds the address range
+//                   (image_base, image_base + sizeof_sysimg] used for the fast-path test
+// Returns 1 when `t` must be treated as new, 0 otherwise.
+int must_be_new_dt(jl_value_t *t, htable_t *news, char *image_base, size_t sizeof_sysimg)
+{
+    //if (jl_object_in_image(t))
+    //    return 0; // fast-path for rejection
+    // `t` must not be mapped to itself in `news`; the uniquing loop in
+    // jl_restore_system_image_from_stream_ uses a self-mapping to mark an object
+    // as invalid and removes that mark before calling this function.
+    assert(ptrhash_get(news, (void*)t) != (void*)t);
+    // The `news` lookup happens before the address-range test, so a key in `news`
+    // is reported as new regardless of where it lives.
+    if (ptrhash_has(news, (void*)t) || ptrhash_has(news, (void*)jl_typeof(t)))
+        return 1;
+    if (!(image_base < (char*)t && (char*)t <= image_base + sizeof_sysimg))
+        return 0; // fast-path for rejection
+    // From here on `t` lies inside the image range: recurse into its components.
+    if (jl_is_uniontype(t)) {
+        jl_uniontype_t *u = (jl_uniontype_t*)t;
+        return must_be_new_dt(u->a, news, image_base, sizeof_sysimg) ||
+               must_be_new_dt(u->b, news, image_base, sizeof_sysimg);
+    }
+    else if (jl_is_unionall(t)) {
+        jl_unionall_t *ua = (jl_unionall_t*)t;
+        return must_be_new_dt((jl_value_t*)ua->var, news, image_base, sizeof_sysimg) ||
+               must_be_new_dt(ua->body, news, image_base, sizeof_sysimg);
+    }
+    else if (jl_is_typevar(t)) {
+        jl_tvar_t *tv = (jl_tvar_t*)t;
+        return must_be_new_dt(tv->lb, news, image_base, sizeof_sysimg) ||
+               must_be_new_dt(tv->ub, news, image_base, sizeof_sysimg);
+    }
+    else if (jl_is_vararg(t)) {
+        // T and N may each be NULL; when neither is new this branch
+        // falls through to the final `return 0`.
+        jl_vararg_t *tv = (jl_vararg_t*)t;
+        if (tv->T && must_be_new_dt(tv->T, news, image_base, sizeof_sysimg))
+            return 1;
+        if (tv->N && must_be_new_dt(tv->N, news, image_base, sizeof_sysimg))
+            return 1;
+    }
+    else if (jl_is_datatype(t)) {
+        jl_datatype_t *dt = (jl_datatype_t*)t;
+        assert(jl_object_in_image((jl_value_t*)dt->name) && "type_in_worklist mistake?");
+        jl_datatype_t *super = dt->super;
+        // check if super is news, since then we must be new also
+        // (it is also possible that super is indeterminate now, wait for `t`
+        // to be resolved, then will be determined later and fixed up by the
+        // delay_list, for this and any other references to it).
+        while (super != jl_any_type) {
+            assert(super);
+            if (ptrhash_has(news, (void*)super))
+                return 1;
+            if (!(image_base < (char*)super && (char*)super <= image_base + sizeof_sysimg))
+                break; // fast-path for rejection of super
+            // otherwise super might be something that was not cached even though a later supertype might be
+            // for example while handling `Type{Mask{4, U} where U}`, if we have `Mask{4, U} <: AbstractSIMDVector{4}`
+            super = super->super;
+        }
+        // the supertype chain did not decide it: check every type parameter
+        jl_svec_t *tt = dt->parameters;
+        size_t i, l = jl_svec_len(tt);
+        for (i = 0; i < l; i++)
+            if (must_be_new_dt(jl_tparam(dt, i), news, image_base, sizeof_sysimg))
+                return 1;
+    }
+    else {
+        // any other kind of value: decide based on its type
+        return must_be_new_dt(jl_typeof(t), news, image_base, sizeof_sysimg);
+    }
+    return 0;
+}
+
+// Return the `build_id.lo` of the last module in `worklist`,
+// or 0 when the worklist is empty.
+static uint64_t jl_worklist_key(jl_array_t *worklist) JL_NOTSAFEPOINT
+{
+    assert(jl_is_array(worklist));
+    size_t len = jl_array_len(worklist);
+    if (len > 0) {
+        jl_module_t *topmod = (jl_module_t*)jl_array_ptr_ref(worklist, len-1);
+        assert(jl_is_module(topmod));
+        return topmod->build_id.lo;
+    }
+    return 0;
+}
+
+static jl_array_t *newly_inferred JL_GLOBALLY_ROOTED /*FIXME*/;
+// Mutex for newly_inferred
+static jl_mutex_t newly_inferred_mutex;
+
+// Register array of newly-inferred MethodInstances
+// This gets called as the first step of Base.include_package_for_output
+// The argument may be NULL (the assert permits it). The assignment is done
+// without taking newly_inferred_mutex.
+JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t* _newly_inferred)
+{
+    assert(_newly_inferred == NULL || jl_is_array(_newly_inferred));
+    newly_inferred = (jl_array_t*) _newly_inferred;
+}
+
+// Append `ci` to the end of the registered `newly_inferred` array while
+// holding newly_inferred_mutex.
+// NOTE(review): the array is dereferenced unconditionally even though
+// jl_set_newly_inferred accepts NULL -- confirm callers never push while
+// no array is registered.
+JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t* ci)
+{
+    JL_LOCK(&newly_inferred_mutex);
+    size_t end = jl_array_len(newly_inferred);
+    jl_array_grow_end(newly_inferred, 1);
+    jl_arrayset(newly_inferred, ci, end);
+    JL_UNLOCK(&newly_inferred_mutex);
+}
+
+
+// Return nonzero when `mi` is present as a key in the `external_mis` table.
+static int method_instance_in_queue(jl_method_instance_t *mi)
+{
+    return ptrhash_get(&external_mis, mi) != HT_NOTFOUND;
+}
+
+// compute whether a type references something internal to worklist
+// and thus 
could not have existed before deserialize +// and thus does not need delayed unique-ing +static int type_in_worklist(jl_value_t *v) JL_NOTSAFEPOINT +{ + if (jl_object_in_image(v)) + return 0; // fast-path for rejection + if (jl_is_uniontype(v)) { + jl_uniontype_t *u = (jl_uniontype_t*)v; + return type_in_worklist(u->a) || + type_in_worklist(u->b); + } + else if (jl_is_unionall(v)) { + jl_unionall_t *ua = (jl_unionall_t*)v; + return type_in_worklist((jl_value_t*)ua->var) || + type_in_worklist(ua->body); + } + else if (jl_is_typevar(v)) { + jl_tvar_t *tv = (jl_tvar_t*)v; + return type_in_worklist(tv->lb) || + type_in_worklist(tv->ub); + } + else if (jl_is_vararg(v)) { + jl_vararg_t *tv = (jl_vararg_t*)v; + if (tv->T && type_in_worklist(tv->T)) + return 1; + if (tv->N && type_in_worklist(tv->N)) + return 1; + } + else if (jl_is_datatype(v)) { + jl_datatype_t *dt = (jl_datatype_t*)v; + if (!jl_object_in_image((jl_value_t*)dt->name)) + return 1; + jl_svec_t *tt = dt->parameters; + size_t i, l = jl_svec_len(tt); + for (i = 0; i < l; i++) + if (type_in_worklist(jl_tparam(dt, i))) + return 1; + } + else { + return type_in_worklist(jl_typeof(v)); + } + return 0; +} + +static void mark_backedges_in_worklist(jl_method_instance_t *mi, htable_t *visited, int found) +{ + int oldfound = (char*)ptrhash_get(visited, mi) - (char*)HT_NOTFOUND; + if (oldfound < 3) + return; // not in-progress + ptrhash_put(visited, mi, (void*)((char*)HT_NOTFOUND + 1 + found)); +#ifndef NDEBUG + jl_module_t *mod = mi->def.module; + if (jl_is_method(mod)) + mod = ((jl_method_t*)mod)->module; + assert(jl_is_module(mod)); + assert(!mi->precompiled && jl_object_in_image((jl_value_t*)mod)); + assert(mi->backedges); +#endif + size_t i = 0, n = jl_array_len(mi->backedges); + while (i < n) { + jl_method_instance_t *be; + i = get_next_edge(mi->backedges, i, NULL, &be); + mark_backedges_in_worklist(be, visited, found); + } +} + +// When we infer external method instances, ensure they link back to the +// 
package. Otherwise they might be, e.g., for external macros
+//
+// Returns 0 if no link back to the worklist was found, 1 if one was found, and
+// `2 + d` if the answer is still pending on an in-progress entry at recursion
+// depth `d` (i.e. `mi` sits on a cycle that will be resolved at depth `d`).
+// `visited` memoizes per-MethodInstance state using the encoding listed in the
+// body. `depth` is the current recursion depth; it must be >= 1, because the
+// value 0 is used below to mean "no cycle seen yet".
+static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, int depth)
+{
+    jl_module_t *mod = mi->def.module;
+    if (jl_is_method(mod))
+        mod = ((jl_method_t*)mod)->module;
+    assert(jl_is_module(mod));
+    if (mi->precompiled || !jl_object_in_image((jl_value_t*)mod)) {
+        return 1;
+    }
+    if (!mi->backedges) {
+        return 0;
+    }
+    void **bp = ptrhash_bp(visited, mi);
+    // HT_NOTFOUND: not yet analyzed
+    // HT_NOTFOUND + 1: no link back
+    // HT_NOTFOUND + 2: does link back
+    // HT_NOTFOUND + 3 + depth: in-progress
+    int found = (char*)*bp - (char*)HT_NOTFOUND;
+    if (found)
+        return found - 1;
+    *bp = (void*)((char*)HT_NOTFOUND + 3 + depth); // preliminarily mark as in-progress
+    size_t i = 0, n = jl_array_len(mi->backedges);
+    int cycle = 0; // 0: no cycle seen yet; otherwise the shallowest in-progress depth reached
+    while (i < n) {
+        jl_method_instance_t *be;
+        i = get_next_edge(mi->backedges, i, NULL, &be);
+        int child_found = has_backedge_to_worklist(be, visited, depth + 1);
+        if (child_found == 1) {
+            found = 1;
+            break;
+        }
+        else if (child_found >= 2 && (cycle == 0 || child_found - 2 < cycle)) {
+            // The `cycle == 0` case is required: with `cycle` starting at 0 the
+            // bare `child_found - 2 < cycle` test can never succeed, so cycles
+            // would go unrecorded and their members would be finalized too early.
+            // record the cycle will resolve at depth "cycle"
+            cycle = child_found - 2;
+            assert(cycle);
+        }
+    }
+    // Still undecided and part of a cycle whose top is above us: leave our
+    // in-progress mark in place and let the top of the cycle decide.
+    if (!found && cycle && cycle != depth)
+        return cycle + 2;
+    bp = ptrhash_bp(visited, mi); // re-acquire since rehashing might change the location
+    *bp = (void*)((char*)HT_NOTFOUND + 1 + found);
+    if (cycle) {
+        // If we are the top of the current cycle, now mark all other parts of
+        // our cycle by re-walking the backedges graph and marking all WIP
+        // items as found.
+        // Be careful to only re-walk as far as we had originally scanned above.
+        // Or if we found a backedge, also mark all of the other parts of the
+        // cycle as also having a backedge.
+ n = i; + i = 0; + while (i < n) { + jl_method_instance_t *be; + i = get_next_edge(mi->backedges, i, NULL, &be); + mark_backedges_in_worklist(be, visited, found); + } + } + return found; +} + +// given the list of CodeInstances that were inferred during the +// build, select those that are (1) external, and (2) are inferred to be called +// from the worklist or explicitly added by a `precompile` statement. +// Also prepares for method_instance_in_queue queries. +static jl_array_t *queue_external_cis(jl_array_t *list) +{ + if (list == NULL) + return NULL; + size_t i; + htable_t visited; + assert(jl_is_array(list)); + size_t n0 = jl_array_len(list); + htable_new(&visited, n0); + jl_array_t *new_specializations = jl_alloc_vec_any(0); + JL_GC_PUSH1(&new_specializations); + for (i = 0; i < n0; i++) { + jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(list, i); + assert(jl_is_code_instance(ci)); + jl_method_instance_t *mi = ci->def; + jl_method_t *m = mi->def.method; + if (jl_is_method(m)) { + if (jl_object_in_image((jl_value_t*)m->module)) { + if (ptrhash_get(&external_mis, mi) == HT_NOTFOUND) { + int found = has_backedge_to_worklist(mi, &visited, 1); + assert(found == 0 || found == 1); + if (found == 1) { + ptrhash_put(&external_mis, mi, mi); + jl_array_ptr_1d_push(new_specializations, (jl_value_t*)ci); + } + } + } + } + } + htable_free(&visited); + JL_GC_POP(); + return new_specializations; +} + +// New roots for external methods +static void jl_collect_methods(htable_t *mset, jl_array_t *new_specializations) +{ + size_t i, l = new_specializations ? 
jl_array_len(new_specializations) : 0; + jl_value_t *v; + jl_method_t *m; + for (i = 0; i < l; i++) { + v = jl_array_ptr_ref(new_specializations, i); + assert(jl_is_code_instance(v)); + m = ((jl_code_instance_t*)v)->def->def.method; + assert(jl_is_method(m)); + ptrhash_put(mset, (void*)m, (void*)m); + } +} + +static void jl_collect_new_roots(jl_array_t *roots, htable_t *mset, uint64_t key) +{ + size_t i, sz = mset->size; + int nwithkey; + jl_method_t *m; + void **table = mset->table; + jl_array_t *newroots = NULL; + JL_GC_PUSH1(&newroots); + for (i = 0; i < sz; i += 2) { + if (table[i+1] != HT_NOTFOUND) { + m = (jl_method_t*)table[i]; + assert(jl_is_method(m)); + nwithkey = nroots_with_key(m, key); + if (nwithkey) { + jl_array_ptr_1d_push(roots, (jl_value_t*)m); + newroots = jl_alloc_vec_any(nwithkey); + jl_array_ptr_1d_push(roots, (jl_value_t*)newroots); + rle_iter_state rootiter = rle_iter_init(0); + uint64_t *rletable = NULL; + size_t nblocks2 = 0, nroots = jl_array_len(m->roots), k = 0; + if (m->root_blocks) { + rletable = (uint64_t*)jl_array_data(m->root_blocks); + nblocks2 = jl_array_len(m->root_blocks); + } + while (rle_iter_increment(&rootiter, nroots, rletable, nblocks2)) + if (rootiter.key == key) + jl_array_ptr_set(newroots, k++, jl_array_ptr_ref(m->roots, rootiter.i)); + assert(k == nwithkey); + } + } + } + JL_GC_POP(); +} + +// Create the forward-edge map (caller => callees) +// the intent of these functions is to invert the backedges tree +// for anything that points to a method not part of the worklist +// +// from MethodTables +static void jl_collect_missing_backedges(jl_methtable_t *mt) +{ + jl_array_t *backedges = mt->backedges; + if (backedges) { + size_t i, l = jl_array_len(backedges); + for (i = 1; i < l; i += 2) { + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(backedges, i); + jl_value_t *missing_callee = jl_array_ptr_ref(backedges, i - 1); // signature of abstract callee + jl_array_t *edges = 
(jl_array_t*)jl_eqtable_get(edges_map, (jl_value_t*)caller, NULL); + if (edges == NULL) { + edges = jl_alloc_vec_any(0); + JL_GC_PUSH1(&edges); + edges_map = jl_eqtable_put(edges_map, (jl_value_t*)caller, (jl_value_t*)edges, NULL); + JL_GC_POP(); + } + jl_array_ptr_1d_push(edges, NULL); + jl_array_ptr_1d_push(edges, missing_callee); + } + } +} + + +// from MethodInstances +static void collect_backedges(jl_method_instance_t *callee, int internal) +{ + jl_array_t *backedges = callee->backedges; + if (backedges) { + size_t i = 0, l = jl_array_len(backedges); + while (i < l) { + jl_value_t *invokeTypes; + jl_method_instance_t *caller; + i = get_next_edge(backedges, i, &invokeTypes, &caller); + jl_array_t *edges = (jl_array_t*)jl_eqtable_get(edges_map, (jl_value_t*)caller, NULL); + if (edges == NULL) { + edges = jl_alloc_vec_any(0); + JL_GC_PUSH1(&edges); + edges_map = jl_eqtable_put(edges_map, (jl_value_t*)caller, (jl_value_t*)edges, NULL); + JL_GC_POP(); + } + jl_array_ptr_1d_push(edges, invokeTypes); + jl_array_ptr_1d_push(edges, (jl_value_t*)callee); + } + } +} + + +// For functions owned by modules not on the worklist, call this on each method. +// - if the method is owned by a worklist module, add it to the list of things to be +// fully serialized +// - Collect all backedges (may be needed later when we invert this list). 
+static int jl_collect_methcache_from_mod(jl_typemap_entry_t *ml, void *closure) +{ + jl_array_t *s = (jl_array_t*)closure; + jl_method_t *m = ml->func.method; + if (s && !jl_object_in_image((jl_value_t*)m->module)) { + jl_array_ptr_1d_push(s, (jl_value_t*)m); + } + jl_svec_t *specializations = m->specializations; + size_t i, l = jl_svec_len(specializations); + for (i = 0; i < l; i++) { + jl_method_instance_t *callee = (jl_method_instance_t*)jl_svecref(specializations, i); + if ((jl_value_t*)callee != jl_nothing) + collect_backedges(callee, !s); + } + return 1; +} + +static void jl_collect_methtable_from_mod(jl_array_t *s, jl_methtable_t *mt) +{ + jl_typemap_visitor(mt->defs, jl_collect_methcache_from_mod, (void*)s); +} + +// Collect methods of external functions defined by modules in the worklist +// "extext" = "extending external" +// Also collect relevant backedges +static void jl_collect_extext_methods_from_mod(jl_array_t *s, jl_module_t *m) +{ + if (s && !jl_object_in_image((jl_value_t*)m)) + s = NULL; // do not collect any methods + size_t i; + void **table = m->bindings.table; + for (i = 1; i < m->bindings.size; i += 2) { + if (table[i] != HT_NOTFOUND) { + jl_binding_t *b = (jl_binding_t*)table[i]; + if (b->owner == m && b->value && b->constp) { + jl_value_t *bv = jl_unwrap_unionall(b->value); + if (jl_is_datatype(bv)) { + jl_typename_t *tn = ((jl_datatype_t*)bv)->name; + if (tn->module == m && tn->name == b->name && tn->wrapper == b->value) { + jl_methtable_t *mt = tn->mt; + if (mt != NULL && + (jl_value_t*)mt != jl_nothing && + (mt != jl_type_type_mt && mt != jl_nonfunction_mt)) { + assert(mt->module == tn->module); + jl_collect_methtable_from_mod(s, mt); + if (s) + jl_collect_missing_backedges(mt); + } + } + } + else if (jl_is_module(b->value)) { + jl_module_t *child = (jl_module_t*)b->value; + if (child != m && child->parent == m && child->name == b->name) { + // this is the original/primary binding for the submodule + 
jl_collect_extext_methods_from_mod(s, (jl_module_t*)b->value); + } + } + else if (jl_is_mtable(b->value)) { + jl_methtable_t *mt = (jl_methtable_t*)b->value; + if (mt->module == m && mt->name == b->name) { + // this is probably an external method table, so let's assume so + // as there is no way to precisely distinguish them, + // and the rest of this serializer does not bother + // to handle any method tables specially + jl_collect_methtable_from_mod(s, (jl_methtable_t*)bv); + } + } + } + } + } +} + +static void jl_record_edges(jl_method_instance_t *caller, arraylist_t *wq, jl_array_t *edges) +{ + jl_array_t *callees = NULL; + JL_GC_PUSH2(&caller, &callees); + callees = (jl_array_t*)jl_eqtable_pop(edges_map, (jl_value_t*)caller, NULL, NULL); + if (callees != NULL) { + jl_array_ptr_1d_push(edges, (jl_value_t*)caller); + jl_array_ptr_1d_push(edges, (jl_value_t*)callees); + size_t i, l = jl_array_len(callees); + for (i = 1; i < l; i += 2) { + jl_method_instance_t *c = (jl_method_instance_t*)jl_array_ptr_ref(callees, i); + if (c && jl_is_method_instance(c)) { + arraylist_push(wq, c); + } + } + } + JL_GC_POP(); +} + + +// Extract `edges` and `ext_targets` from `edges_map` +// `edges` = [caller1, targets_indexes1, ...], the list of methods and their edges +// `ext_targets` is [invokesig1, callee1, matches1, ...], the edges for each target +static void jl_collect_edges(jl_array_t *edges, jl_array_t *ext_targets) +{ + size_t world = jl_atomic_load_acquire(&jl_world_counter); + arraylist_t wq; + arraylist_new(&wq, 0); + void **table = (void**)jl_array_data(edges_map); // edges_map is caller => callees + size_t table_size = jl_array_len(edges_map); + for (size_t i = 0; i < table_size; i += 2) { + assert(table == jl_array_data(edges_map) && table_size == jl_array_len(edges_map) && + "edges_map changed during iteration"); + jl_method_instance_t *caller = (jl_method_instance_t*)table[i]; + jl_array_t *callees = (jl_array_t*)table[i + 1]; + if (callees == NULL) + continue; + 
assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); + if (!jl_object_in_image((jl_value_t*)caller->def.method->module) || + method_instance_in_queue(caller)) { + jl_record_edges(caller, &wq, edges); + } + } + while (wq.len) { + jl_method_instance_t *caller = (jl_method_instance_t*)arraylist_pop(&wq); + jl_record_edges(caller, &wq, edges); + } + arraylist_free(&wq); + edges_map = NULL; + htable_t edges_map2; + htable_new(&edges_map2, 0); + htable_t edges_ids; + size_t l = edges ? jl_array_len(edges) : 0; + htable_new(&edges_ids, l); + for (size_t i = 0; i < l / 2; i++) { + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, i * 2); + void *target = (void*)((char*)HT_NOTFOUND + i + 1); + ptrhash_put(&edges_ids, (void*)caller, target); + } + // process target list to turn it into a memoized validity table + // and compute the old methods list, ready for serialization + jl_value_t *matches = NULL; + jl_array_t *callee_ids = NULL; + JL_GC_PUSH2(&matches, &callee_ids); + for (size_t i = 0; i < l; i += 2) { + jl_array_t *callees = (jl_array_t*)jl_array_ptr_ref(edges, i + 1); + size_t l = jl_array_len(callees); + callee_ids = jl_alloc_array_1d(jl_array_int32_type, l + 1); + int32_t *idxs = (int32_t*)jl_array_data(callee_ids); + idxs[0] = 0; + size_t nt = 0; + for (size_t j = 0; j < l; j += 2) { + jl_value_t *invokeTypes = jl_array_ptr_ref(callees, j); + jl_value_t *callee = jl_array_ptr_ref(callees, j + 1); + assert(callee && "unsupported edge"); + + if (jl_is_method_instance(callee)) { + jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); + if (!jl_object_in_image((jl_value_t*)mt->module)) + continue; + } + + // (nullptr, c) => call + // (invokeTypes, c) => invoke + // (nullptr, invokeTypes) => missing call + // (invokeTypes, nullptr) => missing invoke (unused--inferred as Any) + void *target = ptrhash_get(&edges_map2, invokeTypes ? 
(void*)invokeTypes : (void*)callee);
+            if (target == HT_NOTFOUND) {
+                // First time we see this target: compute the set of methods it
+                // currently resolves to and append a new triple to ext_targets.
+                size_t min_valid = 0;
+                size_t max_valid = ~(size_t)0;
+                if (invokeTypes) {
+                    jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method);
+                    if ((jl_value_t*)mt == jl_nothing) {
+                        callee_ids = NULL; // invalid
+                        break;
+                    }
+                    else {
+                        matches = jl_gf_invoke_lookup_worlds(invokeTypes, (jl_value_t*)mt, world, &min_valid, &max_valid);
+                        if (matches == jl_nothing) {
+                            callee_ids = NULL; // invalid
+                            break;
+                        }
+                        matches = (jl_value_t*)((jl_method_match_t*)matches)->method;
+                    }
+                }
+                else {
+                    jl_value_t *sig;
+                    if (jl_is_method_instance(callee))
+                        sig = ((jl_method_instance_t*)callee)->specTypes;
+                    else
+                        sig = callee;
+                    int ambig = 0;
+                    matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing,
+                            -1, 0, world, &min_valid, &max_valid, &ambig);
+                    if (matches == jl_nothing) {
+                        callee_ids = NULL; // invalid
+                        break;
+                    }
+                    // replace each MethodMatch in the result by its Method
+                    size_t k;
+                    for (k = 0; k < jl_array_len(matches); k++) {
+                        jl_method_match_t *match = (jl_method_match_t *)jl_array_ptr_ref(matches, k);
+                        jl_array_ptr_set(matches, k, match->method);
+                    }
+                }
+                jl_array_ptr_1d_push(ext_targets, invokeTypes);
+                jl_array_ptr_1d_push(ext_targets, callee);
+                jl_array_ptr_1d_push(ext_targets, matches);
+                // target encodes the 1-based index of the triple just pushed
+                target = (void*)((char*)HT_NOTFOUND + jl_array_len(ext_targets) / 3);
+                // Memoize under the same key the lookup above uses (invokeTypes
+                // for invoke edges, callee otherwise). Storing under `callee`
+                // unconditionally meant invoke edges were never found again and
+                // could overwrite the entry of a plain call to the same callee.
+                ptrhash_put(&edges_map2, invokeTypes ? (void*)invokeTypes : (void*)callee, target);
+            }
+            idxs[++nt] = (char*)target - (char*)HT_NOTFOUND - 1;
+        }
+        jl_array_ptr_set(edges, i + 1, callee_ids); // swap callees for ids
+        if (!callee_ids)
+            continue;
+        idxs[0] = nt;
+        // record place of every method in edges
+        // add method edges to the callee_ids list
+        for (size_t j = 0; j < l; j += 2) {
+            jl_value_t *callee = jl_array_ptr_ref(callees, j + 1);
+            if (callee && jl_is_method_instance(callee)) {
+                void *target = ptrhash_get(&edges_ids, (void*)callee);
+                if (target != HT_NOTFOUND) {
+                    idxs[++nt] = (char*)target - (char*)HT_NOTFOUND - 1;
+                }
+            }
+        }
+        // callee_ids was allocated with l + 1 slots; trim the unused tail
+        jl_array_del_end(callee_ids, l - nt);
+    }
+    JL_GC_POP();
+    htable_free(&edges_map2);
+    htable_free(&edges_ids); // created with htable_new earlier in this function; release it too
+}
+
+// Headers
+
+// serialize information about all loaded modules
+// For every module in `a` that satisfies jl_object_in_image, writes
+// (int32 name length, name bytes, uuid.hi, uuid.lo, build_id.hi, build_id.lo);
+// the list is terminated by an int32 0.
+static void write_mod_list(ios_t *s, jl_array_t *a)
+{
+    size_t i;
+    size_t len = jl_array_len(a);
+    for (i = 0; i < len; i++) {
+        jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(a, i);
+        assert(jl_is_module(m));
+        if (jl_object_in_image((jl_value_t*)m)) {
+            const char *modname = jl_symbol_name(m->name);
+            size_t l = strlen(modname);
+            write_int32(s, l);
+            ios_write(s, modname, l);
+            write_uint64(s, m->uuid.hi);
+            write_uint64(s, m->uuid.lo);
+            write_uint64(s, m->build_id.hi);
+            write_uint64(s, m->build_id.lo);
+        }
+    }
+    write_int32(s, 0);
+}
+
+// "magic" string and version header of .ji file
+static const int JI_FORMAT_VERSION = 12;
+static const char JI_MAGIC[] = "\373jli\r\n\032\n"; // based on PNG signature
+static const uint16_t BOM = 0xFEFF; // byte-order marker
+// Writes, in order: magic bytes, format version, byte-order marker, pointer
+// size, then the NUL-terminated build uname, build arch, Julia version, git
+// branch and git commit strings, followed by a zeroed 64-bit placeholder.
+static void write_header(ios_t *s)
+{
+    ios_write(s, JI_MAGIC, strlen(JI_MAGIC));
+    write_uint16(s, JI_FORMAT_VERSION);
+    ios_write(s, (char *) &BOM, 2);
+    write_uint8(s, sizeof(void*));
+    ios_write(s, JL_BUILD_UNAME, strlen(JL_BUILD_UNAME)+1);
+    ios_write(s, JL_BUILD_ARCH, strlen(JL_BUILD_ARCH)+1);
+    ios_write(s, JULIA_VERSION_STRING, strlen(JULIA_VERSION_STRING)+1);
+    const char *branch = jl_git_branch(), *commit = jl_git_commit();
+    ios_write(s, branch, strlen(branch)+1);
+    ios_write(s, commit, strlen(commit)+1);
+    write_uint64(s, 0); // eventually will hold checksum for the content portion of this (build_id.hi)
+}
+
+// serialize information about the result of deserializing this file
+static void write_worklist_for_header(ios_t *s, jl_array_t *worklist)
+{
+    int i, l = jl_array_len(worklist);
+    for (i = 0; i < l; i++) {
+        jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(worklist, i);
+        // only modules whose parent is Main or themselves are listed
+        if (workmod->parent == jl_main_module || workmod->parent == workmod) {
+            size_t l = strlen(jl_symbol_name(workmod->name));
+            write_int32(s, l);
+            ios_write(s, jl_symbol_name(workmod->name), l);
+            
write_uint64(s, workmod->uuid.hi); + write_uint64(s, workmod->uuid.lo); + write_uint64(s, workmod->build_id.lo); + } + } + write_int32(s, 0); +} + +static void write_module_path(ios_t *s, jl_module_t *depmod) JL_NOTSAFEPOINT +{ + if (depmod->parent == jl_main_module || depmod->parent == depmod) + return; + const char *mname = jl_symbol_name(depmod->name); + size_t slen = strlen(mname); + write_module_path(s, depmod->parent); + write_int32(s, slen); + ios_write(s, mname, slen); +} + +// Cache file header +// Serialize the global Base._require_dependencies array of pathnames that +// are include dependencies. Also write Preferences and return +// the location of the srctext "pointer" in the header index. +static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t **udepsp) +{ + int64_t initial_pos = 0; + int64_t pos = 0; + static jl_array_t *deps = NULL; + if (!deps) + deps = (jl_array_t*)jl_get_global(jl_base_module, jl_symbol("_require_dependencies")); + + // unique(deps) to eliminate duplicates while preserving order: + // we preserve order so that the topmost included .jl file comes first + static jl_value_t *unique_func = NULL; + if (!unique_func) + unique_func = jl_get_global(jl_base_module, jl_symbol("unique")); + jl_value_t *uniqargs[2] = {unique_func, (jl_value_t*)deps}; + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + jl_array_t *udeps = (*udepsp = deps && unique_func ? (jl_array_t*)jl_apply(uniqargs, 2) : NULL); + ct->world_age = last_age; + + // write a placeholder for total size so that we can quickly seek past all of the + // dependencies if we don't need them + initial_pos = ios_pos(s); + write_uint64(s, 0); + size_t i, l = udeps ? 
jl_array_len(udeps) : 0; + for (i = 0; i < l; i++) { + jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); + jl_value_t *dep = jl_fieldref(deptuple, 1); // file abspath + size_t slen = jl_string_len(dep); + write_int32(s, slen); + ios_write(s, jl_string_data(dep), slen); + write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 2))); // mtime + jl_module_t *depmod = (jl_module_t*)jl_fieldref(deptuple, 0); // evaluating module + jl_module_t *depmod_top = depmod; + while (depmod_top->parent != jl_main_module && depmod_top->parent != depmod_top) + depmod_top = depmod_top->parent; + unsigned provides = 0; + size_t j, lj = jl_array_len(worklist); + for (j = 0; j < lj; j++) { + jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(worklist, j); + if (workmod->parent == jl_main_module || workmod->parent == workmod) { + ++provides; + if (workmod == depmod_top) { + write_int32(s, provides); + write_module_path(s, depmod); + break; + } + } + } + write_int32(s, 0); + } + write_int32(s, 0); // terminator, for ease of reading + + // Calculate Preferences hash for current package. 
+ jl_value_t *prefs_hash = NULL; + jl_value_t *prefs_list = NULL; + JL_GC_PUSH1(&prefs_list); + if (jl_base_module) { + // Toplevel module is the module we're currently compiling, use it to get our preferences hash + jl_value_t * toplevel = (jl_value_t*)jl_get_global(jl_base_module, jl_symbol("__toplevel__")); + jl_value_t * prefs_hash_func = jl_get_global(jl_base_module, jl_symbol("get_preferences_hash")); + jl_value_t * get_compiletime_prefs_func = jl_get_global(jl_base_module, jl_symbol("get_compiletime_preferences")); + + if (toplevel && prefs_hash_func && get_compiletime_prefs_func) { + // Temporary invoke in newest world age + size_t last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + + // call get_compiletime_prefs(__toplevel__) + jl_value_t *args[3] = {get_compiletime_prefs_func, (jl_value_t*)toplevel, NULL}; + prefs_list = (jl_value_t*)jl_apply(args, 2); + + // Call get_preferences_hash(__toplevel__, prefs_list) + args[0] = prefs_hash_func; + args[2] = prefs_list; + prefs_hash = (jl_value_t*)jl_apply(args, 3); + + // Reset world age to normal + ct->world_age = last_age; + } + } + + // If we successfully got the preferences, write it out, otherwise write `0` for this `.ji` file. + if (prefs_hash != NULL && prefs_list != NULL) { + size_t i, l = jl_array_len(prefs_list); + for (i = 0; i < l; i++) { + jl_value_t *pref_name = jl_array_ptr_ref(prefs_list, i); + size_t slen = jl_string_len(pref_name); + write_int32(s, slen); + ios_write(s, jl_string_data(pref_name), slen); + } + write_int32(s, 0); // terminator + write_uint64(s, jl_unbox_uint64(prefs_hash)); + } + else { + // This is an error path, but let's at least generate a valid `.ji` file. + // We declare an empty list of preference names, followed by a zero-hash. + // The zero-hash is not what would be generated for an empty set of preferences, + // and so this `.ji` file will be invalidated by a future non-erroring pass + // through this function. 
+ write_int32(s, 0); + write_uint64(s, 0); + } + JL_GC_POP(); // for prefs_list + + // write a dummy file position to indicate the beginning of the source-text + pos = ios_pos(s); + ios_seek(s, initial_pos); + write_uint64(s, pos - initial_pos); + ios_seek(s, pos); + write_uint64(s, 0); + return pos; +} + + +// Deserialization + +// Add methods to external (non-worklist-owned) functions +static void jl_insert_methods(jl_array_t *list) +{ + size_t i, l = jl_array_len(list); + for (i = 0; i < l; i++) { + jl_method_t *meth = (jl_method_t*)jl_array_ptr_ref(list, i); + assert(jl_is_method(meth)); + assert(!meth->is_for_opaque_closure); + jl_methtable_t *mt = jl_method_get_table(meth); + assert((jl_value_t*)mt != jl_nothing); + jl_method_table_insert(mt, meth, NULL); + } +} + +static void jl_copy_roots(jl_array_t *method_roots_list, uint64_t key) +{ + size_t i, l = jl_array_len(method_roots_list); + for (i = 0; i < l; i+=2) { + jl_method_t *m = (jl_method_t*)jl_array_ptr_ref(method_roots_list, i); + jl_array_t *roots = (jl_array_t*)jl_array_ptr_ref(method_roots_list, i+1); + if (roots) { + assert(jl_is_array(roots)); + jl_append_method_roots(m, key, roots); + } + } +} + +static int remove_code_instance_from_validation(jl_code_instance_t *codeinst) +{ + return ptrhash_remove(&new_code_instance_validate, codeinst); +} + +// verify that these edges intersect with the same methods as before +static jl_array_t *jl_verify_edges(jl_array_t *targets) +{ + size_t world = jl_atomic_load_acquire(&jl_world_counter); + size_t i, l = jl_array_len(targets) / 3; + jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, l); + memset(jl_array_data(valids), 1, l); + jl_value_t *loctag = NULL; + jl_value_t *matches = NULL; + JL_GC_PUSH3(&valids, &matches, &loctag); + for (i = 0; i < l; i++) { + jl_value_t *invokesig = jl_array_ptr_ref(targets, i * 3); + jl_value_t *callee = jl_array_ptr_ref(targets, i * 3 + 1); + jl_value_t *expected = jl_array_ptr_ref(targets, i * 3 + 2); + int valid = 
1; + size_t min_valid = 0; + size_t max_valid = ~(size_t)0; + if (invokesig) { + assert(callee && "unsupported edge"); + jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); + if ((jl_value_t*)mt == jl_nothing) { + valid = 0; + } + else { + matches = jl_gf_invoke_lookup_worlds(invokesig, (jl_value_t*)mt, world, &min_valid, &max_valid); + if (matches == jl_nothing) { + valid = 0; + } + else { + matches = (jl_value_t*)((jl_method_match_t*)matches)->method; + if (matches != expected) { + valid = 0; + } + } + } + } + else { + jl_value_t *sig; + if (jl_is_method_instance(callee)) + sig = ((jl_method_instance_t*)callee)->specTypes; + else + sig = callee; + assert(jl_is_array(expected)); + int ambig = 0; + // TODO: possibly need to included ambiguities too (for the optimizer correctness)? + matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, + -1, 0, world, &min_valid, &max_valid, &ambig); + if (matches == jl_nothing) { + valid = 0; + } + else { + // setdiff!(matches, expected) + size_t j, k, ins = 0; + if (jl_array_len(matches) != jl_array_len(expected)) { + valid = 0; + } + for (k = 0; k < jl_array_len(matches); k++) { + jl_method_t *match = ((jl_method_match_t*)jl_array_ptr_ref(matches, k))->method; + size_t l = jl_array_len(expected); + for (j = 0; j < l; j++) + if (match == (jl_method_t*)jl_array_ptr_ref(expected, j)) + break; + if (j == l) { + // intersection has a new method or a method was + // deleted--this is now probably no good, just invalidate + // everything about it now + valid = 0; + if (!_jl_debug_method_invalidation) + break; + jl_array_ptr_set(matches, ins++, match); + } + } + if (!valid && _jl_debug_method_invalidation) + jl_array_del_end((jl_array_t*)matches, jl_array_len(matches) - ins); + } + } + jl_array_uint8_set(valids, i, valid); + if (!valid && _jl_debug_method_invalidation) { + jl_array_ptr_1d_push(_jl_debug_method_invalidation, invokesig ? 
(jl_value_t*)invokesig : callee);
+            loctag = jl_cstr_to_string("insert_backedges_callee");
+            jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
+            loctag = jl_box_int32((int32_t)i);
+            jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
+            jl_array_ptr_1d_push(_jl_debug_method_invalidation, matches);
+        }
+        //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)invokesig);
+        //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)callee);
+        //ios_puts(valid ? "valid\n" : "INVALID\n", ios_stderr);
+    }
+    JL_GC_POP();
+    return valids;
+}
+
+// Combine all edges relevant to a method into the visited table
+// `edges` is a flat list of (caller, callee_ids) pairs and `valids` holds one
+// byte per external target. For each caller, the first idxs[0] entries after
+// idxs[0] are checked against `valids`; the caller is then stored in `visited`
+// as HT_NOTFOUND + 1 (invalid) or HT_NOTFOUND + 2 (needs a graph scan).
+// `visited` is (re)initialized here via htable_new.
+static void jl_verify_methods(jl_array_t *edges, jl_array_t *valids, htable_t *visited)
+{
+    jl_value_t *loctag = NULL;
+    JL_GC_PUSH1(&loctag);
+    size_t i, l = jl_array_len(edges) / 2;
+    htable_new(visited, l);
+    for (i = 0; i < l; i++) {
+        jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i);
+        assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method));
+        jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1);
+        int valid = 1;
+        if (callee_ids == NULL) {
+            // serializing the edges had failed
+            valid = 0;
+        }
+        else {
+            // Only inspect the array's type once we know it is non-NULL; doing
+            // this before the NULL check would make assertion-enabled builds
+            // look at a NULL object on the (supported) failure path above.
+            assert(jl_typeis((jl_value_t*)callee_ids, jl_array_int32_type));
+            int32_t *idxs = (int32_t*)jl_array_data(callee_ids);
+            size_t j;
+            // stop at the first invalid target
+            for (j = 0; valid && j < idxs[0]; j++) {
+                int32_t idx = idxs[j + 1];
+                valid = jl_array_uint8_ref(valids, idx);
+                if (!valid && _jl_debug_method_invalidation) {
+                    jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller);
+                    loctag = jl_cstr_to_string("verify_methods");
+                    jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
+                    loctag = jl_box_int32((int32_t)idx);
+                    jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
+                }
+            }
+        }
+        ptrhash_put(visited, caller, (void*)(((char*)HT_NOTFOUND) + valid + 1));
+        //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)caller);
+        //ios_puts(valid ? 
"valid\n" : "INVALID\n", ios_stderr); + // HT_NOTFOUND: valid (no invalid edges) + // HT_NOTFOUND + 1: invalid + // HT_NOTFOUND + 2: need to scan + // HT_NOTFOUND + 3 + depth: in-progress + } + JL_GC_POP(); +} + + +// Propagate the result of cycle-resolution to all edges (recursively) +static int mark_edges_in_worklist(jl_array_t *edges, int idx, jl_method_instance_t *cycle, htable_t *visited, int found) +{ + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, idx * 2); + int oldfound = (char*)ptrhash_get(visited, caller) - (char*)HT_NOTFOUND; + if (oldfound < 3) + return 0; // not in-progress + if (!found) { + ptrhash_remove(visited, (void*)caller); + } + else { + ptrhash_put(visited, (void*)caller, (void*)((char*)HT_NOTFOUND + 1 + found)); + } + jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, idx * 2 + 1); + assert(jl_typeis((jl_value_t*)callee_ids, jl_array_int32_type)); + int32_t *idxs = (int32_t*)jl_array_data(callee_ids); + size_t i, badidx = 0, n = jl_array_len(callee_ids); + for (i = idxs[0] + 1; i < n; i++) { + if (mark_edges_in_worklist(edges, idxs[i], cycle, visited, found) && badidx == 0) + badidx = i - idxs[0]; + } + if (_jl_debug_method_invalidation) { + jl_value_t *loctag = NULL; + JL_GC_PUSH1(&loctag); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller); + loctag = jl_cstr_to_string("verify_methods"); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); + jl_method_instance_t *callee = cycle; + if (badidx--) + callee = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * badidx); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)callee); + JL_GC_POP(); + } + return 1; +} + + +// Visit the entire call graph, starting from edges[idx] to determine if that method is valid +static int jl_verify_graph_edge(jl_array_t *edges, int idx, htable_t *visited, int depth) +{ + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, idx * 2); + 
assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); + int found = (char*)ptrhash_get(visited, (void*)caller) - (char*)HT_NOTFOUND; + if (found == 0) + return 1; // valid + if (found == 1) + return 0; // invalid + if (found != 2) + return found - 1; // depth + found = 0; + ptrhash_put(visited, (void*)caller, (void*)((char*)HT_NOTFOUND + 3 + depth)); // change 2 to in-progress at depth + jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, idx * 2 + 1); + assert(jl_typeis((jl_value_t*)callee_ids, jl_array_int32_type)); + int32_t *idxs = (int32_t*)jl_array_data(callee_ids); + int cycle = 0; + size_t i, n = jl_array_len(callee_ids); + for (i = idxs[0] + 1; i < n; i++) { + int32_t idx = idxs[i]; + int child_found = jl_verify_graph_edge(edges, idx, visited, depth + 1); + if (child_found == 0) { + found = 1; + if (_jl_debug_method_invalidation) { + jl_value_t *loctag = NULL; + JL_GC_PUSH1(&loctag); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller); + loctag = jl_cstr_to_string("verify_methods"); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, jl_array_ptr_ref(edges, idx * 2)); + JL_GC_POP(); + } + break; + } + else if (child_found >= 2 && child_found - 2 < cycle) { + // record the cycle will resolve at depth "cycle" + cycle = child_found - 2; + assert(cycle); + } + } + if (!found) { + if (cycle && cycle != depth) + return cycle + 2; + ptrhash_remove(visited, (void*)caller); + } + else { // found invalid + ptrhash_put(visited, (void*)caller, (void*)((char*)HT_NOTFOUND + 1 + found)); + } + if (cycle) { + // If we are the top of the current cycle, now mark all other parts of + // our cycle by re-walking the backedges graph and marking all WIP + // items as found. + // Be careful to only re-walk as far as we had originally scanned above. + // Or if we found a backedge, also mark all of the other parts of the + // cycle as also having an backedge. 
+ n = i; + for (i = idxs[0] + 1; i < n; i++) { + mark_edges_in_worklist(edges, idxs[i], caller, visited, found); + } + } + return found ? 0 : 1; +} + +// Visit all entries in edges, verify if they are valid +static jl_array_t *jl_verify_graph(jl_array_t *edges, htable_t *visited) +{ + size_t i, n = jl_array_len(edges) / 2; + jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, n); + JL_GC_PUSH1(&valids); + int8_t *valids_data = (int8_t*)jl_array_data(valids); + for (i = 0; i < n; i++) { + valids_data[i] = jl_verify_graph_edge(edges, i, visited, 1); + } + JL_GC_POP(); + return valids; +} + +// Restore backedges to external targets +// `edges` = [caller1, targets_indexes1, ...], the list of worklist-owned methods calling external methods. +// `ext_targets` is [invokesig1, callee1, matches1, ...], the global set of non-worklist callees of worklist-owned methods. +static void jl_insert_backedges(jl_array_t *edges, jl_array_t *ext_targets, jl_array_t *ci_list) +{ + // determine which CodeInstance objects are still valid in our image + size_t world = jl_atomic_load_acquire(&jl_world_counter); + jl_array_t *valids = jl_verify_edges(ext_targets); + JL_GC_PUSH1(&valids); + htable_t visited; + htable_new(&visited, 0); + jl_verify_methods(edges, valids, &visited); + valids = jl_verify_graph(edges, &visited); + size_t i, l = jl_array_len(edges) / 2; + + // next build a map from external MethodInstances to their CodeInstance for insertion + if (ci_list == NULL) { + htable_reset(&visited, 0); + } + else { + size_t i, l = jl_array_len(ci_list); + htable_reset(&visited, l); + for (i = 0; i < l; i++) { + jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(ci_list, i); + assert(ptrhash_get(&visited, (void*)ci->def) == HT_NOTFOUND); // check that we don't have multiple cis for same mi + ptrhash_put(&visited, (void*)ci->def, (void*)ci); + } + } + + // next disable any invalid codes, so we do not try to enable them + for (i = 0; i < l; i++) { + jl_method_instance_t 
*caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); + assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); + int valid = jl_array_uint8_ref(valids, i); + if (valid) + continue; + void *ci = ptrhash_get(&visited, (void*)caller); + if (ci != HT_NOTFOUND) { + assert(jl_is_code_instance(ci)); + remove_code_instance_from_validation((jl_code_instance_t*)ci); // mark it as handled + } + else { + jl_code_instance_t *codeinst = caller->cache; + while (codeinst) { + remove_code_instance_from_validation(codeinst); // should be left invalid + codeinst = jl_atomic_load_relaxed(&codeinst->next); + } + } + } + + // finally enable any applicable new codes + for (i = 0; i < l; i++) { + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); + int valid = jl_array_uint8_ref(valids, i); + if (!valid) + continue; + // if this callee is still valid, add all the backedges + jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1); + int32_t *idxs = (int32_t*)jl_array_data(callee_ids); + for (size_t j = 0; j < idxs[0]; j++) { + int32_t idx = idxs[j + 1]; + jl_value_t *invokesig = jl_array_ptr_ref(ext_targets, idx * 3); + jl_value_t *callee = jl_array_ptr_ref(ext_targets, idx * 3 + 1); + if (callee && jl_is_method_instance(callee)) { + jl_method_instance_add_backedge((jl_method_instance_t*)callee, invokesig, caller); + } + else { + jl_value_t *sig = callee == NULL ? invokesig : callee; + jl_methtable_t *mt = jl_method_table_for(sig); + // FIXME: rarely, `callee` has an unexpected `Union` signature, + // see https://github.com/JuliaLang/julia/pull/43990#issuecomment-1030329344 + // Fix the issue and turn this back into an `assert((jl_value_t*)mt != jl_nothing)` + // This workaround exposes us to (rare) 265-violations. 
+ if ((jl_value_t*)mt != jl_nothing) + jl_method_table_add_backedge(mt, sig, (jl_value_t*)caller); + } + } + // then enable it + void *ci = ptrhash_get(&visited, (void*)caller); + if (ci != HT_NOTFOUND) { + // have some new external code to use + assert(jl_is_code_instance(ci)); + jl_code_instance_t *codeinst = (jl_code_instance_t*)ci; + remove_code_instance_from_validation(codeinst); // mark it as handled + assert(codeinst->min_world >= world && codeinst->inferred); + codeinst->max_world = ~(size_t)0; + if (jl_rettype_inferred(caller, world, ~(size_t)0) == jl_nothing) { + jl_mi_cache_insert(caller, codeinst); + } + } + else { + jl_code_instance_t *codeinst = caller->cache; + while (codeinst) { + if (remove_code_instance_from_validation(codeinst)) { // mark it as handled + assert(codeinst->min_world >= world && codeinst->inferred); + codeinst->max_world = ~(size_t)0; + } + codeinst = jl_atomic_load_relaxed(&codeinst->next); + } + } + } + + htable_free(&visited); + JL_GC_POP(); +} + +static void classify_callers(htable_t *callers_with_edges, jl_array_t *edges) +{ + size_t l = edges ? 
jl_array_len(edges) / 2 : 0; + for (size_t i = 0; i < l; i++) { + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); + ptrhash_put(callers_with_edges, (void*)caller, (void*)caller); + } +} + +static void validate_new_code_instances(void) +{ + size_t world = jl_atomic_load_acquire(&jl_world_counter); + size_t i; + for (i = 0; i < new_code_instance_validate.size; i += 2) { + if (new_code_instance_validate.table[i+1] != HT_NOTFOUND) { + //assert(0 && "unexpected unprocessed CodeInstance found"); + jl_code_instance_t *ci = (jl_code_instance_t*)new_code_instance_validate.table[i]; + JL_GC_PROMISE_ROOTED(ci); // TODO: this needs a root (or restructuring to avoid it) + assert(ci->min_world >= world && ci->inferred); + ci->max_world = ~(size_t)0; + jl_method_instance_t *caller = ci->def; + if (jl_rettype_inferred(caller, world, ~(size_t)0) == jl_nothing) { + jl_mi_cache_insert(caller, ci); + } + //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)caller); + //ios_puts("FREE\n", ios_stderr); + } + } +} + +static jl_value_t *read_verify_mod_list(ios_t *s, jl_array_t *depmods) +{ + if (!jl_main_module->build_id.lo) { + return jl_get_exceptionf(jl_errorexception_type, + "Main module uuid state is invalid for module deserialization."); + } + size_t i, l = jl_array_len(depmods); + for (i = 0; ; i++) { + size_t len = read_int32(s); + if (len == 0 && i == l) + return NULL; // success + if (len == 0 || i == l) + return jl_get_exceptionf(jl_errorexception_type, "Wrong number of entries in module list."); + char *name = (char*)alloca(len + 1); + ios_readall(s, name, len); + name[len] = '\0'; + jl_uuid_t uuid; + uuid.hi = read_uint64(s); + uuid.lo = read_uint64(s); + jl_uuid_t build_id; + build_id.hi = read_uint64(s); + build_id.lo = read_uint64(s); + jl_sym_t *sym = _jl_symbol(name, len); + jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(depmods, i); + if (!m || !jl_is_module(m) || m->uuid.hi != uuid.hi || m->uuid.lo != uuid.lo || m->name != sym 
|| + m->build_id.hi != build_id.hi || m->build_id.lo != build_id.lo) { + return jl_get_exceptionf(jl_errorexception_type, + "Invalid input in module list: expected %s.", name); + } + } +} + +static int readstr_verify(ios_t *s, const char *str, int include_null) +{ + size_t i, len = strlen(str) + include_null; + for (i = 0; i < len; ++i) + if ((char)read_uint8(s) != str[i]) + return 0; + return 1; +} + +JL_DLLEXPORT uint64_t jl_read_verify_header(ios_t *s) +{ + uint16_t bom; + if (readstr_verify(s, JI_MAGIC, 0) && + read_uint16(s) == JI_FORMAT_VERSION && + ios_read(s, (char *) &bom, 2) == 2 && bom == BOM && + read_uint8(s) == sizeof(void*) && + readstr_verify(s, JL_BUILD_UNAME, 1) && + readstr_verify(s, JL_BUILD_ARCH, 1) && + readstr_verify(s, JULIA_VERSION_STRING, 1) && + readstr_verify(s, jl_git_branch(), 1) && + readstr_verify(s, jl_git_commit(), 1)) + return read_uint64(s); + return 0; +} diff --git a/src/subtype.c b/src/subtype.c index 9a5a9fdbbbfd4..cbb11520190cb 100644 --- a/src/subtype.c +++ b/src/subtype.c @@ -1289,8 +1289,10 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param) return issub; } while (xd != jl_any_type && xd->name != yd->name) { - if (xd->super == NULL) + if (xd->super == NULL) { + assert(xd->parameters && jl_is_typename(xd->name)); jl_errorf("circular type parameter constraint in definition of %s", jl_symbol_name(xd->name->name)); + } xd = xd->super; } if (xd == jl_any_type) return 0; diff --git a/src/support/arraylist.h b/src/support/arraylist.h index 03bfd45f8f525..6ad2f0e2f28c9 100644 --- a/src/support/arraylist.h +++ b/src/support/arraylist.h @@ -25,7 +25,7 @@ void arraylist_free(arraylist_t *a) JL_NOTSAFEPOINT; void arraylist_push(arraylist_t *a, void *elt) JL_NOTSAFEPOINT; void *arraylist_pop(arraylist_t *a) JL_NOTSAFEPOINT; -void arraylist_grow(arraylist_t *a, size_t n) JL_NOTSAFEPOINT; +JL_DLLEXPORT void arraylist_grow(arraylist_t *a, size_t n) JL_NOTSAFEPOINT; typedef struct { uint32_t len; diff --git 
a/src/support/rle.h b/src/support/rle.h index f85d9f35c4b80..bd2fdafc0f79f 100644 --- a/src/support/rle.h +++ b/src/support/rle.h @@ -10,6 +10,7 @@ extern "C" { #include #include #include +#include "analyzer_annotations.h" /* Run-length encoding (RLE) utilities */ /* In the RLE table, even indexes encode the key (the item classification), odd indexes encode the item index */ @@ -28,8 +29,8 @@ typedef struct _rle_iter_state_t { uint64_t key; // current identifier } rle_iter_state; -rle_iter_state rle_iter_init(/* implicit value of key for indexes prior to first explicit rle pair */ uint64_t key0); -int rle_iter_increment(rle_iter_state *state, /* number of items */ size_t len, uint64_t *rletable, /*length of rletable */ size_t npairs); +rle_iter_state rle_iter_init(/* implicit value of key for indexes prior to first explicit rle pair */ uint64_t key0) JL_NOTSAFEPOINT; +int rle_iter_increment(rle_iter_state *state, /* number of items */ size_t len, uint64_t *rletable, /*length of rletable */ size_t npairs) JL_NOTSAFEPOINT; /* indexing */ typedef struct { @@ -37,8 +38,8 @@ typedef struct { int index; // number of preceding items in the list with the same key } rle_reference; -void rle_index_to_reference(rle_reference *rr, /* item index */ size_t i, uint64_t *rletable, size_t npairs, uint64_t key0); -size_t rle_reference_to_index(rle_reference *rr, uint64_t *rletable, size_t npairs, uint64_t key0); +void rle_index_to_reference(rle_reference *rr, /* item index */ size_t i, uint64_t *rletable, size_t npairs, uint64_t key0) JL_NOTSAFEPOINT; +size_t rle_reference_to_index(rle_reference *rr, uint64_t *rletable, size_t npairs, uint64_t key0) JL_NOTSAFEPOINT; #ifdef __cplusplus diff --git a/src/threading.c b/src/threading.c index e33d22c24581a..dcb57cce23a79 100644 --- a/src/threading.c +++ b/src/threading.c @@ -291,8 +291,10 @@ JL_DLLEXPORT jl_gcframe_t **jl_get_pgcstack(void) JL_GLOBALLY_ROOTED void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k) { 
+#ifndef __clang_gcanalyzer__ if (jl_get_pgcstack_cb == jl_get_pgcstack_init) jl_get_pgcstack_init(); +#endif // for codegen *f = jl_get_pgcstack_cb; *k = jl_pgcstack_key; diff --git a/stdlib/LLD_jll/src/LLD_jll.jl b/stdlib/LLD_jll/src/LLD_jll.jl index d14d740fc5e5b..80653353a7c17 100644 --- a/stdlib/LLD_jll/src/LLD_jll.jl +++ b/stdlib/LLD_jll/src/LLD_jll.jl @@ -1,4 +1,3 @@ - # This file is a part of Julia. License is MIT: https://julialang.org/license ## dummy stub for https://github.com/JuliaBinaryWrappers/LLD_jll.jl diff --git a/stdlib/Profile/src/Allocs.jl b/stdlib/Profile/src/Allocs.jl index 2bf06550b72d6..1a52c1ec782de 100644 --- a/stdlib/Profile/src/Allocs.jl +++ b/stdlib/Profile/src/Allocs.jl @@ -144,9 +144,13 @@ end const BacktraceCache = Dict{BTElement,Vector{StackFrame}} # copied from julia_internal.h -const JL_BUFF_TAG = UInt(0x4eadc000) +JL_BUFF_TAG::UInt = ccall(:jl_get_buff_tag, UInt, ()) const JL_GC_UNKNOWN_TYPE_TAG = UInt(0xdeadaa03) +function __init__() + global JL_BUFF_TAG = ccall(:jl_get_buff_tag, UInt, ()) +end + struct CorruptType end struct BufferType end struct UnknownType end diff --git a/test/precompile.jl b/test/precompile.jl index 5b49ad4a3b31a..eaf755046d366 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -1,6 +1,7 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license original_depot_path = copy(Base.DEPOT_PATH) +original_load_path = copy(Base.LOAD_PATH) using Test, Distributed, Random @@ -37,7 +38,7 @@ end # method root provenance -rootid(m::Module) = ccall(:jl_module_build_id, UInt64, (Any,), Base.parentmodule(m)) +rootid(m::Module) = Base.module_build_id(Base.parentmodule(m)) % UInt64 rootid(m::Method) = rootid(m.module) function root_provenance(m::Method, i::Int) @@ -344,7 +345,7 @@ precompile_test_harness(false) do dir modules, (deps, requires), required_modules = Base.parse_cache_header(cachefile) discard_module = mod_fl_mt -> (mod_fl_mt.filename, mod_fl_mt.mtime) - @test modules == [ Base.PkgId(Foo) => Base.module_build_id(Foo) ] + @test modules == [ Base.PkgId(Foo) => Base.module_build_id(Foo) % UInt64 ] @test map(x -> x.filename, deps) == [ Foo_file, joinpath(dir, "foo.jl"), joinpath(dir, "bar.jl") ] @test requires == [ Base.PkgId(Foo) => Base.PkgId(string(FooBase_module)), Base.PkgId(Foo) => Base.PkgId(Foo2), @@ -1554,8 +1555,23 @@ precompile_test_harness("issue #46296") do load_path (@eval (using CodeInstancePrecompile)) end -empty!(Base.DEPOT_PATH) -append!(Base.DEPOT_PATH, original_depot_path) +precompile_test_harness("Recursive types") do load_path + write(joinpath(load_path, "RecursiveTypeDef.jl"), + """ + module RecursiveTypeDef + + struct C{T,O} end + struct A{T,N,O} <: AbstractArray{C{T,A{T,N,O}},N} + sz::NTuple{N,Int} + end + + end + """) + Base.compilecache(Base.PkgId("RecursiveTypeDef")) + (@eval (using RecursiveTypeDef)) + a = Base.invokelatest(RecursiveTypeDef.A{Float64,2,String}, (3, 3)) + @test isa(a, AbstractArray) +end @testset "issue 46778" begin f46778(::Any, ::Type{Int}) = 1 @@ -1563,3 +1579,8 @@ append!(Base.DEPOT_PATH, original_depot_path) @test precompile(Tuple{typeof(f46778), Int, DataType}) @test which(f46778, Tuple{Any,DataType}).specializations[1].cache.invoke != C_NULL end + +empty!(Base.DEPOT_PATH) +append!(Base.DEPOT_PATH, original_depot_path) 
+empty!(Base.LOAD_PATH) +append!(Base.LOAD_PATH, original_load_path) From f8a5cd623c366419891efa331811141cd525ded3 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sun, 27 Nov 2022 18:26:31 -0500 Subject: [PATCH 34/57] Allow re-initialization and caching of foreign types (#47407) Co-authored-by: Tim Holy Co-authored-by: Max Horn (cherry picked from commit e06a5915a9e93ed5d4c25cf819275af18adf8187) --- src/datatype.c | 16 ++++++ src/jl_exported_funcs.inc | 1 + src/julia_gcext.h | 7 +++ src/staticdata.c | 19 ++++++- test/gcext/.gitignore | 1 + test/gcext/DependsOnForeign/Manifest.toml | 14 +++++ test/gcext/DependsOnForeign/Project.toml | 6 ++ .../DependsOnForeign/src/DependsOnForeign.jl | 14 +++++ test/gcext/Foreign/Manifest.toml | 8 +++ test/gcext/Foreign/Project.toml | 6 ++ test/gcext/Foreign/deps/foreignlib.c | 56 +++++++++++++++++++ test/gcext/Foreign/src/Foreign.jl | 29 ++++++++++ .../ForeignObjSerialization/Manifest.toml | 14 +++++ .../ForeignObjSerialization/Project.toml | 6 ++ .../src/ForeignObjSerialization.jl | 6 ++ test/gcext/Makefile | 24 ++++++-- test/gcext/gcext-test.jl | 33 +++++++++++ 17 files changed, 254 insertions(+), 6 deletions(-) create mode 100644 test/gcext/DependsOnForeign/Manifest.toml create mode 100644 test/gcext/DependsOnForeign/Project.toml create mode 100644 test/gcext/DependsOnForeign/src/DependsOnForeign.jl create mode 100644 test/gcext/Foreign/Manifest.toml create mode 100644 test/gcext/Foreign/Project.toml create mode 100644 test/gcext/Foreign/deps/foreignlib.c create mode 100644 test/gcext/Foreign/src/Foreign.jl create mode 100644 test/gcext/ForeignObjSerialization/Manifest.toml create mode 100644 test/gcext/ForeignObjSerialization/Project.toml create mode 100644 test/gcext/ForeignObjSerialization/src/ForeignObjSerialization.jl diff --git a/src/datatype.c b/src/datatype.c index 24b3c3ab6c1fb..17f5d53e59d23 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -824,6 +824,22 @@ JL_DLLEXPORT jl_datatype_t * 
jl_new_foreign_type(jl_sym_t *name, return bt; } +JL_DLLEXPORT int jl_reinit_foreign_type(jl_datatype_t *dt, + jl_markfunc_t markfunc, + jl_sweepfunc_t sweepfunc) +{ + if (!jl_is_foreign_type(dt)) + return 0; + const jl_datatype_layout_t *layout = dt->layout; + jl_fielddescdyn_t * desc = + (jl_fielddescdyn_t *) ((char *)layout + sizeof(*layout)); + assert(!desc->markfunc); + assert(!desc->sweepfunc); + desc->markfunc = markfunc; + desc->sweepfunc = sweepfunc; + return 1; +} + JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt) { return jl_is_datatype(dt) && dt->layout && dt->layout->fielddesc_type == 3; diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 67392b106cc66..cc387a2769ac1 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -342,6 +342,7 @@ XX(jl_new_code_info_uninit) \ XX(jl_new_datatype) \ XX(jl_new_foreign_type) \ + XX(jl_reinit_foreign_type) \ XX(jl_new_method_instance_uninit) \ XX(jl_new_method_table) \ XX(jl_new_method_uninit) \ diff --git a/src/julia_gcext.h b/src/julia_gcext.h index 669e80d069fa4..27f0a6b5ec11c 100644 --- a/src/julia_gcext.h +++ b/src/julia_gcext.h @@ -49,6 +49,13 @@ JL_DLLEXPORT jl_datatype_t *jl_new_foreign_type( int haspointers, int large); + +#define HAVE_JL_REINIT_FOREIGN_TYPE 1 +JL_DLLEXPORT int jl_reinit_foreign_type( + jl_datatype_t *dt, + jl_markfunc_t markfunc, + jl_sweepfunc_t sweepfunc); + JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt); JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void); diff --git a/src/staticdata.c b/src/staticdata.c index 4457c51fa03f1..f4e0c9b99e1ca 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -76,6 +76,7 @@ External links: #include "julia.h" #include "julia_internal.h" +#include "julia_gcext.h" #include "builtin_proto.h" #include "processor.h" #include "serialize.h" @@ -1248,6 +1249,9 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED ios_write(s->s, (char*)v, sizeof(void*) + jl_string_len(v)); write_uint8(s->s, 
'\0'); // null-terminated strings for easier C-compatibility } + else if (jl_is_foreign_type(t) == 1) { + jl_error("Cannot serialize instances of foreign datatypes"); + } else if (jl_datatype_nfields(t) == 0) { // The object has no fields, so we just snapshot its byte representation assert(!t->layout->npointers); @@ -1437,10 +1441,14 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED if (dt->layout != NULL) { size_t nf = dt->layout->nfields; size_t np = dt->layout->npointers; - size_t fieldsize = jl_fielddesc_size(dt->layout->fielddesc_type); + size_t fieldsize = 0; + uint8_t is_foreign_type = dt->layout->fielddesc_type == 3; + if (!is_foreign_type) { + fieldsize = jl_fielddesc_size(dt->layout->fielddesc_type); + } char *flddesc = (char*)dt->layout; size_t fldsize = sizeof(jl_datatype_layout_t) + nf * fieldsize; - if (dt->layout->first_ptr != -1) + if (!is_foreign_type && dt->layout->first_ptr != -1) fldsize += np << dt->layout->fielddesc_type; uintptr_t layout = LLT_ALIGN(ios_pos(s->const_data), sizeof(void*)); write_padding(s->const_data, layout - ios_pos(s->const_data)); // realign stream @@ -1449,6 +1457,13 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_datatype_t, layout))); // relocation location arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + layout)); // relocation target ios_write(s->const_data, flddesc, fldsize); + if (is_foreign_type) { + // make sure we have space for the extra hidden pointers + // zero them since they will need to be re-initialized externally + assert(fldsize == sizeof(jl_datatype_layout_t)); + jl_fielddescdyn_t dyn = {0, 0}; + ios_write(s->const_data, (char*)&dyn, sizeof(jl_fielddescdyn_t)); + } } } else if (jl_is_typename(v)) { diff --git a/test/gcext/.gitignore b/test/gcext/.gitignore index 0f8c848e5cea6..829c3297dfa2c 100644 --- a/test/gcext/.gitignore +++ b/test/gcext/.gitignore @@ 
-1,2 +1,3 @@ /gcext /gcext-debug +/Foreign/deps diff --git a/test/gcext/DependsOnForeign/Manifest.toml b/test/gcext/DependsOnForeign/Manifest.toml new file mode 100644 index 0000000000000..d830116bb54ca --- /dev/null +++ b/test/gcext/DependsOnForeign/Manifest.toml @@ -0,0 +1,14 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.8.3" +manifest_format = "2.0" +project_hash = "e7199d961a5f4ebad68a3deaf5beaa7406a0afcb" + +[[deps.Foreign]] +deps = ["Libdl"] +path = "../Foreign" +uuid = "de1f6f7a-d7b3-400f-91c2-33f248ee89c4" +version = "0.1.0" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/test/gcext/DependsOnForeign/Project.toml b/test/gcext/DependsOnForeign/Project.toml new file mode 100644 index 0000000000000..b2bee1338c2b7 --- /dev/null +++ b/test/gcext/DependsOnForeign/Project.toml @@ -0,0 +1,6 @@ +name = "DependsOnForeign" +uuid = "4b0716e0-dfb5-4e00-8b44-e2685a41517f" +version = "0.1.0" + +[deps] +Foreign = "de1f6f7a-d7b3-400f-91c2-33f248ee89c4" diff --git a/test/gcext/DependsOnForeign/src/DependsOnForeign.jl b/test/gcext/DependsOnForeign/src/DependsOnForeign.jl new file mode 100644 index 0000000000000..cdf31774956e1 --- /dev/null +++ b/test/gcext/DependsOnForeign/src/DependsOnForeign.jl @@ -0,0 +1,14 @@ +module DependsOnForeign + +using Foreign + +f(obj::FObj) = Base.pointer_from_objref(obj) +precompile(f, (FObj,)) + +const FObjRef = Ref{FObj}() + +function __init__() + FObjRef[] = FObj() +end + +end # module DependsOnForeign diff --git a/test/gcext/Foreign/Manifest.toml b/test/gcext/Foreign/Manifest.toml new file mode 100644 index 0000000000000..25cf111aa50ba --- /dev/null +++ b/test/gcext/Foreign/Manifest.toml @@ -0,0 +1,8 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.9.0-DEV" +manifest_format = "2.0" +project_hash = "7b70172a2edbdc772ed789e79d4411d7528eae86" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git 
a/test/gcext/Foreign/Project.toml b/test/gcext/Foreign/Project.toml new file mode 100644 index 0000000000000..819f64beee442 --- /dev/null +++ b/test/gcext/Foreign/Project.toml @@ -0,0 +1,6 @@ +name = "Foreign" +uuid = "de1f6f7a-d7b3-400f-91c2-33f248ee89c4" +version = "0.1.0" + +[deps] +Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/test/gcext/Foreign/deps/foreignlib.c b/test/gcext/Foreign/deps/foreignlib.c new file mode 100644 index 0000000000000..72e02e9bef0cf --- /dev/null +++ b/test/gcext/Foreign/deps/foreignlib.c @@ -0,0 +1,56 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#include "julia.h" +#include "julia_gcext.h" + +// TODO make these atomics +int nmarks = 0; +int nsweeps = 0; + +uintptr_t mark(jl_ptls_t ptls, jl_value_t *p) +{ + nmarks += 1; + return 0; +} + +void sweep(jl_value_t *p) +{ + nsweeps++; +} + +JL_DLLEXPORT jl_datatype_t *declare_foreign(jl_sym_t* name, jl_module_t *module, jl_datatype_t *parent) +{ + return jl_new_foreign_type(name, module, parent, mark, sweep, 1, 0); +} + +// #define GC_MAX_SZCLASS (2032 - sizeof(void *)) + +JL_DLLEXPORT int reinit_foreign(jl_datatype_t *dt) +{ + int ret = jl_reinit_foreign_type(dt, mark, sweep); + nmarks = nsweeps = 0; + if (ret == 0) + return 0; + if (dt->layout->npointers != 1) + return -1; + if (dt->layout->size != 0) + return -2; + return ret; +} + +JL_DLLEXPORT jl_value_t *allocate_foreign(jl_ptls_t ptls, size_t sz, jl_datatype_t *dt) +{ + jl_value_t* obj = jl_gc_alloc_typed(ptls, sz, dt); + jl_gc_schedule_foreign_sweepfunc(ptls, obj); + return obj; +} + +JL_DLLEXPORT int nmark_counter() +{ + return nmarks; +} + +JL_DLLEXPORT int nsweep_counter() +{ + return nsweeps; +} diff --git a/test/gcext/Foreign/src/Foreign.jl b/test/gcext/Foreign/src/Foreign.jl new file mode 100644 index 0000000000000..a1ab79fab586a --- /dev/null +++ b/test/gcext/Foreign/src/Foreign.jl @@ -0,0 +1,29 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +module Foreign + +using Libdl + +const foreignlib = joinpath(ENV["BINDIR"], "foreignlib.$(dlext)") + +const FObj = ccall((:declare_foreign, foreignlib), Any, (Any, Any, Any), :FObj, @__MODULE__, Any) +FObj() = ccall((:allocate_foreign, foreignlib), Any, (Ptr{Cvoid}, Csize_t, Any,), Core.getptls(), sizeof(Ptr{Cvoid}), FObj)::FObj + +export FObj + +get_nmark() = ccall((:nmark_counter, foreignlib), Cint, ()) +get_nsweep() = ccall((:nsweep_counter, foreignlib), Cint, ()) + +function __init__() + @assert ccall((:reinit_foreign, foreignlib), Cint, (Any,), FObj) == 1 +end + +allocs(N) = [Foreign.FObj() for _ in 1:N] + +function test(N) + x = allocs(N) + Core.donotdelete(x) + x = nothing +end + +end # module Foreign diff --git a/test/gcext/ForeignObjSerialization/Manifest.toml b/test/gcext/ForeignObjSerialization/Manifest.toml new file mode 100644 index 0000000000000..d830116bb54ca --- /dev/null +++ b/test/gcext/ForeignObjSerialization/Manifest.toml @@ -0,0 +1,14 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.8.3" +manifest_format = "2.0" +project_hash = "e7199d961a5f4ebad68a3deaf5beaa7406a0afcb" + +[[deps.Foreign]] +deps = ["Libdl"] +path = "../Foreign" +uuid = "de1f6f7a-d7b3-400f-91c2-33f248ee89c4" +version = "0.1.0" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/test/gcext/ForeignObjSerialization/Project.toml b/test/gcext/ForeignObjSerialization/Project.toml new file mode 100644 index 0000000000000..1a26ff7884481 --- /dev/null +++ b/test/gcext/ForeignObjSerialization/Project.toml @@ -0,0 +1,6 @@ +name = "ForeignObjSerialization" +uuid = "2c015d96-a6ca-42f0-bc68-f9090de6bc2c" +version = "0.1.0" + +[deps] +Foreign = "de1f6f7a-d7b3-400f-91c2-33f248ee89c4" diff --git a/test/gcext/ForeignObjSerialization/src/ForeignObjSerialization.jl b/test/gcext/ForeignObjSerialization/src/ForeignObjSerialization.jl new file mode 100644 index 
0000000000000..e32753aecb3b4 --- /dev/null +++ b/test/gcext/ForeignObjSerialization/src/ForeignObjSerialization.jl @@ -0,0 +1,6 @@ +module ForeignObjSerialization + +using Foreign +const FObjRef = Ref{FObj}(FObj()) + +end # module ForeignObjSerialization diff --git a/test/gcext/Makefile b/test/gcext/Makefile index b3314d1f9b32b..2a77b76ede50d 100644 --- a/test/gcext/Makefile +++ b/test/gcext/Makefile @@ -19,18 +19,26 @@ SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST)))) # get the executable suffix, if any EXE := $(suffix $(abspath $(JULIA))) +OS := $(shell uname) +ifeq ($(OS), Darwin) + DYLIB := .dylib +else + DYLIB := .so +endif + # get compiler and linker flags. (see: `contrib/julia-config.jl`) JULIA_CONFIG := $(JULIA) -e 'include(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "julia-config.jl"))' -- CPPFLAGS_ADD := CFLAGS_ADD = $(shell $(JULIA_CONFIG) --cflags) LDFLAGS_ADD = -lm $(shell $(JULIA_CONFIG) --ldflags --ldlibs) +DYLIBFLAGS := --shared -fPIC DEBUGFLAGS += -g #============================================================================= -release: $(BIN)/gcext$(EXE) -debug: $(BIN)/gcext-debug$(EXE) +release: $(BIN)/gcext$(EXE) $(BIN)/Foreign/deps/foreignlib$(DYLIB) +debug: $(BIN)/gcext-debug$(EXE) $(BIN)/Foreign/deps/foreignlib-debug$(DYLIB) $(BIN)/gcext$(EXE): $(SRCDIR)/gcext.c $(CC) $^ -o $@ $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS) @@ -38,6 +46,12 @@ $(BIN)/gcext$(EXE): $(SRCDIR)/gcext.c $(BIN)/gcext-debug$(EXE): $(SRCDIR)/gcext.c $(CC) $^ -o $@ $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS) $(DEBUGFLAGS) +$(BIN)/foreignlib$(DYLIB): $(SRCDIR)/Foreign/deps/foreignlib.c + $(CC) $^ -o $@ $(DYLIBFLAGS) $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS) + +$(BIN)/foreignlib-debug$(DYLIB): $(SRCDIR)/Foreign/deps/foreignlib.c + $(CC) $^ -o $@ $(DYLIBFLAGS) $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS) $(DEBUGFLAGS) + ifneq 
($(abspath $(BIN)),$(abspath $(SRCDIR))) # for demonstration purposes, our demo code is also installed # in $BIN, although this would likely not be typical @@ -45,12 +59,14 @@ $(BIN)/LocalTest.jl: $(SRCDIR)/LocalTest.jl cp $< $@ endif -check: $(BIN)/gcext$(EXE) $(BIN)/LocalTest.jl - $(JULIA) --depwarn=error $(SRCDIR)/gcext-test.jl $< +check: $(BIN)/gcext$(EXE) $(BIN)/LocalTest.jl $(BIN)/foreignlib$(DYLIB) + BINDIR=$(BIN) $(JULIA) --depwarn=error $(SRCDIR)/gcext-test.jl $< @echo SUCCESS clean: -rm -f $(BIN)/gcext-debug$(EXE) $(BIN)/gcext$(EXE) + -rm -f $(BIN)/foreignlib$(DYLIB) + -rm -f $(BIN)/foreignlib-debug$(DYLIB) .PHONY: release debug clean check diff --git a/test/gcext/gcext-test.jl b/test/gcext/gcext-test.jl index 0dc9bbadd92b5..81637392e3c5d 100644 --- a/test/gcext/gcext-test.jl +++ b/test/gcext/gcext-test.jl @@ -2,6 +2,7 @@ # tests the output of the embedding example is correct using Test +using Pkg if Sys.iswindows() # libjulia needs to be in the same directory as the embedding executable or in path @@ -43,3 +44,35 @@ end @test checknum(lines[5], r"([0-9]+) corrupted auxiliary roots", n -> n == 0) end + +@testset "Package with foreign type" begin + load_path = copy(LOAD_PATH) + push!(LOAD_PATH, joinpath(@__DIR__, "Foreign")) + push!(LOAD_PATH, joinpath(@__DIR__, "DependsOnForeign")) + try + # Force recaching + Base.compilecache(Base.identify_package("Foreign")) + Base.compilecache(Base.identify_package("DependsOnForeign")) + + push!(LOAD_PATH, joinpath(@__DIR__, "ForeignObjSerialization")) + @test_throws ErrorException Base.compilecache(Base.identify_package("ForeignObjSerialization"), Base.DevNull()) + pop!(LOAD_PATH) + + (@eval (using Foreign)) + @test Base.invokelatest(Foreign.get_nmark) == 0 + @test Base.invokelatest(Foreign.get_nsweep) == 0 + + obj = Base.invokelatest(Foreign.FObj) + GC.@preserve obj begin + GC.gc(true) + end + @test Base.invokelatest(Foreign.get_nmark) > 0 + @time Base.invokelatest(Foreign.test, 10) + GC.gc(true) + @test 
Base.invokelatest(Foreign.get_nsweep) > 0 + (@eval (using DependsOnForeign)) + Base.invokelatest(DependsOnForeign.f, obj) + finally + copy!(LOAD_PATH, load_path) + end +end From 12a4863b7a51f4273844ba11ad0b91d308c97674 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Sat, 10 Dec 2022 17:24:55 +0100 Subject: [PATCH 35/57] Fix physical_memory exports. (#47859) (cherry picked from commit 5a6c80828698d58405ca38b27628d426665324b5) --- base/sysinfo.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/sysinfo.jl b/base/sysinfo.jl index be11d5fb1cc98..b885d88a5f3cb 100644 --- a/base/sysinfo.jl +++ b/base/sysinfo.jl @@ -20,8 +20,8 @@ export BINDIR, loadavg, free_memory, total_memory, - physical_free_memory, - physical_total_memory, + free_physical_memory, + total_physical_memory, isapple, isbsd, isdragonfly, From 2866e2665fb1d5f4e8a474419c8eefeced26d4a8 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sun, 11 Dec 2022 15:34:57 -0500 Subject: [PATCH 36/57] Fix missing GC root in Symbol construction (#47865) The `Symbol` constructor in boot.jl was not using the unsafe_convert mechanism, because it is unavailable at this point in bootstrap. However, it was also not GC-rooting the string some other way, resulting in potential memory corruption. Fix that by manually inlining the :foreigncall and setting up the root appropriately. 
(cherry picked from commit b5a6b0f1acfe980cab4ab933c7f25d0d3a8fcb96) --- base/boot.jl | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/base/boot.jl b/base/boot.jl index 80ef23cd0fd78..3cd03b5398223 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -503,16 +503,19 @@ Array{T}(A::AbstractArray{S,N}) where {T,N,S} = Array{T,N}(A) AbstractArray{T}(A::AbstractArray{S,N}) where {T,S,N} = AbstractArray{T,N}(A) # primitive Symbol constructors + +## Helper for proper GC rooting without unsafe_convert +eval(Core, quote + _Symbol(ptr::Ptr{UInt8}, sz::Int, root::Any) = $(Expr(:foreigncall, QuoteNode(:jl_symbol_n), + Ref{Symbol}, svec(Ptr{UInt8}, Int), 0, QuoteNode(:ccall), :ptr, :sz, :root)) +end) + function Symbol(s::String) @_foldable_meta - return ccall(:jl_symbol_n, Ref{Symbol}, (Ptr{UInt8}, Int), - ccall(:jl_string_ptr, Ptr{UInt8}, (Any,), s), - sizeof(s)) + return _Symbol(ccall(:jl_string_ptr, Ptr{UInt8}, (Any,), s), sizeof(s), s) end function Symbol(a::Array{UInt8,1}) - return ccall(:jl_symbol_n, Ref{Symbol}, (Ptr{UInt8}, Int), - ccall(:jl_array_ptr, Ptr{UInt8}, (Any,), a), - Intrinsics.arraylen(a)) + return _Symbol(ccall(:jl_array_ptr, Ptr{UInt8}, (Any,), a), Intrinsics.arraylen(a), a) end Symbol(s::Symbol) = s From 0b845b1eac5af5aa00694d1c6eed93bae37a28b3 Mon Sep 17 00:00:00 2001 From: Fons van der Plas Date: Mon, 12 Dec 2022 21:35:55 +0100 Subject: [PATCH 37/57] TOML: print: handle mixed vector of dicts and non-dicts (#47876) (cherry picked from commit 4ff62883130802a44a5b4b3aea85c2aa0d6f98cf) --- stdlib/TOML/src/print.jl | 5 ++++- stdlib/TOML/test/print.jl | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/stdlib/TOML/src/print.jl b/stdlib/TOML/src/print.jl index c9709cd7e4283..74efdfc97a05d 100644 --- a/stdlib/TOML/src/print.jl +++ b/stdlib/TOML/src/print.jl @@ -122,7 +122,10 @@ end is_table(value) = isa(value, AbstractDict) is_array_of_tables(value) = isa(value, AbstractArray) && - length(value) > 0 
&& isa(value[1], AbstractDict) + length(value) > 0 && ( + isa(value, AbstractArray{<:AbstractDict}) || + all(v -> isa(v, AbstractDict), value) + ) is_tabular(value) = is_table(value) || is_array_of_tables(value) function print_table(f::MbyFunc, io::IO, a::AbstractDict, diff --git a/stdlib/TOML/test/print.jl b/stdlib/TOML/test/print.jl index 9479a14ca8796..bbfce3b7d7474 100644 --- a/stdlib/TOML/test/print.jl +++ b/stdlib/TOML/test/print.jl @@ -80,6 +80,22 @@ loaders = ["gzip", { driver = "csv", args = {delim = "\t"}}] @test roundtrip(str) +@testset "vec with dicts and non-dicts" begin + # https://github.com/JuliaLang/julia/issues/45340 + d = Dict("b" => Any[111, Dict("a" => 222, "d" => 333)]) + @test toml_str(d) == "b = [111, {a = 222, d = 333}]\n" + + d = Dict("b" => Any[Dict("a" => 222, "d" => 333), 111]) + @test toml_str(d) == "b = [{a = 222, d = 333}, 111]\n" + + d = Dict("b" => Any[Dict("a" => 222, "d" => 333)]) + @test toml_str(d) == """ + [[b]] + a = 222 + d = 333 + """ +end + struct Foo a::Int64 b::Float64 From 3d806536b475e7717f78c47c86a309ba723fa32a Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Tue, 13 Dec 2022 19:38:56 +0700 Subject: [PATCH 38/57] Fixups for #47383 (fixes `runbenchmarks("sort")`) (#47822) * add test demonstrating overflow in countsort * fix overflow in countsort * remove unnecessary type annotations (fixes tests) This fixes the test failure because it allows for automatic conversion. The manual for implementing the AbstractArray interface also does not recommend a type signature for the value arg in setindex!. 
Co-authored-by: Lilith Hafner (cherry picked from commit 965bc7d89e9f54b92a046a8488994acc41f376c4) --- base/sort.jl | 8 ++++---- test/sorting.jl | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 932da36b9e1d6..2dd81829312d0 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -509,12 +509,12 @@ struct WithoutMissingVector{T, U} <: AbstractVector{T} new{nonmissingtype(eltype(data)), typeof(data)}(data) end end -Base.@propagate_inbounds function Base.getindex(v::WithoutMissingVector, i::Integer) +Base.@propagate_inbounds function Base.getindex(v::WithoutMissingVector, i) out = v.data[i] @assert !(out isa Missing) out::eltype(v) end -Base.@propagate_inbounds function Base.setindex!(v::WithoutMissingVector{T}, x::T, i) where T +Base.@propagate_inbounds function Base.setindex!(v::WithoutMissingVector, x, i) v.data[i] = x v end @@ -830,7 +830,7 @@ maybe_reverse(o::ForwardOrdering, x) = x maybe_reverse(o::ReverseOrdering, x) = reverse(x) function _sort!(v::AbstractVector{<:Integer}, ::CountingSort, o::DirectOrdering, kw) @getkw lo hi mn mx scratch - range = o === Reverse ? mn-mx : mx-mn + range = maybe_unsigned(o === Reverse ? mn-mx : mx-mn) offs = 1 - (o === Reverse ? mx : mn) counts = fill(0, range+1) # TODO use scratch (but be aware of type stability) @@ -843,7 +843,7 @@ function _sort!(v::AbstractVector{<:Integer}, ::CountingSort, o::DirectOrdering, lastidx = idx + counts[i] - 1 val = i-offs for j = idx:lastidx - v[j] = val + v[j] = val isa Unsigned && eltype(v) <: Signed ? 
signed(val) : val end idx = lastidx + 1 end diff --git a/test/sorting.jl b/test/sorting.jl index 37bad7d23c94b..614946a8cc4f6 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -765,6 +765,7 @@ end @testset "Unions with missing" begin @test issorted(sort(shuffle!(vcat(fill(missing, 10), rand(Int, 100))))) + @test issorted(sort(vcat(rand(Int8, 600), [missing]))) end @testset "Specific algorithms" begin @@ -897,6 +898,7 @@ end @testset "Count sort near the edge of its range" begin @test issorted(sort(rand(typemin(Int):typemin(Int)+100, 1000))) @test issorted(sort(rand(typemax(Int)-100:typemax(Int), 1000))) + @test issorted(sort(rand(Int8, 600))) end # This testset is at the end of the file because it is slow. From a548ee354aa24eaa902f9a0bb4350af45d26e271 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 30 Nov 2022 16:13:50 -0500 Subject: [PATCH 39/57] fixes for jl_rewrap_unionall This behaved a bit differently than Base.rewrap_unionall, which meant it might make types like `Any where T` from `supertype(struct A{T} <: Any)`. This can confuse subtyping, which does not expect other types to appear to be wider than Any. 
(cherry picked from commit c0d9367d049c2674c97f147e3bb2d69f00ce1e81) --- src/gf.c | 2 +- src/jltypes.c | 26 +++++++++++++++++++++++++- src/julia_internal.h | 1 + src/subtype.c | 6 +++--- 4 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/gf.c b/src/gf.c index d9bb6994e8ea7..a50c6998b90a2 100644 --- a/src/gf.c +++ b/src/gf.c @@ -2467,7 +2467,7 @@ static jl_value_t *jl_argtype_with_function(jl_function_t *f, jl_value_t *types0 for(i=0; i < l; i++) jl_svecset(tt, i+1, jl_tparam(types,i)); tt = (jl_value_t*)jl_apply_tuple_type((jl_svec_t*)tt); - tt = jl_rewrap_unionall(tt, types0); + tt = jl_rewrap_unionall_(tt, types0); JL_GC_POP(); return tt; } diff --git a/src/jltypes.c b/src/jltypes.c index 4203caf92e4cd..4ede9467be043 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -1093,12 +1093,36 @@ jl_value_t *jl_unwrap_unionall(jl_value_t *v) } // wrap `t` in the same unionalls that surround `u` +// where `t` is derived from `u`, so the error checks in jl_type_unionall are unnecessary jl_value_t *jl_rewrap_unionall(jl_value_t *t, jl_value_t *u) { if (!jl_is_unionall(u)) return t; - JL_GC_PUSH1(&t); t = jl_rewrap_unionall(t, ((jl_unionall_t*)u)->body); + jl_tvar_t *v = ((jl_unionall_t*)u)->var; + // normalize `T where T<:S` => S + if (t == (jl_value_t*)v) + return v->ub; + // where var doesn't occur in body just return body + if (!jl_has_typevar(t, v)) + return t; + JL_GC_PUSH1(&t); + //if (v->lb == v->ub) // TODO maybe + // t = jl_substitute_var(body, v, v->ub); + //else + t = jl_new_struct(jl_unionall_type, v, t); + JL_GC_POP(); + return t; +} + +// wrap `t` in the same unionalls that surround `u` +// where `t` is extended from `u`, so the checks in jl_rewrap_unionall are unnecessary +jl_value_t *jl_rewrap_unionall_(jl_value_t *t, jl_value_t *u) +{ + if (!jl_is_unionall(u)) + return t; + t = jl_rewrap_unionall_(t, ((jl_unionall_t*)u)->body); + JL_GC_PUSH1(&t); t = jl_new_struct(jl_unionall_type, ((jl_unionall_t*)u)->var, t); JL_GC_POP(); return t; diff 
--git a/src/julia_internal.h b/src/julia_internal.h index 9410cdc300cad..54daf076a1030 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -701,6 +701,7 @@ JL_DLLEXPORT jl_value_t *jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_ jl_value_t *jl_substitute_var(jl_value_t *t, jl_tvar_t *var, jl_value_t *val); JL_DLLEXPORT jl_value_t *jl_unwrap_unionall(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_rewrap_unionall(jl_value_t *t, jl_value_t *u); +JL_DLLEXPORT jl_value_t *jl_rewrap_unionall_(jl_value_t *t, jl_value_t *u); int jl_count_union_components(jl_value_t *v); JL_DLLEXPORT jl_value_t *jl_nth_union_component(jl_value_t *v JL_PROPAGATES_ROOT, int i) JL_NOTSAFEPOINT; int jl_find_union_component(jl_value_t *haystack, jl_value_t *needle, unsigned *nth) JL_NOTSAFEPOINT; diff --git a/src/subtype.c b/src/subtype.c index cbb11520190cb..1d9d3d875675d 100644 --- a/src/subtype.c +++ b/src/subtype.c @@ -2890,8 +2890,8 @@ static jl_value_t *intersect_sub_datatype(jl_datatype_t *xd, jl_datatype_t *yd, jl_value_t *super_pattern=NULL; JL_GC_PUSH2(&isuper, &super_pattern); jl_value_t *wrapper = xd->name->wrapper; - super_pattern = jl_rewrap_unionall((jl_value_t*)((jl_datatype_t*)jl_unwrap_unionall(wrapper))->super, - wrapper); + super_pattern = jl_rewrap_unionall_((jl_value_t*)((jl_datatype_t*)jl_unwrap_unionall(wrapper))->super, + wrapper); int envsz = jl_subtype_env_size(super_pattern); jl_value_t *ii = jl_bottom_type; { @@ -3528,7 +3528,7 @@ jl_value_t *jl_type_intersection_env_s(jl_value_t *a, jl_value_t *b, jl_svec_t * if (jl_is_uniontype(ans_unwrapped)) { ans_unwrapped = switch_union_tuple(((jl_uniontype_t*)ans_unwrapped)->a, ((jl_uniontype_t*)ans_unwrapped)->b); if (ans_unwrapped != NULL) { - *ans = jl_rewrap_unionall(ans_unwrapped, *ans); + *ans = jl_rewrap_unionall_(ans_unwrapped, *ans); } } JL_GC_POP(); From 31df7c8f7c1de37eaf7e432bc607906d6015e250 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Sat, 19 Nov 2022 
17:03:16 -0500 Subject: [PATCH 40/57] call specialized method instance when encountering unspecialized sparams In some instances, the preferred compilation signature will require sparams to be provided at runtime. When we build the cache around these, we need to make sure the method instance we are calling has those values computed for the current signature, and not use the widened signature. But we can still compile for the widened signature, we just need to make sure we create a cache entry for every narrower call signature. Fix #47476 (cherry picked from commit 16d3b9205b3223a9c843f49e6c03e190c52726f5) --- src/gf.c | 85 ++++++++++++++++++++++++++++++++++++++--------- src/jitlayers.cpp | 2 +- test/core.jl | 12 +++++++ 3 files changed, 82 insertions(+), 17 deletions(-) diff --git a/src/gf.c b/src/gf.c index a50c6998b90a2..bcd509f9e9955 100644 --- a/src/gf.c +++ b/src/gf.c @@ -826,15 +826,6 @@ static void jl_compilation_sig( jl_svecset(limited, i, lastdeclt); } *newparams = limited; - // now there is a problem: the widened signature is more - // general than just the given arguments, so it might conflict - // with another definition that doesn't have cache instances yet. - // to fix this, we insert guard cache entries for all intersections - // of this signature and definitions. those guard entries will - // supersede this one in conflicted cases, alerting us that there - // should actually be a cache miss. - // TODO: the above analysis assumes that there will never - // be a call attempted that should throw a no-method error JL_GC_POP(); } } @@ -1078,18 +1069,35 @@ static jl_method_instance_t *cache_method( jl_svec_t *newparams = NULL; JL_GC_PUSH5(&temp, &temp2, &temp3, &newmeth, &newparams); + // Consider if we can cache with the preferred compile signature + // so that we can minimize the number of required cache entries. int cache_with_orig = 1; jl_tupletype_t *compilationsig = tt; jl_methtable_t *kwmt = mt == jl_kwcall_mt ? 
jl_kwmethod_table_for(definition->sig) : mt; intptr_t nspec = (kwmt == NULL || kwmt == jl_type_type_mt || kwmt == jl_nonfunction_mt || kwmt == jl_kwcall_mt ? definition->nargs + 1 : kwmt->max_args + 2 + 2 * (mt == jl_kwcall_mt)); jl_compilation_sig(tt, sparams, definition, nspec, &newparams); if (newparams) { - compilationsig = jl_apply_tuple_type(newparams); - temp2 = (jl_value_t*)compilationsig; - // In most cases `!jl_isa_compileable_sig(tt, definition))`, + temp2 = (jl_value_t*)jl_apply_tuple_type(newparams); + // Now there may be a problem: the widened signature is more general + // than just the given arguments, so it might conflict with another + // definition that does not have cache instances yet. To fix this, we + // may insert guard cache entries for all intersections of this + // signature and definitions. Those guard entries will supersede this + // one in conflicted cases, alerting us that there should actually be a + // cache miss. Alternatively, we may use the original signature in the + // cache, but use this return for compilation. 
+ // + // In most cases `!jl_isa_compileable_sig(tt, definition)`, // although for some cases, (notably Varargs) // we might choose a replacement type that's preferable but not strictly better - cache_with_orig = !jl_subtype((jl_value_t*)compilationsig, definition->sig); + int issubty; + temp = jl_type_intersection_env_s(temp2, (jl_value_t*)definition->sig, &newparams, &issubty); + assert(temp != (jl_value_t*)jl_bottom_type); (void)temp; + if (jl_egal((jl_value_t*)newparams, (jl_value_t*)sparams)) { + cache_with_orig = !issubty; + compilationsig = (jl_datatype_t*)temp2; + } + newparams = NULL; } // TODO: maybe assert(jl_isa_compileable_sig(compilationsig, definition)); newmeth = jl_specializations_get_linfo(definition, (jl_value_t*)compilationsig, sparams); @@ -1110,6 +1118,8 @@ static jl_method_instance_t *cache_method( size_t i, l = jl_array_len(temp); for (i = 0; i < l; i++) { jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(temp, i); + if (matc->method == definition) + continue; jl_svec_t *env = matc->sparams; int k, l; for (k = 0, l = jl_svec_len(env); k < l; k++) { @@ -1128,9 +1138,7 @@ static jl_method_instance_t *cache_method( cache_with_orig = 1; break; } - if (matc->method != definition) { - guards++; - } + guards++; } } if (!cache_with_orig && guards > 0) { @@ -2095,11 +2103,35 @@ static void record_precompile_statement(jl_method_instance_t *mi) JL_UNLOCK(&precomp_statement_out_lock); } +jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_PROPAGATES_ROOT); + jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t world) { + // quick check if we already have a compiled result jl_code_instance_t *codeinst = jl_method_compiled(mi, world); if (codeinst) return codeinst; + + // if mi has a better (wider) signature for compilation use that instead + // and just copy it here for caching + jl_method_instance_t *mi2 = jl_normalize_to_compilable_mi(mi); + if (mi2 != mi) { + jl_code_instance_t 
*codeinst2 = jl_compile_method_internal(mi2, world); + jl_code_instance_t *codeinst = jl_get_method_inferred( + mi, codeinst2->rettype, + codeinst2->min_world, codeinst2->max_world); + if (jl_atomic_load_relaxed(&codeinst->invoke) == NULL) { + // once set, don't change invoke-ptr, as that leads to race conditions + // with the (not) simultaneous updates to invoke and specptr + codeinst->isspecsig = codeinst2->isspecsig; + codeinst->rettype_const = codeinst2->rettype_const; + jl_atomic_store_release(&codeinst->specptr.fptr, jl_atomic_load_relaxed(&codeinst2->specptr.fptr)); + jl_atomic_store_release(&codeinst->invoke, jl_atomic_load_relaxed(&codeinst2->invoke)); + } + // don't call record_precompile_statement here, since we already compiled it as mi2 which is better + return codeinst; + } + int compile_option = jl_options.compile_enabled; jl_method_t *def = mi->def.method; // disabling compilation per-module can override global setting @@ -2134,6 +2166,7 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t } } } + // if that didn't work and compilation is off, try running in the interpreter if (compile_option == JL_OPTIONS_COMPILE_OFF || compile_option == JL_OPTIONS_COMPILE_MIN) { @@ -2254,6 +2287,26 @@ JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_methtable_t *mt, jl_t return is_compileable ? 
(jl_value_t*)tt : jl_nothing; } +jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_PROPAGATES_ROOT) +{ + jl_method_t *def = mi->def.method; + if (!jl_is_method(def)) + return mi; + jl_methtable_t *mt = jl_method_get_table(def); + if ((jl_value_t*)mt == jl_nothing) + return mi; + jl_value_t *compilationsig = jl_normalize_to_compilable_sig(mt, (jl_datatype_t*)mi->specTypes, mi->sparam_vals, def); + if (compilationsig == jl_nothing || jl_egal(compilationsig, mi->specTypes)) + return mi; + jl_svec_t *env = NULL; + JL_GC_PUSH2(&compilationsig, &env); + jl_value_t *ti = jl_type_intersection_env((jl_value_t*)mi->specTypes, (jl_value_t*)def->sig, &env); + assert(ti != jl_bottom_type); (void)ti; + mi = jl_specializations_get_linfo(def, (jl_value_t*)compilationsig, env); + JL_GC_POP(); + return mi; +} + // return a MethodInstance for a compileable method_match jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache) { diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index b6a30d3380b27..21147d02f4997 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -267,7 +267,7 @@ static jl_callptr_t _jl_compile_codeinst( // hack to export this pointer value to jl_dump_method_disasm jl_atomic_store_release(&this_code->specptr.fptr, (void*)getAddressForFunction(decls.specFunctionObject)); } - if (this_code== codeinst) + if (this_code == codeinst) fptr = addr; } diff --git a/test/core.jl b/test/core.jl index 801058a0b87eb..a40bbc1440c67 100644 --- a/test/core.jl +++ b/test/core.jl @@ -7873,3 +7873,15 @@ let # https://github.com/JuliaLang/julia/issues/46918 @test isempty(String(take!(stderr))) # make sure no error has happened @test String(take!(stdout)) == "nothing IO IO" end + +# issue #47476 +f47476(::Union{Int, NTuple{N,Int}}...) 
where {N} = N +# force it to populate the MethodInstance specializations cache +# with the correct sparams +code_typed(f47476, (Vararg{Union{Int, NTuple{2,Int}}},)); +code_typed(f47476, (Int, Vararg{Union{Int, NTuple{2,Int}}},)); +code_typed(f47476, (Int, Int, Vararg{Union{Int, NTuple{2,Int}}},)) +code_typed(f47476, (Int, Int, Int, Vararg{Union{Int, NTuple{2,Int}}},)) +code_typed(f47476, (Int, Int, Int, Int, Vararg{Union{Int, NTuple{2,Int}}},)) +@test f47476(1, 2, 3, 4, 5, 6, (7, 8)) === 2 +@test_throws UndefVarError(:N) f47476(1, 2, 3, 4, 5, 6, 7) From 9827f1db2203c01b424215c295b97efd2dc4f908 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 21 Nov 2022 14:45:08 -0500 Subject: [PATCH 41/57] ensure sparams are cached correctly for widened methods Follow-up issue found while working on #47476 (cherry picked from commit 71ab5fa95fcef70fca73dbde2a398675ad564553) --- base/compiler/typeinfer.jl | 2 +- base/compiler/utilities.jl | 11 +++-- src/gf.c | 98 ++++++++++++++++++++++--------------- src/julia.h | 2 +- src/precompile.c | 4 +- stdlib/Random/src/Random.jl | 2 +- test/compiler/inference.jl | 2 +- test/core.jl | 4 ++ test/precompile.jl | 4 +- 9 files changed, 79 insertions(+), 50 deletions(-) diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl index b14350cfa1ee8..6336059c79960 100644 --- a/base/compiler/typeinfer.jl +++ b/base/compiler/typeinfer.jl @@ -344,7 +344,7 @@ function maybe_compress_codeinfo(interp::AbstractInterpreter, linfo::MethodInsta return ci end if may_discard_trees(interp) - cache_the_tree = ci.inferred && (is_inlineable(ci) || isa_compileable_sig(linfo.specTypes, def)) + cache_the_tree = ci.inferred && (is_inlineable(ci) || isa_compileable_sig(linfo.specTypes, linfo.sparam_vals, def)) else cache_the_tree = true end diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl index 94e547f1de806..24d8a1d2b317c 100644 --- a/base/compiler/utilities.jl +++ b/base/compiler/utilities.jl @@ -152,8 +152,8 @@ function 
get_compileable_sig(method::Method, @nospecialize(atype), sparams::Simp mt, atype, sparams, method) end -isa_compileable_sig(@nospecialize(atype), method::Method) = - !iszero(ccall(:jl_isa_compileable_sig, Int32, (Any, Any), atype, method)) +isa_compileable_sig(@nospecialize(atype), sparams::SimpleVector, method::Method) = + !iszero(ccall(:jl_isa_compileable_sig, Int32, (Any, Any, Any), atype, sparams, method)) # eliminate UnionAll vars that might be degenerate due to having identical bounds, # or a concrete upper bound and appearing covariantly. @@ -200,7 +200,12 @@ function specialize_method(method::Method, @nospecialize(atype), sparams::Simple if compilesig new_atype = get_compileable_sig(method, atype, sparams) new_atype === nothing && return nothing - atype = new_atype + if atype !== new_atype + sp_ = ccall(:jl_type_intersection_with_env, Any, (Any, Any), new_atype, method.sig)::SimpleVector + if sparams === sp_[2]::SimpleVector + atype = new_atype + end + end end if preexisting # check cached specializations diff --git a/src/gf.c b/src/gf.c index bcd509f9e9955..e2e64fc717483 100644 --- a/src/gf.c +++ b/src/gf.c @@ -637,13 +637,14 @@ static void jl_compilation_sig( for (i = 0; i < np; i++) { jl_value_t *elt = jl_tparam(tt, i); jl_value_t *decl_i = jl_nth_slot_type(decl, i); + jl_value_t *type_i = jl_rewrap_unionall(decl_i, decl); size_t i_arg = (i < nargs - 1 ? 
i : nargs - 1); - if (jl_is_kind(decl_i)) { + if (jl_is_kind(type_i)) { // if we can prove the match was against the kind (not a Type) // we want to put that in the cache instead if (!*newparams) *newparams = jl_svec_copy(tt->parameters); - elt = decl_i; + elt = type_i; jl_svecset(*newparams, i, elt); } else if (jl_is_type_type(elt)) { @@ -652,7 +653,7 @@ static void jl_compilation_sig( // and the result of matching the type signature // needs to be restricted to the concrete type 'kind' jl_value_t *kind = jl_typeof(jl_tparam0(elt)); - if (jl_subtype(kind, decl_i) && !jl_subtype((jl_value_t*)jl_type_type, decl_i)) { + if (jl_subtype(kind, type_i) && !jl_subtype((jl_value_t*)jl_type_type, type_i)) { // if we can prove the match was against the kind (not a Type) // it's simpler (and thus better) to put that cache instead if (!*newparams) *newparams = jl_svec_copy(tt->parameters); @@ -664,7 +665,7 @@ static void jl_compilation_sig( // not triggered for isdispatchtuple(tt), this attempts to handle // some cases of adapting a random signature into a compilation signature // if we get a kind, where we don't expect to accept one, widen it to something more expected (Type{T}) - if (!(jl_subtype(elt, decl_i) && !jl_subtype((jl_value_t*)jl_type_type, decl_i))) { + if (!(jl_subtype(elt, type_i) && !jl_subtype((jl_value_t*)jl_type_type, type_i))) { if (!*newparams) *newparams = jl_svec_copy(tt->parameters); elt = (jl_value_t*)jl_type_type; jl_svecset(*newparams, i, elt); @@ -703,7 +704,7 @@ static void jl_compilation_sig( jl_svecset(*newparams, i, jl_type_type); } else if (jl_is_type_type(elt)) { // elt isa Type{T} - if (very_general_type(decl_i)) { + if (!jl_has_free_typevars(decl_i) && very_general_type(type_i)) { /* Here's a fairly simple heuristic: if this argument slot's declared type is general (Type or Any), @@ -742,15 +743,13 @@ static void jl_compilation_sig( */ if (!*newparams) *newparams = jl_svec_copy(tt->parameters); if (i < nargs || !definition->isva) { - 
jl_value_t *di = jl_type_intersection(decl_i, (jl_value_t*)jl_type_type); + jl_value_t *di = jl_type_intersection(type_i, (jl_value_t*)jl_type_type); assert(di != (jl_value_t*)jl_bottom_type); // issue #11355: DataType has a UID and so would take precedence in the cache if (jl_is_kind(di)) jl_svecset(*newparams, i, (jl_value_t*)jl_type_type); else jl_svecset(*newparams, i, di); - // TODO: recompute static parameter values, so in extreme cases we - // can give `T=Type` instead of `T=Type{Type{Type{...`. /* make editors happy:}}} */ } else { jl_svecset(*newparams, i, (jl_value_t*)jl_type_type); @@ -759,14 +758,15 @@ static void jl_compilation_sig( } int notcalled_func = (i_arg > 0 && i_arg <= 8 && !(definition->called & (1 << (i_arg - 1))) && + !jl_has_free_typevars(decl_i) && jl_subtype(elt, (jl_value_t*)jl_function_type)); - if (notcalled_func && (decl_i == (jl_value_t*)jl_any_type || - decl_i == (jl_value_t*)jl_function_type || - (jl_is_uniontype(decl_i) && // Base.Callable - ((((jl_uniontype_t*)decl_i)->a == (jl_value_t*)jl_function_type && - ((jl_uniontype_t*)decl_i)->b == (jl_value_t*)jl_type_type) || - (((jl_uniontype_t*)decl_i)->b == (jl_value_t*)jl_function_type && - ((jl_uniontype_t*)decl_i)->a == (jl_value_t*)jl_type_type))))) { + if (notcalled_func && (type_i == (jl_value_t*)jl_any_type || + type_i == (jl_value_t*)jl_function_type || + (jl_is_uniontype(type_i) && // Base.Callable + ((((jl_uniontype_t*)type_i)->a == (jl_value_t*)jl_function_type && + ((jl_uniontype_t*)type_i)->b == (jl_value_t*)jl_type_type) || + (((jl_uniontype_t*)type_i)->b == (jl_value_t*)jl_function_type && + ((jl_uniontype_t*)type_i)->a == (jl_value_t*)jl_type_type))))) { // and attempt to despecialize types marked Function, Callable, or Any // when called with a subtype of Function but is not called if (!*newparams) *newparams = jl_svec_copy(tt->parameters); @@ -833,6 +833,7 @@ static void jl_compilation_sig( // compute whether this type signature is a possible return value from 
jl_compilation_sig given a concrete-type for `tt` JL_DLLEXPORT int jl_isa_compileable_sig( jl_tupletype_t *type, + jl_svec_t *sparams, jl_method_t *definition) { jl_value_t *decl = definition->sig; @@ -886,6 +887,7 @@ JL_DLLEXPORT int jl_isa_compileable_sig( for (i = 0; i < np; i++) { jl_value_t *elt = jl_tparam(type, i); jl_value_t *decl_i = jl_nth_slot_type((jl_value_t*)decl, i); + jl_value_t *type_i = jl_rewrap_unionall(decl_i, decl); size_t i_arg = (i < nargs - 1 ? i : nargs - 1); if (jl_is_vararg(elt)) { @@ -919,25 +921,26 @@ JL_DLLEXPORT int jl_isa_compileable_sig( if (jl_is_kind(elt)) { // kind slots always get guard entries (checking for subtypes of Type) - if (jl_subtype(elt, decl_i) && !jl_subtype((jl_value_t*)jl_type_type, decl_i)) + if (jl_subtype(elt, type_i) && !jl_subtype((jl_value_t*)jl_type_type, type_i)) continue; // TODO: other code paths that could reach here return 0; } - else if (jl_is_kind(decl_i)) { + else if (jl_is_kind(type_i)) { return 0; } if (jl_is_type_type(jl_unwrap_unionall(elt))) { - int iscalled = i_arg > 0 && i_arg <= 8 && (definition->called & (1 << (i_arg - 1))); + int iscalled = (i_arg > 0 && i_arg <= 8 && (definition->called & (1 << (i_arg - 1)))) || + jl_has_free_typevars(decl_i); if (jl_types_equal(elt, (jl_value_t*)jl_type_type)) { - if (!iscalled && very_general_type(decl_i)) + if (!iscalled && very_general_type(type_i)) continue; if (i >= nargs && definition->isva) continue; return 0; } - if (!iscalled && very_general_type(decl_i)) + if (!iscalled && very_general_type(type_i)) return 0; if (!jl_is_datatype(elt)) return 0; @@ -949,7 +952,7 @@ JL_DLLEXPORT int jl_isa_compileable_sig( jl_value_t *kind = jl_typeof(jl_tparam0(elt)); if (kind == jl_bottom_type) return 0; // Type{Union{}} gets normalized to typeof(Union{}) - if (jl_subtype(kind, decl_i) && !jl_subtype((jl_value_t*)jl_type_type, decl_i)) + if (jl_subtype(kind, type_i) && !jl_subtype((jl_value_t*)jl_type_type, type_i)) return 0; // gets turned into a kind else if 
(jl_is_type_type(jl_tparam0(elt)) && @@ -963,7 +966,7 @@ JL_DLLEXPORT int jl_isa_compileable_sig( this can be determined using a type intersection. */ if (i < nargs || !definition->isva) { - jl_value_t *di = jl_type_intersection(decl_i, (jl_value_t*)jl_type_type); + jl_value_t *di = jl_type_intersection(type_i, (jl_value_t*)jl_type_type); JL_GC_PUSH1(&di); assert(di != (jl_value_t*)jl_bottom_type); if (jl_is_kind(di)) { @@ -984,14 +987,15 @@ JL_DLLEXPORT int jl_isa_compileable_sig( } int notcalled_func = (i_arg > 0 && i_arg <= 8 && !(definition->called & (1 << (i_arg - 1))) && + !jl_has_free_typevars(decl_i) && jl_subtype(elt, (jl_value_t*)jl_function_type)); - if (notcalled_func && (decl_i == (jl_value_t*)jl_any_type || - decl_i == (jl_value_t*)jl_function_type || - (jl_is_uniontype(decl_i) && // Base.Callable - ((((jl_uniontype_t*)decl_i)->a == (jl_value_t*)jl_function_type && - ((jl_uniontype_t*)decl_i)->b == (jl_value_t*)jl_type_type) || - (((jl_uniontype_t*)decl_i)->b == (jl_value_t*)jl_function_type && - ((jl_uniontype_t*)decl_i)->a == (jl_value_t*)jl_type_type))))) { + if (notcalled_func && (type_i == (jl_value_t*)jl_any_type || + type_i == (jl_value_t*)jl_function_type || + (jl_is_uniontype(type_i) && // Base.Callable + ((((jl_uniontype_t*)type_i)->a == (jl_value_t*)jl_function_type && + ((jl_uniontype_t*)type_i)->b == (jl_value_t*)jl_type_type) || + (((jl_uniontype_t*)type_i)->b == (jl_value_t*)jl_function_type && + ((jl_uniontype_t*)type_i)->a == (jl_value_t*)jl_type_type))))) { // and attempt to despecialize types marked Function, Callable, or Any // when called with a subtype of Function but is not called if (elt == (jl_value_t*)jl_function_type) @@ -1087,7 +1091,7 @@ static jl_method_instance_t *cache_method( // cache miss. Alternatively, we may use the original signature in the // cache, but use this return for compilation. 
// - // In most cases `!jl_isa_compileable_sig(tt, definition)`, + // In most cases `!jl_isa_compileable_sig(tt, sparams, definition)`, // although for some cases, (notably Varargs) // we might choose a replacement type that's preferable but not strictly better int issubty; @@ -1099,7 +1103,7 @@ static jl_method_instance_t *cache_method( } newparams = NULL; } - // TODO: maybe assert(jl_isa_compileable_sig(compilationsig, definition)); + // TODO: maybe assert(jl_isa_compileable_sig(compilationsig, sparams, definition)); newmeth = jl_specializations_get_linfo(definition, (jl_value_t*)compilationsig, sparams); jl_tupletype_t *cachett = tt; @@ -2280,9 +2284,21 @@ JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_methtable_t *mt, jl_t jl_methtable_t *kwmt = mt == jl_kwcall_mt ? jl_kwmethod_table_for(m->sig) : mt; intptr_t nspec = (kwmt == NULL || kwmt == jl_type_type_mt || kwmt == jl_nonfunction_mt || kwmt == jl_kwcall_mt ? m->nargs + 1 : kwmt->max_args + 2 + 2 * (mt == jl_kwcall_mt)); jl_compilation_sig(ti, env, m, nspec, &newparams); - tt = (newparams ? jl_apply_tuple_type(newparams) : ti); - int is_compileable = ((jl_datatype_t*)ti)->isdispatchtuple || - jl_isa_compileable_sig(tt, m); + int is_compileable = ((jl_datatype_t*)ti)->isdispatchtuple; + if (newparams) { + tt = jl_apply_tuple_type(newparams); + if (!is_compileable) { + // compute new env, if used below + jl_value_t *ti = jl_type_intersection_env((jl_value_t*)tt, (jl_value_t*)m->sig, &newparams); + assert(ti != jl_bottom_type); (void)ti; + env = newparams; + } + } + else { + tt = ti; + } + if (!is_compileable) + is_compileable = jl_isa_compileable_sig(tt, env, m); JL_GC_POP(); return is_compileable ? 
(jl_value_t*)tt : jl_nothing; } @@ -2300,7 +2316,7 @@ jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_ return mi; jl_svec_t *env = NULL; JL_GC_PUSH2(&compilationsig, &env); - jl_value_t *ti = jl_type_intersection_env((jl_value_t*)mi->specTypes, (jl_value_t*)def->sig, &env); + jl_value_t *ti = jl_type_intersection_env((jl_value_t*)compilationsig, (jl_value_t*)def->sig, &env); assert(ti != jl_bottom_type); (void)ti; mi = jl_specializations_get_linfo(def, (jl_value_t*)compilationsig, env); JL_GC_POP(); @@ -2317,7 +2333,7 @@ jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t wor if (jl_is_datatype(ti)) { jl_methtable_t *mt = jl_method_get_table(m); if ((jl_value_t*)mt != jl_nothing) { - // get the specialization without caching it + // get the specialization, possibly also caching it if (mt_cache && ((jl_datatype_t*)ti)->isdispatchtuple) { // Since we also use this presence in the cache // to trigger compilation when producing `.ji` files, @@ -2329,11 +2345,15 @@ jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t wor } else { jl_value_t *tt = jl_normalize_to_compilable_sig(mt, ti, env, m); - JL_GC_PUSH1(&tt); if (tt != jl_nothing) { + JL_GC_PUSH2(&tt, &env); + if (!jl_egal(tt, (jl_value_t*)ti)) { + jl_value_t *ti = jl_type_intersection_env((jl_value_t*)tt, (jl_value_t*)m->sig, &env); + assert(ti != jl_bottom_type); (void)ti; + } mi = jl_specializations_get_linfo(m, (jl_value_t*)tt, env); + JL_GC_POP(); } - JL_GC_POP(); } } } @@ -2396,7 +2416,7 @@ jl_method_instance_t *jl_get_compile_hint_specialization(jl_tupletype_t *types J size_t count = 0; for (i = 0; i < n; i++) { jl_method_match_t *match1 = (jl_method_match_t*)jl_array_ptr_ref(matches, i); - if (jl_isa_compileable_sig(types, match1->method)) + if (jl_isa_compileable_sig(types, match1->sparams, match1->method)) jl_array_ptr_set(matches, count++, (jl_value_t*)match1); } jl_array_del_end((jl_array_t*)matches, n - count); diff --git 
a/src/julia.h b/src/julia.h index ee4ca50356756..48352364d7a13 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1439,7 +1439,7 @@ STATIC_INLINE int jl_is_concrete_type(jl_value_t *v) JL_NOTSAFEPOINT return jl_is_datatype(v) && ((jl_datatype_t*)v)->isconcretetype; } -JL_DLLEXPORT int jl_isa_compileable_sig(jl_tupletype_t *type, jl_method_t *definition); +JL_DLLEXPORT int jl_isa_compileable_sig(jl_tupletype_t *type, jl_svec_t *sparams, jl_method_t *definition); // type constructors JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *inmodule, int abstract, int mutabl); diff --git a/src/precompile.c b/src/precompile.c index 9c9c79b154a32..ebe7afae69f64 100644 --- a/src/precompile.c +++ b/src/precompile.c @@ -269,7 +269,7 @@ static void jl_compile_all_defs(jl_array_t *mis) size_t i, l = jl_array_len(allmeths); for (i = 0; i < l; i++) { jl_method_t *m = (jl_method_t*)jl_array_ptr_ref(allmeths, i); - if (jl_isa_compileable_sig((jl_tupletype_t*)m->sig, m)) { + if (jl_is_datatype(m->sig) && jl_isa_compileable_sig((jl_tupletype_t*)m->sig, jl_emptysvec, m)) { // method has a single compilable specialization, e.g. its definition // signature is concrete. in this case we can just hint it. 
jl_compile_hint((jl_tupletype_t*)m->sig); @@ -354,7 +354,7 @@ static void *jl_precompile_(jl_array_t *m) mi = (jl_method_instance_t*)item; size_t min_world = 0; size_t max_world = ~(size_t)0; - if (mi != jl_atomic_load_relaxed(&mi->def.method->unspecialized) && !jl_isa_compileable_sig((jl_tupletype_t*)mi->specTypes, mi->def.method)) + if (mi != jl_atomic_load_relaxed(&mi->def.method->unspecialized) && !jl_isa_compileable_sig((jl_tupletype_t*)mi->specTypes, mi->sparam_vals, mi->def.method)) mi = jl_get_specialization1((jl_tupletype_t*)mi->specTypes, jl_atomic_load_acquire(&jl_world_counter), &min_world, &max_world, 0); if (mi) jl_array_ptr_1d_push(m2, (jl_value_t*)mi); diff --git a/stdlib/Random/src/Random.jl b/stdlib/Random/src/Random.jl index 95125422eeee5..bc016fc1cd057 100644 --- a/stdlib/Random/src/Random.jl +++ b/stdlib/Random/src/Random.jl @@ -256,7 +256,7 @@ rand(rng::AbstractRNG, ::UniformT{T}) where {T} = rand(rng, T) rand(rng::AbstractRNG, X) = rand(rng, Sampler(rng, X, Val(1))) # this is needed to disambiguate rand(rng::AbstractRNG, X::Dims) = rand(rng, Sampler(rng, X, Val(1))) -rand(rng::AbstractRNG=default_rng(), ::Type{X}=Float64) where {X} = rand(rng, Sampler(rng, X, Val(1)))::X +rand(rng::AbstractRNG=default_rng(), ::Type{X}=Float64) where {X} = rand(rng, Sampler(rng, X, Val(1)))::X rand(X) = rand(default_rng(), X) rand(::Type{X}) where {X} = rand(default_rng(), X) diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index 00972920d5406..d47c8da4f9872 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -406,7 +406,7 @@ f11366(x::Type{Ref{T}}) where {T} = Ref{x} let f(T) = Type{T} - @test Base.return_types(f, Tuple{Type{Int}}) == [Type{Type{Int}}] + @test Base.return_types(f, Tuple{Type{Int}}) == Any[Type{Type{Int}}] end # issue #9222 diff --git a/test/core.jl b/test/core.jl index a40bbc1440c67..bc25f96182b22 100644 --- a/test/core.jl +++ b/test/core.jl @@ -7885,3 +7885,7 @@ code_typed(f47476, (Int, Int, Int, 
Vararg{Union{Int, NTuple{2,Int}}},)) code_typed(f47476, (Int, Int, Int, Int, Vararg{Union{Int, NTuple{2,Int}}},)) @test f47476(1, 2, 3, 4, 5, 6, (7, 8)) === 2 @test_throws UndefVarError(:N) f47476(1, 2, 3, 4, 5, 6, 7) + +vect47476(::Type{T}) where {T} = T +@test vect47476(Type{Type{Type{Int32}}}) === Type{Type{Type{Int32}}} +@test vect47476(Type{Type{Type{Int64}}}) === Type{Type{Type{Int64}}} diff --git a/test/precompile.jl b/test/precompile.jl index eaf755046d366..3794cd353f41f 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -1493,8 +1493,8 @@ end f(x, y) = x + y f(x::Int, y) = 2x + y end - precompile(M.f, (Int, Any)) - precompile(M.f, (AbstractFloat, Any)) + @test precompile(M.f, (Int, Any)) + @test precompile(M.f, (AbstractFloat, Any)) mis = map(methods(M.f)) do m m.specializations[1] end From a506f4393f8321a834d57c7c271d0641b67d5554 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 22 Nov 2022 11:50:15 -0500 Subject: [PATCH 42/57] ensure types are UnionAll wrapped are cached correctly for widened Vararg methods And fix a related accuracy issue in jl_isa_compileable_sig Follow-up issue found while working on #47476 (cherry picked from commit 9e5e28fa3e85b1d1fee247a814ec7f017a78a83c) --- src/gf.c | 167 +++++++++++++++++++++++++++++++++------------------ test/core.jl | 9 +++ 2 files changed, 117 insertions(+), 59 deletions(-) diff --git a/src/gf.c b/src/gf.c index e2e64fc717483..9092eec0e4ebc 100644 --- a/src/gf.c +++ b/src/gf.c @@ -606,6 +606,46 @@ jl_value_t *jl_nth_slot_type(jl_value_t *sig, size_t i) JL_NOTSAFEPOINT // return 1; //} +static jl_value_t *inst_varargp_in_env(jl_value_t *decl, jl_svec_t *sparams) +{ + jl_value_t *unw = jl_unwrap_unionall(decl); + jl_value_t *vm = jl_tparam(unw, jl_nparams(unw) - 1); + assert(jl_is_vararg(vm)); + int nsp = jl_svec_len(sparams); + if (nsp > 0 && jl_has_free_typevars(vm)) { + JL_GC_PUSH1(&vm); + assert(jl_subtype_env_size(decl) == nsp); + vm = jl_instantiate_type_in_env(vm, (jl_unionall_t*)decl, 
jl_svec_data(sparams)); + assert(jl_is_vararg(vm)); + // rewrap_unionall(lastdeclt, sparams) if any sparams isa TypeVar + // for example, `Tuple{Vararg{Union{Nothing,Int,Val{T}}}} where T` + // and the user called it with `Tuple{Vararg{Union{Nothing,Int},N}}`, then T is unbound + jl_value_t **sp = jl_svec_data(sparams); + while (jl_is_unionall(decl)) { + jl_tvar_t *v = (jl_tvar_t*)*sp; + if (jl_is_typevar(v)) { + // must unwrap and re-wrap Vararg object explicitly here since jl_type_unionall handles it differently + jl_value_t *T = ((jl_vararg_t*)vm)->T; + jl_value_t *N = ((jl_vararg_t*)vm)->N; + int T_has_tv = T && jl_has_typevar(T, v); + int N_has_tv = N && jl_has_typevar(N, v); // n.b. JL_VARARG_UNBOUND check means this should be false + assert(!N_has_tv || N == (jl_value_t*)v); + if (T_has_tv) + vm = jl_type_unionall(v, T); + if (N_has_tv) + N = NULL; + vm = (jl_value_t*)jl_wrap_vararg(vm, N); // this cannot throw for these inputs + } + sp++; + decl = ((jl_unionall_t*)decl)->body; + nsp--; + } + assert(nsp == 0); + JL_GC_POP(); + } + return vm; +} + static jl_value_t *ml_matches(jl_methtable_t *mt, jl_tupletype_t *type, int lim, int include_ambiguous, int intersections, size_t world, int cache_result, @@ -634,10 +674,12 @@ static void jl_compilation_sig( assert(jl_is_tuple_type(tt)); size_t i, np = jl_nparams(tt); size_t nargs = definition->nargs; // == jl_nparams(jl_unwrap_unionall(decl)); + jl_value_t *type_i = NULL; + JL_GC_PUSH1(&type_i); for (i = 0; i < np; i++) { jl_value_t *elt = jl_tparam(tt, i); jl_value_t *decl_i = jl_nth_slot_type(decl, i); - jl_value_t *type_i = jl_rewrap_unionall(decl_i, decl); + type_i = jl_rewrap_unionall(decl_i, decl); size_t i_arg = (i < nargs - 1 ? i : nargs - 1); if (jl_is_kind(type_i)) { @@ -779,15 +821,9 @@ static void jl_compilation_sig( // supertype of any other method signatures. so far we are conservative // and the types we find should be bigger. 
if (jl_nparams(tt) >= nspec && jl_va_tuple_kind((jl_datatype_t*)decl) == JL_VARARG_UNBOUND) { - jl_svec_t *limited = jl_alloc_svec(nspec); - JL_GC_PUSH1(&limited); if (!*newparams) *newparams = tt->parameters; - size_t i; - for (i = 0; i < nspec - 1; i++) { - jl_svecset(limited, i, jl_svecref(*newparams, i)); - } - jl_value_t *lasttype = jl_svecref(*newparams, i - 1); - // if all subsequent arguments are subtypes of lasttype, specialize + type_i = jl_svecref(*newparams, nspec - 2); + // if all subsequent arguments are subtypes of type_i, specialize // on that instead of decl. for example, if decl is // (Any...) // and type is @@ -795,39 +831,35 @@ static void jl_compilation_sig( // then specialize as (Symbol...), but if type is // (Symbol, Int32, Expr) // then specialize as (Any...) - size_t j = i; + size_t j = nspec - 1; int all_are_subtypes = 1; for (; j < jl_svec_len(*newparams); j++) { jl_value_t *paramj = jl_svecref(*newparams, j); if (jl_is_vararg(paramj)) paramj = jl_unwrap_vararg(paramj); - if (!jl_subtype(paramj, lasttype)) { + if (!jl_subtype(paramj, type_i)) { all_are_subtypes = 0; break; } } if (all_are_subtypes) { // avoid Vararg{Type{Type{...}}} - if (jl_is_type_type(lasttype) && jl_is_type_type(jl_tparam0(lasttype))) - lasttype = (jl_value_t*)jl_type_type; - jl_svecset(limited, i, jl_wrap_vararg(lasttype, (jl_value_t*)NULL)); + if (jl_is_type_type(type_i) && jl_is_type_type(jl_tparam0(type_i))) + type_i = (jl_value_t*)jl_type_type; + type_i = (jl_value_t*)jl_wrap_vararg(type_i, (jl_value_t*)NULL); // this cannot throw for these inputs } else { - jl_value_t *unw = jl_unwrap_unionall(decl); - jl_value_t *lastdeclt = jl_tparam(unw, jl_nparams(unw) - 1); - assert(jl_is_vararg(lastdeclt)); - int nsp = jl_svec_len(sparams); - if (nsp > 0 && jl_has_free_typevars(lastdeclt)) { - assert(jl_subtype_env_size(decl) == nsp); - lastdeclt = jl_instantiate_type_in_env(lastdeclt, (jl_unionall_t*)decl, jl_svec_data(sparams)); - // TODO: rewrap_unionall(lastdeclt, 
sparams) if any sparams isa TypeVar??? - // TODO: if we made any replacements above, sparams may now be incorrect - } - jl_svecset(limited, i, lastdeclt); + type_i = inst_varargp_in_env(decl, sparams); + } + jl_svec_t *limited = jl_alloc_svec(nspec); + size_t i; + for (i = 0; i < nspec - 1; i++) { + jl_svecset(limited, i, jl_svecref(*newparams, i)); } + jl_svecset(limited, i, type_i); *newparams = limited; - JL_GC_POP(); } + JL_GC_POP(); } // compute whether this type signature is a possible return value from jl_compilation_sig given a concrete-type for `tt` @@ -865,18 +897,20 @@ JL_DLLEXPORT int jl_isa_compileable_sig( jl_methtable_t *kwmt = mt == jl_kwcall_mt ? jl_kwmethod_table_for(decl) : mt; if ((jl_value_t*)mt != jl_nothing) { // try to refine estimate of min and max - if (kwmt && kwmt != jl_type_type_mt && kwmt != jl_nonfunction_mt && kwmt != jl_kwcall_mt) + if (kwmt != NULL && kwmt != jl_type_type_mt && kwmt != jl_nonfunction_mt && kwmt != jl_kwcall_mt) + // new methods may be added, increasing nspec_min later nspec_min = kwmt->max_args + 2 + 2 * (mt == jl_kwcall_mt); else + // nspec is always nargs+1, regardless of the other contents of these mt nspec_max = nspec_min; } - int isbound = (jl_va_tuple_kind((jl_datatype_t*)decl) == JL_VARARG_UNBOUND); + int isunbound = (jl_va_tuple_kind((jl_datatype_t*)decl) == JL_VARARG_UNBOUND); if (jl_is_vararg(jl_tparam(type, np - 1))) { - if (!isbound || np < nspec_min || np > nspec_max) + if (!isunbound || np < nspec_min || np > nspec_max) return 0; } else { - if (np < nargs - 1 || (isbound && np >= nspec_max)) + if (np < nargs - 1 || (isunbound && np >= nspec_max)) return 0; } } @@ -884,37 +918,37 @@ JL_DLLEXPORT int jl_isa_compileable_sig( return 0; } + jl_value_t *type_i = NULL; + JL_GC_PUSH1(&type_i); for (i = 0; i < np; i++) { jl_value_t *elt = jl_tparam(type, i); - jl_value_t *decl_i = jl_nth_slot_type((jl_value_t*)decl, i); - jl_value_t *type_i = jl_rewrap_unionall(decl_i, decl); size_t i_arg = (i < nargs - 1 ? 
i : nargs - 1); if (jl_is_vararg(elt)) { - elt = jl_unwrap_vararg(elt); - if (jl_has_free_typevars(decl_i)) { - // TODO: in this case, answer semi-conservatively that these varargs are always compilable - // we don't have the ability to get sparams, so deciding if elt - // is a potential result of jl_instantiate_type_in_env for decl_i - // for any sparams that is consistent with the rest of the arguments - // seems like it would be extremely difficult - // and hopefully the upstream code probably gave us something reasonable - continue; - } - else if (jl_egal(elt, decl_i)) { - continue; + type_i = inst_varargp_in_env(decl, sparams); + if (jl_has_free_typevars(type_i)) { + JL_GC_POP(); + return 0; // something went badly wrong? } - else if (jl_is_type_type(elt) && jl_is_type_type(jl_tparam0(elt))) { - return 0; + if (jl_egal(elt, type_i)) + continue; // elt could be chosen by inst_varargp_in_env for these sparams + elt = jl_unwrap_vararg(elt); + if (jl_is_type_type(elt) && jl_is_type_type(jl_tparam0(elt))) { + JL_GC_POP(); + return 0; // elt would be set equal to jl_type_type instead } - // else, it needs to meet the usual rules + // else, elt also needs to meet the usual rules } + jl_value_t *decl_i = jl_nth_slot_type(decl, i); + type_i = jl_rewrap_unionall(decl_i, decl); + if (i_arg > 0 && i_arg <= sizeof(definition->nospecialize) * 8 && (definition->nospecialize & (1 << (i_arg - 1)))) { if (!jl_has_free_typevars(decl_i) && !jl_is_kind(decl_i)) { if (jl_egal(elt, decl_i)) continue; + JL_GC_POP(); return 0; } } @@ -923,10 +957,12 @@ JL_DLLEXPORT int jl_isa_compileable_sig( // kind slots always get guard entries (checking for subtypes of Type) if (jl_subtype(elt, type_i) && !jl_subtype((jl_value_t*)jl_type_type, type_i)) continue; - // TODO: other code paths that could reach here + // TODO: other code paths that could reach here? 
+ JL_GC_POP(); return 0; } else if (jl_is_kind(type_i)) { + JL_GC_POP(); return 0; } @@ -938,22 +974,31 @@ JL_DLLEXPORT int jl_isa_compileable_sig( continue; if (i >= nargs && definition->isva) continue; + JL_GC_POP(); return 0; } - if (!iscalled && very_general_type(type_i)) + if (!iscalled && very_general_type(type_i)) { + JL_GC_POP(); return 0; - if (!jl_is_datatype(elt)) + } + if (!jl_is_datatype(elt)) { + JL_GC_POP(); return 0; + } // if the declared type was not Any or Union{Type, ...}, // then the match must been with kind, such as UnionAll or DataType, // and the result of matching the type signature // needs to be corrected to the concrete type 'kind' (and not to Type) jl_value_t *kind = jl_typeof(jl_tparam0(elt)); - if (kind == jl_bottom_type) + if (kind == jl_bottom_type) { + JL_GC_POP(); return 0; // Type{Union{}} gets normalized to typeof(Union{}) - if (jl_subtype(kind, type_i) && !jl_subtype((jl_value_t*)jl_type_type, type_i)) + } + if (jl_subtype(kind, type_i) && !jl_subtype((jl_value_t*)jl_type_type, type_i)) { + JL_GC_POP(); return 0; // gets turned into a kind + } else if (jl_is_type_type(jl_tparam0(elt)) && // give up on specializing static parameters for Type{Type{Type{...}}} @@ -966,20 +1011,20 @@ JL_DLLEXPORT int jl_isa_compileable_sig( this can be determined using a type intersection. 
*/ if (i < nargs || !definition->isva) { - jl_value_t *di = jl_type_intersection(type_i, (jl_value_t*)jl_type_type); - JL_GC_PUSH1(&di); - assert(di != (jl_value_t*)jl_bottom_type); - if (jl_is_kind(di)) { + type_i = jl_type_intersection(type_i, (jl_value_t*)jl_type_type); + assert(type_i != (jl_value_t*)jl_bottom_type); + if (jl_is_kind(type_i)) { JL_GC_POP(); return 0; } - else if (!jl_types_equal(di, elt)) { + else if (!jl_types_equal(type_i, elt)) { JL_GC_POP(); return 0; } - JL_GC_POP(); + continue; } else { + JL_GC_POP(); return 0; } } @@ -1000,12 +1045,16 @@ JL_DLLEXPORT int jl_isa_compileable_sig( // when called with a subtype of Function but is not called if (elt == (jl_value_t*)jl_function_type) continue; + JL_GC_POP(); return 0; } - if (!jl_is_concrete_type(elt)) + if (!jl_is_concrete_type(elt)) { + JL_GC_POP(); return 0; + } } + JL_GC_POP(); return 1; } diff --git a/test/core.jl b/test/core.jl index bc25f96182b22..f4e463cd61326 100644 --- a/test/core.jl +++ b/test/core.jl @@ -7889,3 +7889,12 @@ code_typed(f47476, (Int, Int, Int, Int, Vararg{Union{Int, NTuple{2,Int}}},)) vect47476(::Type{T}) where {T} = T @test vect47476(Type{Type{Type{Int32}}}) === Type{Type{Type{Int32}}} @test vect47476(Type{Type{Type{Int64}}}) === Type{Type{Type{Int64}}} + +g47476(::Union{Nothing,Int,Val{T}}...) 
where {T} = T +@test_throws UndefVarError(:T) g47476(nothing, 1, nothing, 2, nothing, 3, nothing, 4, nothing, 5) +@test g47476(nothing, 1, nothing, 2, nothing, 3, nothing, 4, nothing, 5, Val(6)) === 6 +let spec = only(methods(g47476)).specializations + @test !isempty(spec) + @test any(mi -> mi !== nothing && Base.isvatuple(mi.specTypes), spec) + @test all(mi -> mi === nothing || !Base.has_free_typevars(mi.specTypes), spec) +end From 3e1373e1cfe5626208af41d55d478afc0637a55f Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 14 Dec 2022 20:15:15 +0100 Subject: [PATCH 43/57] add back wordaround for `Slot objects should not occur in an AST` in Ipython mode (#47878) (cherry picked from commit 7b10d5fe0159e21e8299681c33605f0b10dbdcfa) --- stdlib/REPL/src/REPL.jl | 6 +++--- stdlib/REPL/test/repl.jl | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl index 4c83cdf33508d..9c8712e0d41fc 100644 --- a/stdlib/REPL/src/REPL.jl +++ b/stdlib/REPL/src/REPL.jl @@ -1404,9 +1404,9 @@ end function out_transform(@nospecialize(x), n::Ref{Int}) return quote - let x = $x - $capture_result($n, x) - x + let __temp_val_a72df459 = $x + $capture_result($n, __temp_val_a72df459) + __temp_val_a72df459 end end end diff --git a/stdlib/REPL/test/repl.jl b/stdlib/REPL/test/repl.jl index ab25a56510262..edcb91defc9ab 100644 --- a/stdlib/REPL/test/repl.jl +++ b/stdlib/REPL/test/repl.jl @@ -1641,6 +1641,10 @@ fake_repl() do stdin_write, stdout_read, repl s = sendrepl2("REPL\n", "In [10]") @test contains(s, "Out[9]: REPL") + # Test for https://github.com/JuliaLang/julia/issues/46451 + s = sendrepl2("x_47878 = range(-1; stop = 1)\n", "-1:1") + @test contains(s, "Out[11]: -1:1") + write(stdin_write, '\x04') Base.wait(repltask) end From 5848e99831b6eeede632f81ef89c479b1dbd88b5 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Thu, 15 Dec 2022 03:39:58 -0600 Subject: [PATCH 44/57] Reduce invalidations when loading JuliaData packages 
(#47889) (cherry picked from commit e84634e3b2c7354a4ac99024ed839d3e720a40cb) --- base/Base.jl | 1 + base/array.jl | 7 ++++--- base/loading.jl | 3 ++- base/logging.jl | 4 ++-- base/reinterpretarray.jl | 16 +++++++--------- base/show.jl | 13 +++++++++---- base/strings/util.jl | 2 +- 7 files changed, 26 insertions(+), 20 deletions(-) diff --git a/base/Base.jl b/base/Base.jl index 29a6f9ed4366d..8d207ee909201 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -168,6 +168,7 @@ include("idset.jl") include("iterators.jl") using .Iterators: zip, enumerate, only using .Iterators: Flatten, Filter, product # for generators +using .Iterators: Stateful # compat (was formerly used in reinterpretarray.jl) include("namedtuple.jl") diff --git a/base/array.jl b/base/array.jl index 64d0ac05fd507..5257caabf2d45 100644 --- a/base/array.jl +++ b/base/array.jl @@ -2730,7 +2730,8 @@ keepat!(a::Vector, m::AbstractVector{Bool}) = _keepat!(a, m) # set-like operators for vectors # These are moderately efficient, preserve order, and remove dupes. -_unique_filter!(pred, update!, state) = function (x) +_unique_filter!(pred::P, update!::U, state) where {P,U} = function (x) + # P, U force specialization if pred(x, state) update!(state, x) true @@ -2756,7 +2757,7 @@ union!(v::AbstractVector{T}, itrs...) where {T} = symdiff!(v::AbstractVector{T}, itrs...) where {T} = _grow!(_shrink_filter!(symdiff!(Set{T}(), v, itrs...)), v, itrs) -function _shrink!(shrinker!, v::AbstractVector, itrs) +function _shrink!(shrinker!::F, v::AbstractVector, itrs) where F seen = Set{eltype(v)}() filter!(_grow_filter!(seen), v) shrinker!(seen, itrs...) @@ -2768,7 +2769,7 @@ setdiff!( v::AbstractVector, itrs...) = _shrink!(setdiff!, v, itrs) vectorfilter(T::Type, f, v) = T[x for x in v if f(x)] -function _shrink(shrinker!, itr, itrs) +function _shrink(shrinker!::F, itr, itrs) where F T = promote_eltype(itr, itrs...) keep = shrinker!(Set{T}(itr), itrs...) 
vectorfilter(T, _shrink_filter!(keep), itr) diff --git a/base/loading.jl b/base/loading.jl index ea350ff72d960..7c71167a8c176 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -809,7 +809,8 @@ function explicit_manifest_uuid_path(project_file::String, pkg::PkgId)::Union{No end end # Extensions - for (name, entries::Vector{Any}) in d + for (name, entries) in d + entries = entries::Vector{Any} for entry in entries uuid = get(entry, "uuid", nothing)::Union{Nothing, String} extensions = get(entry, "extensions", nothing)::Union{Nothing, Dict{String, Any}} diff --git a/base/logging.jl b/base/logging.jl index d7dc45122e063..c670d658cdaeb 100644 --- a/base/logging.jl +++ b/base/logging.jl @@ -378,14 +378,14 @@ function logmsg_code(_module, file, line, level, message, exs...) id = $(log_data._id) # Second chance at an early bail-out (before computing the message), # based on arbitrary logger-specific logic. - if _invoked_shouldlog(logger, level, _module, group, id) + if invokelatest(shouldlog, logger, level, _module, group, id) file = $(log_data._file) if file isa String file = Base.fixup_stdlib_path(file) end line = $(log_data._line) local msg, kwargs - $(logrecord) && handle_message( + $(logrecord) && invokelatest(handle_message, logger, level, msg, _module, group, id, file, line; kwargs...) 
end diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl index f198761a09500..1fe0788a1739a 100644 --- a/base/reinterpretarray.jl +++ b/base/reinterpretarray.jl @@ -722,25 +722,23 @@ function CyclePadding(T::DataType) CyclePadding(pad, as) end -using .Iterators: Stateful @assume_effects :total function array_subpadding(S, T) - checked_size = 0 lcm_size = lcm(sizeof(S), sizeof(T)) - s, t = Stateful{<:Any, Any}(CyclePadding(S)), - Stateful{<:Any, Any}(CyclePadding(T)) + s, t = CyclePadding(S), CyclePadding(T) isempty(t) && return true isempty(s) && return false + checked_size = 0 + ps, sstate = iterate(s) # use of Stateful harms inference and makes this vulnerable to invalidation + pad, tstate = iterate(t) while checked_size < lcm_size - # Take padding in T - pad = popfirst!(t) - # See if there's corresponding padding in S while true - ps = peek(s) + # See if there's corresponding padding in S ps.offset > pad.offset && return false intersect(ps, pad) == pad && break - popfirst!(s) + ps, sstate = iterate(s, sstate) end checked_size = pad.offset + pad.size + pad, tstate = iterate(t, tstate) end return true end diff --git a/base/show.jl b/base/show.jl index 8769a414a269e..e05a5874e867f 100644 --- a/base/show.jl +++ b/base/show.jl @@ -1878,8 +1878,12 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In # . print(io, '.') # item - parens = !(field isa Symbol) || (field::Symbol in quoted_syms) - quoted = parens || isoperator(field) + if isa(field, Symbol) + parens = field in quoted_syms + quoted = parens || isoperator(field) + else + parens = quoted = true + end quoted && print(io, ':') parens && print(io, '(') show_unquoted(io, field, indent, 0, quote_level) @@ -2003,10 +2007,11 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In # binary operator (i.e. 
"x + y") elseif func_prec > 0 # is a binary operator + func = func::Symbol # operator_precedence returns func_prec == 0 for non-Symbol na = length(func_args) - if (na == 2 || (na > 2 && isa(func, Symbol) && func in (:+, :++, :*)) || (na == 3 && func === :(:))) && + if (na == 2 || (na > 2 && func in (:+, :++, :*)) || (na == 3 && func === :(:))) && all(a -> !isa(a, Expr) || a.head !== :..., func_args) - sep = func === :(:) ? "$func" : " " * convert(String, string(func))::String * " " # if func::Any, avoid string interpolation (invalidation) + sep = func === :(:) ? "$func" : " $func " if func_prec <= prec show_enclosed_list(io, '(', func_args, sep, ')', indent, func_prec, quote_level, true) diff --git a/base/strings/util.jl b/base/strings/util.jl index 7d48fee9b1c52..dabb84ae65639 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -830,7 +830,7 @@ julia> hex2bytes(a) """ function hex2bytes end -hex2bytes(s) = hex2bytes!(Vector{UInt8}(undef, length(s) >> 1), s) +hex2bytes(s) = hex2bytes!(Vector{UInt8}(undef, length(s)::Int >> 1), s) # special case - valid bytes are checked in the generic implementation function hex2bytes!(dest::AbstractArray{UInt8}, s::String) From 86e8ef94730a35ad4b40572baeb3090ac5924ffd Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Thu, 15 Dec 2022 20:25:50 -0500 Subject: [PATCH 45/57] intersect: fix a minor soundness issue with supertypes (#47813) When doing intersection, we might end up with a value in `env` (as the only possible *value* for that parameter) without properly considering that the parameter might be a TypeVar. 
(cherry picked from commit 26a7dbb8e23e4b61a75b626cae5741ff6fd30ded) --- src/subtype.c | 46 ++++++---------------------------------------- test/docs.jl | 1 + test/subtype.jl | 11 ++++++++++- 3 files changed, 17 insertions(+), 41 deletions(-) diff --git a/src/subtype.c b/src/subtype.c index 1d9d3d875675d..e2b132eedc8e9 100644 --- a/src/subtype.c +++ b/src/subtype.c @@ -2882,48 +2882,14 @@ static void flip_vars(jl_stenv_t *e) // intersection where xd nominally inherits from yd static jl_value_t *intersect_sub_datatype(jl_datatype_t *xd, jl_datatype_t *yd, jl_stenv_t *e, int R, int param) { + // attempt to populate additional constraints into `e` + // if that attempt fails, then return bottom + // otherwise return xd (finish_unionall will later handle propagating those constraints) jl_value_t *isuper = R ? intersect((jl_value_t*)yd, (jl_value_t*)xd->super, e, param) : intersect((jl_value_t*)xd->super, (jl_value_t*)yd, e, param); - if (isuper == jl_bottom_type) return jl_bottom_type; - if (jl_nparams(xd) == 0 || jl_nparams(xd->super) == 0 || !jl_has_free_typevars((jl_value_t*)xd)) - return (jl_value_t*)xd; - jl_value_t *super_pattern=NULL; - JL_GC_PUSH2(&isuper, &super_pattern); - jl_value_t *wrapper = xd->name->wrapper; - super_pattern = jl_rewrap_unionall_((jl_value_t*)((jl_datatype_t*)jl_unwrap_unionall(wrapper))->super, - wrapper); - int envsz = jl_subtype_env_size(super_pattern); - jl_value_t *ii = jl_bottom_type; - { - jl_value_t **env; - JL_GC_PUSHARGS(env, envsz); - jl_stenv_t tempe; - init_stenv(&tempe, env, envsz); - tempe.intersection = tempe.ignore_free = 1; - if (subtype_in_env(isuper, super_pattern, &tempe)) { - jl_value_t *wr = wrapper; - int i; - for(i=0; ivar || - (jl_is_typevar(ei) && lookup(e, (jl_tvar_t*)ei) == NULL)) - env[i] = jl_tparam(xd,i); - wr = ((jl_unionall_t*)wr)->body; - } - JL_TRY { - ii = jl_apply_type(wrapper, env, envsz); - } - JL_CATCH { - ii = jl_bottom_type; - } - } - JL_GC_POP(); - } - JL_GC_POP(); - return ii; + if (isuper == 
jl_bottom_type) + return jl_bottom_type; + return (jl_value_t*)xd; } static jl_value_t *intersect_invariant(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) diff --git a/test/docs.jl b/test/docs.jl index 4399722e864c1..6707278c53847 100644 --- a/test/docs.jl +++ b/test/docs.jl @@ -970,6 +970,7 @@ abstract type $(curmod_prefix)Undocumented.at1{T>:Integer, N} ``` $(curmod_prefix)Undocumented.mt6{Integer, N} +$(curmod_prefix)Undocumented.st5{T>:Integer, N} ``` # Supertype Hierarchy diff --git a/test/subtype.jl b/test/subtype.jl index 70f3dd864cdbe..59e5b82fdc8c0 100644 --- a/test/subtype.jl +++ b/test/subtype.jl @@ -2188,7 +2188,16 @@ for T in (B46871{Int, N} where {N}, B46871{Int}) # intentional duplication end abstract type C38497{e,g<:Tuple,i} end struct Q38497{o,e<:NTuple{o},g} <: C38497{e,g,Array{o}} end -@testintersect(Q38497{<:Any, Tuple{Int}}, C38497, Q38497{1, Tuple{Int}, <:Tuple}) +@testintersect(Q38497{<:Any, Tuple{Int}}, C38497, Q38497{<:Any, Tuple{Int}, <:Tuple}) +# n.b. the only concrete instance of this type is Q38497{1, Tuple{Int}, <:Tuple} (since NTuple{o} also adds an ::Int constraint) +# but this abstract type is also part of the intersection abstractly + +abstract type X38497{T<:Number} end +abstract type Y38497{T>:Integer} <: X38497{T} end +struct Z38497{T>:Int} <: Y38497{T} end +@testintersect(Z38497, X38497, Z38497{T} where Int<:T<:Number) +@testintersect(Z38497, Y38497, Z38497{T} where T>:Integer) +@testintersect(X38497, Y38497, Y38497{T} where Integer<:T<:Number) #issue #33138 @test Vector{Vector{Tuple{T,T}} where Int<:T<:Int} <: Vector{Vector{Tuple{S1,S1} where S<:S1<:S}} where S From 8432d4f376bb7e67f3eec546faf517bf2a4b60f0 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Thu, 15 Dec 2022 20:26:25 -0500 Subject: [PATCH 46/57] make Ctrl-C during sleeping work better (#47901) fixes #46635 co-authored-by: Jameson Nash (cherry picked from commit b6f32bc023ae285a9ed0e7b405b0fb86da0f2f21) --- src/partr.c | 20 ++++++++++---------- 1 file changed, 10 
insertions(+), 10 deletions(-) diff --git a/src/partr.c b/src/partr.c index ec6bbe3e5720a..f5f63f54e7d25 100644 --- a/src/partr.c +++ b/src/partr.c @@ -368,14 +368,14 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, JL_UV_LOCK(); // jl_mutex_lock(&jl_uv_mutex); } if (uvlock) { - int active = 1; - // otherwise, we block until someone asks us for the lock - uv_loop_t *loop = jl_global_event_loop(); - while (active && may_sleep(ptls)) { - if (jl_atomic_load_relaxed(&jl_uv_n_waiters) != 0) - // but if we won the race against someone who actually needs - // the lock to do real work, we need to let them have it instead - break; + int enter_eventloop = may_sleep(ptls); + int active = 0; + if (jl_atomic_load_relaxed(&jl_uv_n_waiters) != 0) + // if we won the race against someone who actually needs + // the lock to do real work, we need to let them have it instead + enter_eventloop = 0; + if (enter_eventloop) { + uv_loop_t *loop = jl_global_event_loop(); loop->stop_flag = 0; JULIA_DEBUG_SLEEPWAKE( ptls->uv_run_enter = cycleclock() ); active = uv_run(loop, UV_RUN_ONCE); @@ -388,11 +388,11 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, // that just wanted to steal libuv from us. We will just go // right back to sleep on the individual wake signal to let // them take it from us without conflict. 
- if (!may_sleep(ptls)) { + if (active || !may_sleep(ptls)) { start_cycles = 0; continue; } - if (!jl_atomic_load_relaxed(&_threadedregion) && active && ptls->tid == 0) { + if (!enter_eventloop && !jl_atomic_load_relaxed(&_threadedregion) && ptls->tid == 0) { // thread 0 is the only thread permitted to run the event loop // so it needs to stay alive, just spin-looping if necessary if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) { From eba98e56ba568b0871490baf7988898b40458bb9 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 19 Dec 2022 12:53:55 +0100 Subject: [PATCH 47/57] revert promotions of abstract arrays inside other arrays (#47893) (cherry picked from commit 427432e5c6ea90aa2f4616a380b4f4322ff30bbe) --- base/range.jl | 5 ----- stdlib/LinearAlgebra/src/adjtrans.jl | 4 ++-- test/bitarray.jl | 4 ++-- test/broadcast.jl | 8 ++++---- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/base/range.jl b/base/range.jl index 9986fa6a21def..9d12ae1001784 100644 --- a/base/range.jl +++ b/base/range.jl @@ -1277,11 +1277,6 @@ el_same(::Type{T}, a::Type{<:AbstractArray{T,n}}, b::Type{<:AbstractArray{S,n}}) el_same(::Type{T}, a::Type{<:AbstractArray{S,n}}, b::Type{<:AbstractArray{T,n}}) where {T,S,n} = b el_same(::Type, a, b) = promote_typejoin(a, b) -promote_result(::Type{<:AbstractArray}, ::Type{<:AbstractArray}, ::Type{T}, ::Type{S}) where {T,S} = (@inline; promote_type(T,S)) -promote_result(::Type{T}, ::Type{S}, ::Type{Bottom}, ::Type{Bottom}) where {T<:AbstractArray,S<:AbstractArray} = (@inline; promote_typejoin(T,S)) -# If no promote_rule is defined, both directions give Bottom. In that case use typejoin on the eltypes instead and give Array as the container. 
-promote_result(::Type{<:AbstractArray{T,n}}, ::Type{<:AbstractArray{S,n}}, ::Type{Bottom}, ::Type{Bottom}) where {T,S,n} = (@inline; Array{promote_type(T,S),n}) - promote_rule(a::Type{UnitRange{T1}}, b::Type{UnitRange{T2}}) where {T1,T2} = el_same(promote_type(T1, T2), a, b) UnitRange{T}(r::UnitRange{T}) where {T<:Real} = r diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl index ef815b3ad708b..3642e49778d04 100644 --- a/stdlib/LinearAlgebra/src/adjtrans.jl +++ b/stdlib/LinearAlgebra/src/adjtrans.jl @@ -237,8 +237,8 @@ julia> transpose(v) * v # compute the dot product For a matrix of matrices, the individual blocks are recursively operated on: ```jldoctest -julia> C = reshape(1:4, 2, 2) -2×2 reshape(::UnitRange{Int64}, 2, 2) with eltype Int64: +julia> C = [1 3; 2 4] +2×2 Matrix{Int64}: 1 3 2 4 diff --git a/test/bitarray.jl b/test/bitarray.jl index 05abd610682a2..dd1d0d7d6c5a4 100644 --- a/test/bitarray.jl +++ b/test/bitarray.jl @@ -98,9 +98,9 @@ end timesofar("conversions") @testset "Promotions for size $sz" for (sz, T) in allsizes - @test isequal(promote(falses(sz...), zeros(sz...)), + @test_broken isequal(promote(falses(sz...), zeros(sz...)), (zeros(sz...), zeros(sz...))) - @test isequal(promote(trues(sz...), ones(sz...)), + @test_broken isequal(promote(trues(sz...), ones(sz...)), (ones(sz...), ones(sz...))) ae = falses(1, sz...) 
ex = (@test_throws ErrorException promote(ae, ones(sz...))).value diff --git a/test/broadcast.jl b/test/broadcast.jl index bd9cb9e8e8fa3..1893acc8c1149 100644 --- a/test/broadcast.jl +++ b/test/broadcast.jl @@ -699,11 +699,11 @@ end @test_throws Base.CanonicalIndexError A[2] .= 0 @test_throws MethodError A[3] .= 0 A = [[1, 2, 3], 4:5] - @test A isa Vector{Vector{Int}} A[1] .= 0 - A[2] .= 0 - @test A[1] == [0, 0, 0] - @test A[2] == [0, 0] + @test A[1] isa Vector{Int} + @test A[2] isa UnitRange + @test A[1] == [0,0,0] + @test_throws Base.CanonicalIndexError A[2] .= 0 end # Issue #22180 From a16ffd6e7906e38fc9cdf85f262e39275df620ae Mon Sep 17 00:00:00 2001 From: KristofferC Date: Tue, 20 Dec 2022 17:10:40 +0100 Subject: [PATCH 48/57] Revert "Emit safepoints at function entry (#41616)" This reverts commit 1a7a1316a9df94eafef537be2eca6600fb422a13. --- base/reflection.jl | 3 --- src/cgutils.cpp | 1 + src/codegen.cpp | 6 +----- src/julia.h | 4 +--- test/compiler/codegen.jl | 9 +++------ 5 files changed, 6 insertions(+), 17 deletions(-) diff --git a/base/reflection.jl b/base/reflection.jl index 1adc69291934e..a5aaf0ad20d4a 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -1092,7 +1092,6 @@ struct CodegenParams prefer_specsig::Cint gnu_pubnames::Cint debug_info_kind::Cint - safepoint_on_entry::Cint lookup::Ptr{Cvoid} @@ -1101,14 +1100,12 @@ struct CodegenParams function CodegenParams(; track_allocations::Bool=true, code_coverage::Bool=true, prefer_specsig::Bool=false, gnu_pubnames=true, debug_info_kind::Cint = default_debug_info_kind(), - safepoint_on_entry::Bool=true, lookup::Ptr{Cvoid}=cglobal(:jl_rettype_inferred), generic_context = nothing) return new( Cint(track_allocations), Cint(code_coverage), Cint(prefer_specsig), Cint(gnu_pubnames), debug_info_kind, - Cint(safepoint_on_entry), lookup, generic_context) end end diff --git a/src/cgutils.cpp b/src/cgutils.cpp index ba13e1cbe86c9..6027a3b18bd94 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -3896,6 
+3896,7 @@ static Value *emit_defer_signal(jl_codectx_t &ctx) return ctx.builder.CreateInBoundsGEP(ctx.types().T_sigatomic, ptls, ArrayRef(offset), "jl_defer_signal"); } + #ifndef JL_NDEBUG static int compare_cgparams(const jl_cgparams_t *a, const jl_cgparams_t *b) { diff --git a/src/codegen.cpp b/src/codegen.cpp index cdc5e00a26281..cdbc833267bb8 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1209,7 +1209,6 @@ extern "C" { 1, #endif (int) DICompileUnit::DebugEmissionKind::FullDebug, - 1, jl_rettype_inferred, NULL }; } @@ -7464,11 +7463,8 @@ static jl_llvm_functions_t Instruction &prologue_end = ctx.builder.GetInsertBlock()->back(); - // step 11a. Emit the entry safepoint - if (JL_FEAT_TEST(ctx, safepoint_on_entry)) - emit_gc_safepoint(ctx.builder, get_current_ptls(ctx), ctx.tbaa().tbaa_const); - // step 11b. Do codegen in control flow order + // step 11. Do codegen in control flow order std::vector workstack; std::map BB; std::map come_from_bb; diff --git a/src/julia.h b/src/julia.h index 48352364d7a13..fa6b8d32d10c0 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2229,11 +2229,9 @@ typedef struct { // controls the emission of debug-info. mirrors the clang options int gnu_pubnames; // can we emit the gnu pubnames debuginfo - int debug_info_kind; // Enum for line-table-only, line-directives-only, + int debug_info_kind; // Enum for line-table-only, line-directives-only, // limited, standalone - int safepoint_on_entry; // Emit a safepoint on entry to each function - // Cache access. Default: jl_rettype_inferred. jl_codeinstance_lookup_t lookup; diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl index 11cbd21b793a1..0b9cc30257cc7 100644 --- a/test/compiler/codegen.jl +++ b/test/compiler/codegen.jl @@ -15,12 +15,9 @@ function libjulia_codegen_name() is_debug_build ? "libjulia-codegen-debug" : "libjulia-codegen" end -# The tests below assume a certain format and safepoint_on_entry=true breaks that. 
-function get_llvm(@nospecialize(f), @nospecialize(t), raw=true, dump_module=false, optimize=true) - params = Base.CodegenParams(safepoint_on_entry=false) - d = InteractiveUtils._dump_function(f, t, false, false, !raw, dump_module, :att, optimize, :none, false, params) - sprint(print, d) -end +# `_dump_function` might be more efficient but it doesn't really matter here... +get_llvm(@nospecialize(f), @nospecialize(t), raw=true, dump_module=false, optimize=true) = + sprint(code_llvm, f, t, raw, dump_module, optimize) if !is_debug_build && opt_level > 0 # Make sure getptls call is removed at IR level with optimization on From 5a684f0852a93f62d1b05017662bfa8387c77f54 Mon Sep 17 00:00:00 2001 From: KristofferC Date: Tue, 20 Dec 2022 17:21:40 +0100 Subject: [PATCH 49/57] Revert "improve performance issue of `@nospecialize`-d keyword func call (#47059)" This reverts commit 95cfd62d0953395b9b9f37399a9e761cb44cee6e. --- base/boot.jl | 3 +- base/compiler/abstractinterpretation.jl | 12 +++---- base/compiler/ssair/passes.jl | 12 +------ base/compiler/tfuncs.jl | 20 ++---------- base/namedtuple.jl | 11 ++----- test/compiler/inference.jl | 12 ------- test/compiler/inline.jl | 42 ------------------------- test/compiler/irutils.jl | 1 - 8 files changed, 14 insertions(+), 99 deletions(-) diff --git a/base/boot.jl b/base/boot.jl index 3cd03b5398223..35e3e0399cc69 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -620,8 +620,7 @@ end NamedTuple() = NamedTuple{(),Tuple{}}(()) -eval(Core, :(NamedTuple{names}(args::Tuple) where {names} = - $(Expr(:splatnew, :(NamedTuple{names,typeof(args)}), :args)))) +NamedTuple{names}(args::Tuple) where {names} = NamedTuple{names,typeof(args)}(args) using .Intrinsics: sle_int, add_int diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 86834c84f7847..d0ff33a96b525 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -2180,16 +2180,16 @@ function 
abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtyp elseif ehead === :splatnew t, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv)) nothrow = false # TODO: More precision - if length(e.args) == 2 && isconcretedispatch(t) && !ismutabletype(t) + if length(e.args) == 2 && isconcretetype(t) && !ismutabletype(t) at = abstract_eval_value(interp, e.args[2], vtypes, sv) n = fieldcount(t) if isa(at, Const) && isa(at.val, Tuple) && n == length(at.val::Tuple) && - let t = t, at = at; all(i::Int->getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n); end - nothrow = isexact + let t = t, at = at; _all(i->getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n); end + nothrow = isexact && isconcretedispatch(t) t = Const(ccall(:jl_new_structt, Any, (Any, Any), t, at.val)) - elseif isa(at, PartialStruct) && at ⊑ᵢ Tuple && n == length(at.fields::Vector{Any}) && - let t = t, at = at; all(i::Int->(at.fields::Vector{Any})[i] ⊑ᵢ fieldtype(t, i), 1:n); end - nothrow = isexact + elseif isa(at, PartialStruct) && at ⊑ Tuple && n == length(at.fields::Vector{Any}) && + let t = t, at = at; _all(i->(at.fields::Vector{Any})[i] ⊑ fieldtype(t, i), 1:n); end + nothrow = isexact && isconcretedispatch(t) t = PartialStruct(t, at.fields::Vector{Any}) end end diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 9f37105259f4e..a511e898f6b32 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -401,16 +401,6 @@ function lift_leaves(compact::IncrementalCompact, end lift_arg!(compact, leaf, cache_key, def, 1+field, lifted_leaves) continue - # NOTE we can enable this, but most `:splatnew` expressions are transformed into - # `:new` expressions by the inlinear - # elseif isexpr(def, :splatnew) && length(def.args) == 2 && isa(def.args[2], AnySSAValue) - # tplssa = def.args[2]::AnySSAValue - # tplexpr = compact[tplssa][:inst] - # if is_known_call(tplexpr, tuple, compact) && 1 ≤ field < length(tplexpr.args) - # 
lift_arg!(compact, tplssa, cache_key, tplexpr, 1+field, lifted_leaves) - # continue - # end - # return nothing elseif is_getfield_captures(def, compact) # Walk to new_opaque_closure ocleaf = def.args[2] @@ -479,7 +469,7 @@ function lift_arg!( end end lifted_leaves[cache_key] = LiftedValue(lifted) - return nothing + nothing end function walk_to_def(compact::IncrementalCompact, @nospecialize(leaf)) diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index 8faeb5db53794..9cfcbcce80f56 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -407,19 +407,11 @@ add_tfunc(Core.sizeof, 1, 1, sizeof_tfunc, 1) function nfields_tfunc(@nospecialize(x)) isa(x, Const) && return Const(nfields(x.val)) isa(x, Conditional) && return Const(0) - xt = widenconst(x) - x = unwrap_unionall(xt) + x = unwrap_unionall(widenconst(x)) isconstType(x) && return Const(nfields(x.parameters[1])) if isa(x, DataType) && !isabstracttype(x) - if x.name === Tuple.name - isvatuple(x) && return Int - return Const(length(x.types)) - elseif x.name === _NAMEDTUPLE_NAME - length(x.parameters) == 2 || return Int - names = x.parameters[1] - isa(names, Tuple{Vararg{Symbol}}) || return nfields_tfunc(rewrap_unionall(x.parameters[2], xt)) - return Const(length(names)) - else + if !(x.name === Tuple.name && isvatuple(x)) && + !(x.name === _NAMEDTUPLE_NAME && !isconcretetype(x)) return Const(isdefined(x, :types) ? length(x.types) : length(x.name.names)) end end @@ -1660,12 +1652,6 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...) 
end if istuple return Type{<:appl} - elseif isa(appl, DataType) && appl.name === _NAMEDTUPLE_NAME && length(appl.parameters) == 2 && - (appl.parameters[1] === () || appl.parameters[2] === Tuple{}) - # if the first/second parameter of `NamedTuple` is known to be empty, - # the second/first argument should also be empty tuple type, - # so refine it here - return Const(NamedTuple{(),Tuple{}}) end ans = Type{appl} for i = length(outervars):-1:1 diff --git a/base/namedtuple.jl b/base/namedtuple.jl index c994cd977be08..1612a06518841 100644 --- a/base/namedtuple.jl +++ b/base/namedtuple.jl @@ -335,7 +335,7 @@ reverse(nt::NamedTuple) = NamedTuple{reverse(keys(nt))}(reverse(values(nt))) end """ - structdiff(a::NamedTuple, b::Union{NamedTuple,Type{NamedTuple}}) + structdiff(a::NamedTuple{an}, b::Union{NamedTuple{bn},Type{NamedTuple{bn}}}) where {an,bn} Construct a copy of named tuple `a`, except with fields that exist in `b` removed. `b` can be a named tuple, or a type of the form `NamedTuple{field_names}`. 
@@ -343,19 +343,14 @@ Construct a copy of named tuple `a`, except with fields that exist in `b` remove function structdiff(a::NamedTuple{an}, b::Union{NamedTuple{bn}, Type{NamedTuple{bn}}}) where {an, bn} if @generated names = diff_names(an, bn) - isempty(names) && return (;) # just a fast pass idx = Int[ fieldindex(a, names[n]) for n in 1:length(names) ] types = Tuple{Any[ fieldtype(a, idx[n]) for n in 1:length(idx) ]...} vals = Any[ :(getfield(a, $(idx[n]))) for n in 1:length(idx) ] - return :( NamedTuple{$names,$types}(($(vals...),)) ) + :( NamedTuple{$names,$types}(($(vals...),)) ) else names = diff_names(an, bn) - # N.B this early return is necessary to get a better type stability, - # and also allows us to cut off the cost from constructing - # potentially type unstable closure passed to the `map` below - isempty(names) && return (;) types = Tuple{Any[ fieldtype(typeof(a), names[n]) for n in 1:length(names) ]...} - return NamedTuple{names,types}(map(n::Symbol->getfield(a, n), names)) + NamedTuple{names,types}(map(Fix1(getfield, a), names)) end end diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index d47c8da4f9872..3e10d75bb2fe7 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -1540,11 +1540,6 @@ end @test nfields_tfunc(Tuple{Int, Vararg{Int}}) === Int @test nfields_tfunc(Tuple{Int, Integer}) === Const(2) @test nfields_tfunc(Union{Tuple{Int, Float64}, Tuple{Int, Int}}) === Const(2) -@test nfields_tfunc(@NamedTuple{a::Int,b::Integer}) === Const(2) -@test nfields_tfunc(NamedTuple{(:a,:b),T} where T<:Tuple{Int,Integer}) === Const(2) -@test nfields_tfunc(NamedTuple{(:a,:b)}) === Const(2) -@test nfields_tfunc(NamedTuple{names,Tuple{Any,Any}} where names) === Const(2) -@test nfields_tfunc(Union{NamedTuple{(:a,:b)},NamedTuple{(:c,:d)}}) === Const(2) using Core.Compiler: typeof_tfunc @test typeof_tfunc(Tuple{Vararg{Int}}) == Type{Tuple{Vararg{Int,N}}} where N @@ -2369,13 +2364,6 @@ end |> only === Int # Equivalence of 
Const(T.instance) and T for singleton types @test Const(nothing) ⊑ Nothing && Nothing ⊑ Const(nothing) -# `apply_type_tfunc` should always return accurate result for empty NamedTuple case -import Core: Const -import Core.Compiler: apply_type_tfunc -@test apply_type_tfunc(Const(NamedTuple), Const(()), Type{T} where T<:Tuple{}) === Const(typeof((;))) -@test apply_type_tfunc(Const(NamedTuple), Const(()), Type{T} where T<:Tuple) === Const(typeof((;))) -@test apply_type_tfunc(Const(NamedTuple), Tuple{Vararg{Symbol}}, Type{Tuple{}}) === Const(typeof((;))) - # Don't pessimize apply_type to anything worse than Type and yield Bottom for invalid Unions @test only(Base.return_types(Core.apply_type, Tuple{Type{Union}})) == Type{Union{}} @test only(Base.return_types(Core.apply_type, Tuple{Type{Union},Any})) == Type diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index 1119c6d01b8e9..2eacda4b02581 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -1749,48 +1749,6 @@ f_ifelse_3(a, b) = Core.ifelse(a, true, b) @test fully_eliminated(f_ifelse_2, Tuple{Any, Any}; retval=Core.Argument(3)) @test !fully_eliminated(f_ifelse_3, Tuple{Any, Any}) -# inline_splatnew for abstract `NamedTuple` -@eval construct_splatnew(T, fields) = $(Expr(:splatnew, :T, :fields)) -for tt = Any[(Int,Int), (Integer,Integer), (Any,Any)] - let src = code_typed1(tt) do a, b - construct_splatnew(NamedTuple{(:a,:b),typeof((a,b))}, (a,b)) - end - @test count(issplatnew, src.code) == 0 - @test count(isnew, src.code) == 1 - end -end - -# optimize away `NamedTuple`s used for handling `@nospecialize`d keyword-argument -# https://github.com/JuliaLang/julia/pull/47059 -abstract type CallInfo end -struct NewInstruction - stmt::Any - type::Any - info::CallInfo - line::Int32 - flag::UInt8 - function NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info::CallInfo), - line::Int32, flag::UInt8) - return new(stmt, type, info, line, flag) - end -end -@nospecialize -function 
NewInstruction(newinst::NewInstruction; - stmt=newinst.stmt, - type=newinst.type, - info::CallInfo=newinst.info, - line::Int32=newinst.line, - flag::UInt8=newinst.flag) - return NewInstruction(stmt, type, info, line, flag) -end -@specialize -let src = code_typed1((NewInstruction,Any,Any,CallInfo)) do newinst, stmt, type, info - NewInstruction(newinst; stmt, type, info) - end - @test count(issplatnew, src.code) == 0 - @test count(iscall((src,NamedTuple)), src.code) == 0 - @test count(isnew, src.code) == 1 -end # Test that inlining can still use nothrow information from concrete-eval # even if the result itself is too big to be inlined, and nothrow is not diff --git a/test/compiler/irutils.jl b/test/compiler/irutils.jl index ef8fe3efbb315..76f883d6cea2c 100644 --- a/test/compiler/irutils.jl +++ b/test/compiler/irutils.jl @@ -8,7 +8,6 @@ get_code(args...; kwargs...) = code_typed1(args...; kwargs...).code # check if `x` is a statement with a given `head` isnew(@nospecialize x) = isexpr(x, :new) -issplatnew(@nospecialize x) = isexpr(x, :splatnew) isreturn(@nospecialize x) = isa(x, ReturnNode) # check if `x` is a dynamic call of a given function From 327e0815d6dc7efef0cfaf82c2d2e9c01d55e304 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Mon, 19 Dec 2022 22:17:13 +0100 Subject: [PATCH 50/57] only load extensions once dependencies have finished loading (#47927) (cherry picked from commit 9be3c85e49f51fa558a3e6522ed79fe32ff2617b) --- base/loading.jl | 2 +- test/loading.jl | 4 ++-- test/project/Extensions/ExtDep.jl/Project.toml | 3 +++ test/project/Extensions/ExtDep.jl/src/ExtDep.jl | 4 ++++ .../Extensions/HasDepWithExtensions.jl/Manifest.toml | 10 ++++++++-- test/project/Extensions/SomePackage/Project.toml | 4 ++++ test/project/Extensions/SomePackage/src/SomePackage.jl | 5 +++++ 7 files changed, 27 insertions(+), 5 deletions(-) create mode 100644 test/project/Extensions/SomePackage/Project.toml create mode 100644 
test/project/Extensions/SomePackage/src/SomePackage.jl diff --git a/base/loading.jl b/base/loading.jl index 7c71167a8c176..61c1f13a3eef3 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1102,7 +1102,7 @@ function run_extension_callbacks(; force::Bool=false) for extid in EXT_DORMITORY extid.succeeded && continue !force && extid.triggered && continue - if all(x -> haskey(Base.loaded_modules, x), extid.triggers) + if all(x -> haskey(Base.loaded_modules, x) && !haskey(package_locks, x), extid.triggers) ext_not_allowed_load = nothing extid.triggered = true # It is possible that some of the triggers were loaded in an environment diff --git a/test/loading.jl b/test/loading.jl index 99f39ae237532..d52a7246abe7c 100644 --- a/test/loading.jl +++ b/test/loading.jl @@ -998,8 +998,8 @@ end push!(empty!(DEPOT_PATH), joinpath(tmp, "depot")) proj = joinpath(@__DIR__, "project", "Extensions", "HasDepWithExtensions.jl") - for i in 1:2 # Once when requiring precomilation, once where it is already precompiled - cmd = `$(Base.julia_cmd()) --project=$proj --startup-file=no -e ' + for compile in (`--compiled-modules=no`, ``, ``) # Once when requiring precomilation, once where it is already precompiled + cmd = `$(Base.julia_cmd()) $compile --project=$proj --startup-file=no -e ' begin using HasExtensions # Base.get_extension(HasExtensions, :Extension) === nothing || error("unexpectedly got an extension") diff --git a/test/project/Extensions/ExtDep.jl/Project.toml b/test/project/Extensions/ExtDep.jl/Project.toml index 93c5e3925f06b..d246934b7f958 100644 --- a/test/project/Extensions/ExtDep.jl/Project.toml +++ b/test/project/Extensions/ExtDep.jl/Project.toml @@ -1,3 +1,6 @@ name = "ExtDep" uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c" version = "0.1.0" + +[deps] +SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8" diff --git a/test/project/Extensions/ExtDep.jl/src/ExtDep.jl b/test/project/Extensions/ExtDep.jl/src/ExtDep.jl index f0ca8c62d04b2..1c0022d879f51 100644 --- 
a/test/project/Extensions/ExtDep.jl/src/ExtDep.jl +++ b/test/project/Extensions/ExtDep.jl/src/ExtDep.jl @@ -1,5 +1,9 @@ module ExtDep +# loading this package makes the check for loading extensions trigger +# which tests #47921 +using SomePackage + struct ExtDepStruct end end # module ExtDep diff --git a/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml b/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml index c96e3ef508ca8..52542fc822094 100644 --- a/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml +++ b/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml @@ -2,9 +2,10 @@ julia_version = "1.10.0-DEV" manifest_format = "2.0" -project_hash = "7cbe1857ecc6692a8cc8be428a5ad5073531ff98" +project_hash = "d523b3401f72a1ed34b7b43749fd2655c6b78542" [[deps.ExtDep]] +deps = ["SomePackage"] path = "../ExtDep.jl" uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c" version = "0.1.0" @@ -15,11 +16,16 @@ uuid = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" version = "0.1.0" [[deps.HasExtensions]] -weakdeps = ["ExtDep", "ExtDep2"] path = "../HasExtensions.jl" uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8" version = "0.1.0" +weakdeps = ["ExtDep", "ExtDep2"] [deps.HasExtensions.extensions] Extension = "ExtDep" ExtensionFolder = ["ExtDep", "ExtDep2"] + +[[deps.SomePackage]] +path = "../SomePackage" +uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8" +version = "0.1.0" diff --git a/test/project/Extensions/SomePackage/Project.toml b/test/project/Extensions/SomePackage/Project.toml new file mode 100644 index 0000000000000..b2d43340b39a8 --- /dev/null +++ b/test/project/Extensions/SomePackage/Project.toml @@ -0,0 +1,4 @@ +name = "SomePackage" +uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8" +authors = ["Kristoffer "] +version = "0.1.0" diff --git a/test/project/Extensions/SomePackage/src/SomePackage.jl b/test/project/Extensions/SomePackage/src/SomePackage.jl new file mode 100644 index 0000000000000..a41e0b7482bae --- /dev/null +++ 
b/test/project/Extensions/SomePackage/src/SomePackage.jl @@ -0,0 +1,5 @@ +module SomePackage + +greet() = print("Hello World!") + +end # module SomePackage From 95cb3a82eaa767c6a4956b0834f59c5fd5e6e2c0 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 20 Dec 2022 08:30:20 -0600 Subject: [PATCH 51/57] Precompile cache: always add worklist CIs (#47924) We cache only those external CodeInstances that link back to the package being precompiled. Formerly we required a backedge; this PR adds any whose `specTypes` could only link back to the package. This scoops up a few runtime-dispatched CodeInstances and their callees. (cherry picked from commit 1f0700a29a4e0250c5c31cbc02e624009d1ed741) --- src/staticdata_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index 3d02dddbd5a70..19ccc591ea2cd 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -188,7 +188,7 @@ static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, if (jl_is_method(mod)) mod = ((jl_method_t*)mod)->module; assert(jl_is_module(mod)); - if (mi->precompiled || !jl_object_in_image((jl_value_t*)mod)) { + if (mi->precompiled || !jl_object_in_image((jl_value_t*)mod) || type_in_worklist(mi->specTypes)) { return 1; } if (!mi->backedges) { From f17d1df54e09fa17b598e86e2b4cff78cdd8a089 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Wed, 21 Dec 2022 01:07:55 +0900 Subject: [PATCH 52/57] put back the old QuickSort, PartialQuickSort, and MergeSort algorithms...
(#47788) ...as they were in 1.8 and rename the new PartialQuickSort to QuickerSort Also improve the documentation and API for constructing QuickerSort and test the API Co-authored-by: Lilith Hafner (cherry picked from commit 8cdb17b48a005a97889f07593c4a619add46ea76) --- base/sort.jl | 241 ++++++++++++++++++++++++++++++++++++------------ test/sorting.jl | 43 ++++++--- 2 files changed, 212 insertions(+), 72 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 2dd81829312d0..6d9f65c61b390 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -86,7 +86,7 @@ issorted(itr; issorted(itr, ord(lt,by,rev,order)) function partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}, o::Ordering) - _sort!(v, _PartialQuickSort(k), o, (;)) + _sort!(v, QuickerSort(k), o, (;)) maybeview(v, k) end @@ -931,49 +931,40 @@ end """ - PartialQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}, next::Algorithm) <: Algorithm + QuickerSort(next::Algorithm=SMALL_ALGORITHM) <: Algorithm + QuickerSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}=lo, next::Algorithm=SMALL_ALGORITHM) <: Algorithm -Indicate that a sorting function should use the partial quick sort algorithm. +Use the `QuickerSort` algorithm with the `next` algorithm as a base case. -Partial quick sort finds and sorts the elements that would end up in positions `lo:hi` using -[`QuickSort`](@ref). It is recursive and uses the `next` algorithm for small chunks +`QuickerSort` is like `QuickSort`, but utilizes scratch space to operate faster and allow +for the possibility of maintaining stability. + +If `lo` and `hi` are provided, finds and sorts the elements in the range `lo:hi`, reordering +but not necessarily sorting other elements in the process. If `lo` or `hi` is `missing`, it +is treated as the first or last index of the input, respectively. + +`lo` and `hi` may be specified together as an `AbstractUnitRange`. Characteristics: * *stable*: preserves the ordering of elements which compare equal (e.g. 
"a" and "A" in a sort of letters which ignores case). * *not in-place* in memory. - * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). + * *divide-and-conquer*: sort strategy similar to [`QuickSort`](@ref). + * *linear runtime* if `length(lo:hi)` is constant + * *quadratic worst case runtime* in pathological cases + (vanishingly rare for non-malicious input) """ -struct PartialQuickSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}, T<:Algorithm} <: Algorithm +struct QuickerSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}, T<:Algorithm} <: Algorithm lo::L hi::H next::T end -PartialQuickSort(k::Integer) = PartialQuickSort(missing, k, SMALL_ALGORITHM) -PartialQuickSort(k::OrdinalRange) = PartialQuickSort(first(k), last(k), SMALL_ALGORITHM) -_PartialQuickSort(k::Integer) = InitialOptimizations(PartialQuickSort(k:k)) -_PartialQuickSort(k::OrdinalRange) = InitialOptimizations(PartialQuickSort(k)) - -""" - QuickSort - -Indicate that a sorting function should use the quick sort algorithm. +QuickerSort(next::Algorithm=SMALL_ALGORITHM) = QuickerSort(missing, missing, next) +QuickerSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}) = QuickerSort(lo, hi, SMALL_ALGORITHM) +QuickerSort(lo::Union{Integer, Missing}, next::Algorithm=SMALL_ALGORITHM) = QuickerSort(lo, lo, next) +QuickerSort(r::OrdinalRange, next::Algorithm=SMALL_ALGORITHM) = QuickerSort(first(r), last(r), next) -Quick sort picks a pivot element, partitions the array based on the pivot, -and then sorts the elements before and after the pivot recursively. - -Characteristics: - * *stable*: preserves the ordering of elements which compare equal - (e.g. "a" and "A" in a sort of letters which ignores case). - * *not in-place* in memory. - * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). - * *good performance* for almost all large collections. 
- * *quadratic worst case runtime* in pathological cases - (vanishingly rare for non-malicious input) -""" -const QuickSort = PartialQuickSort(missing, missing, SMALL_ALGORITHM) - -# select a pivot for QuickSort +# select a pivot for QuickerSort # # This method is redefined to rand(lo:hi) in Random.jl # We can't use rand here because it is not available in Core.Compiler and @@ -1013,7 +1004,7 @@ function partition!(t::AbstractVector, lo::Integer, hi::Integer, offset::Integer pivot, lo-offset end -function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering, kw; +function _sort!(v::AbstractVector, a::QuickerSort, o::Ordering, kw; t=nothing, offset=nothing, swap=false, rev=false) @getkw lo hi scratch @@ -1029,7 +1020,7 @@ function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering, kw; @inbounds v[j] = pivot swap = !swap - # For QuickSort, a.lo === a.hi === missing, so the first two branches get skipped + # For QuickerSort(), a.lo === a.hi === missing, so the first two branches get skipped if !ismissing(a.lo) && j <= a.lo # Skip sorting the lower part swap && copyto!(v, lo, t, lo+offset, j-lo) rev && reverse!(v, lo, j-1) @@ -1225,7 +1216,7 @@ the initial optimizations because they can change the input vector's type and or make them `UIntMappable`. If the input is not [`UIntMappable`](@ref), then we perform a presorted check and dispatch -to [`QuickSort`](@ref). +to [`QuickerSort`](@ref). Otherwise, we dispatch to [`InsertionSort`](@ref) for inputs with `length <= 40` and then perform a presorted check ([`CheckSorted`](@ref)). @@ -1257,7 +1248,7 @@ Consequently, we apply [`RadixSort`](@ref) for any reasonably long inputs that r stage. Finally, if the input has length less than 80, we dispatch to [`InsertionSort`](@ref) and -otherwise we dispatch to [`QuickSort`](@ref). +otherwise we dispatch to [`QuickerSort`](@ref). 
""" const DEFAULT_STABLE = InitialOptimizations( IsUIntMappable( @@ -1267,9 +1258,9 @@ const DEFAULT_STABLE = InitialOptimizations( ConsiderCountingSort( ConsiderRadixSort( Small{80}( - QuickSort)))))), + QuickerSort())))))), StableCheckSorted( - QuickSort))) + QuickerSort()))) """ DEFAULT_UNSTABLE @@ -1483,7 +1474,7 @@ function partialsortperm!(ix::AbstractVector{<:Integer}, v::AbstractVector, end # do partial quicksort - _sort!(ix, _PartialQuickSort(k), Perm(ord(lt, by, rev, order), v), (;)) + _sort!(ix, QuickerSort(k), Perm(ord(lt, by, rev, order), v), (;)) maybeview(ix, k) end @@ -1863,18 +1854,53 @@ end ### Unused constructs for backward compatibility ### -struct MergeSortAlg{T <: Algorithm} <: Algorithm - next::T +## Old algorithms ## + +struct QuickSortAlg <: Algorithm end +struct MergeSortAlg <: Algorithm end + +""" + PartialQuickSort{T <: Union{Integer,OrdinalRange}} + +Indicate that a sorting function should use the partial quick sort +algorithm. Partial quick sort returns the smallest `k` elements sorted from smallest +to largest, finding them and sorting them using [`QuickSort`](@ref). + +Characteristics: + * *not stable*: does not preserve the ordering of elements which + compare equal (e.g. "a" and "A" in a sort of letters which + ignores case). + * *in-place* in memory. + * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). +""" +struct PartialQuickSort{T <: Union{Integer,OrdinalRange}} <: Algorithm + k::T end """ - MergeSort + QuickSort -Indicate that a sorting function should use the merge sort algorithm. +Indicate that a sorting function should use the quick sort +algorithm, which is *not* stable. -Merge sort divides the collection into subcollections and -repeatedly merges them, sorting each subcollection at each step, -until the entire collection has been recombined in sorted form. +Characteristics: + * *not stable*: does not preserve the ordering of elements which + compare equal (e.g. 
"a" and "A" in a sort of letters which + ignores case). + * *in-place* in memory. + * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). + * *good performance* for large collections. +""" +const QuickSort = QuickSortAlg() + +""" + MergeSort + +Indicate that a sorting function should use the merge sort +algorithm. Merge sort divides the collection into +subcollections and repeatedly merges them, sorting each +subcollection at each step, until the entire +collection has been recombined in sorted form. Characteristics: * *stable*: preserves the ordering of elements which compare @@ -1883,21 +1909,94 @@ Characteristics: * *not in-place* in memory. * *divide-and-conquer* sort strategy. """ -const MergeSort = MergeSortAlg(SMALL_ALGORITHM) +const MergeSort = MergeSortAlg() -function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw; t=nothing, offset=nothing) - @getkw lo hi scratch +# selectpivot! +# +# Given 3 locations in an array (lo, mi, and hi), sort v[lo], v[mi], v[hi]) and +# choose the middle value as a pivot +# +# Upon return, the pivot is in v[lo], and v[hi] is guaranteed to be +# greater than the pivot + +@inline function selectpivot!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) + @inbounds begin + mi = midpoint(lo, hi) + + # sort v[mi] <= v[lo] <= v[hi] such that the pivot is immediately in place + if lt(o, v[lo], v[mi]) + v[mi], v[lo] = v[lo], v[mi] + end + + if lt(o, v[hi], v[lo]) + if lt(o, v[hi], v[mi]) + v[hi], v[lo], v[mi] = v[lo], v[mi], v[hi] + else + v[hi], v[lo] = v[lo], v[hi] + end + end + + # return the pivot + return v[lo] + end +end + +# partition! 
+# +# select a pivot, and partition v according to the pivot + +function partition!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) + pivot = selectpivot!(v, lo, hi, o) + # pivot == v[lo], v[hi] > pivot + i, j = lo, hi + @inbounds while true + i += 1; j -= 1 + while lt(o, v[i], pivot); i += 1; end; + while lt(o, pivot, v[j]); j -= 1; end; + i >= j && break + v[i], v[j] = v[j], v[i] + end + v[j], v[lo] = pivot, v[j] + + # v[j] == pivot + # v[k] >= pivot for k > j + # v[i] <= pivot for i < j + return j +end + +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::QuickSortAlg, o::Ordering) + @inbounds while lo < hi + hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) + j = partition!(v, lo, hi, o) + if j-lo < hi-j + # recurse on the smaller chunk + # this is necessary to preserve O(log(n)) + # stack space in the worst case (rather than O(n)) + lo < (j-1) && sort!(v, lo, j-1, a, o) + lo = j+1 + else + j+1 < hi && sort!(v, j+1, hi, a, o) + hi = j-1 + end + end + return v +end + +sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering, t0::Vector{T}) where T = + invoke(sort!, Tuple{typeof.((v, lo, hi, a, o))..., AbstractVector{T}}, v, lo, hi, a, o, t0) # For disambiguation +function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering, + t0::Union{AbstractVector{T}, Nothing}=nothing) where T @inbounds if lo < hi - hi-lo <= SMALL_THRESHOLD && return _sort!(v, a.next, o, kw) + hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) m = midpoint(lo, hi) - if t === nothing - scratch, t = make_scratch(scratch, eltype(v), m-lo+1) - end + t = t0 === nothing ? 
similar(v, m-lo+1) : t0 + length(t) < m-lo+1 && resize!(t, m-lo+1) + Base.require_one_based_indexing(t) - _sort!(v, a, o, (;kw..., hi=m, scratch); t, offset) - _sort!(v, a, o, (;kw..., lo=m+1, scratch); t, offset) + sort!(v, lo, m, a, o, t) + sort!(v, m+1, hi, a, o, t) i, j = 1, lo while j <= m @@ -1924,9 +2023,37 @@ function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw; t=nothing, end end - scratch + return v +end + +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort, + o::Ordering) + @inbounds while lo < hi + hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) + j = partition!(v, lo, hi, o) + + if j <= first(a.k) + lo = j+1 + elseif j >= last(a.k) + hi = j-1 + else + # recurse on the smaller chunk + # this is necessary to preserve O(log(n)) + # stack space in the worst case (rather than O(n)) + if j-lo < hi-j + lo < (j-1) && sort!(v, lo, j-1, a, o) + lo = j+1 + else + hi > (j+1) && sort!(v, j+1, hi, a, o) + hi = j-1 + end + end + end + return v end +## Old extensibility mechanisms ## + # Support 3-, 5-, and 6-argument versions of sort! for calling into the internals in the old way sort!(v::AbstractVector, a::Algorithm, o::Ordering) = sort!(v, firstindex(v), lastindex(v), a, o) function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering) @@ -1952,8 +2079,4 @@ function _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw) end end -# Keep old internal types so that people can keep dispatching with -# sort!(::AbstractVector, ::Integer, ::Integer, ::Base.QuickSortAlg, ::Ordering) = ... 
-const QuickSortAlg = typeof(QuickSort) - end # module Sort diff --git a/test/sorting.jl b/test/sorting.jl index 614946a8cc4f6..eb5020547c789 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -79,9 +79,8 @@ end end @testset "stability" begin - for Alg in [InsertionSort, MergeSort, QuickSort, Base.DEFAULT_STABLE, - PartialQuickSort(missing, 1729, Base.Sort.SMALL_ALGORITHM), - PartialQuickSort(1729, missing, Base.Sort.SMALL_ALGORITHM)] + for Alg in [InsertionSort, MergeSort, Base.Sort.QuickerSort(), Base.DEFAULT_STABLE, + Base.Sort.QuickerSort(missing, 1729), Base.Sort.QuickerSort(1729, missing)] @test issorted(sort(1:2000, alg=Alg, by=x->0)) @test issorted(sort(1:2000, alg=Alg, by=x->x÷100)) end @@ -334,7 +333,7 @@ end @test c == v # stable algorithms - for alg in [MergeSort, QuickSort, PartialQuickSort(1:n), Base.DEFAULT_STABLE] + for alg in [MergeSort, Base.Sort.QuickerSort(), Base.Sort.QuickerSort(1:n), Base.DEFAULT_STABLE] p = sortperm(v, alg=alg, rev=rev) p2 = sortperm(float(v), alg=alg, rev=rev) @test p == p2 @@ -382,7 +381,7 @@ end end v = randn_with_nans(n,0.1) - for alg in [InsertionSort, MergeSort, QuickSort, PartialQuickSort(n), Base.DEFAULT_UNSTABLE, Base.DEFAULT_STABLE], + for alg in [InsertionSort, MergeSort, Base.Sort.QuickerSort(), Base.Sort.QuickerSort(1, n), Base.DEFAULT_UNSTABLE, Base.DEFAULT_STABLE], rev in [false,true] alg === InsertionSort && n >= 3000 && continue # test float sorting with NaNs @@ -589,7 +588,7 @@ end @testset "fallback" begin @test adaptive_sort_test(rand(1:typemax(Int32), len), by=x->x^2)# fallback - @test adaptive_sort_test(rand(Int, len), by=x->0, trusted=QuickSort) + @test adaptive_sort_test(rand(Int, len), by=x->0, trusted=Base.Sort.QuickerSort()) end @test adaptive_sort_test(rand(Int, 20)) # InsertionSort @@ -691,15 +690,16 @@ end @testset "invalid lt (#11429)" begin # lt must be a total linear order (e.g. < not <=) so this usage is # not allowed. 
Consequently, none of the behavior tested in this - # testset is gaurunteed to work in future minor versions of Julia. + # testset is guaranteed to work in future minor versions of Julia. + + safe_algs = [InsertionSort, MergeSort, Base.Sort.QuickerSort(), Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] n = 1000 v = rand(1:5, n); s = sort(v); # Nevertheless, it still works... - for alg in [InsertionSort, MergeSort, QuickSort, - Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + for alg in safe_algs @test sort(v, alg=alg, lt = <=) == s end @test partialsort(v, 172, lt = <=) == s[172] @@ -709,16 +709,14 @@ end # where i < j if and only if lt(o, v[j], v[i]). This invariant holds even for # this invalid lt order. perm = reverse(sortperm(v, rev=true)) - for alg in [InsertionSort, MergeSort, QuickSort, - Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + for alg in safe_algs @test sort(1:n, alg=alg, lt = (i,j) -> v[i]<=v[j]) == perm end @test partialsort(1:n, 172, lt = (i,j) -> v[i]<=v[j]) == perm[172] @test partialsort(1:n, 315:415, lt = (i,j) -> v[i]<=v[j]) == perm[315:415] # lt can be very poorly behaved and sort will still permute its input in some way. 
- for alg in [InsertionSort, MergeSort, QuickSort, - Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + for alg in safe_algs @test sort!(sort(v, alg=alg, lt = (x,y) -> rand([false, true]))) == s end @test partialsort(v, 172, lt = (x,y) -> rand([false, true])) ∈ 1:5 @@ -901,6 +899,25 @@ end @test issorted(sort(rand(Int8, 600))) end +@testset "QuickerSort API" begin + bsqs = Base.Sort.QuickerSort + @test bsqs(1, 2, MergeSort) === bsqs(1, 2, MergeSort) + @test bsqs(missing, 2, MergeSort) === bsqs(missing, 2, MergeSort) + @test bsqs(1, missing, MergeSort) === bsqs(1, missing, MergeSort) + @test bsqs(missing, missing, MergeSort) === bsqs(missing, missing, MergeSort) + @test bsqs(1, MergeSort) === bsqs(1, 1, MergeSort) + @test bsqs(missing, MergeSort) === bsqs(missing, missing, MergeSort) + @test bsqs(MergeSort) === bsqs(missing, missing, MergeSort) + + @test bsqs(1, 2) === bsqs(1, 2, InsertionSort) + @test bsqs(missing, 2) === bsqs(missing, 2, InsertionSort) + @test bsqs(1, missing) === bsqs(1, missing, InsertionSort) + @test bsqs(missing, missing) === bsqs(missing, missing, InsertionSort) + @test bsqs(1) === bsqs(1, 1, InsertionSort) + @test bsqs(missing) === bsqs(missing, missing, InsertionSort) + @test bsqs() === bsqs(missing, missing, InsertionSort) +end + # This testset is at the end of the file because it is slow. 
@testset "searchsorted" begin numTypes = [ Int8, Int16, Int32, Int64, Int128, From 3ea7f6c4dfa692a15a3cb9ec2e5427dec5d0556b Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Tue, 20 Dec 2022 10:09:18 -0500 Subject: [PATCH 53/57] add bounds check to Slices indexing (#47622) Co-authored-by: Simon Byrne (cherry picked from commit d7363d894f95e7168fb490a64b65cd2c2301a11b) --- base/slicearray.jl | 14 +++++++++----- test/arrayops.jl | 9 +++++++++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/base/slicearray.jl b/base/slicearray.jl index 506cc900ba781..fae353dbe7690 100644 --- a/base/slicearray.jl +++ b/base/slicearray.jl @@ -85,7 +85,7 @@ the ordering of the dimensions will match those in `dims`. If `drop = false`, th `Slices` will have the same dimensionality as the underlying array, with inner dimensions having size 1. -See [`stack`](@ref)`(slices; dims)` for the inverse of `eachcol(A; dims::Integer, drop=true)`. +See [`stack`](@ref)`(slices; dims)` for the inverse of `eachslice(A; dims::Integer)`. See also [`eachrow`](@ref), [`eachcol`](@ref), [`mapslices`](@ref) and [`selectdim`](@ref). @@ -232,9 +232,13 @@ size(s::Slices) = map(length, s.axes) return map(l -> l === (:) ? (:) : c[l], s.slicemap) end -Base.@propagate_inbounds getindex(s::Slices{P,SM,AX,S,N}, I::Vararg{Int,N}) where {P,SM,AX,S,N} = - view(s.parent, _slice_index(s, I...)...) -Base.@propagate_inbounds setindex!(s::Slices{P,SM,AX,S,N}, val, I::Vararg{Int,N}) where {P,SM,AX,S,N} = - s.parent[_slice_index(s, I...)...] = val +@inline function getindex(s::Slices{P,SM,AX,S,N}, I::Vararg{Int,N}) where {P,SM,AX,S,N} + @boundscheck checkbounds(s, I...) + @inbounds view(s.parent, _slice_index(s, I...)...) +end +@inline function setindex!(s::Slices{P,SM,AX,S,N}, val, I::Vararg{Int,N}) where {P,SM,AX,S,N} + @boundscheck checkbounds(s, I...) + @inbounds s.parent[_slice_index(s, I...)...] 
= val +end parent(s::Slices) = s.parent diff --git a/test/arrayops.jl b/test/arrayops.jl index c2698b3c70a90..e7ac6a1132568 100644 --- a/test/arrayops.jl +++ b/test/arrayops.jl @@ -2293,6 +2293,15 @@ end f2(a) = eachslice(a, dims=2) @test (@inferred f2(a)) == eachcol(a) end + + @testset "eachslice bounds checking" begin + # https://github.com/JuliaLang/julia/pull/32310#issuecomment-1146911510 + A = eachslice(rand(2,3), dims = 2, drop = false) + @test_throws BoundsError A[2, 1] + @test_throws BoundsError A[4] + @test_throws BoundsError A[2,3] = [4,5] + @test_throws BoundsError A[2,3] .= [4,5] + end end ### From 22789c05e883edb69fb29f3bc891c760e23b41c1 Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Mon, 12 Dec 2022 19:07:31 -0500 Subject: [PATCH 54/57] Reduce codegen lock scope (#46836) (cherry picked from commit 09a6ff8cabefc4ecfa8cacb5185c2d94b026bced) --- src/aotcompile.cpp | 33 ++++++++++++++++++--------------- src/gf.c | 6 ++---- src/jitlayers.cpp | 2 +- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 26ba66fa96737..7325adde8b060 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -267,7 +267,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm jl_method_instance_t *mi = NULL; jl_code_info_t *src = NULL; JL_GC_PUSH1(&src); - JL_LOCK(&jl_codegen_lock); auto ct = jl_current_task; ct->reentrant_codegen++; orc::ThreadSafeContext ctx; @@ -278,16 +277,18 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm } orc::ThreadSafeModule &clone = llvmmod ? 
*unwrap(llvmmod) : backing; auto ctxt = clone.getContext(); - jl_codegen_params_t params(ctxt); - params.params = cgparams; + uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); - params.imaging = imaging; - // compile all methods for the current world and type-inference world + + JL_LOCK(&jl_codegen_lock); + jl_codegen_params_t params(ctxt); + params.params = cgparams; + params.imaging = imaging; size_t compile_for[] = { jl_typeinf_world, jl_atomic_load_acquire(&jl_world_counter) }; for (int worlds = 0; worlds < 2; worlds++) { params.world = compile_for[worlds]; @@ -332,15 +333,18 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm // finally, make sure all referenced methods also get compiled or fixed up jl_compile_workqueue(emitted, *clone.getModuleUnlocked(), params, policy); } + JL_UNLOCK(&jl_codegen_lock); // Might GC JL_GC_POP(); // process the globals array, before jl_merge_module destroys them - std::vector<std::string> gvars; + std::vector<std::string> gvars(params.globals.size()); data->jl_value_to_llvm.resize(params.globals.size()); + size_t idx = 0; for (auto &global : params.globals) { - data->jl_value_to_llvm.at(gvars.size()) = global.first; - gvars.push_back(std::string(global.second->getName())); + gvars[idx] = global.second->getName().str(); + data->jl_value_to_llvm[idx] = global.first; + idx++; } CreateNativeMethods += emitted.size(); @@ -423,7 +427,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm jl_ExecutionEngine->releaseContext(std::move(ctx)); } ct->reentrant_codegen--; - JL_UNLOCK(&jl_codegen_lock); // Might GC return (void*)data; } @@ -1013,17 +1016,18 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz // emit this function into a new llvm module if (src && jl_is_code_info(src)) { - JL_LOCK(&jl_codegen_lock); auto ctx = 
jl_ExecutionEngine->getContext(); - jl_codegen_params_t output(*ctx); - output.world = world; - output.params = &params; - orc::ThreadSafeModule m = jl_create_llvm_module(name_from_method_instance(mi), output.tsctx, output.imaging); + orc::ThreadSafeModule m = jl_create_llvm_module(name_from_method_instance(mi), *ctx, imaging_default()); uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); + JL_LOCK(&jl_codegen_lock); + jl_codegen_params_t output(*ctx); + output.world = world; + output.params = &params; auto decls = jl_emit_code(m, mi, src, jlrettype, output); + JL_UNLOCK(&jl_codegen_lock); // Might GC Function *F = NULL; if (m) { @@ -1059,7 +1063,6 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz JL_GC_POP(); if (measure_compile_time_enabled) jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); - JL_UNLOCK(&jl_codegen_lock); // Might GC if (F) { dump->TSM = wrap(new orc::ThreadSafeModule(std::move(m))); dump->F = wrap(F); diff --git a/src/gf.c b/src/gf.c index 9092eec0e4ebc..6829de529d27d 100644 --- a/src/gf.c +++ b/src/gf.c @@ -3537,8 +3537,6 @@ int jl_has_concrete_subtype(jl_value_t *typ) return ((jl_datatype_t*)typ)->has_concrete_subtype; } -#define typeinf_lock jl_codegen_lock - JL_DLLEXPORT void jl_typeinf_timing_begin(void) { jl_task_t *ct = jl_current_task; @@ -3561,7 +3559,7 @@ JL_DLLEXPORT void jl_typeinf_timing_end(void) JL_DLLEXPORT void jl_typeinf_lock_begin(void) { - JL_LOCK(&typeinf_lock); + JL_LOCK(&jl_codegen_lock); //Although this is claiming to be a typeinfer lock, it is actually //affecting the codegen lock count, not type inference's inferencing count jl_task_t *ct = jl_current_task; @@ -3572,7 +3570,7 @@ JL_DLLEXPORT void jl_typeinf_lock_end(void) { jl_task_t *ct = jl_current_task; ct->reentrant_codegen--; - 
JL_UNLOCK(&typeinf_lock); + JL_UNLOCK(&jl_codegen_lock); } #ifdef __cplusplus diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 21147d02f4997..d79b8e11dfb4e 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -544,9 +544,9 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world, } JL_GC_POP(); } + JL_UNLOCK(&jl_codegen_lock); if (!--ct->reentrant_codegen && measure_compile_time_enabled) jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); - JL_UNLOCK(&jl_codegen_lock); } if (specfptr != 0) return jl_dump_fptr_asm(specfptr, raw_mc, asm_variant, debuginfo, binary); From 9a592dd2316da2abf541ab5511e2dd4a39fe99af Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 27 Dec 2022 14:46:24 +0100 Subject: [PATCH 55/57] Implement support for object caching through pkgimages (#47184) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements caching native code in "package images" (pkgimages). We now write two serialization files, one ending in `*.ji` and the other with the platform dynamic library extension (e.g., `*.so`). The `*.ji` contains "extended" header information (including the source-code dump for Revise), whereas the dynamic library includes the Julia objects, including LLVM-generated native code. Native code is compiled once during precompilation and then a second time to build a "clean" module. When we find an edge to an external function (already cached in a loaded pkgimage), we emit a global variable which we will patch during loading with the address of the function to call. This allows us to leverage the standard multiversioning capabilities. 
Co-authored-by: Tim Holy Co-authored-by: Kristoffer Carlsson Co-authored-by: Mosè Giordano Co-authored-by: Ian Butterworth Co-authored-by: Max Horn Co-authored-by: Michael Schlottke-Lakemper Co-authored-by: Alex Ames (cherry picked from commit a2db90fe8d9158923ebd5f45c443b12968d4e379) --- Makefile | 13 + base/Base.jl | 1 + base/Makefile | 4 + base/linking.jl | 147 +++++++ base/loading.jl | 292 ++++++++++--- base/options.jl | 1 + base/util.jl | 31 +- contrib/cache_stdlibs.jl | 49 +++ contrib/print_sorted_stdlibs.jl | 11 +- deps/checksums/compilersupportlibraries | 184 ++++---- doc/make.jl | 1 + doc/src/devdocs/pkgimg.md | 48 +++ doc/src/devdocs/sysimg.md | 2 +- doc/src/manual/code-loading.md | 9 +- doc/src/manual/command-line-interface.md | 9 +- doc/src/manual/environment-variables.md | 6 +- doc/src/manual/methods.md | 36 +- doc/src/manual/modules.md | 24 +- doc/src/manual/performance-tips.md | 2 +- src/aotcompile.cpp | 121 +++++- src/codegen-stubs.c | 9 +- src/codegen.cpp | 47 ++- src/coverage.cpp | 2 +- src/debug-registry.h | 16 +- src/debuginfo.cpp | 92 ++-- src/debuginfo.h | 2 +- src/disasm.cpp | 2 +- src/jitlayers.h | 4 +- src/jl_exported_data.inc | 2 + src/jl_exported_funcs.inc | 4 + src/jloptions.c | 34 +- src/jloptions.h | 1 + src/julia.h | 5 +- src/julia_internal.h | 9 +- src/llvm-multiversioning.cpp | 78 ++-- src/precompile.c | 398 ++++-------------- src/precompile_utils.c | 306 ++++++++++++++ src/processor.cpp | 4 +- src/processor.h | 9 +- src/processor_arm.cpp | 9 +- src/processor_fallback.cpp | 9 +- src/processor_x86.cpp | 9 +- src/staticdata.c | 279 +++++++----- src/staticdata_utils.c | 54 ++- .../CompilerSupportLibraries_jll/Project.toml | 2 +- test/compiler/contextual.jl | 6 +- test/loading.jl | 6 +- test/precompile.jl | 126 +++++- 48 files changed, 1751 insertions(+), 764 deletions(-) create mode 100644 base/linking.jl create mode 100644 contrib/cache_stdlibs.jl create mode 100644 doc/src/devdocs/pkgimg.md create mode 100644 src/precompile_utils.c 
diff --git a/Makefile b/Makefile index 7159fa1854fe7..9ff447e04c144 100644 --- a/Makefile +++ b/Makefile @@ -247,13 +247,21 @@ ifeq ($(OS),WINNT) -$(INSTALL_M) $(wildcard $(build_bindir)/*.dll) $(DESTDIR)$(bindir)/ ifeq ($(JULIA_BUILD_MODE),release) -$(INSTALL_M) $(build_libdir)/libjulia.dll.a $(DESTDIR)$(libdir)/ + -$(INSTALL_M) $(build_libdir)/libjulia-internal.dll.a $(DESTDIR)$(libdir)/ else ifeq ($(JULIA_BUILD_MODE),debug) -$(INSTALL_M) $(build_libdir)/libjulia-debug.dll.a $(DESTDIR)$(libdir)/ + -$(INSTALL_M) $(build_libdir)/libjulia-internal-debug.dll.a $(DESTDIR)$(libdir)/ endif # We have a single exception; we want 7z.dll to live in libexec, not bin, so that 7z.exe can find it. -mv $(DESTDIR)$(bindir)/7z.dll $(DESTDIR)$(libexecdir)/ -$(INSTALL_M) $(build_bindir)/libopenlibm.dll.a $(DESTDIR)$(libdir)/ + -$(INSTALL_M) $(build_libdir)/libssp.dll.a $(DESTDIR)$(libdir)/ + # The rest are compiler dependencies, as an example memcpy is exported by msvcrt + # These are files from mingw32 and required for creating shared libraries like our caches. 
+ -$(INSTALL_M) $(build_libdir)/libgcc_s.a $(DESTDIR)$(libdir)/ + -$(INSTALL_M) $(build_libdir)/libgcc.a $(DESTDIR)$(libdir)/ + -$(INSTALL_M) $(build_libdir)/libmsvcrt.a $(DESTDIR)$(libdir)/ else # Copy over .dSYM directories directly for Darwin @@ -318,6 +326,11 @@ else ifeq ($(JULIA_BUILD_MODE),debug) $(INSTALL_M) $(build_private_libdir)/sys-debug.$(SHLIB_EXT) $(DESTDIR)$(private_libdir) endif + # Cache stdlibs + @$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no $(JULIAHOME)/contrib/cache_stdlibs.jl) + # CI uses `--check-bounds=yes` which impacts the cache flags + @$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no --check-bounds=yes $(JULIAHOME)/contrib/cache_stdlibs.jl) + # Copy in all .jl sources as well mkdir -p $(DESTDIR)$(datarootdir)/julia/base $(DESTDIR)$(datarootdir)/julia/test cp -R -L $(JULIAHOME)/base/* $(DESTDIR)$(datarootdir)/julia/base diff --git a/base/Base.jl b/base/Base.jl index 8d207ee909201..50b7f4822ed05 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -418,6 +418,7 @@ include("threadcall.jl") include("uuid.jl") include("pkgid.jl") include("toml_parser.jl") +include("linking.jl") include("loading.jl") # misc useful functions & macros diff --git a/base/Makefile b/base/Makefile index bb79549aeea2e..e4b28f1380705 100644 --- a/base/Makefile +++ b/base/Makefile @@ -81,6 +81,10 @@ ifeq ($(DARWIN_FRAMEWORK), 1) @echo "const DARWIN_FRAMEWORK_NAME = \"$(FRAMEWORK_NAME)\"" >> $@ else @echo "const DARWIN_FRAMEWORK = false" >> $@ +endif +ifeq ($(OS), Darwin) + @echo "const MACOS_PRODUCT_VERSION = \"$(shell sw_vers -productVersion)\"" >> $@ + @echo "const MACOS_PLATFORM_VERSION = \"$(shell xcrun --show-sdk-version)\"" >> $@ endif @echo "const BUILD_TRIPLET = \"$(BB_TRIPLET_LIBGFORTRAN_CXXABI)\"" >> $@ diff --git a/base/linking.jl b/base/linking.jl new file mode 100644 index 0000000000000..288279347f1c5 --- /dev/null +++ b/base/linking.jl @@ -0,0 +1,147 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license +module Linking + +import Base.Libc: Libdl + +# inlined LLD_jll +# These get calculated in __init__() +const PATH = Ref("") +const LIBPATH = Ref("") +const PATH_list = String[] +const LIBPATH_list = String[] +const lld_path = Ref{String}() +const lld_exe = Sys.iswindows() ? "lld.exe" : "lld" + +if Sys.iswindows() + const LIBPATH_env = "PATH" + const LIBPATH_default = "" + const pathsep = ';' +elseif Sys.isapple() + const LIBPATH_env = "DYLD_FALLBACK_LIBRARY_PATH" + const LIBPATH_default = "~/lib:/usr/local/lib:/lib:/usr/lib" + const pathsep = ':' +else + const LIBPATH_env = "LD_LIBRARY_PATH" + const LIBPATH_default = "" + const pathsep = ':' +end + +function adjust_ENV!(env::Dict, PATH::String, LIBPATH::String, adjust_PATH::Bool, adjust_LIBPATH::Bool) + if adjust_LIBPATH + LIBPATH_base = get(env, LIBPATH_env, expanduser(LIBPATH_default)) + if !isempty(LIBPATH_base) + env[LIBPATH_env] = string(LIBPATH, pathsep, LIBPATH_base) + else + env[LIBPATH_env] = LIBPATH + end + end + if adjust_PATH && (LIBPATH_env != "PATH" || !adjust_LIBPATH) + if !isempty(get(env, "PATH", "")) + env["PATH"] = string(PATH, pathsep, env["PATH"]) + else + env["PATH"] = PATH + end + end + return env +end + +function __init_lld_path() + # Prefer our own bundled lld, but if we don't have one, pick it up off of the PATH + # If this is an in-tree build, `lld` will live in `tools`. 
Otherwise, it'll be in `libexec` + for bundled_lld_path in (joinpath(Sys.BINDIR, Base.LIBEXECDIR, lld_exe), + joinpath(Sys.BINDIR, "..", "tools", lld_exe), + joinpath(Sys.BINDIR, lld_exe)) + if isfile(bundled_lld_path) + lld_path[] = abspath(bundled_lld_path) + return + end + end + lld_path[] = something(Sys.which(lld_exe), lld_exe) + return +end + +const VERBOSE = Ref{Bool}(false) + +function __init__() + VERBOSE[] = parse(Bool, get(ENV, "JULIA_VERBOSE_LINKING", "false")) + + __init_lld_path() + PATH[] = dirname(lld_path[]) + if Sys.iswindows() + # On windows, the dynamic libraries (.dll) are in Sys.BINDIR ("usr\\bin") + append!(LIBPATH_list, [abspath(Sys.BINDIR, Base.LIBDIR, "julia"), Sys.BINDIR]) + else + append!(LIBPATH_list, [abspath(Sys.BINDIR, Base.LIBDIR, "julia"), abspath(Sys.BINDIR, Base.LIBDIR)]) + end + LIBPATH[] = join(LIBPATH_list, pathsep) + return +end + +function lld(; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true) + env = adjust_ENV!(copy(ENV), PATH[], LIBPATH[], adjust_PATH, adjust_LIBPATH) + return Cmd(Cmd([lld_path[]]); env) +end + +function ld() + default_args = `` + @static if Sys.iswindows() + # LLD supports mingw style linking + flavor = "gnu" + m = Sys.ARCH == :x86_64 ? "i386pep" : "i386pe" + default_args = `-m $m -Bdynamic --enable-auto-image-base --allow-multiple-definition` + elseif Sys.isapple() + flavor = "darwin" + arch = Sys.ARCH == :aarch64 ? 
:arm64 : Sys.ARCH + default_args = `-arch $arch -undefined dynamic_lookup -platform_version macos $(Base.MACOS_PRODUCT_VERSION) $(Base.MACOS_PLATFORM_VERSION)` + else + flavor = "gnu" + end + + `$(lld()) -flavor $flavor $default_args` +end + +const WHOLE_ARCHIVE = if Sys.isapple() + "-all_load" +else + "--whole-archive" +end + +const NO_WHOLE_ARCHIVE = if Sys.isapple() + "" +else + "--no-whole-archive" +end + +const SHARED = if Sys.isapple() + "-dylib" +else + "-shared" +end + +is_debug() = ccall(:jl_is_debugbuild, Cint, ()) == 1 +libdir() = abspath(Sys.BINDIR, Base.LIBDIR) +private_libdir() = abspath(Sys.BINDIR, Base.PRIVATE_LIBDIR) +if Sys.iswindows() + shlibdir() = Sys.BINDIR +else + shlibdir() = libdir() +end + +function link_image_cmd(path, out) + LIBDIR = "-L$(libdir())" + PRIVATE_LIBDIR = "-L$(private_libdir())" + SHLIBDIR = "-L$(shlibdir())" + LIBS = is_debug() ? ("-ljulia-debug", "-ljulia-internal-debug") : ("-ljulia", "-ljulia-internal") + @static if Sys.iswindows() + LIBS = (LIBS..., "-lopenlibm", "-lssp", "-lgcc_s", "-lgcc", "-lmsvcrt") + end + + V = VERBOSE[] ? 
"--verbose" : "" + `$(ld()) $V $SHARED -o $out $WHOLE_ARCHIVE $path $NO_WHOLE_ARCHIVE $LIBDIR $PRIVATE_LIBDIR $SHLIBDIR $LIBS` +end + +function link_image(path, out, internal_stderr::IO = stderr, internal_stdout::IO = stdout) + run(link_image_cmd(path, out), Base.DevNull(), stderr, stdout) +end + +end # module Linking diff --git a/base/loading.jl b/base/loading.jl index 61c1f13a3eef3..e92c0b864d156 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -908,7 +908,8 @@ function find_all_in_cache_path(pkg::PkgId) isdir(path) || continue for file in readdir(path, sort = false) # no sort given we sort later if !((pkg.uuid === nothing && file == entryfile * ".ji") || - (pkg.uuid !== nothing && startswith(file, entryfile * "_"))) + (pkg.uuid !== nothing && startswith(file, entryfile * "_") && + endswith(file, ".ji"))) continue end filepath = joinpath(path, file) @@ -925,13 +926,15 @@ function find_all_in_cache_path(pkg::PkgId) end end +ocachefile_from_cachefile(cachefile) = string(chopsuffix(cachefile, ".ji"), ".", Base.Libc.dlext) + # use an Int counter so that nested @time_imports calls all remain open const TIMING_IMPORTS = Threads.Atomic{Int}(0) # these return either the array of modules loaded from the path / content given # or an Exception that describes why it couldn't be loaded # and it reconnects the Base.Docs.META -function _include_from_serialized(pkg::PkgId, path::String, depmods::Vector{Any}) +function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String}, depmods::Vector{Any}) assert_havelock(require_lock) timing_imports = TIMING_IMPORTS[] > 0 try @@ -941,36 +944,18 @@ function _include_from_serialized(pkg::PkgId, path::String, depmods::Vector{Any} t_comp_before = cumulative_compile_time_ns() end - @debug "Loading cache file $path for $pkg" - sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint), path, depmods, false) + if ocachepath !== nothing + @debug "Loading object cache file $ocachepath for $pkg" + sv = 
ccall(:jl_restore_package_image_from_file, Any, (Cstring, Any, Cint), ocachepath, depmods, false) + else + @debug "Loading cache file $path for $pkg" + sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint), path, depmods, false) + end if isa(sv, Exception) return sv end - sv = sv::SimpleVector - restored = sv[1]::Vector{Any} - for M in restored - M = M::Module - if isdefined(M, Base.Docs.META) && getfield(M, Base.Docs.META) !== nothing - push!(Base.Docs.modules, M) - end - if parentmodule(M) === M - register_root_module(M) - end - end - - # Register this cache path now - If Requires.jl is loaded, Revise may end - # up looking at the cache path during the init callback. - get!(PkgOrigin, pkgorigins, pkg).cachepath = path - inits = sv[2]::Vector{Any} - if !isempty(inits) - unlock(require_lock) # temporarily _unlock_ during these callbacks - try - ccall(:jl_init_restored_modules, Cvoid, (Any,), inits) - finally - lock(require_lock) - end - end + restored = register_restored_modules(sv, pkg, path) for M in restored M = M::Module @@ -999,6 +984,35 @@ function _include_from_serialized(pkg::PkgId, path::String, depmods::Vector{Any} end end +function register_restored_modules(sv::SimpleVector, pkg::PkgId, path::String) + # This function is also used by PkgCacheInspector.jl + restored = sv[1]::Vector{Any} + for M in restored + M = M::Module + if isdefined(M, Base.Docs.META) && getfield(M, Base.Docs.META) !== nothing + push!(Base.Docs.modules, M) + end + if parentmodule(M) === M + register_root_module(M) + end + end + + # Register this cache path now - If Requires.jl is loaded, Revise may end + # up looking at the cache path during the init callback. 
+ get!(PkgOrigin, pkgorigins, pkg).cachepath = path + + inits = sv[2]::Vector{Any} + if !isempty(inits) + unlock(require_lock) # temporarily _unlock_ during these callbacks + try + ccall(:jl_init_restored_modules, Cvoid, (Any,), inits) + finally + lock(require_lock) + end + end + return restored +end + function run_package_callbacks(modkey::PkgId) assert_havelock(require_lock) unlock(require_lock) @@ -1201,7 +1215,7 @@ end # loads a precompile cache file, ignoring stale_cachefile tests # assuming all depmods are already loaded and everything is valid -function _tryrequire_from_serialized(modkey::PkgId, path::String, sourcepath::String, depmods::Vector{Any}) +function _tryrequire_from_serialized(modkey::PkgId, path::String, ocachepath::Union{Nothing, String}, sourcepath::String, depmods::Vector{Any}) assert_havelock(require_lock) loaded = nothing if root_module_exists(modkey) @@ -1223,7 +1237,7 @@ function _tryrequire_from_serialized(modkey::PkgId, path::String, sourcepath::St package_locks[modkey] = Threads.Condition(require_lock) try set_pkgorigin_version_path(modkey, sourcepath) - loaded = _include_from_serialized(modkey, path, depmods) + loaded = _include_from_serialized(modkey, path, ocachepath, depmods) finally loading = pop!(package_locks, modkey) notify(loading, loaded, all=true) @@ -1242,13 +1256,23 @@ end # loads a precompile cache file, ignoring stale_cachefile tests # load the best available (non-stale) version of all dependent modules first -function _tryrequire_from_serialized(pkg::PkgId, path::String) +function _tryrequire_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String}) assert_havelock(require_lock) local depmodnames io = open(path, "r") try iszero(isvalid_cache_header(io)) && return ArgumentError("Invalid header in cache file $path.") - depmodnames = parse_cache_header(io)[3] + _, _, depmodnames, _, _, _, clone_targets, _ = parse_cache_header(io) + pkgimage = !isempty(clone_targets) + if pkgimage + ocachepath !== nothing 
|| return ArgumentError("Expected ocachepath to be provided") + isfile(ocachepath) || return ArgumentError("Ocachepath $ocachepath is not a file.") + ocachepath == ocachefile_from_cachefile(path) || return ArgumentError("$ocachepath is not the expected ocachefile") + # TODO: Check for valid clone_targets? + isvalid_pkgimage_crc(io, ocachepath) || return ArgumentError("Invalid checksum in cache file $ocachepath.") + else + @assert ocachepath === nothing + end isvalid_file_crc(io) || return ArgumentError("Invalid checksum in cache file $path.") finally close(io) @@ -1264,7 +1288,7 @@ function _tryrequire_from_serialized(pkg::PkgId, path::String) depmods[i] = dep end # then load the file - return _include_from_serialized(pkg, path, depmods) + return _include_from_serialized(pkg, path, ocachepath, depmods) end # returns `nothing` if require found a precompile cache for this sourcepath, but couldn't load it @@ -1272,12 +1296,13 @@ end @constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, build_id::UInt128) assert_havelock(require_lock) paths = find_all_in_cache_path(pkg) + ocachefile = nothing for path_to_try in paths::Vector{String} staledeps = stale_cachefile(pkg, build_id, sourcepath, path_to_try) if staledeps === true continue end - staledeps = staledeps::Vector{Any} + staledeps, ocachefile = staledeps::Tuple{Vector{Any}, Union{Nothing, String}} # finish checking staledeps module graph for i in 1:length(staledeps) dep = staledeps[i] @@ -1290,8 +1315,8 @@ end if modstaledeps === true continue end - modstaledeps = modstaledeps::Vector{Any} - staledeps[i] = (modpath, modkey, modpath_to_try, modstaledeps) + modstaledeps, modocachepath = modstaledeps::Tuple{Vector{Any}, Union{Nothing, String}} + staledeps[i] = (modpath, modkey, modpath_to_try, modstaledeps, modocachepath) modfound = true break end @@ -1302,6 +1327,7 @@ end end end if staledeps === true + ocachefile = nothing continue end try @@ -1313,19 +1339,20 @@ end for i in 
1:length(staledeps) dep = staledeps[i] dep isa Module && continue - modpath, modkey, modpath_to_try, modstaledeps = dep::Tuple{String, PkgId, String, Vector{Any}} - dep = _tryrequire_from_serialized(modkey, modpath_to_try, modpath, modstaledeps) + modpath, modkey, modcachepath, modstaledeps, modocachepath = dep::Tuple{String, PkgId, String, Vector{Any}, Union{Nothing, String}} + dep = _tryrequire_from_serialized(modkey, modcachepath, modocachepath, modpath, modstaledeps) if !isa(dep, Module) - @debug "Rejecting cache file $path_to_try because required dependency $modkey failed to load from cache file for $modpath." exception=dep + @debug "Rejecting cache file $path_to_try because required dependency $modkey failed to load from cache file for $modcachepath." exception=dep staledeps = true break end staledeps[i] = dep end if staledeps === true + ocachefile = nothing continue end - restored = _include_from_serialized(pkg, path_to_try, staledeps) + restored = _include_from_serialized(pkg, path_to_try, ocachefile, staledeps) if !isa(restored, Module) @debug "Deserialization checks failed while attempting to load cache from $path_to_try" exception=restored else @@ -1634,7 +1661,8 @@ function _require(pkg::PkgId, env=nothing) end # fall-through to loading the file locally else - m = _tryrequire_from_serialized(pkg, cachefile) + cachefile, ocachefile = cachefile::Tuple{String, Union{Nothing, String}} + m = _tryrequire_from_serialized(pkg, cachefile, ocachefile) if !isa(m, Module) @warn "The call to compilecache failed to create a usable precompiled cache file for $pkg" exception=m else @@ -1670,10 +1698,11 @@ function _require(pkg::PkgId, env=nothing) return loaded end -function _require_from_serialized(uuidkey::PkgId, path::String) +# Only used from test/precompile.jl +function _require_from_serialized(uuidkey::PkgId, path::String, ocachepath::Union{String, Nothing}) @lock require_lock begin set_pkgorigin_version_path(uuidkey, nothing) - newm = 
_tryrequire_from_serialized(uuidkey, path) + newm = _tryrequire_from_serialized(uuidkey, path, ocachepath) newm isa Module || throw(newm) insert_extension_triggers(uuidkey) # After successfully loading, notify downstream consumers @@ -1873,9 +1902,11 @@ function include_package_for_output(pkg::PkgId, input::String, depot_path::Vecto end const PRECOMPILE_TRACE_COMPILE = Ref{String}() -function create_expr_cache(pkg::PkgId, input::String, output::String, concrete_deps::typeof(_concrete_dependencies), internal_stderr::IO = stderr, internal_stdout::IO = stdout) +function create_expr_cache(pkg::PkgId, input::String, output::String, output_o::Union{Nothing, String}, + concrete_deps::typeof(_concrete_dependencies), internal_stderr::IO = stderr, internal_stdout::IO = stdout) @nospecialize internal_stderr internal_stdout rm(output, force=true) # Remove file if it exists + output_o === nothing || rm(output_o, force=true) depot_path = map(abspath, DEPOT_PATH) dl_load_path = map(abspath, DL_LOAD_PATH) load_path = map(abspath, Base.load_path()) @@ -1894,11 +1925,20 @@ function create_expr_cache(pkg::PkgId, input::String, output::String, concrete_d for (pkg, build_id) in concrete_deps push!(deps_strs, "$(pkg_str(pkg)) => $(repr(build_id))") end + + if output_o !== nothing + cpu_target = get(ENV, "JULIA_CPU_TARGET", nothing) + opt_level = Base.JLOptions().opt_level + opts = `-O$(opt_level) --output-o $(output_o) --output-ji $(output) --output-incremental=yes` + else + cpu_target = nothing + opts = `-O0 --output-ji $(output) --output-incremental=yes` + end + deps_eltype = sprint(show, eltype(concrete_deps); context = :module=>nothing) deps = deps_eltype * "[" * join(deps_strs, ",") * "]" trace = isassigned(PRECOMPILE_TRACE_COMPILE) ? 
`--trace-compile=$(PRECOMPILE_TRACE_COMPILE[])` : `` - io = open(pipeline(addenv(`$(julia_cmd()::Cmd) -O0 - --output-ji $output --output-incremental=yes + io = open(pipeline(addenv(`$(julia_cmd(;cpu_target)::Cmd) $(opts) --startup-file=no --history-file=no --warn-overwrite=yes --color=$(have_color === nothing ? "auto" : have_color ? "yes" : "no") $trace @@ -1931,6 +1971,14 @@ function compilecache_path(pkg::PkgId, prefs_hash::UInt64)::String crc = _crc32c(something(Base.active_project(), "")) crc = _crc32c(unsafe_string(JLOptions().image_file), crc) crc = _crc32c(unsafe_string(JLOptions().julia_bin), crc) + crc = _crc32c(ccall(:jl_cache_flags, UInt8, ()), crc) + + cpu_target = get(ENV, "JULIA_CPU_TARGET", nothing) + if cpu_target === nothing + cpu_target = unsafe_string(JLOptions().cpu_target) + end + crc = _crc32c(cpu_target, crc) + crc = _crc32c(prefs_hash, crc) project_precompile_slug = slug(crc, 5) abspath(cachepath, string(entryfile, "_", project_precompile_slug, ".ji")) @@ -1977,44 +2025,92 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in # create a temporary file in `cachepath` directory, write the cache in it, # write the checksum, _and then_ atomically move the file to `cachefile`. 
mkpath(cachepath) + cache_objects = JLOptions().use_pkgimages != 0 tmppath, tmpio = mktemp(cachepath) + + if cache_objects + tmppath_o, tmpio_o = mktemp(cachepath) + tmppath_so, tmpio_so = mktemp(cachepath) + else + tmppath_o = nothing + end local p try close(tmpio) - p = create_expr_cache(pkg, path, tmppath, concrete_deps, internal_stderr, internal_stdout) + if cache_objects + close(tmpio_o) + close(tmpio_so) + end + p = create_expr_cache(pkg, path, tmppath, tmppath_o, concrete_deps, internal_stderr, internal_stdout) + if success(p) + if cache_objects + # Run linker over tmppath_o + Linking.link_image(tmppath_o, tmppath_so) + end + + # Read preferences hash back from .ji file (we can't precompute because + # we don't actually know what the list of compile-time preferences are without compiling) + prefs_hash = preferences_hash(tmppath) + cachefile = compilecache_path(pkg, prefs_hash) + ocachefile = cache_objects ? ocachefile_from_cachefile(cachefile) : nothing + + # append checksum for so to the end of the .ji file: + crc_so = UInt32(0) + if cache_objects + crc_so = open(_crc32c, tmppath_so, "r") + end + # append extra crc to the end of the .ji file: open(tmppath, "r+") do f if iszero(isvalid_cache_header(f)) error("Invalid header for $pkg in new cache file $(repr(tmppath)).") end + seekend(f) + write(f, crc_so) seekstart(f) write(f, _crc32c(f)) end + # inherit permission from the source file (and make them writable) chmod(tmppath, filemode(path) & 0o777 | 0o200) - - # Read preferences hash back from .ji file (we can't precompute because - # we don't actually know what the list of compile-time preferences are without compiling) - prefs_hash = preferences_hash(tmppath) - cachefile = compilecache_path(pkg, prefs_hash) + if cache_objects + # Ensure that the user can execute the `.so` we're generating + # Note that on windows, `filemode(path)` typically returns `0o666`, so this + # addition of the execute bit for the user is doubly needed. 
+ chmod(tmppath_so, filemode(path) & 0o777 | 0o300) + end # prune the directory with cache files if pkg.uuid !== nothing entrypath, entryfile = cache_file_entry(pkg) - cachefiles = filter!(x -> startswith(x, entryfile * "_"), readdir(cachepath)) + cachefiles = filter!(x -> startswith(x, entryfile * "_") && endswith(x, ".ji"), readdir(cachepath)) + if length(cachefiles) >= MAX_NUM_PRECOMPILE_FILES[] idx = findmin(mtime.(joinpath.(cachepath, cachefiles)))[2] - rm(joinpath(cachepath, cachefiles[idx]); force=true) + evicted_cachefile = joinpath(cachepath, cachefiles[idx]) + @debug "Evicting file from cache" evicted_cachefile + rm(evicted_cachefile; force=true) + try + rm(ocachefile_from_cachefile(evicted_cachefile); force=true) + catch + end end end # this is atomic according to POSIX (not Win32): rename(tmppath, cachefile; force=true) - return cachefile + if cache_objects + rename(tmppath_so, ocachefile::String; force=true) + end + return cachefile, ocachefile end finally rm(tmppath, force=true) + if cache_objects + rm(tmppath_o, force=true) + rm(tmppath_so, force=true) + end end if p.exitcode == 125 return PrecompilableError() @@ -2028,9 +2124,26 @@ function module_build_id(m::Module) return (UInt128(hi) << 64) | lo end -isvalid_cache_header(f::IOStream) = ccall(:jl_read_verify_header, UInt64, (Ptr{Cvoid},), f.ios) # returns checksum id or zero +function isvalid_cache_header(f::IOStream) + pkgimage = Ref{UInt8}() + checksum = ccall(:jl_read_verify_header, UInt64, (Ptr{Cvoid}, Ptr{UInt8}, Ptr{Int64}, Ptr{Int64}), f.ios, pkgimage, Ref{Int64}(), Ref{Int64}()) # returns checksum id or zero + + if !iszero(checksum) && pkgimage[] != 0 + @debug "Cache header was for pkgimage" + return UInt64(0) # We somehow read the header for a pkgimage and not a ji + end + return checksum +end isvalid_file_crc(f::IOStream) = (_crc32c(seekstart(f), filesize(f) - 4) == read(f, UInt32)) +function isvalid_pkgimage_crc(f::IOStream, ocachefile::String) + seekstart(f) # TODO necessary + seek(f, 
filesize(f) - 8) + expected_crc_so = read(f, UInt32) + crc_so = open(_crc32c, ocachefile, "r") + expected_crc_so == crc_so +end + struct CacheHeaderIncludes id::PkgId filename::String @@ -2039,6 +2152,7 @@ struct CacheHeaderIncludes end function parse_cache_header(f::IO) + flags = read(f, UInt8) modules = Vector{Pair{PkgId, UInt64}}() while true n = read(f, Int32) @@ -2112,7 +2226,10 @@ function parse_cache_header(f::IO) build_id |= read(f, UInt64) push!(required_modules, PkgId(uuid, sym) => build_id) end - return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash + l = read(f, Int32) + clone_targets = read(f, l) + + return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags end function parse_cache_header(cachefile::String; srcfiles_only::Bool=false) @@ -2134,8 +2251,6 @@ function parse_cache_header(cachefile::String; srcfiles_only::Bool=false) end end - - preferences_hash(f::IO) = parse_cache_header(f)[6] function preferences_hash(cachefile::String) io = open(cachefile, "r") @@ -2149,7 +2264,6 @@ function preferences_hash(cachefile::String) end end - function cache_dependencies(f::IO) _, (includes, _), modules, _... 
= parse_cache_header(f) return modules, map(chi -> (chi.filename, chi.mtime), includes) # return just filename and mtime @@ -2376,6 +2490,15 @@ get_compiletime_preferences(uuid::UUID) = collect(get(Vector{String}, COMPILETIM get_compiletime_preferences(m::Module) = get_compiletime_preferences(PkgId(m).uuid) get_compiletime_preferences(::Nothing) = String[] +function check_clone_targets(clone_targets) + try + ccall(:jl_check_pkgimage_clones, Cvoid, (Ptr{Cchar},), clone_targets) + return true + catch + return false + end +end + # returns true if it "cachefile.ji" is stale relative to "modpath.jl" and build_id for modkey # otherwise returns the list of dependencies to also check @constprop :none function stale_cachefile(modpath::String, cachefile::String; ignore_loaded::Bool = false) @@ -2389,10 +2512,33 @@ end @debug "Rejecting cache file $cachefile due to it containing an invalid cache header" return true # invalid cache file end - modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash = parse_cache_header(io) + modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags = parse_cache_header(io) if isempty(modules) return true # ignore empty file end + if ccall(:jl_match_cache_flags, UInt8, (UInt8,), flags) == 0 + @debug "Rejecting cache file $cachefile for $modkey since the flags are mismatched" cachefile_flags=flags current_flags=ccall(:jl_cache_flags, UInt8, ()) + return true + end + pkgimage = !isempty(clone_targets) + if pkgimage + ocachefile = ocachefile_from_cachefile(cachefile) + if JLOptions().use_pkgimages == 0 + # presence of clone_targets means native code cache + @debug "Rejecting cache file $cachefile for $modkey since it would require usage of pkgimage" + return true + end + if !check_clone_targets(clone_targets) + @debug "Rejecting cache file $cachefile for $modkey since pkgimage can't be loaded on this target" + return true + end + if !isfile(ocachefile) + @debug "Rejecting cache file 
$cachefile for $modkey since pkgimage $ocachefile was not found" + return true + end + else + ocachefile = nothing + end id = first(modules) if id.first != modkey && modkey != PkgId("") @debug "Rejecting cache file $cachefile for $modkey since it is for $id instead" @@ -2455,7 +2601,7 @@ end # now check if this file is fresh relative to its source files if !skip_timecheck - if !samefile(includes[1].filename, modpath) + if !samefile(includes[1].filename, modpath) && !samefile(fixup_stdlib_path(includes[1].filename), modpath) @debug "Rejecting cache file $cachefile because it is for file $(includes[1].filename) not file $modpath" return true # cache file was compiled from a different path end @@ -2468,6 +2614,16 @@ end end for chi in includes f, ftime_req = chi.filename, chi.mtime + if !isfile(f) + _f = fixup_stdlib_path(f) + if isfile(_f) && startswith(_f, Sys.STDLIB) + # mtime is changed by extraction + @debug "Skipping mtime check for file $f used by $cachefile, since it is a stdlib" + continue + end + @debug "Rejecting stale cache file $cachefile because file $f does not exist" + return true + end ftime = mtime(f) is_stale = ( ftime != ftime_req ) && ( ftime != floor(ftime_req) ) && # Issue #13606, PR #13613: compensate for Docker images rounding mtimes @@ -2487,13 +2643,20 @@ end return true end + if pkgimage + if !isvalid_pkgimage_crc(io, ocachefile::String) + @debug "Rejecting cache file $cachefile because $ocachefile has an invalid checksum" + return true + end + end + curr_prefs_hash = get_preferences_hash(id.uuid, prefs) if prefs_hash != curr_prefs_hash @debug "Rejecting cache file $cachefile because preferences hash does not match 0x$(string(prefs_hash, base=16)) != 0x$(string(curr_prefs_hash, base=16))" return true end - return depmods # fresh cachefile + return depmods, ocachefile # fresh cachefile finally close(io) end @@ -2566,4 +2729,5 @@ end precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, 
typeof(_concrete_dependencies), Nothing)) precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), String)) -precompile(create_expr_cache, (PkgId, String, String, typeof(_concrete_dependencies), IO, IO)) +precompile(create_expr_cache, (PkgId, String, String, String, typeof(_concrete_dependencies), IO, IO)) +precompile(create_expr_cache, (PkgId, String, String, Nothing, typeof(_concrete_dependencies), IO, IO)) diff --git a/base/options.jl b/base/options.jl index 48a8f7ff59d38..dda0e8b377076 100644 --- a/base/options.jl +++ b/base/options.jl @@ -38,6 +38,7 @@ struct JLOptions handle_signals::Int8 use_sysimage_native_code::Int8 use_compiled_modules::Int8 + use_pkgimages::Int8 bindto::Ptr{UInt8} outputbc::Ptr{UInt8} outputunoptbc::Ptr{UInt8} diff --git a/base/util.jl b/base/util.jl index cef62587be05f..957e30d774aab 100644 --- a/base/util.jl +++ b/base/util.jl @@ -133,7 +133,7 @@ See also [`print`](@ref), [`println`](@ref), [`show`](@ref). printstyled(stdout, msg...; bold=bold, underline=underline, blink=blink, reverse=reverse, hidden=hidden, color=color) """ - Base.julia_cmd(juliapath=joinpath(Sys.BINDIR, julia_exename())) + Base.julia_cmd(juliapath=joinpath(Sys.BINDIR, julia_exename()); cpu_target) Return a julia command similar to the one of the running process. Propagates any of the `--cpu-target`, `--sysimage`, `--compile`, `--sysimage-native-code`, @@ -148,10 +148,15 @@ Among others, `--math-mode`, `--warn-overwrite`, and `--trace-compile` are notab !!! compat "Julia 1.5" The flags `--color` and `--startup-file` were added in Julia 1.5. + +!!! compat "Julia 1.9" + The keyword argument `cpu_target` was added. 
""" -function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename())) +function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename()); cpu_target::Union{Nothing,String} = nothing) opts = JLOptions() - cpu_target = unsafe_string(opts.cpu_target) + if cpu_target === nothing + cpu_target = unsafe_string(opts.cpu_target) + end image_file = unsafe_string(opts.image_file) addflags = String[] let compile = if opts.compile_enabled == 0 @@ -220,6 +225,12 @@ function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename())) if opts.use_sysimage_native_code == 0 push!(addflags, "--sysimage-native-code=no") end + if opts.use_pkgimages == 0 + push!(addflags, "--pkgimages=no") + else + # If pkgimage is set, malloc_log and code_coverage should not + @assert opts.malloc_log == 0 && opts.code_coverage == 0 + end return `$julia -C$cpu_target -J$image_file $addflags` end @@ -485,10 +496,17 @@ function _crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000) end _crc32c(io::IO, crc::UInt32=0x00000000) = _crc32c(io, typemax(Int64), crc) _crc32c(io::IOStream, crc::UInt32=0x00000000) = _crc32c(io, filesize(io)-position(io), crc) -_crc32c(uuid::UUID, crc::UInt32=0x00000000) = - ccall(:jl_crc32c, UInt32, (UInt32, Ref{UInt128}, Csize_t), crc, uuid.value, 16) +_crc32c(uuid::UUID, crc::UInt32=0x00000000) = _crc32c(uuid.value, crc) +_crc32c(x::UInt128, crc::UInt32=0x00000000) = + ccall(:jl_crc32c, UInt32, (UInt32, Ref{UInt128}, Csize_t), crc, x, 16) _crc32c(x::UInt64, crc::UInt32=0x00000000) = ccall(:jl_crc32c, UInt32, (UInt32, Ref{UInt64}, Csize_t), crc, x, 8) +_crc32c(x::UInt32, crc::UInt32=0x00000000) = + ccall(:jl_crc32c, UInt32, (UInt32, Ref{UInt32}, Csize_t), crc, x, 4) +_crc32c(x::UInt16, crc::UInt32=0x00000000) = + ccall(:jl_crc32c, UInt32, (UInt32, Ref{UInt16}, Csize_t), crc, x, 2) +_crc32c(x::UInt8, crc::UInt32=0x00000000) = + ccall(:jl_crc32c, UInt32, (UInt32, Ref{UInt8}, Csize_t), crc, x, 1) """ @kwdef typedef @@ -650,7 +668,8 @@ function runtests(tests = ["all"]; ncores::Int = ceil(Int, 
Sys.CPU_THREADS / 2), seed !== nothing && push!(tests, "--seed=0x$(string(seed % UInt128, base=16))") # cast to UInt128 to avoid a minus sign ENV2 = copy(ENV) ENV2["JULIA_CPU_THREADS"] = "$ncores" - ENV2["JULIA_DEPOT_PATH"] = mktempdir(; cleanup = true) + pathsep = Sys.iswindows() ? ";" : ":" + ENV2["JULIA_DEPOT_PATH"] = string(mktempdir(; cleanup = true), pathsep) # make sure the default depots can be loaded delete!(ENV2, "JULIA_LOAD_PATH") delete!(ENV2, "JULIA_PROJECT") try diff --git a/contrib/cache_stdlibs.jl b/contrib/cache_stdlibs.jl new file mode 100644 index 0000000000000..bdcc3d9535fa4 --- /dev/null +++ b/contrib/cache_stdlibs.jl @@ -0,0 +1,49 @@ +# Stdlibs sorted in dependency, then alphabetical, order by contrib/print_sorted_stdlibs.jl +# Run with the `--exclude-sysimage` option to filter out all packages included in the system image +stdlibs = [ + # No dependencies + + # 1-depth packages + :GMP_jll, + :LLVMLibUnwind_jll, + :LibUV_jll, + :LibUnwind_jll, + :MbedTLS_jll, + :OpenLibm_jll, + :PCRE2_jll, + :Zlib_jll, + :dSFMT_jll, + :libLLVM_jll, + + # 2-depth packages + :LibSSH2_jll, + :MPFR_jll, + + # 3-depth packages + :LibGit2_jll, + + # 7-depth packages + :LLD_jll, + :SuiteSparse_jll, + + # 9-depth packages + :Statistics, + :SuiteSparse, +] + +depot = abspath(Sys.BINDIR, "..", "share", "julia") + +if haskey(ENV, "JULIA_CPU_TARGET") + target = ENV["JULIA_CPU_TARGET"] +else + target = "native" +end + +@info "Caching stdlibrary to" depot target +empty!(Base.DEPOT_PATH) +push!(Base.DEPOT_PATH, depot) + +for pkg in stdlibs + pkgid = Base.identify_package(string(pkg)) + Base.compilecache(pkgid) +end diff --git a/contrib/print_sorted_stdlibs.jl b/contrib/print_sorted_stdlibs.jl index 28d75f079b9dd..6bc2023c4f1cc 100644 --- a/contrib/print_sorted_stdlibs.jl +++ b/contrib/print_sorted_stdlibs.jl @@ -12,11 +12,12 @@ function check_flag(flag) end if check_flag("--help") || check_flag("-h") - println("Usage: julia print_sorted_stdlibs.jl [stdlib_dir] 
[--exclude-jlls]") + println("Usage: julia print_sorted_stdlibs.jl [stdlib_dir] [--exclude-jlls] [--exclude-sysimage]") end # Allow users to ask for JLL or no JLLs exclude_jlls = check_flag("--exclude-jlls") +exclude_sysimage = check_flag("--exclude-sysimage") # Default to the `stdlib/vX.Y` directory STDLIB_DIR = get(ARGS, 1, joinpath(@__DIR__, "..", "usr", "share", "julia", "stdlib")) @@ -80,12 +81,20 @@ if exclude_jlls filter!(p -> !endswith(p, "_jll"), sorted_projects) end +if exclude_sysimage + loaded_modules = Set(map(k->k.name, collect(keys(Base.loaded_modules)))) + filter!(p->!in(p, loaded_modules), sorted_projects) +end + # Print out sorted projects, ready to be pasted into `sysimg.jl` last_depth = 0 println(" # Stdlibs sorted in dependency, then alphabetical, order by contrib/print_sorted_stdlibs.jl") if exclude_jlls println(" # Run with the `--exclude-jlls` option to filter out all JLL packages") end +if exclude_sysimage + println(" # Run with the `--exclude-sysimage` option to filter out all packages included in the system image") +end println(" stdlibs = [") println(" # No dependencies") for p in sorted_projects diff --git a/deps/checksums/compilersupportlibraries b/deps/checksums/compilersupportlibraries index 721ad2e8a8759..098c181ca5c87 100644 --- a/deps/checksums/compilersupportlibraries +++ b/deps/checksums/compilersupportlibraries @@ -1,92 +1,92 @@ -CompilerSupportLibraries.v1.0.1+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/20ebaad57850393b6ac9fa924e511fe4 -CompilerSupportLibraries.v1.0.1+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/020de4d8b0ff6bedbadaa305ff8445e6849f12053762ea4aa68412d1ec763dbd86f479587a2fbb862487f1feb04d976c38099ddf3887817a3d32b3f029cf85b1 -CompilerSupportLibraries.v1.0.1+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/3908fa1a2f739b330e787468c9bfb5c8 
-CompilerSupportLibraries.v1.0.1+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/1741e3403ac7aa99e7cfd9a01222c4153ed300f47cc1b347e1af1a6cd07a82caaa54b9cfbebae8751440420551621cc6524504413446d104f9493dff2c081853 -CompilerSupportLibraries.v1.0.1+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/2444dbb7637b32cf543675cc12330878 -CompilerSupportLibraries.v1.0.1+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/8537f0b243df8544350c884021b21c585fd302e8dd462a30a6ee84c7a36a049133262e5d1bc362f972066b8e8d6a091c32c3b746bab1feb9fccf2e7cca65756c -CompilerSupportLibraries.v1.0.1+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/d79c1434594c0c5e7d6be798bf52c99e -CompilerSupportLibraries.v1.0.1+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/7e71accc401a45b51b298702fb4c79a2fc856c7b28f0935f6ad3a0db5381c55fe5432daff371842930d718024b7c6c1d80e2bd09d397145203673bebbe3496ae -CompilerSupportLibraries.v1.0.1+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/f212059053d99558a9b0bf54b20180e1 -CompilerSupportLibraries.v1.0.1+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/5c104b1282cec8a944e5d008f44a4d60f4394fd5d797fec7d1f487d13e7328cd9c88ec4916dabf18596d87160756bda914e4f8c5a356b5577f9349d0d9e976d6 -CompilerSupportLibraries.v1.0.1+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/3e3b3795ee93ef317223050e803a9875 -CompilerSupportLibraries.v1.0.1+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/85d3c955e15f66bfe8bfec2f28c9160bc03d4d531ea4ffe6bc6b51e0d69ccea3ab67a16ca752dabc870861c407381c4519d75c6be3832e8dccd6122ec8c6ed75 -CompilerSupportLibraries.v1.0.1+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/cf2d1315f6a348af2e6c065e2a286e7a -CompilerSupportLibraries.v1.0.1+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/58420377bc77aa7678034ee5f708eb6be7db359faef2c2638869765453633da9bf455512bd88e95b38ae0428ecc4053561517b176b2371129bdaef9d8d5dadfd -CompilerSupportLibraries.v1.0.1+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/f5c09ed7e0eeb8d345d328f950582f26 
-CompilerSupportLibraries.v1.0.1+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c657f55c8fcdeb404be168a3a63a5e84304730fe34f25673d92cdae4b0a1fcc6a877ee1433f060e1be854c7811d66632e32510a2ed591d88330f1340b9c20de -CompilerSupportLibraries.v1.0.1+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/c685518aca4721cd8621d510e2039683 -CompilerSupportLibraries.v1.0.1+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b760468c6377dcd2b8dd50200daaabe604006afc070984d78152b2becd0680b59036c9a6e91dea490121bd85b58d285bfc1e1cf696d29af236528400101de36c -CompilerSupportLibraries.v1.0.1+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/8faf5c8ad62ab10f71dd2ec9683053e2 -CompilerSupportLibraries.v1.0.1+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/921239f241a5c89710cf07272d7f6c3f10201a7533068ed1e9643f9fb2f439e1bb765a4966d913829866ee0ce4f1589d30d06e4b5c1361e3c016a9473f087177 -CompilerSupportLibraries.v1.0.1+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/b38fcb70691ac2621379d298eef8c79e -CompilerSupportLibraries.v1.0.1+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/06c7f64257ce721f5941f6e50a0d2717cdc9394fc532ded19ce3eaacd5e92a416969534227562e4fee04d2b6340c650d8bc9779e14519b90038bc41e8d1f5ce3 -CompilerSupportLibraries.v1.0.1+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/cdfab2c7bc41765caf4441c3caeed761 -CompilerSupportLibraries.v1.0.1+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/7109d4a7b32c00309c42685f54a86fc2cc63c0c00f65584ad296b6e44ad3320eed1aaf49684a8831841cdffa5555d72f89272fb722a780596e27ef020528026b -CompilerSupportLibraries.v1.0.1+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/441980ebd23d72772cbe603f1c275336 -CompilerSupportLibraries.v1.0.1+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/e273d9f1af259a3080df8f173e1808a1ade976a943aba97216bf59a96178e7c052e7a048b0ceee53ab486ed577a2ecb92579857be2f7b29e76322ee1f13c9d76 -CompilerSupportLibraries.v1.0.1+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/f5c09ed7e0eeb8d345d328f950582f26 
-CompilerSupportLibraries.v1.0.1+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c657f55c8fcdeb404be168a3a63a5e84304730fe34f25673d92cdae4b0a1fcc6a877ee1433f060e1be854c7811d66632e32510a2ed591d88330f1340b9c20de -CompilerSupportLibraries.v1.0.1+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/c685518aca4721cd8621d510e2039683 -CompilerSupportLibraries.v1.0.1+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b760468c6377dcd2b8dd50200daaabe604006afc070984d78152b2becd0680b59036c9a6e91dea490121bd85b58d285bfc1e1cf696d29af236528400101de36c -CompilerSupportLibraries.v1.0.1+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/8faf5c8ad62ab10f71dd2ec9683053e2 -CompilerSupportLibraries.v1.0.1+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/921239f241a5c89710cf07272d7f6c3f10201a7533068ed1e9643f9fb2f439e1bb765a4966d913829866ee0ce4f1589d30d06e4b5c1361e3c016a9473f087177 -CompilerSupportLibraries.v1.0.1+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/b38fcb70691ac2621379d298eef8c79e -CompilerSupportLibraries.v1.0.1+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/06c7f64257ce721f5941f6e50a0d2717cdc9394fc532ded19ce3eaacd5e92a416969534227562e4fee04d2b6340c650d8bc9779e14519b90038bc41e8d1f5ce3 -CompilerSupportLibraries.v1.0.1+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/cdfab2c7bc41765caf4441c3caeed761 -CompilerSupportLibraries.v1.0.1+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/7109d4a7b32c00309c42685f54a86fc2cc63c0c00f65584ad296b6e44ad3320eed1aaf49684a8831841cdffa5555d72f89272fb722a780596e27ef020528026b -CompilerSupportLibraries.v1.0.1+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/441980ebd23d72772cbe603f1c275336 -CompilerSupportLibraries.v1.0.1+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/e273d9f1af259a3080df8f173e1808a1ade976a943aba97216bf59a96178e7c052e7a048b0ceee53ab486ed577a2ecb92579857be2f7b29e76322ee1f13c9d76 -CompilerSupportLibraries.v1.0.1+0.i686-linux-gnu-libgfortran3.tar.gz/md5/6decf8fd5afb50451771c761e63a8917 
-CompilerSupportLibraries.v1.0.1+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/4984724bcc847724b1bc005b6f760a18b68147f7d5402d0faf4e28fc0d14fa10975368a951f9caf2a8856500046dec8343043274557d58269e77492b929a9e4b -CompilerSupportLibraries.v1.0.1+0.i686-linux-gnu-libgfortran4.tar.gz/md5/39d1e8a3baa144c018d3eaf7f3806482 -CompilerSupportLibraries.v1.0.1+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/fc4d429279c5a93b6c28b6e911b1e7cfd1c1cfe46f11f2e901b3832ce90d45f49d3d29f0ef18518a94af6cc8651f67c4ed81672680f9281ada390440b172a2af -CompilerSupportLibraries.v1.0.1+0.i686-linux-gnu-libgfortran5.tar.gz/md5/37dabd9cd224c9fed9633dedccb6c565 -CompilerSupportLibraries.v1.0.1+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/b253149e72eef9486888fbaace66e9b6945f4477f6b818f64f3047331165b0e2bc17aa6e3fc8c88686a72e478eb62c8f53883415d5419db448d8016fa3a1da5e -CompilerSupportLibraries.v1.0.1+0.i686-linux-musl-libgfortran3.tar.gz/md5/afdd32bfadd465848e6be458817a44ae -CompilerSupportLibraries.v1.0.1+0.i686-linux-musl-libgfortran3.tar.gz/sha512/eebd679c499143014514c7c9d1875dedbbab9e3af51526c4dd445a9e3dbade95d24522da8bbad0a50ab400755e47b018828b324c4ad7705e212ccd990e34439a -CompilerSupportLibraries.v1.0.1+0.i686-linux-musl-libgfortran4.tar.gz/md5/bc4a0f0b7cea328f7e8850583774496b -CompilerSupportLibraries.v1.0.1+0.i686-linux-musl-libgfortran4.tar.gz/sha512/82285b67946212b49cddf6259f2c60ff5469f8c5263ccefe44f1d93ace98ab68e2c152e1b54434b2f075fd8d192c06d5451bc8cca26d951ad15f3453102f02b5 -CompilerSupportLibraries.v1.0.1+0.i686-linux-musl-libgfortran5.tar.gz/md5/177f0232abce8d523882530ed7a93092 -CompilerSupportLibraries.v1.0.1+0.i686-linux-musl-libgfortran5.tar.gz/sha512/db80acf0f2434f28ee7680e1beb34f564940071815d1ad89fb5913cbd9ac24da528e826d0d54be6265a7340ebd661b6d308ed79d96b67fa5d8c98dc3f1bee8d6 -CompilerSupportLibraries.v1.0.1+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/c723e7d3c3038f59b9bf0cc3a65826bc 
-CompilerSupportLibraries.v1.0.1+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/0545561ccd7e525b6cd86935366a2724a5e013411a1c01564db21b66da5fef959cf06b0839b96f1dc2c970eb6c8fb19c012e6cd2c17bc381b55420c72fe1b9f6 -CompilerSupportLibraries.v1.0.1+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/763bd82645d2f3c72b6244d68bebb40f -CompilerSupportLibraries.v1.0.1+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/700e719eeab486915a9fb0954125cb9a3e9a813d7a069eca05be3a16621f4875668918a5ed5f645e734ac62b0c2ddbaa6234adc9109e98fb88b8ca1197481ed8 -CompilerSupportLibraries.v1.0.1+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/18e90d15dc6dd0a836e9aa076b342105 -CompilerSupportLibraries.v1.0.1+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/9ff61e8da2b431a8cb09818bde5daab2d7b8cf7a934f184f14ea50eccf5796ae91558e06a22137eb021c4055c54faf4a524a54dbbd718e8ea0abb5dcec844fdb -CompilerSupportLibraries.v1.0.1+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/4e5e4b23dc87450738da33926a07511d -CompilerSupportLibraries.v1.0.1+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/fc09879d94b750e75775d8b64a41ab9924d675fb53c5700467604412928fe7f5cb21911da0f64898d2463fa77ffbaf4c96c397b9060f4746eec152747930cddc -CompilerSupportLibraries.v1.0.1+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/9a92138ed69aa317a932a615c6e62d69 -CompilerSupportLibraries.v1.0.1+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/0b7785379936a2a209b074177b1424dd7e00b29b5165f564e799b0aa4e06a582e9d616525d97274ba2507cb88192028f1ac485d3f99bdc7ee53fc63c1a7e85de -CompilerSupportLibraries.v1.0.1+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/8ffee3d6de5197c7a1f354d72c8238fa -CompilerSupportLibraries.v1.0.1+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/deadc4d7224c84f9b82dc956b69e815c44ae036802838365d870ab9f58c8bcf8ce0645f2f387c8ff344ac2108fc8e7e1ee907fa55e93c91aa5d9fd921bf3fdcb -CompilerSupportLibraries.v1.0.1+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/87449e72e3f33dbb69b7053cdc2649d4 
-CompilerSupportLibraries.v1.0.1+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/5ce02ad10c6f4686a476eb2a5de2988cd8b482f5e693db2880c84ad1c82f468ef03fe01b9d0feefe5d4ee741d1d16643d36b144e6261ed32311b3b6f312fac2f -CompilerSupportLibraries.v1.0.1+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/0407cde92cfa42fa89ac83217ca0ec16 -CompilerSupportLibraries.v1.0.1+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/032c831f1166a336551138939ac40eb2c68a048ce786c0c1403b879a20c1b706caac16d22560b2c7f2b3d6373986c347188675674116005ca251336ee048d09f -CompilerSupportLibraries.v1.0.1+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/23418763b808371ee94772a90d501f4d -CompilerSupportLibraries.v1.0.1+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/7867b843551457b11bda7821dd384c1c1cf23b80a308b2058a693de7b7da099f0b37eb0a6de2b84c04b625a68c60eea55138e200d5d6ec6f6af09bd7ce406a96 -CompilerSupportLibraries.v1.0.1+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/e3d33ae03c18affea74699bdc1fabb68 -CompilerSupportLibraries.v1.0.1+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/42013f4921de5a69ad857195ce5c19ad1bca3c920d79699e5501f1f4534ab132fabd422362b2b5056f5d182215d6c069db5df460bafa700903faf962cc00f77b -CompilerSupportLibraries.v1.0.1+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/d40c1e8c0393213c6057c53a12f44175 -CompilerSupportLibraries.v1.0.1+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/fe7baa4de7490065ab7b953cc12f41462a24bcb49d0a4a64b23249e98e7569b19bb1cb455af2f76090e34066a7d3cdd7a48cae6515ce6c7a5c8486b0cacc5106 -CompilerSupportLibraries.v1.0.1+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/48541b90f715c4c86ee4da0570275947 -CompilerSupportLibraries.v1.0.1+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/7f2683fb98e80f12629f4ed3bea9fd59d32b7e7a9ed1699e782d8e238ff0915ecc61bf00adaf4597cfe41caf82cdca0f9be250f595f5f0bea6d8f77dba99eaf4 -CompilerSupportLibraries.v1.0.1+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/4547059eb905995667be48bf85d49911 
-CompilerSupportLibraries.v1.0.1+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/7400fdabc924434ab4a4949248c3603887ac06ffd2f205ae33e14495d86cd4f816bbd1999eeafa0257f518df1e7f7c522f596e847a71dbfbfccff4859f50acc7 -CompilerSupportLibraries.v1.0.1+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/46267543cad6584d7b7b9fcc8f18f21d -CompilerSupportLibraries.v1.0.1+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/0353d7d724be48d4185d3c181692970b7996f53f6a01723072aa5c94b53a8c5055faeed30df51659c252a46f4b941dec0cb24569323e3c85c166f14c5b7c8e9e -CompilerSupportLibraries.v1.0.1+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/14dba2897a6e9d370fa9091c045375fc -CompilerSupportLibraries.v1.0.1+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/10b79f9c059839f5b57fa8d2a381a034c4067262c4088bd354d14ea56bec097878069383aa9cfadaa09d73bd20fc348fb61662d863a8d62cb25d7af6b8e29858 -CompilerSupportLibraries.v1.0.1+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/eed836d1addeb10d0901f836724aff1e -CompilerSupportLibraries.v1.0.1+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/e33eca424d1529a1fb23ba9cf7fac345ed1cfc8073c975b6b31ca44d2e8c3f5083af65433df009b22483dceb2e43149f3c1e8433681fec5fb812e1d5b4243ce4 -CompilerSupportLibraries.v1.0.1+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/d5ae9f9519341fdaabf62267c89461d2 -CompilerSupportLibraries.v1.0.1+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/6421aa5d1bd6f08ad43f59ed4dc1bef8b9b598ebbbd3e48149730f3bec3471f8e2c02ffb338427326924290b8f52ef9e626e3313448bc931a61d866c5dc544ae -CompilerSupportLibraries.v1.0.1+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/fc1df521395362a5aaa2e2aeef707207 -CompilerSupportLibraries.v1.0.1+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/f2e5a08e3cae171242ae6a20d2d4838c1529ce042745dc466148b7bbc06896d94476fd05c7787e6e8641bea752dfc0e6b09e95b160bede600d20d2ad68e7705f -CompilerSupportLibraries.v1.0.1+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/80c337837a9032e4c9614f0d3218993b 
-CompilerSupportLibraries.v1.0.1+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/cf07e459ca55cb9ee3d38e6858320530c1d1ab2ffd35bfa2a33b2505d3189f13b9743a0e279d70f85d227cee8a8974448f1371a122dcbea03fb1e414f8df8337 -CompilerSupportLibraries.v1.0.1+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/792cae36932dd53af20b7f61c80f623b -CompilerSupportLibraries.v1.0.1+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/805f2b64fe9d2b94fc6c966945e10458d8d1c47a8d95fcda057c03a13999d7d0f136c754e4b1e152faaf23e4949861c2ad42b4437dba19f59b3db745d7a76108 -CompilerSupportLibraries.v1.0.1+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/063c07fcbba4b9c3bd23ab0d987f1dbb -CompilerSupportLibraries.v1.0.1+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/1d0344b30b5fb34a63f6844be0501c0ad08f1116b0c7b00e13d47860cc6bbdd39734416ad3b492414a28ba1744240bd05aca0d1560873f687d3f61747058626b +CompilerSupportLibraries.v1.0.2+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/20ebaad57850393b6ac9fa924e511fe4 +CompilerSupportLibraries.v1.0.2+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/020de4d8b0ff6bedbadaa305ff8445e6849f12053762ea4aa68412d1ec763dbd86f479587a2fbb862487f1feb04d976c38099ddf3887817a3d32b3f029cf85b1 +CompilerSupportLibraries.v1.0.2+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/3908fa1a2f739b330e787468c9bfb5c8 +CompilerSupportLibraries.v1.0.2+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/1741e3403ac7aa99e7cfd9a01222c4153ed300f47cc1b347e1af1a6cd07a82caaa54b9cfbebae8751440420551621cc6524504413446d104f9493dff2c081853 +CompilerSupportLibraries.v1.0.2+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/2444dbb7637b32cf543675cc12330878 +CompilerSupportLibraries.v1.0.2+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/8537f0b243df8544350c884021b21c585fd302e8dd462a30a6ee84c7a36a049133262e5d1bc362f972066b8e8d6a091c32c3b746bab1feb9fccf2e7cca65756c +CompilerSupportLibraries.v1.0.2+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/d79c1434594c0c5e7d6be798bf52c99e 
+CompilerSupportLibraries.v1.0.2+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/7e71accc401a45b51b298702fb4c79a2fc856c7b28f0935f6ad3a0db5381c55fe5432daff371842930d718024b7c6c1d80e2bd09d397145203673bebbe3496ae +CompilerSupportLibraries.v1.0.2+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/f212059053d99558a9b0bf54b20180e1 +CompilerSupportLibraries.v1.0.2+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/5c104b1282cec8a944e5d008f44a4d60f4394fd5d797fec7d1f487d13e7328cd9c88ec4916dabf18596d87160756bda914e4f8c5a356b5577f9349d0d9e976d6 +CompilerSupportLibraries.v1.0.2+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/3e3b3795ee93ef317223050e803a9875 +CompilerSupportLibraries.v1.0.2+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/85d3c955e15f66bfe8bfec2f28c9160bc03d4d531ea4ffe6bc6b51e0d69ccea3ab67a16ca752dabc870861c407381c4519d75c6be3832e8dccd6122ec8c6ed75 +CompilerSupportLibraries.v1.0.2+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/cf2d1315f6a348af2e6c065e2a286e7a +CompilerSupportLibraries.v1.0.2+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/58420377bc77aa7678034ee5f708eb6be7db359faef2c2638869765453633da9bf455512bd88e95b38ae0428ecc4053561517b176b2371129bdaef9d8d5dadfd +CompilerSupportLibraries.v1.0.2+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/f5c09ed7e0eeb8d345d328f950582f26 +CompilerSupportLibraries.v1.0.2+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c657f55c8fcdeb404be168a3a63a5e84304730fe34f25673d92cdae4b0a1fcc6a877ee1433f060e1be854c7811d66632e32510a2ed591d88330f1340b9c20de +CompilerSupportLibraries.v1.0.2+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/c685518aca4721cd8621d510e2039683 +CompilerSupportLibraries.v1.0.2+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b760468c6377dcd2b8dd50200daaabe604006afc070984d78152b2becd0680b59036c9a6e91dea490121bd85b58d285bfc1e1cf696d29af236528400101de36c +CompilerSupportLibraries.v1.0.2+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/8faf5c8ad62ab10f71dd2ec9683053e2 
+CompilerSupportLibraries.v1.0.2+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/921239f241a5c89710cf07272d7f6c3f10201a7533068ed1e9643f9fb2f439e1bb765a4966d913829866ee0ce4f1589d30d06e4b5c1361e3c016a9473f087177 +CompilerSupportLibraries.v1.0.2+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/b38fcb70691ac2621379d298eef8c79e +CompilerSupportLibraries.v1.0.2+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/06c7f64257ce721f5941f6e50a0d2717cdc9394fc532ded19ce3eaacd5e92a416969534227562e4fee04d2b6340c650d8bc9779e14519b90038bc41e8d1f5ce3 +CompilerSupportLibraries.v1.0.2+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/cdfab2c7bc41765caf4441c3caeed761 +CompilerSupportLibraries.v1.0.2+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/7109d4a7b32c00309c42685f54a86fc2cc63c0c00f65584ad296b6e44ad3320eed1aaf49684a8831841cdffa5555d72f89272fb722a780596e27ef020528026b +CompilerSupportLibraries.v1.0.2+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/441980ebd23d72772cbe603f1c275336 +CompilerSupportLibraries.v1.0.2+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/e273d9f1af259a3080df8f173e1808a1ade976a943aba97216bf59a96178e7c052e7a048b0ceee53ab486ed577a2ecb92579857be2f7b29e76322ee1f13c9d76 +CompilerSupportLibraries.v1.0.2+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/f5c09ed7e0eeb8d345d328f950582f26 +CompilerSupportLibraries.v1.0.2+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c657f55c8fcdeb404be168a3a63a5e84304730fe34f25673d92cdae4b0a1fcc6a877ee1433f060e1be854c7811d66632e32510a2ed591d88330f1340b9c20de +CompilerSupportLibraries.v1.0.2+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/c685518aca4721cd8621d510e2039683 +CompilerSupportLibraries.v1.0.2+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b760468c6377dcd2b8dd50200daaabe604006afc070984d78152b2becd0680b59036c9a6e91dea490121bd85b58d285bfc1e1cf696d29af236528400101de36c +CompilerSupportLibraries.v1.0.2+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/8faf5c8ad62ab10f71dd2ec9683053e2 
+CompilerSupportLibraries.v1.0.2+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/921239f241a5c89710cf07272d7f6c3f10201a7533068ed1e9643f9fb2f439e1bb765a4966d913829866ee0ce4f1589d30d06e4b5c1361e3c016a9473f087177 +CompilerSupportLibraries.v1.0.2+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/b38fcb70691ac2621379d298eef8c79e +CompilerSupportLibraries.v1.0.2+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/06c7f64257ce721f5941f6e50a0d2717cdc9394fc532ded19ce3eaacd5e92a416969534227562e4fee04d2b6340c650d8bc9779e14519b90038bc41e8d1f5ce3 +CompilerSupportLibraries.v1.0.2+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/cdfab2c7bc41765caf4441c3caeed761 +CompilerSupportLibraries.v1.0.2+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/7109d4a7b32c00309c42685f54a86fc2cc63c0c00f65584ad296b6e44ad3320eed1aaf49684a8831841cdffa5555d72f89272fb722a780596e27ef020528026b +CompilerSupportLibraries.v1.0.2+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/441980ebd23d72772cbe603f1c275336 +CompilerSupportLibraries.v1.0.2+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/e273d9f1af259a3080df8f173e1808a1ade976a943aba97216bf59a96178e7c052e7a048b0ceee53ab486ed577a2ecb92579857be2f7b29e76322ee1f13c9d76 +CompilerSupportLibraries.v1.0.2+0.i686-linux-gnu-libgfortran3.tar.gz/md5/6decf8fd5afb50451771c761e63a8917 +CompilerSupportLibraries.v1.0.2+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/4984724bcc847724b1bc005b6f760a18b68147f7d5402d0faf4e28fc0d14fa10975368a951f9caf2a8856500046dec8343043274557d58269e77492b929a9e4b +CompilerSupportLibraries.v1.0.2+0.i686-linux-gnu-libgfortran4.tar.gz/md5/39d1e8a3baa144c018d3eaf7f3806482 +CompilerSupportLibraries.v1.0.2+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/fc4d429279c5a93b6c28b6e911b1e7cfd1c1cfe46f11f2e901b3832ce90d45f49d3d29f0ef18518a94af6cc8651f67c4ed81672680f9281ada390440b172a2af +CompilerSupportLibraries.v1.0.2+0.i686-linux-gnu-libgfortran5.tar.gz/md5/37dabd9cd224c9fed9633dedccb6c565 
+CompilerSupportLibraries.v1.0.2+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/b253149e72eef9486888fbaace66e9b6945f4477f6b818f64f3047331165b0e2bc17aa6e3fc8c88686a72e478eb62c8f53883415d5419db448d8016fa3a1da5e +CompilerSupportLibraries.v1.0.2+0.i686-linux-musl-libgfortran3.tar.gz/md5/afdd32bfadd465848e6be458817a44ae +CompilerSupportLibraries.v1.0.2+0.i686-linux-musl-libgfortran3.tar.gz/sha512/eebd679c499143014514c7c9d1875dedbbab9e3af51526c4dd445a9e3dbade95d24522da8bbad0a50ab400755e47b018828b324c4ad7705e212ccd990e34439a +CompilerSupportLibraries.v1.0.2+0.i686-linux-musl-libgfortran4.tar.gz/md5/bc4a0f0b7cea328f7e8850583774496b +CompilerSupportLibraries.v1.0.2+0.i686-linux-musl-libgfortran4.tar.gz/sha512/82285b67946212b49cddf6259f2c60ff5469f8c5263ccefe44f1d93ace98ab68e2c152e1b54434b2f075fd8d192c06d5451bc8cca26d951ad15f3453102f02b5 +CompilerSupportLibraries.v1.0.2+0.i686-linux-musl-libgfortran5.tar.gz/md5/177f0232abce8d523882530ed7a93092 +CompilerSupportLibraries.v1.0.2+0.i686-linux-musl-libgfortran5.tar.gz/sha512/db80acf0f2434f28ee7680e1beb34f564940071815d1ad89fb5913cbd9ac24da528e826d0d54be6265a7340ebd661b6d308ed79d96b67fa5d8c98dc3f1bee8d6 +CompilerSupportLibraries.v1.0.2+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/756718e5eaa4547b874a71a8e3545492 +CompilerSupportLibraries.v1.0.2+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/c21c1be10ca8810f56e435b3629e2ab0678926ea9c4f4c3dd003f9e292c075493b83df04401d3bcf7738f1a44098f674f9b01bba9db4b9a9e45ad7af3497444e +CompilerSupportLibraries.v1.0.2+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/65ce0024bf8fe3276addbf185ed03e48 +CompilerSupportLibraries.v1.0.2+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/5e8105a12ab04e2949e41eda50a060dea04ccd98660c7528cfc86e120fe61cca8bab878fd2c92a3858f02ac3f3c55d0e48789907e5fbd2392a8e84b183ed4636 +CompilerSupportLibraries.v1.0.2+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/b7727324d550f637209db795238c46a4 
+CompilerSupportLibraries.v1.0.2+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/864b1db2642e68665b9d3322563c7ce964835d0e720325ea00b193e2cbf6791760e0014710e2a79876165ab0daffa6d53d61b87a5034f956ba6e255b0144652c +CompilerSupportLibraries.v1.0.2+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/4e5e4b23dc87450738da33926a07511d +CompilerSupportLibraries.v1.0.2+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/fc09879d94b750e75775d8b64a41ab9924d675fb53c5700467604412928fe7f5cb21911da0f64898d2463fa77ffbaf4c96c397b9060f4746eec152747930cddc +CompilerSupportLibraries.v1.0.2+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/9a92138ed69aa317a932a615c6e62d69 +CompilerSupportLibraries.v1.0.2+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/0b7785379936a2a209b074177b1424dd7e00b29b5165f564e799b0aa4e06a582e9d616525d97274ba2507cb88192028f1ac485d3f99bdc7ee53fc63c1a7e85de +CompilerSupportLibraries.v1.0.2+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/8ffee3d6de5197c7a1f354d72c8238fa +CompilerSupportLibraries.v1.0.2+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/deadc4d7224c84f9b82dc956b69e815c44ae036802838365d870ab9f58c8bcf8ce0645f2f387c8ff344ac2108fc8e7e1ee907fa55e93c91aa5d9fd921bf3fdcb +CompilerSupportLibraries.v1.0.2+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/87449e72e3f33dbb69b7053cdc2649d4 +CompilerSupportLibraries.v1.0.2+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/5ce02ad10c6f4686a476eb2a5de2988cd8b482f5e693db2880c84ad1c82f468ef03fe01b9d0feefe5d4ee741d1d16643d36b144e6261ed32311b3b6f312fac2f +CompilerSupportLibraries.v1.0.2+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/0407cde92cfa42fa89ac83217ca0ec16 +CompilerSupportLibraries.v1.0.2+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/032c831f1166a336551138939ac40eb2c68a048ce786c0c1403b879a20c1b706caac16d22560b2c7f2b3d6373986c347188675674116005ca251336ee048d09f +CompilerSupportLibraries.v1.0.2+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/23418763b808371ee94772a90d501f4d 
+CompilerSupportLibraries.v1.0.2+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/7867b843551457b11bda7821dd384c1c1cf23b80a308b2058a693de7b7da099f0b37eb0a6de2b84c04b625a68c60eea55138e200d5d6ec6f6af09bd7ce406a96 +CompilerSupportLibraries.v1.0.2+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/e3d33ae03c18affea74699bdc1fabb68 +CompilerSupportLibraries.v1.0.2+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/42013f4921de5a69ad857195ce5c19ad1bca3c920d79699e5501f1f4534ab132fabd422362b2b5056f5d182215d6c069db5df460bafa700903faf962cc00f77b +CompilerSupportLibraries.v1.0.2+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/d40c1e8c0393213c6057c53a12f44175 +CompilerSupportLibraries.v1.0.2+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/fe7baa4de7490065ab7b953cc12f41462a24bcb49d0a4a64b23249e98e7569b19bb1cb455af2f76090e34066a7d3cdd7a48cae6515ce6c7a5c8486b0cacc5106 +CompilerSupportLibraries.v1.0.2+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/48541b90f715c4c86ee4da0570275947 +CompilerSupportLibraries.v1.0.2+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/7f2683fb98e80f12629f4ed3bea9fd59d32b7e7a9ed1699e782d8e238ff0915ecc61bf00adaf4597cfe41caf82cdca0f9be250f595f5f0bea6d8f77dba99eaf4 +CompilerSupportLibraries.v1.0.2+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/4547059eb905995667be48bf85d49911 +CompilerSupportLibraries.v1.0.2+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/7400fdabc924434ab4a4949248c3603887ac06ffd2f205ae33e14495d86cd4f816bbd1999eeafa0257f518df1e7f7c522f596e847a71dbfbfccff4859f50acc7 +CompilerSupportLibraries.v1.0.2+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/46267543cad6584d7b7b9fcc8f18f21d +CompilerSupportLibraries.v1.0.2+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/0353d7d724be48d4185d3c181692970b7996f53f6a01723072aa5c94b53a8c5055faeed30df51659c252a46f4b941dec0cb24569323e3c85c166f14c5b7c8e9e +CompilerSupportLibraries.v1.0.2+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/14dba2897a6e9d370fa9091c045375fc 
+CompilerSupportLibraries.v1.0.2+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/10b79f9c059839f5b57fa8d2a381a034c4067262c4088bd354d14ea56bec097878069383aa9cfadaa09d73bd20fc348fb61662d863a8d62cb25d7af6b8e29858 +CompilerSupportLibraries.v1.0.2+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/eed836d1addeb10d0901f836724aff1e +CompilerSupportLibraries.v1.0.2+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/e33eca424d1529a1fb23ba9cf7fac345ed1cfc8073c975b6b31ca44d2e8c3f5083af65433df009b22483dceb2e43149f3c1e8433681fec5fb812e1d5b4243ce4 +CompilerSupportLibraries.v1.0.2+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/d5ae9f9519341fdaabf62267c89461d2 +CompilerSupportLibraries.v1.0.2+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/6421aa5d1bd6f08ad43f59ed4dc1bef8b9b598ebbbd3e48149730f3bec3471f8e2c02ffb338427326924290b8f52ef9e626e3313448bc931a61d866c5dc544ae +CompilerSupportLibraries.v1.0.2+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/fc1df521395362a5aaa2e2aeef707207 +CompilerSupportLibraries.v1.0.2+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/f2e5a08e3cae171242ae6a20d2d4838c1529ce042745dc466148b7bbc06896d94476fd05c7787e6e8641bea752dfc0e6b09e95b160bede600d20d2ad68e7705f +CompilerSupportLibraries.v1.0.2+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/2338f8aa2696935f7460454e708ce308 +CompilerSupportLibraries.v1.0.2+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/5a4b0e97928c26eee16bbec4c3e69e55fa9c768101257c3e2f161118809c778aa0feaf21307198822c3172a58ed12ca0a49285b2941ed0b8f2b367e64ca1c51a +CompilerSupportLibraries.v1.0.2+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/b393d2bf0d181d218130ac572c17d369 +CompilerSupportLibraries.v1.0.2+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/76e0f7caa24bb734c6f7542be9f834d5b912f082cb3c4c3c52a63e37d4b8c33dd94e576c43f4bee6c04bfb44af2f2b67ba70773fa52ad0de6c8c0059b3e51b83 +CompilerSupportLibraries.v1.0.2+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/23db836e6e4142f621862971017fe61e 
+CompilerSupportLibraries.v1.0.2+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/c0b04f7fe5aabfe6af509c77a1f68e0bcfd14714758042fe502b968c4cc272156fc84c8b4c1ee574754bb2fddaa810f6a4215cbd164ddc11b697b3adaef09a81 diff --git a/doc/make.jl b/doc/make.jl index 75e3598ced6f7..04b8af595e58f 100644 --- a/doc/make.jl +++ b/doc/make.jl @@ -142,6 +142,7 @@ DevDocs = [ "devdocs/subarrays.md", "devdocs/isbitsunionarrays.md", "devdocs/sysimg.md", + "devdocs/pkgimg.md", "devdocs/llvm.md", "devdocs/stdio.md", "devdocs/boundscheck.md", diff --git a/doc/src/devdocs/pkgimg.md b/doc/src/devdocs/pkgimg.md new file mode 100644 index 0000000000000..8230c4b91b338 --- /dev/null +++ b/doc/src/devdocs/pkgimg.md @@ -0,0 +1,48 @@ +# Package Images + +Julia package images provide object (native code) caches for Julia packages. +They are similar to Julia's [system image](@ref dev-sysimg) and support many of the same features. +In fact the underlying serialization format is the same, and the system image is the base image that the package images are built against. + +## High-level overview + +Package images are shared libraries that contain both code and data. Like `.ji` cache files, they are generated per package. The data section contains both global data (global variables in the package) as well as the necessary metadata about what methods and types are defined by the package. The code section contains native objects that cache the final output of Julia's LLVM-based compiler. + +The command line option `--pkgimages=no` can be used to turn off object caching for this session. Note that this means that cache files will likely have to be regenerated. +See [`JULIA_MAX_NUM_PRECOMPILE_FILES`](@ref env-max-num-precompile-files) for the upper limit of variants Julia caches by default. + +!!! note + While the package images present themselves as native shared libraries, they are only an approximation thereof. You will not be able to link against them from a native program and they must be loaded from Julia.
+ + +## Linking + +Since the package images contain native code, we must run a linker over them before we can use them. You can set the environment variable `JULIA_VERBOSE_LINKING` to `true` to make the package image linking process verbose. + +Furthermore, we cannot assume that the user has a working system linker installed. Therefore, Julia ships with LLD, the LLVM linker, to provide a working out-of-the-box experience. In `base/linking.jl`, we implement a limited interface to be able to link package images on all supported platforms. + +### Quirks +Despite LLD being a multi-platform linker, it does not provide a consistent interface across platforms. Furthermore, it is meant to be used from `clang` or +another compiler driver; we therefore reimplement some of the logic from `llvm-project/clang/lib/Driver/ToolChains`. Thankfully one can use `lld -flavor` to set lld to the right platform. + +#### Windows +To avoid having to deal with `link.exe` we use `-flavor gnu`, effectively turning `lld` into a cross-linker from a mingw32 environment. Windows DLLs are required to contain a `_DllMainCRTStartup` function and to minimize our dependence on mingw32 libraries, we inject a stub definition ourselves. + +#### MacOS +Dynamic libraries on macOS need to link against `-lSystem`. On recent macOS versions, `-lSystem` is only available for linking when Xcode is available. +To that effect we link with `-undefined dynamic_lookup`. + +## Package images optimized for multiple microarchitectures +Similar to [multi-versioning](@ref sysimg-multi-versioning) for system images, package images support multi-versioning. If you are in a heterogeneous environment, with a unified cache, +you can set the environment variable `JULIA_CPU_TARGET=generic` to multi-version the object caches. + +## Flags that impact package image creation and selection + +These are the Julia command line flags that impact cache selection. Package images +that were created with different flags will be rejected.
+ +- `-g`, `--debug-info`: Exact match required since it changes code generation. +- `--check-bounds`: Exact match required since it changes code generation. +- `--pkgimages`: To allow running without object caching enabled. +- `-O`, `--optimize`: Reject package images generated for a lower optimization level, + but allow for higher optimization levels to be loaded. diff --git a/doc/src/devdocs/sysimg.md b/doc/src/devdocs/sysimg.md index 5c976875846d3..a21e3ba265f9b 100644 --- a/doc/src/devdocs/sysimg.md +++ b/doc/src/devdocs/sysimg.md @@ -19,7 +19,7 @@ This operation is useful for multiple reasons. A user may: The [`PackageCompiler.jl` package](https://github.com/JuliaLang/PackageCompiler.jl) contains convenient wrapper functions to automate this process. -## System image optimized for multiple microarchitectures +## [System image optimized for multiple microarchitectures](@id sysimg-multi-versioning) The system image can be compiled simultaneously for multiple CPU microarchitectures under the same instruction set architecture (ISA). Multiple versions of the same function diff --git a/doc/src/manual/code-loading.md b/doc/src/manual/code-loading.md index f9575b0159d8c..f39c1f6ae4178 100644 --- a/doc/src/manual/code-loading.md +++ b/doc/src/manual/code-loading.md @@ -388,17 +388,18 @@ When a package with extensions is added to an environment, the `weakdeps` and `e are stored in the manifest file in the section for that package. The dependency lookup rules for a package are the same as for its "parent" except that the listed extension dependencies are also considered as dependencies. -### Package/Environment Preferences + +### [Package/Environment Preferences](@id preferences) Preferences are dictionaries of metadata that influence package behavior within an environment. 
-The preferences system supports reading preferences at compile-time, which means that at code-loading time, we must ensure that a particular `.ji` file was built with the same preferences as the current environment before loading it. +The preferences system supports reading preferences at compile-time, which means that at code-loading time, we must ensure that the precompilation files selected by Julia were built with the same preferences as the current environment before loading them. The public API for modifying Preferences is contained within the [Preferences.jl](https://github.com/JuliaPackaging/Preferences.jl) package. Preferences are stored as TOML dictionaries within a `(Julia)LocalPreferences.toml` file next to the currently-active project. If a preference is "exported", it is instead stored within the `(Julia)Project.toml` instead. The intention is to allow shared projects to contain shared preferences, while allowing for users themselves to override those preferences with their own settings in the LocalPreferences.toml file, which should be .gitignored as the name implies. -Preferences that are accessed during compilation are automatically marked as compile-time preferences, and any change recorded to these preferences will cause the Julia compiler to recompile any cached precompilation `.ji` files for that module. -This is done by serializing the hash of all compile-time preferences during compilation, then checking that hash against the current environment when searching for the proper `.ji` file to load. +Preferences that are accessed during compilation are automatically marked as compile-time preferences, and any change recorded to these preferences will cause the Julia compiler to recompile any cached precompilation file(s) (`.ji` and corresponding `.so`, `.dll`, or `.dylib` files) for that module. 
+This is done by serializing the hash of all compile-time preferences during compilation, then checking that hash against the current environment when searching for the proper file(s) to load. Preferences can be set with depot-wide defaults; if package Foo is installed within your global environment and it has preferences set, these preferences will apply as long as your global environment is part of your `LOAD_PATH`. Preferences in environments higher up in the environment stack get overridden by the more proximal entries in the load path, ending with the currently active project. diff --git a/doc/src/manual/command-line-interface.md b/doc/src/manual/command-line-interface.md index 4af3c05d51eb6..e35cbf5e313e7 100644 --- a/doc/src/manual/command-line-interface.md +++ b/doc/src/manual/command-line-interface.md @@ -88,7 +88,7 @@ There are various ways to run Julia code and provide options, similar to those a julia [switches] -- [programfile] [args...] ``` -The following is a complete list of command-line switches available when launching julia (a '*' marks the default value, if applicable): +The following is a complete list of command-line switches available when launching julia (a '*' marks the default value, if applicable; settings marked '($)' may trigger package precompilation): |Switch |Description| |:--- |:---| @@ -102,6 +102,7 @@ The following is a complete list of command-line switches available when launchi |`--handle-signals={yes*\|no}` |Enable or disable Julia's default signal handlers| |`--sysimage-native-code={yes*\|no}` |Use native code from system image if available| |`--compiled-modules={yes*\|no}` |Enable or disable incremental precompilation of modules| +|`--pkgimages={yes*\|no}` |Enable or disable usage of native code caching in the form of pkgimages| |`-e`, `--eval ` |Evaluate ``| |`-E`, `--print ` |Evaluate `` and display the result| |`-L`, `--load ` |Load `` immediately on all processors| @@ -117,11 +118,11 @@ The following is a complete list 
of command-line switches available when launchi |`--warn-overwrite={yes\|no*}` |Enable or disable method overwrite warnings| |`--warn-scope={yes*\|no}` |Enable or disable warning for ambiguous top-level scope| |`-C`, `--cpu-target ` |Limit usage of CPU features up to ``; set to `help` to see the available options| -|`-O`, `--optimize={0,1,2*,3}` |Set the optimization level (level is 3 if `-O` is used without a level)| +|`-O`, `--optimize={0,1,2*,3}` |Set the optimization level (level is 3 if `-O` is used without a level) ($)| |`--min-optlevel={0*,1,2,3}` |Set the lower bound on per-module optimization| -|`-g {0,1*,2}` |Set the level of debug info generation (level is 2 if `-g` is used without a level)| +|`-g`, `--debug-info={0,1*,2}` |Set the level of debug info generation (level is 2 if `-g` is used without a level) ($)| |`--inline={yes\|no}` |Control whether inlining is permitted, including overriding `@inline` declarations| -|`--check-bounds={yes\|no\|auto*}` |Emit bounds checks always, never, or respect `@inbounds` declarations| +|`--check-bounds={yes\|no\|auto*}` |Emit bounds checks always, never, or respect `@inbounds` declarations ($)| |`--math-mode={ieee,fast}` |Disallow or enable unsafe floating point optimizations (overrides `@fastmath` declaration)| |`--code-coverage[={none*\|user\|all}]` |Count executions of source lines (omitting setting is equivalent to `user`)| |`--code-coverage=tracefile.info` |Append coverage information to the LCOV tracefile (filename supports format tokens).| diff --git a/doc/src/manual/environment-variables.md b/doc/src/manual/environment-variables.md index bc4a742365d69..f29e5b7aaf8f7 100644 --- a/doc/src/manual/environment-variables.md +++ b/doc/src/manual/environment-variables.md @@ -162,10 +162,14 @@ The absolute path `REPL.find_hist_file()` of the REPL's history file. 
If $(DEPOT_PATH[1])/logs/repl_history.jl ``` -### `JULIA_MAX_NUM_PRECOMPILE_FILES` +### [`JULIA_MAX_NUM_PRECOMPILE_FILES`](@id env-max-num-precompile-files) Sets the maximum number of different instances of a single package that are to be stored in the precompile cache (default = 10). +### `JULIA_VERBOSE_LINKING` + +If set to true, linker commands will be displayed during precompilation. + ## Pkg.jl ### `JULIA_CI` diff --git a/doc/src/manual/methods.md b/doc/src/manual/methods.md index 6cbcc4fad6a65..a504f8e3511b2 100644 --- a/doc/src/manual/methods.md +++ b/doc/src/manual/methods.md @@ -265,8 +265,40 @@ julia> methods(+) ``` Multiple dispatch together with the flexible parametric type system give Julia its ability to -abstractly express high-level algorithms decoupled from implementation details, yet generate efficient, -specialized code to handle each case at run time. +abstractly express high-level algorithms decoupled from implementation details. + +## [Method specializations](@id man-method-specializations) + +When you create multiple methods of the same function, this is sometimes called +"specialization." In this case, you're specializing the *function* by adding additional +methods to it: each new method is a new specialization of the function. +As shown above, these specializations are returned by `methods`. + +There's another kind of specialization that occurs without programmer intervention: +Julia's compiler can automatically specialize the *method* for the specific argument types used. +Such specializations are *not* listed by `methods`, as this doesn't create new `Method`s, but tools like [`@code_typed`](@ref) allow you to inspect such specializations. + +For example, if you create a method + +``` +mysum(x::Real, y::Real) = x + y +``` + +you've given the function `mysum` one new method (possibly its only method), and that method takes any pair of `Real` number inputs. 
But if you then execute + +```julia-repl +julia> mysum(1, 2) +3 + +julia> mysum(1.0, 2.0) +3.0 +``` + +Julia will compile `mysum` twice, once for `x::Int, y::Int` and again for `x::Float64, y::Float64`. +The point of compiling twice is performance: the methods that get called for `+` (which `mysum` uses) vary depending on the specific types of `x` and `y`, and by compiling different specializations Julia can do all the method lookup ahead of time. This allows the program to run much more quickly, since it does not have to bother with method lookup while it is running. +Julia's automatic specialization allows you to write generic algorithms and expect that the compiler will generate efficient, specialized code to handle each case you need. + +In cases where the number of potential specializations might be effectively unlimited, Julia may avoid this default specialization. See [Be aware of when Julia avoids specializing](@ref) for more information. ## [Method Ambiguities](@id man-ambiguities) diff --git a/doc/src/manual/modules.md b/doc/src/manual/modules.md index f0a9a5110ded4..90680828d2bc2 100644 --- a/doc/src/manual/modules.md +++ b/doc/src/manual/modules.md @@ -9,7 +9,7 @@ Modules in Julia help organize code into coherent units. They are delimited synt 2. Modules have facilities for detailed namespace management: each defines a set of names it `export`s, and can import names from other modules with `using` and `import` (we explain these below). -3. Modules can be precompiled for faster loading, and contain code for runtime initialization. +3. Modules can be precompiled for faster loading, and may contain code for runtime initialization. Typically, in larger Julia packages you will see module code organized into files, eg @@ -429,11 +429,14 @@ Large modules can take several seconds to load because executing all of the stat often involves compiling a large amount of code. Julia creates precompiled caches of the module to reduce this time. 
-The incremental precompiled module file are created and used automatically when using `import` -or `using` to load a module. This will cause it to be automatically compiled the first time -it is imported. Alternatively, you can manually call [`Base.compilecache(Base.identify_package("modulename"))`](@ref). The resulting -cache files will be stored in `DEPOT_PATH[1]/compiled/`. Subsequently, the module is automatically -recompiled upon `using` or `import` whenever any of its dependencies change; dependencies are modules it +Precompiled module files (sometimes called "cache files") are created and used automatically when `import` or `using` loads a module. If the cache file(s) do not yet exist, the module will be compiled and saved for future reuse. You can also manually call [`Base.compilecache(Base.identify_package("modulename"))`](@ref) to create these files without loading the module. The resulting +cache files will be stored in the `compiled` subfolder of `DEPOT_PATH[1]`. If nothing about your system changes, +such cache files will be used when you load the module with `import` or `using`. + +Precompilation cache files store definitions of modules, types, methods, and constants. They may also store method specializations and the code generated for them, but this typically requires that the developer add explicit [`precompile`](@ref) directives or execute workloads that force compilation during the package build. + +However, if you update the module's dependencies or change its source code, the module is automatically +recompiled upon `using` or `import`. Dependencies are modules it imports, the Julia build, files it includes, or explicit dependencies declared by [`include_dependency(path)`](@ref) in the module file(s). 
@@ -445,6 +448,7 @@ by the search logic in `require` matches the path that had created the precompil into account the set of dependencies already loaded into the current process and won't recompile those modules, even if their files change or disappear, in order to avoid creating incompatibilities between the running system and the precompile cache. +Finally, it takes account of changes in any [compile-time preferences](@ref preferences). If you know that a module is *not* safe to precompile (for example, for one of the reasons described below), you should @@ -589,6 +593,12 @@ A few other points to be aware of: It is sometimes helpful during module development to turn off incremental precompilation. The command line flag `--compiled-modules={yes|no}` enables you to toggle module precompilation on and off. When Julia is started with `--compiled-modules=no` the serialized modules in the compile cache -are ignored when loading modules and module dependencies. `Base.compilecache` can still be called +are ignored when loading modules and module dependencies. +More fine-grained control is available with `--pkgimages=no`, which suppresses only +native-code storage during precompilation. `Base.compilecache` can still be called manually. The state of this command line flag is passed to `Pkg.build` to disable automatic precompilation triggering when installing, updating, and explicitly building packages. + +You can also debug some precompilation failures with environment variables. Setting +`JULIA_VERBOSE_LINKING=true` may help resolve failures in linking shared libraries of compiled +native code. See the **Developer Documentation** part of the Julia manual, where you will find further details in the section documenting Julia's internals under "Package Images". 
diff --git a/doc/src/manual/performance-tips.md b/doc/src/manual/performance-tips.md index 6bfdce4fc411b..1f3b7bc06ae7f 100644 --- a/doc/src/manual/performance-tips.md +++ b/doc/src/manual/performance-tips.md @@ -525,7 +525,7 @@ at the time `k` is compiled. ### Be aware of when Julia avoids specializing -As a heuristic, Julia avoids automatically specializing on argument type parameters in three +As a heuristic, Julia avoids automatically [specializing](@ref man-method-specializations) on argument type parameters in three specific cases: `Type`, `Function`, and `Vararg`. Julia will always specialize when the argument is used within the method, but not if the argument is just passed through to another function. This usually has no performance impact at runtime and diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 7325adde8b060..6f9345ee18f82 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -61,7 +61,9 @@ using namespace llvm; #include "jitlayers.h" +#include "serialize.h" #include "julia_assert.h" +#include "codegen_shared.h" #define DEBUG_TYPE "julia_aotcompile" @@ -93,6 +95,7 @@ typedef struct { std::vector jl_sysimg_gvars; std::map> jl_fvar_map; std::vector jl_value_to_llvm; + std::vector jl_external_to_llvm; } jl_native_code_desc_t; extern "C" JL_DLLEXPORT @@ -118,6 +121,15 @@ void jl_get_llvm_gvs_impl(void *native_code, arraylist_t *gvs) memcpy(gvs->items, data->jl_value_to_llvm.data(), gvs->len * sizeof(void*)); } +extern "C" JL_DLLEXPORT +void jl_get_llvm_external_fns_impl(void *native_code, arraylist_t *external_fns) +{ + jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; + arraylist_grow(external_fns, data->jl_external_to_llvm.size()); + memcpy(external_fns->items, data->jl_external_to_llvm.data(), + external_fns->len * sizeof(jl_code_instance_t*)); +} + extern "C" JL_DLLEXPORT LLVMOrcThreadSafeModuleRef jl_get_llvm_module_impl(void *native_code) { @@ -248,13 +260,17 @@ static void jl_ci_cache_lookup(const jl_cgparams_t 
&cgparams, jl_method_instance *ci_out = codeinst; } +void replaceUsesWithLoad(Function &F, function_ref should_replace, MDNode *tbaa_const); + // takes the running content that has collected in the shadow module and dump it to disk // this builds the object file portion of the sysimage files for fast startup, and can // also be used be extern consumers like GPUCompiler.jl to obtain a module containing -// all reachable & inferrrable functions. The `policy` flag switches between the default -// mode `0`, the extern mode `1`. +// all reachable & inferrable functions. +// The `policy` flag switches between the default mode `0` and the extern mode `1` used by GPUCompiler. +// `_imaging_mode` controls if raw pointers can be embedded (e.g. the code will be loaded into the same session). +// `_external_linkage` creates linkages between pkgimages. extern "C" JL_DLLEXPORT -void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode) +void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode, int _external_linkage) { ++CreateNativeCalls; CreateNativeMax.updateMax(jl_array_len(methods)); @@ -289,6 +305,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm jl_codegen_params_t params(ctxt); params.params = cgparams; params.imaging = imaging; + params.external_linkage = _external_linkage; size_t compile_for[] = { jl_typeinf_world, jl_atomic_load_acquire(&jl_world_counter) }; for (int worlds = 0; worlds < 2; worlds++) { params.world = compile_for[worlds]; @@ -348,6 +365,39 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm } CreateNativeMethods += emitted.size(); + size_t offset = gvars.size(); + data->jl_external_to_llvm.resize(params.external_fns.size()); + + auto tbaa_const = tbaa_make_child_with_context(*ctxt.getContext(), "jtbaa_const", 
nullptr, true).first; + for (auto &extern_fn : params.external_fns) { + jl_code_instance_t *this_code = std::get<0>(extern_fn.first); + bool specsig = std::get<1>(extern_fn.first); + assert(specsig && "Error external_fns doesn't handle non-specsig yet"); + (void)specsig; + Function *F = extern_fn.second; + Module *M = F->getParent(); + + Type *T_funcp = F->getFunctionType()->getPointerTo(); + // Can't create a GV with type FunctionType. Alias also doesn't work + GlobalVariable *GV = new GlobalVariable(*M, T_funcp, false, + GlobalVariable::ExternalLinkage, + Constant::getNullValue(T_funcp), + F->getName()); + + + // Need to insert load instruction, thus we can't use replace all uses with + replaceUsesWithLoad(*F, [GV](Instruction &) { return GV; }, tbaa_const); + + assert(F->getNumUses() == 0); // declaration counts as use + GV->takeName(F); + F->eraseFromParent(); + + size_t idx = gvars.size() - offset; + assert(idx >= 0); + data->jl_external_to_llvm.at(idx) = this_code; + gvars.push_back(std::string(GV->getName())); + } + // clones the contents of the module `m` to the shadow_output collector // while examining and recording what kind of function pointer we have for (auto &def : emitted) { @@ -459,7 +509,7 @@ static void injectCRTAlias(Module &M, StringRef name, StringRef alias, FunctionT if (!target) { target = Function::Create(FT, Function::ExternalLinkage, alias, M); } - Function *interposer = Function::Create(FT, Function::WeakAnyLinkage, name, M); + Function *interposer = Function::Create(FT, Function::InternalLinkage, name, M); appendToCompilerUsed(M, {interposer}); llvm::IRBuilder<> builder(BasicBlock::Create(M.getContext(), "top", interposer)); @@ -477,7 +527,7 @@ extern "C" JL_DLLEXPORT void jl_dump_native_impl(void *native_code, const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, - const char *sysimg_data, size_t sysimg_len) + const char *sysimg_data, size_t sysimg_len, ios_t *s) { JL_TIMING(NATIVE_DUMP); 
jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; @@ -589,7 +639,7 @@ void jl_dump_native_impl(void *native_code, } // do the actual work - auto add_output = [&] (Module &M, StringRef unopt_bc_Name, StringRef bc_Name, StringRef obj_Name, StringRef asm_Name) { + auto add_output = [&] (Module &M, StringRef unopt_bc_Name, StringRef bc_Name, StringRef obj_Name, StringRef asm_Name, bool inject_crt) { preopt.run(M, empty.MAM); if (bc_fname || obj_fname || asm_fname) { assert(!verifyModule(M, &errs())); @@ -597,22 +647,49 @@ void jl_dump_native_impl(void *native_code, assert(!verifyModule(M, &errs())); } - // We would like to emit an alias or an weakref alias to redirect these symbols - // but LLVM doesn't let us emit a GlobalAlias to a declaration... - // So for now we inject a definition of these functions that calls our runtime - // functions. We do so after optimization to avoid cloning these functions. - injectCRTAlias(M, "__gnu_h2f_ieee", "julia__gnu_h2f_ieee", - FunctionType::get(Type::getFloatTy(Context), { Type::getHalfTy(Context) }, false)); - injectCRTAlias(M, "__extendhfsf2", "julia__gnu_h2f_ieee", - FunctionType::get(Type::getFloatTy(Context), { Type::getHalfTy(Context) }, false)); - injectCRTAlias(M, "__gnu_f2h_ieee", "julia__gnu_f2h_ieee", - FunctionType::get(Type::getHalfTy(Context), { Type::getFloatTy(Context) }, false)); - injectCRTAlias(M, "__truncsfhf2", "julia__gnu_f2h_ieee", - FunctionType::get(Type::getHalfTy(Context), { Type::getFloatTy(Context) }, false)); - injectCRTAlias(M, "__truncdfhf2", "julia__truncdfhf2", - FunctionType::get(Type::getHalfTy(Context), { Type::getDoubleTy(Context) }, false)); + if (inject_crt) { + // We would like to emit an alias or an weakref alias to redirect these symbols + // but LLVM doesn't let us emit a GlobalAlias to a declaration... + // So for now we inject a definition of these functions that calls our runtime + // functions. We do so after optimization to avoid cloning these functions. 
+ injectCRTAlias(M, "__gnu_h2f_ieee", "julia__gnu_h2f_ieee", + FunctionType::get(Type::getFloatTy(Context), { Type::getHalfTy(Context) }, false)); + injectCRTAlias(M, "__extendhfsf2", "julia__gnu_h2f_ieee", + FunctionType::get(Type::getFloatTy(Context), { Type::getHalfTy(Context) }, false)); + injectCRTAlias(M, "__gnu_f2h_ieee", "julia__gnu_f2h_ieee", + FunctionType::get(Type::getHalfTy(Context), { Type::getFloatTy(Context) }, false)); + injectCRTAlias(M, "__truncsfhf2", "julia__gnu_f2h_ieee", + FunctionType::get(Type::getHalfTy(Context), { Type::getFloatTy(Context) }, false)); + injectCRTAlias(M, "__truncdfhf2", "julia__truncdfhf2", + FunctionType::get(Type::getHalfTy(Context), { Type::getDoubleTy(Context) }, false)); + +#if defined(_OS_WINDOWS_) + // Windows expects that the function `_DllMainCRTStartup` is present in a DLL. + // Normal compilers use something like Zig's crtdll.c; instead we provide + // a stub implementation. + auto T_pvoid = Type::getInt8Ty(Context)->getPointerTo(); + auto T_int32 = Type::getInt32Ty(Context); + auto FT = FunctionType::get(T_int32, {T_pvoid, T_int32, T_pvoid}, false); + auto F = Function::Create(FT, Function::ExternalLinkage, "_DllMainCRTStartup", M); + F->setCallingConv(CallingConv::X86_StdCall); + + llvm::IRBuilder<> builder(BasicBlock::Create(M.getContext(), "top", F)); + builder.CreateRet(ConstantInt::get(T_int32, 1)); +#endif + } postopt.run(M, empty.MAM); + + // Get target by snooping on multiversioning + GlobalVariable *target_ids = M.getNamedGlobal("jl_dispatch_target_ids"); + if (s && target_ids) { + if(auto targets = dyn_cast(target_ids->getInitializer())) { + auto rawTargets = targets->getRawDataValues(); + write_int32(s, rawTargets.size()); + ios_write(s, rawTargets.data(), rawTargets.size()); + }; + } + emitter.run(M); if (unopt_bc_fname) @@ -625,7 +702,7 @@ void jl_dump_native_impl(void *native_code, emit_result(asm_Archive, asm_Buffer, asm_Name, outputs); }; - add_output(*dataM, "unopt.bc", "text.bc", "text.o", 
"text.s"); + add_output(*dataM, "unopt.bc", "text.bc", "text.o", "text.s", true); orc::ThreadSafeModule sysimage(std::make_unique("sysimage", Context), TSCtx); auto sysimageM = sysimage.getModuleUnlocked(); @@ -648,7 +725,7 @@ void jl_dump_native_impl(void *native_code, GlobalVariable::ExternalLinkage, len, "jl_system_image_size")); } - add_output(*sysimageM, "data.bc", "data.bc", "data.o", "data.s"); + add_output(*sysimageM, "data.bc", "data.bc", "data.o", "data.s", false); object::Archive::Kind Kind = getDefaultForHost(TheTriple); if (unopt_bc_fname) diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c index 01324e349f08f..e7b7d1fb791a5 100644 --- a/src/codegen-stubs.c +++ b/src/codegen-stubs.c @@ -12,8 +12,9 @@ JL_DLLEXPORT void jl_dump_native_fallback(void *native_code, const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, - const char *sysimg_data, size_t sysimg_len) UNAVAILABLE + const char *sysimg_data, size_t sysimg_len, ios_t *s) UNAVAILABLE JL_DLLEXPORT void jl_get_llvm_gvs_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE +JL_DLLEXPORT void jl_get_llvm_external_fns_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE JL_DLLEXPORT void jl_extern_c_fallback(jl_function_t *f, jl_value_t *rt, jl_value_t *argt, char *name) UNAVAILABLE JL_DLLEXPORT jl_value_t *jl_dump_method_asm_fallback(jl_method_instance_t *linfo, size_t world, @@ -31,10 +32,10 @@ JL_DLLEXPORT int jl_getFunctionInfo_fallback(jl_frame_t **frames, uintptr_t poin return 0; } -JL_DLLEXPORT void jl_register_fptrs_fallback(uint64_t sysimage_base, const struct _jl_sysimg_fptrs_t *fptrs, +JL_DLLEXPORT void jl_register_fptrs_fallback(uint64_t image_base, const struct _jl_image_fptrs_t *fptrs, jl_method_instance_t **linfos, size_t n) { - (void)sysimage_base; (void)fptrs; (void)linfos; (void)n; + (void)image_base; (void)fptrs; (void)linfos; (void)n; } JL_DLLEXPORT jl_code_instance_t *jl_generate_fptr_fallback(jl_method_instance_t *mi 
JL_PROPAGATES_ROOT, size_t world) @@ -66,7 +67,7 @@ JL_DLLEXPORT size_t jl_jit_total_bytes_fallback(void) return 0; } -JL_DLLEXPORT void *jl_create_native_fallback(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode) UNAVAILABLE +JL_DLLEXPORT void *jl_create_native_fallback(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode, int _external_linkage) UNAVAILABLE JL_DLLEXPORT void jl_dump_compiles_fallback(void *s) { diff --git a/src/codegen.cpp b/src/codegen.cpp index cdbc833267bb8..66eeecb80593a 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1418,6 +1418,7 @@ class jl_codectx_t { jl_codegen_params_t &emission_context; llvm::MapVector call_targets; std::map &global_targets; + std::map, Function*> &external_calls; Function *f = NULL; // local var info. globals are not in here. std::vector slots; @@ -1454,6 +1455,7 @@ class jl_codectx_t { bool debug_enabled = false; bool use_cache = false; + bool external_linkage = false; const jl_cgparams_t *params = NULL; std::vector llvmcall_modules; @@ -1463,8 +1465,10 @@ class jl_codectx_t { emission_context(params), call_targets(), global_targets(params.globals), + external_calls(params.external_fns), world(params.world), use_cache(params.cache), + external_linkage(params.external_linkage), params(params.params) { } jl_typecache_t &types() { @@ -4022,9 +4026,17 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const std::string name; StringRef protoname; bool need_to_emit = true; - // TODO: We should check if the code is available externally - // and then emit a trampoline. 
- if (ctx.use_cache) { + bool cache_valid = ctx.use_cache; + bool external = false; + if (ctx.external_linkage) { + if (jl_object_in_image((jl_value_t*)codeinst)) { + // Target is present in another pkgimage + cache_valid = true; + external = true; + } + } + + if (cache_valid) { // optimization: emit the correct name immediately, if we know it // TODO: use `emitted` map here too to try to consolidate names? auto invoke = jl_atomic_load_relaxed(&codeinst->invoke); @@ -4051,6 +4063,13 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const result = emit_call_specfun_other(ctx, mi, codeinst->rettype, protoname, argv, nargs, &cc, &return_roots, rt); else result = emit_call_specfun_boxed(ctx, codeinst->rettype, protoname, argv, nargs, rt); + if (external) { + assert(!need_to_emit); + auto calledF = jl_Module->getFunction(protoname); + assert(calledF); + // TODO: Check if already present? + ctx.external_calls[std::make_tuple(codeinst, specsig)] = calledF; + } handled = true; if (need_to_emit) { Function *trampoline_decl = cast(jl_Module->getNamedValue(protoname)); @@ -5370,7 +5389,16 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod Function *theFunc; Value *theFarg; auto invoke = jl_atomic_load_relaxed(&codeinst->invoke); - if (params.cache && invoke != NULL) { + + bool cache_valid = params.cache; + if (params.external_linkage) { + if (jl_object_in_image((jl_value_t*)codeinst)) { + // Target is present in another pkgimage + cache_valid = true; + } + } + + if (cache_valid && invoke != NULL) { StringRef theFptrName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, codeinst); theFunc = cast( M->getOrInsertFunction(theFptrName, jlinvoke_func->_type(ctx.builder.getContext())).getCallee()); @@ -8261,11 +8289,12 @@ void jl_compile_workqueue( StringRef preal_decl = ""; bool preal_specsig = false; auto invoke = jl_atomic_load_relaxed(&codeinst->invoke); - // TODO: available_extern - // We need to emit a 
trampoline that loads the target address in an extern_module from a GV - // Right now we will unecessarily emit a function we have already compiled in a native module - // again in a calling module. - if (params.cache && invoke != NULL) { + bool cache_valid = params.cache; + if (params.external_linkage) { + cache_valid = jl_object_in_image((jl_value_t*)codeinst); + } + // WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this. + if (cache_valid && invoke != NULL) { auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr); if (invoke == jl_fptr_args_addr) { preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst); diff --git a/src/coverage.cpp b/src/coverage.cpp index 46363a7e9ac01..0dfb903798bfa 100644 --- a/src/coverage.cpp +++ b/src/coverage.cpp @@ -17,7 +17,7 @@ using namespace llvm; static int codegen_imaging_mode(void) { - return jl_options.image_codegen || (jl_generating_output() && !jl_options.incremental); + return jl_options.image_codegen || jl_generating_output(); } // Logging for code coverage and memory allocation diff --git a/src/debug-registry.h b/src/debug-registry.h index 3780bbee33718..165f0efa479e3 100644 --- a/src/debug-registry.h +++ b/src/debug-registry.h @@ -81,11 +81,11 @@ class JITDebugInfoRegistry ~Locked() JL_NOTSAFEPOINT = default; }; - struct sysimg_info_t { - uint64_t jl_sysimage_base; - jl_sysimg_fptrs_t sysimg_fptrs; - jl_method_instance_t **sysimg_fvars_linfo; - size_t sysimg_fvars_n; + struct image_info_t { + uint64_t base; + jl_image_fptrs_t fptrs; + jl_method_instance_t **fvars_linfo; + size_t fvars_n; }; struct libc_frames_t { @@ -122,7 +122,7 @@ class JITDebugInfoRegistry // that it came from (providing name, type signature, file info, etc.) 
Locked> codeinst_in_flight{}; - Locked sysimg_info{}; + Locked> image_info{}; Locked objfilemap{}; @@ -141,7 +141,7 @@ class JITDebugInfoRegistry std::function getLoadAddress, std::function lookupWriteAddress) JL_NOTSAFEPOINT; objectmap_t& getObjectMap() JL_NOTSAFEPOINT; - void set_sysimg_info(sysimg_info_t info) JL_NOTSAFEPOINT; - Locked::ConstLockT get_sysimg_info() const JL_NOTSAFEPOINT; + void add_image_info(image_info_t info) JL_NOTSAFEPOINT; + bool get_image_info(uint64_t base, image_info_t *info) const JL_NOTSAFEPOINT; Locked::LockT get_objfile_map() JL_NOTSAFEPOINT; }; diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp index fe5614100f9e3..997c04aff6445 100644 --- a/src/debuginfo.cpp +++ b/src/debuginfo.cpp @@ -109,13 +109,19 @@ JITDebugInfoRegistry::getObjectMap() JL_NOTSAFEPOINT return objectmap; } -void JITDebugInfoRegistry::set_sysimg_info(sysimg_info_t info) JL_NOTSAFEPOINT { - (**this->sysimg_info) = info; +void JITDebugInfoRegistry::add_image_info(image_info_t info) JL_NOTSAFEPOINT { + (**this->image_info)[info.base] = info; } -JITDebugInfoRegistry::Locked::ConstLockT -JITDebugInfoRegistry::get_sysimg_info() const JL_NOTSAFEPOINT { - return *this->sysimg_info; + +bool JITDebugInfoRegistry::get_image_info(uint64_t base, JITDebugInfoRegistry::image_info_t *info) const JL_NOTSAFEPOINT { + auto infos = *this->image_info; + auto it = infos->find(base); + if (it != infos->end()) { + *info = it->second; + return true; + } + return false; } JITDebugInfoRegistry::Locked::LockT @@ -680,10 +686,10 @@ openDebugInfo(StringRef debuginfopath, const debug_link_info &info) std::move(SplitFile.get())); } extern "C" JL_DLLEXPORT -void jl_register_fptrs_impl(uint64_t sysimage_base, const jl_sysimg_fptrs_t *fptrs, +void jl_register_fptrs_impl(uint64_t image_base, const jl_image_fptrs_t *fptrs, jl_method_instance_t **linfos, size_t n) { - getJITDebugRegistry().set_sysimg_info({(uintptr_t) sysimage_base, *fptrs, linfos, n}); + 
getJITDebugRegistry().add_image_info({(uintptr_t) image_base, *fptrs, linfos, n}); } template @@ -694,12 +700,9 @@ static inline void ignoreError(T &err) JL_NOTSAFEPOINT #endif } -static void get_function_name_and_base(llvm::object::SectionRef Section, size_t pointer, int64_t slide, bool insysimage, +static void get_function_name_and_base(llvm::object::SectionRef Section, size_t pointer, int64_t slide, bool inimage, void **saddr, char **name, bool untrusted_dladdr) JL_NOTSAFEPOINT { - // Assume we only need base address for sysimg for now - if (!insysimage || !getJITDebugRegistry().get_sysimg_info()->sysimg_fptrs.base) - saddr = nullptr; bool needs_saddr = saddr && (!*saddr || untrusted_dladdr); bool needs_name = name && (!*name || untrusted_dladdr); // Try platform specific methods first since they are usually faster @@ -780,7 +783,7 @@ static void get_function_name_and_base(llvm::object::SectionRef Section, size_t } #ifdef _OS_WINDOWS_ // For ntdll and msvcrt since we are currently only parsing DWARF debug info through LLVM - if (!insysimage && needs_name) { + if (!inimage && needs_name) { static char frame_info_func[ sizeof(SYMBOL_INFO) + MAX_SYM_NAME * sizeof(TCHAR)]; @@ -1012,7 +1015,7 @@ static object::SectionRef getModuleSectionForAddress(const object::ObjectFile *o bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t *slide, llvm::DIContext **context, - bool onlySysImg, bool *isSysImg, void **saddr, char **name, char **filename) JL_NOTSAFEPOINT + bool onlyImage, bool *isImage, uint64_t *_fbase, void **saddr, char **name, char **filename) JL_NOTSAFEPOINT { *Section = object::SectionRef(); *context = NULL; @@ -1046,10 +1049,11 @@ bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t * if (fname.empty()) // empirically, LoadedImageName might be missing fname = ModuleInfo.ImageName; DWORD64 fbase = ModuleInfo.BaseOfImage; - bool insysimage = (fbase == getJITDebugRegistry().get_sysimg_info()->jl_sysimage_base); 
- if (isSysImg) - *isSysImg = insysimage; - if (onlySysImg && !insysimage) + JITDebugInfoRegistry::image_info_t image_info; + bool inimage = getJITDebugRegistry().get_image_info(fbase, &image_info); + if (isImage) + *isImage = inimage; + if (onlyImage && !inimage) return false; // If we didn't find the filename before in the debug // info, use the dll name @@ -1057,6 +1061,8 @@ bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t * jl_copy_str(filename, fname.data()); if (saddr) *saddr = NULL; + if (_fbase) + *_fbase = fbase; #else // ifdef _OS_WINDOWS_ Dl_info dlinfo; @@ -1095,16 +1101,19 @@ bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t * fbase = (uintptr_t)dlinfo.dli_fbase; #endif StringRef fname; - bool insysimage = (fbase == getJITDebugRegistry().get_sysimg_info()->jl_sysimage_base); - if (saddr && !(insysimage && untrusted_dladdr)) + JITDebugInfoRegistry::image_info_t image_info; + bool inimage = getJITDebugRegistry().get_image_info(fbase, &image_info); + if (saddr && !(inimage && untrusted_dladdr)) *saddr = dlinfo.dli_saddr; - if (isSysImg) - *isSysImg = insysimage; - if (onlySysImg && !insysimage) + if (isImage) + *isImage = inimage; + if (onlyImage && !inimage) return false; + if (_fbase) + *_fbase = fbase; // In case we fail with the debug info lookup, we at least still // have the function name, even if we don't have line numbers - if (name && !(insysimage && untrusted_dladdr)) + if (name && !(inimage && untrusted_dladdr)) jl_copy_str(name, dlinfo.dli_sname); if (filename) jl_copy_str(filename, dlinfo.dli_fname); @@ -1115,7 +1124,10 @@ bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t * *context = entry.ctx; if (entry.obj) *Section = getModuleSectionForAddress(entry.obj, pointer + entry.slide); - get_function_name_and_base(*Section, pointer, entry.slide, insysimage, saddr, name, untrusted_dladdr); + // Assume we only need base address for sysimg for now + if (!inimage 
|| !image_info.fptrs.base) + saddr = nullptr; + get_function_name_and_base(*Section, pointer, entry.slide, inimage, saddr, name, untrusted_dladdr); return true; } @@ -1144,34 +1156,36 @@ static int jl_getDylibFunctionInfo(jl_frame_t **frames, size_t pointer, int skip object::SectionRef Section; llvm::DIContext *context = NULL; int64_t slide; - bool isSysImg; + bool isImage; void *saddr; - if (!jl_dylib_DI_for_fptr(pointer, &Section, &slide, &context, skipC, &isSysImg, &saddr, &frame0->func_name, &frame0->file_name)) { + uint64_t fbase; + if (!jl_dylib_DI_for_fptr(pointer, &Section, &slide, &context, skipC, &isImage, &fbase, &saddr, &frame0->func_name, &frame0->file_name)) { frame0->fromC = 1; return 1; } - frame0->fromC = !isSysImg; + frame0->fromC = !isImage; { - auto sysimg_locked = getJITDebugRegistry().get_sysimg_info(); - if (isSysImg && sysimg_locked->sysimg_fptrs.base && saddr) { - intptr_t diff = (uintptr_t)saddr - (uintptr_t)sysimg_locked->sysimg_fptrs.base; - for (size_t i = 0; i < sysimg_locked->sysimg_fptrs.nclones; i++) { - if (diff == sysimg_locked->sysimg_fptrs.clone_offsets[i]) { - uint32_t idx = sysimg_locked->sysimg_fptrs.clone_idxs[i] & jl_sysimg_val_mask; - if (idx < sysimg_locked->sysimg_fvars_n) // items after this were cloned but not referenced directly by a method (such as our ccall PLT thunks) - frame0->linfo = sysimg_locked->sysimg_fvars_linfo[idx]; + JITDebugInfoRegistry::image_info_t image; + bool inimage = getJITDebugRegistry().get_image_info(fbase, &image); + if (isImage && saddr && inimage) { + intptr_t diff = (uintptr_t)saddr - (uintptr_t)image.fptrs.base; + for (size_t i = 0; i < image.fptrs.nclones; i++) { + if (diff == image.fptrs.clone_offsets[i]) { + uint32_t idx = image.fptrs.clone_idxs[i] & jl_sysimg_val_mask; + if (idx < image.fvars_n) // items after this were cloned but not referenced directly by a method (such as our ccall PLT thunks) + frame0->linfo = image.fvars_linfo[idx]; break; } } - for (size_t i = 0; i < 
sysimg_locked->sysimg_fvars_n; i++) { - if (diff == sysimg_locked->sysimg_fptrs.offsets[i]) { - frame0->linfo = sysimg_locked->sysimg_fvars_linfo[i]; + for (size_t i = 0; i < image.fvars_n; i++) { + if (diff == image.fptrs.offsets[i]) { + frame0->linfo = image.fvars_linfo[i]; break; } } } } - return lookup_pointer(Section, context, frames, pointer, slide, isSysImg, noInline); + return lookup_pointer(Section, context, frames, pointer, slide, isImage, noInline); } int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide, diff --git a/src/debuginfo.h b/src/debuginfo.h index 5ea34350ac1fb..5b5cdcb82d534 100644 --- a/src/debuginfo.h +++ b/src/debuginfo.h @@ -6,7 +6,7 @@ int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide, llvm::object::SectionRef *Section, llvm::DIContext **context) JL_NOTSAFEPOINT; bool jl_dylib_DI_for_fptr(size_t pointer, llvm::object::SectionRef *Section, int64_t *slide, llvm::DIContext **context, - bool onlySysImg, bool *isSysImg, void **saddr, char **name, char **filename) JL_NOTSAFEPOINT; + bool onlyImage, bool *isImage, uint64_t* fbase, void **saddr, char **name, char **filename) JL_NOTSAFEPOINT; static object::SectionedAddress makeAddress( llvm::object::SectionRef Section, uint64_t address) JL_NOTSAFEPOINT diff --git a/src/disasm.cpp b/src/disasm.cpp index cfc030f649fd6..5b510a24b33da 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -592,7 +592,7 @@ jl_value_t *jl_dump_fptr_asm_impl(uint64_t fptr, char raw_mc, const char* asm_va llvm::DIContext *context = NULL; if (!jl_DI_for_fptr(fptr, &symsize, &slide, &Section, &context)) { if (!jl_dylib_DI_for_fptr(fptr, &Section, &slide, &context, - false, NULL, NULL, NULL, NULL)) { + false, NULL, NULL, NULL, NULL, NULL)) { jl_printf(JL_STDERR, "WARNING: Unable to find function pointer\n"); return jl_pchar_to_string("", 0); } diff --git a/src/jitlayers.h b/src/jitlayers.h index 77ac5d64bb46d..bad5a91cad6c5 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -73,7 +73,7 @@ 
GlobalVariable *jl_emit_RTLD_DEFAULT_var(Module *M); DataLayout jl_create_datalayout(TargetMachine &TM); static inline bool imaging_default() { - return jl_options.image_codegen || (jl_generating_output() && !jl_options.incremental); + return jl_options.image_codegen || jl_generating_output(); } struct OptimizationOptions { @@ -173,6 +173,7 @@ typedef struct _jl_codegen_params_t { // outputs std::vector> workqueue; std::map globals; + std::map, Function*> external_fns; std::map ditypes; std::map llvmtypes; DenseMap mergedConstants; @@ -200,6 +201,7 @@ typedef struct _jl_codegen_params_t { size_t world = 0; const jl_cgparams_t *params = &jl_default_cgparams; bool cache = false; + bool external_linkage = false; bool imaging; _jl_codegen_params_t(orc::ThreadSafeContext ctx) : tsctx(std::move(ctx)), tsctx_lock(tsctx.getLock()), imaging(imaging_default()) {} } jl_codegen_params_t; diff --git a/src/jl_exported_data.inc b/src/jl_exported_data.inc index 6f0671ef0d6f7..c81ee410c9cd7 100644 --- a/src/jl_exported_data.inc +++ b/src/jl_exported_data.inc @@ -127,6 +127,8 @@ XX(jl_voidpointer_type) \ XX(jl_void_type) \ XX(jl_weakref_type) \ + XX(jl_build_ids) \ + XX(jl_linkage_blobs) \ // Data symbols that are defined inside the public libjulia #define JL_EXPORTED_DATA_SYMBOLS(XX) \ diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index cc387a2769ac1..f3a6b950a1dd5 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -118,6 +118,7 @@ XX(jl_dlopen) \ XX(jl_dlsym) \ XX(jl_dump_host_cpu) \ + XX(jl_check_pkgimage_clones) \ XX(jl_egal) \ XX(jl_egal__bits) \ XX(jl_egal__special) \ @@ -393,6 +394,8 @@ XX(jl_queue_work) \ XX(jl_raise_debugger) \ XX(jl_readuntil) \ + XX(jl_cache_flags) \ + XX(jl_match_cache_flags) \ XX(jl_read_verify_header) \ XX(jl_realloc) \ XX(jl_register_newmeth_tracer) \ @@ -535,6 +538,7 @@ YY(jl_get_LLVM_VERSION) \ YY(jl_dump_native) \ YY(jl_get_llvm_gvs) \ + YY(jl_get_llvm_external_fns) \ YY(jl_dump_function_asm) \ 
YY(jl_LLVMCreateDisasm) \ YY(jl_LLVMDisasmInstruction) \ diff --git a/src/jloptions.c b/src/jloptions.c index 8fb709513d7e8..5d2dee81ddc79 100644 --- a/src/jloptions.c +++ b/src/jloptions.c @@ -71,6 +71,7 @@ JL_DLLEXPORT void jl_init_options(void) JL_OPTIONS_HANDLE_SIGNALS_ON, JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES, JL_OPTIONS_USE_COMPILED_MODULES_YES, + JL_OPTIONS_USE_PKGIMAGES_YES, NULL, // bind-to NULL, // output-bc NULL, // output-unopt-bc @@ -92,7 +93,7 @@ JL_DLLEXPORT void jl_init_options(void) static const char usage[] = "\n julia [switches] -- [programfile] [args...]\n\n"; static const char opts[] = - "Switches (a '*' marks the default value, if applicable):\n\n" + "Switches (a '*' marks the default value, if applicable; settings marked '($)' may trigger package precompilation):\n\n" " -v, --version Display version information\n" " -h, --help Print this message (--help-hidden for more)\n" " --help-hidden Uncommon options not shown by `-h`\n\n" @@ -107,7 +108,9 @@ static const char opts[] = " --sysimage-native-code={yes*|no}\n" " Use native code from system image if available\n" " --compiled-modules={yes*|no}\n" - " Enable or disable incremental precompilation of modules\n\n" + " Enable or disable incremental precompilation of modules\n" + " --pkgimages={yes*|no}\n" + " Enable or disable usage of native code caching in the form of pkgimages ($)\n\n" // actions " -e, --eval Evaluate \n" @@ -143,16 +146,16 @@ static const char opts[] = // code generation options " -C, --cpu-target Limit usage of CPU features up to ; set to `help` to see the available options\n" - " -O, --optimize={0,1,2*,3} Set the optimization level (level 3 if `-O` is used without a level)\n" + " -O, --optimize={0,1,2*,3} Set the optimization level (level 3 if `-O` is used without a level) ($)\n" " --min-optlevel={0*,1,2,3} Set a lower bound on the optimization level\n" #ifdef JL_DEBUG_BUILD - " -g, --debug-info=[{0,1,2*}] Set the level of debug info generation in the julia-debug build\n" + 
" -g, --debug-info=[{0,1,2*}] Set the level of debug info generation in the julia-debug build ($)\n" #else - " -g, --debug-info=[{0,1*,2}] Set the level of debug info generation (level 2 if `-g` is used without a level)\n" + " -g, --debug-info=[{0,1*,2}] Set the level of debug info generation (level 2 if `-g` is used without a level) ($)\n" #endif " --inline={yes*|no} Control whether inlining is permitted, including overriding @inline declarations\n" " --check-bounds={yes|no|auto*}\n" - " Emit bounds checks always, never, or respect @inbounds declarations\n" + " Emit bounds checks always, never, or respect @inbounds declarations ($)\n" #ifdef USE_POLLY " --polly={yes*|no} Enable or disable the polyhedral optimizer Polly (overrides @polly declaration)\n" #endif @@ -239,6 +242,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) opt_banner, opt_sysimage_native_code, opt_compiled_modules, + opt_pkgimages, opt_machine_file, opt_project, opt_bug_report, @@ -267,6 +271,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) { "sysimage", required_argument, 0, 'J' }, { "sysimage-native-code", required_argument, 0, opt_sysimage_native_code }, { "compiled-modules",required_argument, 0, opt_compiled_modules }, + { "pkgimages", required_argument, 0, opt_pkgimages }, { "cpu-target", required_argument, 0, 'C' }, { "procs", required_argument, 0, 'p' }, { "threads", required_argument, 0, 't' }, @@ -317,6 +322,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) const char **cmds = NULL; int codecov = JL_LOG_NONE; int malloclog = JL_LOG_NONE; + int pkgimage_explicit = 0; int argc = *argcp; char **argv = *argvp; char *endptr; @@ -444,6 +450,15 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) else jl_errorf("julia: invalid argument to --compiled-modules={yes|no} (%s)", optarg); break; + case opt_pkgimages: + pkgimage_explicit = 1; + if (!strcmp(optarg,"yes")) + jl_options.use_pkgimages = JL_OPTIONS_USE_PKGIMAGES_YES; + else if 
(!strcmp(optarg,"no")) + jl_options.use_pkgimages = JL_OPTIONS_USE_PKGIMAGES_NO; + else + jl_errorf("julia: invalid argument to --pkgimages={yes|no} (%s)", optarg); + break; case 'C': // cpu-target jl_options.cpu_target = strdup(optarg); if (!jl_options.cpu_target) @@ -805,6 +820,13 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) "This is a bug, please report it.", c); } } + if (codecov || malloclog) { + if (pkgimage_explicit && jl_options.use_pkgimages) { + jl_errorf("julia: Can't use --pkgimages=yes together " + "with --track-allocation or --code-coverage."); + } + jl_options.use_pkgimages = 0; + } jl_options.code_coverage = codecov; jl_options.malloc_log = malloclog; int proc_args = *argcp < optind ? *argcp : optind; diff --git a/src/jloptions.h b/src/jloptions.h index d7be95348f01f..d0aba777027e7 100644 --- a/src/jloptions.h +++ b/src/jloptions.h @@ -42,6 +42,7 @@ typedef struct { int8_t handle_signals; int8_t use_sysimage_native_code; int8_t use_compiled_modules; + int8_t use_pkgimages; const char *bindto; const char *outputbc; const char *outputunoptbc; diff --git a/src/julia.h b/src/julia.h index fa6b8d32d10c0..139ec2f55e291 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1766,7 +1766,7 @@ JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void); JL_DLLEXPORT int jl_deserialize_verify_header(ios_t *s); JL_DLLEXPORT void jl_preload_sysimg_so(const char *fname); JL_DLLEXPORT void jl_set_sysimg_so(void *handle); -JL_DLLEXPORT ios_t *jl_create_system_image(void *, jl_array_t *worklist); +JL_DLLEXPORT void jl_create_system_image(void **, jl_array_t *worklist, bool_t emit_split, ios_t **s, ios_t **z, jl_array_t **udeps, int64_t *srctextpos); JL_DLLEXPORT void jl_restore_system_image(const char *fname); JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len); JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods, int complete); @@ -2187,6 +2187,9 @@ JL_DLLEXPORT int jl_generating_output(void)
JL_NOTSAFEPOINT; #define JL_OPTIONS_USE_COMPILED_MODULES_YES 1 #define JL_OPTIONS_USE_COMPILED_MODULES_NO 0 +#define JL_OPTIONS_USE_PKGIMAGES_YES 1 +#define JL_OPTIONS_USE_PKGIMAGES_NO 0 + // Version information #include // Generated file diff --git a/src/julia_internal.h b/src/julia_internal.h index 54daf076a1030..995a320787ede 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -997,11 +997,12 @@ JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, char raw_mc, const char JL_DLLEXPORT jl_value_t *jl_dump_function_ir(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo); JL_DLLEXPORT jl_value_t *jl_dump_function_asm(jl_llvmf_dump_t *dump, char raw_mc, const char* asm_variant, const char *debuginfo, char binary); -void *jl_create_native(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int policy, int imaging_mode); +void *jl_create_native(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int policy, int imaging_mode, int cache); void jl_dump_native(void *native_code, const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, - const char *sysimg_data, size_t sysimg_len); + const char *sysimg_data, size_t sysimg_len, ios_t *s); void jl_get_llvm_gvs(void *native_code, arraylist_t *gvs); +void jl_get_llvm_external_fns(void *native_code, arraylist_t *gvs); JL_DLLEXPORT void jl_get_function_id(void *native_code, jl_code_instance_t *ncode, int32_t *func_idx, int32_t *specfunc_idx); @@ -1623,9 +1624,9 @@ extern JL_DLLEXPORT jl_sym_t *jl_sequentially_consistent_sym; JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order(jl_sym_t *order, char loading, char storing); JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order_checked(jl_sym_t *order, char loading, char storing); -struct _jl_sysimg_fptrs_t; +struct _jl_image_fptrs_t; -void jl_register_fptrs(uint64_t sysimage_base, const struct _jl_sysimg_fptrs_t *fptrs, 
+void jl_register_fptrs(uint64_t image_base, const struct _jl_image_fptrs_t *fptrs, jl_method_instance_t **linfos, size_t n); void jl_write_coverage_data(const char*); void jl_write_malloc_log(void); diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp index 8cabfeb334096..8333e272cae17 100644 --- a/src/llvm-multiversioning.cpp +++ b/src/llvm-multiversioning.cpp @@ -45,6 +45,8 @@ using namespace llvm; extern Optional always_have_fma(Function&); +void replaceUsesWithLoad(Function &F, function_ref should_replace, MDNode *tbaa_const); + namespace { constexpr uint32_t clone_mask = JL_TARGET_CLONE_LOOP | JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH | JL_TARGET_CLONE_CPU; @@ -264,8 +266,6 @@ struct CloneCtx { uint32_t get_func_id(Function *F); template Constant *rewrite_gv_init(const Stack& stack); - template - Value *rewrite_inst_use(const Stack& stack, Value *replace, Instruction *insert_before); std::pair get_reloc_slot(Function *F); Constant *get_ptrdiff32(Constant *ptr, Constant *base) const; template @@ -810,7 +810,7 @@ std::pair CloneCtx::get_reloc_slot(Function *F) } template -Value *CloneCtx::rewrite_inst_use(const Stack& stack, Value *replace, Instruction *insert_before) +static Value *rewrite_inst_use(const Stack& stack, Value *replace, Instruction *insert_before) { SmallVector args; uint32_t nlevel = stack.size(); @@ -869,40 +869,24 @@ void CloneCtx::fix_inst_uses() continue; auto orig_f = orig_funcs[i]; auto F = grp.base_func(orig_f); - bool changed; - do { - changed = false; - for (auto uses = ConstantUses(F, M); !uses.done(); uses.next()) { - auto info = uses.get_info(); - auto use_i = info.val; - auto use_f = use_i->getFunction(); - if (!use_f->getName().endswith(suffix)) + replaceUsesWithLoad(*F, [&](Instruction &I) -> GlobalVariable * { + uint32_t id; + GlobalVariable *slot; + auto use_f = I.getFunction(); + if (!use_f->getName().endswith(suffix)) + return nullptr; + std::tie(id, slot) = get_reloc_slot(orig_f); + + 
grp.relocs.insert(id); + for (auto &tgt: grp.clones) { + // The enclosing function of the use is cloned, + // no need to deal with this use on this target. + if (map_get(*tgt.vmap, use_f)) continue; - Instruction *insert_before = use_i; - if (auto phi = dyn_cast(use_i)) - insert_before = phi->getIncomingBlock(*info.use)->getTerminator(); - uint32_t id; - GlobalVariable *slot; - std::tie(id, slot) = get_reloc_slot(orig_f); - Instruction *ptr = new LoadInst(orig_f->getType(), slot, "", false, insert_before); - ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); - ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(ptr->getContext(), None)); - use_i->setOperand(info.use->getOperandNo(), - rewrite_inst_use(uses.get_stack(), ptr, - insert_before)); - - grp.relocs.insert(id); - for (auto &tgt: grp.clones) { - // The enclosing function of the use is cloned, - // no need to deal with this use on this target. - if (map_get(*tgt.vmap, use_f)) - continue; - tgt.relocs.insert(id); - } - - changed = true; + tgt.relocs.insert(id); } - } while (changed); + return slot; + }, tbaa_const); } } } @@ -1192,6 +1176,30 @@ static RegisterPass X("JuliaMultiVersioning", "JuliaMulti } // anonymous namespace +void replaceUsesWithLoad(Function &F, function_ref should_replace, MDNode *tbaa_const) { + bool changed; + do { + changed = false; + for (auto uses = ConstantUses(&F, *F.getParent()); !uses.done(); uses.next()) { + auto info = uses.get_info(); + auto use_i = info.val; + GlobalVariable *slot = should_replace(*use_i); + if (!slot) + continue; + Instruction *insert_before = use_i; + if (auto phi = dyn_cast(use_i)) + insert_before = phi->getIncomingBlock(*info.use)->getTerminator(); + Instruction *ptr = new LoadInst(F.getType(), slot, "", false, insert_before); + ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); + ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(ptr->getContext(), None)); + use_i->setOperand(info.use->getOperandNo(), + 
rewrite_inst_use(uses.get_stack(), ptr, + insert_before)); + changed = true; + } + } while (changed); +} + PreservedAnalyses MultiVersioning::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult(M).getManager(); diff --git a/src/precompile.c b/src/precompile.c index ebe7afae69f64..bfc123cf3fda8 100644 --- a/src/precompile.c +++ b/src/precompile.c @@ -10,6 +10,7 @@ #include "julia.h" #include "julia_internal.h" #include "julia_assert.h" +#include "serialize.h" #ifdef __cplusplus extern "C" { @@ -20,8 +21,53 @@ JL_DLLEXPORT int jl_generating_output(void) return jl_options.outputo || jl_options.outputbc || jl_options.outputunoptbc || jl_options.outputji || jl_options.outputasm; } -static void *jl_precompile(int all); -static void *jl_precompile_worklist(jl_array_t *worklist); +void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) { + // Write the source-text for the dependent files + if (udeps) { + // Go back and update the source-text position to point to the current position + int64_t posfile = ios_pos(f); + ios_seek(f, srctextpos); + write_uint64(f, posfile); + ios_seek_end(f); + // Each source-text file is written as + // int32: length of abspath + // char*: abspath + // uint64: length of src text + // char*: src text + // At the end we write int32(0) as a terminal sentinel. 
+ size_t len = jl_array_len(udeps); + ios_t srctext; + for (size_t i = 0; i < len; i++) { + jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); + jl_value_t *depmod = jl_fieldref(deptuple, 0); // module + // Dependencies declared with `include_dependency` are excluded + // because these may not be Julia code (and could be huge) + if (depmod != (jl_value_t*)jl_main_module) { + jl_value_t *dep = jl_fieldref(deptuple, 1); // file abspath + const char *depstr = jl_string_data(dep); + if (!depstr[0]) + continue; + ios_t *srctp = ios_file(&srctext, depstr, 1, 0, 0, 0); + if (!srctp) { + jl_printf(JL_STDERR, "WARNING: could not cache source text for \"%s\".\n", + jl_string_data(dep)); + continue; + } + size_t slen = jl_string_len(dep); + write_int32(f, slen); + ios_write(f, depstr, slen); + posfile = ios_pos(f); + write_uint64(f, 0); // placeholder for length of this file in bytes + uint64_t filelen = (uint64_t) ios_copyall(f, &srctext); + ios_close(&srctext); + ios_seek(f, posfile); + write_uint64(f, filelen); + ios_seek_end(f); + } + } + } + write_int32(f, 0); // mark the end of the source text +} JL_DLLEXPORT void jl_write_compiler_output(void) { @@ -35,7 +81,8 @@ JL_DLLEXPORT void jl_write_compiler_output(void) } jl_array_t *worklist = jl_module_init_order; - JL_GC_PUSH1(&worklist); + jl_array_t *udeps = NULL; + JL_GC_PUSH2(&worklist, &udeps); jl_module_init_order = jl_alloc_vec_any(0); int i, l = jl_array_len(worklist); for (i = 0; i < l; i++) { @@ -59,49 +106,54 @@ JL_DLLEXPORT void jl_write_compiler_output(void) assert(jl_precompile_toplevel_module == NULL); void *native_code = NULL; - if (jl_options.outputo || jl_options.outputbc || jl_options.outputunoptbc || jl_options.outputasm) { - if (jl_options.incremental) - jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1); - native_code = jl_options.incremental ? 
jl_precompile_worklist(worklist) : jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL); - if (jl_options.incremental) - jl_precompile_toplevel_module = NULL; - } - if (jl_options.incremental) { - if (jl_options.outputbc || jl_options.outputunoptbc) - jl_printf(JL_STDERR, "WARNING: incremental output to a .bc file is not implemented\n"); - if (jl_options.outputasm) - jl_printf(JL_STDERR, "WARNING: incremental output to a .s file is not implemented\n"); - if (jl_options.outputo) { - jl_printf(JL_STDERR, "WARNING: incremental output to a .o file is not implemented\n"); - } - } + bool_t emit_native = jl_options.outputo || jl_options.outputbc || jl_options.outputunoptbc || jl_options.outputasm; - ios_t *s = jl_create_system_image(native_code, jl_options.incremental ? worklist : NULL); + bool_t emit_split = jl_options.outputji && emit_native; - if (jl_options.outputji) { - ios_t f; - if (ios_file(&f, jl_options.outputji, 1, 1, 1, 1) == NULL) - jl_errorf("cannot open system image file \"%s\" for writing", jl_options.outputji); - ios_write(&f, (const char*)s->buf, (size_t)s->size); - ios_close(&f); - } + ios_t *s = NULL; + ios_t *z = NULL; + int64_t srctextpos = 0 ; + jl_create_system_image(&native_code, jl_options.incremental ? worklist : NULL, emit_split, + &s, &z, &udeps, &srctextpos); + if (!emit_split) + z = s; + + // jl_dump_native writes the clone_targets into `s` + // We need to postpone the srctext writing after that. 
if (native_code) { jl_dump_native(native_code, jl_options.outputbc, jl_options.outputunoptbc, jl_options.outputo, jl_options.outputasm, - (const char*)s->buf, (size_t)s->size); + (const char*)z->buf, (size_t)z->size, s); jl_postoutput_hook(); } + if ((jl_options.outputji || emit_native) && jl_options.incremental) { + write_srctext(s, udeps, srctextpos); + } + + if (jl_options.outputji) { + ios_t f; + if (ios_file(&f, jl_options.outputji, 1, 1, 1, 1) == NULL) + jl_errorf("cannot open system image file \"%s\" for writing", jl_options.outputji); + ios_write(&f, (const char*)s->buf, (size_t)s->size); + ios_close(&f); + } + if (s) { ios_close(s); free(s); } + if (emit_split) { + ios_close(z); + free(z); + } + for (size_t i = 0; i < jl_current_modules.size; i += 2) { if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { jl_printf(JL_STDERR, "\nWARNING: detected unclosed module: "); @@ -112,296 +164,6 @@ JL_DLLEXPORT void jl_write_compiler_output(void) JL_GC_POP(); } -// f{<:Union{...}}(...) is a common pattern -// and expanding the Union may give a leaf function -static void _compile_all_tvar_union(jl_value_t *methsig) -{ - int tvarslen = jl_subtype_env_size(methsig); - jl_value_t *sigbody = methsig; - jl_value_t **roots; - JL_GC_PUSHARGS(roots, 1 + 2 * tvarslen); - jl_value_t **env = roots + 1; - int *idx = (int*)alloca(sizeof(int) * tvarslen); - int i; - for (i = 0; i < tvarslen; i++) { - assert(jl_is_unionall(sigbody)); - idx[i] = 0; - env[2 * i] = (jl_value_t*)((jl_unionall_t*)sigbody)->var; - env[2 * i + 1] = jl_bottom_type; // initialize the list with Union{}, since T<:Union{} is always a valid option - sigbody = ((jl_unionall_t*)sigbody)->body; - } - - for (i = 0; i < tvarslen; /* incremented by inner loop */) { - jl_value_t **sig = &roots[0]; - JL_TRY { - // TODO: wrap in UnionAll for each tvar in env[2*i + 1] ? 
- // currently doesn't matter much, since jl_compile_hint doesn't work on abstract types - *sig = (jl_value_t*)jl_instantiate_type_with(sigbody, env, tvarslen); - } - JL_CATCH { - goto getnext; // sigh, we found an invalid type signature. should we warn the user? - } - if (!jl_has_concrete_subtype(*sig)) - goto getnext; // signature wouldn't be callable / is invalid -- skip it - if (jl_is_concrete_type(*sig)) { - if (jl_compile_hint((jl_tupletype_t *)*sig)) - goto getnext; // success - } - - getnext: - for (i = 0; i < tvarslen; i++) { - jl_tvar_t *tv = (jl_tvar_t*)env[2 * i]; - if (jl_is_uniontype(tv->ub)) { - size_t l = jl_count_union_components(tv->ub); - size_t j = idx[i]; - if (j == l) { - env[2 * i + 1] = jl_bottom_type; - idx[i] = 0; - } - else { - jl_value_t *ty = jl_nth_union_component(tv->ub, j); - if (!jl_is_concrete_type(ty)) - ty = (jl_value_t*)jl_new_typevar(tv->name, tv->lb, ty); - env[2 * i + 1] = ty; - idx[i] = j + 1; - break; - } - } - else { - env[2 * i + 1] = (jl_value_t*)tv; - } - } - } - JL_GC_POP(); -} - -// f(::Union{...}, ...) is a common pattern -// and expanding the Union may give a leaf function -static void _compile_all_union(jl_value_t *sig) -{ - jl_tupletype_t *sigbody = (jl_tupletype_t*)jl_unwrap_unionall(sig); - size_t count_unions = 0; - size_t i, l = jl_svec_len(sigbody->parameters); - jl_svec_t *p = NULL; - jl_value_t *methsig = NULL; - - for (i = 0; i < l; i++) { - jl_value_t *ty = jl_svecref(sigbody->parameters, i); - if (jl_is_uniontype(ty)) - ++count_unions; - else if (ty == jl_bottom_type) - return; // why does this method exist? 
- else if (jl_is_datatype(ty) && !jl_has_free_typevars(ty) && - ((!jl_is_kind(ty) && ((jl_datatype_t*)ty)->isconcretetype) || - ((jl_datatype_t*)ty)->name == jl_type_typename)) - return; // no amount of union splitting will make this a leaftype signature - } - - if (count_unions == 0 || count_unions >= 6) { - _compile_all_tvar_union(sig); - return; - } - - int *idx = (int*)alloca(sizeof(int) * count_unions); - for (i = 0; i < count_unions; i++) { - idx[i] = 0; - } - - JL_GC_PUSH2(&p, &methsig); - int idx_ctr = 0, incr = 0; - while (!incr) { - p = jl_alloc_svec_uninit(l); - for (i = 0, idx_ctr = 0, incr = 1; i < l; i++) { - jl_value_t *ty = jl_svecref(sigbody->parameters, i); - if (jl_is_uniontype(ty)) { - assert(idx_ctr < count_unions); - size_t l = jl_count_union_components(ty); - size_t j = idx[idx_ctr]; - jl_svecset(p, i, jl_nth_union_component(ty, j)); - ++j; - if (incr) { - if (j == l) { - idx[idx_ctr] = 0; - } - else { - idx[idx_ctr] = j; - incr = 0; - } - } - ++idx_ctr; - } - else { - jl_svecset(p, i, ty); - } - } - methsig = (jl_value_t*)jl_apply_tuple_type(p); - methsig = jl_rewrap_unionall(methsig, sig); - _compile_all_tvar_union(methsig); - } - - JL_GC_POP(); -} - -static int compile_all_collect__(jl_typemap_entry_t *ml, void *env) -{ - jl_array_t *allmeths = (jl_array_t*)env; - jl_method_t *m = ml->func.method; - if (m->source) { - // method has a non-generated definition; can be compiled generically - jl_array_ptr_1d_push(allmeths, (jl_value_t*)m); - } - return 1; -} - -static int compile_all_collect_(jl_methtable_t *mt, void *env) -{ - jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), compile_all_collect__, env); - return 1; -} - -static void jl_compile_all_defs(jl_array_t *mis) -{ - jl_array_t *allmeths = jl_alloc_vec_any(0); - JL_GC_PUSH1(&allmeths); - - jl_foreach_reachable_mtable(compile_all_collect_, allmeths); - - size_t i, l = jl_array_len(allmeths); - for (i = 0; i < l; i++) { - jl_method_t *m = (jl_method_t*)jl_array_ptr_ref(allmeths, i); 
- if (jl_is_datatype(m->sig) && jl_isa_compileable_sig((jl_tupletype_t*)m->sig, jl_emptysvec, m)) { - // method has a single compilable specialization, e.g. its definition - // signature is concrete. in this case we can just hint it. - jl_compile_hint((jl_tupletype_t*)m->sig); - } - else { - // first try to create leaf signatures from the signature declaration and compile those - _compile_all_union(m->sig); - - // finally, compile a fully generic fallback that can work for all arguments - jl_method_instance_t *unspec = jl_get_unspecialized(m); - if (unspec) - jl_array_ptr_1d_push(mis, (jl_value_t*)unspec); - } - } - - JL_GC_POP(); -} - -static int precompile_enq_specialization_(jl_method_instance_t *mi, void *closure) -{ - assert(jl_is_method_instance(mi)); - jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache); - while (codeinst) { - int do_compile = 0; - if (jl_atomic_load_relaxed(&codeinst->invoke) != jl_fptr_const_return) { - jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred); - if (inferred && - inferred != jl_nothing && - jl_ir_flag_inferred((jl_array_t*)inferred) && - (jl_ir_inlining_cost((jl_array_t*)inferred) == UINT16_MAX)) { - do_compile = 1; - } - else if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL || jl_atomic_load_relaxed(&codeinst->precompile)) { - do_compile = 1; - } - } - if (do_compile) { - jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi); - return 1; - } - codeinst = jl_atomic_load_relaxed(&codeinst->next); - } - return 1; -} - -static int precompile_enq_all_specializations__(jl_typemap_entry_t *def, void *closure) -{ - jl_method_t *m = def->func.method; - if ((m->name == jl_symbol("__init__") || m->ccallable) && jl_is_dispatch_tupletype(m->sig)) { - // ensure `__init__()` and @ccallables get strongly-hinted, specialized, and compiled - jl_method_instance_t *mi = jl_specializations_get_linfo(m, m->sig, jl_emptysvec); - jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi); - } - else { - 
jl_svec_t *specializations = jl_atomic_load_relaxed(&def->func.method->specializations); - size_t i, l = jl_svec_len(specializations); - for (i = 0; i < l; i++) { - jl_value_t *mi = jl_svecref(specializations, i); - if (mi != jl_nothing) - precompile_enq_specialization_((jl_method_instance_t*)mi, closure); - } - } - if (m->ccallable) - jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)m->ccallable); - return 1; -} - -static int precompile_enq_all_specializations_(jl_methtable_t *mt, void *env) -{ - return jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), precompile_enq_all_specializations__, env); -} - -static void *jl_precompile_(jl_array_t *m) -{ - jl_array_t *m2 = NULL; - jl_method_instance_t *mi = NULL; - JL_GC_PUSH2(&m2, &mi); - m2 = jl_alloc_vec_any(0); - for (size_t i = 0; i < jl_array_len(m); i++) { - jl_value_t *item = jl_array_ptr_ref(m, i); - if (jl_is_method_instance(item)) { - mi = (jl_method_instance_t*)item; - size_t min_world = 0; - size_t max_world = ~(size_t)0; - if (mi != jl_atomic_load_relaxed(&mi->def.method->unspecialized) && !jl_isa_compileable_sig((jl_tupletype_t*)mi->specTypes, mi->sparam_vals, mi->def.method)) - mi = jl_get_specialization1((jl_tupletype_t*)mi->specTypes, jl_atomic_load_acquire(&jl_world_counter), &min_world, &max_world, 0); - if (mi) - jl_array_ptr_1d_push(m2, (jl_value_t*)mi); - } - else { - assert(jl_is_simplevector(item)); - assert(jl_svec_len(item) == 2); - jl_array_ptr_1d_push(m2, item); - } - } - void *native_code = jl_create_native(m2, NULL, NULL, 0, 1); - JL_GC_POP(); - return native_code; -} - -static void *jl_precompile(int all) -{ - // array of MethodInstances and ccallable aliases to include in the output - jl_array_t *m = jl_alloc_vec_any(0); - JL_GC_PUSH1(&m); - if (all) - jl_compile_all_defs(m); - jl_foreach_reachable_mtable(precompile_enq_all_specializations_, m); - void *native_code = jl_precompile_(m); - JL_GC_POP(); - return native_code; -} - -static void *jl_precompile_worklist(jl_array_t 
*worklist) -{ - if (!worklist) - return NULL; - // this "found" array will contain function - // type signatures that were inferred but haven't been compiled - jl_array_t *m = jl_alloc_vec_any(0); - JL_GC_PUSH1(&m); - size_t i, nw = jl_array_len(worklist); - for (i = 0; i < nw; i++) { - jl_module_t *mod = (jl_module_t*)jl_array_ptr_ref(worklist, i); - assert(jl_is_module(mod)); - foreach_mtable_in_module(mod, precompile_enq_all_specializations_, m); - } - void *native_code = jl_precompile_(m); - JL_GC_POP(); - return native_code; -} - #ifdef __cplusplus } #endif diff --git a/src/precompile_utils.c b/src/precompile_utils.c new file mode 100644 index 0000000000000..f251d00f76cfd --- /dev/null +++ b/src/precompile_utils.c @@ -0,0 +1,306 @@ +// f{<:Union{...}}(...) is a common pattern +// and expanding the Union may give a leaf function +static void _compile_all_tvar_union(jl_value_t *methsig) +{ + int tvarslen = jl_subtype_env_size(methsig); + jl_value_t *sigbody = methsig; + jl_value_t **roots; + JL_GC_PUSHARGS(roots, 1 + 2 * tvarslen); + jl_value_t **env = roots + 1; + int *idx = (int*)alloca(sizeof(int) * tvarslen); + int i; + for (i = 0; i < tvarslen; i++) { + assert(jl_is_unionall(sigbody)); + idx[i] = 0; + env[2 * i] = (jl_value_t*)((jl_unionall_t*)sigbody)->var; + env[2 * i + 1] = jl_bottom_type; // initialize the list with Union{}, since T<:Union{} is always a valid option + sigbody = ((jl_unionall_t*)sigbody)->body; + } + + for (i = 0; i < tvarslen; /* incremented by inner loop */) { + jl_value_t **sig = &roots[0]; + JL_TRY { + // TODO: wrap in UnionAll for each tvar in env[2*i + 1] ? + // currently doesn't matter much, since jl_compile_hint doesn't work on abstract types + *sig = (jl_value_t*)jl_instantiate_type_with(sigbody, env, tvarslen); + } + JL_CATCH { + goto getnext; // sigh, we found an invalid type signature. should we warn the user? 
+ } + if (!jl_has_concrete_subtype(*sig)) + goto getnext; // signature wouldn't be callable / is invalid -- skip it + if (jl_is_concrete_type(*sig)) { + if (jl_compile_hint((jl_tupletype_t *)*sig)) + goto getnext; // success + } + + getnext: + for (i = 0; i < tvarslen; i++) { + jl_tvar_t *tv = (jl_tvar_t*)env[2 * i]; + if (jl_is_uniontype(tv->ub)) { + size_t l = jl_count_union_components(tv->ub); + size_t j = idx[i]; + if (j == l) { + env[2 * i + 1] = jl_bottom_type; + idx[i] = 0; + } + else { + jl_value_t *ty = jl_nth_union_component(tv->ub, j); + if (!jl_is_concrete_type(ty)) + ty = (jl_value_t*)jl_new_typevar(tv->name, tv->lb, ty); + env[2 * i + 1] = ty; + idx[i] = j + 1; + break; + } + } + else { + env[2 * i + 1] = (jl_value_t*)tv; + } + } + } + JL_GC_POP(); +} + +// f(::Union{...}, ...) is a common pattern +// and expanding the Union may give a leaf function +static void _compile_all_union(jl_value_t *sig) +{ + jl_tupletype_t *sigbody = (jl_tupletype_t*)jl_unwrap_unionall(sig); + size_t count_unions = 0; + size_t i, l = jl_svec_len(sigbody->parameters); + jl_svec_t *p = NULL; + jl_value_t *methsig = NULL; + + for (i = 0; i < l; i++) { + jl_value_t *ty = jl_svecref(sigbody->parameters, i); + if (jl_is_uniontype(ty)) + ++count_unions; + else if (ty == jl_bottom_type) + return; // why does this method exist? 
+ else if (jl_is_datatype(ty) && !jl_has_free_typevars(ty) && + ((!jl_is_kind(ty) && ((jl_datatype_t*)ty)->isconcretetype) || + ((jl_datatype_t*)ty)->name == jl_type_typename)) + return; // no amount of union splitting will make this a leaftype signature + } + + if (count_unions == 0 || count_unions >= 6) { + _compile_all_tvar_union(sig); + return; + } + + int *idx = (int*)alloca(sizeof(int) * count_unions); + for (i = 0; i < count_unions; i++) { + idx[i] = 0; + } + + JL_GC_PUSH2(&p, &methsig); + int idx_ctr = 0, incr = 0; + while (!incr) { + p = jl_alloc_svec_uninit(l); + for (i = 0, idx_ctr = 0, incr = 1; i < l; i++) { + jl_value_t *ty = jl_svecref(sigbody->parameters, i); + if (jl_is_uniontype(ty)) { + assert(idx_ctr < count_unions); + size_t l = jl_count_union_components(ty); + size_t j = idx[idx_ctr]; + jl_svecset(p, i, jl_nth_union_component(ty, j)); + ++j; + if (incr) { + if (j == l) { + idx[idx_ctr] = 0; + } + else { + idx[idx_ctr] = j; + incr = 0; + } + } + ++idx_ctr; + } + else { + jl_svecset(p, i, ty); + } + } + methsig = (jl_value_t*)jl_apply_tuple_type(p); + methsig = jl_rewrap_unionall(methsig, sig); + _compile_all_tvar_union(methsig); + } + + JL_GC_POP(); +} + +static int compile_all_collect__(jl_typemap_entry_t *ml, void *env) +{ + jl_array_t *allmeths = (jl_array_t*)env; + jl_method_t *m = ml->func.method; + if (m->source) { + // method has a non-generated definition; can be compiled generically + jl_array_ptr_1d_push(allmeths, (jl_value_t*)m); + } + return 1; +} + +static int compile_all_collect_(jl_methtable_t *mt, void *env) +{ + jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), compile_all_collect__, env); + return 1; +} + +static void jl_compile_all_defs(jl_array_t *mis) +{ + jl_array_t *allmeths = jl_alloc_vec_any(0); + JL_GC_PUSH1(&allmeths); + + jl_foreach_reachable_mtable(compile_all_collect_, allmeths); + + size_t i, l = jl_array_len(allmeths); + for (i = 0; i < l; i++) { + jl_method_t *m = (jl_method_t*)jl_array_ptr_ref(allmeths, i); 
+ if (jl_is_datatype(m->sig) && jl_isa_compileable_sig((jl_tupletype_t*)m->sig, jl_emptysvec, m)) { + // method has a single compilable specialization, e.g. its definition + // signature is concrete. in this case we can just hint it. + jl_compile_hint((jl_tupletype_t*)m->sig); + } + else { + // first try to create leaf signatures from the signature declaration and compile those + _compile_all_union(m->sig); + + // finally, compile a fully generic fallback that can work for all arguments + jl_method_instance_t *unspec = jl_get_unspecialized(m); + if (unspec) + jl_array_ptr_1d_push(mis, (jl_value_t*)unspec); + } + } + + JL_GC_POP(); +} + +static int precompile_enq_specialization_(jl_method_instance_t *mi, void *closure) +{ + assert(jl_is_method_instance(mi)); + jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache); + while (codeinst) { + int do_compile = 0; + if (jl_atomic_load_relaxed(&codeinst->invoke) != jl_fptr_const_return) { + jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred); + if (inferred && + inferred != jl_nothing && + jl_ir_flag_inferred((jl_array_t*)inferred) && + (jl_ir_inlining_cost((jl_array_t*)inferred) == UINT16_MAX)) { + do_compile = 1; + } + else if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL || jl_atomic_load_relaxed(&codeinst->precompile)) { + do_compile = 1; + } + } + if (do_compile) { + jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi); + return 1; + } + codeinst = jl_atomic_load_relaxed(&codeinst->next); + } + return 1; +} + +static int precompile_enq_all_specializations__(jl_typemap_entry_t *def, void *closure) +{ + jl_method_t *m = def->func.method; + if ((m->name == jl_symbol("__init__") || m->ccallable) && jl_is_dispatch_tupletype(m->sig)) { + // ensure `__init__()` and @ccallables get strongly-hinted, specialized, and compiled + jl_method_instance_t *mi = jl_specializations_get_linfo(m, m->sig, jl_emptysvec); + jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi); + } + else { + 
jl_svec_t *specializations = jl_atomic_load_relaxed(&def->func.method->specializations); + size_t i, l = jl_svec_len(specializations); + for (i = 0; i < l; i++) { + jl_value_t *mi = jl_svecref(specializations, i); + if (mi != jl_nothing) + precompile_enq_specialization_((jl_method_instance_t*)mi, closure); + } + } + if (m->ccallable) + jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)m->ccallable); + return 1; +} + +static int precompile_enq_all_specializations_(jl_methtable_t *mt, void *env) +{ + return jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), precompile_enq_all_specializations__, env); +} + +static void *jl_precompile_(jl_array_t *m, int external_linkage) +{ + jl_array_t *m2 = NULL; + jl_method_instance_t *mi = NULL; + JL_GC_PUSH2(&m2, &mi); + m2 = jl_alloc_vec_any(0); + for (size_t i = 0; i < jl_array_len(m); i++) { + jl_value_t *item = jl_array_ptr_ref(m, i); + if (jl_is_method_instance(item)) { + mi = (jl_method_instance_t*)item; + size_t min_world = 0; + size_t max_world = ~(size_t)0; + if (mi != jl_atomic_load_relaxed(&mi->def.method->unspecialized) && !jl_isa_compileable_sig((jl_tupletype_t*)mi->specTypes, mi->sparam_vals, mi->def.method)) + mi = jl_get_specialization1((jl_tupletype_t*)mi->specTypes, jl_atomic_load_acquire(&jl_world_counter), &min_world, &max_world, 0); + if (mi) + jl_array_ptr_1d_push(m2, (jl_value_t*)mi); + } + else { + assert(jl_is_simplevector(item)); + assert(jl_svec_len(item) == 2); + jl_array_ptr_1d_push(m2, item); + } + } + void *native_code = jl_create_native(m2, NULL, NULL, 0, 1, external_linkage); + JL_GC_POP(); + return native_code; +} + +static void *jl_precompile(int all) +{ + // array of MethodInstances and ccallable aliases to include in the output + jl_array_t *m = jl_alloc_vec_any(0); + JL_GC_PUSH1(&m); + if (all) + jl_compile_all_defs(m); + jl_foreach_reachable_mtable(precompile_enq_all_specializations_, m); + void *native_code = jl_precompile_(m, 0); + JL_GC_POP(); + return native_code; +} + +static 
void *jl_precompile_worklist(jl_array_t *worklist, jl_array_t *extext_methods, jl_array_t *new_specializations) +{ + if (!worklist) + return NULL; + // this "found" array will contain function + // type signatures that were inferred but haven't been compiled + jl_array_t *m = jl_alloc_vec_any(0); + JL_GC_PUSH1(&m); + size_t i, n = jl_array_len(worklist); + for (i = 0; i < n; i++) { + jl_module_t *mod = (jl_module_t*)jl_array_ptr_ref(worklist, i); + assert(jl_is_module(mod)); + foreach_mtable_in_module(mod, precompile_enq_all_specializations_, m); + } + n = jl_array_len(extext_methods); + for (i = 0; i < n; i++) { + jl_method_t *method = (jl_method_t*)jl_array_ptr_ref(extext_methods, i); + assert(jl_is_method(method)); + jl_svec_t *specializations = jl_atomic_load_relaxed(&method->specializations); + size_t j, l = jl_svec_len(specializations); + for (j = 0; j < l; j++) { + jl_value_t *mi = jl_svecref(specializations, j); + if (mi != jl_nothing) + precompile_enq_specialization_((jl_method_instance_t*)mi, m); + } + } + n = jl_array_len(new_specializations); + for (i = 0; i < n; i++) { + jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(new_specializations, i); + precompile_enq_specialization_(ci->def, m); + } + void *native_code = jl_precompile_(m, 1); + JL_GC_POP(); + return native_code; +} diff --git a/src/processor.cpp b/src/processor.cpp index df114b4d80257..13b40ec4f7363 100644 --- a/src/processor.cpp +++ b/src/processor.cpp @@ -621,9 +621,9 @@ static inline std::vector> &get_cmdline_targets(F &&feature_cb) // Load sysimg, use the `callback` for dispatch and perform all relocations // for the selected target. 
template -static inline jl_sysimg_fptrs_t parse_sysimg(void *hdl, F &&callback) +static inline jl_image_fptrs_t parse_sysimg(void *hdl, F &&callback) { - jl_sysimg_fptrs_t res = {nullptr, 0, nullptr, 0, nullptr, nullptr}; + jl_image_fptrs_t res = {nullptr, 0, nullptr, 0, nullptr, nullptr}; // .data base char *data_base; diff --git a/src/processor.h b/src/processor.h index 43c009ba72648..44699e850d014 100644 --- a/src/processor.h +++ b/src/processor.h @@ -133,7 +133,7 @@ JL_DLLEXPORT int jl_test_cpu_feature(jl_cpu_feature_t feature); static const uint32_t jl_sysimg_tag_mask = 0x80000000u; static const uint32_t jl_sysimg_val_mask = ~((uint32_t)0x80000000u); -typedef struct _jl_sysimg_fptrs_t { +typedef struct _jl_image_fptrs_t { // base function pointer const char *base; // number of functions @@ -151,7 +151,7 @@ typedef struct _jl_sysimg_fptrs_t { const int32_t *clone_offsets; // sorted indices of the cloned functions (including the tag bit) const uint32_t *clone_idxs; -} jl_sysimg_fptrs_t; +} jl_image_fptrs_t; /** * Initialize the processor dispatch system with sysimg `hdl` (also initialize the sysimg itself). @@ -163,14 +163,15 @@ typedef struct _jl_sysimg_fptrs_t { * * Return the data about the function pointers selected. */ -jl_sysimg_fptrs_t jl_init_processor_sysimg(void *hdl); -jl_sysimg_fptrs_t jl_init_processor_pkgimg(void *hdl); +jl_image_fptrs_t jl_init_processor_sysimg(void *hdl); +jl_image_fptrs_t jl_init_processor_pkgimg(void *hdl); // Return the name of the host CPU as a julia string. 
JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void); // Dump the name and feature set of the host CPU // For debugging only JL_DLLEXPORT void jl_dump_host_cpu(void); +JL_DLLEXPORT void jl_check_pkgimage_clones(char* data); JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero); JL_DLLEXPORT int32_t jl_get_zero_subnormals(void); diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index a46db93488770..748c2f2dd2917 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -1795,14 +1795,14 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void) return jl_cstr_to_string(host_cpu_name().c_str()); } -jl_sysimg_fptrs_t jl_init_processor_sysimg(void *hdl) +jl_image_fptrs_t jl_init_processor_sysimg(void *hdl) { if (!jit_targets.empty()) jl_error("JIT targets already initialized"); return parse_sysimg(hdl, sysimg_init_cb); } -jl_sysimg_fptrs_t jl_init_processor_pkgimg(void *hdl) +jl_image_fptrs_t jl_init_processor_pkgimg(void *hdl) { if (jit_targets.empty()) jl_error("JIT targets not initialized"); @@ -1811,6 +1811,11 @@ jl_sysimg_fptrs_t jl_init_processor_pkgimg(void *hdl) return parse_sysimg(hdl, pkgimg_init_cb); } +JL_DLLEXPORT void jl_check_pkgimage_clones(char *data) +{ + pkgimg_init_cb(data); +} + std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) { ensure_jit_target(imaging); diff --git a/src/processor_fallback.cpp b/src/processor_fallback.cpp index 3160bd0ba6750..c1353e1bb43b0 100644 --- a/src/processor_fallback.cpp +++ b/src/processor_fallback.cpp @@ -112,14 +112,14 @@ get_llvm_target_str(const TargetData<1> &data) using namespace Fallback; -jl_sysimg_fptrs_t jl_init_processor_sysimg(void *hdl) +jl_image_fptrs_t jl_init_processor_sysimg(void *hdl) { if (!jit_targets.empty()) jl_error("JIT targets already initialized"); return parse_sysimg(hdl, sysimg_init_cb); } -jl_sysimg_fptrs_t jl_init_processor_pkgimg(void *hdl) +jl_image_fptrs_t jl_init_processor_pkgimg(void *hdl) { if (jit_targets.empty()) jl_error("JIT targets not initialized"); @@ -170,6 
+170,11 @@ JL_DLLEXPORT void jl_dump_host_cpu(void) jl_safe_printf("Features: %s\n", jl_get_cpu_features_llvm().c_str()); } +JL_DLLEXPORT void jl_check_pkgimage_clones(char *data) +{ + pkgimg_init_cb(data); +} + extern "C" int jl_test_cpu_feature(jl_cpu_feature_t) { return 0; diff --git a/src/processor_x86.cpp b/src/processor_x86.cpp index b73838a55777e..6f064ddd47d19 100644 --- a/src/processor_x86.cpp +++ b/src/processor_x86.cpp @@ -1019,19 +1019,24 @@ JL_DLLEXPORT void jl_dump_host_cpu(void) cpus, ncpu_names); } +JL_DLLEXPORT void jl_check_pkgimage_clones(char *data) +{ + pkgimg_init_cb(data); +} + JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void) { return jl_cstr_to_string(host_cpu_name().c_str()); } -jl_sysimg_fptrs_t jl_init_processor_sysimg(void *hdl) +jl_image_fptrs_t jl_init_processor_sysimg(void *hdl) { if (!jit_targets.empty()) jl_error("JIT targets already initialized"); return parse_sysimg(hdl, sysimg_init_cb); } -jl_sysimg_fptrs_t jl_init_processor_pkgimg(void *hdl) +jl_image_fptrs_t jl_init_processor_pkgimg(void *hdl) { if (jit_targets.empty()) jl_error("JIT targets not initialized"); diff --git a/src/staticdata.c b/src/staticdata.c index f4e0c9b99e1ca..8033b78aa9367 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -89,6 +89,7 @@ External links: #include "julia_assert.h" #include "staticdata_utils.c" +#include "precompile_utils.c" #ifdef __cplusplus extern "C" { @@ -320,7 +321,7 @@ typedef struct { uint64_t base; uintptr_t *gvars_base; int32_t *gvars_offsets; - jl_sysimg_fptrs_t fptrs; + jl_image_fptrs_t fptrs; } jl_image_t; // array of definitions for the predefined function pointers @@ -363,6 +364,7 @@ typedef struct { jl_array_t *link_ids_relocs; jl_array_t *link_ids_gctags; jl_array_t *link_ids_gvars; + jl_array_t *link_ids_external_fnvars; jl_ptls_t ptls; htable_t callers_with_edges; jl_image_t *image; @@ -855,7 +857,6 @@ static void write_padding(ios_t *s, size_t nb) JL_NOTSAFEPOINT ios_write(s, zeros, nb); } - static void 
write_pointer(ios_t *s) JL_NOTSAFEPOINT { assert((ios_pos(s) & (sizeof(void*) - 1)) == 0 && "stream misaligned for writing a word-sized value"); @@ -1081,6 +1082,24 @@ static void record_gvars(jl_serializer_state *s, arraylist_t *globals) JL_NOTSAF } } +static void record_external_fns(jl_serializer_state *s, arraylist_t *external_fns) JL_NOTSAFEPOINT +{ + if (!s->incremental) { + assert(external_fns->len == 0); + (void) external_fns; + return; + } + + // We could call jl_queue_for_serialization here, but that should + // always be a no-op. +#ifndef JL_NDEBUG + for (size_t i = 0; i < external_fns->len; i++) { + jl_code_instance_t *ci = (jl_code_instance_t*)external_fns->items[i]; + assert(jl_object_in_image((jl_value_t*)ci)); + } +#endif +} + jl_value_t *jl_find_ptr = NULL; // The main function for serializing all the items queued in `serialization_order` // (They are also stored in `serialization_queue` which is order-preserving, unlike the hash table used @@ -1570,7 +1589,7 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset) } // Compute target location at deserialization -static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t base, size_t size, uintptr_t reloc_id, jl_array_t *link_ids, int *link_index) +static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t base, size_t size, uintptr_t reloc_id, jl_array_t *link_ids, int *link_index) JL_NOTSAFEPOINT { enum RefTags tag = (enum RefTags)(reloc_id >> RELOC_TAG_OFFSET); size_t offset = (reloc_id & (((uintptr_t)1 << RELOC_TAG_OFFSET) - 1)); @@ -1834,20 +1853,20 @@ static jl_value_t *jl_delayed_reloc(jl_serializer_state *s, uintptr_t offset) JL static void jl_update_all_fptrs(jl_serializer_state *s, jl_image_t *image) { - jl_sysimg_fptrs_t fvars = image->fptrs; + jl_image_fptrs_t fvars = image->fptrs; // make these NULL now so we skip trying to restore GlobalVariable pointers later image->gvars_base = NULL; image->fptrs.base = NULL; if (fvars.base 
== NULL) return; - int sysimg_fvars_max = s->fptr_record->size / sizeof(void*); + int img_fvars_max = s->fptr_record->size / sizeof(void*); size_t i; uintptr_t base = (uintptr_t)&s->s->buf[0]; // These will become MethodInstance references, but they start out as a list of // offsets into `s` for CodeInstances jl_method_instance_t **linfos = (jl_method_instance_t**)&s->fptr_record->buf[0]; uint32_t clone_idx = 0; - for (i = 0; i < sysimg_fvars_max; i++) { + for (i = 0; i < img_fvars_max; i++) { reloc_t offset = *(reloc_t*)&linfos[i]; linfos[i] = NULL; if (offset != 0) { @@ -1882,12 +1901,13 @@ static void jl_update_all_fptrs(jl_serializer_state *s, jl_image_t *image) } } // Tell LLVM about the native code - jl_register_fptrs(image->base, &fvars, linfos, sysimg_fvars_max); + jl_register_fptrs(image->base, &fvars, linfos, img_fvars_max); } -static void write_gvars(jl_serializer_state *s, arraylist_t *globals) JL_NOTSAFEPOINT +static uint32_t write_gvars(jl_serializer_state *s, arraylist_t *globals, arraylist_t *external_fns) JL_NOTSAFEPOINT { - ios_ensureroom(s->gvar_record, globals->len * sizeof(reloc_t)); + size_t len = globals->len + external_fns->len; + ios_ensureroom(s->gvar_record, len * sizeof(reloc_t)); for (size_t i = 0; i < globals->len; i++) { void *g = globals->items[i]; if (jl_is_binding((uintptr_t)g)) { @@ -1908,10 +1928,17 @@ static void write_gvars(jl_serializer_state *s, arraylist_t *globals) JL_NOTSAFE write_reloc_t(s->gvar_record, reloc); record_uniquing(s, (jl_value_t*)g, ((i << 2) | 2)); // mark as gvar && !tag } + for (size_t i = 0; i < external_fns->len; i++) { + jl_code_instance_t *ci = (jl_code_instance_t*)external_fns->items[i]; + uintptr_t item = backref_id(s, (void*)ci, s->link_ids_external_fnvars); + uintptr_t reloc = get_reloc_for_item(item, 0); + write_reloc_t(s->gvar_record, reloc); + } + return globals->len + 1; } // Pointer relocation for native-code referenced global variables -static void jl_update_all_gvars(jl_serializer_state *s, 
jl_image_t *image) +static void jl_update_all_gvars(jl_serializer_state *s, jl_image_t *image, uint32_t external_fns_begin) { if (image->gvars_base == NULL) return; @@ -1920,17 +1947,24 @@ static void jl_update_all_gvars(jl_serializer_state *s, jl_image_t *image) uintptr_t base = (uintptr_t)&s->s->buf[0]; size_t size = s->s->size; reloc_t *gvars = (reloc_t*)&s->gvar_record->buf[0]; - int link_index = 0; + int gvar_link_index = 0; + int external_fns_link_index = 0; for (i = 0; i < l; i++) { uintptr_t offset = gvars[i]; - uintptr_t v = get_item_for_reloc(s, base, size, offset, s->link_ids_gvars, &link_index); + uintptr_t v = 0; + if (i < external_fns_begin) { + v = get_item_for_reloc(s, base, size, offset, s->link_ids_gvars, &gvar_link_index); + } else { + v = get_item_for_reloc(s, base, size, offset, s->link_ids_external_fnvars, &external_fns_link_index); + } uintptr_t *gv = sysimg_gvars(image->gvars_base, image->gvars_offsets, i); *gv = v; } - assert(!s->link_ids_gvars || link_index == jl_array_len(s->link_ids_gvars)); + assert(!s->link_ids_gvars || gvar_link_index == jl_array_len(s->link_ids_gvars)); + assert(!s->link_ids_external_fnvars || external_fns_link_index == jl_array_len(s->link_ids_external_fnvars)); } -static void jl_root_new_gvars(jl_serializer_state *s, jl_image_t *image) +static void jl_root_new_gvars(jl_serializer_state *s, jl_image_t *image, uint32_t external_fns_begin) { if (image->gvars_base == NULL) return; @@ -1939,8 +1973,14 @@ static void jl_root_new_gvars(jl_serializer_state *s, jl_image_t *image) for (i = 0; i < l; i++) { uintptr_t *gv = sysimg_gvars(image->gvars_base, image->gvars_offsets, i); uintptr_t v = *gv; - if (!jl_is_binding(v)) - v = (uintptr_t)jl_as_global_root((jl_value_t*)v); + if (i < external_fns_begin) { + if (!jl_is_binding(v)) + v = (uintptr_t)jl_as_global_root((jl_value_t*)v); + } else { + jl_code_instance_t *codeinst = (jl_code_instance_t*) v; + assert(codeinst && codeinst->isspecsig); + v = 
(uintptr_t)codeinst->specptr.fptr; + } *gv = v; } } @@ -2292,13 +2332,18 @@ static void jl_save_system_image_to_stream(ios_t *f, s.link_ids_relocs = jl_alloc_array_1d(jl_array_uint64_type, 0); s.link_ids_gctags = jl_alloc_array_1d(jl_array_uint64_type, 0); s.link_ids_gvars = jl_alloc_array_1d(jl_array_uint64_type, 0); + s.link_ids_external_fnvars = jl_alloc_array_1d(jl_array_uint64_type, 0); htable_new(&s.callers_with_edges, 0); jl_value_t **const*const tags = get_tags(); // worklist == NULL ? get_tags() : NULL; arraylist_t gvars; + arraylist_t external_fns; arraylist_new(&gvars, 0); - if (native_functions) + arraylist_new(&external_fns, 0); + if (native_functions) { jl_get_llvm_gvs(native_functions, &gvars); + jl_get_llvm_external_fns(native_functions, &external_fns); + } if (worklist == NULL) { // empty!(Core.ARGS) @@ -2364,6 +2409,7 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_serialize_reachable(&s); // step 1.2: now that we have marked all bindings (badly), ensure all gvars are part of the sysimage record_gvars(&s, &gvars); + record_external_fns(&s, &external_fns); jl_serialize_reachable(&s); // step 1.3: prune (garbage collect) some special weak references from // built-in type caches @@ -2377,10 +2423,11 @@ static void jl_save_system_image_to_stream(ios_t *f, } } + uint32_t external_fns_begin = 0; { // step 2: build all the sysimg sections write_padding(&sysimg, sizeof(uintptr_t)); jl_write_values(&s); - write_gvars(&s, &gvars); + external_fns_begin = write_gvars(&s, &gvars, &external_fns); jl_write_relocations(&s); } @@ -2480,6 +2527,9 @@ static void jl_save_system_image_to_stream(ios_t *f, ios_write(f, (char*)jl_array_data(s.link_ids_relocs), jl_array_len(s.link_ids_relocs)*sizeof(uint64_t)); write_uint32(f, jl_array_len(s.link_ids_gvars)); ios_write(f, (char*)jl_array_data(s.link_ids_gvars), jl_array_len(s.link_ids_gvars)*sizeof(uint64_t)); + write_uint32(f, jl_array_len(s.link_ids_external_fnvars)); + ios_write(f, 
(char*)jl_array_data(s.link_ids_external_fnvars), jl_array_len(s.link_ids_external_fnvars)*sizeof(uint64_t)); + write_uint32(f, external_fns_begin); jl_write_arraylist(s.s, &s.ccallable_list); } // Write the build_id key @@ -2497,6 +2547,7 @@ static void jl_save_system_image_to_stream(ios_t *f, arraylist_free(&s.relocs_list); arraylist_free(&s.gctags_list); arraylist_free(&gvars); + arraylist_free(&external_fns); htable_free(&field_replace); if (worklist) htable_free(&external_objects); @@ -2517,9 +2568,8 @@ static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_a assert(jl_precompile_toplevel_module == NULL); jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1); - write_header(f); - // last word of the header is the checksumpos - *checksumpos = ios_pos(f) - sizeof(uint64_t); + *checksumpos = write_header(f, 0); + write_uint8(f, jl_cache_flags()); // write description of contents (name, uuid, buildid) write_worklist_for_header(f, worklist); // Determine unique (module, abspath, mtime) dependencies for the files defining modules in the worklist @@ -2533,88 +2583,96 @@ static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_a write_mod_list(f, *mod_array); } - -JL_DLLEXPORT ios_t *jl_create_system_image(void *_native_data, jl_array_t *worklist) +JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *worklist, bool_t emit_split, + ios_t **s, ios_t **z, jl_array_t **udeps, int64_t *srctextpos) { jl_gc_collect(JL_GC_FULL); jl_gc_collect(JL_GC_INCREMENTAL); // sweep finalizers JL_TIMING(SYSIMG_DUMP); + // iff emit_split + // write header and src_text to one file f/s + // write systemimg to a second file ff/z jl_task_t *ct = jl_current_task; ios_t *f = (ios_t*)malloc_s(sizeof(ios_t)); ios_mem(f, 0); - jl_array_t *mod_array = NULL, *udeps = NULL, *extext_methods = NULL, *new_specializations = NULL; + + ios_t *ff = NULL; + if (emit_split) { + ff = 
(ios_t*)malloc_s(sizeof(ios_t)); + ios_mem(ff, 0); + } else { + ff = f; + } + + jl_array_t *mod_array = NULL, *extext_methods = NULL, *new_specializations = NULL; jl_array_t *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL; - JL_GC_PUSH7(&mod_array, &udeps, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges); - int64_t srctextpos = 0; int64_t checksumpos = 0; + int64_t checksumpos_ff = 0; int64_t datastartpos = 0; + JL_GC_PUSH6(&mod_array, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges); if (worklist) { - jl_write_header_for_incremental(f, worklist, &mod_array, &udeps, &srctextpos, &checksumpos); + jl_write_header_for_incremental(f, worklist, &mod_array, udeps, srctextpos, &checksumpos); + if (emit_split) { + checksumpos_ff = write_header(ff, 1); + write_uint8(ff, jl_cache_flags()); + write_mod_list(ff, mod_array); + } else { + checksumpos_ff = checksumpos; + } jl_gc_enable_finalizers(ct, 0); // make sure we don't run any Julia code concurrently after this point jl_prepare_serialization_data(mod_array, newly_inferred, jl_worklist_key(worklist), &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges); - write_padding(f, LLT_ALIGN(ios_pos(f), JL_CACHE_BYTE_ALIGNMENT) - ios_pos(f)); - datastartpos = ios_pos(f); + + // Generate _native_data` + if (jl_options.outputo || jl_options.outputbc || jl_options.outputunoptbc || jl_options.outputasm) { + jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1); + *_native_data = jl_precompile_worklist(worklist, extext_methods, new_specializations); + jl_precompile_toplevel_module = NULL; + } + + if (!emit_split) { + write_int32(f, 0); // No clone_targets + write_padding(f, LLT_ALIGN(ios_pos(f), JL_CACHE_BYTE_ALIGNMENT) - ios_pos(f)); + } else { + write_padding(ff, LLT_ALIGN(ios_pos(ff), JL_CACHE_BYTE_ALIGNMENT) - ios_pos(ff)); + } + datastartpos = ios_pos(ff); + } else { + *_native_data = 
jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL); } - native_functions = _native_data; - jl_save_system_image_to_stream(f, worklist, extext_methods, new_specializations, method_roots_list, ext_targets, edges); + native_functions = *_native_data; + jl_save_system_image_to_stream(ff, worklist, extext_methods, new_specializations, method_roots_list, ext_targets, edges); native_functions = NULL; if (worklist) { jl_gc_enable_finalizers(ct, 1); // make sure we don't run any Julia code concurrently before this point + jl_precompile_toplevel_module = NULL; + } + + if (worklist) { // Go back and update the checksum in the header - int64_t dataendpos = ios_pos(f); - uint32_t checksum = jl_crc32c(0, &f->buf[datastartpos], dataendpos - datastartpos); - ios_seek(f, checksumpos); - write_uint64(f, checksum | ((uint64_t)0xfafbfcfd << 32)); - ios_seek(f, srctextpos); - write_uint64(f, dataendpos); - // Write the source-text for the dependent files - // Go back and update the source-text position to point to the current position - if (udeps) { - ios_seek_end(f); - // Each source-text file is written as - // int32: length of abspath - // char*: abspath - // uint64: length of src text - // char*: src text - // At the end we write int32(0) as a terminal sentinel. 
- size_t len = jl_array_len(udeps); - ios_t srctext; - for (size_t i = 0; i < len; i++) { - jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); - jl_value_t *depmod = jl_fieldref(deptuple, 0); // module - // Dependencies declared with `include_dependency` are excluded - // because these may not be Julia code (and could be huge) - if (depmod != (jl_value_t*)jl_main_module) { - jl_value_t *dep = jl_fieldref(deptuple, 1); // file abspath - const char *depstr = jl_string_data(dep); - if (!depstr[0]) - continue; - ios_t *srctp = ios_file(&srctext, depstr, 1, 0, 0, 0); - if (!srctp) { - jl_printf(JL_STDERR, "WARNING: could not cache source text for \"%s\".\n", - jl_string_data(dep)); - continue; - } - size_t slen = jl_string_len(dep); - write_int32(f, slen); - ios_write(f, depstr, slen); - int64_t posfile = ios_pos(f); - write_uint64(f, 0); // placeholder for length of this file in bytes - uint64_t filelen = (uint64_t) ios_copyall(f, &srctext); - ios_close(&srctext); - ios_seek(f, posfile); - write_uint64(f, filelen); - ios_seek_end(f); - } - } + int64_t dataendpos = ios_pos(ff); + uint32_t checksum = jl_crc32c(0, &ff->buf[datastartpos], dataendpos - datastartpos); + ios_seek(ff, checksumpos_ff); + write_uint64(ff, checksum | ((uint64_t)0xfafbfcfd << 32)); + write_uint64(ff, datastartpos); + write_uint64(ff, dataendpos); + ios_seek(ff, dataendpos); + + // Write the checksum to the split header if necessary + if (emit_split) { + int64_t cur = ios_pos(f); + ios_seek(f, checksumpos); + write_uint64(f, checksum | ((uint64_t)0xfafbfcfd << 32)); + ios_seek(f, cur); + // Next we will write the clone_targets and afterwards the srctext } - write_int32(f, 0); // mark the end of the source text - jl_precompile_toplevel_module = NULL; } JL_GC_POP(); - return f; + *s = f; + if (emit_split) + *z = ff; + return; } JL_DLLEXPORT size_t ios_write_direct(ios_t *dest, ios_t *src); @@ -2677,7 +2735,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl s.ptls = 
jl_current_task->ptls; arraylist_new(&s.relocs_list, 0); arraylist_new(&s.gctags_list, 0); - s.link_ids_relocs = s.link_ids_gctags = s.link_ids_gvars = NULL; + s.link_ids_relocs = s.link_ids_gctags = s.link_ids_gvars = s.link_ids_external_fnvars = NULL; jl_value_t **const*const tags = get_tags(); htable_t new_dt_objs; htable_new(&new_dt_objs, 0); @@ -2769,6 +2827,12 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl s.link_ids_gvars = jl_alloc_array_1d(jl_array_uint64_type, nlinks_gvars); ios_read(f, (char*)jl_array_data(s.link_ids_gvars), nlinks_gvars * sizeof(uint64_t)); } + size_t nlinks_external_fnvars = read_uint32(f); + if (nlinks_external_fnvars > 0) { + s.link_ids_external_fnvars = jl_alloc_array_1d(jl_array_uint64_type, nlinks_external_fnvars); + ios_read(f, (char*)jl_array_data(s.link_ids_external_fnvars), nlinks_external_fnvars * sizeof(uint64_t)); + } + uint32_t external_fns_begin = read_uint32(f); jl_read_arraylist(s.s, ccallable_list ? ccallable_list : &s.ccallable_list); if (s.incremental) { assert(restored && init_order && extext_methods && new_specializations && method_roots_list && ext_targets && edges); @@ -2782,6 +2846,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl } s.s = NULL; + // step 3: apply relocations assert(!ios_eof(f)); jl_read_symbols(&s); @@ -2798,7 +2863,8 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl (void)sizeof_tags; jl_read_reloclist(&s, s.link_ids_relocs, 0); // general relocs // s.link_ids_gvars will be processed in `jl_update_all_gvars` - jl_update_all_gvars(&s, image); // gvars relocs + // s.link_ids_external_fns will be processed in `jl_update_all_gvars` + jl_update_all_gvars(&s, image, external_fns_begin); // gvars relocs if (s.incremental) { jl_read_arraylist(s.relocs, &s.uniquing_types); jl_read_arraylist(s.relocs, &s.uniquing_objs); @@ -3107,7 +3173,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, 
jl_image_t *image, jl arraylist_free(&s.fixup_objs); if (s.incremental) - jl_root_new_gvars(&s, image); + jl_root_new_gvars(&s, image, external_fns_begin); ios_close(&relocs); ios_close(&const_data); ios_close(&gvar_record); @@ -3177,21 +3243,26 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl jl_gc_enable(en); } -static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_t *checksum, int64_t *dataendpos) +static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_t *checksum, int64_t *dataendpos, int64_t *datastartpos) { - if (ios_eof(f) || 0 == (*checksum = jl_read_verify_header(f)) || (*checksum >> 32 != 0xfafbfcfd)) { + uint8_t pkgimage = 0; + if (ios_eof(f) || 0 == (*checksum = jl_read_verify_header(f, &pkgimage, dataendpos, datastartpos)) || (*checksum >> 32 != 0xfafbfcfd)) { return jl_get_exceptionf(jl_errorexception_type, "Precompile file header verification checks failed."); } - { // skip past the mod list + uint8_t flags = read_uint8(f); + if (pkgimage && !jl_match_cache_flags(flags)) { + return jl_get_exceptionf(jl_errorexception_type, "Pkgimage flags mismatch"); + } + if (!pkgimage) { + // skip past the worklist size_t len; while ((len = read_int32(f))) ios_skip(f, len + 3 * sizeof(uint64_t)); - } - { // skip past the dependency list + // skip past the dependency list size_t deplen = read_uint64(f); ios_skip(f, deplen - sizeof(uint64_t)); - *dataendpos = read_uint64(f); + read_uint64(f); // where is this write coming from? 
} // verify that the system state is valid @@ -3203,10 +3274,14 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im { uint64_t checksum = 0; int64_t dataendpos = 0; - jl_value_t *verify_fail = jl_validate_cache_file(f, depmods, &checksum, &dataendpos); + int64_t datastartpos = 0; + jl_value_t *verify_fail = jl_validate_cache_file(f, depmods, &checksum, &dataendpos, &datastartpos); + if (verify_fail) return verify_fail; + assert(datastartpos > 0 && datastartpos < dataendpos); + jl_value_t *restored = NULL; jl_array_t *init_order = NULL, *extext_methods = NULL, *new_specializations = NULL, *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL; jl_svec_t *cachesizes_sv = NULL; @@ -3217,11 +3292,9 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im { // make a permanent in-memory copy of f (excluding the header) ios_bufmode(f, bm_none); JL_SIGATOMIC_BEGIN(); - size_t len_begin = LLT_ALIGN(ios_pos(f), JL_CACHE_BYTE_ALIGNMENT); - assert(len_begin > 0 && len_begin < dataendpos); - size_t len = dataendpos - len_begin; + size_t len = dataendpos - datastartpos; char *sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); - ios_seek(f, len_begin); + ios_seek(f, datastartpos); if (ios_readall(f, sysimg, len) != len || jl_crc32c(0, sysimg, len) != (uint32_t)checksum) { restored = jl_get_exceptionf(jl_errorexception_type, "Error reading system image file."); JL_SIGATOMIC_END(); @@ -3338,7 +3411,7 @@ JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len) JL_SIGATOMIC_END(); } -JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, jl_array_t *depmods) +JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, jl_array_t *depmods, int complete) { void *pkgimg_handle = jl_dlopen(fname, JL_RTLD_LAZY); if (!pkgimg_handle) { @@ -3362,9 +3435,9 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j if (!jl_dlsym(pkgimg_handle, 
"jl_sysimg_gvars_base", (void **)&pkgimage.gvars_base, 0)) { pkgimage.gvars_base = NULL; } + jl_dlsym(pkgimg_handle, "jl_sysimg_gvars_offsets", (void **)&pkgimage.gvars_offsets, 1); pkgimage.gvars_offsets += 1; - jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_data, &pkgimage, *plen, depmods, 0); void *pgcstack_func_slot; jl_dlsym(pkgimg_handle, "jl_pgcstack_func_slot", &pgcstack_func_slot, 0); @@ -3378,6 +3451,20 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j *tls_offset_idx = (uintptr_t)(jl_tls_offset == -1 ? 0 : jl_tls_offset); } + #ifdef _OS_WINDOWS_ + pkgimage.base = (intptr_t)pkgimg_handle; + #else + Dl_info dlinfo; + if (dladdr((void*)pkgimage.gvars_base, &dlinfo) != 0) { + pkgimage.base = (intptr_t)dlinfo.dli_fbase; + } + else { + pkgimage.base = 0; + } + #endif + + jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_data, &pkgimage, *plen, depmods, complete); + return mod; } diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index 19ccc591ea2cd..95b0fb56d4924 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -632,11 +632,46 @@ static void write_mod_list(ios_t *s, jl_array_t *a) write_int32(s, 0); } +JL_DLLEXPORT uint8_t jl_cache_flags(void) +{ + // ??OOCDDP: bit layout of the returned byte (P = use_pkgimages, DD = debug_level, C = check_bounds, OO = opt_level) + uint8_t flags = 0; + flags |= (jl_options.use_pkgimages & 1); // bit 0 (P) + flags |= (jl_options.debug_level & 3) << 1; // bits 1-2 (DD) + flags |= (jl_options.check_bounds & 1) << 3; // bit 3 (C); must not overlap DD in bits 1-2 + flags |= (jl_options.opt_level & 3) << 4; // bits 4-5 (OO) + // NOTES: + // In contrast to check-bounds, inline has no "observable effect" + return flags; +} + +JL_DLLEXPORT uint8_t jl_match_cache_flags(uint8_t flags) +{ + // 1. Check which flags are relevant + uint8_t current_flags = jl_cache_flags(); + uint8_t supports_pkgimage = (current_flags & 1); + uint8_t is_pkgimage = (flags & 1); + + // For .ji packages ignore other flags + if (!supports_pkgimage && !is_pkgimage) { + return 1; + } + + // 2. 
Check all flags that must be exact + uint8_t mask = (1 << 4)-1; + if ((flags & mask) != (current_flags & mask)) + return 0; + // 3. allow for higher optimization flags in cache + flags >>= 4; + current_flags >>= 4; + return flags >= current_flags; +} + // "magic" string and version header of .ji file static const int JI_FORMAT_VERSION = 12; static const char JI_MAGIC[] = "\373jli\r\n\032\n"; // based on PNG signature static const uint16_t BOM = 0xFEFF; // byte-order marker -static void write_header(ios_t *s) +static int64_t write_header(ios_t *s, uint8_t pkgimage) { ios_write(s, JI_MAGIC, strlen(JI_MAGIC)); write_uint16(s, JI_FORMAT_VERSION); @@ -648,7 +683,12 @@ static void write_header(ios_t *s) const char *branch = jl_git_branch(), *commit = jl_git_commit(); ios_write(s, branch, strlen(branch)+1); ios_write(s, commit, strlen(commit)+1); + write_uint8(s, pkgimage); + int64_t checksumpos = ios_pos(s); write_uint64(s, 0); // eventually will hold checksum for the content portion of this (build_id.hi) + write_uint64(s, 0); // eventually will hold datastartpos (slot order matches jl_read_verify_header) + write_uint64(s, 0); // eventually will hold dataendpos + return checksumpos; } // serialize information about the result of deserializing this file @@ -1262,9 +1302,10 @@ static int readstr_verify(ios_t *s, const char *str, int include_null) return 1; } -JL_DLLEXPORT uint64_t jl_read_verify_header(ios_t *s) +JL_DLLEXPORT uint64_t jl_read_verify_header(ios_t *s, uint8_t *pkgimage, int64_t *dataendpos, int64_t *datastartpos) { uint16_t bom; + uint64_t checksum = 0; if (readstr_verify(s, JI_MAGIC, 0) && read_uint16(s) == JI_FORMAT_VERSION && ios_read(s, (char *) &bom, 2) == 2 && bom == BOM && @@ -1274,6 +1315,11 @@ JL_DLLEXPORT uint64_t jl_read_verify_header(ios_t *s) readstr_verify(s, JULIA_VERSION_STRING, 1) && readstr_verify(s, jl_git_branch(), 1) && readstr_verify(s, jl_git_commit(), 1)) - return read_uint64(s); - return 0; + { + *pkgimage = read_uint8(s); + checksum = read_uint64(s); + *datastartpos = 
(int64_t)read_uint64(s); + *dataendpos = (int64_t)read_uint64(s); + } + return checksum; } diff --git a/stdlib/CompilerSupportLibraries_jll/Project.toml b/stdlib/CompilerSupportLibraries_jll/Project.toml index b072831326627..fc5883cc79802 100644 --- a/stdlib/CompilerSupportLibraries_jll/Project.toml +++ b/stdlib/CompilerSupportLibraries_jll/Project.toml @@ -4,7 +4,7 @@ uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" # NOTE: When updating this, also make sure to update the value # `CSL_NEXT_GLIBCXX_VERSION` in `deps/csl.mk`, to properly disable # automatic usage of BB-built CSLs on extremely up-to-date systems! -version = "1.0.1+0" +version = "1.0.2+0" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/test/compiler/contextual.jl b/test/compiler/contextual.jl index b2f51b2047563..4cd4aa22eb6d8 100644 --- a/test/compiler/contextual.jl +++ b/test/compiler/contextual.jl @@ -209,7 +209,11 @@ try @test length(Bar.mt) == 1 finally rm(load_path, recursive=true, force=true) - rm(depot_path, recursive=true, force=true) + try + rm(depot_path, force=true, recursive=true) + catch err + @show err + end filter!((≠)(load_path), LOAD_PATH) filter!((≠)(depot_path), DEPOT_PATH) end diff --git a/test/loading.jl b/test/loading.jl index d52a7246abe7c..497dfaed4af18 100644 --- a/test/loading.jl +++ b/test/loading.jl @@ -746,7 +746,11 @@ for env in keys(envs) rm(env, force=true, recursive=true) end for depot in depots - rm(depot, force=true, recursive=true) + try + rm(depot, force=true, recursive=true) + catch err + @show err + end end append!(empty!(LOAD_PATH), saved_load_path) diff --git a/test/precompile.jl b/test/precompile.jl index 3794cd353f41f..d0f58f6e680cb 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -28,8 +28,18 @@ function precompile_test_harness(@nospecialize(f), separate::Bool) pushfirst!(DEPOT_PATH, load_cache_path) f(load_path) finally - rm(load_path, recursive=true, force=true) - separate && rm(load_cache_path, recursive=true, force=true) + try 
+ rm(load_path, force=true, recursive=true) + catch err + @show err + end + if separate + try + rm(load_cache_path, force=true, recursive=true) + catch err + @show err + end + end filter!((≠)(load_path), LOAD_PATH) separate && filter!((≠)(load_cache_path), DEPOT_PATH) end @@ -318,11 +328,16 @@ precompile_test_harness(false) do dir cachedir = joinpath(dir, "compiled", "v$(VERSION.major).$(VERSION.minor)") cachedir2 = joinpath(dir2, "compiled", "v$(VERSION.major).$(VERSION.minor)") cachefile = joinpath(cachedir, "$Foo_module.ji") + if Base.JLOptions().use_pkgimages == 1 + ocachefile = Base.ocachefile_from_cachefile(cachefile) + else + ocachefile = nothing + end # use _require_from_serialized to ensure that the test fails if # the module doesn't reload from the image: @test_warn "@ccallable was already defined for this method name" begin @test_logs (:warn, "Replacing module `$Foo_module`") begin - m = Base._require_from_serialized(Base.PkgId(Foo), cachefile) + m = Base._require_from_serialized(Base.PkgId(Foo), cachefile, ocachefile) @test isa(m, Module) end end @@ -343,7 +358,7 @@ precompile_test_harness(false) do dir @test string(Base.Docs.doc(Foo.Bar.bar)) == "bar function\n" @test string(Base.Docs.doc(Foo.Bar)) == "Bar module\n" - modules, (deps, requires), required_modules = Base.parse_cache_header(cachefile) + modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile) discard_module = mod_fl_mt -> (mod_fl_mt.filename, mod_fl_mt.mtime) @test modules == [ Base.PkgId(Foo) => Base.module_build_id(Foo) % UInt64 ] @test map(x -> x.filename, deps) == [ Foo_file, joinpath(dir, "foo.jl"), joinpath(dir, "bar.jl") ] @@ -378,7 +393,7 @@ precompile_test_harness(false) do dir ), ) @test discard_module.(deps) == deps1 - modules, (deps, requires), required_modules = Base.parse_cache_header(cachefile; srcfiles_only=true) + modules, (deps, requires), required_modules, _... 
= Base.parse_cache_header(cachefile; srcfiles_only=true) @test map(x -> x.filename, deps) == [Foo_file] @test current_task()(0x01, 0x4000, 0x30031234) == 2 @@ -441,7 +456,7 @@ precompile_test_harness(false) do dir """) Nest = Base.require(Main, Nest_module) cachefile = joinpath(cachedir, "$Nest_module.ji") - modules, (deps, requires), required_modules = Base.parse_cache_header(cachefile) + modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile) @test last(deps).modpath == ["NestInner"] UsesB_module = :UsesB4b3a94a1a081a8cb @@ -463,7 +478,7 @@ precompile_test_harness(false) do dir """) UsesB = Base.require(Main, UsesB_module) cachefile = joinpath(cachedir, "$UsesB_module.ji") - modules, (deps, requires), required_modules = Base.parse_cache_header(cachefile) + modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile) id1, id2 = only(requires) @test Base.pkgorigins[id1].cachepath == cachefile @test Base.pkgorigins[id2].cachepath == joinpath(cachedir, "$B_module.ji") @@ -497,18 +512,19 @@ precompile_test_harness(false) do dir end """) - cachefile = Base.compilecache(Base.PkgId("FooBar")) + cachefile, _ = Base.compilecache(Base.PkgId("FooBar")) empty_prefs_hash = Base.get_preferences_hash(nothing, String[]) @test cachefile == Base.compilecache_path(Base.PkgId("FooBar"), empty_prefs_hash) @test isfile(joinpath(cachedir, "FooBar.ji")) - @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Vector + Tsc = Bool(Base.JLOptions().use_pkgimages) ? Tuple{<:Vector, String} : Tuple{<:Vector, Nothing} + @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc @test !isdefined(Main, :FooBar) @test !isdefined(Main, :FooBar1) relFooBar_file = joinpath(dir, "subfolder", "..", "FooBar.jl") - @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa (Sys.iswindows() ? 
Vector : Bool) # `..` is not a symlink on Windows + @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa (Sys.iswindows() ? Tsc : Bool) # `..` is not a symlink on Windows; `Tsc` also covers runs without pkgimages mkdir(joinpath(dir, "subfolder")) - @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa Vector + @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc @eval using FooBar fb_uuid = Base.module_build_id(FooBar) @@ -520,7 +536,7 @@ precompile_test_harness(false) do dir @test !isfile(joinpath(cachedir, "FooBar1.ji")) @test isfile(joinpath(cachedir2, "FooBar1.ji")) @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) === true - @test Base.stale_cachefile(FooBar1_file, joinpath(cachedir2, "FooBar1.ji")) isa Vector + @test Base.stale_cachefile(FooBar1_file, joinpath(cachedir2, "FooBar1.ji")) isa Tsc @test fb_uuid == Base.module_build_id(FooBar) fb_uuid1 = Base.module_build_id(FooBar1) @test fb_uuid != fb_uuid1 @@ -1260,7 +1276,11 @@ end end finally cd(save_cwd) - rm(temp_path, recursive=true) + try + rm(temp_path, recursive=true) + catch err + @show err + end pop!(test_workers) # remove myid rmprocs(test_workers) end @@ -1400,13 +1420,13 @@ precompile_test_harness("Issue #25971") do load_path sourcefile = joinpath(load_path, "Foo25971.jl") write(sourcefile, "module Foo25971 end") chmod(sourcefile, 0o666) - cachefile = Base.compilecache(Base.PkgId("Foo25971")) + cachefile, _ = Base.compilecache(Base.PkgId("Foo25971")) @test filemode(sourcefile) == filemode(cachefile) chmod(sourcefile, 0o600) - cachefile = Base.compilecache(Base.PkgId("Foo25971")) + cachefile, _ = Base.compilecache(Base.PkgId("Foo25971")) @test filemode(sourcefile) == filemode(cachefile) chmod(sourcefile, 0o444) - cachefile = Base.compilecache(Base.PkgId("Foo25971")) + cachefile, _ = Base.compilecache(Base.PkgId("Foo25971")) # Check writable @test touch(cachefile) == cachefile end @@ -1580,6 +1600,80 @@ end @test 
which(f46778, Tuple{Any,DataType}).specializations[1].cache.invoke != C_NULL end +precompile_test_harness("PkgCacheInspector") do load_path + # Test functionality needed by PkgCacheInspector.jl + write(joinpath(load_path, "PCI.jl"), + """ + module PCI + Base.repl_cmd() = 55 # external method + f() = Base.repl_cmd(7, "hello") # external specialization (should never exist otherwise) + try + f() + catch + end + end + """) + cachefile, ocachefile = Base.compilecache(Base.PkgId("PCI")) + + # Get the depmods + local depmods + @lock Base.require_lock begin + local depmodnames + io = open(cachefile, "r") + try + # isvalid_cache_header returns checksum id or zero + Base.isvalid_cache_header(io) == 0 && throw(ArgumentError("Invalid header in cache file $cachefile.")) + depmodnames = Base.parse_cache_header(io)[3] + Base.isvalid_file_crc(io) || throw(ArgumentError("Invalid checksum in cache file $cachefile.")) + finally + close(io) + end + ndeps = length(depmodnames) + depmods = Vector{Any}(undef, ndeps) + for i in 1:ndeps + modkey, build_id = depmodnames[i] + dep = Base._tryrequire_from_serialized(modkey, build_id) + if !isa(dep, Module) + return dep + end + depmods[i] = dep + end + end + + if ocachefile !== nothing + sv = ccall(:jl_restore_package_image_from_file, Any, (Cstring, Any, Cint), ocachefile, depmods, true) + else + sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint), cachefile, depmods, true) + end + + modules, init_order, external_methods, new_specializations, new_method_roots, external_targets, edges = sv + m = only(external_methods) + @test m.name == :repl_cmd && m.nargs < 2 + @test any(new_specializations) do ci + mi = ci.def + mi.specTypes == Tuple{typeof(Base.repl_cmd), Int, String} + end +end + +precompile_test_harness("DynamicExpressions") do load_path + # https://github.com/JuliaLang/julia/pull/47184#issuecomment-1364716312 + write(joinpath(load_path, "Float16MWE.jl"), + """ + module Float16MWE + struct Node{T} + val::T + end + 
doconvert(::Type{<:Node}, val) = convert(Float16, val) + precompile(Tuple{typeof(doconvert), Type{Node{Float16}}, Float64}) + end # module Float16MWE + """) + Base.compilecache(Base.PkgId("Float16MWE")) + (@eval (using Float16MWE)) + Base.invokelatest() do + @test Float16MWE.doconvert(Float16MWE.Node{Float16}, -1.2) === Float16(-1.2) + end +end + empty!(Base.DEPOT_PATH) append!(Base.DEPOT_PATH, original_depot_path) empty!(Base.LOAD_PATH) From 78809309828080b4033439e071291a90734295e7 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Thu, 22 Dec 2022 19:44:35 +0800 Subject: [PATCH 56/57] Apply `InitialOptimizations` more consistently in sorting & fix dispatch bug (#47946) * Apply InitialOptimizations by default in several cases when it was previously present * fixup for MissingOptimization * fix stability in the sortperm union with missing case (cherry picked from commit 12e679cabbe827d3be1869b9eaac24263415ee95) --- base/sort.jl | 47 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 6d9f65c61b390..1266da8a8c9df 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -86,7 +86,7 @@ issorted(itr; issorted(itr, ord(lt,by,rev,order)) function partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}, o::Ordering) - _sort!(v, QuickerSort(k), o, (;)) + _sort!(v, InitialOptimizations(QuickerSort(k)), o, (;)) maybeview(v, k) end @@ -566,8 +566,30 @@ function _sort!(v::AbstractVector, a::MissingOptimization, o::Ordering, kw) if nonmissingtype(eltype(v)) != eltype(v) && o isa DirectOrdering lo, hi = send_to_end!(ismissing, v, o; lo, hi) _sort!(WithoutMissingVector(v, unsafe=true), a.next, o, (;kw..., lo, hi)) - elseif eltype(v) <: Integer && o isa Perm{DirectOrdering} && nonmissingtype(eltype(o.data)) != eltype(o.data) - lo, hi = send_to_end!(i -> ismissing(@inbounds o.data[i]), v, o) + elseif eltype(v) <: Integer && o isa Perm && o.order isa DirectOrdering && + 
nonmissingtype(eltype(o.data)) != eltype(o.data) && + all(i === j for (i,j) in zip(v, eachindex(o.data))) + # TODO make this branch known at compile time + # This uses a custom function because we need to ensure stability of both sides and + # we can assume v is equal to eachindex(o.data) which allows a copying partition + # without allocations. + lo_i, hi_i = lo, hi + for (i,x) in zip(eachindex(o.data), o.data) + if ismissing(x) == (o.order == Reverse) # should i go at the beginning? + v[lo_i] = i + lo_i += 1 + else + v[hi_i] = i + hi_i -= 1 + end + end + reverse!(v, lo_i, hi) + if o.order == Reverse + lo = lo_i + else + hi = hi_i + end + _sort!(v, a.next, Perm(o.order, WithoutMissingVector(o.data, unsafe=true)), (;kw..., lo, hi)) else _sort!(v, a.next, o, kw) @@ -1160,7 +1182,9 @@ end """ InitialOptimizations(next) <: Algorithm -Attempt to apply a suite of low-cost optimizations to the input vector before sorting. +Attempt to apply a suite of low-cost optimizations to the input vector before sorting. These +optimizations may be automatically applied by the `sort!` family of functions when +`alg=InsertionSort`, `alg=MergeSort`, or `alg=QuickSort` is passed as an argument. `InitialOptimizations` is an implementation detail and subject to change or removal in future versions of Julia. 
@@ -1347,7 +1371,7 @@ function sort!(v::AbstractVector{T}; rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, scratch::Union{Vector{T}, Nothing}=nothing) where T - _sort!(v, alg, ord(lt,by,rev,order), (;scratch)) + _sort!(v, maybe_apply_initial_optimizations(alg), ord(lt,by,rev,order), (;scratch)) v end @@ -1474,7 +1498,7 @@ function partialsortperm!(ix::AbstractVector{<:Integer}, v::AbstractVector, end # do partial quicksort - _sort!(ix, QuickerSort(k), Perm(ord(lt, by, rev, order), v), (;)) + _sort!(ix, InitialOptimizations(QuickerSort(k)), Perm(ord(lt, by, rev, order), v), (;)) maybeview(ix, k) end @@ -1679,11 +1703,11 @@ function sort(A::AbstractArray{T}; pdims = (dim, setdiff(1:ndims(A), dim)...) # put the selected dimension first Ap = permutedims(A, pdims) Av = vec(Ap) - sort_chunks!(Av, n, alg, order, scratch) + sort_chunks!(Av, n, maybe_apply_initial_optimizations(alg), order, scratch) permutedims(Ap, invperm(pdims)) else Av = A[:] - sort_chunks!(Av, n, alg, order, scratch) + sort_chunks!(Av, n, maybe_apply_initial_optimizations(alg), order, scratch) reshape(Av, axes(A)) end end @@ -1746,7 +1770,7 @@ function sort!(A::AbstractArray{T}; rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, # TODO stop eagerly over-allocating. scratch::Union{Vector{T}, Nothing}=similar(A, size(A, dims))) where T - __sort!(A, Val(dims), alg, ord(lt, by, rev, order), scratch) + __sort!(A, Val(dims), maybe_apply_initial_optimizations(alg), ord(lt, by, rev, order), scratch) end function __sort!(A::AbstractArray{T}, ::Val{K}, alg::Algorithm, @@ -1911,6 +1935,11 @@ Characteristics: """ const MergeSort = MergeSortAlg() +maybe_apply_initial_optimizations(alg::Algorithm) = alg +maybe_apply_initial_optimizations(alg::QuickSortAlg) = InitialOptimizations(alg) +maybe_apply_initial_optimizations(alg::MergeSortAlg) = InitialOptimizations(alg) +maybe_apply_initial_optimizations(alg::InsertionSortAlg) = InitialOptimizations(alg) + # selectpivot! 
# # Given 3 locations in an array (lo, mi, and hi), sort v[lo], v[mi], v[hi]) and From 6adc4288f58307ae9dec12468b2e4778a99a9023 Mon Sep 17 00:00:00 2001 From: Antonio Rojas Date: Sun, 25 Dec 2022 00:57:00 +0100 Subject: [PATCH 57/57] Restore libgcc_s symlinking in !macOS (#47986) Commit c8b72e2bf49046e8daca64214765694377277947 completely removed libgcc_s symlinking (I assume unintentionally) in !macOS. (cherry picked from commit ea13810f632341409eeddf008aef66b11f015b3d) --- base/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/base/Makefile b/base/Makefile index e4b28f1380705..d92302b766988 100644 --- a/base/Makefile +++ b/base/Makefile @@ -231,6 +231,8 @@ else $(eval $(call symlink_system_library,CSL,libgcc_s,1)) endif endif +else +$(eval $(call symlink_system_library,CSL,libgcc_s,1)) endif ifneq (,$(LIBGFORTRAN_VERSION)) $(eval $(call symlink_system_library,CSL,libgfortran,$(LIBGFORTRAN_VERSION)))